Python机器学习——人脸性别识别

一、选题背景

人脸识别技术是模式识别和计算机视觉领域最富挑战性的研究课题之一，也是近年来的研究热点，人脸性别识别作为人脸识别技术的重要组成部分也受到了广泛的关注。人脸性别识别就是向计算机输入人脸图像，经过某种方法或运算，得出其性别。这种识别对人眼来说很简单，但对计算机却并不是一件容易的事情。

二、机器学习案例设计方案

从网站中下载相关的数据集，对数据集进行整理，在python的环境中，给数据集中的文件进行划分，对数据进行预处理，利用keras，构建神经网络，训练模型，导入图片测试模型。

数据来源：kaggle，网址：https://www.kaggle.com/maciejgronczynski/biggest-genderface-recognition-dataset

数据集包含27167个jpg文件，其中17678个是男性面部照片，9489个是女性照片。

三、机器学习的实验步骤

1.下载数据集

2.导入需要用到的库

import os
import random
from shutil import copy
from matplotlib import pyplot as plt
from keras import optimizers
from keras import models
from keras import layers
from keras.preprocessing.image import ImageDataGenerator
from keras.models import load_model
from PIL import Image

3.数据集划分，由总的数据集分别生成训练集、验证集和测试集

# Root directory of the dataset. Every path below is built with os.path.join
# so it resolves to the same nested folders on all platforms; raw strings
# with backslashes only name nested folders on Windows.
faces_root = os.path.join('sex', 'faces')

# Destination folders for the female images: training, validation, test.
woman_train_dir = os.path.join(faces_root, 'train', 'woman')
woman_validation_dir = os.path.join(faces_root, 'validation', 'woman')
woman_test_dir = os.path.join(faces_root, 'test', 'woman')

# Destination folders for the male images: training, validation, test.
man_train_dir = os.path.join(faces_root, 'train', 'man')
man_validation_dir = os.path.join(faces_root, 'validation', 'man')
man_test_dir = os.path.join(faces_root, 'test', 'man')

# Collect the six destination folders and create any that are missing.
dir_list = [woman_train_dir, woman_validation_dir, woman_test_dir,
            man_train_dir, man_validation_dir, man_test_dir]
for dir_child in dir_list:
    os.makedirs(dir_child, exist_ok=True)


def _copy_split(src_dir, file_names, train_dir, validation_dir, test_dir,
                train_count=6000, validation_count=3000):
    """Copy the files of one class into its three split folders.

    The first ``train_count`` names are copied to ``train_dir``, the next
    ``validation_count`` names to ``validation_dir`` and every remaining
    name to ``test_dir``.
    """
    validation_end = train_count + validation_count
    for index, file_name in enumerate(file_names):
        if index < train_count:
            target_dir = train_dir
        elif index < validation_end:
            target_dir = validation_dir
        else:
            target_dir = test_dir
        copy(os.path.join(src_dir, file_name), target_dir)


# Folder holding all female source images.
woman_path = os.path.join(faces_root, 'woman')
# os.listdir returns names in arbitrary order; sorting keeps the split
# identical from one run to the next.
woman_path_list = sorted(os.listdir(woman_path))
# 6000 images for training, 3000 for validation, the rest for testing.
_copy_split(woman_path, woman_path_list,
            woman_train_dir, woman_validation_dir, woman_test_dir)

# Folder holding all male source images.
man_path = os.path.join(faces_root, 'man')
man_path_list = sorted(os.listdir(man_path))
# Same 6000 / 3000 / rest split for the male images.
_copy_split(man_path, man_path_list,
            man_train_dir, man_validation_dir, man_test_dir)

# Report how many files ended up in each folder.
train_path = "sex/faces/train/"
print('total training woman images:', len(os.listdir(train_path+"woman")))
print('total training man images:', len(os.listdir(train_path+"man")))

valid_path = "sex/faces/validation/"
print('total validation woman images:', len(os.listdir(valid_path+"woman")))
print('total validation man images:', len(os.listdir(valid_path+"man")))

test_path = "sex/faces/test/"
print('total test woman images:', len(os.listdir(test_path+"woman")))
print('total test man images:', len(os.listdir(test_path+"man")))

4.查看图像以及对应标签

# Show up to nine male test images, each titled with its own file name.
fig, ax = plt.subplots(nrows=3, ncols=3, figsize=(10, 7))
# Folder the preview images are read from.
test_view_path = os.path.join('sex', 'faces', 'test', 'man')
# Names of the files inside test_view_path.
test_view_list = os.listdir(test_view_path)
# zip() stops at the shorter sequence, so a folder with fewer than nine
# images leaves the remaining axes empty instead of raising IndexError.
for a, file_name in zip(ax.flat, test_view_list):
    view_path = os.path.join(test_view_path, file_name)
    # Read and draw the original image.
    a.imshow(plt.imread(view_path))
    # Title with the name of the file actually displayed; indexing the
    # source-folder list here would label the picture with another file.
    a.set_title(file_name)
plt.tight_layout()  # adjust subplot spacing to fill the figure area
plt.show()

5.图片预处理

# Image preprocessing: build one directory iterator per data split.
BATCH_SIZE = 20        # images per batch
IMG_SIZE = (150, 150)  # size every image is resized to

# Generators that only rescale pixel values by 1/255.
train_datagen = ImageDataGenerator(rescale=1./255)
validation_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

# Options shared by all three iterators: RGB input, binary labels.
_flow_options = dict(target_size=IMG_SIZE,
                     batch_size=BATCH_SIZE,
                     color_mode='rgb',
                     class_mode='binary')

train_dir = 'sex/faces/train'  # training images
train_generator = train_datagen.flow_from_directory(train_dir,
                                                    **_flow_options)

validation_dir = 'sex/faces/validation'  # validation images
validation_generator = validation_datagen.flow_from_directory(validation_dir,
                                                              **_flow_options)

test_dir = 'sex/faces/test'  # test images
test_generator = test_datagen.flow_from_directory(test_dir,
                                                  **_flow_options)

6.查看经过处理的图片以及它的binary标签

# Show nine preprocessed test images together with their binary labels.
fig, ax = plt.subplots(nrows=3, ncols=3, figsize=(10, 7))

for a in ax.flat:
    # The built-in next() works on every Keras iterator version, whereas
    # the .next() method is not available on all of them.
    img, label = next(test_generator)
    # Draw the first image of the batch and use its label as the title.
    a.imshow(img[0])
    a.set_title(label[0])

plt.tight_layout()
plt.show()

7.构建神经网络并对模型进行训练

# Build the convolutional network.
model = models.Sequential()

# Conv block 1: 32 filters. Output 150-3+1 = 148x148,
# parameters 32*3*3*3+32 = 896.
model.add(layers.Conv2D(32, (3, 3),
                        activation='relu',
                        input_shape=(150, 150, 3)))
model.add(layers.MaxPooling2D((2, 2)))  # 148/2 = 74x74

# Conv block 2: 64 filters. Output 74-3+1 = 72x72,
# parameters 64*3*3*32+64 = 18496.
model.add(layers.Conv2D(64, (3, 3),
                        activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))  # 72/2 = 36x36

# Conv block 3: 128 filters. Output 36-3+1 = 34x34,
# parameters 128*3*3*64+128 = 73856.
model.add(layers.Conv2D(128, (3, 3),
                        activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))  # 34/2 = 17x17

# Conv block 4: 128 filters. Output 17-3+1 = 15x15,
# parameters 128*3*3*128+128 = 147584.
model.add(layers.Conv2D(128, (3, 3),
                        activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))  # 15//2 = 7x7

# Flatten the feature maps to one dimension for the dense layers.
model.add(layers.Flatten())

# Fully connected layers; the single sigmoid unit is the two-class output.
model.add(layers.Dense(512,
                       activation='relu'))
model.add(layers.Dense(1,
                       activation='sigmoid'))

# Compile with RMSprop. The last layer is a single sigmoid unit, so binary
# cross-entropy is the loss. `learning_rate` replaces the deprecated `lr`
# argument name.
model.compile(loss='binary_crossentropy',
              optimizer=optimizers.RMSprop(learning_rate=1e-4),
              metrics=['acc'])

# Print how the feature-map dimensions change from layer to layer.
model.summary()

# Train for 50 epochs.
history_save = model.fit(
                    train_generator,
                    steps_per_epoch=100,
                    epochs=50,
                    validation_data=validation_generator,
                    validation_steps=50)
# Save the trained model as an h5 file.
model.save('sex/faces/sex_model.h5')

8.绘制损失曲线和精度曲线图

# Plot the loss and accuracy curves recorded during training.
accuracy = history_save.history['acc']  # training accuracy
loss = history_save.history['loss']  # training loss
val_loss = history_save.history['val_loss']  # validation loss
val_accuracy = history_save.history['val_acc']  # validation accuracy
# Take the x axis from the recorded history instead of a hard-coded 50, so
# the plots stay valid when the number of epochs changes.
epoch_axis = range(len(accuracy))
plt.figure(figsize=(17, 7))

# Training vs. validation accuracy.
plt.subplot(2, 2, 1)
plt.plot(epoch_axis, accuracy, 'bo', label='Training Accuracy')
plt.plot(epoch_axis, val_accuracy, label='Validation Accuracy')
plt.title('Training and Validation Accuracy')
plt.legend(loc='center right')

# Training vs. validation loss.
plt.subplot(2, 2, 2)
plt.plot(epoch_axis, loss, 'bo', label='Training Loss')
plt.plot(epoch_axis, val_loss, label='Validation Loss')
plt.title('Training and Validation Loss')
plt.legend(loc='center right')

# Scatter plot of training accuracy and loss.
plt.subplot(2, 2, 3)
plt.scatter(epoch_axis, accuracy, label="Training Accuracy", color='b', s=25, marker="o")
plt.scatter(epoch_axis, loss, label="Training Loss", color='r', s=25, marker="o")
plt.title('Training : Accuracy and Loss')
plt.legend(loc='center right')

# Scatter plot of validation accuracy and loss.
plt.subplot(2, 2, 4)
plt.scatter(epoch_axis, val_accuracy, label="Validation Accuracy", color='b', s=25, marker="o")
plt.scatter(epoch_axis, val_loss, label="Validation Loss", color='r', s=25, marker="o")
plt.title('Validation : Accuracy and Loss')
plt.legend(loc='center right')

plt.show()

9.用ImageDataGenerator数据增强

# Random transformations applied to the training images only.
_augmentation_options = dict(
    rotation_range=40,       # rotation range, in degrees
    width_shift_range=0.2,   # horizontal shift, as a fraction of the width
    height_shift_range=0.2,  # vertical shift, as a fraction of the height
    shear_range=0.2,         # shear intensity
    zoom_range=0.2,          # zoom range
    horizontal_flip=True,    # randomly flip images horizontally
    fill_mode='nearest',     # how newly created pixels are filled
)
train_datagen = ImageDataGenerator(rescale=1./255, **_augmentation_options)
# Validation images are only rescaled, never augmented.
validation_datagen = ImageDataGenerator(rescale=1./255)

# Options shared by both iterators.
_binary_flow = dict(target_size=IMG_SIZE,
                    batch_size=BATCH_SIZE,
                    class_mode='binary')

train_generator = train_datagen.flow_from_directory(train_dir,
                                                    **_binary_flow)

validation_generator = validation_datagen.flow_from_directory(validation_dir,
                                                              **_binary_flow)

再次训练模型，并绘制损失曲线和精度曲线图，得到结果图:

10.随机选取测试集的图片进行预测

def convertjpg(jpgfile, outdir, width=150, height=150):
    """Resize an image and save the copy under ``outdir``.

    The copy keeps the base name of ``jpgfile``. Errors raised while
    resizing or saving are printed and not re-raised, so the caller
    continues either way.

    Args:
        jpgfile: Path of the image to read.
        outdir: Existing directory the resized copy is written to.
        width: Target width in pixels.
        height: Target height in pixels.
    """
    # The context manager closes the file handle that Image.open otherwise
    # keeps open after the function returns.
    with Image.open(jpgfile) as img:
        try:
            # Force three channels: the prediction code in this file
            # reshapes the saved picture to (1, 150, 150, 3), which fails
            # for grayscale or RGBA input.
            rgb_img = img.convert('RGB')
            new_img = rgb_img.resize((width, height), Image.BILINEAR)
            new_img.save(os.path.join(outdir, os.path.basename(jpgfile)))
        except Exception as e:
            print(e)

# Pick one random male picture from the test set.
man_test = os.path.join('sex', 'faces', 'test', 'man')
man_test_list = os.listdir(man_test)
# random.randint includes its upper bound, so randint(0, len(...)) can
# return an index one past the end of the list; randrange excludes it.
key = random.randrange(len(man_test_list))
img_key = man_test_list[key]
jpg_file = os.path.join(man_test, img_key)
convertjpg(jpg_file, "sex/faces/test")  # write a 150x150 copy
img_scale = plt.imread('sex/faces/test/' + img_key)
plt.imshow(img_scale)  # draw the resized copy

# Predict with the saved model.
model = load_model('sex/faces/sex_model.h5')
img_scale = img_scale.reshape(1, 150, 150, 3).astype('float32')
img_scale = img_scale/255  # scale pixel values to the 0-1 range
result = model.predict(img_scale)
# NOTE(review): assumes the generator mapped 'man' to 0 and 'woman' to 1;
# confirm with train_generator.class_indices.
if result > 0.5:
    print('该图片是女性的概率为：', result)
else:
    print('该图片是男性的概率为：', 1-result)
plt.show()  # display the resized image

# Pick one random female picture from the test set.
woman_test = os.path.join('sex', 'faces', 'test', 'woman')
woman_test_list = os.listdir(woman_test)
# random.randint includes its upper bound, so randint(0, len(...)) can
# return an index one past the end of the list; randrange excludes it.
key = random.randrange(len(woman_test_list))
img_key = woman_test_list[key]
jpg_file = os.path.join(woman_test, img_key)
convertjpg(jpg_file, "sex/faces/test")  # write a 150x150 copy
img_scale = plt.imread('sex/faces/test/' + img_key)
plt.imshow(img_scale)  # draw the resized copy

# Predict with the saved model.
model = load_model('sex/faces/sex_model.h5')
img_scale = img_scale.reshape(1, 150, 150, 3).astype('float32')
img_scale = img_scale/255  # scale pixel values to the 0-1 range
result = model.predict(img_scale)
# NOTE(review): assumes the generator mapped 'man' to 0 and 'woman' to 1;
# confirm with train_generator.class_indices.
if result > 0.5:
    print('该图片是女性的概率为：', result)
else:
    print('该图片是男性的概率为：', 1-result)
plt.show()  # display the resized image

11.自定义一张图片进行预测

# Predict the gender of a user-supplied male photo.
diy_img = 'sex/faces/man.jpg'
convertjpg(diy_img, "sex")  # write a 150x150 copy into 'sex'
img_scale = plt.imread('sex/man.jpg')
plt.imshow(img_scale)  # draw the resized copy

# Load the model trained with data augmentation and run the prediction.
model = load_model('sex/faces/sex_model_idg.h5')
# Add the batch axis, convert to float32 and scale to the 0-1 range.
img_scale = img_scale.reshape(1, 150, 150, 3).astype('float32') / 255
result = model.predict(img_scale)
if not result > 0.5:
    print('该图片是男性的概率为：', 1-result)
else:
    print('该图片是女性的概率为：', result)
plt.show()  # display the resized image

# Predict the gender of a user-supplied female photo.
diy_img = 'sex/faces/woman_2.jpg'
convertjpg(diy_img, "sex")  # write a 150x150 copy into 'sex'
img_scale = plt.imread('sex/woman_2.jpg')
plt.imshow(img_scale)  # draw the resized copy

# Load the model trained with data augmentation, the same file the male
# custom-image prediction uses; this step previously loaded the baseline
# 'sex_model.h5' although it was described as using the augmented model.
model = load_model('sex/faces/sex_model_idg.h5')
img_scale = img_scale.reshape(1, 150, 150, 3).astype('float32')
img_scale = img_scale/255  # scale pixel values to the 0-1 range
result = model.predict(img_scale)
if result > 0.5:
    print('该图片是女性的概率为：', result)
else:
    print('该图片是男性的概率为：', 1-result)
plt.show()  # display the resized image

12.完整代码：

import os
import random
from shutil import copy
from matplotlib import pyplot as plt
from keras import optimizers
from keras import models
from keras import layers
from keras.preprocessing.image import ImageDataGenerator
from keras.models import load_model
from PIL import Image

# Root directory of the dataset. Every path below is built with os.path.join
# so it resolves to the same nested folders on all platforms; raw strings
# with backslashes only name nested folders on Windows.
faces_root = os.path.join('sex', 'faces')

# Destination folders for the female images: training, validation, test.
woman_train_dir = os.path.join(faces_root, 'train', 'woman')
woman_validation_dir = os.path.join(faces_root, 'validation', 'woman')
woman_test_dir = os.path.join(faces_root, 'test', 'woman')

# Destination folders for the male images: training, validation, test.
man_train_dir = os.path.join(faces_root, 'train', 'man')
man_validation_dir = os.path.join(faces_root, 'validation', 'man')
man_test_dir = os.path.join(faces_root, 'test', 'man')

# Collect the six destination folders and create any that are missing.
dir_list = [woman_train_dir, woman_validation_dir, woman_test_dir,
            man_train_dir, man_validation_dir, man_test_dir]
for dir_child in dir_list:
    os.makedirs(dir_child, exist_ok=True)


def _copy_split(src_dir, file_names, train_dir, validation_dir, test_dir,
                train_count=6000, validation_count=3000):
    """Copy the files of one class into its three split folders.

    The first ``train_count`` names are copied to ``train_dir``, the next
    ``validation_count`` names to ``validation_dir`` and every remaining
    name to ``test_dir``.
    """
    validation_end = train_count + validation_count
    for index, file_name in enumerate(file_names):
        if index < train_count:
            target_dir = train_dir
        elif index < validation_end:
            target_dir = validation_dir
        else:
            target_dir = test_dir
        copy(os.path.join(src_dir, file_name), target_dir)


# Folder holding all female source images.
woman_path = os.path.join(faces_root, 'woman')
# os.listdir returns names in arbitrary order; sorting keeps the split
# identical from one run to the next.
woman_path_list = sorted(os.listdir(woman_path))
# 6000 images for training, 3000 for validation, the rest for testing.
_copy_split(woman_path, woman_path_list,
            woman_train_dir, woman_validation_dir, woman_test_dir)

# Folder holding all male source images.
man_path = os.path.join(faces_root, 'man')
man_path_list = sorted(os.listdir(man_path))
# Same 6000 / 3000 / rest split for the male images.
_copy_split(man_path, man_path_list,
            man_train_dir, man_validation_dir, man_test_dir)

# Report how many files ended up in each folder.
train_path = "sex/faces/train/"
print('total training woman images:', len(os.listdir(train_path+"woman")))
print('total training man images:', len(os.listdir(train_path+"man")))

valid_path = "sex/faces/validation/"
print('total validation woman images:', len(os.listdir(valid_path+"woman")))
print('total validation man images:', len(os.listdir(valid_path+"man")))

test_path = "sex/faces/test/"
print('total test woman images:', len(os.listdir(test_path+"woman")))
print('total test man images:', len(os.listdir(test_path+"man")))

# Show up to nine male test images, each titled with its own file name.
fig, ax = plt.subplots(nrows=3, ncols=3, figsize=(10, 7))
# Folder the preview images are read from.
test_view_path = os.path.join('sex', 'faces', 'test', 'man')
# Names of the files inside test_view_path.
test_view_list = os.listdir(test_view_path)
# zip() stops at the shorter sequence, so a folder with fewer than nine
# images leaves the remaining axes empty instead of raising IndexError.
for a, file_name in zip(ax.flat, test_view_list):
    view_path = os.path.join(test_view_path, file_name)
    # Read and draw the original image.
    a.imshow(plt.imread(view_path))
    # Title with the name of the file actually displayed; indexing the
    # source-folder list here would label the picture with another file.
    a.set_title(file_name)
plt.tight_layout()  # adjust subplot spacing to fill the figure area
plt.show()

# Image preprocessing: build one directory iterator per data split.
BATCH_SIZE = 20        # images per batch
IMG_SIZE = (150, 150)  # size every image is resized to

# Generators that only rescale pixel values by 1/255.
train_datagen = ImageDataGenerator(rescale=1./255)
validation_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

# Options shared by all three iterators: RGB input, binary labels.
_flow_options = dict(target_size=IMG_SIZE,
                     batch_size=BATCH_SIZE,
                     color_mode='rgb',
                     class_mode='binary')

train_dir = 'sex/faces/train'  # training images
train_generator = train_datagen.flow_from_directory(train_dir,
                                                    **_flow_options)

validation_dir = 'sex/faces/validation'  # validation images
validation_generator = validation_datagen.flow_from_directory(validation_dir,
                                                              **_flow_options)

test_dir = 'sex/faces/test'  # test images
test_generator = test_datagen.flow_from_directory(test_dir,
                                                  **_flow_options)

# Show nine preprocessed test images together with their binary labels.
fig, ax = plt.subplots(nrows=3, ncols=3, figsize=(10, 7))

for a in ax.flat:
    # The built-in next() works on every Keras iterator version, whereas
    # the .next() method is not available on all of them.
    img, label = next(test_generator)
    # Draw the first image of the batch and use its label as the title.
    a.imshow(img[0])
    a.set_title(label[0])

plt.tight_layout()
plt.show()

# Build the convolutional network.
model = models.Sequential()

# Conv block 1: 32 filters. Output 150-3+1 = 148x148,
# parameters 32*3*3*3+32 = 896.
model.add(layers.Conv2D(32, (3, 3),
                        activation='relu',
                        input_shape=(150, 150, 3)))
model.add(layers.MaxPooling2D((2, 2)))  # 148/2 = 74x74

# Conv block 2: 64 filters. Output 74-3+1 = 72x72,
# parameters 64*3*3*32+64 = 18496.
model.add(layers.Conv2D(64, (3, 3),
                        activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))  # 72/2 = 36x36

# Conv block 3: 128 filters. Output 36-3+1 = 34x34,
# parameters 128*3*3*64+128 = 73856.
model.add(layers.Conv2D(128, (3, 3),
                        activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))  # 34/2 = 17x17

# Conv block 4: 128 filters. Output 17-3+1 = 15x15,
# parameters 128*3*3*128+128 = 147584.
model.add(layers.Conv2D(128, (3, 3),
                        activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))  # 15//2 = 7x7

# Flatten the feature maps to one dimension for the dense layers.
model.add(layers.Flatten())

# Fully connected layers; the single sigmoid unit is the two-class output.
model.add(layers.Dense(512,
                       activation='relu'))
model.add(layers.Dense(1,
                       activation='sigmoid'))

# Compile with RMSprop. The last layer is a single sigmoid unit, so binary
# cross-entropy is the loss. `learning_rate` replaces the deprecated `lr`
# argument name.
model.compile(loss='binary_crossentropy',
              optimizer=optimizers.RMSprop(learning_rate=1e-4),
              metrics=['acc'])

# Print how the feature-map dimensions change from layer to layer.
model.summary()
#

# Baseline run: train for 50 epochs on the rescale-only generators that are
# still bound to train_generator / validation_generator at this point, and
# save the result right away. Saving one trained model under both file names
# would make 'sex_model.h5' a copy of the augmented model.
history_baseline = model.fit(
                    train_generator,
                    steps_per_epoch=100,
                    epochs=50,
                    validation_data=validation_generator,
                    validation_steps=50)
model.save('sex/faces/sex_model.h5')

# Data augmentation for the second run (training images only).
train_datagen = ImageDataGenerator(rescale=1./255,
                                   rotation_range=40,  # rotation range, in degrees
                                   width_shift_range=0.2,  # horizontal shift fraction
                                   height_shift_range=0.2,  # vertical shift fraction
                                   shear_range=0.2,  # shear intensity
                                   zoom_range=0.2,  # zoom range
                                   horizontal_flip=True,  # randomly flip horizontally
                                   fill_mode='nearest')  # fill for newly created pixels
validation_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=IMG_SIZE,  # size of the training images fed to the network
    batch_size=BATCH_SIZE,
    class_mode='binary')

validation_generator = validation_datagen.flow_from_directory(
    validation_dir,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary')

# Second run: 50 more epochs on the augmented data. This continues from the
# baseline weights; rebuild the model first if the two runs should be
# independent. history_save holds this run's curves.
history_save = model.fit(
                    train_generator,
                    steps_per_epoch=100,
                    epochs=50,
                    validation_data=validation_generator,
                    validation_steps=50)

# Save the model trained with data augmentation.
model.save('sex/faces/sex_model_idg.h5')

# Plot the loss and accuracy curves recorded during training.
accuracy = history_save.history['acc']  # training accuracy
loss = history_save.history['loss']  # training loss
val_loss = history_save.history['val_loss']  # validation loss
val_accuracy = history_save.history['val_acc']  # validation accuracy
# Take the x axis from the recorded history instead of a hard-coded 50, so
# the plots stay valid when the number of epochs changes.
epoch_axis = range(len(accuracy))
plt.figure(figsize=(17, 7))

# Training vs. validation accuracy.
plt.subplot(2, 2, 1)
plt.plot(epoch_axis, accuracy, 'bo', label='Training Accuracy')
plt.plot(epoch_axis, val_accuracy, label='Validation Accuracy')
plt.title('Training and Validation Accuracy')
plt.legend(loc='center right')

# Training vs. validation loss.
plt.subplot(2, 2, 2)
plt.plot(epoch_axis, loss, 'bo', label='Training Loss')
plt.plot(epoch_axis, val_loss, label='Validation Loss')
plt.title('Training and Validation Loss')
plt.legend(loc='center right')

# Scatter plot of training accuracy and loss.
plt.subplot(2, 2, 3)
plt.scatter(epoch_axis, accuracy, label="Training Accuracy", color='b', s=25, marker="o")
plt.scatter(epoch_axis, loss, label="Training Loss", color='r', s=25, marker="o")
plt.title('Training : Accuracy and Loss')
plt.legend(loc='center right')

# Scatter plot of validation accuracy and loss.
plt.subplot(2, 2, 4)
plt.scatter(epoch_axis, val_accuracy, label="Validation Accuracy", color='b', s=25, marker="o")
plt.scatter(epoch_axis, val_loss, label="Validation Loss", color='r', s=25, marker="o")
plt.title('Validation : Accuracy and Loss')
plt.legend(loc='center right')

plt.show()

def convertjpg(jpgfile, outdir, width=150, height=150):
    """Resize an image and save the copy under ``outdir``.

    The copy keeps the base name of ``jpgfile``. Errors raised while
    resizing or saving are printed and not re-raised, so the caller
    continues either way.

    Args:
        jpgfile: Path of the image to read.
        outdir: Existing directory the resized copy is written to.
        width: Target width in pixels.
        height: Target height in pixels.
    """
    # The context manager closes the file handle that Image.open otherwise
    # keeps open after the function returns.
    with Image.open(jpgfile) as img:
        try:
            # Force three channels: the prediction code in this file
            # reshapes the saved picture to (1, 150, 150, 3), which fails
            # for grayscale or RGBA input.
            rgb_img = img.convert('RGB')
            new_img = rgb_img.resize((width, height), Image.BILINEAR)
            new_img.save(os.path.join(outdir, os.path.basename(jpgfile)))
        except Exception as e:
            print(e)

# Pick one random male picture from the test set.
man_test = os.path.join('sex', 'faces', 'test', 'man')
man_test_list = os.listdir(man_test)
# random.randint includes its upper bound, so randint(0, len(...)) can
# return an index one past the end of the list; randrange excludes it.
key = random.randrange(len(man_test_list))
img_key = man_test_list[key]
jpg_file = os.path.join(man_test, img_key)
convertjpg(jpg_file, "sex/faces/test")  # write a 150x150 copy
img_scale = plt.imread('sex/faces/test/' + img_key)
plt.imshow(img_scale)  # draw the resized copy

# Predict with the saved model.
model = load_model('sex/faces/sex_model.h5')
img_scale = img_scale.reshape(1, 150, 150, 3).astype('float32')
img_scale = img_scale/255  # scale pixel values to the 0-1 range
result = model.predict(img_scale)
# NOTE(review): assumes the generator mapped 'man' to 0 and 'woman' to 1;
# confirm with train_generator.class_indices.
if result > 0.5:
    print('该图片是女性的概率为：', result)
else:
    print('该图片是男性的概率为：', 1-result)
plt.show()  # display the resized image

# Pick one random female picture from the test set.
woman_test = os.path.join('sex', 'faces', 'test', 'woman')
woman_test_list = os.listdir(woman_test)
# random.randint includes its upper bound, so randint(0, len(...)) can
# return an index one past the end of the list; randrange excludes it.
key = random.randrange(len(woman_test_list))
img_key = woman_test_list[key]
jpg_file = os.path.join(woman_test, img_key)
convertjpg(jpg_file, "sex/faces/test")  # write a 150x150 copy
img_scale = plt.imread('sex/faces/test/' + img_key)
plt.imshow(img_scale)  # draw the resized copy

# Predict with the saved model.
model = load_model('sex/faces/sex_model.h5')
img_scale = img_scale.reshape(1, 150, 150, 3).astype('float32')
img_scale = img_scale/255  # scale pixel values to the 0-1 range
result = model.predict(img_scale)
# NOTE(review): assumes the generator mapped 'man' to 0 and 'woman' to 1;
# confirm with train_generator.class_indices.
if result > 0.5:
    print('该图片是女性的概率为：', result)
else:
    print('该图片是男性的概率为：', 1-result)
plt.show()  # display the resized image

# Predict the gender of a user-supplied male photo.
diy_img = 'sex/faces/man.jpg'
convertjpg(diy_img, "sex")  # write a 150x150 copy into 'sex'
img_scale = plt.imread('sex/man.jpg')
plt.imshow(img_scale)  # draw the resized copy

# Load the model trained with data augmentation and run the prediction.
model = load_model('sex/faces/sex_model_idg.h5')
# Add the batch axis, convert to float32 and scale to the 0-1 range.
img_scale = img_scale.reshape(1, 150, 150, 3).astype('float32') / 255
result = model.predict(img_scale)
if not result > 0.5:
    print('该图片是男性的概率为：', 1-result)
else:
    print('该图片是女性的概率为：', result)
plt.show()  # display the resized image

# Predict the gender of a user-supplied female photo.
diy_img = 'sex/faces/woman_2.jpg'
convertjpg(diy_img, "sex")  # write a 150x150 copy into 'sex'
img_scale = plt.imread('sex/woman_2.jpg')
plt.imshow(img_scale)  # draw the resized copy

# Load the model trained with data augmentation, the same file the male
# custom-image prediction uses; this step previously loaded the baseline
# 'sex_model.h5' although it was described as using the augmented model.
model = load_model('sex/faces/sex_model_idg.h5')
img_scale = img_scale.reshape(1, 150, 150, 3).astype('float32')
img_scale = img_scale/255  # scale pixel values to the 0-1 range
result = model.predict(img_scale)
if result > 0.5:
    print('该图片是女性的概率为：', result)
else:
    print('该图片是男性的概率为：', 1-result)
plt.show()  # display the resized image

四、实验总结

机器学习就是利用数据训练模型，再用模型进行预测的一种方法。这次学习主要是对二分类进行实践，所用到的二分类激活函数是sigmoid。在第一次训练之后，用ImageDataGenerator进行数据增强并做了第二次训练，并分别绘制了两次训练的损失曲线和精度曲线图。相比第一次训练，第二次训练的模型精度较低，但对图像进行识别的准确率仍然较高。

本次程序设计的不足：在数据增强上效果不是很明显，在设计过程中还遇到图像失真导致训练精度上升缓慢。