1. 实验目的

①理解并掌握误差反向传播算法；
②能够使用单层和多层神经网络，完成多分类任务；
③了解常用的激活函数。

2. 实验内容

①设计单层和多层神经网络结构，并使用TensorFlow建立模型，完成多分类任务；
②调试程序，通过调整超参数和训练模型参数，使模型在测试集上达到最优性能；
③测试模型，使用Matplotlib对结果进行可视化呈现。

3.实验过程

题目一：

分别使用单层神经网络和多层神经网络，对Iris数据集中的三种鸢尾花分类，并测试模型性能，以恰当的形式展现训练过程和结果。
要求：
⑴编写代码实现上述功能；
⑵记录实验过程和结果：
改变隐含层层数、隐含层中节点数等超参数，综合考虑准确率、交叉熵损失和训练时间等，使模型在测试集上达到最优的性能，并以恰当的方式记录和展示实验结果；
⑶分析和总结：
这个模型中的超参数有哪些？简要说明你寻找最佳超参数的过程，请分析它们对结果准确性和训练时间的影响，以表格或其他合适的图表形式展示。通过以上结果，可以得到什么结论，或对你有什么启发。
① 代码
单层神经网络

#导入库
import tensorflow as tf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Load data: fetch the Iris training and test CSV files via tf.keras.utils.get_file
TRAIN_URL = "http://download.tensorflow.org/data/iris_training.csv"
train_path = tf.keras.utils.get_file(TRAIN_URL.split('/')[-1], TRAIN_URL)
TEST_URL = "http://download.tensorflow.org/data/iris_test.csv"
test_path = tf.keras.utils.get_file(TEST_URL.split('/')[-1], TEST_URL)

df_iris_train = pd.read_csv(train_path, header=0)  # row 0 is used as the header row
df_iris_test = pd.read_csv(test_path, header=0)
iris_train = np.array(df_iris_train)  # convert the DataFrames to NumPy arrays
iris_test = np.array(df_iris_test)

# Preprocessing
x_train = iris_train[:, 0:4]  # columns 0-3: the four attributes
y_train = iris_train[:, 4]    # column 4: the label
x_test = iris_test[:, 0:4]
y_test = iris_test[:, 4]

# Center the attributes. The mean is estimated on the training set only and
# reused for the test set, so both sets go through exactly the same
# transformation and no test-set statistics leak into the preprocessing.
feature_mean = np.mean(x_train, axis=0)
x_train = x_train - feature_mean
x_test = x_test - feature_mean

X_train = tf.cast(x_train, tf.float32)  # attributes as 32-bit floats
Y_train = tf.one_hot(tf.constant(y_train, dtype=tf.int32), 3)  # labels as one-hot vectors (3 classes)
X_test = tf.cast(x_test, tf.float32)
Y_test = tf.one_hot(tf.constant(y_test, dtype=tf.int32), 3)

# Hyperparameters: learning rate, number of iterations, logging interval
learn_rate = 1.2
iter = 50
display_step = 10

# Initial model parameters; the NumPy seed is fixed before drawing W
np.random.seed(612)
W = tf.Variable(np.random.randn(4, 3), dtype=tf.float32)
B = tf.Variable(np.zeros([3]), dtype=tf.float32)

# Per-iteration history of accuracy and cross-entropy loss
acc_train, acc_test = [], []
cce_train, cce_test = [], []


def _mean_cross_entropy(one_hot_labels, probabilities):
    """Return the categorical cross-entropy averaged over all samples."""
    per_sample = tf.keras.losses.categorical_crossentropy(
        y_true=one_hot_labels, y_pred=probabilities)
    return tf.reduce_mean(per_sample)


def _accuracy(probabilities, labels):
    """Return the fraction of samples whose arg-max class equals the label."""
    predicted = tf.argmax(probabilities.numpy(), axis=1)
    return tf.reduce_mean(tf.cast(tf.equal(predicted, labels), tf.float32))


# Train with full-batch gradient descent
for i in range(iter + 1):
    # Only the training forward pass is recorded for differentiation
    with tf.GradientTape() as tape:
        PRED_train = tf.nn.softmax(tf.matmul(X_train, W) + B)
        Loss_train = _mean_cross_entropy(Y_train, PRED_train)

    # Evaluate on the test set with the parameters as they are before this update
    PRED_test = tf.nn.softmax(tf.matmul(X_test, W) + B)
    Loss_test = _mean_cross_entropy(Y_test, PRED_test)

    accuracy_train = _accuracy(PRED_train, y_train)
    accuracy_test = _accuracy(PRED_test, y_test)

    acc_train.append(accuracy_train)
    acc_test.append(accuracy_test)
    cce_train.append(Loss_train)
    cce_test.append(Loss_test)

    # Gradient-descent step on W and B
    grad_W, grad_B = tape.gradient(Loss_train, [W, B])
    W.assign_sub(learn_rate * grad_W)
    B.assign_sub(learn_rate * grad_B)

    if i % display_step == 0:
        print("i:%i,TrainAcc:%f,TrainLoss:%f,TestAcc:%f,TestLoss:%f"%(i,accuracy_train,Loss_train,accuracy_test,Loss_test))
# Visualize the results: loss curves on the left, accuracy curves on the right
plt.figure(figsize=(10, 3))  # create the canvas

panels = [
    (cce_train, cce_test, "Loss"),
    (acc_train, acc_test, "Accuracy"),
]
for position, (train_curve, test_curve, y_label) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    plt.plot(train_curve, color="blue", label="train")
    plt.plot(test_curve, color="red", label="test")
    plt.xlabel("Iteration")
    plt.ylabel(y_label)
    plt.legend()

plt.show()

多层神经网络

import tensorflow as tf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Load data: fetch the Iris training and test CSV files via tf.keras.utils.get_file
TRAIN_URL = "http://download.tensorflow.org/data/iris_training.csv"
train_path = tf.keras.utils.get_file(TRAIN_URL.split('/')[-1], TRAIN_URL)  # Iris training set
TEST_URL = "http://download.tensorflow.org/data/iris_test.csv"
test_path = tf.keras.utils.get_file(TEST_URL.split('/')[-1], TEST_URL)  # Iris test set

df_iris_train = pd.read_csv(train_path, header=0)  # row 0 is used as the header row
df_iris_test = pd.read_csv(test_path, header=0)

# Preprocessing
iris_train = np.array(df_iris_train)  # convert the DataFrames to NumPy arrays
iris_test = np.array(df_iris_test)

x_train = iris_train[:, 0:4]  # columns 0-3: the four attributes
y_train = iris_train[:, 4]    # column 4: the label
x_test = iris_test[:, 0:4]
y_test = iris_test[:, 4]

# Center the attributes. The mean is estimated on the training set only and
# reused for the test set, so both sets go through exactly the same
# transformation and no test-set statistics leak into the preprocessing.
feature_mean = np.mean(x_train, axis=0)
x_train = x_train - feature_mean
x_test = x_test - feature_mean

X_train = tf.cast(x_train, tf.float32)  # attributes as 32-bit floats
Y_train = tf.one_hot(tf.constant(y_train, dtype=tf.int32), 3)  # labels as one-hot vectors (3 classes)
X_test = tf.cast(x_test, tf.float32)
Y_test = tf.one_hot(tf.constant(y_test, dtype=tf.int32), 3)
# Hyperparameters: learning rate, number of iterations, logging interval
learn_rate = 0.5
iter = 50
display_step = 10

# Initial model parameters; the NumPy seed is fixed before drawing the weights
np.random.seed(612)
W1 = tf.Variable(np.random.randn(4, 16), dtype=tf.float32)  # hidden layer: 4 inputs -> 16 units
B1 = tf.Variable(np.zeros([16]), dtype=tf.float32)
W2 = tf.Variable(np.random.randn(16, 3), dtype=tf.float32)  # output layer: 16 units -> 3 classes
B2 = tf.Variable(np.zeros([3]), dtype=tf.float32)

# Per-iteration history of accuracy and cross-entropy loss
acc_train = []
acc_test = []
cce_train = []
cce_test = []

# Train with full-batch gradient descent
for i in range(0, iter + 1):
    # Only the training forward pass is needed for the gradients, so it is the
    # only computation recorded by the tape.
    with tf.GradientTape() as tape:
        Hidden_train = tf.nn.relu(tf.matmul(X_train, W1) + B1)
        PRED_train = tf.nn.softmax(tf.matmul(Hidden_train, W2) + B2)
        Loss_train = tf.reduce_mean(tf.keras.losses.categorical_crossentropy(y_true=Y_train, y_pred=PRED_train))

    # Test-set evaluation is kept outside the tape: it is never differentiated,
    # and recording it would only cost extra memory and time.
    Hidden_test = tf.nn.relu(tf.matmul(X_test, W1) + B1)
    PRED_test = tf.nn.softmax(tf.matmul(Hidden_test, W2) + B2)
    Loss_test = tf.reduce_mean(tf.keras.losses.categorical_crossentropy(y_true=Y_test, y_pred=PRED_test))

    # Accuracy: fraction of samples whose arg-max class equals the label
    accuracy_train = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(PRED_train.numpy(), axis=1), y_train), tf.float32))
    accuracy_test = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(PRED_test.numpy(), axis=1), y_test), tf.float32))

    acc_train.append(accuracy_train)
    acc_test.append(accuracy_test)
    cce_train.append(Loss_train)
    cce_test.append(Loss_test)

    # Gradient-descent step on all four parameter tensors
    grads = tape.gradient(Loss_train, [W1, B1, W2, B2])
    W1.assign_sub(learn_rate * grads[0])
    B1.assign_sub(learn_rate * grads[1])
    W2.assign_sub(learn_rate * grads[2])
    B2.assign_sub(learn_rate * grads[3])

    if i % display_step == 0:
        print("i:%i,TrainAcc:%f,TrainLoss:%f,TestAcc:%f,TestLoss:%f"%(i,accuracy_train,Loss_train,accuracy_test,Loss_test))
# Visualize the results: loss curves on the left, accuracy curves on the right
plt.figure(figsize=(10, 3))

curves_by_panel = {
    1: (cce_train, cce_test, "Loss"),
    2: (acc_train, acc_test, "Accuracy"),
}
for panel_index, (train_values, test_values, axis_label) in curves_by_panel.items():
    plt.subplot(1, 2, panel_index)
    plt.plot(train_values, color="blue", label="train")
    plt.plot(test_values, color="red", label="test")
    plt.xlabel("Iteration")
    plt.ylabel(axis_label)
    plt.legend()

plt.show()

② 结果记录
单层神经网络
在这里插入图片描述

多层神经网络
在这里插入图片描述

③ 实验总结
超参数：learn_rate = 0.5，iter = 100，display_step = 5。其中，神经网络的学习速度主要与训练集上代价函数下降的快慢有关，而最终的分类结果主要与验证集上的分类正确率有关。因此，可以根据某个超参数主要影响代价函数还是主要影响分类正确率，对超参数进行分类。超参数的调节可以使用贝叶斯优化等方法。

题目二：

使用低阶API实现Softmax函数和交叉熵损失函数，并使用它们修改题目一，看下结果是否相同。
① 代码

import tensorflow as tf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Use the SimHei font so that the Chinese figure title can be rendered
plt.rcParams['font.sans-serif'] = "SimHei"

# Enable memory growth on every GPU that TensorFlow reports
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)

# Fetch the Iris training and test CSV files via tf.keras.utils.get_file
TRAIN_URL = "http://download.tensorflow.org/data/iris_training.csv"
train_path = tf.keras.utils.get_file(TRAIN_URL.split('/')[-1], TRAIN_URL)
TEST_URL = "http://download.tensorflow.org/data/iris_test.csv"
test_path = tf.keras.utils.get_file(TEST_URL.split('/')[-1], TEST_URL)

df_iris_train = pd.read_csv(train_path, header=0)  # row 0 is used as the header row
df_iris_test = pd.read_csv(test_path, header=0)

# Convert the DataFrames to NumPy arrays
iris_train = np.array(df_iris_train)
iris_test = np.array(df_iris_test)

x_train = iris_train[:, 0:4]  # columns 0-3: the four attributes
y_train = iris_train[:, 4]    # column 4: the label
x_test = iris_test[:, 0:4]
y_test = iris_test[:, 4]

# Center the attributes. The mean is estimated on the training set only and
# reused for the test set, so both sets go through exactly the same
# transformation and no test-set statistics leak into the preprocessing.
feature_mean = np.mean(x_train, axis=0)
x_train = x_train - feature_mean
x_test = x_test - feature_mean

X_train = tf.cast(x_train, tf.float32)  # attributes as 32-bit floats
Y_train = tf.one_hot(tf.constant(y_train, dtype=tf.int32), 3)  # labels as one-hot vectors (3 classes)
X_test = tf.cast(x_test, tf.float32)
Y_test = tf.one_hot(tf.constant(y_test, dtype=tf.int32), 3)
#
# Hyperparameters: learning rate, number of iterations, logging interval
learn_rate = 0.5
iter = 100
display_step = 5

# Initial model parameters; the NumPy seed is fixed before drawing W
np.random.seed(612)
W = tf.Variable(np.random.randn(4, 3), dtype=tf.float32)
B = tf.Variable(np.zeros([3]), dtype=tf.float32)

# Per-iteration history of accuracy and cross-entropy loss
acc_train = []
acc_test = []
cce_train = []
cce_test = []


def softmax(logits):
    """Softmax over the class axis (axis 1), built from low-level ops only.

    The row-wise maximum is subtracted before exponentiation; this leaves the
    result mathematically unchanged but keeps tf.exp from overflowing.
    """
    shifted = logits - tf.reduce_max(logits, axis=1, keepdims=True)
    exponentials = tf.exp(shifted)
    return exponentials / tf.reduce_sum(exponentials, axis=1, keepdims=True)


def cross_entropy(y_true, y_pred, eps=1e-7):
    """Mean categorical cross-entropy, built from low-level ops only.

    y_true holds one-hot labels and y_pred the predicted probabilities, both
    with one row per sample. Probabilities are clipped to [eps, 1 - eps] so
    that log(0) can never occur.
    """
    clipped = tf.clip_by_value(y_pred, eps, 1.0 - eps)
    per_sample = -tf.reduce_sum(y_true * tf.math.log(clipped), axis=1)
    return tf.reduce_mean(per_sample)


# Train with full-batch gradient descent
for i in range(0, iter + 1):
    # Only the training forward pass is recorded for differentiation
    with tf.GradientTape() as tape:
        PRED_train = softmax(tf.matmul(X_train, W) + B)
        Loss_train = cross_entropy(Y_train, PRED_train)

    # Evaluate on the test set with the parameters as they are before this update
    PRED_test = softmax(tf.matmul(X_test, W) + B)
    Loss_test = cross_entropy(Y_test, PRED_test)

    # Accuracy: fraction of samples whose arg-max class equals the label
    accuracy_train = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(PRED_train.numpy(), axis=1), y_train), tf.float32))
    accuracy_test = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(PRED_test.numpy(), axis=1), y_test), tf.float32))

    acc_train.append(accuracy_train)
    acc_test.append(accuracy_test)
    cce_train.append(Loss_train)
    cce_test.append(Loss_test)

    # Gradient-descent step on W and B
    grads = tape.gradient(Loss_train, [W, B])
    W.assign_sub(learn_rate * grads[0])
    B.assign_sub(learn_rate * grads[1])

    if i % display_step == 0:
        print("i: %i. TrainAcc:%f. TrainLoss: %f , TestAcc:%f. TestLoss: %f" % (
        i, accuracy_train, Loss_train, accuracy_test, Loss_test))
#
# Visualize the results: loss curves on the left, accuracy curves on the right
plt.figure(figsize=(10, 3))
# The right-hand panel plots accuracy, so the title names it as the accuracy
# curve ("准确率曲线") rather than an "iteration-rate" curve.
plt.suptitle("训练集和测试集的损失曲线和准确率曲线", fontsize=20)

plt.subplot(121)
plt.plot(cce_train, color="blue", label="train")
plt.plot(cce_test, color="red", label="test")
plt.xlabel("Iteration")
plt.ylabel("Loss")
plt.legend()

plt.subplot(122)
plt.plot(acc_train, color="blue", label="train")
plt.plot(acc_test, color="red", label="test")
plt.xlabel("Iteration")
plt.ylabel("Accuracy")
plt.legend()
plt.show()

② 实验结果
在这里插入图片描述

选做题：
题目一：
选择两种鸢尾花的属性，采用神经网络模型，对变色鸢尾和维吉尼亚鸢尾进行分类，并参考MOOC中的PPT11.4.2,绘制分类图。
要求：
⑴编写代码实现上述要求；
⑵两种类型的决策边界是直线还是曲线？为什么？观察实验结果，和你的想法是否一致。
① 代码
② 结果记录
③ 实验总结
题目二：
使用第11讲拓展题数据集，设计多层神经网络，实现对良/恶性乳腺癌肿瘤的预测，并以恰当的形式展现训练过程和结果。
要求：
⑴尝试改变超参数和使用的属性，使网络达到最佳性能；
⑵对实验过程和结果进行对比和分析，并给出简要总结。
① 代码
② 结果记录
③ 实验总结

4.实验小结&讨论题

①在神经网络中，激活函数的作用是什么？常用的激活函数有哪些？在多分类问题中，在输出层一般使用什么激活函数？隐含层一般使用什么激活函数？为什么？
答：激活函数的作用是去线性化，即为网络引入非线性；常用的激活函数有tanh、ReLU、Sigmoid等。在多分类问题中，输出层一般使用Softmax函数（本实验的代码即如此），它把输出转化为各类别的概率，且概率之和为1；Sigmoid函数一般用于二分类问题的输出层。隐含层一般使用ReLU函数（本实验的代码即如此）或tanh函数。

②什么是损失函数？在多分类问题中，一般使用什么损失函数？为什么？
答：损失函数是用来评估模型的预测值与真实值不一致程度的函数。常用的损失函数有：
（1）L1范数损失L1Loss
（2）均方误差损失MSELoss
（3）交叉熵损失CrossEntropyLoss
在多分类问题中，一般使用交叉熵损失（本实验的代码即如此），因为它直接衡量预测的概率分布与独热编码标签之间的差异。

③神经网络的深度和宽度对网络性能有什么影响？
答：如果一个深层结构能够刚刚好解决问题，那么就不可能用一个更浅的同样紧凑的结构来解决，因此要解决复杂的问题，要么增加深度，要么增加宽度。但是神经网络一般来说不是越深越好，也不是越宽越好，并且由于计算量的限制或对于速度的需求，如何用更少的参数获得更好的准确率无疑是一个永恒的追求。

④训练数据和测试数据对神经网络的性能有何影响？在选择、使用和划分数据集时，应注意什么？
答：注意使用的范围和整体效果。