Neural Network
- One-hot encode y
- Serialize the weight parameters
- Forward propagation
- Cost function
- Backpropagation
- Train the neural network
- Visualize the hidden layer
One-hot encoding y
import numpy as np

def one_hot_encoder(raw_y):
    result = []
    for i in raw_y:  # labels are 1-10
        y_temp = np.zeros(10)  # 1 x 10 zero vector
        y_temp[i - 1] = 1
        result.append(y_temp)  # one row per sample, each with exactly one 1; 5000 rows in total
    return np.array(result)
Result:
[[0. 0. 0. ... 0. 0. 1.]
[0. 0. 0. ... 0. 0. 1.]
[0. 0. 0. ... 0. 0. 1.]
...
[0. 0. 0. ... 0. 1. 0.]
[0. 0. 0. ... 0. 1. 0.]
[0. 0. 0. ... 0. 1. 0.]]
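The x, y, theta1 and theta2 used below are not defined in this section; they are assumed to come from the course's MATLAB data and weight files. A minimal loading sketch (the file names and variable keys are assumptions, not shown in the original):

# Minimal data-loading sketch; 'ex4data1.mat' / 'ex4weights.mat' and their keys are assumptions.
from scipy.io import loadmat

data = loadmat('ex4data1.mat')
raw_x = data['X']                    # 5000 x 400 pixel values
raw_y = data['y'].reshape(5000,)     # 5000 labels in 1-10
x = np.insert(raw_x, 0, 1, axis=1)   # add the bias column -> 5000 x 401
y = one_hot_encoder(raw_y)           # 5000 x 10

weights = loadmat('ex4weights.mat')
theta1, theta2 = weights['Theta1'], weights['Theta2']   # 25 x 401 and 10 x 26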
Serializing the weight parameters
"""
序列化权重参数函数
"""
def seriallize(a,b):
return np.append(a.flatten(),b.flatten())
"""
解序列化
"""
def deserialize(theta_serialize):
theta1=theta_serialize[:25*401].reshape(25,401)
theta2=theta_serialize[25*401].reshape(10,26)
return theta1,theta2
theta_serialize=seriallize(theta1,theta2)#序列化
theta1,theta2=deserialize(theta_serialize)#解序列化
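As a quick sanity check on the dimensions (a sketch): the flattened vector holds 25*401 + 10*26 = 10285 values, which matches the size of the initial parameter vector used during training below.

assert theta_serialize.shape == (10285,)                 # 25*401 + 10*26 = 10285
assert deserialize(theta_serialize)[0].shape == (25, 401)
assert deserialize(theta_serialize)[1].shape == (10, 26)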
Forward propagation
"""
前向传播函数
"""
def sigmoid(z):
return 1/(1+np.exp(-z))
def feed_forward(theta_serialize,x):
theta1,theta2=deserialize(theta_serialize)
a1 = x
z2 = x @ theta1.T
a2 = sigmoid(z2)
a2 = np.insert(a2, 0, 1, axis=1)
z3 = a2 @ theta2.T
h = sigmoid(z3)
return a1,z2,a2,z3,h
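To make the matrix shapes explicit, a small walkthrough (a sketch, assuming x is the 5000 x 401 design matrix with the bias column already inserted):

a1, z2, a2, z3, h = feed_forward(theta_serialize, x)
print(a1.shape, z2.shape, a2.shape, z3.shape, h.shape)
# expected: (5000, 401) (5000, 25) (5000, 26) (5000, 10) (5000, 10)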
Cost function
"""
代价函数(不带正则化)
"""
def cost(theta_serialize,x,y):
a1, z2, a2, z3, h=feed_forward(theta_serialize,x)
J=-np.sum(y*np.log(h)+(1-y)*np.log(1-h))/len(x)
return J
"""
代价函数(正则化)
"""
def reg_cost(theta_serialize,x,y,lamda):
sum1=np.sum(np.power(theta1[:,1:],2))
sum2 = np.sum(np.power(theta2[:, 1:], 2))
reg=(sum1+sum2)*lamda /(2*len(x))
return reg+cost(theta_serialize,x,y)
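Written out, reg_cost computes the cross-entropy cost plus an L2 penalty over all non-bias weights (m = 5000; the penalty sums skip the bias column, index 0 in the code):

J(\Theta) = -\frac{1}{m} \sum_{i=1}^{m} \sum_{k=1}^{10}
    \left[ y_k^{(i)} \log h_k^{(i)} + \left(1 - y_k^{(i)}\right) \log\left(1 - h_k^{(i)}\right) \right]
    + \frac{\lambda}{2m} \left( \sum_{j,\,k \ge 1} \left(\Theta^{(1)}_{jk}\right)^2 + \sum_{j,\,k \ge 1} \left(\Theta^{(2)}_{jk}\right)^2 \right)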
Backpropagation
"""
反向传播
无正则化
"""
def sigmoid_gradient(z):
return sigmoid(z)*(1-sigmoid(z))
def gridient(theta_serialize,x,y):
theta1,theta2=deserialize(theta_serialize)
a1, z2, a2, z3, h=feed_forward(theta_serialize,x)
d3=h-y
d2=d3@theta2[:,1:]*sigmoid_gradient(z2)
D2=(d3.T@a2)/len(x)
D1=(d2.T@a1)/len(x)
return seriallize(D1,D2)
"""
反向传播
带正正则化
"""
def reg_gradient(theta_serialize,x,y,lamda):
D=gridient(theta_serialize,x,y)
D1,D2=deserialize(D)
theta1, theta2 = deserialize(theta_serialize)
D1[:,1:]=D1[:,1:]+theta1[:,1:]*lamda/len(x)
return seriallize(D1,D2)
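A standard way to verify the analytic gradient is a numerical gradient check on a few random entries; the helper below is not in the original, just a sketch built from the functions defined above:

# Numerical gradient check (sketch): compare reg_gradient against a
# central-difference approximation of reg_cost on a few random parameters.
def gradient_check(theta_serialize, x, y, lamda, eps=1e-4, n_checks=10):
    analytic = reg_gradient(theta_serialize, x, y, lamda)
    rng = np.random.default_rng(0)
    for idx in rng.integers(0, len(theta_serialize), n_checks):
        plus, minus = theta_serialize.copy(), theta_serialize.copy()
        plus[idx] += eps
        minus[idx] -= eps
        numeric = (reg_cost(plus, x, y, lamda) - reg_cost(minus, x, y, lamda)) / (2 * eps)
        print(idx, numeric, analytic[idx])  # the two values should agree to several decimals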
Training the neural network
"""
神经网络优化
"""
def nn_training(x,y):
init_theta = np.random.uniform(-0.5,0.5,10285)
res = minimize(
fun =reg_cost,
x0=init_theta,
args= (x,y,lamda),
method='TNC',
jac = reg_gradient,
options = {'maxiter':300})
return res
lamda = 10
# print(cost(theta_serialize, x, y))
# print(reg_cost(theta_serialize, x, y, lamda))

res = nn_training(x, y)
raw_y = data['y'].reshape(5000,)
_, _, _, _, h = feed_forward(res.x, x)
y_pred = np.argmax(h, axis=1) + 1   # map column index 0-9 back to labels 1-10
acc = np.mean(y_pred == raw_y)
print(acc)
Visualizing the hidden layer
"""
可视化隐藏层
"""
def plot_hidden_layer(theta):
theta1,_=deserialize(theta)
hidden_layer=theta1[:,1:]
fig, ax = plt.subplots(figsize=(8, 8), nrows=5, ncols=5, sharey=True, sharex=True)
plt.xticks([])
plt.yticks([])
for r in range(5):
for c in range(5):
ax[r, c].imshow(hidden_layer[5 * r + c].reshape(20, 20).T, cmap="gray_r")
plt.show()
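Called with the trained parameters, this shows what each of the 25 hidden units responds to:

plot_hidden_layer(res.x)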