Neural Network
- One-hot encode y
- Serialize the weight parameters
- Forward propagation
- Cost function
- Backpropagation
- Train the neural network
- Visualize the hidden layer
One-hot encoding y
import numpy as np

def one_hot_encoder(raw_y):
    result = []
    for i in raw_y:  # labels are 1-10
        y_temp = np.zeros(10)  # 1 x 10 zero vector
        y_temp[i - 1] = 1
        result.append(y_temp)  # one row per sample, each with exactly one 1; 5000 rows in total
    return np.array(result)
Result:
[[0. 0. 0. ... 0. 0. 1.]
[0. 0. 0. ... 0. 0. 1.]
[0. 0. 0. ... 0. 0. 1.]
...
[0. 0. 0. ... 0. 1. 0.]
[0. 0. 0. ... 0. 1. 0.]
[0. 0. 0. ... 0. 1. 0.]]
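The x, y, theta1 and theta2 used below are not defined in this section; they are assumed to come from the course's MATLAB data and weight files. A minimal loading sketch (the file names and variable keys are assumptions, not shown in the original):

# Minimal data-loading sketch; 'ex4data1.mat' / 'ex4weights.mat' and their keys are assumptions.
from scipy.io import loadmat

data = loadmat('ex4data1.mat')
raw_x = data['X']                    # 5000 x 400 pixel values
raw_y = data['y'].reshape(5000,)     # 5000 labels in 1-10
x = np.insert(raw_x, 0, 1, axis=1)   # add the bias column -> 5000 x 401
y = one_hot_encoder(raw_y)           # 5000 x 10

weights = loadmat('ex4weights.mat')
theta1, theta2 = weights['Theta1'], weights['Theta2']   # 25 x 401 and 10 x 26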
Serializing the weight parameters
"""
序列化权重参数函数
"""
def seriallize(a,b):
return np.append(a.flatten(),b.flatten())
"""
解序列化
"""
def deserialize(theta_serialize):
theta1=theta_serialize[:25*401].reshape(25,401)
theta2=theta_serialize[25*401].reshape(10,26)
return theta1,theta2
theta_serialize=seriallize(theta1,theta2)#序列化
theta1,theta2=deserialize(theta_serialize)#解序列化
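As a quick sanity check on the dimensions (a sketch): the flattened vector holds 25*401 + 10*26 = 10285 values, which matches the size of the initial parameter vector used during training below.

assert theta_serialize.shape == (10285,)                 # 25*401 + 10*26 = 10285
assert deserialize(theta_serialize)[0].shape == (25, 401)
assert deserialize(theta_serialize)[1].shape == (10, 26)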
Forward propagation
"""
前向传播函数
"""
def sigmoid(z):
return 1/(1+np.exp(-z))
def feed_forward(theta_serialize,x):
theta1,theta2=deserialize(theta_serialize)
a1 = x
z2 = x @ theta1.T
a2 = sigmoid(z2)
a2 = np.insert(a2, 0, 1, axis=1)
z3 = a2 @ theta2.T
h = sigmoid(z3)
return a1,z2,a2,z3,h
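To make the matrix shapes explicit, a small walkthrough (a sketch, assuming x is the 5000 x 401 design matrix with the bias column already inserted):

a1, z2, a2, z3, h = feed_forward(theta_serialize, x)
print(a1.shape, z2.shape, a2.shape, z3.shape, h.shape)
# expected: (5000, 401) (5000, 25) (5000, 26) (5000, 10) (5000, 10)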
Cost function
"""
代价函数(不带正则化)
"""
def cost(theta_serialize,x,y):
a1, z2, a2, z3, h=feed_forward(theta_serialize,x)
J=-np.sum(y*np.log(h)+(1-y)*np.log(1-h))/len(x)
return J
"""
代价函数(正则化)
"""
def reg_cost(theta_serialize,x,y,lamda):
sum1=np.sum(np.power(theta1[:,1:],2))
sum2 = np.sum(np.power(theta2[:, 1:], 2))
reg=(sum1+sum2)*lamda /(2*len(x))
return reg+cost(theta_serialize,x,y)
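Written out, reg_cost computes the cross-entropy cost plus an L2 penalty over all non-bias weights (m = 5000; the penalty sums skip the bias column, index 0 in the code):

J(\Theta) = -\frac{1}{m} \sum_{i=1}^{m} \sum_{k=1}^{10}
    \left[ y_k^{(i)} \log h_k^{(i)} + \left(1 - y_k^{(i)}\right) \log\left(1 - h_k^{(i)}\right) \right]
    + \frac{\lambda}{2m} \left( \sum_{j,\,k \ge 1} \left(\Theta^{(1)}_{jk}\right)^2 + \sum_{j,\,k \ge 1} \left(\Theta^{(2)}_{jk}\right)^2 \right)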
Backpropagation
"""
反向传播
无正则化
"""
def sigmoid_gradient(z):
return sigmoid(z)*(1-sigmoid(z))
def gridient(theta_serialize,x,y):
theta1,theta2=deserialize(theta_serialize)
a1, z2, a2, z3, h=feed_forward(theta_serialize,x)
d3=h-y
d2=d3@theta2[:,1:]*sigmoid_gradient(z2)
D2=(d3.T@a2)/len(x)
D1=(d2.T@a1)/len(x)
return seriallize(D1,D2)
"""
反向传播
带正正则化
"""
def reg_gradient(theta_serialize,x,y,lamda):
D=gridient(theta_serialize,x,y)
D1,D2=deserialize(D)
theta1, theta2 = deserialize(theta_serialize)
D1[:,1:]=D1[:,1:]+theta1[:,1:]*lamda/len(x)
return seriallize(D1,D2)
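A standard way to verify the analytic gradient is a numerical gradient check on a few random entries; the helper below is not in the original, just a sketch built from the functions defined above:

# Numerical gradient check (sketch): compare reg_gradient against a
# central-difference approximation of reg_cost on a few random parameters.
def gradient_check(theta_serialize, x, y, lamda, eps=1e-4, n_checks=10):
    analytic = reg_gradient(theta_serialize, x, y, lamda)
    rng = np.random.default_rng(0)
    for idx in rng.integers(0, len(theta_serialize), n_checks):
        plus, minus = theta_serialize.copy(), theta_serialize.copy()
        plus[idx] += eps
        minus[idx] -= eps
        numeric = (reg_cost(plus, x, y, lamda) - reg_cost(minus, x, y, lamda)) / (2 * eps)
        print(idx, numeric, analytic[idx])  # the two values should agree to several decimals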
Training the neural network
"""
神经网络优化
"""
def nn_training(x,y):
init_theta = np.random.uniform(-0.5,0.5,10285)
res = minimize(
fun =reg_cost,
x0=init_theta,
args= (x,y,lamda),
method='TNC',
jac = reg_gradient,
options = {'maxiter':300})
return res
lamda = 10
# print(cost(theta_serialize, x, y))
# print(reg_cost(theta_serialize, x, y, lamda))

res = nn_training(x, y)
raw_y = data['y'].reshape(5000,)
_, _, _, _, h = feed_forward(res.x, x)
y_pred = np.argmax(h, axis=1) + 1   # map column index 0-9 back to labels 1-10
acc = np.mean(y_pred == raw_y)
print(acc)
Visualizing the hidden layer
"""
可视化隐藏层
"""
def plot_hidden_layer(theta):
theta1,_=deserialize(theta)
hidden_layer=theta1[:,1:]
fig, ax = plt.subplots(figsize=(8, 8), nrows=5, ncols=5, sharey=True, sharex=True)
plt.xticks([])
plt.yticks([])
for r in range(5):
for c in range(5):
ax[r, c].imshow(hidden_layer[5 * r + c].reshape(20, 20).T, cmap="gray_r")
plt.show()
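Called with the trained parameters, this shows what each of the 25 hidden units responds to:

plot_hidden_layer(res.x)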