Table of Contents
- Model
- Building the Data
- Numerical Differentiation (Gradient Computation)
- Model Wrapper
- Running It
- Results
This post walks through computing gradients with numerical differentiation, using those gradients to run stochastic gradient descent on the parameters, wrapping everything in a simple linear regression model for experimentation, and finally plotting the loss curve.
Model
$$y = XW + b$$
Each target y is a scalar and X has 2 feature columns. The loss function is mean squared error (MSE).
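Concretely, the MSE minimized below (computed as `np.mean(np.square(y - pred_y))` in `Network.loss`) is

$$L(W, b) = \frac{1}{n}\sum_{i=1}^{n}\bigl(x_i^\top W + b - y_i\bigr)^2$$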
Building the Data
```python
import time

import numpy as np
import matplotlib.pyplot as plt


def build_data(weights, bias, num_examples):
    x = np.random.randn(num_examples, len(weights))
    y = x.dot(weights) + bias
    # Add a little per-sample Gaussian noise to y
    y += np.random.randn(*y.shape) * 0.01
    return x, y


def data_iter(features, labels, batch_size):
    num_examples = len(features)
    # Build an index list over the samples
    indices = list(range(num_examples))
    # Shuffle the indices
    np.random.shuffle(indices)
    for i in range(0, num_examples, batch_size):
        batch_indices = np.array(indices[i:min(i + batch_size, num_examples)])
        yield features[batch_indices], labels[batch_indices]
```
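A quick usage sketch (the true weights and bias here are illustrative values, not from the training run below):

```python
# Iterate over 10 samples in batches of 4; the last batch holds the remainder
xs, ys = build_data(np.array([[2.0], [-3.0]]), 1.0, 10)
for xb, yb in data_iter(xs, ys, 4):
    print(xb.shape, yb.shape)  # (4, 2) (4, 1), (4, 2) (4, 1), then (2, 2) (2, 1)
```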
Numerical Differentiation (Gradient Computation)
This is just computing partial derivatives numerically, one parameter at a time, with the central difference shown below.
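For a single parameter $x_i$, the approximation is

$$\frac{\partial f}{\partial x_i} \approx \frac{f(x + h e_i) - f(x - h e_i)}{2h}$$

where $e_i$ is the unit vector for that parameter and $h = 10^{-4}$. The central difference has $O(h^2)$ truncation error, versus $O(h)$ for the one-sided difference $(f(x+h) - f(x))/h$.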
```python
# Compute partial derivatives (the gradient) via central-difference numerical differentiation
def numerical_gradient(f, x):
    h = 1e-4  # 0.0001
    grad = np.zeros_like(x)
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        idx = it.multi_index
        tmp_val = x[idx]
        x[idx] = float(tmp_val) + h
        fxh1 = f(x)  # f(x+h)
        x[idx] = tmp_val - h
        fxh2 = f(x)  # f(x-h)
        grad[idx] = (fxh1 - fxh2) / (2 * h)
        x[idx] = tmp_val  # restore the original value
        it.iternext()
    return grad
```
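A quick sanity check (my addition, not from the original post): for $f(x) = \sum_i x_i^2$ the exact gradient is $2x$, so the numerical result should land very close to it:

```python
# Sanity check: for f(x) = sum(x^2), the exact gradient is 2x
f = lambda x: np.sum(x ** 2)
x0 = np.array([3.0, 4.0])
print(numerical_gradient(f, x0))  # approximately [6. 8.]
```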
Model Wrapper
```python
class Network:
    def __init__(self, input_size, output_size, weight_init_std=0.01):
        # Small random weights scaled by weight_init_std, zero bias
        self.params = {'w1': weight_init_std * np.random.randn(input_size, output_size),
                       'b1': np.zeros(output_size)}

    def predict(self, x):
        w1, b1 = self.params['w1'], self.params['b1']
        return x.dot(w1) + b1

    def loss(self, x, y):
        # Mean squared error
        pred_y = self.predict(x)
        return np.mean(np.square(y - pred_y))

    def numerical_gradient(self, x, y):
        # loss_w ignores its argument on purpose: numerical_gradient perturbs the
        # parameter array in place, and self.loss reads it back via self.params
        loss_w = lambda w: self.loss(x, y)
        grads = dict()
        grads['w1'] = numerical_gradient(loss_w, self.params['w1'])
        grads['b1'] = numerical_gradient(loss_w, self.params['b1'])
        return grads
```
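As an optional cross-check (again my addition, with illustrative true parameters): for MSE the weight gradient has the closed form $\nabla_{W} L = \frac{2}{n} X^\top(\hat{y} - y)$, so the numerical gradient can be verified directly:

```python
# Verify the numerical gradient against the closed-form MSE gradient
net_check = Network(2, 1)
x_check = np.random.randn(8, 2)
y_check = x_check.dot(np.array([[2.0], [-3.0]])) + 1.0
num_grads = net_check.numerical_gradient(x_check, y_check)
analytic_w1 = 2 * x_check.T.dot(net_check.predict(x_check) - y_check) / len(x_check)
print(np.allclose(num_grads['w1'], analytic_w1))  # expected: True
```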
Running It
```python
if __name__ == '__main__':
    start = time.perf_counter()
    # np.random.seed(1)
    true_w1 = np.random.rand(2, 1)
    true_b1 = np.random.rand(1)
    # true_w1 = np.array([[3.0], [4.0]])
    # true_b1 = np.array([5.0])
    x_train, y_train = build_data(true_w1, true_b1, 5000)

    net = Network(2, 1, 0.01)
    init_loss = net.loss(x_train, y_train)
    print(net.params)

    loss_history = list()
    loss_history.append(init_loss)

    num_epochs = 2
    batch_size = 50
    learning_rate = 0.01
    for i in range(num_epochs):
        # running_loss = 0.0
        for x, y in data_iter(x_train, y_train, batch_size):
            grads = net.numerical_gradient(x, y)
            # SGD update: step each parameter against its gradient
            for key in grads:
                net.params[key] -= learning_rate * grads[key]
            running_loss = net.loss(x, y)
            loss_history.append(running_loss)
        # current_loss = net.loss(x_train, y_train)
        # loss_history.append(current_loss)
        # print(f'Epoch {i}: {net.params}')

    plt.title("Single-layer linear model, numerical differentiation (central difference)")
    plt.xlabel("batch")  # one loss value is recorded per mini-batch
    plt.ylabel("loss")
    plt.plot(loss_history, linestyle='dotted')
    plt.show()

    # print(loss_history)
    print(f'Initial loss: {init_loss}')
    print(f'Final loss: {loss_history[-1]}')
    print()
    print(f'True parameters: true_w1={true_w1}, true_b1={true_b1}')
    print(f'Learned parameters: w1={net.params["w1"]}, b1={net.params["b1"]}')
    print()
    end = time.perf_counter()
    print(f"Elapsed time: {(end - start) * 1000} ms")
```
Results