Table of Contents
- Model
- Building the Data
- Numerical Differentiation (Gradient Computation)
- Model Wrapper
- Running It
- Results
This post walks through computing gradients with numerical differentiation, using those gradients to run stochastic gradient descent on the parameters, wrapping everything in a simple linear regression model for experimentation, and finally plotting the loss curve.
Model
$$y = XW + b$$
Each target y is a scalar and X has 2 feature columns. The loss function is mean squared error (MSE).
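Concretely, the MSE minimized below (computed as `np.mean(np.square(y - pred_y))` in `Network.loss`) is

$$L(W, b) = \frac{1}{n}\sum_{i=1}^{n}\bigl(x_i^\top W + b - y_i\bigr)^2$$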
Building the Data
```python
import time

import numpy as np
import matplotlib.pyplot as plt


def build_data(weights, bias, num_examples):
    x = np.random.randn(num_examples, len(weights))
    y = x.dot(weights) + bias
    # Add a little per-sample Gaussian noise to y
    y += np.random.randn(*y.shape) * 0.01
    return x, y


def data_iter(features, labels, batch_size):
    num_examples = len(features)
    # Build an index list over the samples
    indices = list(range(num_examples))
    # Shuffle the indices
    np.random.shuffle(indices)
    for i in range(0, num_examples, batch_size):
        batch_indices = np.array(indices[i:min(i + batch_size, num_examples)])
        yield features[batch_indices], labels[batch_indices]
```
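A quick usage sketch (the true weights and bias here are illustrative values, not from the training run below):

```python
# Iterate over 10 samples in batches of 4; the last batch holds the remainder
xs, ys = build_data(np.array([[2.0], [-3.0]]), 1.0, 10)
for xb, yb in data_iter(xs, ys, 4):
    print(xb.shape, yb.shape)  # (4, 2) (4, 1), (4, 2) (4, 1), then (2, 2) (2, 1)
```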
Numerical Differentiation (Gradient Computation)
This is just computing partial derivatives numerically, one parameter at a time, with the central difference shown below.
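For a single parameter $x_i$, the approximation is

$$\frac{\partial f}{\partial x_i} \approx \frac{f(x + h e_i) - f(x - h e_i)}{2h}$$

where $e_i$ is the unit vector for that parameter and $h = 10^{-4}$. The central difference has $O(h^2)$ truncation error, versus $O(h)$ for the one-sided difference $(f(x+h) - f(x))/h$.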
```python
# Compute partial derivatives (the gradient) via central-difference numerical differentiation
def numerical_gradient(f, x):
    h = 1e-4  # 0.0001
    grad = np.zeros_like(x)
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        idx = it.multi_index
        tmp_val = x[idx]
        x[idx] = float(tmp_val) + h
        fxh1 = f(x)  # f(x+h)
        x[idx] = tmp_val - h
        fxh2 = f(x)  # f(x-h)
        grad[idx] = (fxh1 - fxh2) / (2 * h)
        x[idx] = tmp_val  # restore the original value
        it.iternext()
    return grad
```
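A quick sanity check (my addition, not from the original post): for $f(x) = \sum_i x_i^2$ the exact gradient is $2x$, so the numerical result should land very close to it:

```python
# Sanity check: for f(x) = sum(x^2), the exact gradient is 2x
f = lambda x: np.sum(x ** 2)
x0 = np.array([3.0, 4.0])
print(numerical_gradient(f, x0))  # approximately [6. 8.]
```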
Model Wrapper
```python
class Network:
    def __init__(self, input_size, output_size, weight_init_std=0.01):
        # Small random weights scaled by weight_init_std, zero bias
        self.params = {'w1': weight_init_std * np.random.randn(input_size, output_size),
                       'b1': np.zeros(output_size)}

    def predict(self, x):
        w1, b1 = self.params['w1'], self.params['b1']
        return x.dot(w1) + b1

    def loss(self, x, y):
        # Mean squared error
        pred_y = self.predict(x)
        return np.mean(np.square(y - pred_y))

    def numerical_gradient(self, x, y):
        # loss_w ignores its argument on purpose: numerical_gradient perturbs the
        # parameter array in place, and self.loss reads it back via self.params
        loss_w = lambda w: self.loss(x, y)
        grads = dict()
        grads['w1'] = numerical_gradient(loss_w, self.params['w1'])
        grads['b1'] = numerical_gradient(loss_w, self.params['b1'])
        return grads
```
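As an optional cross-check (again my addition, with illustrative true parameters): for MSE the weight gradient has the closed form $\nabla_{W} L = \frac{2}{n} X^\top(\hat{y} - y)$, so the numerical gradient can be verified directly:

```python
# Verify the numerical gradient against the closed-form MSE gradient
net_check = Network(2, 1)
x_check = np.random.randn(8, 2)
y_check = x_check.dot(np.array([[2.0], [-3.0]])) + 1.0
num_grads = net_check.numerical_gradient(x_check, y_check)
analytic_w1 = 2 * x_check.T.dot(net_check.predict(x_check) - y_check) / len(x_check)
print(np.allclose(num_grads['w1'], analytic_w1))  # expected: True
```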
Running It
```python
if __name__ == '__main__':
    start = time.perf_counter()
    # np.random.seed(1)
    true_w1 = np.random.rand(2, 1)
    true_b1 = np.random.rand(1)
    # true_w1 = np.array([[3.0], [4.0]])
    # true_b1 = np.array([5.0])
    x_train, y_train = build_data(true_w1, true_b1, 5000)

    net = Network(2, 1, 0.01)
    init_loss = net.loss(x_train, y_train)
    print(net.params)

    loss_history = list()
    loss_history.append(init_loss)

    num_epochs = 2
    batch_size = 50
    learning_rate = 0.01
    for i in range(num_epochs):
        # running_loss = 0.0
        for x, y in data_iter(x_train, y_train, batch_size):
            grads = net.numerical_gradient(x, y)
            # SGD update: step each parameter against its gradient
            for key in grads:
                net.params[key] -= learning_rate * grads[key]
            running_loss = net.loss(x, y)
            loss_history.append(running_loss)
        # current_loss = net.loss(x_train, y_train)
        # loss_history.append(current_loss)
        # print(f'Epoch {i}: {net.params}')

    plt.title("Single-layer linear model, numerical differentiation (central difference)")
    plt.xlabel("batch")  # one loss value is recorded per mini-batch
    plt.ylabel("loss")
    plt.plot(loss_history, linestyle='dotted')
    plt.show()

    # print(loss_history)
    print(f'Initial loss: {init_loss}')
    print(f'Final loss: {loss_history[-1]}')
    print()
    print(f'True parameters: true_w1={true_w1}, true_b1={true_b1}')
    print(f'Learned parameters: w1={net.params["w1"]}, b1={net.params["b1"]}')
    print()
    end = time.perf_counter()
    print(f"Elapsed time: {(end - start) * 1000} ms")
```
Results