# 手动构建线性回归（PyTorch）

# news 2025/1/14 18:25:29

import torch
from sklearn.datasets import make_regression
import matplotlib.pyplot as plt
import random
# 1. Build the data
# Build the dataset
def create_dataset():
    """Generate a noisy single-feature regression dataset as tensors.

    The data comes from ``sklearn.datasets.make_regression`` with a fixed
    ``random_state`` so every call produces the same 100 samples.

    Returns:
        A ``(x, y)`` tuple of tensors built from the generated feature
        matrix (100 samples, 1 feature) and the matching target vector.
    """
    # ``coef=True`` makes make_regression also return the true coefficient,
    # which is not needed here and is discarded.
    features, targets, _ = make_regression(
        n_samples=100,
        n_features=1,
        noise=10,
        bias=14.5,
        coef=True,
        random_state=0,
    )
    # Convert the generated NumPy arrays to tensors.
    return torch.tensor(features), torch.tensor(targets)

# Build the data loader
def data_loader(x, y, batch_size):
    """Yield shuffled, non-overlapping mini-batches of ``(x, y)``.

    The sample order is shuffled once per call with ``random.shuffle`` and
    then cut into consecutive groups of ``batch_size`` indices, so each
    sample appears in at most one batch and ``x``/``y`` stay aligned.

    Args:
        x: Features, indexable along the first axis with a list of integer
            indices (e.g. a torch tensor).
        y: Targets, same length and indexing behaviour as ``x``.
        batch_size: Number of samples per batch; must be positive.

    Yields:
        ``(batch_x, batch_y)`` pairs, each holding ``batch_size`` samples.
        Only full batches are produced: a trailing remainder of fewer than
        ``batch_size`` samples is dropped.

    Raises:
        ValueError: If ``batch_size`` is not positive (raised when the
            generator is first advanced).
    """
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer")

    # Number of samples available.
    sample_count = len(y)
    # Shuffled sample indices; batches are drawn from this order.
    shuffled = list(range(sample_count))
    random.shuffle(shuffled)

    # Total number of full batches.
    full_batches = sample_count // batch_size
    for batch_no in range(full_batches):
        # Batch k covers positions [k * batch_size, (k + 1) * batch_size).
        start = batch_no * batch_size
        picked = shuffled[start:start + batch_size]
        yield x[picked], y[picked]

def test01():
    """Plot the generated dataset, then print the targets of every batch.

    Shows a scatter plot of the samples from ``create_dataset`` and then
    iterates ``data_loader`` with a batch size of 10, printing each batch's
    target tensor.
    """
    features, targets = create_dataset()
    plt.scatter(features, targets)
    plt.show()

    batches = data_loader(features, targets, batch_size=10)
    for _, batch_targets in batches:
        print(batch_targets)
# 2. Hypothesis function, loss function, optimization method
# Loss function: squared error (see square_loss)
# Optimization method: gradient descent
# Hypothesis function parameters: scalar weight and bias, both float64 and
# tracked by autograd, each initialised to 0.1.
w = torch.tensor(0.1, dtype=torch.float64, requires_grad=True)
b = torch.tensor(0.1, dtype=torch.float64, requires_grad=True)



def linear_regression(x):
    """Evaluate the linear hypothesis ``w * x + b``.

    Uses the module-level parameters ``w`` (weight) and ``b`` (bias).

    Args:
        x: Input value(s) to feed through the model.

    Returns:
        The prediction ``w * x + b``.
    """
    weighted = w * x
    return weighted + b

# Loss function
def square_loss(y_pred, y_true):
    """Return the element-wise squared error between prediction and target.

    No reduction is applied: the result has one entry per element of the
    difference ``y_pred - y_true``.

    Args:
        y_pred: Predicted values.
        y_true: Ground-truth values.

    Returns:
        ``(y_pred - y_true)`` squared, element by element.
    """
    residual = y_pred - y_true
    return torch.square(residual)

# Optimization method
def sqd(lr=1e-2, batch_size=16):
    """Apply one gradient-descent step to the module-level ``w`` and ``b``.

    Each parameter is moved against its accumulated gradient, scaled by the
    learning rate and divided by ``batch_size`` so that the step uses the
    average gradient over the batch rather than the summed one.

    This function only updates the parameter values; it does not reset
    ``w.grad`` / ``b.grad``, so the caller is responsible for clearing
    gradients between steps.

    Args:
        lr: Learning rate.
        batch_size: Number of samples whose gradients were accumulated
            before this step. Defaults to 16, the divisor that was
            previously hard-coded.

    Raises:
        AttributeError: If a parameter has no gradient yet (``.grad`` is
            ``None``), e.g. when no backward pass has been run.
    """
    for param in (w, b):
        # Writing through ``.data`` changes the value without recording the
        # update in the autograd graph.
        param.data = param.data - lr * param.grad.data / batch_size
    

# Run the plotting / batching demo only when executed as a script.
if __name__ == "__main__":
    test01()