一.LFM梯度下降算法
2.代码实现
# 0. 引入依赖
import numpy as np
import pandas as pd
# 1. Data preparation
# Rating matrix R: one row per user, one column per item.
# A score of 0 means the user has not rated that item yet.
R = np.array(
    [
        [4, 0, 2, 0, 1],
        [0, 2, 3, 0, 0],
        [1, 0, 2, 4, 0],
        [5, 0, 0, 3, 1],
        [0, 0, 1, 5, 1],
        [0, 3, 2, 4, 1],
    ]
)
# Tip for 2-D arrays: the row count is R.shape[0] (or len(R)) and the
# column count is R.shape[1] (or len(R[0])).
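# 2. Algorithm implementation
"""
Inputs:
R: the M*N rating matrix (a 0 entry means "not rated yet")
K: dimension of the latent feature vectors
max_iter: maximum number of iterations
alpha: step size (learning rate)
lamda: regularisation coefficient
Outputs:
the factors P and Q obtained from the decomposition, plus the final loss
P: user feature matrix, M*K (randomly initialised, then learned)
Q: item feature matrix, N*K (randomly initialised, then learned)
cost: value of the regularised squared-error loss at the last iteration
"""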
# Hyper-parameters used for the demo run below.
K = 5  # number of latent features
max_iter = 10_000  # upper bound on the number of training iterations
alpha = 2e-4  # gradient-descent step size
lamda = 2e-3  # regularisation coefficient
# Core algorithm
def _lfm_cost(R, P, Q, rows, cols, lamda):
    """Return the regularised squared-error loss over the rated cells.

    For every rated cell (u, i) the loss adds the squared prediction error
    ``(P[u] . Q[i] - R[u, i]) ** 2`` plus ``lamda * (|P[u]|^2 + |Q[i]|^2)``.
    ``rows`` / ``cols`` are the index arrays of the rated cells.
    """
    residual = np.einsum("ij,ij->i", P[rows], Q[cols]) - R[rows, cols]
    penalty = lamda * (np.sum(P[rows] ** 2) + np.sum(Q[cols] ** 2))
    return float(np.sum(residual ** 2) + penalty)


def LFM_grad_desc(R, K=2, max_iter=1000, alpha=0.0001, lamda=0.002):
    """Factorise a rating matrix with a latent factor model (LFM).

    The factors are fitted by gradient descent applied rating by rating:
    each pass visits every rated cell in row-major order and moves the
    corresponding user and item vectors along the negative gradient of the
    regularised squared error.

    Args:
        R: M x N rating matrix (array-like). Entries ``> 0`` are observed
            ratings; every other entry is treated as "not rated" and ignored.
        K: dimension of the latent feature vectors.
        max_iter: maximum number of passes over the rated cells.
        alpha: step size.
        lamda: L2 regularisation coefficient.

    Returns:
        A tuple ``(P, Q, cost)`` where ``P`` is the M x K user feature
        matrix, ``Q`` is the N x K item feature matrix and ``cost`` is the
        loss after the last completed pass (or the loss of the random
        initialisation when ``max_iter`` is 0).

    Raises:
        ValueError: if ``R`` is not two-dimensional.
    """
    R = np.asarray(R)
    if R.ndim != 2:
        raise ValueError("R must be a 2-D rating matrix")
    M, N = R.shape

    # Random initial factors: P is M x K, Q is N x K.
    P = np.random.rand(M, K)
    Q = np.random.rand(N, K)

    # The set of rated cells never changes, so locate it once.
    # np.nonzero yields the indices in row-major order.
    rows, cols = np.nonzero(R > 0)

    # Defined up front so that max_iter == 0 still returns a valid loss.
    cost = _lfm_cost(R, P, Q, rows, cols, lamda)

    for _ in range(max_iter):
        for u, i in zip(rows, cols):
            p_u = P[u]
            q_i = Q[i]
            # Prediction error for this rating.
            eui = np.dot(p_u, q_i) - R[u, i]
            # Evaluate both gradients before touching either vector, so the
            # Q step uses the same P values the error was computed from.
            grad_p = 2 * (eui * q_i + lamda * p_u)
            grad_q = 2 * (eui * p_u + lamda * q_i)
            P[u] = p_u - alpha * grad_p
            Q[i] = q_i - alpha * grad_q

        # Loss after this pass; stop early once it is negligible.
        cost = _lfm_cost(R, P, Q, rows, cols, lamda)
        if cost < 0.0001:
            break

    return P, Q, cost
# 3. Demo run
# Factorise the sample rating matrix with the hyper-parameters chosen above.
P, Q, cost = LFM_grad_desc(R, K, max_iter, alpha, lamda)
# Rebuild the full rating matrix from the learned factors.
predR = P @ Q.T
# Print each result under its label; a 0 in R means "not rated yet".
for label, value in (
    ("P矩阵:\n", P),
    ("Q矩阵:\n", Q),
    ("评分矩阵R:\n", R),
    ("预测矩阵R:\n", predR),
    ("损失函数:", cost),
):
    print(label, value)
P矩阵:
[[ 0.54304109 0.9228639 0.14783175 0.77816254 1.15637692]
[ 0.14356755 1.15903469 0.97873865 1.22677179 0.52455555]
[-0.16846691 0.98488441 1.10533395 0.32992059 0.28532355]
[ 0.87657833 0.57447017 -0.34208188 1.43837734 0.71989314]
[ 1.5637987 0.72932366 0.82628242 0.41197437 0.85621136]
[ 0.66612342 0.31426319 0.39115091 1.0367152 1.41084558]]
Q矩阵:
[[ 1.51487685 0.83543438 -0.35620585 1.61076433 1.03996819]
[ 0.76168401 0.09937762 0.46440526 0.56698361 1.19510994]
[-0.61148311 0.80592431 0.52441136 1.08444592 0.57857038]
[ 0.70225008 1.83156388 1.58047378 0.75103376 1.10363137]
[ 0.04653705 0.39352596 0.39012753 0.54503258 0.10373379]]
评分矩阵R:
[[4 0 2 0 1]
[0 2 3 0 0]
[1 0 2 4 0]
[5 0 0 3 1]
[0 0 1 5 1]
[0 3 2 4 1]]
预测矩阵R:
[[3.99700575 2.39719455 2.00214327 4.16591922 0.99019496]
[3.35871349 2.00152766 2.99342673 5.27080015 1.56766906]
[1.00202499 1.01093241 1.99926743 3.99519765 1.02037399]
[4.99524478 2.24178838 1.72392629 3.00187116 0.99204648]
[4.23796407 2.90417824 1.00699752 4.9942467 0.99549538]
[4.26944937 2.99417494 1.99160815 3.99724365 1.01866479]]
损失函数: 0.27969855481954214