
news2025/2/22 22:05:29


Day_54基于 M-distance 的推荐

一. 关于M-distance 的推荐

        1. 基本数据说明

        2. 推荐系统的算法过程

        3. 简单思考

二. 代码复现

        1. 数据导入

        2. 代码的初始化

        3. 核心代码

        3.1 基础数据的构建

        3.2 leave-one-out 测试

        3.3 误差计算

Day_55基于 M-distance 的推荐 (续)

一. 解法思路

二. 运行结果

三. 补充54天的代码:

Day_54基于 M-distance 的推荐

一. 关于M-distance 的推荐

        1. 基本数据说明

        首先我们要明确什么是M-distance 的推荐,其本质和kNN算法有异曲同工之妙,kNN算法是根据邻居的种类来判断测试样本的种类,那么同样的道理,M-distance推荐也是根据邻居的属性来确定样本的属性。那该怎么做呢?




        2. 推荐系统的算法过程


        首先我们算出每个项目的平均评分(将每个项目对应列的数据相加(只加非0部分的数据),再除以这一列中非0数据的个数,得到的结果如上图G1ave所示)。接着我们设置阈值参数 $\Delta$,然后将 ave 的每一个值与样本(样本位于第 i 行第 j 列)对应的第 j 列的平均值 ave[j] 作比较,若落在 $ave[j] \pm \Delta$ 的范围内,则记录对应项目的列(例如第 2、5 列;这里要把评分为 0 的项除开,我自己画的矩阵参数不是太好,可以参考——博客)。最后根据第 i 行在所记录的这些列上的评分的平均数 $[(i,2)+(i,5)]/2$ 来估计样本的值。

        3. 简单思考

        其实M-distance 的推荐也还是一个估计的过程,相较于kNN,只不过估计时考虑的参数变多了(由以前的一维变为二维),但是思想仍然没有改变。它的思维过程有点类似于:找到平均值相等(差距不大)的项目,接着找这个人对这些项目的评分,然后用这些项目的评分来估计未知的评分。

二. 代码复现

        这个部分是这个算法的实现部分。我的建议是必须要先搞明白M-distance 推荐系统的思维过程才能看得懂代码,否则肯定是不行的;而且由于这里面变量非常多,一个不小心就不知道某个变量代表的是哪一个数值参数了。所以有两点建议:①知道M-distance 推荐系统的思维过程,以及它需要什么参数(平均数、阈值……);②代码还是要模拟计算机用脑子自己过一遍,这样理解起来更快。变量多了之后,可以先记录这个变量是怎样被使用的,再结合变量的名称,就可以知道这个变量代表的含义了。

        1. 数据导入


            // Build the recommender from the rating file; the three numbers are passed as
            // the user count, the item count and the rating count, in that order.
            MBR tempRecommender = new MBR("D:/data/movielens-943u1682m.txt", 943, 1682, 100000);

        2. 代码的初始化


     * Default rating for 1-5 points.
    public static final double DEFAULT_RATING = 3.0;

     * The total number of users.
    private int numUsers;

     * The total number of items.
    private int numItems;

     * The total number of ratings (non-zero values)
    private int numRatings;

     * The predictions.
    private double[] predictions;

     * Compressed rating matrix. User-item-rating triples.
    private int[][] compressedRatingMatrix;

     * The degree of users (how many item he has rated).
    private int[] userDegrees;

     * The average rating of the current user.
    private double[] userAverageRatings;

     * The degree of users (how many item he has rated).
    private int[] itemDegrees;

     * The average rating of the current item.
    private double[] itemAverageRatings;

     * The first user start from 0. Let the first user has x ratings, the second
     * user will start from x.
    private int[] userStartingIndices;

     * Number of non-neighbor objects.
    private int numNonNeighbors;

     * The radius (delta) for determining the neighborhood.
    private double radius;

        3. 核心代码

        3.1 基础数据的构建



    /**
     * Construct the rating matrix by reading user-item-rating triples from a file.
     * Each non-blank line must hold three comma-separated integers: user index,
     * item index and rating. The starting-index bookkeeping assumes that all
     * ratings of one user are stored on consecutive lines.
     *
     * @param paraFilename
     *            the rating filename.
     * @param paraNumUsers
     *            number of users
     * @param paraNumItems
     *            number of items
     * @param paraNumRatings
     *            number of ratings
     * @throws Exception
     *             if the file is missing, unreadable or malformed.
     */
    public MBR(String paraFilename, int paraNumUsers, int paraNumItems, int paraNumRatings) throws Exception {
        // Step 1. Initialize these arrays
        numItems = paraNumItems;
        numUsers = paraNumUsers;
        numRatings = paraNumRatings;

        userDegrees = new int[numUsers];
        userStartingIndices = new int[numUsers + 1];
        userAverageRatings = new double[numUsers];
        itemDegrees = new int[numItems];
        compressedRatingMatrix = new int[numRatings][3];
        itemAverageRatings = new double[numItems];

        predictions = new double[numRatings];

        System.out.println("Reading " + paraFilename);

        // Step 2. Read the data file.
        File tempFile = new File(paraFilename);
        if (!tempFile.exists()) {
            System.out.println("File " + paraFilename + " does not exists.");
            // Fail fast instead of continuing with a file that cannot be opened.
            throw new java.io.FileNotFoundException(paraFilename);
        } // Of if

        int tempIndex = 0;
        userStartingIndices[0] = 0;
        // Sentinel so that [start(u), start(u + 1)) is valid for the last user too.
        userStartingIndices[numUsers] = numRatings;
        // try-with-resources closes the reader even when parsing fails.
        try (BufferedReader tempBufReader = new BufferedReader(new FileReader(tempFile))) {
            String tempString;
            while ((tempString = tempBufReader.readLine()) != null) {
                if (tempString.trim().isEmpty()) {
                    continue; // Tolerate blank lines, e.g. a trailing newline.
                } // Of if

                // Each line has three values
                String[] tempStrArray = tempString.split(",");
                int tempUser = Integer.parseInt(tempStrArray[0].trim());
                int tempItem = Integer.parseInt(tempStrArray[1].trim());
                int tempRating = Integer.parseInt(tempStrArray[2].trim());
                compressedRatingMatrix[tempIndex][0] = tempUser;
                compressedRatingMatrix[tempIndex][1] = tempItem;
                compressedRatingMatrix[tempIndex][2] = tempRating;

                // The degrees are the divisors of the averages computed below.
                userDegrees[tempUser]++;
                itemDegrees[tempItem]++;

                // Starting to read the data of a new user.
                if (tempIndex > 0 && tempUser != compressedRatingMatrix[tempIndex - 1][0]) {
                    userStartingIndices[tempUser] = tempIndex;
                } // Of if

                // Advance to the next row; without this every line overwrites row 0.
                tempIndex++;
            } // Of while
        } // Of try

        // Step 3. Accumulate the totals and derive the averages.
        double[] tempUserTotalScore = new double[numUsers];
        double[] tempItemTotalScore = new double[numItems];
        for (int i = 0; i < numRatings; i++) {
            tempUserTotalScore[compressedRatingMatrix[i][0]] += compressedRatingMatrix[i][2];
            tempItemTotalScore[compressedRatingMatrix[i][1]] += compressedRatingMatrix[i][2];
        } // Of for i

        // A user or item without any rating gets NaN (0.0 / 0) as its average.
        for (int i = 0; i < numUsers; i++) {
            userAverageRatings[i] = tempUserTotalScore[i] / userDegrees[i];
        } // Of for i
        for (int i = 0; i < numItems; i++) {
            itemAverageRatings[i] = tempItemTotalScore[i] / itemDegrees[i];
        } // Of for i
    }// Of the first constructor

        3.2 leave-one-out 测试


     * Leave-one-out prediction. The predicted values are stored in predictions.
     * @see predictions
    /**
     * Leave-one-out prediction. For every rating, the rating itself is left out,
     * the average of its item is recomputed without it, and the prediction is
     * the mean of the same user's ratings on the other items whose average lies
     * within radius of that recomputed average. When no such item exists,
     * DEFAULT_RATING is predicted and numNonNeighbors is increased. The
     * predicted values are stored in predictions.
     */
    public void leaveOneOutPrediction() {
        double tempItemAverageRating;
        // Make each line of the code shorter.
        int tempUser, tempItem, tempRating;
        System.out.println("\r\nLeaveOneOutPrediction for radius " + radius);

        numNonNeighbors = 0;
        for (int i = 0; i < numRatings; i++) {
            tempUser = compressedRatingMatrix[i][0];
            tempItem = compressedRatingMatrix[i][1];
            tempRating = compressedRatingMatrix[i][2];

            // Leaving out the only rating of an item leaves no average to compare
            // with (the division below would be by zero), so use the default.
            if (itemDegrees[tempItem] <= 1) {
                predictions[i] = DEFAULT_RATING;
                numNonNeighbors++;
                continue;
            } // Of if

            // Step 1. Recompute average rating of the current item.
            tempItemAverageRating = (itemAverageRatings[tempItem] * itemDegrees[tempItem] - tempRating)
                    / (itemDegrees[tempItem] - 1);

            // Step 2. Recompute neighbors, at the same time obtain the ratings
            // Of neighbors.
            int tempNeighbors = 0;
            double tempTotal = 0;
            int tempComparedItem;
            for (int j = userStartingIndices[tempUser]; j < userStartingIndices[tempUser + 1]; j++) {
                tempComparedItem = compressedRatingMatrix[j][1];
                if (tempItem == tempComparedItem) {
                    continue;// Ignore itself.
                } // Of if

                if (Math.abs(tempItemAverageRating - itemAverageRatings[tempComparedItem]) < radius) {
                    tempTotal += compressedRatingMatrix[j][2];
                    // Count the neighbor; otherwise the average below is never used.
                    tempNeighbors++;
                } // Of if
            } // Of for j

            // Step 3. Predict as the average value of neighbors.
            if (tempNeighbors > 0) {
                predictions[i] = tempTotal / tempNeighbors;
            } else {
                predictions[i] = DEFAULT_RATING;
                numNonNeighbors++;
            } // Of if
        } // Of for i
    }// Of leaveOneOutPrediction

        3.3 误差计算


     * Compute the MAE based on the deviation of each leave-one-out.
     * @author Fan Min
    /**
     * Compute the mean absolute error (MAE) of the predictions.
     *
     * @return the sum of the absolute differences between each prediction and
     *         the rating stored in the same row, divided by the number of
     *         predictions.
     * @throws Exception
     *             declared by the signature only.
     */
    public double computeMAE() throws Exception {
        final int count = predictions.length;
        double absoluteErrorSum = 0;

        int index = 0;
        while (index < count) {
            double deviation = predictions[index] - compressedRatingMatrix[index][2];
            absoluteErrorSum += Math.abs(deviation);
            index++;
        } // Of while

        return absoluteErrorSum / count;
    }// Of computeMAE


     * Compute the RSME based on the deviation of each leave-one-out.
     * @author Fan Min
    /**
     * Compute the root of the mean squared error of the predictions.
     *
     * @return the square root of the average squared difference between each
     *         prediction and the rating stored in the same row.
     * @throws Exception
     *             declared by the signature only.
     */
    public double computeRSME() throws Exception {
        final int count = predictions.length;
        double squaredErrorSum = 0;

        int index = 0;
        while (index < count) {
            double deviation = predictions[index] - compressedRatingMatrix[index][2];
            squaredErrorSum += deviation * deviation;
            index++;
        } // Of while

        return Math.sqrt(squaredErrorSum / count);
    }// Of computeRSME

Day_55基于 M-distance 的推荐 (续)

一. 解法思路

        第54天实现的是 item-based recommendation,即基于项目的推荐;今天我们要实现的是 user-based recommendation,即基于用户的推荐。其实本质上的计算方法并没有改变,只需将评分矩阵转置即可。


package Day_55;

 * Recommendation with M-distance.
 * @author Fan Min


/**
 * Transposes a user-item-rating file into an item-user-rating file, so that
 * the item-based M-distance recommender can be run on it unchanged to obtain
 * user-based recommendation.
 */
public class MBR1 {

    /** Output file used by the four-argument constructor. */
    private static final String DEFAULT_OUTPUT_FILENAME = "D:/data/ceshi.txt";

    /** Compressed rating matrix after transposition. Item-user-rating triples. */
    private int[][] compressedRatingMatrix;

    /**
     * Read the ratings and write the transposed triples to the default output
     * file.
     *
     * @param paraFilename
     *            the rating filename (user,item,rating per line).
     * @param paraNumUsers
     *            number of users
     * @param paraNumItems
     *            number of items
     * @param paraNumRatings
     *            number of ratings
     * @throws Exception
     *             if reading or writing fails.
     */
    public MBR1(String paraFilename, int paraNumUsers, int paraNumItems,
                         int paraNumRatings)throws Exception {
        this(paraFilename, paraNumUsers, paraNumItems, paraNumRatings, DEFAULT_OUTPUT_FILENAME);
    }// Of the first constructor

    /**
     * Read the ratings and write the transposed triples to the given file.
     * The output is ordered by item, then by user; zero entries are skipped.
     *
     * @param paraFilename
     *            the rating filename (user,item,rating per line).
     * @param paraNumUsers
     *            number of users
     * @param paraNumItems
     *            number of items
     * @param paraNumRatings
     *            number of ratings; the input must not hold more than this.
     * @param paraOutputFilename
     *            the file receiving one item,user,rating triple per line.
     * @throws Exception
     *             if reading or writing fails.
     */
    public MBR1(String paraFilename, int paraNumUsers, int paraNumItems,
                         int paraNumRatings, String paraOutputFilename)throws Exception {

        int user,item,score;
        String tempString;
        String[] tempStrArray;
        compressedRatingMatrix=new int [paraNumRatings][3];
        // Dense user x item matrix; 0 means "not rated".
        int[][] matrix=new int [paraNumUsers][paraNumItems];

        // Step 1. Read the triples into the dense matrix.
        File tempFile = new File(paraFilename);
        if (!tempFile.exists()) {
            System.out.println("File " + paraFilename + " does not exists.");
        } // Of if
        // FileReader throws for a missing file; the reader is always closed.
        try (BufferedReader tempBufReader = new BufferedReader(new FileReader(tempFile))) {
            while ((tempString = tempBufReader.readLine()) != null) {
                if (tempString.trim().isEmpty()) {
                    continue; // Tolerate blank lines.
                } // Of if
                tempStrArray = tempString.split(",");
                user = Integer.parseInt(tempStrArray[0].trim());
                item = Integer.parseInt(tempStrArray[1].trim());
                score = Integer.parseInt(tempStrArray[2].trim());
                matrix[user][item] = score;
            } // Of while
        } // Of try

        // Step 2. Walk the matrix column by column to obtain the transposed triples.
        int k=0;
        for (int i = 0; i <paraNumItems ; i++) {
            for (int j = 0; j < paraNumUsers; j++) {
                if (matrix[j][i] != 0) {
                    compressedRatingMatrix[k][0] = i;
                    compressedRatingMatrix[k][1] = j;
                    compressedRatingMatrix[k][2] = matrix[j][i];
                    k++;
                } // Of if
            } // Of for j
        } // Of for i

        // Step 3. Write only the k triples that were actually filled in.
        try (PrintStream ps = new PrintStream(paraOutputFilename)) {
            for(int i=0;i<k;i++){
                ps.println(compressedRatingMatrix[i][0] + "," + compressedRatingMatrix[i][1] + ","
                        + compressedRatingMatrix[i][2]);
            } // Of for i
        } // Of try
    }// Of the second constructor

    /**
     * The entrance of the program.
     *
     * @param args
     *            Not used now.
     */
    public static void main(String[] args) {
        try {
            MBR1 tempRecommender = new MBR1("D:/data/movielens-943u1682m.txt", 943, 1682, 100000);

        } catch (Exception ee) {
            // Report the failure instead of silently swallowing it.
            System.err.println("Transposing the rating file failed: " + ee);
        } // Of try
    }// Of main
}// Of class MBR1

二. 运行结果


三. 补充54天的代码:

package Day_55;

 * Recommendation with M-distance.
 * @author Fan Min


/**
 * Recommendation with M-distance: a rating is predicted from the same user's
 * ratings on items whose average rating is close to that of the target item.
 */
public class MBR {

    /** Default rating for 1-5 points. */
    public static final double DEFAULT_RATING = 3.0;

    /** The total number of users. */
    private int numUsers;

    /** The total number of items. */
    private int numItems;

    /** The total number of ratings (non-zero values). */
    private int numRatings;

    /** The predictions, one per row of the compressed rating matrix. */
    private double[] predictions;

    /** Compressed rating matrix. Each row is a user-item-rating triple. */
    private int[][] compressedRatingMatrix;

    /** The degree of each user (how many items he has rated). */
    private int[] userDegrees;

    /** The average rating of each user. */
    private double[] userAverageRatings;

    /** The degree of each item (how many ratings it has received). */
    private int[] itemDegrees;

    /** The average rating of each item. */
    private double[] itemAverageRatings;

    /**
     * The first user starts from 0. Let the first user have x ratings, the second
     * user will start from x.
     */
    private int[] userStartingIndices;

    /** Number of non-neighbor objects. */
    private int numNonNeighbors;

    /** The radius (delta) for determining the neighborhood. */
    private double radius;

    /**
     * Construct the rating matrix by reading user-item-rating triples from a file.
     * Each non-blank line must hold three comma-separated integers: user index,
     * item index and rating. The starting-index bookkeeping assumes that all
     * ratings of one user are stored on consecutive lines.
     *
     * @param paraFilename
     *            the rating filename.
     * @param paraNumUsers
     *            number of users
     * @param paraNumItems
     *            number of items
     * @param paraNumRatings
     *            number of ratings
     * @throws Exception
     *             if the file is missing, unreadable or malformed.
     */
    public MBR(String paraFilename, int paraNumUsers, int paraNumItems, int paraNumRatings) throws Exception {
        // Step 1. Initialize these arrays
        numItems = paraNumItems;
        numUsers = paraNumUsers;
        numRatings = paraNumRatings;

        userDegrees = new int[numUsers];
        userStartingIndices = new int[numUsers + 1];
        userAverageRatings = new double[numUsers];
        itemDegrees = new int[numItems];
        compressedRatingMatrix = new int[numRatings][3];
        itemAverageRatings = new double[numItems];

        predictions = new double[numRatings];

        System.out.println("Reading " + paraFilename);

        // Step 2. Read the data file.
        File tempFile = new File(paraFilename);
        if (!tempFile.exists()) {
            System.out.println("File " + paraFilename + " does not exists.");
            // Fail fast instead of continuing with a file that cannot be opened.
            throw new java.io.FileNotFoundException(paraFilename);
        } // Of if

        int tempIndex = 0;
        userStartingIndices[0] = 0;
        // Sentinel so that [start(u), start(u + 1)) is valid for the last user too.
        userStartingIndices[numUsers] = numRatings;
        // try-with-resources closes the reader even when parsing fails.
        try (BufferedReader tempBufReader = new BufferedReader(new FileReader(tempFile))) {
            String tempString;
            while ((tempString = tempBufReader.readLine()) != null) {
                if (tempString.trim().isEmpty()) {
                    continue; // Tolerate blank lines, e.g. a trailing newline.
                } // Of if

                // Each line has three values
                String[] tempStrArray = tempString.split(",");
                int tempUser = Integer.parseInt(tempStrArray[0].trim());
                int tempItem = Integer.parseInt(tempStrArray[1].trim());
                int tempRating = Integer.parseInt(tempStrArray[2].trim());
                compressedRatingMatrix[tempIndex][0] = tempUser;
                compressedRatingMatrix[tempIndex][1] = tempItem;
                compressedRatingMatrix[tempIndex][2] = tempRating;

                // The degrees are the divisors of the averages computed below.
                userDegrees[tempUser]++;
                itemDegrees[tempItem]++;

                // Starting to read the data of a new user.
                if (tempIndex > 0 && tempUser != compressedRatingMatrix[tempIndex - 1][0]) {
                    userStartingIndices[tempUser] = tempIndex;
                } // Of if

                // Advance to the next row; without this every line overwrites row 0.
                tempIndex++;
            } // Of while
        } // Of try

        // Step 3. Accumulate the totals and derive the averages.
        double[] tempUserTotalScore = new double[numUsers];
        double[] tempItemTotalScore = new double[numItems];
        for (int i = 0; i < numRatings; i++) {
            tempUserTotalScore[compressedRatingMatrix[i][0]] += compressedRatingMatrix[i][2];
            tempItemTotalScore[compressedRatingMatrix[i][1]] += compressedRatingMatrix[i][2];
        } // Of for i

        // A user or item without any rating gets NaN (0.0 / 0) as its average.
        for (int i = 0; i < numUsers; i++) {
            userAverageRatings[i] = tempUserTotalScore[i] / userDegrees[i];
        } // Of for i
        for (int i = 0; i < numItems; i++) {
            itemAverageRatings[i] = tempItemTotalScore[i] / itemDegrees[i];
        } // Of for i
    }// Of the first constructor

    /**
     * Set the radius (delta).
     *
     * @param paraRadius
     *            The given radius. A non-positive value is replaced by 0.1.
     */
    public void setRadius(double paraRadius) {
        if (paraRadius > 0) {
            radius = paraRadius;
        } else {
            radius = 0.1;
        } // Of if
    }// Of setRadius

    /**
     * Leave-one-out prediction. For every rating, the rating itself is left out,
     * the average of its item is recomputed without it, and the prediction is
     * the mean of the same user's ratings on the other items whose average lies
     * within radius of that recomputed average. When no such item exists,
     * DEFAULT_RATING is predicted and numNonNeighbors is increased. The
     * predicted values are stored in predictions.
     */
    public void leaveOneOutPrediction() {
        double tempItemAverageRating;
        // Make each line of the code shorter.
        int tempUser, tempItem, tempRating;
        System.out.println("\r\nLeaveOneOutPrediction for radius " + radius);

        numNonNeighbors = 0;
        for (int i = 0; i < numRatings; i++) {
            tempUser = compressedRatingMatrix[i][0];
            tempItem = compressedRatingMatrix[i][1];
            tempRating = compressedRatingMatrix[i][2];

            // Leaving out the only rating of an item leaves no average to compare
            // with (the division below would be by zero), so use the default.
            if (itemDegrees[tempItem] <= 1) {
                predictions[i] = DEFAULT_RATING;
                numNonNeighbors++;
                continue;
            } // Of if

            // Step 1. Recompute average rating of the current item.
            tempItemAverageRating = (itemAverageRatings[tempItem] * itemDegrees[tempItem] - tempRating)
                    / (itemDegrees[tempItem] - 1);

            // Step 2. Recompute neighbors, at the same time obtain the ratings
            // Of neighbors.
            int tempNeighbors = 0;
            double tempTotal = 0;
            int tempComparedItem;
            for (int j = userStartingIndices[tempUser]; j < userStartingIndices[tempUser + 1]; j++) {
                tempComparedItem = compressedRatingMatrix[j][1];
                if (tempItem == tempComparedItem) {
                    continue;// Ignore itself.
                } // Of if

                if (Math.abs(tempItemAverageRating - itemAverageRatings[tempComparedItem]) < radius) {
                    tempTotal += compressedRatingMatrix[j][2];
                    // Count the neighbor; otherwise the average below is never used.
                    tempNeighbors++;
                } // Of if
            } // Of for j

            // Step 3. Predict as the average value of neighbors.
            if (tempNeighbors > 0) {
                predictions[i] = tempTotal / tempNeighbors;
            } else {
                predictions[i] = DEFAULT_RATING;
                numNonNeighbors++;
            } // Of if
        } // Of for i
    }// Of leaveOneOutPrediction

    /**
     * Compute the MAE based on the deviation of each leave-one-out.
     *
     * @return the mean absolute difference between predictions and ratings.
     * @throws Exception
     *             declared by the signature only.
     */
    public double computeMAE() throws Exception {
        double tempTotalError = 0;
        for (int i = 0; i < predictions.length; i++) {
            tempTotalError += Math.abs(predictions[i] - compressedRatingMatrix[i][2]);
        } // Of for i

        return tempTotalError / predictions.length;
    }// Of computeMAE

    /**
     * Compute the RSME based on the deviation of each leave-one-out.
     *
     * @return the square root of the mean squared difference between
     *         predictions and ratings.
     * @throws Exception
     *             declared by the signature only.
     */
    public double computeRSME() throws Exception {
        double tempTotalError = 0;
        for (int i = 0; i < predictions.length; i++) {
            tempTotalError += (predictions[i] - compressedRatingMatrix[i][2])
                    * (predictions[i] - compressedRatingMatrix[i][2]);
        } // Of for i

        double tempAverage = tempTotalError / predictions.length;

        return Math.sqrt(tempAverage);
    }// Of computeRSME

    /**
     * The entrance of the program. Runs leave-one-out prediction for several
     * radii and prints the resulting errors.
     *
     * @param args
     *            Not used now.
     */
    public static void main(String[] args) {
        try {
            MBR tempRecommender = new MBR("D:/data/ceshi.txt",1682,
                    943,  100000);

            for (double tempRadius = 0.2; tempRadius < 0.6; tempRadius += 0.1) {
                // The errors are only meaningful after predicting with this radius.
                tempRecommender.setRadius(tempRadius);
                tempRecommender.leaveOneOutPrediction();

                double tempMAE = tempRecommender.computeMAE();
                double tempRSME = tempRecommender.computeRSME();

                System.out.println("Radius = " + tempRadius + ", MAE = " + tempMAE + ", RSME = " + tempRSME
                        + ", numNonNeighbors = " + tempRecommender.numNonNeighbors);
            } // Of for tempRadius
        } catch (Exception ee) {
            // Report the failure instead of silently swallowing it.
            System.err.println("M-distance recommendation failed: " + ee);
        } // Of try
    }// Of main
}// Of class MBR





