赞
踩
由于本站的前一个这个算法过早,无人回复,本人运用他的代码出现了许多问题,所以将其修复后再次上传。
本文,用皮尔逊相关系数计算用户之间的相似性。
from operator import itemgetter, attrgetter from math import sqrt import codecs def load_data(): filename_user_movie = 'E:/code/ml-100k/u.data' filename_movieInfo = 'E:/code/ml-100k/u.item' user_movie = {} for line in open(filename_user_movie): (userId, itemId, rating, timestamp) = line.strip().split('\t') user_movie.setdefault(userId,{}) user_movie[userId][itemId] = float(rating) movies = {} for line in open(filename_movieInfo,'r',encoding='ISO-8859-1'): (movieId, movieTitle) = line.split('|')[0:2] movies[movieId] = movieTitle return user_movie, movies def average_rating(user): average = 0 for u in user_movie[user].keys(): average += user_movie[user][u] average = average * 1.0 / len(user_movie[user].keys()) return average def calUserSim(user_movie): # build inverse table for movie_user movie_user = {} for ukey in user_movie.keys(): for mkey in user_movie[ukey].keys(): if mkey not in movie_user: movie_user[mkey] = [] movie_user[mkey].append(ukey) # calculated co-rated movies between users C = {} for movie, users in movie_user.items(): for u in users: C.setdefault(u,{}) for n in users: if u == n: continue C[u].setdefault(n,[]) C[u][n].append(movie) # calculate user similarity (perason correlation) userSim = {} for u in C.keys(): for n in C[u].keys(): userSim.setdefault(u,{}) userSim[u].setdefault(n,0) average_u_rate = average_rating(u) average_n_rate = average_rating(n) part1 = 0 part2 = 0 part3 = 0 for m in C[u][n]: part1 += (user_movie[u][m]-average_u_rate)*(user_movie[n][m]-average_n_rate)*1.0 part2 += pow(user_movie[u][m]-average_u_rate, 2)*1.0 part3 += pow(user_movie[n][m]-average_n_rate, 2)*1.0 part2 = sqrt(part2) part3 = sqrt(part3) if part2 == 0: part2 = 0.001 if part3 == 0: part3 = 0.001 userSim[u][n] = part1 / (part2 * part3) return userSim def getRecommendations(user, user_movie, movies, userSim, N): pred = {} interacted_items = user_movie[user].keys() average_u_rate = average_rating(user) sumUserSim = 0 for n, nuw in sorted(userSim[user].items(),key=itemgetter(1),reverse=True)[0:N]: average_n_rate = average_rating(n) for i, nrating in user_movie[n].items(): # filter movies user interacted before if i in interacted_items: continue pred.setdefault(i,0) pred[i] += nuw * (nrating - average_n_rate) sumUserSim += nuw for i, rating in pred.items(): pred[i] = average_u_rate + (pred[i]*1.0) / sumUserSim # top-10 pred pred = sorted(pred.items(), key=itemgetter(1), reverse=True)[0:10] return pred if __name__ == "__main__": # load data user_movie, movies = load_data() # Calculate user similarity userSim = calUserSim(user_movie) # Recommend pred = getRecommendations('1', user_movie, movies, userSim, 20) # display recommend result (top-10 results) for i, rating in pred: print ('film: %s, rating: %s' % (movies[i], rating)) print ('filmNum: %s, rating: %s' % (i, rating))
引用作者前者
本文一切用途都是学术用途,没有任何商业行为。
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。