赞
踩
参考链接:
1、https://github.com/SongX64/movie_recommend_knowleagegraph;
2、https://www.cnblogs.com/bluetree2/p/10446522.html;
import pandas as pd from neo4j import GraphDatabase # 连接数据库驱动 uri = "bolt://localhost:7687" driver = GraphDatabase.driver(uri, auth=("neo4j", "neo4j")) # 参数设置 k = 10 # 考虑最相似的用户,也就是最邻近的邻居 moives_common = 3 # 考虑用户相似度,要有多少个电影公共看过 usesrs_common = 2 # 至少共通看过2个电影,说用户相似 threshold_sim = 0.9 # 用户相似度阈值 def load_data(): with driver.session() as session: # 清空数据库 session.run("""MATCH ()-[r]->() DELETE r""") session.run("""MATCH (n) DETACH DELETE n""") # --------------从文件中读取数据,存入 neo4j 数据库中------------ # 加载电影 print("Loading movies ...") session.run(""" LOAD CSV WITH HEADERS FROM "file:///out_movies.csv" AS csv CREATE (:Movie {title: csv.title}) """) # 加载评分 print("Loading gradings ... ") session.run(""" LOAD CSV WITH HEADERS FROM "file:///out_grade.csv" AS csv MERGE(m:Movie {title: csv.title}) MERGE(u:User {id: toInteger(csv.user_id)}) CREATE (u)-[:RATED {grading: toInteger(csv.grade)}]->(m) """) # 加载电影类型 print("Loading genre ...") session.run(""" LOAD CSV WITH HEADERS FROM "file:///out_genre.csv" AS csv MERGE (m:Movie {title: csv.title}) MERGE (g:Genre {genre: csv.genre}) CREATE (m)-[:HAS_GENRE]->(g) """) # 加载关键词 print("Loading keywords ...") session.run(""" LOAD CSV WITH HEADERS FROM "file:///out_keyword.csv" AS csv MERGE(m:Movie {title: csv.title}) MERGE(k:Keyword {keyword: csv.keyword}) CREATE (m)-[:HAS_KEYWORD]->(k) """) # 加载导演 print("Loading productors ...") session.run(""" LOAD CSV WITH HEADERS FROM "file:///out_productor.csv" AS csv MERGE(m:Movie {title: csv.title}) MERGE(p:Productor {name: csv.productor}) CREATE (m)-[:HAS_PRODUCTOR]->(p) """) # -------------------读取文件完毕------------------------- def queries(): while True: userid = int(input("请输入要为哪位用户推荐电影,输入ID即可:")) m = int(input("为该用户推荐的电影个数:")) # 电影类型 genre = [] if int(input("是否筛选喜欢的类型?输入0或1: ")): # 排除的话 with driver.session() as session: try: # 查询所有类型,放入元组中 q = session.run(f"""MATCH (g:Genre) RETURN g.genre AS genre""") result = [] for i, r in enumerate(q): result.append(r["genre"]) # 将 genre 列转为 DataFrame 类型,并列出提示 df = pd.DataFrame(result, columns=["genre"]) print() print(df) # 根据上面的输出,输入类型 inp = input("请输入喜欢的类型,例如 1 2 3 : ") if len(inp) != 0: inp = inp.split(" ") # TODO 这里是什么意思? lamuda表达式 ??? genre = [df["genre"].iloc[int(x)] for x in inp] finally: print("Error") # 进行查询, 用户u1对电影的评分, 降序排序 with driver.session() as session: q = session.run(f""" MATCH (u1:User {{ id:{userid} }})-[r:RATED]-(m:Movie) RETURN m.title AS title,r.grading AS grade ORDER BY grade DESC """) print() print("你评分过的电影如下所示: ") # 将 session 查询结果放入元组中 result = [] for r in q: result.append([r["title"], r["grade"]]) # 输出结果, 用户对于电影的一个评分列表 if len(result) == 0: print("没有结果推荐") else: df = pd.DataFrame(result, columns=["title", "grade"]) print() print(df.to_string(index=True)) print("---------------------------------------------------------------------------------------------------") # 删除用户相似性关系 session.run(f""" MATCH (u1:User)-[s:SIMILARITY]-(u2:User) DELETE s """) # 重新计算用户相似性 # 通过电影连接两个用户, u1 --rated-- movie --rated-- u2 # 计算u1,u2共同评论过的电影,然后根据两个人的评分来计算相似度 # (用户1评分 * 用户2评分)的总和,除以他们分别的根号平方和 session.run(f""" MATCH (u1:User {{id : {userid}}})-[r1:RATED]-(m:Movie)-[r2:RATED]-(u2:User) WITH u1, u2, COUNT(m) AS movies_common, SUM(r1.grading * r2.grading)/(SQRT( SUM(r1.grading^2) ) * SQRT( SUM(r2.grading^2) )) as sim WHERE movies_common >= {moives_common} AND sim > {threshold_sim} MERGE (u1)-[s:SIMILARITY]-(u2) SET s.sim = sim """) # 条件语句拼装, 过滤类型 Q_GENRE = "" if len(genre) > 0: Q_GENRE = "AND ((SIZE(gen) > 0) AND " Q_GENRE += "(ANY(X IN " + str(genre) + " WHERE X IN gen))" Q_GENRE += ")" q = session.run(f""" MATCH (u1:User{{id : {userid}}})-[s:SIMILARITY]-(u2:User) WITH u1,u2,s ORDER BY s.sim DESC LIMIT {k} MATCH (m:Movie)-[r:RATED]-(u2) OPTIONAL MATCH (g:Genre)--(m) WITH u1,u2,s,m,r, COLLECT(DISTINCT g.genre) AS gen WHERE NOT((m)-[:RATED]-(u1)) {Q_GENRE} WITH m.title AS title, SUM(r.grading * s.sim)/SUM(s.sim) AS grade, COUNT(u2) AS num, gen WHERE num >= {usesrs_common} RETURN title,grade,num,gen ORDER BY grade DESC, num DESC LIMIT {m} """) print("推荐的电影:") result = [] for r in q: result.append([r["title"], r["grade"], r["num"], r["gen"]]) if len(result) == 0: print("无推荐") print() continue df = pd.DataFrame(result, columns=["title", "avg grade", "num recommenders", "genre"]) print() print(df.to_string(index=True)) print("---------------------------------------------------------------------------------------------------") if __name__ == "__main__": if int(input("是否需要重新加载知识图谱? 输入0或1: ")): load_data() queries()
是否需要重新加载知识图谱? 输入0或1: 1 Loading movies ... Loading gradings ... Loading genre ... Loading keywords ... Loading productors ... 请输入要为哪位用户推荐电影,输入ID即可: 944 为该用户推荐的电影个数: 10 是否筛选喜欢的类型?输入0或1: 1 genre 0 Action 1 Adventure 2 Fantasy 3 Science Fiction 4 Crime 5 Animation 6 Family 7 Drama 8 Romance 9 Thriller 10 Comedy 11 Mystery 12 War 13 History 14 Western 15 Horror 16 Music 17 Documentary 18 Foreign 19 TV Movie 请输入喜欢的类型,例如 1 2 3 : 3 Error 你评分过的电影如下所示: 没有结果推荐 --------------------------------------------------------------------------------------------------- 推荐的电影: 无推荐
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。