当前位置:   article > 正文

Python使用LOAD CSV将csv数据导入neo4j_python读取csv插入neo4j

python读取csv插入neo4j

参考链接:
1、https://github.com/SongX64/movie_recommend_knowleagegraph;
2、https://www.cnblogs.com/bluetree2/p/10446522.html;

import pandas as pd
from neo4j import GraphDatabase

# Create the Neo4j driver used by every function below (Bolt protocol, local server).
uri = "bolt://localhost:7687"
driver = GraphDatabase.driver(uri, auth=("neo4j", "neo4j"))

# Tuning parameters for the recommendation queries
k = 10  # how many of the most similar users (nearest neighbours) are taken into account
moives_common = 3  # minimum number of movies two users must both have rated before a similarity is stored
usesrs_common = 2  # minimum number of similar users that must have rated a movie for it to be recommended
threshold_sim = 0.9  # user-similarity threshold; only pairs scoring above it are linked


def load_data():
    """Empty the graph and rebuild it from the exported CSV files.

    All relationships and then all nodes are deleted first. Afterwards one
    ``LOAD CSV`` statement per file is sent to the database, in a fixed
    order: movies, ratings, genres, keywords, productors. A progress line is
    printed before each statement is sent.

    Uses the module-level ``driver``; returns nothing.
    """
    # (progress message, Cypher import statement) pairs, executed in order.
    # Movies are CREATEd first; the later files MERGE on the movie title so
    # they attach to those nodes (or create the movie when it is missing).
    import_steps = (
        ("Loading movies ...", """
            LOAD CSV WITH HEADERS FROM "file:///out_movies.csv" AS csv
            CREATE (:Movie {title: csv.title})
        """),
        ("Loading gradings ... ", """
            LOAD CSV WITH HEADERS FROM "file:///out_grade.csv" AS csv
            MERGE(m:Movie {title: csv.title})
            MERGE(u:User {id: toInteger(csv.user_id)})
            CREATE (u)-[:RATED {grading: toInteger(csv.grade)}]->(m)
        """),
        ("Loading genre ...", """
            LOAD CSV WITH HEADERS FROM "file:///out_genre.csv" AS csv
            MERGE (m:Movie {title: csv.title})
            MERGE (g:Genre {genre: csv.genre})
            CREATE (m)-[:HAS_GENRE]->(g)
        """),
        ("Loading keywords ...", """
            LOAD CSV WITH HEADERS FROM "file:///out_keyword.csv" AS csv
            MERGE(m:Movie {title: csv.title})
            MERGE(k:Keyword {keyword: csv.keyword})
            CREATE (m)-[:HAS_KEYWORD]->(k)
        """),
        ("Loading productors ...", """
            LOAD CSV WITH HEADERS FROM "file:///out_productor.csv" AS csv
            MERGE(m:Movie {title: csv.title})
            MERGE(p:Productor {name: csv.productor})
            CREATE (m)-[:HAS_PRODUCTOR]->(p)
        """),
    )

    with driver.session() as session:
        # Wipe the database: relationships first, then every node.
        session.run("""MATCH ()-[r]->() DELETE r""")
        session.run("""MATCH (n) DETACH DELETE n""")

        # Import each CSV file in turn.
        for banner, statement in import_steps:
            print(banner)
            session.run(statement)


def _ask_genres():
    """Let the user pick preferred genres by row index.

    Prints every ``Genre.genre`` value found in the graph as an indexed
    table, then reads a whitespace-separated list of row indexes from stdin.

    Returns:
        The selected genre names. An empty list is returned when nothing was
        entered, or (after printing ``Error``) when an entry is not an
        integer or is not a valid row index.
    """
    with driver.session() as session:
        names = [
            record["genre"]
            for record in session.run("MATCH (g:Genre) RETURN g.genre AS genre")
        ]

    # The DataFrame is used only to print the names next to their indexes.
    print()
    print(pd.DataFrame(names, columns=["genre"]))

    inp = input("请输入喜欢的类型,例如 1 2 3 : ")
    try:
        # List comprehension: map each typed index to the genre name in that
        # row. split() without arguments tolerates repeated or stray blanks.
        return [names[int(x)] for x in inp.split()]
    except (ValueError, IndexError):
        # Report bad input only when it actually happened, and fall back to
        # "no genre filter" instead of aborting the whole session.
        print("Error")
        return []


def queries():
    """Interactive recommendation loop.

    Each iteration:

    1. asks for a user id, the number of movies to recommend and, optionally,
       a set of preferred genres;
    2. prints the movies that user has rated, best grade first;
    3. deletes every ``SIMILARITY`` relationship in the graph and recreates
       them for this user: cosine similarity of the gradings on commonly
       rated movies, kept when at least ``moives_common`` movies are shared
       and the similarity exceeds ``threshold_sim``;
    4. takes the ``k`` most similar users and prints the movies they rated
       that this user has not, scored by the similarity-weighted average
       grading, requiring at least ``usesrs_common`` of those users per movie
       and, when genres were chosen, at least one matching genre.

    The loop never terminates on its own. All values are sent to Neo4j as
    query parameters rather than being formatted into the Cypher text.

    Raises:
        ValueError: if the user id, the movie count or the 0/1 answer is not
            an integer.
    """
    separator = "---------------------------------------------------------------------------------------------------"

    while True:
        userid = int(input("请输入要为哪位用户推荐电影,输入ID即可:"))
        m = int(input("为该用户推荐的电影个数:"))

        # Optional genre filter; an empty list means "do not filter".
        genre = []
        if int(input("是否筛选喜欢的类型?输入0或1: ")):
            genre = _ask_genres()

        with driver.session() as session:
            # Movies already rated by the user, highest grade first.
            q = session.run(
                """
                MATCH (u1:User {id: $userid})-[r:RATED]-(m:Movie)
                RETURN m.title AS title, r.grading AS grade
                ORDER BY grade DESC
                """,
                {"userid": userid},
            )
            print()
            print("你评分过的电影如下所示: ")

            result = [[r["title"], r["grade"]] for r in q]
            if not result:
                print("没有结果推荐")
            else:
                df = pd.DataFrame(result, columns=["title", "grade"])
                print()
                print(df.to_string(index=True))
            print(separator)

            # Drop all previously stored user-similarity relationships.
            session.run("""
                MATCH (u1:User)-[s:SIMILARITY]-(u2:User)
                DELETE s
            """)

            # Recompute similarities for this user.
            # Two users are connected through a movie both have rated:
            #   u1 --RATED-- movie --RATED-- u2
            # sim = sum(g1 * g2) / (sqrt(sum(g1^2)) * sqrt(sum(g2^2)))
            # taken over the commonly rated movies.
            session.run(
                """
                MATCH (u1:User {id: $userid})-[r1:RATED]-(m:Movie)-[r2:RATED]-(u2:User)
                WITH
                    u1, u2,
                    COUNT(m) AS movies_common,
                    SUM(r1.grading * r2.grading)/(SQRT( SUM(r1.grading^2) ) * SQRT( SUM(r2.grading^2) )) AS sim
                WHERE movies_common >= $min_common AND sim > $threshold
                MERGE (u1)-[s:SIMILARITY]-(u2)
                SET s.sim = sim
                """,
                {
                    "userid": userid,
                    "min_common": moives_common,
                    "threshold": threshold_sim,
                },
            )

            # Recommend movies rated by the k nearest neighbours but not by
            # the user. The genre filter is expressed in Cypher itself: an
            # empty $genres list disables it, otherwise the movie must carry
            # at least one of the requested genres.
            q = session.run(
                """
                MATCH (u1:User {id: $userid})-[s:SIMILARITY]-(u2:User)
                WITH u1, u2, s
                ORDER BY s.sim DESC LIMIT $k
                MATCH (m:Movie)-[r:RATED]-(u2)
                OPTIONAL MATCH (g:Genre)--(m)
                WITH u1, u2, s, m, r, COLLECT(DISTINCT g.genre) AS gen
                WHERE NOT((m)-[:RATED]-(u1))
                  AND (SIZE($genres) = 0 OR ANY(x IN $genres WHERE x IN gen))
                WITH
                    m.title AS title,
                    SUM(r.grading * s.sim)/SUM(s.sim) AS grade,
                    COUNT(u2) AS num,
                    gen
                WHERE num >= $min_users
                RETURN title, grade, num, gen
                ORDER BY grade DESC, num DESC
                LIMIT $limit
                """,
                {
                    "userid": userid,
                    "k": k,
                    "genres": genre,
                    "min_users": usesrs_common,
                    "limit": m,
                },
            )

            print("推荐的电影:")
            result = [[r["title"], r["grade"], r["num"], r["gen"]] for r in q]
            if not result:
                print("无推荐")
                print()
                continue
            df = pd.DataFrame(result, columns=["title", "avg grade", "num recommenders", "genre"])
            print()
            print(df.to_string(index=True))
            print(separator)


if __name__ == "__main__":
    # Ask whether the graph should be rebuilt from the CSV files (1) or the
    # existing database contents reused (0), then start the query loop.
    reload_requested = int(input("是否需要重新加载知识图谱? 输入0或1: "))
    if reload_requested:
        load_data()
    queries()

运行示例(以下为程序的交互输出):
是否需要重新加载知识图谱? 输入0或1:  1


Loading movies ...
Loading gradings ... 
Loading genre ...
Loading keywords ...
Loading productors ...


请输入要为哪位用户推荐电影,输入ID即可: 944
为该用户推荐的电影个数: 10
是否筛选喜欢的类型?输入0或1:  1



              genre
0            Action
1         Adventure
2           Fantasy
3   Science Fiction
4             Crime
5         Animation
6            Family
7             Drama
8           Romance
9          Thriller
10           Comedy
11          Mystery
12              War
13          History
14          Western
15           Horror
16            Music
17      Documentary
18          Foreign
19         TV Movie


请输入喜欢的类型,例如 1 2 3 :  3


Error

你评分过的电影如下所示: 
没有结果推荐
---------------------------------------------------------------------------------------------------
推荐的电影:
无推荐
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19
  • 20
  • 21
  • 22
  • 23
  • 24
  • 25
  • 26
  • 27
  • 28
  • 29
  • 30
  • 31
  • 32
  • 33
  • 34
  • 35
  • 36
  • 37
  • 38
  • 39
  • 40
  • 41
  • 42
  • 43
  • 44
  • 45
  • 46
  • 47
  • 48
  • 49
    声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/很楠不爱3/article/detail/610386
    推荐阅读
    相关标签
      

    闽ICP备14008679号