赞
踩
最近研究课题需要创建知识图谱,于是用 Python 写了一段代码,主要作用是从 Excel 文件中读取节点以及节点之间的关系,并把它们创建到 Neo4j 数据库中,最终效果是这样。
# coding:utf-8
"""Build a knowledge graph in Neo4j from an Excel workbook.

The first sheets of the workbook list node names (one node per non-empty
cell); the following sheets list relationships as rows whose column 0 and
column 2 hold node names and whose column 1 holds the relationship type.
"""
import xlwt
import xlrd
from py2neo import Graph, Node, Relationship

# Connect to the Neo4j database: address, user name, password.
graph = Graph('http://localhost:7474', username='neo4j', password='root')

# NOTE(review): xlrd 2.0+ dropped .xlsx support; this call needs xlrd < 2.0.
book = xlrd.open_workbook('桥梁结构及其病害整理.xlsx')
workSheetName = book.sheet_names()
# Expected: ['桥梁结构', '桥梁病害', '桥梁结构关系', '病害关系', '桥梁和病害的关系']
print("Excel文件包含的表单有:"+str(workSheetName))


def GetAllSheetCellValue(worksheetname):
    """Return every cell value of the named sheet as one flat list.

    Cells are read row by row (row-major), so index 1 of the result is the
    cell at row 0, column 1 whenever the sheet has at least two columns.
    """
    sheet = book.sheet_by_name(worksheetname)
    return [
        sheet.cell_value(row, col)
        for row in range(sheet.nrows)
        for col in range(sheet.ncols)
    ]


def GetAllSheetValueByColum(worksheetname):
    """Return the named sheet's data as a list of columns (one list each)."""
    sheet = book.sheet_by_name(worksheetname)
    return [sheet.col_values(col) for col in range(sheet.ncols)]


def CreateNode(className, lableName, name):
    """Create one node labelled ``className`` in the graph.

    The node gets two properties: ``lable`` (spelling kept because it is the
    property key already stored in the database) and ``name``. Extend this
    function if more properties are needed.
    """
    node = Node(className, lable=lableName, name=name)
    graph.create(node)


def CreateNodes(worksheetname, lableName, ClassName):
    """Create one node per non-empty cell of the named sheet.

    Blank cells are skipped so that no node with an empty name is created.
    Prints the number of nodes created.
    """
    nums = 0
    for value in GetAllSheetCellValue(worksheetname):
        if value is None or value == "":
            continue
        CreateNode(ClassName, lableName, value)
        nums += 1
    print("创建"+worksheetname+"节点成功,总计创建%s个"%(nums))


def CreateNodesBySheetNums(nums):
    """Create nodes for the first ``nums`` sheets of the workbook.

    Each sheet's name is used both as the node label and as the value of the
    ``lable`` property.
    """
    for i in range(nums):
        sheet_name = workSheetName[i]
        CreateNodes(sheet_name, sheet_name, sheet_name)


def CreateTwoNodeRelationship(node1, node2, relationship):
    """Create a relationship of type ``relationship`` between two nodes.

    Note the direction: the relationship starts at ``node2`` and ends at
    ``node1``.
    """
    relation = Relationship(node2, relationship, node1)
    graph.create(relation)


def _quote_identifier(name):
    """Backtick-quote a Cypher label / relationship type, escaping backticks."""
    return "`" + str(name).replace("`", "``") + "`"


def subclassRelationship(worksheetname, className1, className2):
    """Create the relationships listed in the named sheet.

    Each row of the sheet describes one relationship: column 0 and column 2
    hold node names and column 1 holds the relationship type. For every row,
    the node labelled ``className1`` whose name is the column-2 value is
    linked to the node labelled ``className2`` whose name is the column-0
    value: ``(column 2) -[type]-> (column 0)``.

    The relationship type is taken from the row's own column-1 cell; when
    that cell is blank, the first row's type is used instead.

    Raises IndexError if the sheet has fewer than three columns or no rows.
    """
    columns = GetAllSheetValueByColum(worksheetname)  # read the sheet once
    list1 = columns[0]
    list2 = columns[2]
    relation_column = columns[1]
    # Same cell the flat row-major list exposes at index 1: row 0, column 1.
    relationship = relation_column[0]
    print(list1,relationship,list2)

    num = 0
    for target_name, row_relation, source_name in zip(list1, relation_column, list2):
        num += 1
        if row_relation is None or row_relation == "":
            row_relation = relationship
        # Labels and relationship types cannot be Cypher parameters, so they
        # are backtick-quoted; node names go in as parameters so that quotes
        # in a name cannot break (or inject into) the query.
        # The $param syntax requires Neo4j 3.0 or newer.
        query = "MATCH (a:%s),(b:%s) WHERE a.name=$a_name AND b.name=$b_name CREATE (a)-[r:%s]->(b)" % (
            _quote_identifier(className1),
            _quote_identifier(className2),
            _quote_identifier(row_relation),
        )
        graph.run(query, {"a_name": str(source_name), "b_name": str(target_name)})
    # NOTE: this counts the rows processed; a row whose nodes are not found
    # by MATCH creates nothing but is still counted.
    print('创建%d个关系成功'%(num))


CreateNodesBySheetNums(2)
subclassRelationship(workSheetName[2],"桥梁结构","桥梁结构")
subclassRelationship(workSheetName[3],"桥梁病害","桥梁病害")
subclassRelationship(workSheetName[4],"桥梁病害","桥梁结构")
print("程序运行完成")
我不知道怎样在这里上传 Excel 文件,需要的读者可以关注微信公众号【BIM技术应用交流】,回复“知识图谱”获取 Excel 文件的下载链接。
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。