赞
踩
这里使用的是原始的 XML 数据（选择 v2 版本），参照这篇文章进行格式转换，将其转换成 CSV 文件。
# Convert the restaurant review XML into a category-level CSV file.
#
# xml.etree.cElementTree was deprecated in Python 3.3 and removed in 3.9;
# xml.etree.ElementTree exposes the same API.
import xml.etree.ElementTree as ET

import pandas as pd

path = 'Restaurants_Train_v2.xml'
tree = ET.parse(path)
root = tree.getroot()

# Category level: one row per (sentence text, aspect category, polarity).
data = []
for sentence in root.findall('sentence'):
    text = sentence.find('text').text
    aspectCategories = sentence.find('aspectCategories')
    if aspectCategories is None:
        # find() returns None when the child is absent; skip such sentences
        # instead of failing with AttributeError on None.findall().
        continue
    for aspectCategory in aspectCategories.findall('aspectCategory'):
        category = aspectCategory.get('category')
        polarity = aspectCategory.get('polarity')
        data.append((text, category, polarity))

df = pd.DataFrame(data, columns=['text', 'category', 'polarity'])
df.to_csv('restaurant_train_category.csv', index=False)
# Preview of the first rows; only displayed in an interactive/notebook session.
df.head()
# Aspect-term level: one row per (sentence text, aspect term, polarity).
# Relies on `root` (the parsed XML root) and `pd` (pandas) already being
# defined earlier in this script.
data = []
# './/aspectTerms/..' selects the parent of every <aspectTerms> element, so
# only elements that actually contain aspect terms are visited.
for sentence in root.findall('.//aspectTerms/..'):
    text = sentence.find('text').text
    aspectTerms = sentence.find('aspectTerms')
    for aspectTerm in aspectTerms.findall('aspectTerm'):
        term = aspectTerm.get('term')
        polarity = aspectTerm.get('polarity')
        data.append((text, term, polarity))

df = pd.DataFrame(data, columns=['text', 'term', 'polarity'])
# Keep only the three labels that are mapped below; rows with any other
# polarity value are dropped. The explicit copy() makes the filtered frame
# independent, so the column assignment below does not raise pandas'
# SettingWithCopyWarning.
df = df[df['polarity'].isin(['positive', 'negative', 'neutral'])].copy()
# Encode the labels as integers: positive -> 1, neutral -> 0, negative -> -1.
df['polarity'] = df['polarity'].map(
    {'positive': 1, 'neutral': 0, 'negative': -1})
df.to_csv('restaurant_train_aspectterm.csv', index=False)
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。