赞
踩
数据集链接: https://pan.baidu.com/s/19EGElx2Ylb-DpQRrJ0F7og 提取码: tg7c
将原始数据中的“评论”一列抽取出来
抽取代码
#-*- coding: utf-8 -*-
import pandas as pd
# Extract the review text ('评论' column) of one brand ('美的') from the
# combined review CSV and save it as a headerless, index-free text file.
#
# The paths are raw strings: in a normal literal, sequences such as '\d',
# '\i' or '\o' are invalid escapes (SyntaxWarning on Python 3.12+). The raw
# form yields exactly the same path text without relying on that behaviour.
inputfile = r'D:\下载\data\input\huizong.csv'  # combined review file
outputfile = r'D:\下载\data\output\meidi_jd.txt'  # where extracted reviews are saved

data = pd.read_csv(inputfile, encoding='utf-8')

# Select rows whose '品牌' (brand) is '美的' and keep only the '评论' (review)
# column. The column is requested as a list so the result stays a DataFrame.
data = data.loc[data['品牌'] == '美的', ['评论']]

# One review per line: no index column and no header row.
data.to_csv(outputfile, index=False, header=False, encoding='utf-8')
原始数据去重
#-*- coding: utf-8 -*-
import pandas as pd
# Remove duplicate reviews from the extracted review file.
#
# The paths are raw strings: in a normal literal, sequences such as '\d',
# '\o' or '\m' are invalid escapes (SyntaxWarning on Python 3.12+). The raw
# form yields exactly the same path text without relying on that behaviour.
inputfile = r'D:\下载\data\output\meidi_jd.txt'  # review file
outputfile = r'D:\下载\data\output\meidi_jd_process_1.txt'  # save path for processed reviews

# The input has no header row, so the single column is addressed as 0.
data = pd.read_csv(inputfile, encoding='utf-8', header=None)
l1 = len(data)  # row count before de-duplication

# unique() returns the distinct values of column 0; wrap them back into a
# DataFrame so later steps can keep using the DataFrame API.
data = pd.DataFrame(data[0].unique())
l2 = len(data)  # row count after de-duplication

# NOTE(review): nothing visible here writes `data` to `outputfile` or reports
# l1 - l2; presumably those steps follow elsewhere -- confirm.
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。