赞
踩
对句子进行数据清洗
def remove_stopwords(sen):
    """Drop stop words from a sequence of tokens and join the rest.

    Args:
        sen: Iterable of tokens; ``clean`` passes the result of
            ``str.split()`` on one sentence.

    Returns:
        A single string made of the tokens that are not in ``stop_words``,
        separated by one space each.
    """
    kept_tokens = []
    for token in sen:
        # ``stop_words`` is looked up at module level; it is defined outside
        # this function.
        if token in stop_words:
            continue
        kept_tokens.append(token)
    return " ".join(kept_tokens)
-
def clean(sentences):
    """Normalize sentences: letters only, lower case, stop words removed.

    Args:
        sentences: Collection of sentence strings; it is wrapped in a
            ``pandas.Series`` for the regex substitution.

    Returns:
        A list of cleaned sentence strings, one per input sentence.
    """
    # Every character that is not an ASCII letter becomes a single space.
    letters_only = pd.Series(sentences).str.replace(
        "[^a-zA-Z]", " ", regex=True
    )

    # Lowercase everything first, then filter stop words in a second pass.
    lowered = []
    for text in letters_only:
        lowered.append(text.lower())

    cleaned = []
    for text in lowered:
        cleaned.append(remove_stopwords(text.split()))
    return cleaned
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。