赞
踩
对于大段的英文 txt 文本,可以先用 open 打开并读取文件,再用 nltk 的 word_tokenize 分词,然后手动建立一份停用词(stopwords)表,把分词结果中的停用词过滤掉。(下面的代码附上了一份通用停用词表)
# Remove English stop words from a plain-text file and print what remains.
#
# Pipeline: read the file -> tokenize with NLTK -> drop every token found in
# STOPWORDS -> print the surviving tokens joined by single spaces.

# Text file to process.
INPUT_PATH = "E:\\DATA\\520only abstract.txt"

# Hand-maintained stop word table. A frozenset gives O(1) membership tests and
# silently absorbs the duplicate entries the original list carried.
# NOTE(review): the table jumps from "pointing" straight to "take", so words
# starting with q, r and s look like they were lost upstream -- confirm against
# the list this was copied from before relying on it.
STOPWORDS = frozenset({
    # Contraction fragments and upper-case markers.
    "d", "ll", "m", "re", "s", "t", "ve", "ZT", "ZZ",
    # a
    "a", "able", "about", "above", "abst", "accordance", "according",
    "accordingly", "across", "act", "actually", "added", "adj", "adopted",
    "affected", "affecting", "affects", "after", "afterwards", "again",
    "against", "ah", "all", "allow", "allows", "almost", "alone", "along",
    "already", "also", "although", "always", "am", "among", "amongst", "an",
    "and", "announce", "another", "any", "anybody", "anyhow", "anymore",
    "anyone", "anything", "anyway", "anyways", "anywhere", "apart",
    "apparently", "appear", "appreciate", "appropriate", "approximately",
    "are", "area", "areas", "aren", "arent", "arise", "around", "as", "aside",
    "ask", "asked", "asking", "asks", "associated", "at", "auth", "available",
    "away", "awfully",
    # b
    "back", "backed", "backing", "backs", "be", "became", "because", "become",
    "becomes", "becoming", "been", "before", "beforehand", "began", "begin",
    "beginning", "beginnings", "begins", "behind", "being", "beings",
    "believe", "below", "beside", "besides", "best", "better", "between",
    "beyond", "big", "biol", "both", "brief", "briefly", "but", "by",
    # c
    "ca", "came", "can", "cannot", "cant", "case", "cases", "cause", "causes",
    "certain", "certainly", "changes", "clear", "clearly", "co", "com", "come",
    "comes", "concerning", "consequently", "consider", "considering",
    "containing", "contains", "corresponding", "could", "couldnt", "course",
    "currently",
    # d
    "date", "definitely", "describe", "described", "despite", "did", "differ",
    "different", "differently", "discuss", "do", "does", "doing", "done",
    "down", "downed", "downing", "downs", "downwards", "due", "during",
    # e
    "each", "early", "ed", "edu", "effect", "eg", "eight", "eighty", "either",
    "else", "elsewhere", "end", "ended", "ending", "ends", "enough",
    "entirely", "especially", "et", "et-al", "etc", "even", "evenly", "ever",
    "every", "everybody", "everyone", "everything", "everywhere", "ex",
    "exactly", "example", "except",
    # f
    "face", "faces", "fact", "facts", "far", "felt", "few", "ff", "fifth",
    "find", "finds", "first", "five", "fix", "followed", "following",
    "follows", "for", "former", "formerly", "forth", "found", "four", "from",
    "full", "fully", "further", "furthered", "furthering", "furthermore",
    "furthers",
    # g
    "gave", "general", "generally", "get", "gets", "getting", "give", "given",
    "gives", "giving", "go", "goes", "going", "gone", "good", "goods", "got",
    "gotten", "great", "greater", "greatest", "greetings", "group", "grouped",
    "grouping", "groups",
    # h
    "had", "happens", "hardly", "has", "have", "having", "he", "hed", "hello",
    "help", "hence", "her", "here", "hereafter", "hereby", "herein", "heres",
    "hereupon", "hers", "herself", "hes", "hi", "hid", "high", "higher",
    "highest", "him", "himself", "his", "hither", "home", "hopefully", "how",
    "howbeit", "however", "hundred",
    # i
    "id", "ie", "if", "ignored", "im", "immediate", "immediately",
    "importance", "important", "in", "inasmuch", "inc", "include", "indeed",
    "index", "indicate", "indicated", "indicates", "information", "inner",
    "insofar", "instead", "interest", "interested", "interesting",
    "interests", "into", "invention", "inward", "is", "isn", "it", "itd",
    "its", "itself",
    # j, k
    "j", "just", "k", "keep", "keeps", "kept", "keys", "kg", "kind", "km",
    "knew", "know", "known", "knows",
    # l
    "large", "largely", "last", "lately", "later", "latest", "latter",
    "latterly", "least", "less", "lest", "let", "lets", "like", "liked",
    "likely", "line", "little", "long", "longer", "longest", "look",
    "looking", "looks", "ltd",
    # m
    "made", "mainly", "make", "makes", "making", "man", "many", "may",
    "maybe", "me", "mean", "means", "meantime", "meanwhile", "member",
    "members", "men", "merely", "mg", "might", "million", "miss", "ml",
    "more", "moreover", "most", "mostly", "mr", "mrs", "much", "mug", "must",
    "my", "myself",
    # n
    "n", "na", "name", "namely", "nay", "nd", "near", "nearly", "necessarily",
    "necessary", "need", "needed", "needing", "needs", "neither", "never",
    "nevertheless", "new", "newer", "newest", "next", "nine", "ninety", "no",
    "nobody", "non", "none", "nonetheless", "noone", "nor", "normally", "nos",
    "not", "noted", "nothing", "novel", "now", "nowhere", "number", "numbers",
    # o
    "o", "obtain", "obtained", "obviously", "of", "off", "often", "oh", "ok",
    "okay", "old", "older", "oldest", "omitted", "on", "once", "one", "ones",
    "only", "onto", "open", "opened", "opening", "opens", "or", "ord",
    "order", "ordered", "ordering", "orders", "other", "others", "otherwise",
    "ought", "our", "ours", "ourselves", "out", "outside", "over", "overall",
    "owing", "own",
    # p
    "p", "page", "pages", "part", "parted", "particular", "particularly",
    "parting", "parts", "past", "per", "perhaps", "place", "placed", "places",
    "please", "plus", "point", "pointed", "pointing",
    # t
    "take", "taken", "taking", "tell", "tends", "th", "than", "thank",
    "thanks", "thanx", "that", "the", "their", "theirs", "them", "themselves",
    "then", "thence", "there", "thereafter", "thereby", "thered", "therefore",
    "therein", "thereof", "therere", "theres", "thereto", "thereupon",
    "these", "they", "theyd", "theyre", "thing", "things", "think", "thinks",
    "third", "this", "thorough", "thoroughly", "those", "thou", "though",
    "thoughh", "thought", "thoughts", "thousand", "three", "throug",
    "through", "throughout", "thru", "thus", "til", "tip", "to", "today",
    "together", "too", "took", "toward", "towards", "tried", "tries", "truly",
    "try", "trying", "ts", "turn", "turned", "turning", "turns", "twice",
    "two",
    # u
    "un", "under", "unfortunately", "unless", "unlike", "unlikely", "until",
    "unto", "up", "upon", "ups", "us", "use", "used", "useful", "usefully",
    "usefulness", "uses", "using", "usually", "uucp",
    # v
    "value", "various", "very", "via", "viz", "vol", "vols", "vs",
    # w -- "wanting" and "wants" are two entries; a missing comma used to fuse
    # them into the single useless entry "wantingwants".
    "want", "wanted", "wanting", "wants", "was", "way", "ways", "we", "wed",
    "welcome", "well", "wells", "went", "were", "what", "whatever", "whats",
    "when", "whence", "whenever", "where", "whereafter", "whereas", "whereby",
    "wherein", "wheres", "whereupon", "wherever", "whether", "which", "while",
    "whim", "whither", "who", "whod", "whoever", "whole", "whom", "whomever",
    "whos", "whose", "why", "widely", "will", "willing", "wish", "with",
    "within", "without", "wonder", "words", "work", "worked", "working",
    "world", "would", "www",
    # y, z
    "year", "years", "yes", "yet", "you", "youd", "young", "younger",
    "youngest", "your", "youre", "yours", "yourself", "yourselves", "z",
    "zero", "zt", "zz",
})


def remove_stopwords(tokens, stopwords=STOPWORDS, *, ignore_case=False):
    """Return the tokens that are not stop words, in their original order.

    Args:
        tokens: Iterable of token strings (e.g. the output of a tokenizer).
        stopwords: Collection of words to drop. Defaults to STOPWORDS.
        ignore_case: When False (the default) matching is exact, so a
            capitalised token such as "The" is kept because the table holds
            "the". When True, both sides are lower-cased before comparing.

    Returns:
        A new list holding the surviving tokens; empty input gives [].
    """
    if ignore_case:
        lowered = {word.lower() for word in stopwords}
        return [token for token in tokens if token.lower() not in lowered]
    return [token for token in tokens if token not in stopwords]


def main():
    """Read INPUT_PATH, strip stop words and print the remaining tokens."""
    # Imported here so the filtering helper stays usable without NLTK.
    from nltk import word_tokenize

    with open(INPUT_PATH, "r", encoding="UTF-8") as f:
        text = f.read()

    word_tokens = word_tokenize(text)
    filtered_sentence = remove_stopwords(word_tokens)

    print("\n\nFiltered Sentence \n\n")
    print(" ".join(filtered_sentence))


if __name__ == "__main__":
    main()
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。