from nltk.stem import WordNetLemmatizer

wnl = WordNetLemmatizer()

# Read "xxx.txt", lemmatize every whitespace-separated token, and append the
# per-line token lists to "xxx.txt".
#
# The output handle is opened (in append mode) before the input is read, so
# the file is created if it does not exist yet. All input lines are collected
# before anything is written, so appended output is never re-read as input.
with open("xxx.txt", 'a', encoding='UTF-8') as sink:
    with open("xxx.txt", 'r', encoding='utf-8') as source:
        # lemmatize() is called without a POS argument, so NLTK applies its
        # default part of speech to every token.
        lemmatized_lines = [
            [wnl.lemmatize(word) for word in line.split()]
            for line in source
        ]

    # Each input line becomes one output line holding the repr of its
    # list of lemmas, e.g. ['country', 'apple'].
    for tokens in lemmatized_lines:
        print(tokens, file=sink)

# -*- coding: utf8 -*-
import spacy

# Load the English pipeline with the dependency parser and named-entity
# recognizer disabled.
# NOTE(review): 'en' is a shortcut model name; spaCy v3 removed shortcut names
# and expects a full package name such as 'en_core_web_sm'. Confirm against
# the installed spaCy version before changing it.
nlp = spacy.load('en', disable=['parser', 'ner'])

# Sample input: a list of sentences, each given as a list of token strings.
texts = [["x"], ["xx"]]
def lemmatization(texts, allowed_postags=('NOUN', 'ADJ', 'VERB', 'ADV')):
    """Lemmatize tokenized sentences, keeping only tokens with allowed POS tags.

    Each sentence is joined back into a single space-separated string, run
    through the module-level ``nlp`` pipeline, and reduced to the ``lemma_``
    of every token whose ``pos_`` is contained in ``allowed_postags``.

    Tag reference: https://spacy.io/api/annotation

    Args:
        texts: Iterable of sentences, each an iterable of token strings.
        allowed_postags: Container of ``pos_`` values to keep. The default is
            an immutable tuple so it cannot be altered between calls.

    Returns:
        A list with one entry per input sentence; each entry is the list of
        lemma strings that passed the POS filter (possibly empty).
    """
    return [
        [
            token.lemma_
            for token in nlp(" ".join(sent))
            if token.pos_ in allowed_postags
        ]
        for sent in texts
    ]
# Do lemmatization keeping only noun, adj, vb, adv
data_lemmatized = lemmatization(texts, allowed_postags=['NOUN', 'ADJ', 'VERB', 'ADV'])

# The result is stored as the repr of the nested list, not one sentence per line.
data = str(data_lemmatized)

# 'w' mode overwrites any existing content; the with-block closes the file
# even if the write fails.
with open('xxx.txt', 'w', encoding='utf-8') as f:
    f.write(data)

