赞
踩
本文介绍了 TextBlob 的使用方法。TextBlob 是一个用 Python 编写的开源文本处理库,可以用来执行很多自然语言处理任务,例如词性标注、名词短语提取、情感分析、文本翻译等。
简介
官方文档:https://textblob.readthedocs.io/en/dev/
- # 安装:pip install textblob
- # 配置国内源安装:pip install textblob -i https://pypi.tuna.tsinghua.edu.cn/simple
- # 参考:https://textblob.readthedocs.io/en/dev/quickstart.html
- from textblob import TextBlob
- text = 'I love natural language processing! I am not like fish!'
- blob = TextBlob(text)
- blob.tags
-
- [('I', 'PRP'),
- ('love', 'VBP'),
- ('natural', 'JJ'),
- ('language', 'NN'),
- ('processing', 'NN'),
- ('I', 'PRP'),
- ('am', 'VBP'),
- ('not', 'RB'),
- ('like', 'IN'),
- ('fish', 'NN')]
- np = blob.noun_phrases
- for w in np:
- print(w)
-
- natural language processing
- for sentence in blob.sentences:
- print(sentence + '------>' + str(sentence.sentiment.polarity))
-
- I love natural language processing!------>0.3125
- I am not like fish!------>0.0
- token = blob.words
- for w in token:
- print(w)
-
- I
- love
- natural
- language
- processing
- I
- am
- not
- like
- fish
- sentence = blob.sentences
- for s in sentence:
- print(s)
- I love natural language processing!
- I am not like fish!
- token = blob.words
- for w in token:
- # 变复数
- print(w.pluralize())
- # 变单数
- print(w.singularize())
-
- we
- I
- love
- love
- naturals
- natural
- languages
- language
- processings
- processing
- we
- I
- ams
- am
- nots
- not
- likes
- like
- fish
- fish
- from textblob import Word
- w = Word('went')
- print(w.lemmatize('v'))
- w = Word('octopi')
- print(w.lemmatize())
-
- go
- octopus
- from textblob.wordnet import VERB
- word = Word('octopus')
- syn_word = word.synsets
- for syn in syn_word:
- print(syn)
- Synset('octopus.n.01')
- Synset('octopus.n.02')
- # 指定返回的同义词集为动词
- syn_word1 = Word("hack").get_synsets(pos=VERB)
- for syn in syn_word1:
- print(syn)
-
- Synset('chop.v.05')
- Synset('hack.v.02')
- Synset('hack.v.03')
- Synset('hack.v.04')
- Synset('hack.v.05')
- Synset('hack.v.06')
- Synset('hack.v.07')
- Synset('hack.v.08')
-
- # 查看synset(同义词集)的具体定义
- Word("beautiful").definitions
-
- ['delighting the senses or exciting intellectual or emotional admiration',
- '(of weather) highly enjoyable']
- sen = 'I lvoe naturl language processing!'
- sen = TextBlob(sen)
- print(sen.correct())
-
- I love nature language processing!
-
- # Word.spellcheck()返回拼写建议以及置信度
- w1 = Word('good')
- w2 = Word('god')
- w3 = Word('gd')
- print(w1.spellcheck())
- print(w2.spellcheck())
- print(w3.spellcheck())
-
- [('good', 1.0)]
- [('god', 1.0)]
- [('go', 0.586139896373057), ('god', 0.23510362694300518), ('d', 0.11658031088082901), ('g', 0.03626943005181347), ('ed', 0.009067357512953367), ('rd', 0.006476683937823834), ('nd', 0.0038860103626943004), ('gr', 0.0025906735751295338), ('sd', 0.0006476683937823834), ('md', 0.0006476683937823834), ('id', 0.0006476683937823834), ('gdp', 0.0006476683937823834), ('ga', 0.0006476683937823834), ('ad', 0.0006476683937823834)]
- text = TextBlob('I lvoe naturl language processing!')
- print(text.parse())
-
- I/PRP/B-NP/O lvoe/NN/I-NP/O naturl/NN/I-NP/O language/NN/I-NP/O processing/NN/I-NP/O !/./O/O
- text = TextBlob('I lvoe naturl language processing!')
- print(text.ngrams(n=2))
-
- [WordList(['I', 'lvoe']), WordList(['lvoe', 'naturl']), WordList(['naturl', 'language']), WordList(['language', 'processing'])]
- # 一个使用TextBlob训练朴素贝叶斯分类器(Naive Bayes classifier)的示例
- # 参考:https://textblob.readthedocs.io/en/dev/classifiers.html#classifiers
- # 1.准备数据集:训练集和测试集
- train = [
- ... ('I love this sandwich.', 'pos'),
- ... ('this is an amazing place!', 'pos'),
- ... ('I feel very good about these beers.', 'pos'),
- ... ('this is my best work.', 'pos'),
- ... ("what an awesome view", 'pos'),
- ... ('I do not like this restaurant', 'neg'),
- ... ('I am tired of this stuff.', 'neg'),
- ... ("I can't deal with this", 'neg'),
- ... ('he is my sworn enemy!', 'neg'),
- ... ('my boss is horrible.', 'neg')
- ... ]
- test = [
- ... ('the beer was good.', 'pos'),
- ... ('I do not enjoy my job', 'neg'),
- ... ("I ain't feeling dandy today.", 'neg'),
- ... ("I feel amazing!", 'pos'),
- ... ('Gary is a friend of mine.', 'pos'),
- ... ("I can't believe I'm doing this.", 'neg')
- ... ]
-
- # 2.创建朴素贝叶斯分类器
- from textblob.classifiers import NaiveBayesClassifier
-
- # 3.把训练集丢进去训练
- nb_model = NaiveBayesClassifier(train)
-
- # 4.预测新来的样本
- dev_sen = "This is an amazing library!"
- print(nb_model.classify(dev_sen))
-
- pos
-
- # 也可以计算属于某一类的概率
- dev_sen_prob = nb_model.prob_classify(dev_sen)
- print(dev_sen_prob.prob("pos"))
-
- 0.980117820324005
-
- # 5.计算模型在测试集上的准确率(accuracy)
- print(nb_model.accuracy(test))
-
- 0.8333333333333334
— 完 —
扫码关注人工智能头条 围观一个假的 AI
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。