赞
踩
第一种:针对单个单词进行词性标注
- import java.io.IOException;
- import edu.stanford.nlp.tagger.maxent.MaxentTagger;
-
- /**
-  * Minimal demo: tags a single hard-coded word with a Stanford MaxentTagger model
-  * and prints the tagged result to standard output.
-  */
- public class tagger
- {
-     /**
-      * Loads the tagger model from a fixed path, tags one sample string and prints it.
-      *
-      * @param args ignored
-      */
-     public static void main(String[] args) throws IOException, ClassNotFoundException
-     {
-         // Location of the model file on the author's machine.
-         final String modelPath = "F:/stanford-postagger-2014-06-16/models/english-left3words-distsim.tagger";
-         // The word to be tagged.
-         final String word = "text";
-
-         MaxentTagger posTagger = new MaxentTagger(modelPath);
-
-         // Tag the word and print the result in one step.
-         System.out.println(posTagger.tagString(word));
-     }
- }
- class TaggerDemo {
-
- private TaggerDemo() {}
-
- public static void main(String[] args) throws Exception
- {
- if (args.length != 2)
- {
- System.err.println("usage: java TaggerDemo modelFile fileToTag");
-
- return;
- }
- MaxentTagger tagger = new MaxentTagger("F:\\stanford-postagger-2014-06-16\\models\\english-bidirectional-distsim.tagger");
-
- List<List<HasWord>> sentences = MaxentTagger.tokenizeText(new BufferedReader(new FileReader("F:\trigger.txt")));
-
- for (List<HasWord> sentence : sentences)
- {
- List<TaggedWord> tSentence = tagger.tagSentence(sentence);
-
- System.out.println(Sentence.listToString(tSentence, false));
- }
- }
-
- }
第三种:读取文本文件,对文件进行词性标注
- import java.io.BufferedReader;
- import java.io.FileInputStream;
- import java.io.InputStreamReader;
- import java.io.OutputStreamWriter;
- import java.io.PrintWriter;
- import java.util.List;
-
- import edu.stanford.nlp.ling.Sentence;
- import edu.stanford.nlp.ling.TaggedWord;
- import edu.stanford.nlp.ling.HasWord;
- import edu.stanford.nlp.ling.CoreLabel;
- import edu.stanford.nlp.process.CoreLabelTokenFactory;
- import edu.stanford.nlp.process.DocumentPreprocessor;
- import edu.stanford.nlp.process.PTBTokenizer;
- import edu.stanford.nlp.process.TokenizerFactory;
- import edu.stanford.nlp.tagger.maxent.MaxentTagger;
-
- /** This demo shows user-provided sentences (i.e., {@code List<HasWord>})
- * being tagged by the tagger. The sentences are generated by direct use
- * of the DocumentPreprocessor class.
- *
- * @author Christopher Manning
- */
- class TaggerDemo2 {
-
- private TaggerDemo2() {}
-
- public static void main(String[] args) throws Exception
- {
-
- if (args.length != 2)
- {
- System.err.println("usage: java TaggerDemo2 modelFile fileToTag");
-
- return;
- }
-
- MaxentTagger tagger = new MaxentTagger(args[0]);
-
- TokenizerFactory<CoreLabel> ptbTokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(),
- "untokenizable=noneKeep");
-
- BufferedReader r = new BufferedReader(new InputStreamReader(new FileInputStream(args[1]), "utf-8"));
-
- PrintWriter pw = new PrintWriter(new OutputStreamWriter(System.out, "utf-8"));
-
- DocumentPreprocessor documentPreprocessor = new DocumentPreprocessor(r);
-
- documentPreprocessor.setTokenizerFactory(ptbTokenizerFactory);
-
- for (List<HasWord> sentence : documentPreprocessor)
- {
- List<TaggedWord> tSentence = tagger.tagSentence(sentence);
-
- pw.println(Sentence.listToString(tSentence, false));
- }
-
- // print the adjectives in one more sentence. This shows how to get at words and tags in a tagged sentence.
- List<HasWord> sent = Sentence.toWordList("The", "slimy", "slug", "crawled", "over", "the", "long", ",", "green", "grass", ".");
-
- List<TaggedWord> taggedSent = tagger.tagSentence(sent);
-
- for (TaggedWord tw : taggedSent)
- {
- if (tw.tag().startsWith("JJ"))
- {
- pw.println(tw.word());
- }
- }
-
- pw.close();
- }
-
- }
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。