当前位置:   article > 正文

斯坦福词性标注Demo_tagger = new maxenttagger(

tagger = new maxenttagger(

第一种:针对单个单词进行词性标注

  1. import java.io.IOException;
  2. import edu.stanford.nlp.tagger.maxent.MaxentTagger;
  3. public class tagger
  4. {
  5. public static void main(String[] args) throws IOException,ClassNotFoundException
  6. {
  7. // Initialize the tagger
  8. MaxentTagger tagger = new MaxentTagger("F:/stanford-postagger-2014-06-16/models/english-left3words-distsim.tagger");
  9. // The sample string
  10. String sample = "text";
  11. // The tagged string
  12. String tagged = tagger.tagString(sample);
  13. // Output the result
  14. System.out.println(tagged);
  15. }
  16. }

第二种:对单句话进行词性标注(先用 MaxentTagger.tokenizeText 将文件内容切分为句子,再逐句标注)

  1. class TaggerDemo {
  2. private TaggerDemo() {}
  3. public static void main(String[] args) throws Exception
  4. {
  5. if (args.length != 2)
  6. {
  7. System.err.println("usage: java TaggerDemo modelFile fileToTag");
  8. return;
  9. }
  10. MaxentTagger tagger = new MaxentTagger("F:\\stanford-postagger-2014-06-16\\models\\english-bidirectional-distsim.tagger");
  11. List<List<HasWord>> sentences = MaxentTagger.tokenizeText(new BufferedReader(new FileReader("F:\trigger.txt")));
  12. for (List<HasWord> sentence : sentences)
  13. {
  14. List<TaggedWord> tSentence = tagger.tagSentence(sentence);
  15. System.out.println(Sentence.listToString(tSentence, false));
  16. }
  17. }
  18. }
第三种:读取文本文件,对文件进行词性标注


  1. import java.io.BufferedReader;
  2. import java.io.FileInputStream;
  3. import java.io.InputStreamReader;
  4. import java.io.OutputStreamWriter;
  5. import java.io.PrintWriter;
  6. import java.util.List;
  7. import edu.stanford.nlp.ling.Sentence;
  8. import edu.stanford.nlp.ling.TaggedWord;
  9. import edu.stanford.nlp.ling.HasWord;
  10. import edu.stanford.nlp.ling.CoreLabel;
  11. import edu.stanford.nlp.process.CoreLabelTokenFactory;
  12. import edu.stanford.nlp.process.DocumentPreprocessor;
  13. import edu.stanford.nlp.process.PTBTokenizer;
  14. import edu.stanford.nlp.process.TokenizerFactory;
  15. import edu.stanford.nlp.tagger.maxent.MaxentTagger;
  16. /** This demo shows user-provided sentences (i.e., {@code List<HasWord>})
  17. * being tagged by the tagger. The sentences are generated by direct use
  18. * of the DocumentPreprocessor class.
  19. *
  20. * @author Christopher Manning
  21. */
  22. class TaggerDemo2 {
  23. private TaggerDemo2() {}
  24. public static void main(String[] args) throws Exception
  25. {
  26. if (args.length != 2)
  27. {
  28. System.err.println("usage: java TaggerDemo2 modelFile fileToTag");
  29. return;
  30. }
  31. MaxentTagger tagger = new MaxentTagger(args[0]);
  32. TokenizerFactory<CoreLabel> ptbTokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(),
  33. "untokenizable=noneKeep");
  34. BufferedReader r = new BufferedReader(new InputStreamReader(new FileInputStream(args[1]), "utf-8"));
  35. PrintWriter pw = new PrintWriter(new OutputStreamWriter(System.out, "utf-8"));
  36. DocumentPreprocessor documentPreprocessor = new DocumentPreprocessor(r);
  37. documentPreprocessor.setTokenizerFactory(ptbTokenizerFactory);
  38. for (List<HasWord> sentence : documentPreprocessor)
  39. {
  40. List<TaggedWord> tSentence = tagger.tagSentence(sentence);
  41. pw.println(Sentence.listToString(tSentence, false));
  42. }
  43. // print the adjectives in one more sentence. This shows how to get at words and tags in a tagged sentence.
  44. List<HasWord> sent = Sentence.toWordList("The", "slimy", "slug", "crawled", "over", "the", "long", ",", "green", "grass", ".");
  45. List<TaggedWord> taggedSent = tagger.tagSentence(sent);
  46. for (TaggedWord tw : taggedSent)
  47. {
  48. if (tw.tag().startsWith("JJ"))
  49. {
  50. pw.println(tw.word());
  51. }
  52. }
  53. pw.close();
  54. }
  55. }


声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/Cpp五条/article/detail/599268
推荐阅读
相关标签
  

闽ICP备14008679号