斯坦福词性标注 Demo：tagger = new MaxentTagger(...)

作者：Cpp五条 | 2024-05-20 20:55:30

踩

关键词：tagger = new MaxentTagger(...)

第一种：针对单个单词进行词性标注


import java.io.IOException;
import edu.stanford.nlp.tagger.maxent.MaxentTagger; 
 
/**
 * Minimal demo: loads one tagger model, tags a fixed sample string and
 * prints whatever {@code tagString} returns to standard output.
 */
public class tagger
{
	/**
	 * Runs the demo. The model path and the sample text are fixed in the code.
	 *
	 * @param args command-line arguments; not read by this demo
	 * @throws IOException declared by the signature; which call raises it
	 *         depends on the library version in use
	 * @throws ClassNotFoundException declared by the signature; which call
	 *         raises it depends on the library version in use
	 */
	public static void main(String[] args) throws IOException, ClassNotFoundException
	{
		// Location of the model file handed to the MaxentTagger constructor.
		final String modelPath = "F:/stanford-postagger-2014-06-16/models/english-left3words-distsim.tagger";
		// The text to be tagged.
		final String input = "text";

		MaxentTagger posTagger = new MaxentTagger(modelPath);

		// Tag the input and print the tagged string in one step.
		System.out.println(posTagger.tagString(input));
	}
}

第二种：对单句话进行词性标注


/**
 * Tokenizes a text file into sentences, tags each sentence and prints the
 * tagged sentences to standard output, one per line.
 *
 * <p>Usage: {@code java TaggerDemo modelFile fileToTag}
 */
class TaggerDemo {

	/** Not instantiable; this class only hosts {@link #main}. */
	private TaggerDemo() {}

	/**
	 * Entry point.
	 *
	 * @param args {@code args[0]} is the path of the tagger model file,
	 *             {@code args[1]} is the path of the text file to tag
	 * @throws Exception if the model or the input file cannot be read, or
	 *         if tagging fails
	 */
	public static void main(String[] args) throws Exception
	{
		if (args.length != 2)
		{
			System.err.println("usage: java TaggerDemo modelFile fileToTag");

			return;
		}

		// Use the paths given on the command line, as the usage message
		// promises. The previous hard-coded input path "F:\trigger.txt"
		// contained the escape sequence \t (a TAB), so it could never
		// name the intended file.
		MaxentTagger tagger = new MaxentTagger(args[0]);

		// Decode the input as UTF-8 instead of the platform default charset.
		BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(args[1]), "utf-8"));

		try
		{
			List<List<HasWord>> sentences = MaxentTagger.tokenizeText(reader);

			for (List<HasWord> sentence : sentences)
			{
				List<TaggedWord> tSentence = tagger.tagSentence(sentence);

				System.out.println(Sentence.listToString(tSentence, false));
			}
		}
		finally
		{
			// Release the file handle even when tokenizing or tagging throws.
			reader.close();
		}
	}

}

第三种：读取文本文件，对文件进行词性标注


import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.util.List;
 
import edu.stanford.nlp.ling.Sentence;
import edu.stanford.nlp.ling.TaggedWord;
import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.process.CoreLabelTokenFactory;
import edu.stanford.nlp.process.DocumentPreprocessor;
import edu.stanford.nlp.process.PTBTokenizer;
import edu.stanford.nlp.process.TokenizerFactory;
import edu.stanford.nlp.tagger.maxent.MaxentTagger;
 
/** This demo shows user-provided sentences (i.e., {@code List<HasWord>})
 *  being tagged by the tagger. The sentences are generated by direct use
 *  of the DocumentPreprocessor class.
 *
 *  <p>Usage: {@code java TaggerDemo2 modelFile fileToTag}. The input file
 *  is read as UTF-8 and the tagged sentences are written to standard
 *  output as UTF-8.
 *
 *  @author Christopher Manning
 */
class TaggerDemo2 {

	/** Not instantiable; this class only hosts {@link #main}. */
	private TaggerDemo2() {}

	/**
	 * Entry point.
	 *
	 * @param args {@code args[0]} is the path of the tagger model file,
	 *             {@code args[1]} is the path of the text file to tag
	 * @throws Exception if the model or the input file cannot be read, or
	 *         if tagging fails
	 */
	public static void main(String[] args) throws Exception
	{
		if (args.length != 2)
		{
			System.err.println("usage: java TaggerDemo2 modelFile fileToTag");

			return;
		}

		MaxentTagger tagger = new MaxentTagger(args[0]);

		TokenizerFactory<CoreLabel> ptbTokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(),
									   "untokenizable=noneKeep");

		BufferedReader r = new BufferedReader(new InputStreamReader(new FileInputStream(args[1]), "utf-8"));

		try
		{
			PrintWriter pw = new PrintWriter(new OutputStreamWriter(System.out, "utf-8"));

			try
			{
				DocumentPreprocessor documentPreprocessor = new DocumentPreprocessor(r);

				documentPreprocessor.setTokenizerFactory(ptbTokenizerFactory);

				// Tag every sentence produced by the preprocessor and print it.
				for (List<HasWord> sentence : documentPreprocessor)
				{
					List<TaggedWord> tSentence = tagger.tagSentence(sentence);

					pw.println(Sentence.listToString(tSentence, false));
				}

				// print the adjectives in one more sentence. This shows how to get at words and tags in a tagged sentence.
				List<HasWord> sent = Sentence.toWordList("The", "slimy", "slug", "crawled", "over", "the", "long", ",", "green", "grass", ".");

				List<TaggedWord> taggedSent = tagger.tagSentence(sent);

				for (TaggedWord tw : taggedSent)
				{
					// Keep only words whose tag begins with "JJ".
					if (tw.tag().startsWith("JJ"))
					{
						pw.println(tw.word());
					}
				}
			}
			finally
			{
				// Closing flushes buffered output even if tagging failed part-way.
				pw.close();
			}
		}
		finally
		{
			// The input reader was previously never closed.
			r.close();
		}
	}

}

声明：本文内容由网友自发贡献，不代表【wpsshop博客】立场，版权归原作者所有，本站不承担相应法律责任。如您发现有侵权的内容，请联系我们。转载请注明出处：https://www.wpsshop.cn/w/Cpp五条/article/detail/599268