赞
踩
- import java.io.*;
-
- import opennlp.tools.sentdetect.SentenceDetectorME;
- import opennlp.tools.sentdetect.SentenceModel;
- import opennlp.tools.tokenize.Tokenizer;
- import opennlp.tools.tokenize.TokenizerME;
- import opennlp.tools.tokenize.TokenizerModel;
- import opennlp.tools.util.Span;
-
-
-
-
- public class Testing_openNLP {
-
-
- /* http://opennlp.apache.org/documentation/1.5.3/manual/opennlp.html 官方教程Apache OpenNLP Developer Documentation
- * openNLP 中的各种模型可以在 http://opennlp.sourceforge.net/models-1.5/ 下载
- * http://www.programcreek.com/2012/05/opennlp-tutorial/ this is good tutorial about openNLP tools
- *
- * */
-
- public static void main(String[] args) {
- // String testString = "This isn't the greatest example sentence in the world because I've seen better. Neither is this one. This one's not bad, though.";
- String testString = "Hi. How are you? This is &3 $444 Mike." ;
-
- String tokens[] = Token(testString);
- String sentences[] = sentenceSegmentation(testString);
- String aa = "";
-
- }
-
- //分句
- public static String[] sentenceSegmentation(String str){
- try {
- InputStream modelIn = new FileInputStream("en-sent.bin");
- SentenceModel model = null;
- try {
- model = new SentenceModel(modelIn);
- }
- catch (IOException e) {
- e.printStackTrace();
- }
- finally {
- if (modelIn != null) {
- try {
- modelIn.close();
- }
- catch (IOException e) {
- }
- }
- }
-
- SentenceDetectorME sentenceDetector = new SentenceDetectorME(model);
- String sentences[] = sentenceDetector.sentDetect(str);
- return sentences;
-
- } catch (FileNotFoundException e1) {
- e1.printStackTrace();
- return null;
- }
- }
-
-
- //分词
- public static String[] Token(String str){
- try{
- InputStream modelIn = new FileInputStream("en-token.bin");
- TokenizerModel model = null;
- try {
- model = new TokenizerModel(modelIn);
- }
- catch (IOException e) {
- e.printStackTrace();
- }
- finally {
- if (modelIn != null) {
- try {
- modelIn.close();
- }
- catch (IOException e) {
- }
- }
- }
-
- TokenizerME tokenizer = new TokenizerME(model);
- String tokens[] = tokenizer.tokenize(str);
- // double tokenProbs[] = tokenizer.getTokenProbabilities();//must be called directly after one of the tokenize methods was called.
- return tokens;
- }
- catch(FileNotFoundException e){return null;}
- }
-
- }
-
-
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。