/**
 * Runs {@code DKNLPBase.segment} on two sample sentences and checks the
 * {@code word} field of the terms found at fixed positions in each result.
 *
 * @throws Exception propagated unchanged from the code under test
 */
public void testSegment() throws Exception
{
    // First sample: the leading three terms must match these words, in order.
    List<Term> simpleTerms = DKNLPBase.segment("商品和服务");
    String[] expectedWords = {"商品", "和", "服务"};
    for (int i = 0; i < expectedWords.length; i++)
    {
        assertEquals(expectedWords[i], simpleTerms.get(i).word);
    }

    // Second sample: "阿法狗" should be recognized as a single term at index 5.
    List<Term> headlineTerms = DKNLPBase.segment("柯杰解说“李世石VS阿法狗第二局” 结局竟是这样");
    assertEquals("阿法狗", headlineTerms.get(5).word);
}
/**
 * Signature of the keyword-extraction operation: takes a text and an integer
 * and returns a list of strings.
 *
 * NOTE(review): the only call visible here is
 * {@code DKNLPBase.extractKeyword(content, 1)}, whose result is asserted to
 * hold exactly one element; {@code keySum} therefore presumably is the number
 * of keywords to return. Confirm against the implementation.
 *
 * @param txt    the text to extract keywords from
 * @param keySum presumably the number of keywords requested (TODO confirm)
 * @return the extracted keywords
 */
List<String> extractKeyword(String txt,int keySum);
/**
 * Asks {@code DKNLPBase.extractKeyword} for a single keyword from a short
 * passage and checks that exactly one entry comes back and that it is
 * "程序员".
 *
 * @throws Exception propagated unchanged from the code under test
 */
public void testExtractKeyword() throws Exception
{
    final int requested = 1;
    final String passage = "程序员(英文Programmer)是从事程序开发、维护的专业人员。" +
            "一般将程序员分为程序设计人员和程序编码人员," +
            "但两者的界限并不非常清楚,特别是在中国。" +
            "软件从业人员分为初级程序员、高级程序员、系统" +
            "分析员和项目经理四大类。";

    List<String> topKeywords = DKNLPBase.extractKeyword(passage, requested);

    // The result size must equal the requested count, and the sole entry is pinned.
    assertEquals(requested, topKeywords.size());
    assertEquals("程序员", topKeywords.get(0));
}
/**
 * Signature of the phrase-extraction operation: takes a text and an integer
 * and returns a list of strings.
 *
 * NOTE(review): no caller or implementation is visible here. Judging by the
 * names alone, {@code phSum} presumably is the number of phrases to return.
 * Confirm against the implementation.
 *
 * @param txt   the text to extract phrases from
 * @param phSum presumably the number of phrases requested (TODO confirm)
 * @return the extracted phrases
 */
List<String> extractPhrase(String txt, int phSum);