赞
踩
Java回炉重造(三)使用Apache Commons Text库计算文本相似性:Jaccard相似系数、余弦相似度
https://code.csdn.net/u012995856/apache-commons-learn/tree/master
<!-- Apache Commons Text: supplies the org.apache.commons.text.similarity
     classes (JaccardSimilarity, CosineSimilarity) used by the example below. -->
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-text</artifactId>
<version>1.1</version>
</dependency>
TextSimilaryTest.java
package cn.pangpython.acl.text;
import java.util.HashMap;
import java.util.Map;
import org.apache.commons.text.similarity.CosineSimilarity;
import org.apache.commons.text.similarity.JaccardSimilarity;
/**
 * Demonstrates two similarity measures from Apache Commons Text:
 * {@link JaccardSimilarity} applied to pairs of strings, and
 * {@link CosineSimilarity} applied to two integer-valued vectors.
 *
 * <p>Project: ApacheCommonsLearn, package {@code cn.pangpython.acl.text}.
 *
 * @author pangPython
 */
public class TextSimilaryTest {

    /** Keys of the demo vectors, in the order their components are supplied. */
    private static final String[] VECTOR_KEYS = {"a", "b", "c"};

    /**
     * Runs the demo and prints each score to standard output, one per line,
     * as {@code label:value}.
     *
     * @param args command-line arguments (not used)
     */
    public static void main(String[] args) {
        // Jaccard similarity coefficient for two pairs of strings.
        final JaccardSimilarity jaccard = new JaccardSimilarity();
        report("jcdsimilary1:", jaccard.apply("hello", "hell"));
        report("jcdsimilary2:", jaccard.apply("this is an apple", "this is an app"));

        // Cosine similarity between the vectors (1, 0, 1) and (1, 1, 0).
        final CosineSimilarity cosine = new CosineSimilarity();
        final Map<CharSequence, Integer> left = vectorOf(1, 0, 1);
        final Map<CharSequence, Integer> right = vectorOf(1, 1, 0);
        report("cosSimilary:", cosine.cosineSimilarity(left, right));
    }

    /** Prints {@code label} immediately followed by {@code score} on its own line. */
    private static void report(String label, double score) {
        System.out.println(label + score);
    }

    /** Builds a map that assigns the given components to the keys in {@link #VECTOR_KEYS}. */
    private static Map<CharSequence, Integer> vectorOf(int... components) {
        final Map<CharSequence, Integer> vector = new HashMap<>();
        for (int i = 0; i < VECTOR_KEYS.length; i++) {
            vector.put(VECTOR_KEYS[i], components[i]);
        }
        return vector;
    }
}

Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。