赞
踩
比较两段文本内容的差异,网上都没有合适的算法,只能自己写了,效果如下图:
- <html>
- <head>
- <meta charset="UTF-8">
- <meta name="viewport" content="width=device-width, initial-scale=1.0">
- <meta http-equiv="X-UA-Compatible" content="ie=edge">
- <title></title>
- </head>
- <body>
- 文本1<br>
- <textarea id="text1" cols="100" rows="10">张三李四</textarea><br>
- 文本2<br>
- <textarea id="text2" cols="100" rows="10">李四张三张三</textarea><br>
- <button type="button" onclick="compare()">比对</button><br><br>
- 结果1<br>
- <pre id="result1"></pre>
- 结果2<br>
- <pre id="result2"></pre>
-
- <script type="text/javascript">
/**
 * Click handler for the compare button.
 *
 * Reads the contents of the #text1 and #text2 textareas, diffs them with
 * getHighlight() and writes the two marked-up strings into #result1 and
 * #result2 as HTML.
 */
function compare() {
    // Small accessor so both inputs are read the same way.
    const readValue = (id) => document.getElementById(id).value;

    const firstText = readValue('text1');
    const secondText = readValue('text2');

    // getHighlight returns an object keyed by the id of the target element.
    const highlighted = getHighlight(firstText, secondText);

    for (const targetId of ['result1', 'result2']) {
        document.getElementById(targetId).innerHTML = highlighted[targetId];
    }
}
-
/**
 * Diffs two texts character by character and returns both texts with the
 * differing characters wrapped in highlight markers.
 *
 * Both texts are walked in step. When the current characters differ, the next
 * `granularity` characters of text1 are searched for further along in text2:
 *   - if that run is found, the text2 characters that were skipped over are
 *     highlighted and the walk resumes at the start of the run;
 *   - otherwise the current text1 character is highlighted and text2 stays put.
 * Whatever is left over at the end of either text is highlighted.
 *
 * The granularity (how many consecutive characters must match to re-sync) is
 * 2 for texts under 10 characters, 3 under 1,000, 4 under 100,000, else 5, and
 * is clamped to the number of characters remaining in text1.
 *
 * Texts are split by Unicode code point, so a surrogate pair (emoji etc.) is
 * always highlighted as one unit rather than being cut in half.
 *
 * @param {string} text1 First text; null/undefined is treated as empty.
 * @param {string} text2 Second text; null/undefined is treated as empty.
 * @param {string} [highlightBefore] Marker placed before each highlighted
 *     character. Any falsy value selects '<span style="color:red;">'.
 * @param {string} [highlightAfter] Marker placed after each highlighted
 *     character. Any falsy value selects '</span>'.
 * @param {boolean} [escapeHtml] Whether to HTML-escape the text characters
 *     (& < > " '). When omitted, escaping is on only if both markers were left
 *     at their HTML defaults, because the result is then meant to be inserted
 *     as HTML; with custom markers the text is returned verbatim.
 * @return {{result1: string, result2: string}} The two texts with markers applied.
 */
function getHighlight(text1, text2, highlightBefore, highlightAfter, escapeHtml) {
    const usesDefaultMarkup = !highlightBefore && !highlightAfter;
    const before = highlightBefore || '<span style="color:red;">';
    const after = highlightAfter || '</span>';
    const shouldEscape = escapeHtml === undefined ? usesDefaultMarkup : Boolean(escapeHtml);

    const char1s = toDiffChars(text1);
    const char2s = toDiffChars(text2);

    // The base granularity depends only on the length of text1, so pick it once.
    const baseGranularity = pickBaseGranularity(char1s.length);

    // Position in char2s that lines up with position i in char1s.
    let char2Index = 0;

    for (let i = 0; i < char1s.length; i++, char2Index++) {
        // text2 is exhausted: everything left in text1 is extra.
        if (char2Index >= char2s.length) {
            char1s[i].highlight = true;
            continue;
        }

        if (char1s[i].value === char2s[char2Index].value) {
            continue;
        }

        // Near the end of text1 fewer characters remain than the base
        // granularity, so match on whatever is left (at least one character).
        const granularity = Math.min(baseGranularity, char1s.length - i);

        // char2s[char2Index] is already known to differ, so start one further on.
        const end = findRunStart(char1s, i, char2s, char2Index + 1, granularity);

        if (end === -1) {
            // No re-sync point: this text1 character has no counterpart.
            char1s[i].highlight = true;
            // Cancel the loop's increment so text2 does not advance.
            char2Index--;
        } else {
            // Everything skipped in text2 to reach the re-sync point is extra.
            for (let k = char2Index; k < end; k++) {
                char2s[k].highlight = true;
            }
            char2Index = end;
        }
    }

    // Whatever remains of text2 after text1 is consumed is extra.
    for (let i = char2Index; i < char2s.length; i++) {
        char2s[i].highlight = true;
    }

    const render = (chars) => chars
        .map(({value, highlight}) => {
            const shown = shouldEscape ? escapeHtmlChar(value) : value;
            return highlight ? before + shown + after : shown;
        })
        .join('');

    return {result1: render(char1s), result2: render(char2s)};
}

/** Splits a text into per-code-point records with the highlight flag cleared. */
function toDiffChars(text) {
    const source = text == null ? '' : String(text);
    return Array.from(source, (value) => ({value, highlight: false}));
}

/** Chooses how many consecutive characters must match, based on text length. */
function pickBaseGranularity(length) {
    if (length < 10) {
        return 2;
    }
    if (length < 1000) {
        return 3;
    }
    if (length < 100000) {
        return 4;
    }
    return 5;
}

/**
 * Finds the first index >= from2 in char2s where the `length` characters
 * starting at char1s[from1] occur consecutively; returns -1 if there is none.
 */
function findRunStart(char1s, from1, char2s, from2, length) {
    for (let start = from2; start + length <= char2s.length; start++) {
        let matches = true;
        for (let offset = 0; offset < length; offset++) {
            if (char1s[from1 + offset].value !== char2s[start + offset].value) {
                matches = false;
                break;
            }
        }
        if (matches) {
            return start;
        }
    }
    return -1;
}

/** Returns the HTML entity for the five HTML-significant characters, else the input. */
function escapeHtmlChar(char) {
    switch (char) {
        case '&':
            return '&amp;';
        case '<':
            return '&lt;';
        case '>':
            return '&gt;';
        case '"':
            return '&quot;';
        case "'":
            return '&#39;';
        default:
            return char;
    }
}
- </script>
- </body>
- </html>
另外提供了Java版本,算法是一样的,翻译过来的而已
Char.java
/**
 * One character of a text together with a highlight flag.
 */
class Char {
    /** The character held by this record. */
    private Character value;

    /** Highlight flag; every new instance starts with it cleared. */
    private Boolean highlight = false;

    /**
     * Creates a record for the given character with the highlight flag cleared.
     *
     * @param value the character to hold
     */
    public Char(Character value) {
        this.value = value;
    }

    /** @return the character held by this record */
    public Character getValue() {
        return this.value;
    }

    /** @param value the new character */
    public void setValue(Character value) {
        this.value = value;
    }

    /** @return the current highlight flag */
    public Boolean getHighlight() {
        return this.highlight;
    }

    /** @param highlight the new highlight flag */
    public void setHighlight(Boolean highlight) {
        this.highlight = highlight;
    }

    @Override
    public String toString() {
        return String.format("Char{value=%s, highlight=%s}", value, highlight);
    }
}
CompareResult.java
/**
 * Holds the pair of strings produced by a comparison.
 *
 * @author 猴哥
 */
public class CompareResult {
    /** First result string. */
    private String result1;

    /** Second result string. */
    private String result2;

    /**
     * @param result1 first result string
     * @param result2 second result string
     */
    public CompareResult(String result1, String result2) {
        setResult1(result1);
        setResult2(result2);
    }

    /** @return the first result string */
    public String getResult1() {
        return this.result1;
    }

    /** @param result1 the new first result string */
    public final void setResult1(String result1) {
        this.result1 = result1;
    }

    /** @return the second result string */
    public String getResult2() {
        return this.result2;
    }

    /** @param result2 the new second result string */
    public final void setResult2(String result2) {
        this.result2 = result2;
    }

    @Override
    public String toString() {
        return String.format("CompareResult{result1='%s', result2='%s'}", result1, result2);
    }
}
CompareUtil.java
- import java.util.Arrays;
- import java.util.Optional;
- import java.util.stream.Collectors;
-
- /**
- * 比较字符串工具类
- *
- * @author 猴哥
- */
- public class CompareUtil {
- private CompareUtil() {}
-
- /**
- * 获取高亮文本
- *
- * @param text1 文本1
- * @param text2 文本2
- * @param highlightBefore 高亮前缀
- * @param highlightAfter 高亮后缀
- * @return 高亮后的字符串
- */
- public static CompareResult compare(String text1, String text2, String highlightBefore, String highlightAfter) {
- // 判断非空
- text1 = Optional.ofNullable(text1).orElse("");
- text2 = Optional.ofNullable(text2).orElse("");
-
- // 字符数组
- Char[] char1s = new Char[text1.length()];
- Char[] char2s = new Char[text2.length()];
- // 偏移量
- int char2Index = 0;
-
- // 初始化字符数组对象
- for (int i = 0; i < text1.length(); i++) {
- char1s[i] = new Char(text1.charAt(i));
- }
- for (int i = 0; i < text2.length(); i++) {
- char2s[i] = new Char(text2.charAt(i));
- }
-
- for (int i = 0; i < char1s.length; i++, char2Index++) {
- // 如果文本1结尾有东西
- if (char2Index > char2s.length - 1) {
- char1s[i].setHighlight(true);
- continue;
- }
-
- char char1 = char1s[i].getValue();
- char char2 = char2s[char2Index].getValue();
- System.out.println("char1: " + char1 + ", char2: " + char2);
- // 如果字符相等
- if (char1 == char2) {
- continue;
- }
-
- /*
- * 初始化比对粒度
- *
- * 如果连续3个一样的字符,就说明比对成功
- * 比对粒度太大,细小的部分可能比对不出来
- * 比对粒度太小,重复率高的话容易被比对出来,容易比对错
- * 正常调成3就够了,除非重复率特别高的,可以往上调,但建议不超过5
- * 最低也不要低于2,不可调成1,否则只要有一个字符一样的,就会被识别出来
- */
- int granularity;
- if (char1s.length < 10) {
- granularity = 2;
- } else if (char1s.length < 1000) {
- granularity = 3;
- } else if (char1s.length < 100000) {
- granularity = 4;
- } else {
- granularity = 5;
- }
-
- // 快到结尾时调小比对粒度
- if (i > char1s.length - granularity) {
- granularity = char1s.length - i;
- System.out.println("修改比对粒度: " + granularity);
- }
-
- // 如果剩余长度小于比对粒度,则不进行比对
- if (char1s.length - i < granularity) {
- break;
- }
-
- // 用来判断是否找到一样的
- int end = -1;
-
- // 进行循环比对
- // j: 从char2s的哪个索引开始比对
- for (int j = char2Index + 1; j < char2s.length - granularity + 1; j++) {
- // 比对factor个
- boolean equals = true;
- for (int k = j, offset = 0; k < j + granularity; k++, offset++) {
- System.out.println("第" + offset + "次比对 " + char1s[i + offset].getValue() + " " + char2s[k].getValue() + " " + (i + offset) + " " + k);
- if (!char1s[i + offset].getValue().equals(char2s[k].getValue())) {
- equals = false;
- break;
- }
- }
-
- // 如果相等,结束比对
- if (equals) {
- end = j;
- System.out.println("比对成功, end=" + end + ", 比对粒度" + granularity);
- break;
- } else {
- System.out.println("比对失败, 比对粒度" + granularity);
- }
- }
- System.out.println("比对结束,end=" + end);
-
- // 如果找到了
- if (end != -1) {
- for (int k = char2Index; k < end; k++) {
- char2s[k].setHighlight(true);
- char2Index++;
- System.out.println("设置高亮" + char2s[k]);
- }
- }
-
- // 如果没找到,就让自己高亮
- else {
- char1s[i].setHighlight(true);
- char2Index--;
- System.out.println("找不到" + char1);
- }
- }
-
- // 遍历最后多出来的文本2,全部高亮
- for (int i = char2Index; i < char2s.length; i++) {
- char2s[i].setHighlight(true);
- }
-
- // 初始化高亮前缀、后缀
- final String before = Optional.ofNullable(highlightBefore).orElse("<span style=\"color:red;\">");
- final String after = Optional.ofNullable(highlightAfter).orElse("</span>");
-
- // 将字符数组对象转换成高亮形式
- String result1 = Arrays.stream(char1s)
- .map(char1 -> char1.getHighlight() ? before + char1.getValue() + after : char1.getValue() + "")
- .collect(Collectors.joining());
- String result2 = Arrays.stream(char2s)
- .map(char2 -> char2.getHighlight() ? before + char2.getValue() + after : char2.getValue() + "")
- .collect(Collectors.joining());
- return new CompareResult(result1, result2);
- }
- }
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。