赞
踩
<dependency>
<groupId>com.belerweb</groupId>
<artifactId>pinyin4j</artifactId>
<version>2.5.0</version>
</dependency>
/** * 获取文本拼音 * @param context 文本内容 * @param existNotPinyin 是否保存非汉字 * @return String 拼音 */ private String pinyinTest(String context, boolean existNotPinyin) { if (context == null || context.trim().length() <= 0) { return null; } //设置格式 HanyuPinyinOutputFormat outputFormat = new HanyuPinyinOutputFormat(); //WITHOUT_TONE 不带音标、WITH_TONE_NUMBER 带数字音标、WITH_TONE_MARK 带符号音标 outputFormat.setToneType(HanyuPinyinToneType.WITHOUT_TONE); char[] chars = context.trim().toCharArray(); StringBuilder builder = new StringBuilder(); try { for (char aChar : chars) { String[] pinyin = PinyinHelper.toHanyuPinyinStringArray(aChar, outputFormat); //不是汉字会返回null if (pinyin == null || pinyin.length <= 0) { if (existNotPinyin) { builder.append(aChar); } continue; } //多音字的情况取第一个(也可以全取) builder.append(pinyin[0]); } } catch (BadHanyuPinyinOutputFormatCombination e) { e.printStackTrace(); } return builder.toString().toUpperCase(); }
/** Demo call of {@code pinyinTest} that drops non-Chinese characters. */
@Test
public void test() {
    // existNotPinyin is false, so the digits are expected to be skipped: WOAILUO
    String pinyin = pinyinTest("我爱罗52", false);
}
<dependency>
<groupId>com.hankcs</groupId>
<artifactId>hanlp</artifactId>
<version>portable-1.8.4</version>
</dependency>
/**
 * Converts a text to upper-case pinyin without tones using HanLP.
 *
 * @param content        the text to convert
 * @param existNotPinyin whether non-Chinese characters are kept in the output;
 *                       {@code null} is treated as {@code false}
 * @return the upper-cased pinyin string, or {@code null} when {@code content} is null or blank
 */
private String hanLpTest(String content, Boolean existNotPinyin) {
    if (content == null || content.trim().length() <= 0) {
        return null;
    }
    // Boolean.TRUE.equals avoids a NullPointerException when the boxed flag is null.
    if (Boolean.TRUE.equals(existNotPinyin)) {
        // Empty separator, remainNone = false; expected to leave characters without pinyin
        // as-is in the result (NOTE(review): confirm against the HanLP documentation).
        return HanLP.convertToPinyinString(content, "", false).toUpperCase(java.util.Locale.ROOT);
    }
    List<Pinyin> pinyinList = HanLP.convertToPinyinList(content);
    StringBuilder builder = new StringBuilder();
    for (Pinyin pinyin : pinyinList) {
        // Skip entries without a reading (Pinyin.none5 is presumably HanLP's "no pinyin" marker).
        if (pinyin == null || Pinyin.none5.equals(pinyin)) {
            continue;
        }
        builder.append(pinyin.getPinyinWithoutTone());
    }
    // Locale.ROOT keeps the upper-casing independent of the JVM default locale.
    return builder.toString().toUpperCase(java.util.Locale.ROOT);
}
/** Demo call of {@code hanLpTest} that keeps non-Chinese characters. */
@Test
public void test() {
    String input = "我爱罗52";
    String converted = hanLpTest(input, true);
    // Expected output: WOAILUO52
    System.out.println(converted);
}
/**
 * Regular expression used to split a keyword into segments.
 * One match consists of, in order and each part optional: a single character that is not one of
 * {@code a o e i u v}, an {@code h}, one of {@code i u v}, and one of the listed finals
 * ({@code ai}, {@code ei}, {@code ao}, {@code ou}, {@code er}, {@code an}/{@code ang},
 * {@code en}/{@code eng}, {@code ong}, {@code a}, {@code o}, {@code e}, {@code i}, {@code u},
 * {@code ng}, {@code n}). Presumably this models one pinyin syllable - confirm with the author.
 */
private final String SPLIT_WORD_REG_EX = "[^aoeiuv]?h?[iuv]?(ai|ei|ao|ou|er|ang?|eng?|ong|a|o|e|i|u|ng|n)?";
/**
 * Splits a keyword into consecutive segments, each being one match of {@code SPLIT_WORD_REG_EX}.
 *
 * @param keyword the keyword to split
 * @return the segments in order of appearance; an empty list when {@code keyword} is null or blank
 */
private List<String> splitTest(String keyword) {
    if (keyword == null || keyword.trim().length() <= 0) {
        return Collections.emptyList();
    }
    List<String> keywordList = new ArrayList<>();
    // One matcher walks the whole keyword; each find() resumes where the previous match ended,
    // which yields the same segments as repeatedly re-matching the remaining substring.
    Matcher matcher = Pattern.compile(SPLIT_WORD_REG_EX).matcher(keyword);
    while (matcher.find()) {
        // Every part of the pattern is optional, so an empty match is possible (e.g. at the end
        // of the input); stop there instead of looping without making progress.
        if (matcher.end() == matcher.start()) {
            break;
        }
        keywordList.add(matcher.group());
    }
    return keywordList;
}
/** Demo call of {@code splitTest} on a mixed Chinese/digit keyword. */
@Test
public void test() {
    List<String> segments = splitTest("我爱罗52");
    // Expected output: [我, 爱, 罗, 5, 2]
    System.out.println(segments);
}
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。