Commit ea89c502 by 魏建枢

昵称相似度代码

parent 448e98a5
@@ -204,9 +204,25 @@
<artifactId>snakeyaml</artifactId>
<version>1.30</version>
</dependency>
<!-- 相似度算法 -->
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-text</artifactId>
<version>1.10.0</version>
</dependency>
<!-- 中文转拼音 -->
<dependency>
<groupId>com.github.houbb</groupId>
<artifactId>pinyin</artifactId>
<version>0.4.0</version>
</dependency>
<dependency>
<groupId>com.belerweb</groupId>
<artifactId>pinyin4j</artifactId>
<version>2.5.1</version>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
......
package com.flink.util;
import org.apache.commons.text.similarity.JaroWinklerSimilarity;
import com.github.houbb.pinyin.constant.enums.PinyinStyleEnum;
import net.sourceforge.pinyin4j.format.HanyuPinyinCaseType;
import net.sourceforge.pinyin4j.format.HanyuPinyinOutputFormat;
import net.sourceforge.pinyin4j.format.HanyuPinyinToneType;
import net.sourceforge.pinyin4j.format.exception.BadHanyuPinyinOutputFormatCombination;
import net.sourceforge.pinyin4j.PinyinHelper;
import net.sourceforge.pinyin4j.format.HanyuPinyinOutputFormat;
import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
/**
 * Nickname similarity validation.
 *
 * <p>Keeps an in-memory registry of accepted nicknames. A candidate is normalized (lower-cased,
 * stripped of everything that is not a letter or digit, Han characters replaced by toneless
 * pinyin) and rejected when the normalized form is empty, already registered, or scores at least
 * {@code SIMILARITY_THRESHOLD} on Jaro-Winkler similarity against a registered entry.
 *
 * @author wjs
 * @version created 2025-8-28 15:39:33
 */
public class NicknameValidator {

    // Registered nicknames keyed by their normalized form. The value is the nickname exactly as
    // it was entered, so that rejection messages can quote what the other user actually typed.
    // (In a real project this could be replaced by a database.)
    private static final Map<String, String> registeredNicknames = new ConcurrentHashMap<>();

    // Similarity threshold (tune according to business needs).
    private static final double SIMILARITY_THRESHOLD = 0.85;

    // Pinyin output configuration: no tone marks, lower case. Built once instead of per call.
    private static final HanyuPinyinOutputFormat PINYIN_FORMAT = newPinyinFormat();

    // Scorer reused across lookups; it is only reached through the synchronized
    // registerNickname, so it is never used by two threads at once.
    private static final JaroWinklerSimilarity JARO_WINKLER = new JaroWinklerSimilarity();

    // Builds the pinyin output configuration used by normalizeNickname.
    private static HanyuPinyinOutputFormat newPinyinFormat() {
        HanyuPinyinOutputFormat format = new HanyuPinyinOutputFormat();
        format.setToneType(HanyuPinyinToneType.WITHOUT_TONE);
        format.setCaseType(HanyuPinyinCaseType.LOWERCASE);
        return format;
    }

    /**
     * Validates a nickname and registers it when it passes (core method).
     *
     * @param newNickname the nickname as entered by the user; may be {@code null}
     * @return a result whose {@code isValid()} is {@code true} only when the nickname was added
     *     to the registry; otherwise the message explains the rejection and the similarity score
     *     is {@code 1.0} for an exact (normalized) duplicate, the Jaro-Winkler score for a
     *     near-duplicate, and {@code 0.0} for an empty nickname
     */
    public static synchronized NicknameValidationResult registerNickname(String newNickname) {
        // 1. Normalize the input.
        String normalized = normalizeNickname(newNickname);

        // A null input, or one made only of symbols/whitespace/emoji, normalizes to "".
        // Registering "" would make every later symbol-only nickname an "exact duplicate".
        if (normalized.isEmpty()) {
            return new NicknameValidationResult(false, "昵称不能为空", newNickname, normalized, 0.0);
        }

        // 2. Exact-match check on the normalized form.
        if (registeredNicknames.containsKey(normalized)) {
            return new NicknameValidationResult(false, "昵称已存在", newNickname, normalized, 1.0);
        }

        // 3. Similarity check against every registered nickname.
        SimilarMatch closestMatch = findClosestMatch(normalized);
        if (closestMatch != null) {
            return new NicknameValidationResult(false,
                    "与现有昵称 '" + closestMatch.getOriginal() + "' 高度相似",
                    newNickname,
                    normalized,
                    closestMatch.getSimilarity());
        }

        // 4. Passed validation: record the nickname.
        registeredNicknames.put(normalized, newNickname);
        return new NicknameValidationResult(true, "注册成功", newNickname, normalized, 0.0);
    }

    /**
     * Normalizes a nickname: lower-cases it, removes every character that is not a Unicode
     * letter or digit, and replaces each Han character with its toneless pinyin.
     *
     * @param nickname raw nickname; {@code null} is treated as empty
     * @return the normalized form, possibly empty, never {@code null}
     */
    private static String normalizeNickname(String nickname) {
        if (nickname == null) {
            return "";
        }
        // Locale.ROOT keeps the result independent of the JVM's default locale.
        String cleaned = nickname.toLowerCase(Locale.ROOT)
                .replaceAll("[^\\p{L}\\p{N}]+", "");

        StringBuilder pinyinBuilder = new StringBuilder(cleaned.length() * 2);
        for (int i = 0; i < cleaned.length(); i++) {
            char c = cleaned.charAt(i);
            if (Character.UnicodeScript.of(c) == Character.UnicodeScript.HAN) {
                pinyinBuilder.append(toPinyinOrSelf(c));
            } else {
                pinyinBuilder.append(c);
            }
        }
        return pinyinBuilder.toString();
    }

    // Returns the first pinyin reading of a Han character, or the character itself when no
    // reading is available, so that unknown characters are kept instead of silently dropped.
    private static String toPinyinOrSelf(char hanChar) {
        try {
            String[] readings = PinyinHelper.toHanyuPinyinStringArray(hanChar, PINYIN_FORMAT);
            if (readings != null && readings.length > 0) {
                return readings[0]; // characters with several readings: take the first
            }
        } catch (BadHanyuPinyinOutputFormatCombination ignored) {
            // PINYIN_FORMAT is a fixed configuration, so this is not expected to happen;
            // fall through and keep the raw character rather than lose it.
        }
        return String.valueOf(hanChar);
    }

    /**
     * Finds the registered nickname most similar to the given normalized nickname.
     * This is a linear scan over the whole registry.
     *
     * @param normalized normalized candidate nickname
     * @return the best match scoring at least {@code SIMILARITY_THRESHOLD}, carrying the
     *     matched nickname as originally entered, or {@code null} when none qualifies
     */
    private static SimilarMatch findClosestMatch(String normalized) {
        SimilarMatch closest = null;
        for (Map.Entry<String, String> entry : registeredNicknames.entrySet()) {
            double score = JARO_WINKLER.apply(entry.getKey(), normalized);
            if (score >= SIMILARITY_THRESHOLD
                    && (closest == null || score > closest.getSimilarity())) {
                closest = new SimilarMatch(entry.getValue(), score);
            }
        }
        return closest;
    }

    // Manual demo: seeds a few nicknames and prints the validation outcome for sample inputs.
    public static void main(String[] args) {
        // Seed the registry directly, bypassing the similarity check.
        String[] seeds = {"张三123", "Viper", "🐼熊猫大侠"};
        for (String seed : seeds) {
            registeredNicknames.putIfAbsent(normalizeNickname(seed), seed);
        }

        // Sample inputs.
        String[] testCases = {
            "张3123",       // a Han character replaced by a digit
            "zhangsan123",  // pinyin spelling of a seeded nickname
            "V1per",        // look-alike character substitution
            "熊猫大侠",      // seeded nickname without its emoji prefix
            "UniqueUser"    // unrelated new nickname
        };

        for (String test : testCases) {
            NicknameValidationResult result = registerNickname(test);
            System.out.printf("输入: %-12s → %s (相似度: %.2f)%n",
                    test,
                    result.getMessage(),
                    result.getSimilarityScore());
        }
    }

    // Helper: a registered nickname (as originally entered) and its similarity score.
    private static class SimilarMatch {
        private final String original;
        private final double similarity;

        public SimilarMatch(String original, double similarity) {
            this.original = original;
            this.similarity = similarity;
        }

        public String getOriginal() { return original; }

        public double getSimilarity() { return similarity; }
    }

    /** Immutable outcome of a nickname validation. */
    public static class NicknameValidationResult {
        private final boolean valid;
        private final String message;
        private final String originalInput;
        private final String normalized;
        private final double similarityScore;

        /**
         * @param valid whether the nickname was accepted
         * @param message human-readable outcome
         * @param originalInput the nickname exactly as submitted
         * @param normalized the normalized form that was compared
         * @param similarityScore score reported for the outcome
         */
        public NicknameValidationResult(boolean valid, String message,
                                        String originalInput, String normalized,
                                        double similarityScore) {
            this.valid = valid;
            this.message = message;
            this.originalInput = originalInput;
            this.normalized = normalized;
            this.similarityScore = similarityScore;
        }

        // Getters
        public boolean isValid() { return valid; }

        public String getMessage() { return message; }

        public String getOriginalInput() { return originalInput; }

        public String getNormalized() { return normalized; }

        public double getSimilarityScore() { return similarityScore; }
    }
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment