Skip to content
Toggle navigation
P
Projects
G
Groups
S
Snippets
Help
eagleEye
/
eagleEye-flink_kafka
This project
Loading...
Sign in
Toggle navigation
Go to a project
Project
Repository
Issues
0
Merge Requests
0
Pipelines
Wiki
Snippets
Settings
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Commit
ea89c502
authored
Aug 28, 2025
by
魏建枢
Browse files
Options
_('Browse Files')
Download
Email Patches
Plain Diff
昵称相似度代码
parent
448e98a5
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
183 additions
and
1 deletions
eagleEye-flink_kafka/pom.xml
eagleEye-flink_kafka/src/main/java/com/flink/util/NicknameValidator.java
eagleEye-flink_kafka/pom.xml
View file @
ea89c502
...
...
@@ -204,9 +204,25 @@
<artifactId>
snakeyaml
</artifactId>
<version>
1.30
</version>
</dependency>
<!-- 相似度算法 -->
<dependency>
<groupId>
org.apache.commons
</groupId>
<artifactId>
commons-text
</artifactId>
<version>
1.10.0
</version>
</dependency>
<!-- 中文转拼音 -->
<dependency>
<groupId>
com.github.houbb
</groupId>
<artifactId>
pinyin
</artifactId>
<version>
0.4.0
</version>
</dependency>
<dependency>
<groupId>
com.belerweb
</groupId>
<artifactId>
pinyin4j
</artifactId>
<version>
2.5.1
</version>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
...
...
eagleEye-flink_kafka/src/main/java/com/flink/util/NicknameValidator.java
0 → 100644
View file @
ea89c502
package
com
.
flink
.
util
;
import
org.apache.commons.text.similarity.JaroWinklerSimilarity
;
import
com.github.houbb.pinyin.constant.enums.PinyinStyleEnum
;
import
net.sourceforge.pinyin4j.format.HanyuPinyinCaseType
;
import
net.sourceforge.pinyin4j.format.HanyuPinyinOutputFormat
;
import
net.sourceforge.pinyin4j.format.HanyuPinyinToneType
;
import
net.sourceforge.pinyin4j.format.exception.BadHanyuPinyinOutputFormatCombination
;
import
net.sourceforge.pinyin4j.PinyinHelper
;
import
net.sourceforge.pinyin4j.format.HanyuPinyinOutputFormat
;
import
java.util.*
;
import
java.util.concurrent.ConcurrentHashMap
;
/**
 * Nickname similarity validator.
 *
 * <p>Keeps an in-memory set of normalized nicknames. A candidate nickname is normalized
 * (lower-cased, stripped of everything that is not a letter or number, Han characters
 * replaced by pinyin) and rejected when its normalized form is already stored or when its
 * Jaro-Winkler similarity to a stored form reaches {@code SIMILARITY_THRESHOLD}.
 *
 * @author wjs
 * @version created 2025-8-28 15:39:33
 */
public class NicknameValidator {

    // Registered nicknames, stored in normalized form (could be replaced by a database in a real project).
    private static final Set<String> registeredNicknames = ConcurrentHashMap.newKeySet();

    // Similarity threshold (tune per business requirements).
    private static final double SIMILARITY_THRESHOLD = 0.85;

    // Runs of characters that are neither Unicode letters nor Unicode numbers; compiled once
    // instead of on every normalizeNickname call.
    private static final java.util.regex.Pattern NON_ALPHANUMERIC =
            java.util.regex.Pattern.compile("[^\\p{L}\\p{N}]+");

    private static final java.util.logging.Logger LOGGER =
            java.util.logging.Logger.getLogger(NicknameValidator.class.getName());

    /**
     * Validates a nickname and, when it passes, records its normalized form.
     *
     * <p>The method is {@code static synchronized}, so the check-then-add sequence of one call
     * cannot interleave with another call of this method.
     *
     * @param newNickname the nickname as entered by the user; may be {@code null}
     * @return a result whose {@code isValid()} is {@code true} only when the nickname was added;
     *         otherwise the message says why it was rejected and the score is the similarity
     *         to the conflicting entry (1.0 for an exact normalized match)
     */
    public static synchronized NicknameValidationResult registerNickname(String newNickname) {
        // 0. Reject null instead of failing with a NullPointerException during normalization.
        if (newNickname == null) {
            return new NicknameValidationResult(false, "昵称不能为空", null, "", 0.0);
        }

        // 1. Normalize the input.
        String normalized = normalizeNickname(newNickname);

        // A nickname made only of symbols/emoji normalizes to "", which must not be registered.
        if (normalized.isEmpty()) {
            return new NicknameValidationResult(false, "昵称不能为空", newNickname, normalized, 0.0);
        }

        // 2. Exact match on the normalized form.
        if (registeredNicknames.contains(normalized)) {
            return new NicknameValidationResult(false, "昵称已存在", newNickname, normalized, 1.0);
        }

        // 3. Similarity match against every stored nickname.
        SimilarMatch closestMatch = findClosestMatch(normalized);
        if (closestMatch != null) {
            return new NicknameValidationResult(
                    false,
                    "与现有昵称 '" + closestMatch.getOriginal() + "' 高度相似",
                    newNickname,
                    normalized,
                    closestMatch.getSimilarity());
        }

        // 4. Validation passed: remember the nickname.
        registeredNicknames.add(normalized);
        return new NicknameValidationResult(true, "注册成功", newNickname, normalized, 0.0);
    }

    /**
     * Normalizes a nickname: lower-case, drop everything that is not a letter or number,
     * then replace each Han character with its tone-less lowercase pinyin.
     *
     * @param nickname the raw nickname; must not be {@code null}
     * @return the normalized form, possibly empty
     */
    private static String normalizeNickname(String nickname) {
        // Locale.ROOT keeps the result independent of the JVM default locale.
        String lowered = nickname.toLowerCase(Locale.ROOT);

        // Remove non-alphanumeric characters (Chinese characters are letters and are kept).
        String cleaned = NON_ALPHANUMERIC.matcher(lowered).replaceAll("");

        // Pinyin output format: no tone marks/numbers, lowercase.
        HanyuPinyinOutputFormat format = new HanyuPinyinOutputFormat();
        format.setToneType(HanyuPinyinToneType.WITHOUT_TONE);
        format.setCaseType(HanyuPinyinCaseType.LOWERCASE);

        StringBuilder pinyinBuilder = new StringBuilder(cleaned.length() * 2);
        for (int i = 0; i < cleaned.length(); i++) {
            char c = cleaned.charAt(i);
            if (isHan(c)) {
                pinyinBuilder.append(toPinyinOrSelf(c, format));
            } else {
                pinyinBuilder.append(c);
            }
        }
        return pinyinBuilder.toString();
    }

    /** Returns whether {@code c} belongs to the Unicode Han script (same test as {@code \p{IsHan}}). */
    private static boolean isHan(char c) {
        return Character.UnicodeScript.of(c) == Character.UnicodeScript.HAN;
    }

    /**
     * Converts one Han character to pinyin, taking the first reading for polyphonic characters.
     * Falls back to the character itself when no reading is available, so that an unknown
     * character is never silently dropped from the normalized nickname.
     */
    private static String toPinyinOrSelf(char c, HanyuPinyinOutputFormat format) {
        try {
            String[] pinyinArray = PinyinHelper.toHanyuPinyinStringArray(c, format);
            if (pinyinArray != null && pinyinArray.length > 0) {
                return pinyinArray[0];
            }
        } catch (BadHanyuPinyinOutputFormatCombination e) {
            // Best effort: keep the raw character rather than losing it, but record the cause.
            LOGGER.log(java.util.logging.Level.WARNING,
                    "Pinyin conversion failed for character U+"
                            + Integer.toHexString(c).toUpperCase(Locale.ROOT),
                    e);
        }
        return String.valueOf(c);
    }

    /**
     * Finds the stored nickname most similar to {@code normalized}.
     *
     * @param normalized the normalized candidate nickname
     * @return the best match whose Jaro-Winkler score is at least {@code SIMILARITY_THRESHOLD},
     *         or {@code null} when no stored nickname reaches the threshold
     */
    private static SimilarMatch findClosestMatch(String normalized) {
        JaroWinklerSimilarity similarity = new JaroWinklerSimilarity();
        SimilarMatch closest = null;
        for (String existing : registeredNicknames) {
            double score = similarity.apply(existing, normalized);
            boolean aboveThreshold = score >= SIMILARITY_THRESHOLD;
            // Strict '>' keeps the first entry encountered when two scores tie.
            if (aboveThreshold && (closest == null || score > closest.getSimilarity())) {
                closest = new SimilarMatch(existing, score);
            }
        }
        return closest;
    }

    /** Manual smoke test: seeds a few nicknames and prints the outcome for sample inputs. */
    public static void main(String[] args) {
        // Seed the already-registered nicknames.
        registeredNicknames.add(normalizeNickname("张三123"));
        registeredNicknames.add(normalizeNickname("Viper"));
        registeredNicknames.add(normalizeNickname("🐼熊猫大侠"));

        // Sample inputs.
        String[] testCases = {
            "张3123",       // near-duplicate of "张三123": digit 3 in place of 三 (not an exact duplicate)
            "zhangsan123",  // equals the pinyin form of a registered Chinese nickname
            "V1per",        // look-alike character substitution
            "熊猫大侠",      // matches a registered nickname once the emoji is stripped
            "UniqueUser"    // new nickname
        };

        // Run the samples.
        for (String test : testCases) {
            NicknameValidationResult result = registerNickname(test);
            System.out.printf("输入: %-12s → %s (相似度: %.2f)%n",
                    test, result.getMessage(), result.getSimilarityScore());
        }
    }

    /**
     * Helper holding one similarity hit.
     *
     * <p>NOTE(review): {@code original} is filled from the registered set, which stores only
     * normalized nicknames, so it is the normalized form rather than the text the user typed.
     * Confirm whether the rejection message should show the un-normalized nickname instead.
     */
    private static class SimilarMatch {
        private final String original;
        private final double similarity;

        public SimilarMatch(String original, double similarity) {
            this.original = original;
            this.similarity = similarity;
        }

        public String getOriginal() {
            return original;
        }

        public double getSimilarity() {
            return similarity;
        }
    }

    /** Immutable outcome of a nickname validation attempt. */
    public static class NicknameValidationResult {
        // Whether the nickname was accepted and registered.
        private final boolean valid;
        // Human-readable outcome message.
        private final String message;
        // The nickname exactly as supplied by the caller.
        private final String originalInput;
        // The normalized form that was compared against the registry.
        private final String normalized;
        // Similarity to the conflicting entry; 0.0 when there is no conflict.
        private final double similarityScore;

        public NicknameValidationResult(boolean valid, String message, String originalInput,
                                        String normalized, double similarityScore) {
            this.valid = valid;
            this.message = message;
            this.originalInput = originalInput;
            this.normalized = normalized;
            this.similarityScore = similarityScore;
        }

        // Getters
        public boolean isValid() {
            return valid;
        }

        public String getMessage() {
            return message;
        }

        public String getOriginalInput() {
            return originalInput;
        }

        public String getNormalized() {
            return normalized;
        }

        public double getSimilarityScore() {
            return similarityScore;
        }
    }
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment