Skip to content
Toggle navigation
P
Projects
G
Groups
S
Snippets
Help
eagleEye
/
eagleEye-flink_kafka
This project
Loading...
Sign in
Toggle navigation
Go to a project
Project
Repository
Issues
0
Merge Requests
0
Pipelines
Wiki
Snippets
Settings
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Commit
f0508aeb
authored
Aug 08, 2025
by
魏建枢
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
增加公共消费基础job
parent
0a7239f6
Hide whitespace changes
Inline
Side-by-side
Showing
14 changed files
with
1428 additions
and
4 deletions
eagleEye-flink_kafka/pom.xml
eagleEye-flink_kafka/src/main/java/com/flink/achieve/base/CommonConsumeBaseAchi.java
eagleEye-flink_kafka/src/main/java/com/flink/achieve/base/OpenSimiApiAchi.java
eagleEye-flink_kafka/src/main/java/com/flink/achieve/doris/UserDailyActivityAchi.java
eagleEye-flink_kafka/src/main/java/com/flink/common/DorisConnector.java
eagleEye-flink_kafka/src/main/java/com/flink/enums/JobTypeEnum.java
eagleEye-flink_kafka/src/main/java/com/flink/factory/JobProcessorFactory.java
eagleEye-flink_kafka/src/main/java/com/flink/processor/impl/CommonConsumeBaseProcessor.java
eagleEye-flink_kafka/src/main/java/com/flink/processor/impl/UserDailyActivityProcessor.java
eagleEye-flink_kafka/src/main/java/com/flink/vo/CombinedLog.java
eagleEye-flink_kafka/src/main/java/com/flink/vo/DeviceLog.java
eagleEye-flink_kafka/src/main/java/com/flink/vo/EnrichedLog.java
eagleEye-flink_kafka/src/main/java/com/flink/vo/EventLog.java
eagleEye-flink_kafka/src/main/java/com/flink/vo/UserDailyActivityOutputLog.java
eagleEye-flink_kafka/pom.xml
View file @
f0508aeb
...
...
@@ -177,6 +177,11 @@
<!-- 根据 JDK 版本选择路径 -->
<systemPath>
${JAVA_HOME}/lib/tools.jar
</systemPath>
</dependency>
<dependency>
<groupId>
com.zaxxer
</groupId>
<artifactId>
HikariCP
</artifactId>
<version>
3.4.5
</version>
</dependency>
</dependencies>
...
...
eagleEye-flink_kafka/src/main/java/com/flink/achieve/base/CommonConsumeBaseAchi.java
View file @
f0508aeb
...
...
@@ -48,6 +48,10 @@ public class CommonConsumeBaseAchi extends MultipleSourceCommonBase implements S
DataStreamSource
<
String
>
collectLogStreamSource
=
kafkaDataSource
.
getDataStreamSource
();
CollectLogAchi
.
collectLog
(
collectLogStreamSource
);
}
if
(
StringUtils
.
equals
(
kafkaDataSource
.
getTopic
(),
TopicTypeEnum
.
OPEN_SIMI_API
.
getTopic
()))
{
DataStreamSource
<
String
>
oepnSimiApiStreamSource
=
kafkaDataSource
.
getDataStreamSource
();
OpenSimiApiAchi
.
openSimiApi
(
oepnSimiApiStreamSource
);
}
}
}
else
{
return
;
...
...
eagleEye-flink_kafka/src/main/java/com/flink/achieve/base/OpenSimiApiAchi.java
0 → 100644
View file @
f0508aeb
package
com
.
flink
.
achieve
.
base
;
import
java.io.Serializable
;
import
java.util.Objects
;
import
org.apache.doris.flink.sink.DorisSink
;
import
org.apache.flink.api.common.functions.FlatMapFunction
;
import
org.apache.flink.api.common.typeinfo.TypeInformation
;
import
org.apache.flink.calcite.shaded.org.apache.commons.codec.binary.StringUtils
;
import
org.apache.flink.streaming.api.datastream.DataStreamSource
;
import
org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator
;
import
org.apache.flink.table.api.DataTypes
;
import
org.apache.flink.table.data.GenericRowData
;
import
org.apache.flink.table.data.RowData
;
import
org.apache.flink.table.data.StringData
;
import
org.apache.flink.table.types.DataType
;
import
org.apache.flink.util.Collector
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
com.alibaba.fastjson.JSON
;
import
com.alibaba.fastjson.JSONObject
;
import
com.alibaba.fastjson.TypeReference
;
import
com.flink.common.DorisConnector
;
import
com.flink.config.TableConfig
;
import
com.flink.enums.OpenSimiApiTypeEnum
;
import
com.flink.vo.simi.CreateGroupReqDto
;
import
com.flink.vo.simi.JoinGroupReqDto
;
import
com.flink.vo.simi.LeaveGroupReqDto
;
/**
 * Consumes "openSimiApi" records from a Kafka string stream, parses group
 * create/join/leave request bodies, and writes them as rows into the Doris
 * table {@code bi.user_join_exit_group_record}.
 *
 * @author wjs
 * @version Created: 2025-8-7 10:40:09
 */
public class OpenSimiApiAchi implements Serializable {

    private static final long serialVersionUID = 1L;

    private static final Logger logger = LoggerFactory.getLogger(OpenSimiApiAchi.class);

    // ---- Common JSON field names of the incoming Kafka payload ----
    private static final String FLUME_TYPE_FIELD = "flume_type";
    private static final String DATA_FIELD = "req_body";
    private static final String SEND_TYPE = "send_type";
    // Doris hidden column used to mark logical deletes on unique-key tables.
    private static final String DORIS_DELETE_SIGN = "__DORIS_DELETE_SIGN__";
    // 0 = normal upsert (not a delete).
    private static final int DELETE_SIGN_VALUE = 0;

    // Column layout of the join/exit-group record table. Order here MUST match
    // the positional setField(...) calls in mapToUsersJoinExitGroupRow below.
    private static final String[] USERS_JOIN_EXIT_GROUP_FIELDS = {
            "cid", "group_id", "time", "operation_type", "behavior_type", "group_name", DORIS_DELETE_SIGN };

    // One DataType per field above: six STRING columns plus the INT delete sign.
    private static final DataType[] USERS_JOIN_EXIT_GROUP_TYPES = {
            DataTypes.STRING(), DataTypes.STRING(), DataTypes.STRING(),
            DataTypes.STRING(), DataTypes.STRING(), DataTypes.STRING(),
            DataTypes.INT() };

    /**
     * Entry point: wires the raw Kafka string stream through parsing/mapping
     * into a Doris sink for the join/exit-group record table.
     *
     * @param dataStreamSource raw Kafka JSON strings
     */
    public static void openSimiApi(DataStreamSource<String> dataStreamSource) {
        // Initialise table configuration (fields, types, target table name).
        TableConfig usersJoinExitGroupConfig = new TableConfig(
                USERS_JOIN_EXIT_GROUP_FIELDS, USERS_JOIN_EXIT_GROUP_TYPES, "bi.user_join_exit_group_record");
        // Create the Doris sink for that table.
        DorisSink<RowData> dorisUsersSink = DorisConnector.sinkDoris(
                usersJoinExitGroupConfig.getFields(),
                usersJoinExitGroupConfig.getTypes(),
                usersJoinExitGroupConfig.getTableName());
        // Process the user group events.
        processDataStream(dataStreamSource, "openSimiApi", usersJoinExitGroupConfig, dorisUsersSink,
                (sendType, item, fieldCount) -> mapToUsersJoinExitGroupRow(sendType, item, fieldCount));
    }

    /**
     * Generic processing pipeline: flat-map raw strings to RowData via the
     * supplied mapper, drop nulls, and sink to Doris.
     *
     * @param dataStream  raw data stream
     * @param flumeType   expected value of the "flume_type" field; records with
     *                    any other value are silently dropped
     * @param tableConfig table configuration (used here for the field count)
     * @param dorisSink   Doris sink
     * @param mapper      row mapping function
     */
    private static void processDataStream(DataStreamSource<String> dataStream, String flumeType,
            TableConfig tableConfig, DorisSink<RowData> dorisSink, RowMapper mapper) {
        SingleOutputStreamOperator<RowData> processedStream = dataStream
                .flatMap(new ElementProcessor(flumeType, mapper, tableConfig.getFields().length))
                .returns(TypeInformation.of(RowData.class))
                .filter(Objects::nonNull);
        processedStream.sinkTo(dorisSink).name("Doris-" + flumeType);
    }

    /**
     * Parses one Kafka JSON record, dispatches on "send_type" to the matching
     * request DTO, and emits the mapped RowData. Parse errors are logged and
     * the record is dropped (best-effort consumption).
     */
    private static class ElementProcessor implements FlatMapFunction<String, RowData>, Serializable {

        private static final long serialVersionUID = 1L;

        private final String flumeType;   // expected "flume_type" value
        private final RowMapper mapper;   // converts a parsed DTO to RowData
        private final int fieldCount;     // arity of the output row

        public ElementProcessor(String flumeType, RowMapper mapper, int fieldCount) {
            this.flumeType = flumeType;
            this.mapper = mapper;
            this.fieldCount = fieldCount;
        }

        @Override
        public void flatMap(String value, Collector<RowData> out) throws Exception {
            try {
                JSONObject jsonObj = JSON.parseObject(value);
                // Skip records that are not of the expected flume type.
                if (!flumeType.equals(jsonObj.getString(FLUME_TYPE_FIELD))) {
                    return;
                }
                String reqBody = jsonObj.getString(DATA_FIELD);
                String sendType = jsonObj.getString(SEND_TYPE);
                Object item = null;
                // Dispatch on send_type; unknown types leave item == null and
                // the record is dropped below.
                if (StringUtils.equals(sendType, OpenSimiApiTypeEnum.CREATE_GROUP.getCode())) {
                    item = JSON.parseObject(reqBody, new TypeReference<CreateGroupReqDto>() {
                    });
                } else if (StringUtils.equals(sendType, OpenSimiApiTypeEnum.JOIN_GROUP.getCode())) {
                    item = JSON.parseObject(reqBody, new TypeReference<JoinGroupReqDto>() {
                    });
                } else if (StringUtils.equals(sendType, OpenSimiApiTypeEnum.LEAVE_GROUP.getCode())) {
                    item = JSON.parseObject(reqBody, new TypeReference<LeaveGroupReqDto>() {
                    });
                }
                if (item != null) {
                    RowData row = mapper.map(sendType, item, fieldCount);
                    if (row != null) {
                        out.collect(row);
                    }
                }
            } catch (Exception e) {
                logger.error("OpenSimiApiAchi 处理 {} 数据出错 | rawData:{} | error:{}",
                        flumeType, value, e.getMessage(), e);
            }
        }
    }

    /**
     * Maps a parsed group request DTO to a positional row for the
     * user_join_exit_group_record table. Field order follows
     * USERS_JOIN_EXIT_GROUP_FIELDS: cid, group_id, time, operation_type,
     * behavior_type, group_name, delete sign.
     * NOTE(review): if sendType matches none of the three enum codes, the row
     * is returned with only the delete sign set — unreachable in practice,
     * because the caller only invokes this mapper when item != null.
     */
    private static RowData mapToUsersJoinExitGroupRow(String sendType, Object item, int fieldCount) {
        GenericRowData row = new GenericRowData(fieldCount);
        if (StringUtils.equals(sendType, OpenSimiApiTypeEnum.CREATE_GROUP.getCode())) {
            CreateGroupReqDto createGroup = (CreateGroupReqDto) item;
            row.setField(0, StringData.fromString(createGroup.getCid()));
            row.setField(1, StringData.fromString(createGroup.getGroupId()));
            row.setField(2, StringData.fromString(createGroup.getTime()));
            row.setField(3, StringData.fromString("加群"));
            row.setField(4, StringData.fromString("邀请加入"));
            row.setField(5, null); // group name: not present on a create-group request
        } else if (StringUtils.equals(sendType, OpenSimiApiTypeEnum.JOIN_GROUP.getCode())) {
            JoinGroupReqDto joinGroup = (JoinGroupReqDto) item;
            row.setField(0, StringData.fromString(joinGroup.getCid()));
            row.setField(1, StringData.fromString(joinGroup.getGroupId()));
            row.setField(2, StringData.fromString(joinGroup.getTime()));
            row.setField(3, StringData.fromString("加群"));
            row.setField(4, StringData.fromString(joinGroup.getJoinType()));
            row.setField(5, StringData.fromString(joinGroup.getGroupName()));
        } else if (StringUtils.equals(sendType, OpenSimiApiTypeEnum.LEAVE_GROUP.getCode())) {
            LeaveGroupReqDto leaveGroup = (LeaveGroupReqDto) item;
            row.setField(0, StringData.fromString(leaveGroup.getCid()));
            row.setField(1, StringData.fromString(leaveGroup.getGroupId()));
            row.setField(2, StringData.fromString(leaveGroup.getTime()));
            row.setField(3, StringData.fromString("退群"));
            row.setField(4, StringData.fromString(leaveGroup.getExitType()));
            row.setField(5, StringData.fromString(leaveGroup.getGroupName()));
        }
        row.setField(6, DELETE_SIGN_VALUE); // 0 = normal (non-delete) record
        return row;
    }

    /**
     * Row mapping interface: converts a parsed request DTO into a RowData of
     * the given arity for the configured Doris table.
     */
    @FunctionalInterface
    private interface RowMapper extends Serializable {
        RowData map(String sendType, Object item, int fieldCount);
    }
}
eagleEye-flink_kafka/src/main/java/com/flink/achieve/doris/UserDailyActivityAchi.java
0 → 100644
View file @
f0508aeb
package
com
.
flink
.
achieve
.
doris
;
import
java.io.Serializable
;
import
java.sql.Connection
;
import
java.sql.DriverManager
;
import
java.sql.PreparedStatement
;
import
java.sql.ResultSet
;
import
java.time.Duration
;
import
java.util.Collections
;
import
java.util.List
;
import
java.util.Objects
;
import
java.util.concurrent.CompletableFuture
;
import
java.util.concurrent.TimeUnit
;
import
java.util.function.Supplier
;
import
org.apache.commons.collections.CollectionUtils
;
import
org.apache.commons.lang3.StringUtils
;
import
org.apache.doris.flink.sink.DorisSink
;
import
org.apache.flink.api.common.eventtime.WatermarkStrategy
;
import
org.apache.flink.api.common.functions.CoGroupFunction
;
import
org.apache.flink.api.common.functions.FlatMapFunction
;
import
org.apache.flink.api.common.functions.JoinFunction
;
import
org.apache.flink.api.common.io.ParseException
;
import
org.apache.flink.api.common.state.MapState
;
import
org.apache.flink.api.common.state.MapStateDescriptor
;
import
org.apache.flink.api.common.state.StateTtlConfig
;
import
org.apache.flink.api.common.state.ValueState
;
import
org.apache.flink.api.common.state.ValueStateDescriptor
;
import
org.apache.flink.api.common.typeinfo.TypeHint
;
import
org.apache.flink.api.common.typeinfo.TypeInformation
;
import
org.apache.flink.api.java.tuple.Tuple2
;
import
org.apache.flink.configuration.Configuration
;
import
org.apache.flink.streaming.api.datastream.AsyncDataStream
;
import
org.apache.flink.streaming.api.datastream.DataStream
;
import
org.apache.flink.streaming.api.datastream.DataStreamSource
;
import
org.apache.flink.streaming.api.functions.KeyedProcessFunction
;
import
org.apache.flink.streaming.api.functions.async.ResultFuture
;
import
org.apache.flink.streaming.api.functions.async.RichAsyncFunction
;
import
org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows
;
import
org.apache.flink.table.api.DataTypes
;
import
org.apache.flink.table.data.GenericRowData
;
import
org.apache.flink.table.data.RowData
;
import
org.apache.flink.table.data.StringData
;
import
org.apache.flink.table.data.TimestampData
;
import
org.apache.flink.table.types.DataType
;
import
org.apache.flink.util.Collector
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
com.alibaba.fastjson.JSON
;
import
com.alibaba.fastjson.JSONObject
;
import
com.alibaba.fastjson.TypeReference
;
import
com.flink.common.DorisConnector
;
import
com.flink.common.MultipleSourceCommonBase
;
import
com.flink.enums.TopicTypeEnum
;
import
com.flink.processor.function.GenDeviceIdProcessor
;
import
com.flink.processor.function.LatestUserProcessFunction
;
import
com.flink.util.CompareUtils
;
import
com.flink.util.TimeConvertUtil
;
import
com.flink.util.ip2region.SearcherUtil
;
import
com.flink.vo.CombinedLog
;
import
com.flink.vo.DeviceIdInfo
;
import
com.flink.vo.DeviceLog
;
import
com.flink.vo.EnrichedLog
;
import
com.flink.vo.EventList
;
import
com.flink.vo.EventLog
;
import
com.flink.vo.KafkaDataSource
;
import
com.flink.vo.OdsCollectLog
;
import
com.flink.vo.OdsEventLog
;
import
com.flink.vo.SimiUserInfo
;
import
com.flink.vo.UserDailyActivityOutputLog
;
import
com.flink.vo.UserProperties
;
/**
* @author wjs
* @version 创建时间:2025-7-31 11:02:52
* 类说明
*/
public
class
UserDailyActivityAchi
extends
MultipleSourceCommonBase
implements
Serializable
{
/**
*
*/
private
static
final
long
serialVersionUID
=
1L
;
private
static
final
Logger
logger
=
LoggerFactory
.
getLogger
(
UserDailyActivityAchi
.
class
);
@Override
public void parseSourceKafkaJson(List<KafkaDataSource> dataSourceList) throws ParseException, Exception {
    // Resolve each configured Kafka source by topic name. Sources whose topic
    // is absent from dataSourceList stay null.
    // NOTE(review): collectLogStreamSource / eventLogStreamSource /
    // userStreamSource / abroadUserStreamSource are dereferenced below without
    // a null check — this method assumes all four topics are always configured;
    // confirm against the job configuration.
    DataStreamSource<String> collectLogStreamSource = null;
    DataStreamSource<String> eventLogStreamSource = null;
    DataStreamSource<String> pcCollectLogStreamSource = null;
    DataStreamSource<String> pcEventLogStreamSource = null;
    DataStreamSource<String> userStreamSource = null;
    DataStreamSource<String> abroadUserStreamSource = null;
    if (CollectionUtils.isNotEmpty(dataSourceList)) {
        for (KafkaDataSource kafkaDataSource : dataSourceList) {
            if (StringUtils.equals(kafkaDataSource.getTopic(), TopicTypeEnum.ODS_NEW_COLLECT_LOG.getTopic())) {
                collectLogStreamSource = kafkaDataSource.getDataStreamSource();
            }
            if (StringUtils.equals(kafkaDataSource.getTopic(), TopicTypeEnum.ODS_EVENT_LOG.getTopic())) {
                eventLogStreamSource = kafkaDataSource.getDataStreamSource();
            }
            if (StringUtils.equals(kafkaDataSource.getTopic(), TopicTypeEnum.SIMI_USER_LIST_TOPIC.getTopic())) {
                userStreamSource = kafkaDataSource.getDataStreamSource();
            }
            if (StringUtils.equals(kafkaDataSource.getTopic(), TopicTypeEnum.ABROAD_SIMI_USER_LIST_TOPIC.getTopic())) {
                abroadUserStreamSource = kafkaDataSource.getDataStreamSource();
            }
            // NOTE(review): the two PC sources below are assigned but never
            // used anywhere else in this method.
            if (StringUtils.equals(kafkaDataSource.getTopic(), TopicTypeEnum.ODS_PC_COLLECT_LOG.getTopic())) {
                pcCollectLogStreamSource = kafkaDataSource.getDataStreamSource();
            }
            if (StringUtils.equals(kafkaDataSource.getTopic(), TopicTypeEnum.ODS_PC_EVENT_LOG.getTopic())) {
                pcEventLogStreamSource = kafkaDataSource.getDataStreamSource();
            }
        }
    } else {
        return;
    }
    // 1. Device-log stream: parse ods_new_collect_log JSON, derive the device
    //    id, watermark on send time (10 min out-of-orderness), and keep only
    //    the latest record per (deviceId, uniqueId) via keyed state.
    DataStream<DeviceLog> deviceLogStream = collectLogStreamSource
            .flatMap(new FlatMapFunction<String, DeviceLog>() {
                private static final long serialVersionUID = 1L;

                @Override
                public void flatMap(String value, Collector<DeviceLog> out) throws Exception {
                    try {
                        // Parse the Kafka payload.
                        OdsCollectLog log = JSON.parseObject(value, new TypeReference<OdsCollectLog>() {});
                        if (null == log) {
                            return;
                        }
                        String deviceId = log.getDevice_id();
                        String uniqueId = log.getUnique_id();
                        String appType = log.getApp_type();
                        String appKey = log.getApp_key();
                        String other_info = log.getOther_info();
                        String device_info = log.getDevice_info();
                        String env_info = log.getEnv_info();
                        String createTime = log.getCreate_time();
                        String sendTime = log.getSend_time();
                        // Derive normalised device identity / attributes from the raw info blobs.
                        DeviceIdInfo deviceIdInfo = GenDeviceIdProcessor.genDeviceId(
                                appType, appKey, other_info, device_info, env_info);
                        // createTime.substring(0, 10) => the yyyy-MM-dd date partition
                        // (assumes createTime starts with a date — TODO confirm upstream format).
                        DeviceLog deviceLog = new DeviceLog(
                                deviceId,
                                deviceIdInfo.getDeviceIdV1(),
                                deviceIdInfo.getAppKey(),
                                uniqueId,
                                appType,
                                createTime.substring(0, 10),
                                deviceIdInfo.getModel(),
                                deviceIdInfo.getBrand(),
                                deviceIdInfo.getOsRelease(),
                                deviceIdInfo.getAppVersion(),
                                TimeConvertUtil.convertToTimestamp(sendTime),
                                log.getZone_name(),
                                log.getZone_type(),
                                log.getZone_code()
                        );
                        if (deviceLog != null)
                            out.collect(deviceLog);
                    } catch (Exception e) {
                        logger.error("Error parsing ods_new_collect_log 处理 Kafka 消息出错 | data:{} | error:{}",
                                value, e.getMessage());
                    }
                }
            })
            .filter(collectLog -> StringUtils.isNoneEmpty(collectLog.getDeviceId(), collectLog.getUniqueId()))
            .assignTimestampsAndWatermarks(
                    // Bounded out-of-orderness watermark on the converted send time.
                    WatermarkStrategy.<DeviceLog>forBoundedOutOfOrderness(Duration.ofMinutes(10))
                            .withTimestampAssigner((collectLog, ts) -> collectLog.getWaterMarkTime())
            )
            .keyBy(collectLog -> collectLog.getDeviceId() + "#_#" + collectLog.getUniqueId())
            .process(new KeyedProcessFunction<String, DeviceLog, DeviceLog>() {

                private static final long serialVersionUID = 1L;

                // Latest device record per key (TTL-bounded).
                private ValueState<DeviceLog> latestDeviceInfoState;

                @Override
                public void open(Configuration parameters) {
                    // 1. State descriptor.
                    ValueStateDescriptor<DeviceLog> descriptor =
                            new ValueStateDescriptor<>("deviceInfo-state", DeviceLog.class);
                    // 2. TTL: expire 30 minutes after create/write, incremental cleanup.
                    StateTtlConfig ttlConfig = StateTtlConfig.newBuilder(Duration.ofMinutes(30))
                            .setUpdateType(StateTtlConfig.UpdateType.OnCreateAndWrite)
                            .cleanupIncrementally(1000, true)
                            .build();
                    descriptor.enableTimeToLive(ttlConfig);
                    latestDeviceInfoState = getRuntimeContext().getState(descriptor);
                }

                @Override
                public void processElement(DeviceLog deviceLog, Context ctx, Collector<DeviceLog> out)
                        throws Exception {
                    DeviceLog currentLatest = latestDeviceInfoState.value();
                    // 3. Forward only records strictly newer than the stored one.
                    if (currentLatest == null
                            || deviceLog.getWaterMarkTime() > currentLatest.getWaterMarkTime()) {
                        latestDeviceInfoState.update(deviceLog);
                        out.collect(deviceLog);
                    }
                }
            });
    // 2. Event-log stream: parse ods_event_log JSON, extract user identity from
    //    user_properties, resolve the first public (non-intranet, non-IPv6) IP
    //    to an area, and emit one EventLog per entry of event_list.
    DataStream<EventLog> eventLogStream = eventLogStreamSource
            .flatMap(new FlatMapFunction<String, EventLog>() {
                private static final long serialVersionUID = 1L;

                @Override
                public void flatMap(String value, Collector<EventLog> out) throws Exception {
                    try {
                        // Parse the Kafka payload.
                        OdsEventLog odsEventLog = JSONObject.parseObject(value, new TypeReference<OdsEventLog>() {});
                        if (null == odsEventLog) {
                            return;
                        }
                        // event_list is itself a JSON-encoded string field.
                        List<EventList> eventList = JSONObject.parseObject(
                                odsEventLog.getEvent_list(), new TypeReference<List<EventList>>() {});
                        if (CollectionUtils.isEmpty(eventList)) {
                            return;
                        }
                        String deviceId = odsEventLog.getDevice_id();
                        String uniqueId = odsEventLog.getUnique_id();
                        String appType = odsEventLog.getApp_type();
                        String appKey = odsEventLog.getApp_key();
                        String createTime = odsEventLog.getCreate_time();
                        String sendTime = odsEventLog.getSend_time();
                        String routeIp = odsEventLog.getRoute_ip();
                        String userProperties = odsEventLog.getUser_properties();
                        // Normalise the legacy/missing app key to the default key.
                        if (StringUtils.isEmpty(appKey) || StringUtils.equals(appKey, "C7jias27jias2")) {
                            appKey = "8ooOvXJo276";
                        }
                        String cid = null;
                        String phone = null;
                        String nick = null;
                        if (StringUtils.isNotEmpty(userProperties)) {
                            List<UserProperties> userPropertiesList = JSONObject.parseObject(
                                    userProperties, new TypeReference<List<UserProperties>>() {});
                            if (userPropertiesList != null && userPropertiesList.size() > 0) {
                                // NOTE(review): per entry this is an else-if chain, so at most ONE
                                // of cid/phone/nick is taken from each UserProperties element
                                // (cid wins over phone, phone over id, etc.) — confirm intended.
                                for (UserProperties user : userPropertiesList) {
                                    if (StringUtils.isNotEmpty(user.getCid())) {
                                        cid = user.getCid();
                                    } else if (StringUtils.isNotEmpty(user.getPhone())) {
                                        phone = user.getPhone();
                                    } else if (StringUtils.isNotEmpty(user.getId())) {
                                        cid = user.getId();
                                    } else if (StringUtils.isNotEmpty(user.getNick())) {
                                        nick = user.getNick();
                                    } else if (StringUtils.isNotEmpty(user.getEmail())) {
                                        nick = user.getEmail();
                                    }
                                }
                            }
                        }
                        List<String> ips = SearcherUtil.convertStringToList(routeIp);
                        if (CollectionUtils.isEmpty(ips)) {
                            return;
                        }
                        String ip_name = null;
                        String area_name = null;
                        // Pick the first IPv4 address whose geo lookup is not an
                        // intranet placeholder; otherwise drop the record below.
                        for (String ip : ips) {
                            if (!SearcherUtil.ipv6(ip)) {
                                area_name = SearcherUtil.getCityInfoByFile(ip);
                                if (!CompareUtils.stringExists(area_name,
                                        "0|0|0|内网IP|内网IP",
                                        "0|0|0|内网IP|Finance-and-Promoting-Technology")) {
                                    ip_name = ip;
                                    break;
                                } else {
                                    ip_name = null;
                                    area_name = null;
                                }
                            }
                        }
                        if (StringUtils.isEmpty(ip_name)) {
                            return;
                        }
                        // Fan out: one EventLog per event in the list, sharing the
                        // record-level identity/ip/time attributes.
                        for (EventList event : eventList) {
                            EventLog eventLog = new EventLog(
                                    deviceId,
                                    uniqueId,
                                    cid,
                                    phone,
                                    nick,
                                    ip_name,
                                    area_name,
                                    TimeConvertUtil.parseToStringSSS(event.getR9()),
                                    TimeConvertUtil.convertToTimestamp(sendTime),
                                    appKey,
                                    appType,
                                    createTime,
                                    createTime.substring(0, 10)
                            );
                            if (eventLog != null) {
                                out.collect(eventLog);
                            }
                        }
                    } catch (Exception e) {
                        logger.error("Error parsing ods_event_log 处理 Kafka 消息出错 | data:{} | error:{}",
                                value, e.getMessage());
                    }
                }
            })
            .filter(eventLog -> StringUtils.isNoneEmpty(eventLog.getDeviceId(), eventLog.getUniqueId()))
            .assignTimestampsAndWatermarks(
                    // Bounded out-of-orderness watermark on the converted send time.
                    WatermarkStrategy.<EventLog>forBoundedOutOfOrderness(Duration.ofMinutes(10))
                            .withTimestampAssigner((eventLog, ts) -> eventLog.getWaterMarkTime())
            )
            .keyBy(eventLog -> eventLog.getDeviceId() + "#_#" + eventLog.getUniqueId())
            .process(new KeyedProcessFunction<String, EventLog, EventLog>() {

                private static final long serialVersionUID = 1L;

                // Latest event record per key (TTL-bounded).
                private ValueState<EventLog> latestEventInfoState;

                @Override
                public void open(Configuration parameters) {
                    // 1. State descriptor.
                    // NOTE(review): the state name "deviceInfo-state" is the same
                    // literal as in the device-log operator above — harmless
                    // (different operators), but looks copy-pasted.
                    ValueStateDescriptor<EventLog> descriptor =
                            new ValueStateDescriptor<>("deviceInfo-state", EventLog.class);
                    // 2. TTL: expire 30 minutes after create/write, incremental cleanup.
                    StateTtlConfig ttlConfig = StateTtlConfig.newBuilder(Duration.ofMinutes(30))
                            .setUpdateType(StateTtlConfig.UpdateType.OnCreateAndWrite)
                            .cleanupIncrementally(1000, true)
                            .build();
                    descriptor.enableTimeToLive(ttlConfig);
                    latestEventInfoState = getRuntimeContext().getState(descriptor);
                }

                @Override
                public void processElement(EventLog deviceLog, Context ctx, Collector<EventLog> out)
                        throws Exception {
                    EventLog currentLatest = latestEventInfoState.value();
                    // 3. Forward only records strictly newer than the stored one.
                    if (currentLatest == null
                            || deviceLog.getWaterMarkTime() > currentLatest.getWaterMarkTime()) {
                        latestEventInfoState.update(deviceLog);
                        out.collect(deviceLog);
                    }
                }
            });
    // 3. Domestic user stream (5-minute batched updates): parse, stamp the
    //    update time from create_time, de-dup per (cid, phone) via
    //    LatestUserProcessFunction.
    DataStream<SimiUserInfo> userDataStream = userStreamSource
            .flatMap(new FlatMapFunction<String, SimiUserInfo>() {
                private static final long serialVersionUID = 1L;

                @Override
                public void flatMap(String value, Collector<SimiUserInfo> out) throws Exception {
                    try {
                        // Parse the Kafka payload.
                        // NOTE(review): no null check before setUpdateTime — a null
                        // parse result would NPE here and be swallowed by the catch.
                        SimiUserInfo simiUserInfo = JSONObject.parseObject(value, new TypeReference<SimiUserInfo>() {
                        });
                        simiUserInfo.setUpdateTime(
                                TimeConvertUtil.convertToTimestamp(simiUserInfo.getCreate_time()));
                        out.collect(simiUserInfo);
                    } catch (Exception e) {
                        logger.error("Error parsing simi_user_list 处理 Kafka 消息出错 | data:{} | error:{}",
                                value, e.getMessage());
                    }
                }
            })
            .filter(u -> StringUtils.isNoneEmpty(u.getCid(), u.getPhone_number()))
            .assignTimestampsAndWatermarks(
                    WatermarkStrategy.<SimiUserInfo>forBoundedOutOfOrderness(Duration.ofMinutes(5))
                            .withTimestampAssigner((user, ts) -> user.getUpdateTime()))
            .keyBy(user -> user.getCid() + "#_#" + user.getPhone_number())
            .process(new LatestUserProcessFunction());
    // 3b. Overseas user stream (same pipeline shape as the domestic one).
    DataStream<SimiUserInfo> abroadUserDataStream = abroadUserStreamSource
            .flatMap(new FlatMapFunction<String, SimiUserInfo>() {
                private static final long serialVersionUID = 1L;

                @Override
                public void flatMap(String value, Collector<SimiUserInfo> out) throws Exception {
                    try {
                        // Parse the Kafka payload (same caveat as above re: null parse result).
                        SimiUserInfo simiUserInfo = JSONObject.parseObject(value, new TypeReference<SimiUserInfo>() {
                        });
                        simiUserInfo.setUpdateTime(
                                TimeConvertUtil.convertToTimestamp(simiUserInfo.getCreate_time()));
                        out.collect(simiUserInfo);
                    } catch (Exception e) {
                        logger.error("Error parsing abroad_simi_user_list 处理 Kafka 消息出错 | data:{} | error:{}",
                                value, e.getMessage());
                    }
                }
            })
            .filter(u -> StringUtils.isNoneEmpty(u.getCid(), u.getPhone_number()))
            .assignTimestampsAndWatermarks(
                    WatermarkStrategy.<SimiUserInfo>forBoundedOutOfOrderness(Duration.ofMinutes(5))
                            .withTimestampAssigner((user, ts) -> user.getUpdateTime()))
            .keyBy(user -> user.getCid() + "#_#" + user.getPhone_number())
            .process(new LatestUserProcessFunction());
    // 4. Join device logs with event logs (same device + same day).
    DataStream<CombinedLog> combinedLogStream = joinDeviceAndEventLogs(deviceLogStream, eventLogStream);
    // 5. Enrich with the brand/phone-model dimension table (async MySQL lookup).
    DataStream<EnrichedLog> enrichedLogStream = enrichWithPhoneModel(combinedLogStream);
    DataStream<EnrichedLog> timeEnrichedStream = enrichedLogStream
            .keyBy(log -> log.getDeviceIdV1())
            .process(new DeviceTimeCalculator());
    // 6. Group by device id + date and keep the best-ranked record.
    DataStream<EnrichedLog> rankedLogStream = rankAndFilter(timeEnrichedStream);
    // 8. Join with user info and sink — once against the domestic user stream,
    //    once against the overseas one (two independent writes to the same table).
    sinkDoris(joinUserInfo(rankedLogStream, userDataStream));
    sinkDoris(joinUserInfo(rankedLogStream, abroadUserDataStream));
    // DataStream<UserDailyActivityOutputLog> outputAppSimiStream = joinUserInfo(rankedLogStream, userDataStream);
    // DataStream<UserDailyActivityOutputLog> outputAppAbroadSimiStream = joinUserInfo(rankedLogStream, abroadUserDataStream );
}
/**
 * Maps UserDailyActivityOutputLog records positionally into RowData and sinks
 * them to the Doris table bi.daily_activity. Mapping failures are logged and
 * the record is dropped (null filtered out before the sink).
 *
 * @param outputStream joined daily-activity records
 */
public void sinkDoris(DataStream<UserDailyActivityOutputLog> outputStream) {
    //================= Target column layout =================
    // Order here MUST match the positional setField(...) calls below.
    // NOTE(review): "frist_time" is the actual column name in Doris (typo is in
    // the schema) — do not "fix" the string without migrating the table.
    String[] fields = {
            "device_id", "cid", "app_key", "platform", "app_type", "dt", "country_code",
            "phone", "nick", "brand", "model", "os_release", "app_version", "ip",
            "area_name", "network_ip", "network_area_name", "frist_time", "latest_time",
            "phone_name", "network_model", "device_name", "zone_name", "zone_type",
            "zone_code", "__DORIS_DELETE_SIGN__" };
    // One DataType per column: 17 strings, 2 timestamps, 6 strings, delete-sign int.
    DataType[] types = {
            DataTypes.STRING(), DataTypes.STRING(), DataTypes.STRING(), DataTypes.STRING(),
            DataTypes.STRING(), DataTypes.STRING(), DataTypes.STRING(), DataTypes.STRING(),
            DataTypes.STRING(), DataTypes.STRING(), DataTypes.STRING(), DataTypes.STRING(),
            DataTypes.STRING(), DataTypes.STRING(), DataTypes.STRING(), DataTypes.STRING(),
            DataTypes.STRING(),
            DataTypes.TIMESTAMP(3), DataTypes.TIMESTAMP(3),
            DataTypes.STRING(), DataTypes.STRING(), DataTypes.STRING(), DataTypes.STRING(),
            DataTypes.STRING(), DataTypes.STRING(),
            DataTypes.INT() };
    //================= Create the Doris sink =================
    String tableName = "bi.daily_activity";
    DorisSink<RowData> dorisSink = DorisConnector.sinkDoris(fields, types, tableName);
    //================= Processing pipeline =================
    outputStream
            .map(log -> {
                try {
                    if (log == null) {
                        return null;
                    }
                    GenericRowData row = new GenericRowData(fields.length);
                    // Positional field mapping — indices follow `fields` above.
                    row.setField(0, StringData.fromString(log.getDeviceId()));
                    row.setField(1, StringData.fromString(log.getCid()));
                    row.setField(2, StringData.fromString(log.getAppKey()));
                    row.setField(3, StringData.fromString(log.getPlatform()));
                    row.setField(4, StringData.fromString(log.getAppType()));
                    row.setField(5, StringData.fromString(log.getDt()));
                    row.setField(6, StringData.fromString(log.getCountryCode()));
                    row.setField(7, StringData.fromString(log.getPhone()));
                    row.setField(8, StringData.fromString(log.getNick()));
                    row.setField(9, StringData.fromString(log.getBrand()));
                    row.setField(10, StringData.fromString(log.getModel()));
                    row.setField(11, StringData.fromString(log.getOsRelease()));
                    row.setField(12, StringData.fromString(log.getAppVersion()));
                    row.setField(13, StringData.fromString(log.getIp()));
                    row.setField(14, StringData.fromString(log.getAreaName()));
                    // Empty network ip/area are normalised to null before conversion.
                    row.setField(15, StringData.fromString(
                            StringUtils.isNotEmpty(log.getNetworkIp()) ? log.getNetworkIp() : null));
                    row.setField(16, StringData.fromString(
                            StringUtils.isNotEmpty(log.getNetworkAreaName()) ? log.getNetworkAreaName() : null));
                    row.setField(17, TimestampData.fromEpochMillis(log.getFirstTime()));
                    row.setField(18, TimestampData.fromEpochMillis(log.getLatestTime()));
                    row.setField(19, StringData.fromString(log.getPhoneName()));
                    row.setField(20, StringData.fromString(log.getNetworkModel()));
                    row.setField(21, StringData.fromString(log.getDeviceName()));
                    row.setField(22, StringData.fromString(log.getZoneName()));
                    row.setField(23, StringData.fromString(log.getZoneType()));
                    row.setField(24, StringData.fromString(log.getZoneCode()));
                    row.setField(25, 0); // delete sign (0 = normal record)
                    // NOTE(review): per-record INFO logging — expensive and noisy at
                    // volume; consider debug level.
                    logger.info(">>>>>>>>>>>>>sinkDoris row:{}", row.toString());
                    return (RowData) row;
                } catch (Exception e) {
                    logger.error("数据映射失败: device_id={}, error={}", log.getDeviceId(), e.getMessage());
                    return null;
                }
            })
            .filter(Objects::nonNull)
            .sinkTo(dorisSink)
            .name("Doris-DailyActivitySink");
}
/**
 * Joins the device-log stream with the event-log stream on
 * (deviceId, date) within 1-day tumbling event-time windows, then re-assigns
 * watermarks (5-minute out-of-orderness) on the joined stream so downstream
 * event-time operators keep working.
 *
 * @param deviceLogStream deduplicated device records
 * @param eventLogStream  deduplicated event records
 * @return combined records (inner join: only devices with matching events)
 */
private DataStream<CombinedLog> joinDeviceAndEventLogs(
        DataStream<DeviceLog> deviceLogStream, DataStream<EventLog> eventLogStream) {
    return deviceLogStream
            .join(eventLogStream)
            .where(device -> device.getDeviceId() + ":" + device.getDt())
            .equalTo(event -> event.getDeviceId() + ":" + event.getDt())
            .window(TumblingEventTimeWindows.of(Duration.ofDays(1)))
            .apply(new JoinFunction<DeviceLog, EventLog, CombinedLog>() {

                private static final long serialVersionUID = 1L;

                @Override
                public CombinedLog join(DeviceLog device, EventLog event) {
                    // Placeholders; first/latest times are computed downstream
                    // (DeviceTimeCalculator), not here.
                    long firstTime = 0L;
                    long latestTime = 0L;
                    // NOTE(review): per-pair INFO logging — consider debug level at volume.
                    logger.info(">>>>>>>>joinDeviceAndEventLogs device:{},event:{}",
                            device.toString(), event.toString());
                    // Device attributes win for identity/zone; event attributes
                    // supply user identity and ip/area.
                    return new CombinedLog(
                            device.getDeviceIdV1(),
                            device.getAppKey(),
                            device.getAppType(),
                            device.getDt(),
                            device.getModel(),
                            device.getBrand(),
                            device.getOsRelease(),
                            device.getAppVersion(),
                            event.getCid(),
                            event.getPhone(),
                            event.getNick(),
                            event.getIp(),
                            event.getAreaName(),
                            device.getWaterMarkTime(),
                            device.getZoneName(),
                            device.getZoneType(),
                            device.getZoneCode(),
                            firstTime,
                            latestTime
                    );
                }
            })
            .assignTimestampsAndWatermarks(
                    WatermarkStrategy.<CombinedLog>forBoundedOutOfOrderness(Duration.ofMinutes(5))
                            .withTimestampAssigner((event, timestamp) -> event.getWaterMarkTime()));
}
/**
 * Enriches combined logs with the brand/phone-model dimension table via an
 * ordered async MySQL lookup (output order preserved relative to input).
 *
 * @param combinedLogStream joined device+event records
 * @return records enriched with phone name and network model
 */
public DataStream<EnrichedLog> enrichWithPhoneModel(DataStream<CombinedLog> combinedLogStream) {
    return AsyncDataStream.orderedWait(
            combinedLogStream,
            new DatabaseAsyncFunction(),
            30,               // lookup timeout: 30 seconds
            TimeUnit.SECONDS,
            100               // max in-flight async requests
    );
}
// 异步JDBC查询实现
private
static
class
DatabaseAsyncFunction
extends
RichAsyncFunction
<
CombinedLog
,
EnrichedLog
>
{
/**
*
*/
private
static
final
long
serialVersionUID
=
1L
;
private
transient
Connection
connection
;
private
transient
com
.
google
.
common
.
cache
.
Cache
<
String
,
PhoneModel
>
cache
;
@Override
public void open(Configuration parameters) throws Exception {
    // 1. Initialise the JDBC connection (one shared Connection per operator instance).
    // NOTE(review): the connection is later used from CompletableFuture worker
    // threads in asyncInvoke — a single shared java.sql.Connection is not
    // guaranteed thread-safe; consider a small connection pool (HikariCP is
    // already a project dependency).
    // SECURITY NOTE(review): database host/user/password are hard-coded in
    // source — move to configuration/secrets management.
    Class.forName("com.mysql.cj.jdbc.Driver");
    connection = DriverManager.getConnection(
            "jdbc:mysql://10.0.1.213:3306/spider?useSSL=false&serverTimezone=UTC",
            "bigdata",
            "Im#Social&20181*29#im"
    );
    // 2. Initialise the local cache (max 1000 entries, 10-minute expiry after write).
    cache = com.google.common.cache.CacheBuilder.newBuilder()
            .maximumSize(1000)
            .expireAfterWrite(10, TimeUnit.MINUTES)
            .build();
}
@Override
public
void
asyncInvoke
(
CombinedLog
input
,
ResultFuture
<
EnrichedLog
>
resultFuture
)
{
// 3. 先尝试从缓存获取
PhoneModel
cachedModel
=
cache
.
getIfPresent
(
input
.
getModel
());
if
(
cachedModel
!=
null
)
{
resultFuture
.
complete
(
Collections
.
singleton
(
new
EnrichedLog
(
input
,
cachedModel
.
phoneName
,
cachedModel
.
networkModel
)
));
return
;
}
// 4. 异步数据库查询
CompletableFuture
.
supplyAsync
(()
->
{
try
{
// 5. 缓存未命中则查询数据库
try
(
PreparedStatement
stmt
=
connection
.
prepareStatement
(
"SELECT phone_name, 入网型号 AS network_model FROM brand_phone WHERE 入网型号 = ?"
))
{
stmt
.
setString
(
1
,
input
.
getModel
());
stmt
.
setString
(
2
,
input
.
getModel
());
ResultSet
rs
=
stmt
.
executeQuery
();
if
(
rs
.
next
())
{
logger
.
info
(
">>>>>enrichWithPhoneModel phone_name:{},network_model:{}"
,
rs
.
getString
(
"phone_name"
),
rs
.
getString
(
"network_model"
));
PhoneModel
model
=
new
PhoneModel
(
rs
.
getString
(
"phone_name"
),
rs
.
getString
(
"network_model"
)
);
// 6. 更新缓存
cache
.
put
(
input
.
getModel
(),
model
);
return
model
;
}
}
return
new
PhoneModel
(
null
,
null
);
// 空结果
}
catch
(
Exception
e
)
{
logger
.
error
(
"Async query failed for model: {}"
,
input
.
getModel
(),
e
);
return
new
PhoneModel
(
null
,
null
);
// 降级处理
}
}).
thenAccept
(
model
->
{
// 7. 组装结果
resultFuture
.
complete
(
Collections
.
singleton
(
new
EnrichedLog
(
input
,
model
.
phoneName
,
model
.
networkModel
)
));
});
}
@Override
public
void
close
()
{
if
(
connection
!=
null
)
try
{
connection
.
close
();
}
catch
(
Exception
e
)
{
logger
.
error
(
"DB connection close error"
,
e
);
}
}
// 缓存数据结构
private
static
class
PhoneModel
{
final
String
phoneName
;
final
String
networkModel
;
PhoneModel
(
String
phoneName
,
String
networkModel
)
{
this
.
phoneName
=
phoneName
;
this
.
networkModel
=
networkModel
;
}
}
}
// private DataStream<EnrichedLog> enrichWithPhoneModel(DataStream<CombinedLog> combinedLogStream) {
// return AsyncDataStream.orderedWait(
// combinedLogStream,
// new RichAsyncFunction<CombinedLog, EnrichedLog>() {
// /**
// *
// */
// private static final long serialVersionUID = 1L;
// private transient Connection connection;
//
// @Override
// public void open(Configuration parameters) throws Exception {
// Class.forName("com.mysql.cj.jdbc.Driver");
// connection = DriverManager.getConnection(
// "jdbc:mysql://10.0.1.213:3306/spider?useUnicode=true&characterEncoding=utf8&zeroDateTimeBehavior=convertToNull&useSSL=true&serverTimezone=GMT%2B8&nullCatalogMeansCurrent=true",
// "bigdata", "Im#Social&20181*29#im");
// }
//
// @Override
// public void asyncInvoke(CombinedLog input, ResultFuture<EnrichedLog> resultFuture) {
// CompletableFuture.supplyAsync((Supplier<Void>) () -> {
// try (PreparedStatement stmt = connection.prepareStatement(
// "SELECT phone_name, 入网型号 AS network_model FROM brand_phone WHERE 入网型号 = ?")) {
//
// stmt.setString(1, input.getModel());
// ResultSet rs = stmt.executeQuery();
//
// if (rs.next()) {
// logger.info(">>>>>enrichWithPhoneModel phone_name:{},network_model:{}",rs.getString("phone_name"),rs.getString("network_model"));
// resultFuture.complete(Collections.singleton(
// new EnrichedLog(input,
// rs.getString("phone_name"),
// rs.getString("network_model"))
// ));
// } else {
// resultFuture.complete(Collections.singleton(
// new EnrichedLog(input, null, null)
// ));
// }
// } catch (Exception e) {
// resultFuture.completeExceptionally(e);
//// resultFuture.complete(Collections.singleton(
//// new EnrichedLog(input, null, null)
//// ));
// }
// return null;
// });
// }
//
// @Override
// public void close() {
// if (connection != null) {
// try { connection.close(); }
// catch (Exception e) { logger.error("MySQL connection close error", e); }
// }
// }
// },
// 10, // 超时时间5秒
// TimeUnit.SECONDS,
// 50 // 最大并行请求数
// );
// }
/**
 * Groups the enriched stream by (deviceIdV1, dt) and emits a record whenever the
 * current element becomes the new "best" record for that key, per the IP-validity
 * + latest-time strategy in {@code isBetter}. Also maintains and injects the
 * device's first/latest seen event times.
 *
 * @param enrichedLogStream stream of phone-model-enriched logs
 * @return stream of best-so-far records (a key may emit multiple improving records)
 */
private DataStream<EnrichedLog> rankAndFilter(DataStream<EnrichedLog> enrichedLogStream) {
    return enrichedLogStream
            .keyBy(log -> log.getDeviceIdV1() + ":" + log.getDt())
            .process(new KeyedProcessFunction<String, EnrichedLog, EnrichedLog>() {
                private static final long serialVersionUID = 1L;
                // State 1: best record per (device, date) key (IP + time strategy), 2-day TTL.
                private MapState<String, EnrichedLog> bestRecordState;
                // State 2: first/latest seen times per device.
                // NOTE(review): the stream is keyed by deviceIdV1 + ":" + dt, so this state is
                // partitioned per (device, date) key — it does NOT actually span dates as the
                // original "cross-date" comment claimed. Confirm intended keying.
                private MapState<String, Tuple2<Long, Long>> deviceTimeState;

                @Override
                public void open(Configuration parameters) {
                    // Best-record state with a 2-day time-to-live.
                    MapStateDescriptor<String, EnrichedLog> bestRecordDesc = new MapStateDescriptor<>(
                            "bestRecords",
                            TypeInformation.of(String.class),
                            TypeInformation.of(EnrichedLog.class));
                    StateTtlConfig ttl = StateTtlConfig.newBuilder(Duration.ofDays(2)).build();
                    bestRecordDesc.enableTimeToLive(ttl);
                    bestRecordState = getRuntimeContext().getMapState(bestRecordDesc);
                    // Device first/latest time state (no TTL configured here).
                    MapStateDescriptor<String, Tuple2<Long, Long>> timeDesc = new MapStateDescriptor<>(
                            "deviceTimeState",
                            TypeInformation.of(String.class),
                            TypeInformation.of(new TypeHint<Tuple2<Long, Long>>() {})
                    );
                    deviceTimeState = getRuntimeContext().getMapState(timeDesc);
                }

                @Override
                public void processElement(EnrichedLog log, Context ctx, Collector<EnrichedLog> out) throws Exception {
                    String key = log.getDeviceIdV1() + ":" + log.getDt();
                    String deviceKey = log.getDeviceIdV1();
                    // Update first/latest seen times from the element's watermark time.
                    Tuple2<Long, Long> times = deviceTimeState.get(deviceKey);
                    if (times == null) {
                        times = Tuple2.of(log.getWaterMarkTime(), log.getWaterMarkTime());
                    } else {
                        times = Tuple2.of(
                                Math.min(times.f0, log.getWaterMarkTime()),
                                Math.max(times.f1, log.getWaterMarkTime())
                        );
                    }
                    deviceTimeState.put(deviceKey, times);
                    // Inject the tracked times into the outgoing record.
                    log.setFirstTime(times.f0);   // first seen time
                    log.setLatestTime(times.f1);  // latest seen time
                    // Keep only the best record per key (IP-priority strategy).
                    EnrichedLog currentBest = bestRecordState.get(key);
                    if (currentBest == null || isBetter(log, currentBest)) {
                        logger.info(">>>>>>>>>>>>>>rankAndFilter key:{},log:{}", key, log.toString());
                        bestRecordState.put(key, log);
                        out.collect(log); // emit the new best record
                    }
                }

                /**
                 * A new log beats the current best when it has a valid IP and the current
                 * best does not, or — IP validity being equal — when it is strictly newer.
                 */
                private boolean isBetter(EnrichedLog newLog, EnrichedLog currentBest) {
                    boolean newValid = isValidIp(newLog.getIp());
                    boolean currentValid = isValidIp(currentBest.getIp());
                    if (newValid && !currentValid) return true;
                    if (newValid == currentValid) {
                        return newLog.getWaterMarkTime() > currentBest.getWaterMarkTime();
                    }
                    return false;
                }

                /** An IP is valid when non-empty and not the literal "[]" or "null". */
                private boolean isValidIp(String ip) {
                    return StringUtils.isNotEmpty(ip) && !ip.equals("[]") && !ip.equals("null");
                }
            });
}
/**
 * Joins the ranked log stream with user-info records via a 5-minute tumbling
 * event-time window coGroup on (cid | phone), producing the final daily-activity
 * output rows. Logs with no matching user in the window are still emitted, with
 * a null country code (coGroup's outer-join behavior).
 *
 * @param finalLogStream ranked/enriched device logs
 * @param userDataStream user dimension stream (country code source)
 * @return daily-activity output records
 */
private DataStream<UserDailyActivityOutputLog> joinUserInfo(DataStream<EnrichedLog> finalLogStream,
        DataStream<SimiUserInfo> userDataStream) {
    return finalLogStream
            .coGroup(userDataStream)
            .where(log -> log.getCid() + "|" + log.getPhone())
            .equalTo(user -> user.getCid() + "|" + user.getPhone_number())
            .window(TumblingEventTimeWindows.of(Duration.ofMinutes(5)))
            .apply(new CoGroupFunction<EnrichedLog, SimiUserInfo, UserDailyActivityOutputLog>() {
                private static final long serialVersionUID = 1L;

                @Override
                public void coGroup(Iterable<EnrichedLog> logs, Iterable<SimiUserInfo> users,
                        Collector<UserDailyActivityOutputLog> out) {
                    // 1. Pre-scan user records: take the first non-null country code.
                    String countryCode = null;
                    for (SimiUserInfo user : users) {
                        if (user.getCountry_code() != null) {
                            countryCode = user.getCountry_code();
                            break; // first valid value wins
                        }
                    }
                    // 2. Map each log of the window to an output row.
                    for (EnrichedLog log : logs) {
                        UserDailyActivityOutputLog output = new UserDailyActivityOutputLog();
                        // Base field mapping from the enriched log.
                        output.setDeviceId(log.getDeviceIdV1());
                        output.setCid(log.getCid());
                        output.setAppKey(log.getAppKey());
                        output.setPlatform(getPlatformName(log.getAppKey()));
                        output.setAppType(log.getAppType());
                        output.setDt(log.getDt());
                        output.setCountryCode(countryCode); // user dimension (may be null)
                        output.setPhone(log.getPhone());
                        output.setNick(log.getNick());
                        output.setBrand(log.getBrand());
                        output.setModel(log.getModel());
                        output.setOsRelease(log.getOsRelease());
                        output.setAppVersion(log.getAppVersion());
                        output.setIp(log.getIp());
                        output.setAreaName(log.getAreaName());
                        output.setFirstTime(log.getFirstTime());
                        output.setLatestTime(log.getLatestTime());
                        output.setPhoneName(log.getPhoneName());
                        output.setNetworkModel(log.getNetworkModel());
                        // Human-readable device name: "<brand>-<phone name>".
                        output.setDeviceName(log.getBrand() + "-" + log.getPhoneName());
                        output.setZoneName(log.getZoneName());
                        output.setZoneType(log.getZoneType());
                        output.setZoneCode(log.getZoneCode());
                        logger.info(">>>>>>>>>>joinUserInfo output:{}", output.toString());
                        out.collect(output);
                    }
                }
            });
}
/**
 * Maps an app key to its human-readable platform name; unrecognized keys map to
 * the "unknown platform" label.
 *
 * @param appKey application key from the log (must be non-null)
 * @return the platform display name
 */
private String getPlatformName(String appKey) {
    if (appKey.equals("ptyzTPaV207")) {
        return "私米安卓国内版";
    }
    if (appKey.equals("giHQ1YLp925")) {
        return "私米IOS国内版";
    }
    return "未知平台";
}
// 设备时间计算器
/**
 * Keyed device-time calculator: tracks, per key, the earliest and latest
 * watermark time seen, injects both into each passing record, and forwards it.
 * State carries a 30-day time-to-live.
 */
public static class DeviceTimeCalculator extends KeyedProcessFunction<String, EnrichedLog, EnrichedLog> {

    private static final long serialVersionUID = 1L;

    /** Per-key (firstSeen, lastSeen) event-time pair. */
    private ValueState<Tuple2<Long, Long>> timeState;

    @Override
    public void open(Configuration parameters) {
        ValueStateDescriptor<Tuple2<Long, Long>> descriptor = new ValueStateDescriptor<>(
                "deviceTimes",
                TypeInformation.of(new TypeHint<Tuple2<Long, Long>>() {}));
        // Expire idle entries after 30 days to bound state size.
        descriptor.enableTimeToLive(StateTtlConfig.newBuilder(Duration.ofDays(30)).build());
        timeState = getRuntimeContext().getState(descriptor);
    }

    @Override
    public void processElement(EnrichedLog log, Context ctx, Collector<EnrichedLog> out) throws Exception {
        long eventTime = log.getWaterMarkTime();
        Tuple2<Long, Long> previous = timeState.value();
        // First sighting initializes both bounds; otherwise widen the interval.
        Tuple2<Long, Long> updated = (previous == null)
                ? Tuple2.of(eventTime, eventTime)
                : Tuple2.of(Math.min(previous.f0, eventTime), Math.max(previous.f1, eventTime));
        timeState.update(updated);
        logger.info(">>>>>>>>>DeviceTimeCalculator FirstTime:{},LatestTime:{}", updated.f0, updated.f1);
        log.setFirstTime(updated.f0);
        log.setLatestTime(updated.f1);
        out.collect(log);
    }
}
}
eagleEye-flink_kafka/src/main/java/com/flink/common/DorisConnector.java
View file @
f0508aeb
...
...
@@ -17,7 +17,7 @@ import com.flink.util.LoadPropertiesFile;
/**
* @author wjs
* @version 创建时间:2024-12-16 18:21:22
* 类说明 sink
Kafka
* 类说明 sink
Doris
*/
public
class
DorisConnector
{
...
...
eagleEye-flink_kafka/src/main/java/com/flink/enums/JobTypeEnum.java
View file @
f0508aeb
...
...
@@ -19,6 +19,7 @@ import java.util.stream.Collectors;
public
enum
JobTypeEnum
{
EVENT_IP_CONVERT
(
"JOB_01"
,
"事件IP转换作业"
),
COMMON_CONSUME_BASE
(
"JOB_02"
,
"公共基础消费采集作业"
),
USER_DAILY_ACTIVITY
(
"JOB_03"
,
"用户日活作业"
),
EVENT_IP_CONVERT_CID
(
"JOB_07"
,
"最新事件IP作业"
),
DEVICE_ID_CID
(
"JOB_08"
,
"最新设备ID作业"
),
...
...
eagleEye-flink_kafka/src/main/java/com/flink/factory/JobProcessorFactory.java
View file @
f0508aeb
...
...
@@ -9,6 +9,7 @@ import com.flink.processor.impl.EventIpLatestProcessor;
import
com.flink.processor.impl.RegistrationCheckProcessor
;
import
com.flink.processor.impl.SimiFriendsProcessor
;
import
com.flink.processor.impl.SimiGroupstProcessor
;
import
com.flink.processor.impl.UserDailyActivityProcessor
;
import
com.flink.processor.impl.VectorAngleCalculationProcessor
;
/**
...
...
@@ -22,6 +23,10 @@ public class JobProcessorFactory {
switch
(
jobType
)
{
case
EVENT_IP_CONVERT:
return
new
EventIpConvertProcessor
();
case
COMMON_CONSUME_BASE:
return
new
CommonConsumeBaseProcessor
();
case
USER_DAILY_ACTIVITY:
return
new
UserDailyActivityProcessor
();
case
EVENT_IP_CONVERT_CID:
return
new
EventIpLatestProcessor
();
case
DEVICE_ID_CID:
...
...
@@ -34,8 +39,6 @@ public class JobProcessorFactory {
return
new
VectorAngleCalculationProcessor
();
case
REGISTRATION_CHECK:
return
new
RegistrationCheckProcessor
();
case
COMMON_CONSUME_BASE:
return
new
CommonConsumeBaseProcessor
();
default
:
throw
new
IllegalArgumentException
(
"未知的Job类型: "
+
jobType
);
}
...
...
eagleEye-flink_kafka/src/main/java/com/flink/processor/impl/CommonConsumeBaseProcessor.java
View file @
f0508aeb
...
...
@@ -30,7 +30,8 @@ public class CommonConsumeBaseProcessor implements JobProcessor{
TopicTypeEnum
.
ODS_USER_INVITATION
,
TopicTypeEnum
.
ODS_EVENT_LOG
,
TopicTypeEnum
.
ODS_COMMUNITY_HISTORY
,
TopicTypeEnum
.
ODS_NEW_COLLECT_LOG
TopicTypeEnum
.
ODS_NEW_COLLECT_LOG
,
TopicTypeEnum
.
OPEN_SIMI_API
}).
map
(
TopicTypeEnum:
:
createKafkaTopic
)
.
collect
(
Collectors
.
toList
());
...
...
eagleEye-flink_kafka/src/main/java/com/flink/processor/impl/UserDailyActivityProcessor.java
0 → 100644
View file @
f0508aeb
package
com
.
flink
.
processor
.
impl
;
import
java.util.Arrays
;
import
java.util.List
;
import
java.util.stream.Collectors
;
import
com.flink.achieve.doris.UserDailyActivityAchi
;
import
com.flink.enums.JobTypeEnum
;
import
com.flink.enums.TopicTypeEnum
;
import
com.flink.processor.JobProcessor
;
import
com.flink.vo.KafkaTopic
;
/**
* @author wjs
* @version 创建时间:2025-7-31 11:03:25
* 类说明
*/
public
class
UserDailyActivityProcessor
implements
JobProcessor
{
@Override
public
void
process
()
throws
Exception
{
new
UserDailyActivityAchi
().
handleDataStreamSource
(
createTopicList
(),
JobTypeEnum
.
USER_DAILY_ACTIVITY
);
}
private
static
List
<
KafkaTopic
>
createTopicList
()
{
return
Arrays
.
stream
(
new
TopicTypeEnum
[]{
TopicTypeEnum
.
ODS_NEW_COLLECT_LOG
,
TopicTypeEnum
.
ODS_EVENT_LOG
,
TopicTypeEnum
.
ODS_PC_EVENT_LOG
,
TopicTypeEnum
.
ODS_PC_COLLECT_LOG
,
TopicTypeEnum
.
SIMI_USER_LIST_TOPIC
,
TopicTypeEnum
.
ABROAD_SIMI_USER_LIST_TOPIC
}).
map
(
TopicTypeEnum:
:
createKafkaTopic
)
.
collect
(
Collectors
.
toList
());
}
}
eagleEye-flink_kafka/src/main/java/com/flink/vo/CombinedLog.java
0 → 100644
View file @
f0508aeb
package
com
.
flink
.
vo
;
import
java.io.Serializable
;
import
lombok.Data
;
import
lombok.ToString
;
/**
 * Value object produced by joining a device log with an event log: carries the
 * device attributes plus the user/event attributes, the watermark time, and the
 * (initially zero) first/latest seen times filled in downstream.
 *
 * @author wjs
 * @version 创建时间:2025-8-1 16:17:44
 */
@Data
@ToString
public class CombinedLog implements Serializable {

    private static final long serialVersionUID = 1L;

    // --- device-side attributes ---
    private String deviceIdV1;
    private String appKey;
    private String appType;
    private String dt;           // partition date
    private String model;        // device model (lookup key for phone dimension)
    private String brand;
    private String osRelease;
    private String appVersion;
    // --- event/user-side attributes ---
    private String cid;
    private String phone;
    private String nick;
    private String ip;
    private String areaName;
    private long waterMarkTime;  // event time used for watermarking
    private String zoneName;
    private String zoneType;
    private String zoneCode;
    // Filled downstream by the rank/time-tracking operators; 0 when unset.
    private long firstTime;
    private long latestTime;

    public CombinedLog(String deviceIdV1, String appKey, String appType, String dt, String model,
            String brand, String osRelease, String appVersion, String cid, String phone, String nick,
            String ip, String areaName, long waterMarkTime, String zoneName, String zoneType,
            String zoneCode, long firstTime, long latestTime) {
        super();
        this.deviceIdV1 = deviceIdV1;
        this.appKey = appKey;
        this.appType = appType;
        this.dt = dt;
        this.model = model;
        this.brand = brand;
        this.osRelease = osRelease;
        this.appVersion = appVersion;
        this.cid = cid;
        this.phone = phone;
        this.nick = nick;
        this.ip = ip;
        this.areaName = areaName;
        this.waterMarkTime = waterMarkTime;
        this.zoneName = zoneName;
        this.zoneType = zoneType;
        this.zoneCode = zoneCode;
        this.firstTime = firstTime;
        this.latestTime = latestTime;
    }
}
eagleEye-flink_kafka/src/main/java/com/flink/vo/DeviceLog.java
0 → 100644
View file @
f0508aeb
package
com
.
flink
.
vo
;
import
java.io.Serializable
;
import
lombok.Data
;
import
lombok.ToString
;
/**
 * Value object for a parsed device-side collect log record.
 *
 * @author wjs
 * @version 创建时间:2025-7-31 16:20:48
 */
@Data
@ToString
public class DeviceLog implements Serializable {

    private static final long serialVersionUID = 1L;

    private String deviceId;
    private String deviceIdV1;
    private String appKey;
    private String uniqueId;     // join key against the event-log stream
    private String appType;
    private String dt;           // partition date
    private String model;
    private String brand;
    private String osRelease;
    private String appVersion;
    private Long waterMarkTime;  // event time used for watermarking
    private String zoneName;
    private String zoneType;
    private String zoneCode;

    public DeviceLog(String deviceId, String deviceIdV1, String appKey, String uniqueId,
            String appType, String dt, String model, String brand, String osRelease,
            String appVersion, Long waterMarkTime, String zoneName, String zoneType, String zoneCode) {
        this.deviceId = deviceId;
        this.deviceIdV1 = deviceIdV1;
        this.appKey = appKey;
        this.uniqueId = uniqueId;
        this.appType = appType;
        this.dt = dt;
        this.model = model;
        this.brand = brand;
        this.osRelease = osRelease;
        this.appVersion = appVersion;
        this.waterMarkTime = waterMarkTime;
        this.zoneName = zoneName;
        this.zoneType = zoneType;
        this.zoneCode = zoneCode;
    }
}
eagleEye-flink_kafka/src/main/java/com/flink/vo/EnrichedLog.java
0 → 100644
View file @
f0508aeb
package
com
.
flink
.
vo
;
import java.io.Serializable;

import lombok.Data;
import lombok.EqualsAndHashCode;
import lombok.ToString;
/**
 * {@link CombinedLog} enriched with phone-model dimension attributes resolved
 * from the brand_phone table.
 *
 * @author wjs
 * @version 创建时间:2025-8-1 16:24:00
 */
@Data
@ToString(callSuper = true)
// Without callSuper=true, Lombok's generated equals/hashCode would ignore every
// inherited CombinedLog field, making logs from different devices compare equal
// whenever phoneName/networkModel match.
@EqualsAndHashCode(callSuper = true)
public class EnrichedLog extends CombinedLog implements Serializable {

    private static final long serialVersionUID = 1L;

    /** Marketed phone name from the brand_phone dimension table (nullable). */
    private String phoneName;
    /** Network-access model from the dimension table (nullable). */
    private String networkModel;

    /**
     * Core constructor: builds an enriched log from the base log plus dimension data.
     *
     * @param baseLog      the original combined log
     * @param phoneName    brand phone name (may be null)
     * @param networkModel network-access model (may be null)
     */
    public EnrichedLog(CombinedLog baseLog, String phoneName, String networkModel) {
        // Initialize all base fields via the superclass constructor.
        super(baseLog.getDeviceIdV1(), baseLog.getAppKey(), baseLog.getAppType(), baseLog.getDt(),
                baseLog.getModel(), baseLog.getBrand(), baseLog.getOsRelease(), baseLog.getAppVersion(),
                baseLog.getCid(), baseLog.getPhone(), baseLog.getNick(), baseLog.getIp(),
                baseLog.getAreaName(), baseLog.getWaterMarkTime(), baseLog.getZoneName(),
                baseLog.getZoneType(), baseLog.getZoneCode(), baseLog.getFirstTime(),
                baseLog.getLatestTime());
        this.phoneName = phoneName;
        this.networkModel = networkModel;
    }
}
eagleEye-flink_kafka/src/main/java/com/flink/vo/EventLog.java
0 → 100644
View file @
f0508aeb
package
com
.
flink
.
vo
;
import
java.io.Serializable
;
import
lombok.Data
;
import
lombok.ToString
;
/**
 * Value object for a parsed event-log record (user/action side of the join).
 *
 * @author wjs
 * @version 创建时间:2025-8-1 15:07:12
 */
@Data
@ToString
public class EventLog implements Serializable {

    private static final long serialVersionUID = 1L;

    private String deviceId;
    private String uniqueId;     // join key against the device-log stream
    private String cid;
    private String phone;
    private String nick;
    private String ip;
    private String areaName;
    private String eventTime;
    private Long waterMarkTime;  // event time used for watermarking
    private String appKey;
    private String appType;
    private String createTime;
    private String dt;           // partition date

    public EventLog(String deviceId, String uniqueId, String cid, String phone, String nick,
            String ip, String areaName, String eventTime, Long waterMarkTime, String appKey,
            String appType, String createTime, String dt) {
        this.deviceId = deviceId;
        this.uniqueId = uniqueId;
        this.cid = cid;
        this.phone = phone;
        this.nick = nick;
        this.ip = ip;
        this.areaName = areaName;
        this.eventTime = eventTime;
        this.waterMarkTime = waterMarkTime;
        this.appKey = appKey;
        this.appType = appType;
        this.createTime = createTime;
        this.dt = dt;
    }
}
eagleEye-flink_kafka/src/main/java/com/flink/vo/UserDailyActivityOutputLog.java
0 → 100644
View file @
f0508aeb
package
com
.
flink
.
vo
;
import
java.io.Serializable
;
import
lombok.Data
;
import
lombok.ToString
;
/**
 * Final output row of the user daily-activity job, assembled in joinUserInfo
 * from the enriched log plus the user dimension (country code).
 *
 * @author wjs
 * @version 创建时间:2025-8-1 18:17:36
 */
@Data
@ToString
public class UserDailyActivityOutputLog implements Serializable {

    private static final long serialVersionUID = 1L;

    private String deviceId;
    private String cid;
    private String appKey;
    private String platform;        // display name derived from appKey
    private String appType;
    private String dt;              // partition date
    private String countryCode;     // from the user dimension; null when unmatched
    private String phone;
    private String nick;
    private String brand;
    private String model;
    private String osRelease;
    private String appVersion;
    private String ip;
    private String areaName;
    // NOTE(review): networkIp/networkAreaName are never set in the visible
    // joinUserInfo mapping — confirm whether a later stage populates them.
    private String networkIp;
    private String networkAreaName;
    private Long firstTime;         // first seen time for the device
    private Long latestTime;        // latest seen time for the device
    private String phoneName;       // from brand_phone dimension (nullable)
    private String networkModel;    // from brand_phone dimension (nullable)
    private String deviceName;      // "<brand>-<phoneName>"
    private String zoneName;
    private String zoneType;
    private String zoneCode;
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment