eagleEye / eagleEye-flink_kafka
Commit 93042482 authored Sep 11, 2025 by 魏建枢
Collect the user-behavior wide table
parent 27bba8ff
Showing 14 changed files with 285 additions and 241 deletions
eagleEye-flink_kafka/src/main/java/com/flink/achieve/base/CommonConsumeBaseAchi.java
eagleEye-flink_kafka/src/main/java/com/flink/achieve/table/InitCreateTempKafkaSqlTable.java
eagleEye-flink_kafka/src/main/java/com/flink/achieve/table/RegistrationCheckAchi.java
eagleEye-flink_kafka/src/main/java/com/flink/achieve/table/schema/DorisBaseSchema.java
eagleEye-flink_kafka/src/main/java/com/flink/achieve/table/schema/KafkaBaseSchema.java
eagleEye-flink_kafka/src/main/java/com/flink/achieve/table/sql/BaseKafkaToSqlCreateTable.java
eagleEye-flink_kafka/src/main/java/com/flink/achieve/table/sql/SysLogTable.java
eagleEye-flink_kafka/src/main/java/com/flink/achieve/table/udf/ParseSysLogListUDTF.java
eagleEye-flink_kafka/src/main/java/com/flink/common/SourceCommonBase.java
eagleEye-flink_kafka/src/main/java/com/flink/common/StreamEnvironmentSettings.java
eagleEye-flink_kafka/src/main/java/com/flink/enums/TopicTypeEnum.java
eagleEye-flink_kafka/src/main/java/com/flink/processor/impl/CommonConsumeSqlBaseProcessor.java
eagleEye-flink_kafka/src/main/java/com/flink/util/SqlLoader.java
eagleEye-flink_kafka/src/main/resources/sql-config.yaml
eagleEye-flink_kafka/src/main/java/com/flink/achieve/base/CommonConsumeBaseAchi.java
...
@@ -59,6 +59,10 @@ public class CommonConsumeBaseAchi extends SourceCommonBase implements Serializa
                DataStreamSource<String> exceptionEventStreamSource = kafkaDataSource.getDataStreamSource();
                ExceptionEventAchi.exceptionEvent(exceptionEventStreamSource);
            }
+           if (StringUtils.equals(kafkaDataSource.getTopic(), TopicTypeEnum.ODS_COLLECT_USER_BEHAVIOR.getTopic())) {
+               DataStreamSource<String> collectUserBehaviorStreamSource = kafkaDataSource.getDataStreamSource();
+               CollectUserBehaviorAchi.collectUserBehavior(collectUserBehaviorStreamSource);
+           }
            }
        } else {
            return;
...
eagleEye-flink_kafka/src/main/java/com/flink/achieve/table/InitCreateTempKafkaSqlTable.java
0 → 100644
package com.flink.achieve.table;

import java.io.Serializable;
import java.util.List;

import org.apache.flink.api.common.io.ParseException;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;

import com.flink.common.SourceCommonBase;
import com.flink.enums.TopicTypeEnum;
import com.flink.util.SqlLoader;
import com.flink.vo.KafkaDataSource;

/**
 * @author wjs
 * @version Created: 2025-9-11 14:34:19
 * Class description
 */
public class InitCreateTempKafkaSqlTable extends SourceCommonBase implements Serializable {

    private static final long serialVersionUID = 1L;

    @Override
    public void parseSourceKafkaJson(DataStreamSource<String> dataStreamSource) throws ParseException, Exception {
        // TODO Auto-generated method stub
    }

    @Override
    public void parseSourceKafkaToSqlTable(TopicTypeEnum topicTypeEnum, StreamTableEnvironment tableEnv, SqlLoader loader) throws ParseException, Exception {
        // TODO Auto-generated method stub
    }

    @Override
    public void parseMultipleSourceKafkaJson(List<KafkaDataSource> dataSourceList) throws ParseException, Exception {
        // TODO Auto-generated method stub
    }

    @Override
    public void parseMultipleSourceKafkaToSqlTable(StreamExecutionEnvironment env, StreamTableEnvironment tableEnv, SqlLoader loader) throws ParseException, Exception {
        String insertSql = loader.getSql("collect_user_behavior");
        tableEnv.executeSql(insertSql);
    }
}
eagleEye-flink_kafka/src/main/java/com/flink/achieve/table/RegistrationCheckAchi.java
...
@@ -65,9 +65,11 @@ public class RegistrationCheckAchi extends SourceCommonBase implements Serializa
        // String sql = loader.getSql("registrationCheck");
        // System.out.println("Join SQL:\n" + sql);
        // Execute the SQL
        Table result = tableEnv.sqlQuery("SELECT * FROM content_interaction_view");
        // Table result = tableEnv.sqlQuery("SELECT * FROM kafka_collect_user_behavior");
        result.execute().print();
+       Table kafkaTable = tableEnv.from("kafka_collect_user_behavior");
+       kafkaTable.execute().print();
    }
}
eagleEye-flink_kafka/src/main/java/com/flink/achieve/table/schema/DorisBaseSchema.java
0 → 100644
package com.flink.achieve.table.schema;

import static org.apache.flink.table.api.DataTypes.STRING;
import static org.apache.flink.table.api.DataTypes.TIMESTAMP;

import java.util.UUID;

import org.apache.doris.flink.sink.writer.LoadConstants;
import org.apache.flink.table.api.Schema;
import org.apache.flink.table.api.TableDescriptor;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;

import com.flink.util.LoadPropertiesFile;

/**
 * @author wjs
 * @version Created: 2025-9-11 15:08:07
 * Class description
 */
public class DorisBaseSchema {

    //----------------------------------------------------- Create the TableDescriptor -----------------------------------------------------
    /**
     * @param tableEnv      table environment
     * @param schema        table schema
     * @param tempTableName tempTableName
     * @param dbTableName   bi.tableName
     */
    public static void createTableDescriptor(StreamTableEnvironment tableEnv, Schema schema, String tempTableName, String dbTableName) {
        String uniqueLabelPrefix = "doris_label_" + UUID.randomUUID().toString();
        TableDescriptor descriptor = TableDescriptor.forConnector("doris")
                .schema(schema)
                .option("connector", "doris")
                .option("fenodes", LoadPropertiesFile.getPropertyFileValues("doris.fe"))
                .option("table.identifier", dbTableName)
                .option("username", LoadPropertiesFile.getPropertyFileValues("doris.username"))
                .option("password", "")
                .option("sink.properties.format", LoadConstants.JSON)
                .option("sink.properties.read_json_by_line", "true")
                .option("sink.properties.strip_outer_array", "false")
                .option("sink.enable-2pc", "true")
                .option("sink.enable-delete", "true")
                .option("sink.label-prefix", "doris_label_" + uniqueLabelPrefix)
                // Performance-tuning options below; adjust to the actual workload
                .option("sink.buffer-flush.max-rows", "10000") // important: must be >= 10000
                .option("sink.buffer-flush.interval", "10s")
                .option("sink.buffer-size", "4194304") // can be set to 4MB
                .option("sink.buffer-count", "3")
                .option("sink.enable.batch-mode", "true") // keep enabled for better throughput
                .option("sink.max-retries", "3")
                // .option("sink.buffer-size", "4194304") // buffer size in bytes, e.g. 4MB
                // .option("sink.buffer-count", "3") // number of buffers
                // .option("sink.buffer-flush.interval", "10s") // flush interval, e.g. 10s
                // .option("sink.max-retries", "3") // retry count
                .build();
        tableEnv.createTemporaryTable(tempTableName, descriptor);
    }

    //----------------------------------------------------- Define the Doris table structure and create the Schema -----------------------------------------------------
    public static void collectUserBehaviorSchema(StreamTableEnvironment tableEnv, String tempTableName, String dbTableName) {
        Schema schema = Schema.newBuilder()
                .column("id", STRING())
                .column("device_id", STRING())
                .column("app_type", STRING())
                .column("app_key", STRING())
                .column("cid", STRING())
                .column("phone", STRING())
                .column("event_type", STRING())
                .column("event_time", TIMESTAMP(3))
                .column("view_type", STRING())
                .column("view_id", STRING())
                .column("uid", STRING())
                .column("nick", STRING())
                .column("unique_id", STRING())
                .column("create_time", TIMESTAMP(3))
                .column("strategy_group_id", STRING())
                .column("strategy_version", STRING())
                .column("send_time", TIMESTAMP(3))
                .column("app_channel", STRING())
                .column("zone_code", STRING())
                .column("zone_name", STRING())
                .column("zone_type", STRING())
                .column("sdk_version", STRING())
                .column("user_agent", STRING())
                .column("user_properties", STRING())
                .column("route_ip", STRING())
                .column("ip_name", STRING())
                .column("area_name", STRING())
                .column("device_id_v1", STRING())
                .column("other_info", STRING())
                .column("device_info", STRING())
                .column("env_info", STRING())
                .column("network_ip", STRING())
                .column("network_area_name", STRING())
                .column("network_model", STRING())
                .column("phone_name", STRING())
                .column("device_name", STRING())
                .column("brand", STRING())
                .column("device_model", STRING())
                .column("os_release", STRING())
                .column("app_version", STRING())
                .column("platform", STRING())
                .column("third_id", STRING())
                .column("country_code", STRING())
                .column("register_time", TIMESTAMP(3))
                .column("user_state", STRING())
                .column("user_head_url", STRING())
                .column("content", STRING())
                .column("screen_name", STRING())
                .column("touch_pressure", STRING())
                .column("draw_point", STRING())
                .columnByMetadata("topic", STRING(), "topic", true) // Kafka metadata column
                .watermark("create_time", "create_time - INTERVAL '5' SECOND") // watermark strategy
                .build();
        createTableDescriptor(tableEnv, schema, tempTableName, dbTableName);
    }
}
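Note: DorisBaseSchema only registers the Doris sink as a temporary table; the actual write is driven by the INSERT statement loaded from sql-config.yaml (see InitCreateTempKafkaSqlTable above and the sql-config.yaml change below). A minimal, hypothetical usage sketch follows; it assumes a StreamTableEnvironment that already has the kafka_collect_user_behavior source table registered, and the class name CollectUserBehaviorSinkExample is illustrative only, not part of this commit.

    import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;

    import com.flink.achieve.table.schema.DorisBaseSchema;

    // Illustrative sketch only (not part of this commit).
    public class CollectUserBehaviorSinkExample {
        public static void run(StreamTableEnvironment tableEnv) {
            // Register the Doris sink as the temporary table "doris_collect_user_behavior",
            // backed by the physical table bi.collect_user_behavior.
            DorisBaseSchema.collectUserBehaviorSchema(tableEnv, "doris_collect_user_behavior", "bi.collect_user_behavior");
            // Stream the Kafka wide table into Doris; in the commit itself the production
            // INSERT (including its event_type filter) is loaded from sql-config.yaml instead.
            tableEnv.executeSql("INSERT INTO doris_collect_user_behavior SELECT * FROM kafka_collect_user_behavior");
        }
    }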
eagleEye-flink_kafka/src/main/java/com/flink/achieve/table/schema/KafkaBaseSchema.java
...
@@ -91,30 +91,6 @@ public class KafkaBaseSchema {
        createTableDescriptor(tableEnv, schema, topic, groupId, tableName);
    }

-   public static void sysLogSchema(StreamTableEnvironment tableEnv, String topic, String groupId, String tableName) {
-       Schema schema = Schema.newBuilder()
-               .column("method", STRING())
-               .column("create_time", TIMESTAMP(3))
-               .column("description", STRING())
-               .column("request_method", STRING())
-               .column("title", STRING())
-               .column("operate_url", STRING())
-               .column("operation_log_type", STRING())
-               .column("operate_name", STRING())
-               .column("request_body", STRING())
-               .column("success", STRING())
-               .column("operate_address", STRING())
-               .column("logging_type", STRING())
-               .column("browser", STRING())
-               .column("request_headers", STRING())
-               .column("system_os", STRING())
-               .column("id", STRING())
-               .columnByMetadata("topic", STRING(), "topic", true) // Kafka metadata column
-               .watermark("create_time", "create_time - INTERVAL '5' SECOND") // watermark strategy
-               .build();
-       createTableDescriptor(tableEnv, schema, topic, groupId, tableName);
-   }

    public static void simiUserSchema(StreamTableEnvironment tableEnv, String topic, String groupId, String tableName) {
        Schema schema = Schema.newBuilder()
                .column("nick", STRING())
...
@@ -208,4 +184,62 @@ public class KafkaBaseSchema {
                .build();
        createTableDescriptor(tableEnv, schema, topic, groupId, tableName);
    }

+   public static void collectUserBehaviorSchema(StreamTableEnvironment tableEnv, String topic, String groupId, String tableName) {
+       Schema schema = Schema.newBuilder()
+               .column("id", STRING())
+               .column("device_id", STRING())
+               .column("app_type", STRING())
+               .column("app_key", STRING())
+               .column("cid", STRING())
+               .column("phone", STRING())
+               .column("event_type", STRING())
+               .column("event_time", TIMESTAMP(3))
+               .column("view_type", STRING())
+               .column("view_id", STRING())
+               .column("uid", STRING())
+               .column("nick", STRING())
+               .column("unique_id", STRING())
+               .column("create_time", TIMESTAMP(3))
+               .column("strategy_group_id", STRING())
+               .column("strategy_version", STRING())
+               .column("send_time", TIMESTAMP(3))
+               .column("app_channel", STRING())
+               .column("zone_code", STRING())
+               .column("zone_name", STRING())
+               .column("zone_type", STRING())
+               .column("sdk_version", STRING())
+               .column("user_agent", STRING())
+               .column("user_properties", STRING())
+               .column("route_ip", STRING())
+               .column("ip_name", STRING())
+               .column("area_name", STRING())
+               .column("device_id_v1", STRING())
+               .column("other_info", STRING())
+               .column("device_info", STRING())
+               .column("env_info", STRING())
+               .column("network_ip", STRING())
+               .column("network_area_name", STRING())
+               .column("network_model", STRING())
+               .column("phone_name", STRING())
+               .column("device_name", STRING())
+               .column("brand", STRING())
+               .column("device_model", STRING())
+               .column("os_release", STRING())
+               .column("app_version", STRING())
+               .column("platform", STRING())
+               .column("third_id", STRING())
+               .column("country_code", STRING())
+               .column("register_time", TIMESTAMP(3))
+               .column("user_state", STRING())
+               .column("user_head_url", STRING())
+               .column("content", STRING())
+               .column("screen_name", STRING())
+               .column("touch_pressure", STRING())
+               .column("draw_point", STRING())
+               // .columnByMetadata("topic", STRING(), "topic", true) // Kafka metadata column
+               .watermark("create_time", "create_time - INTERVAL '5' SECOND") // watermark strategy
+               .build();
+       createTableDescriptor(tableEnv, schema, topic, groupId, tableName);
+   }
}
eagleEye-flink_kafka/src/main/java/com/flink/achieve/table/sql/BaseKafkaToSqlCreateTable.java
...
@@ -6,6 +6,7 @@ import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
+import com.flink.achieve.table.schema.DorisBaseSchema;
import com.flink.achieve.table.schema.KafkaBaseSchema;
import com.flink.enums.TopicTypeEnum;
import com.flink.vo.KafkaTopic;
...
@@ -19,7 +20,7 @@ public class BaseKafkaToSqlCreateTable {
    public static void kafkaToSqlCreateTable(List<KafkaTopic> kafkaTopicList, StreamTableEnvironment tableEnv) {
        //1. Time-zone and configuration tuning
-       tableEnv.getConfig().set("table.local-time-zone", "Asia/Shanghai").set("parallelism.default", "4");
+       tableEnv.getConfig().set("table.local-time-zone", "Asia/Shanghai").set("parallelism.default", "1");
        //2. Define the Kafka source-table structure
        if (CollectionUtils.isNotEmpty(kafkaTopicList)) {
...
@@ -77,9 +78,9 @@ public class BaseKafkaToSqlCreateTable {
                PcCollectLogTable.buildPcCollectLogQuery(tableEnv);
            } else if (StringUtils.equals(kafkaTopic.getTopic(), TopicTypeEnum.ODS_COMMUNITY_HISTORY.getTopic())) {
-           } else if (StringUtils.equals(kafkaTopic.getTopic(), TopicTypeEnum.ODS_SYS_LOG.getTopic())) {
-               KafkaBaseSchema.sysLogSchema(tableEnv, topic, groupId, "kafka_sys_log");
-               SysLogTable.buildSysLogQuery(tableEnv);
+           } else if (StringUtils.equals(kafkaTopic.getTopic(), TopicTypeEnum.ODS_COLLECT_USER_BEHAVIOR.getTopic())) {
+               KafkaBaseSchema.collectUserBehaviorSchema(tableEnv, topic, groupId, "kafka_collect_user_behavior");
+               DorisBaseSchema.collectUserBehaviorSchema(tableEnv, "doris_collect_user_behavior", "bi.collect_user_behavior");
            }
        }
    }
...
eagleEye-flink_kafka/src/main/java/com/flink/achieve/table/sql/SysLogTable.java
deleted 100644 → 0
package com.flink.achieve.table.sql;

import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;

import com.flink.achieve.table.udf.ParseSysLogListUDTF;

/**
 * @author wjs
 * @version Created: 2025-8-19 14:07:56
 * Class description
 */
public class SysLogTable {

    public static void buildSysLogQuery(StreamTableEnvironment tableEnv) {
        tableEnv.createTemporarySystemFunction("ParseSysLogList", ParseSysLogListUDTF.class);
        // Build and execute the SQL
        tableEnv.executeSql("CREATE VIEW sys_log_view AS "
                + "SELECT "
                + " k.operate_address AS network_ip, "
                + " t.collect_info['network_area_name'] AS network_area_name, "
                + " t.collect_info['unique_id'] AS unique_id, "
                + " t.collect_info['device_id'] AS device_id, "
                + " t.collect_info['device_id_v1'] AS device_id_v1, "
                + " t.collect_info['send_type'] AS send_type, "
                + " t.collect_info['zone_name'] AS zone_name, "
                + " t.collect_info['zone_code'] AS zone_code, "
                + " t.collect_info['zone_type'] AS zone_type, "
                + " t.collect_info['send_time'] AS send_time, "
                + " t.collect_info['app_key'] AS app_key, "
                + " t.collect_info['app_type'] AS app_type, "
                + " t.collect_info['cid'] AS cid, "
                + " t.collect_info['phone'] AS phone, "
                + " t.collect_info['nick'] AS nick "
                + "FROM kafka_sys_log AS k "
                + "LEFT JOIN LATERAL TABLE(ParseSysLogList(operate_url,operate_address,request_body,request_headers)) AS t(collect_info) ON TRUE");
    }
}
eagleEye-flink_kafka/src/main/java/com/flink/achieve/table/udf/ParseSysLogListUDTF.java
deleted 100644 → 0
package com.flink.achieve.table.udf;

import java.util.Base64;
import java.util.HashMap;
import java.util.Map;

import org.apache.commons.lang3.StringUtils;
import org.apache.flink.table.annotation.DataTypeHint;
import org.apache.flink.table.annotation.FunctionHint;
import org.apache.flink.table.functions.TableFunction;
import org.apache.flink.types.Row;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import com.alibaba.fastjson.TypeReference;
import com.flink.enums.SendTypeEnum;
import com.flink.processor.function.GenDeviceIdProcessor;
import com.flink.processor.function.UserPropertiesProcessor;
import com.flink.util.CompareUtils;
import com.flink.util.TimeConvertUtil;
import com.flink.util.des.EncryptUtil;
import com.flink.util.ip2region.SearcherUtil;
import com.flink.vo.DeviceIdInfo;
import com.flink.vo.PcDeviceInfo;
import com.flink.vo.PcEventInfo;
import com.flink.vo.UserProperties;

/**
 * @author wjs
 * @version Created: 2025-8-20 18:06:47
 * Class description
 */
@FunctionHint(output = @DataTypeHint("ROW<collect_info MAP<STRING,STRING>>"))
public class ParseSysLogListUDTF extends TableFunction<Row> {

    private static final long serialVersionUID = 1L;

    private static final Logger logger = LoggerFactory.getLogger(ParseSysLogListUDTF.class);

    public void eval(String operate_url, String operate_address, String request_body, String request_headers) {
        try {
            // Check the request path
            if (!CompareUtils.stringExists(operate_url, "/api/v1/event", "/api/v1/pcEvent")) {
                return;
            }
            if (StringUtils.isAllEmpty(request_body, request_headers)) {
                return;
            }
            JSONObject jsonObjBody = JSON.parseObject(request_body);
            JSONObject jsonObjHeaders = JSON.parseObject(request_headers);
            // Decrypt
            String bdParams = jsonObjBody.getString("bd");
            String hashDateKeyStr = jsonObjHeaders.getString("hk0").substring(0, 48);
            String appKey = jsonObjHeaders.getString("ak0");
            String appType = jsonObjHeaders.getString("at0");
            String sendTime = TimeConvertUtil.parseToStringSSS(jsonObjHeaders.getLongValue("st0"));
            String zoneName = new String(StringUtils.isNotEmpty(jsonObjHeaders.getString("zn0")) ? Base64.getDecoder().decode(jsonObjHeaders.getString("zn0")) : null);
            String zoneCode = jsonObjHeaders.getString("zc0");
            String zoneType = jsonObjHeaders.getString("zt0");
            String sendType = jsonObjHeaders.getString("st1");
            // Decrypt the bd payload
            if (StringUtils.isAllEmpty(bdParams, hashDateKeyStr)) {
                return;
            }
            String reqData = EncryptUtil.desDecrypt(bdParams, hashDateKeyStr);
            if (StringUtils.isEmpty(reqData)) {
                return;
            }
            String networkAreaName = SearcherUtil.getCityInfoByFile(operate_address);
            // Parse the request body and headers
            Map<String, Object> collectParams = new HashMap<>();
            collectParams.put("send_type", sendType);
            collectParams.put("zone_name", zoneName);
            collectParams.put("zone_code", zoneCode);
            collectParams.put("zone_type", zoneType);
            collectParams.put("send_time", sendTime);
            collectParams.put("app_key", appKey);
            collectParams.put("app_type", appType);
            collectParams.put("network_area_name", networkAreaName);
            if (StringUtils.equals(operate_url, "/api/v1/pcEvent")) {
                if (CompareUtils.stringExists(sendType, SendTypeEnum.DEVICE_INFO.getCode())) {
                    PcDeviceInfo pcDeviceInfo = JSONObject.parseObject(reqData, new TypeReference<PcDeviceInfo>() {});
                    String deviceId = pcDeviceInfo.getI8();
                    collectParams.put("unique_id", deviceId);
                    collectParams.put("device_id", deviceId);
                    collectParams.put("device_id_v1", deviceId);
                    collectParams.put("cid", pcDeviceInfo.getCid());
                    collectParams.put("phone", pcDeviceInfo.getPhone());
                    collectParams.put("nick", pcDeviceInfo.getNick());
                }
                if (CompareUtils.stringExists(sendType, SendTypeEnum.EVENT_INFO.getCode())) {
                    PcEventInfo pcEventInfo = JSONObject.parseObject(reqData, new TypeReference<PcEventInfo>() {});
                    if (null == pcEventInfo) {
                        return;
                    }
                    String deviceId = pcEventInfo.getI8();
                    collectParams.put("unique_id", deviceId);
                    collectParams.put("device_id", deviceId);
                    collectParams.put("device_id_v1", deviceId);
                    collectParams.put("cid", pcEventInfo.getCid());
                    collectParams.put("phone", pcEventInfo.getPhone());
                    collectParams.put("nick", pcEventInfo.getNick());
                }
            } else if (StringUtils.equals(operate_url, "/api/v1/event")) {
                String deviceId = jsonObjHeaders.getString("di0");
                JSONObject jsonObj = JSON.parseObject(reqData);
                String other_info = jsonObj.getString("i1");
                String device_info = jsonObj.getString("a1");
                String env_info = jsonObj.getString("g1");
                String user_properties = jsonObj.getString("s2");
                String unique_id = jsonObj.getString("s3");
                if (CompareUtils.stringExists(sendType, SendTypeEnum.ALL.getCode(), SendTypeEnum.DEVICE_INFO.getCode())) {
                    if (!StringUtils.isAllEmpty(device_info, env_info)) {
                        DeviceIdInfo deviceIdInfo = GenDeviceIdProcessor.genDeviceId(appType, appKey, other_info, device_info, env_info);
                        UserProperties userProperties = UserPropertiesProcessor.userPropertiesToJson(user_properties);
                        collectParams.put("unique_id", unique_id);
                        collectParams.put("device_id", deviceId);
                        collectParams.put("device_id_v1", deviceIdInfo.getDeviceIdV1());
                        collectParams.put("cid", userProperties.getCid());
                        collectParams.put("phone", userProperties.getPhone());
                        collectParams.put("nick", userProperties.getNick());
                    }
                }
                if (CompareUtils.stringExists(sendType, SendTypeEnum.ALL.getCode(), SendTypeEnum.EVENT_INFO.getCode())) {
                    UserProperties userProperties = UserPropertiesProcessor.userPropertiesToJson(user_properties);
                    collectParams.put("unique_id", unique_id);
                    collectParams.put("device_id", deviceId);
                    collectParams.put("device_id_v1", null);
                    collectParams.put("cid", userProperties.getCid());
                    collectParams.put("phone", userProperties.getPhone());
                    collectParams.put("nick", userProperties.getNick());
                }
            }
            if (null != collectParams) {
                collect(Row.of(collectParams));
            }
        } catch (Exception e) {
            logger.error("ParseSysLogListUDTF eval e:{}", e.toString());
        }
    }
}
eagleEye-flink_kafka/src/main/java/com/flink/common/SourceCommonBase.java
...
@@ -40,7 +40,7 @@ public abstract class SourceCommonBase {
        List<KafkaTopic> kafkaTopicList = Arrays.asList(new KafkaTopic(topicTypeEnum.getGroup(), topicTypeEnum.getTopic()));
-       StreamTableEnvironment tableEnv = StreamEnvironmentSettings.createTableEnv(env, jobTypeEnum.getCode(), kafkaTopicList, false);
+       StreamTableEnvironment tableEnv = StreamEnvironmentSettings.createTableEnv(env, jobTypeEnum, kafkaTopicList, false);
        logger.info("2. table的环境设置成功");
        //3. Load the resource configuration files
        SqlLoader loader = new SqlLoader();
...
@@ -65,13 +65,13 @@ public abstract class SourceCommonBase {
     * @param jobType
     * @param useTableAPI
     */
-   public void multipleExecuteJob(List<KafkaTopic> kafkaTopicList, JobTypeEnum jobName, boolean useTableAPI, boolean useStreamAPI) throws Exception {
+   public void multipleExecuteJob(List<KafkaTopic> kafkaTopicList, JobTypeEnum jobTypeEnum, boolean useTableAPI, boolean useStreamAPI) throws Exception {
        //1. Environment setup
-       StreamExecutionEnvironment env = StreamEnvironmentSettings.createStreamEnv(jobName.getCode());
+       StreamExecutionEnvironment env = StreamEnvironmentSettings.createStreamEnv(jobTypeEnum.getCode());
        logger.info("1. 环境的设置成功");
        if (useTableAPI) {
            //2. Table environment setup
-           StreamTableEnvironment tableEnv = StreamEnvironmentSettings.createTableEnv(env, jobName.getCode(), kafkaTopicList, false);
+           StreamTableEnvironment tableEnv = StreamEnvironmentSettings.createTableEnv(env, jobTypeEnum, kafkaTopicList, false);
            logger.info("2. table的环境设置成功");
            //3. Load the resource configuration files
            // Initialize the loader and load the configuration file
...
@@ -97,8 +97,9 @@ public abstract class SourceCommonBase {
            //3. Kafka source ETL
            parseMultipleSourceKafkaJson(dataSourceList);
            logger.info("3.Kafka资源ETL操作成功");
+           env.execute(jobTypeEnum.getDescription());
        }
-       env.execute(jobName.getDescription());
    }

    /**
...
eagleEye-flink_kafka/src/main/java/com/flink/common/StreamEnvironmentSettings.java
...
@@ -21,6 +21,7 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.flink.achieve.table.sql.BaseKafkaToSqlCreateTable;
+import com.flink.enums.JobTypeEnum;
import com.flink.util.LoadPropertiesFile;
import com.flink.vo.KafkaTopic;
...
@@ -103,8 +104,8 @@ public class StreamEnvironmentSettings {
    }

    // Create a Table environment that supports nested JSON
-   public static StreamTableEnvironment createTableEnv(StreamExecutionEnvironment env, String jobType, List<KafkaTopic> kafkaTopicList, boolean enableHiveCatalog) {
-       logger.info("StreamEnvironmentSettings createTableEnv start jobType:{}", jobType);
+   public static StreamTableEnvironment createTableEnv(StreamExecutionEnvironment env, JobTypeEnum jobTypeEnum, List<KafkaTopic> kafkaTopicList, boolean enableHiveCatalog) {
+       logger.info("StreamEnvironmentSettings createTableEnv start jobType:{}", jobTypeEnum.getCode());
        // Blink Planner configuration
        EnvironmentSettings settings = EnvironmentSettings.newInstance() // use the Builder pattern
...
@@ -122,6 +123,7 @@ public class StreamEnvironmentSettings {
        tableConfig.set(TableConfigOptions.TABLE_SQL_DIALECT, "DEFAULT"); // or "HIVE"
        // Dynamically load the production configuration
        tableConfig.addConfiguration(getProductionConfig());
+       tableConfig.set("pipeline.name", jobTypeEnum.getDescription());
        // Register the Hive Catalog on demand (SQL style); not needed for now
        // if (enableHiveCatalog) {
        //     tableEnv.executeSql(
...
@@ -136,7 +138,7 @@ public class StreamEnvironmentSettings {
        // Convert the Kafka source data into tables
        BaseKafkaToSqlCreateTable.kafkaToSqlCreateTable(kafkaTopicList, tableEnv);
-       logger.info("StreamEnvironmentSettings createTableEnv end jobType:{}", jobType);
+       logger.info("StreamEnvironmentSettings createTableEnv end jobType:{}", jobTypeEnum.getCode());
        return tableEnv;
    }
...
@@ -148,7 +150,7 @@ public class StreamEnvironmentSettings {
        prodConfig.set(TaskManagerOptions.NETWORK_MEMORY_MIN, MemorySize.parse("512mb"));
        prodConfig.set(TaskManagerOptions.NETWORK_MEMORY_MAX, MemorySize.parse("512mb"));
        prodConfig.set(TaskManagerOptions.TOTAL_PROCESS_MEMORY, MemorySize.ofMebiBytes(4096L));
-       prodConfig.set(TaskManagerOptions.NUM_TASK_SLOTS, 4);
+       prodConfig.set(TaskManagerOptions.NUM_TASK_SLOTS, 1);
        return prodConfig;
    }
}
\ No newline at end of file
eagleEye-flink_kafka/src/main/java/com/flink/enums/TopicTypeEnum.java
...
@@ -8,7 +8,7 @@ import com.flink.vo.KafkaTopic;
 * Class description
 */
public enum TopicTypeEnum {
-   DWD_SYS_LOG("dwd_sys_log", "dwdSysLog"),
+   DWD_SYS_LOG("dwd_sys_log", "dwdSysLog"), // deprecated
    ODS_EVENT_LOG("ods_event_log", "eventLogGroup"),
    ODS_NEW_COLLECT_LOG("ods_new_collect_log", "odsNewCollectLog"),
    ODS_ZIPPER_STRATEGY("ods_zipper_strategy", "odsZipperStrategy"),
...
@@ -30,7 +30,7 @@ public enum TopicTypeEnum {
    ODS_PC_EVENT_LOG("ods_pc_event_log", "odsPcEventLog"),
    ODS_PC_COLLECT_LOG("ods_pc_collect_log", "odsPcCollectLog"),
    ODS_COMMUNITY_HISTORY("ods_community_history", "odsCommunityHistory"),
-   ODS_SYS_LOG("ods_sys_log", "odsSysLog"),
+   ODS_SYS_LOG("ods_sys_log", "odsSysLog"), // deprecated
    ODS_COLLECT_USER_BEHAVIOR("ods_collect_user_behavior", "odsCollectUserBehavior"),
    ODS_EXCEPTION_EVENT_TOPIC("ods_exception_event_topic", "odsExceptionEventTopic"),
...
eagleEye-flink_kafka/src/main/java/com/flink/processor/impl/CommonConsumeSqlBaseProcessor.java
...
@@ -4,7 +4,7 @@ import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;
-import com.flink.achieve.table.TableSqlSinkKafkaAchi;
+import com.flink.achieve.table.InitCreateTempKafkaSqlTable;
import com.flink.enums.JobTypeEnum;
import com.flink.enums.TopicTypeEnum;
import com.flink.processor.JobProcessor;
...
@@ -19,7 +19,7 @@ public class CommonConsumeSqlBaseProcessor implements JobProcessor{
    @Override
    public void process() throws Exception {
-       new TableSqlSinkKafkaAchi().multipleExecuteJob(
+       new InitCreateTempKafkaSqlTable().multipleExecuteJob(
                createTopicList(),
                JobTypeEnum.COMMON_CONSUME_SQL_BASE,
                true,
...
@@ -29,7 +29,15 @@ public class CommonConsumeSqlBaseProcessor implements JobProcessor{
    private static List<KafkaTopic> createTopicList() {
        return Arrays.stream(new TopicTypeEnum[]{
                TopicTypeEnum.ODS_SYS_LOG,
                // TopicTypeEnum.ODS_EVENT_LOG,
                // TopicTypeEnum.ODS_NEW_COLLECT_LOG,
                // TopicTypeEnum.SIMI_USER_LIST_TOPIC,
                // TopicTypeEnum.ABROAD_SIMI_USER_LIST_TOPIC,
                // TopicTypeEnum.OPEN_SIMI_API,
                // TopicTypeEnum.ODS_PC_EVENT_LOG,
                // TopicTypeEnum.ODS_PC_COLLECT_LOG,
                // TopicTypeEnum.ODS_COMMUNITY_HISTORY,
                TopicTypeEnum.ODS_COLLECT_USER_BEHAVIOR
        }).map(TopicTypeEnum::createKafkaTopic)
                .collect(Collectors.toList());
...
eagleEye-flink_kafka/src/main/java/com/flink/util/SqlLoader.java
package com.flink.util;

import java.io.InputStream;
+import java.io.Serializable;
import java.util.HashMap;
import java.util.Map;
...
@@ -11,7 +12,11 @@ import org.apache.flink.shaded.jackson2.org.yaml.snakeyaml.Yaml;
 * @version Created: 2025-8-26 14:55:56
 * Class description
 */
-public class SqlLoader {
+public class SqlLoader implements Serializable {
+   /**
+    *
+    */
+   private static final long serialVersionUID = 1L;
    private final Map<String, String> sqlMap = new HashMap<>();

    /**
...
eagleEye-flink_kafka/src/main/resources/sql-config.yaml
...
@@ -5,3 +5,8 @@ queries:
  event_log_view:
    SELECT * FROM event_log_view
+  collect_user_behavior:
+    INSERT INTO doris_collect_user_behavior
+    SELECT *
+    FROM kafka_collect_user_behavior
+    WHERE event_type NOT IN ('socket_event', 'socket_error', 'socket_time', 'refresh_token', 'all_time', 'enter_act', 'exit_act', 'show_act', 'activity_red_exposure')
\ No newline at end of file
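For reference, a hypothetical sketch of how a loader such as SqlLoader could turn this queries: section into the map that getSql("collect_user_behavior") reads from. The class name SqlConfigExample is illustrative only, and it uses plain snakeyaml rather than the flink-shaded copy imported by the real class; the actual SqlLoader (only partially shown in this diff) may differ.

    import java.io.InputStream;
    import java.util.HashMap;
    import java.util.Map;

    import org.yaml.snakeyaml.Yaml;

    // Illustrative sketch only; the real SqlLoader in this repository may differ.
    public class SqlConfigExample {
        @SuppressWarnings("unchecked")
        public static Map<String, String> load() {
            Map<String, String> sqlMap = new HashMap<>();
            try (InputStream in = SqlConfigExample.class.getResourceAsStream("/sql-config.yaml")) {
                Map<String, Object> root = (Map<String, Object>) new Yaml().load(in);
                Map<String, Object> queries = (Map<String, Object>) root.get("queries");
                // Multi-line plain scalars are folded into a single SQL string by the YAML parser.
                queries.forEach((name, sql) -> sqlMap.put(name, String.valueOf(sql)));
            } catch (Exception e) {
                throw new IllegalStateException("Failed to load sql-config.yaml", e);
            }
            return sqlMap;
        }

        public static void main(String[] args) {
            // Prints the INSERT that InitCreateTempKafkaSqlTable executes in this commit.
            System.out.println(load().get("collect_user_behavior"));
        }
    }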