Commit d894f28f by 魏建枢

双流join

parent db8a7d63
Showing with 310 additions and 49 deletions
......@@ -32,17 +32,17 @@
<!--通用依赖 -->
<dependencies>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-planner_2.12</artifactId>
<version>1.20.0</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-api-java</artifactId>
<version>1.20.0</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-planner_2.12</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-api-java</artifactId>
<version>${flink.version}</version>
</dependency>
<!-- json -->
<dependency>
<groupId>com.alibaba</groupId>
......@@ -81,7 +81,11 @@
<artifactId>flink-connector-files</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-json</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-jdbc</artifactId>
......@@ -127,7 +131,7 @@
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-runtime-web</artifactId>
<version>1.20.0</version>
<version>${flink.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
......@@ -166,12 +170,12 @@
<version>2.6.4</version>
</dependency>
<dependency>
<groupId>jdk.tools</groupId>
<artifactId>jdk.tools</artifactId>
<version>1.8</version>
<scope>system</scope>
<!-- 根据 JDK 版本选择路径 -->
<systemPath>${JAVA_HOME}/lib/tools.jar</systemPath>
<groupId>jdk.tools</groupId>
<artifactId>jdk.tools</artifactId>
<version>1.8</version>
<scope>system</scope>
<!-- 根据 JDK 版本选择路径 -->
<systemPath>${JAVA_HOME}/lib/tools.jar</systemPath>
</dependency>
</dependencies>
......
......@@ -32,7 +32,7 @@ import com.flink.vo.UserProperties;
/**
* @author wjs
* @version 创建时间:2025-5-6 16:01:23
* 类说明
* 类说明 https://www.bookstack.cn/read/flink-1.20-zh/60d184d31c0a61a5.md
*/
public class EventIpConvertAchi extends SourceCommonBase implements Serializable{
......
package com.flink.common;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.collections.CollectionUtils;
import org.apache.flink.api.common.io.ParseException;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.flink.enums.JobTypeEnum;
import com.flink.vo.KafkaDataSource;
import com.flink.vo.KafkaTopic;
/**
 * Base class for Flink jobs that consume MULTIPLE Kafka topics in one job:
 * builds one {@code DataStreamSource} per topic, hands the sources to the
 * subclass for ETL, then submits the job.
 *
 * @author wjs
 * @version 创建时间:2024-12-20 10:43:56
 */
public abstract class MultipleSourceCommonBase {

    private static final Logger logger = LoggerFactory.getLogger(MultipleSourceCommonBase.class);

    /**
     * Sets up the execution environment, creates a Kafka source for every
     * entry of {@code kafkaTopicList}, delegates parsing to the subclass and
     * finally executes the job.
     *
     * @param kafkaTopicList topics (topic name + consumer group) to consume; may be null/empty,
     *                       in which case the subclass receives an empty list
     * @param jobName        job type: its code configures the environment, its description
     *                       names the submitted Flink job
     * @throws Exception if environment setup, source creation, parsing or job submission fails
     */
    public void handleDataStreamSource(List<KafkaTopic> kafkaTopicList, JobTypeEnum jobName) throws Exception {
        // 1. Environment setup, keyed by the job-type code.
        StreamExecutionEnvironment env = EnvironmentSettings.environmentSettings(jobName.getCode());
        logger.info("1. 环境的设置成功");

        // 2. One Kafka source per configured topic.
        List<KafkaDataSource> dataSourceList = new ArrayList<>();
        if (CollectionUtils.isNotEmpty(kafkaTopicList)) {
            for (KafkaTopic kafkaTopic : kafkaTopicList) {
                String topic = kafkaTopic.getTopic();
                String group = kafkaTopic.getGroup();
                DataStreamSource<String> dataStreamSource = KafkaConnector.sourceKafka(env, topic, group);
                KafkaDataSource kafkaDataSource = new KafkaDataSource();
                kafkaDataSource.setDataStreamSource(dataStreamSource);
                kafkaDataSource.setTopic(topic);
                dataSourceList.add(kafkaDataSource);
            }
        }
        logger.info("2.资源配置文件信息的获取成功");

        // 3. Subclass-specific ETL over the raw Kafka streams.
        parseSourceKafkaJson(dataSourceList);
        logger.info("3.Kafka资源ETL操作成功");

        env.execute(jobName.getDescription());
    }

    /**
     * Parses the raw JSON streams; implemented by each concrete job.
     *
     * <p>NOTE(review): the previous {@code throws ParseException, Exception}
     * was redundant — {@code Exception} already covers {@code ParseException} —
     * so only {@code Exception} is declared; existing overrides that still
     * declare both remain source-compatible.
     *
     * @param dataSourceList one entry per consumed topic (topic name + its stream)
     * @throws Exception on any parsing failure
     */
    public abstract void parseSourceKafkaJson(List<KafkaDataSource> dataSourceList) throws Exception;
}
......@@ -6,7 +6,8 @@ import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONObject;
import com.flink.enums.JobTypeEnum;
import com.flink.enums.TopicTypeEnum;
/**
* @author wjs
......@@ -16,17 +17,17 @@ import com.alibaba.fastjson.JSONObject;
public abstract class SourceCommonBase {
private static final Logger logger = LoggerFactory.getLogger(SourceCommonBase.class);
public void handleDataStreamSource(String jobName,String topic,String group,String jobType) throws Exception {
public void handleDataStreamSource(JobTypeEnum jobTypeEnum,TopicTypeEnum topicTypeEnum) throws Exception {
//1. 环境的设置
StreamExecutionEnvironment env = EnvironmentSettings.environmentSettings(jobType);
StreamExecutionEnvironment env = EnvironmentSettings.environmentSettings(jobTypeEnum.getCode());
logger.info("1. 环境的设置成功");
//2.资源配置文件信息的获取
DataStreamSource<String> dataStreamSource = KafkaConnector.sourceKafka(env, topic, group);
DataStreamSource<String> dataStreamSource = KafkaConnector.sourceKafka(env, topicTypeEnum.getTopic(), topicTypeEnum.getGroup());
logger.info("2.资源配置文件信息的获取成功");
//3.Kafka资源ETL
parseSourceKafkaJson(dataStreamSource);
logger.info("3.Kafka资源ETL操作成功");
env.execute(jobName);
env.execute(jobTypeEnum.getDescription());
}
/**
......
......@@ -23,6 +23,7 @@ public enum JobTypeEnum {
REAL_USERS("JOB_04", "真实用户作业"),
REAL_BALANCE("JOB_05", "真实余额作业"),
COLLECT_LOG("JOB_06", "日志采集作业"),
EVENT_IP_CONVERT_CID("JOB_07", "最新事件IP作业"),
;
......
package com.flink.enums;
import com.flink.vo.KafkaTopic;
/**
* @author wjs
......@@ -23,6 +24,7 @@ public enum TopicTypeEnum {
ODS_CID_GROUP_OVERLAP("ods_cid_group_overlap","odsCidGroupOverlap"),
ODS_EVENT_IP_CONVERT("ods_event_ip_convert","odsEventIpConvert"),
ODS_USER_INVITATION("ods_user_invitation","odsUserInvitation"),
SIMI_USER_LIST_TOPIC("simi_user_list","simiUserList"),
;
private String topic;
......@@ -34,6 +36,13 @@ public enum TopicTypeEnum {
this.group = group;
}
/**
 * Builds a {@link KafkaTopic} carrying this enum constant's topic name
 * and consumer group.
 *
 * @return a freshly populated {@code KafkaTopic}
 */
public KafkaTopic createKafkaTopic() {
    KafkaTopic kafkaTopic = new KafkaTopic();
    kafkaTopic.setTopic(this.topic);
    kafkaTopic.setGroup(this.group);
    return kafkaTopic;
}
public String getTopic() {
return topic;
}
......
......@@ -3,6 +3,7 @@ package com.flink.factory;
import com.flink.enums.JobTypeEnum;
import com.flink.processor.JobProcessor;
import com.flink.processor.impl.CollectLogProcessor;
import com.flink.processor.impl.EventIpLatestProcessor;
import com.flink.processor.impl.EventIpConvertProcessor;
import com.flink.processor.impl.RealBalanceProcessor;
import com.flink.processor.impl.RealKycProcessor;
......@@ -30,6 +31,8 @@ public class JobProcessorFactory {
return new RealBalanceProcessor();
case COLLECT_LOG:
return new CollectLogProcessor();
case EVENT_IP_CONVERT_CID:
return new EventIpLatestProcessor();
default:
throw new IllegalArgumentException("未知的Job类型: " + jobType);
}
......
......@@ -15,10 +15,8 @@ public class CollectLogProcessor implements JobProcessor{
@Override
public void process() throws Exception {
new CollectLogAchi().handleDataStreamSource(
JobTypeEnum.COLLECT_LOG.getDescription(),
TopicTypeEnum.ODS_NEW_COLLECT_LOG.getTopic(),
TopicTypeEnum.ODS_NEW_COLLECT_LOG.getGroup(),
JobTypeEnum.COLLECT_LOG.getCode()
JobTypeEnum.COLLECT_LOG,
TopicTypeEnum.ODS_NEW_COLLECT_LOG
);
}
......
......@@ -15,10 +15,8 @@ public class EventIpConvertProcessor implements JobProcessor{
@Override
public void process() throws Exception {
new EventIpConvertAchi().handleDataStreamSource(
JobTypeEnum.EVENT_IP_CONVERT.getDescription(),
TopicTypeEnum.ODS_EVENT_LOG.getTopic(),
TopicTypeEnum.ODS_EVENT_LOG.getGroup(),
JobTypeEnum.EVENT_IP_CONVERT.getCode()
JobTypeEnum.EVENT_IP_CONVERT,
TopicTypeEnum.ODS_EVENT_LOG
);
}
......
package com.flink.processor.impl;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;
import com.flink.achieve.doris.EventIpLatestAchi;
import com.flink.enums.JobTypeEnum;
import com.flink.enums.TopicTypeEnum;
import com.flink.processor.JobProcessor;
import com.flink.vo.KafkaTopic;
/**
 * Processor that launches the "latest event IP" job, which joins the raw
 * event-log stream with the simi-user-list stream (双流join).
 *
 * @author wjs
 * @version 创建时间:2025-5-26 14:39:44
 */
public class EventIpLatestProcessor implements JobProcessor{

    @Override
    public void process() throws Exception {
        new EventIpLatestAchi().handleDataStreamSource(
                createTopicList(),
                JobTypeEnum.EVENT_IP_CONVERT_CID
        );
    }

    /**
     * Topics this job consumes: the ODS event log plus the simi user list.
     */
    private static List<KafkaTopic> createTopicList() {
        return Arrays.asList(TopicTypeEnum.ODS_EVENT_LOG, TopicTypeEnum.SIMI_USER_LIST_TOPIC)
                .stream()
                .map(TopicTypeEnum::createKafkaTopic)
                .collect(Collectors.toList());
    }
}
......@@ -15,10 +15,8 @@ public class RealBalanceProcessor implements JobProcessor{
@Override
public void process() throws Exception {
new RealBalanceAchi().handleDataStreamSource(
JobTypeEnum.REAL_BALANCE.getDescription(),
TopicTypeEnum.ODS_USER_INVITATION.getTopic(),
TopicTypeEnum.ODS_USER_INVITATION.getGroup(),
JobTypeEnum.REAL_BALANCE.getCode()
JobTypeEnum.REAL_BALANCE,
TopicTypeEnum.ODS_USER_INVITATION
);
}
......
......@@ -15,10 +15,8 @@ public class RealKycProcessor implements JobProcessor{
@Override
public void process() throws Exception {
new RealKycAchi().handleDataStreamSource(
JobTypeEnum.REAL_KYC.getDescription(),
TopicTypeEnum.ODS_USER_INVITATION.getTopic(),
TopicTypeEnum.ODS_USER_INVITATION.getGroup(),
JobTypeEnum.REAL_KYC.getCode()
JobTypeEnum.REAL_KYC,
TopicTypeEnum.ODS_USER_INVITATION
);
}
......
......@@ -15,10 +15,8 @@ public class RealTransactionProcessor implements JobProcessor{
@Override
public void process() throws Exception {
new RealTransactionAchi().handleDataStreamSource(
JobTypeEnum.REAL_TRANSACTION.getDescription(),
TopicTypeEnum.ODS_USER_INVITATION.getTopic(),
TopicTypeEnum.ODS_USER_INVITATION.getGroup(),
JobTypeEnum.REAL_TRANSACTION.getCode()
JobTypeEnum.REAL_TRANSACTION,
TopicTypeEnum.ODS_USER_INVITATION
);
}
......
......@@ -15,10 +15,8 @@ public class RealUsersProcessor implements JobProcessor{
@Override
public void process() throws Exception {
new RealUsersAchi().handleDataStreamSource(
JobTypeEnum.REAL_USERS.getDescription(),
TopicTypeEnum.ODS_USER_INVITATION.getTopic(),
TopicTypeEnum.ODS_USER_INVITATION.getGroup(),
JobTypeEnum.REAL_USERS.getCode()
JobTypeEnum.REAL_USERS,
TopicTypeEnum.ODS_USER_INVITATION
);
}
......
package com.flink.util;
import java.text.SimpleDateFormat;
import java.time.LocalDateTime;
import java.time.ZoneOffset;
import java.time.format.DateTimeFormatter;
import java.util.TimeZone;
/**
 * Time conversion helpers.
 *
 * @author wjs
 * @version 创建时间:2025-5-27 14:33:05
 */
public class TimeConvertUtil {

    /**
     * Cached, immutable, thread-safe formatter for "yyyy-MM-dd HH:mm:ss".
     * Replaces the previous per-call {@code SimpleDateFormat}, which is not
     * thread-safe and therefore could not be cached as a constant.
     */
    private static final DateTimeFormatter FORMATTER =
            DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");

    /**
     * Parses a "yyyy-MM-dd HH:mm:ss" string as a UTC wall-clock time and
     * returns the corresponding epoch milliseconds.
     *
     * <p>Note: unlike the lenient {@code SimpleDateFormat} it replaces, this
     * rejects impossible dates (e.g. month 13) instead of rolling them over.
     *
     * @param timeStr timestamp text, e.g. {@code "2025-05-27 14:33:05"}
     * @return epoch milliseconds of the parsed instant (UTC)
     * @throws Exception if {@code timeStr} does not match the pattern
     */
    public static long convertToTimestamp(String timeStr) throws Exception {
        return LocalDateTime.parse(timeStr, FORMATTER)
                .toInstant(ZoneOffset.UTC)
                .toEpochMilli();
    }
}
package com.flink.vo;
import java.io.Serializable;
import lombok.Data;
import lombok.ToString;
/**
 * Value object: one event record enriched with the IP-derived area name
 * and the owning user's identity fields.
 *
 * <p>NOTE(review): field meanings below are inferred from names — confirm
 * against the producing job before relying on them.
 *
 * @author wjs
 * @version 创建时间:2025-5-26 17:47:06
 */
@Data
@ToString
public class EventIp implements Serializable{
    // Serialization version for this VO.
    private static final long serialVersionUID = 1L;
    // Source IP of the event.
    private String ip;
    // Area/region name resolved from the IP.
    private String areaName;
    // Customer/user id (presumably — verify against producer).
    private String cid;
    private String phone;
    private String nick;
    private String appKey;
    private String appType;
    // Event time as an epoch timestamp (units not shown here — confirm ms vs s).
    private Long eventTime;
    // Creation time as formatted text.
    private String createTime;
}
package com.flink.vo;
import java.io.Serializable;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import lombok.Data;
import lombok.ToString;
/**
 * Pairs a Kafka topic name with the Flink {@code DataStreamSource} that
 * consumes it; used to hand multiple sources to a job at once.
 *
 * <p>NOTE(review): {@code DataStreamSource} is not {@code Serializable};
 * if an instance of this class is ever actually Java-serialized this field
 * will fail — consider {@code transient}. Confirm whether serialization is
 * really needed here.
 *
 * @author wjs
 * @version 创建时间:2025-5-27 15:43:13
 */
@Data
@ToString
public class KafkaDataSource implements Serializable{
    // Serialization version for this VO.
    private static final long serialVersionUID = 1L;
    // Topic this stream was built from.
    private String topic;
    // The raw string stream consuming the topic.
    private DataStreamSource<String> dataStreamSource;
}
package com.flink.vo;
import java.io.Serializable;
import lombok.Data;
import lombok.ToString;
/**
 * Simple value object describing a Kafka subscription: topic name plus
 * consumer group.
 *
 * @author wjs
 * @version 创建时间:2025-5-27 15:37:52
 */
@Data
@ToString
public class KafkaTopic implements Serializable{
    // Serialization version for this VO.
    private static final long serialVersionUID = 1L;
    // Kafka topic name.
    private String topic;
    // Consumer group id used when subscribing.
    private String group;
}
package com.flink.vo;
import java.io.Serializable;
import lombok.Data;
import lombok.ToString;
/**
 * Output value object combining user identity fields with IP-derived
 * location info.
 *
 * <p>NOTE(review): field meanings are inferred from names — confirm against
 * the job that populates this before relying on them.
 *
 * @author wjs
 * @version 创建时间:2025-5-26 17:54:45
 */
@Data
@ToString
public class Result implements Serializable{
    // Serialization version for this VO.
    private static final long serialVersionUID = 1L;
    // Customer/user id (presumably — verify against producer).
    private String cid;
    private String phone;
    // Source IP and the area name resolved from it.
    private String ip;
    private String areaName;
    private String countryCode;
    private String userState;
    private String nick;
    private String appKey;
    private String appType;
    // Creation time as formatted text.
    private String createTime;
}
package com.flink.vo;
import java.io.Serializable;
import lombok.Data;
import lombok.ToString;
/**
 * Value object for a simi-user record consumed from Kafka.
 *
 * <p>NOTE(review): snake_case field names presumably mirror the incoming
 * JSON keys — verify against the producing topic before renaming.
 *
 * @author wjs
 * @version 创建时间:2025-5-26 16:44:29 类说明
 */
@Data
@ToString
public class SimiUserInfo implements Serializable {
    // Serialization version for this VO.
    private static final long serialVersionUID = 1L;
    private String nick;
    private String country_code;
    private String user_head_url;
    private String create_time;
    private String phone_number;
    private String register_time;
    private String flume_type;
    // Customer/user id (presumably — verify against producer).
    private String cid;
    private String user_state;
    // NOTE(review): public field breaks encapsulation; Lombok @Data already
    // generates an accessor — consider making it private once direct-field
    // callers (if any) are ruled out.
    public Long updateTime; // 时间戳用于水印 (timestamp used for the watermark)
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment