Commit da551c2d by 魏建枢

代码调整

parent 2bd5e5be
...@@ -5,50 +5,6 @@ ...@@ -5,50 +5,6 @@
<artifactId>eagleEye-flink_kafka</artifactId> <artifactId>eagleEye-flink_kafka</artifactId>
<version>1.0-SNAPSHOT</version> <version>1.0-SNAPSHOT</version>
<build> <build>
<resources>
<resource>
<directory>src/main/resources</directory>
</resource>
<resource>
<directory>src/main/java</directory>
<includes>
<include>**/*.xml</include>
</includes>
</resource>
</resources>
<finalName>${project.name}</finalName>
<pluginManagement>
<plugins>
<plugin>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-maven-plugin</artifactId>
<version>${spring.boot.version}</version>
<executions>
<execution>
<goals>
<goal>repackage</goal>
</goals>
</execution>
</executions>
<configuration>
<fork>true</fork>
<finalName>${project.build.finalName}</finalName>
</configuration>
</plugin>
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
<version>${maven.plugin.version}</version>
<configuration>
<source>${java.version}</source>
<target>${java.version}</target>
<encoding>UTF-8</encoding>
<compilerArgs>
<arg>-parameters</arg>
</compilerArgs>
</configuration>
</plugin>
</plugins>
</pluginManagement>
<plugins> <plugins>
<plugin> <plugin>
<artifactId>maven-shade-plugin</artifactId> <artifactId>maven-shade-plugin</artifactId>
...@@ -62,10 +18,7 @@ ...@@ -62,10 +18,7 @@
<configuration> <configuration>
<artifactSet> <artifactSet>
<excludes> <excludes>
<exclude>org.apache.flink:force-shading</exclude> <exclude>com.google.code.findbugs:jsr305</exclude>
<exclude>org.google.code.flindbugs:jar305</exclude>
<exclude>org.slf4j:*</exclude>
<excluder>org.apache.logging.log4j:*</excluder>
</excludes> </excludes>
</artifactSet> </artifactSet>
<filters> <filters>
...@@ -82,6 +35,7 @@ ...@@ -82,6 +35,7 @@
<transformer> <transformer>
<mainClass>com.flink.KafkaStreamingJob</mainClass> <mainClass>com.flink.KafkaStreamingJob</mainClass>
</transformer> </transformer>
<transformer />
</transformers> </transformers>
</configuration> </configuration>
</execution> </execution>
...@@ -109,38 +63,28 @@ ...@@ -109,38 +63,28 @@
</pluginRepositories> </pluginRepositories>
<dependencies> <dependencies>
<dependency> <dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-slf4j-impl</artifactId>
<version>2.17.1</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-api</artifactId>
<version>2.17.1</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-core</artifactId>
<version>2.17.1</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId> <groupId>org.apache.flink</groupId>
<artifactId>flink-runtime-web</artifactId> <artifactId>flink-runtime-web</artifactId>
<version>1.20.0</version> <version>1.20.0</version>
<scope>provided</scope> <scope>provided</scope>
</dependency> </dependency>
<dependency>
<groupId>jdk.tools</groupId>
<artifactId>jdk.tools</artifactId>
<version>1.8</version>
<scope>system</scope>
<systemPath>${JAVA_HOME}/lib/tools.jar</systemPath>
</dependency>
</dependencies> </dependencies>
<properties> <properties>
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding> <maven.plugin.version>3.8.1</maven.plugin.version>
<java.version>8</java.version> <flink.version>1.20.0</flink.version>
<scala.binary.version>2.11</scala.binary.version> <scala.binary.version>2.11</scala.binary.version>
<maven.compiler.target>1.8</maven.compiler.target>
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
<log4j.version>2.17.1</log4j.version> <log4j.version>2.17.1</log4j.version>
<flink.version>1.20.0</flink.version>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<maven.compiler.source>1.8</maven.compiler.source>
<fastjson.version>1.2.75</fastjson.version> <fastjson.version>1.2.75</fastjson.version>
<maven.plugin.version>3.8.1</maven.plugin.version>
</properties> </properties>
</project> </project>
...@@ -11,7 +11,8 @@ ...@@ -11,7 +11,8 @@
<!--属性设置 --> <!--属性设置 -->
<properties> <properties>
<!--java_JDK版本 --> <!--java_JDK版本 -->
<java.version>8</java.version> <maven.compiler.source>1.8</maven.compiler.source>
<maven.compiler.target>1.8</maven.compiler.target>
<!--maven打包插件 --> <!--maven打包插件 -->
<maven.plugin.version>3.8.1</maven.plugin.version> <maven.plugin.version>3.8.1</maven.plugin.version>
<!--编译编码UTF-8 --> <!--编译编码UTF-8 -->
...@@ -28,8 +29,20 @@ ...@@ -28,8 +29,20 @@
<scala.binary.version>2.11</scala.binary.version> <scala.binary.version>2.11</scala.binary.version>
</properties> </properties>
<!--通用依赖 --> <!--通用依赖 -->
<dependencies> <dependencies>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-planner_2.12</artifactId>
<version>1.20.0</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-api-java</artifactId>
<version>1.20.0</version>
</dependency>
<!-- json --> <!-- json -->
<dependency> <dependency>
<groupId>com.alibaba</groupId> <groupId>com.alibaba</groupId>
...@@ -80,7 +93,6 @@ ...@@ -80,7 +93,6 @@
<artifactId>mysql-connector-java</artifactId> <artifactId>mysql-connector-java</artifactId>
<version>8.0.27</version> <version>8.0.27</version>
</dependency> </dependency>
<!--================================集成外部依赖========================================== -->
<!--集成日志框架 start --> <!--集成日志框架 start -->
<dependency> <dependency>
<groupId>org.apache.logging.log4j</groupId> <groupId>org.apache.logging.log4j</groupId>
...@@ -99,9 +111,7 @@ ...@@ -99,9 +111,7 @@
<artifactId>log4j-core</artifactId> <artifactId>log4j-core</artifactId>
<version>${log4j.version}</version> <version>${log4j.version}</version>
</dependency> </dependency>
<!--================================集成外部依赖========================================== -->
<!--集成日志框架 end -->
<!--kafka依赖 start --> <!--kafka依赖 start -->
<dependency> <dependency>
<groupId>org.apache.flink</groupId> <groupId>org.apache.flink</groupId>
...@@ -123,14 +133,29 @@ ...@@ -123,14 +133,29 @@
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.apache.doris</groupId> <groupId>org.apache.doris</groupId>
<artifactId>flink-doris-connector-1.16</artifactId> <artifactId>flink-doris-connector-1.20</artifactId>
<version>25.0.0</version> <version>25.0.0</version>
</dependency> </dependency>
<!-- <dependency> --> <!-- <dependency> -->
<!-- <groupId>org.apache.hadoop</groupId> --> <!-- <groupId>org.apache.hadoop</groupId> -->
<!-- <artifactId>hadoop-client</artifactId> --> <!-- <artifactId>hadoop-client</artifactId> -->
<!-- <version>3.4.0</version> --> <!-- <version>3.4.0</version> -->
<!-- </dependency> --> <!-- </dependency> -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>3.3.6</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
<version>3.3.6</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce-client-core</artifactId>
<version>3.3.6</version>
</dependency>
<dependency> <dependency>
<groupId>org.projectlombok</groupId> <groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId> <artifactId>lombok</artifactId>
...@@ -141,24 +166,18 @@ ...@@ -141,24 +166,18 @@
<artifactId>ip2region</artifactId> <artifactId>ip2region</artifactId>
<version>2.6.4</version> <version>2.6.4</version>
</dependency> </dependency>
<dependency>
<groupId>jdk.tools</groupId>
<artifactId>jdk.tools</artifactId>
<version>1.8</version>
<scope>system</scope>
<!-- 根据 JDK 版本选择路径 -->
<systemPath>${JAVA_HOME}/lib/tools.jar</systemPath>
</dependency>
</dependencies> </dependencies>
<!--编译打包 -->
<build>
<finalName>${project.name}</finalName>
<!--资源文件打包 -->
<resources>
<resource>
<directory>src/main/resources</directory>
</resource>
<resource>
<directory>src/main/java</directory>
<includes>
<include>**/*.xml</include>
</includes>
</resource>
</resources>
<build>
<plugins> <plugins>
<plugin> <plugin>
<groupId>org.apache.maven.plugins</groupId> <groupId>org.apache.maven.plugins</groupId>
...@@ -173,14 +192,13 @@ ...@@ -173,14 +192,13 @@
<configuration> <configuration>
<artifactSet> <artifactSet>
<excludes> <excludes>
<exclude>org.apache.flink:force-shading</exclude> <exclude>com.google.code.findbugs:jsr305</exclude>
<exclude>org.google.code.flindbugs:jar305</exclude>
<exclude>org.slf4j:*</exclude>
<excluder>org.apache.logging.log4j:*</excluder>
</excludes> </excludes>
</artifactSet> </artifactSet>
<filters> <filters>
<filter> <filter>
<!-- Do not copy the signatures in the META-INF folder. Otherwise,
this might cause SecurityExceptions when using the JAR. -->
<artifact>*:*</artifact> <artifact>*:*</artifact>
<excludes> <excludes>
<exclude>META-INF/*.SF</exclude> <exclude>META-INF/*.SF</exclude>
...@@ -192,51 +210,17 @@ ...@@ -192,51 +210,17 @@
<transformers> <transformers>
<transformer <transformer
implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer"> implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
<!-- Replace this with the main class of your job -->
<mainClass>com.flink.KafkaStreamingJob</mainClass> <mainClass>com.flink.KafkaStreamingJob</mainClass>
</transformer> </transformer>
<transformer
implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer" />
</transformers> </transformers>
</configuration> </configuration>
</execution> </execution>
</executions> </executions>
</plugin> </plugin>
</plugins> </plugins>
<!--插件统一管理 -->
<pluginManagement>
<plugins>
<!--maven打包插件 -->
<plugin>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-maven-plugin</artifactId>
<version>${spring.boot.version}</version>
<configuration>
<fork>true</fork>
<finalName>${project.build.finalName}</finalName>
</configuration>
<executions>
<execution>
<goals>
<goal>repackage</goal>
</goals>
</execution>
</executions>
</plugin>
<!--编译打包插件 -->
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
<version>${maven.plugin.version}</version>
<configuration>
<source>${java.version}</source>
<target>${java.version}</target>
<encoding>UTF-8</encoding>
<compilerArgs>
<arg>-parameters</arg>
</compilerArgs>
</configuration>
</plugin>
</plugins>
</pluginManagement>
</build> </build>
<!--配置Maven项目中需要使用的远程仓库 --> <!--配置Maven项目中需要使用的远程仓库 -->
......
...@@ -4,7 +4,6 @@ import org.slf4j.Logger; ...@@ -4,7 +4,6 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import com.flink.achieve.ods.OdsEventLogSourceAchi; import com.flink.achieve.ods.OdsEventLogSourceAchi;
import com.flink.common.KafkaSourceConnector;
/** /**
* @author wjs * @author wjs
...@@ -13,28 +12,10 @@ import com.flink.common.KafkaSourceConnector; ...@@ -13,28 +12,10 @@ import com.flink.common.KafkaSourceConnector;
*/ */
public class KafkaStreamingJob { public class KafkaStreamingJob {
private static final Logger logger = LoggerFactory.getLogger(KafkaSourceConnector.class); private static final Logger logger = LoggerFactory.getLogger(KafkaStreamingJob.class);
public static void main(String[] args) throws Exception { public static void main(String[] args) throws Exception {
// StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
// DataStreamSource<String> dataStreamSource = KafkaSourceConnector.sourceKafka(env, "ods_collect_log", "collectGroup");
// //=================5.数据简单处理======================
// dataStreamSource.flatMap(new FlatMapFunction<String, String>() {
// /**
// *
// */
// private static final long serialVersionUID = 1L;
// @Override
// public void flatMap(String record, Collector<String> collector) throws Exception {
// logger.info("正在预处理源数据:{}", record);
// }
// });
// //=================6.启动服务=========================================
// env.execute("聚合统计JOB");
// OdsCollectLogSourceAchi sourceCommonBase = new OdsCollectLogSourceAchi();
// sourceCommonBase.handleDataStreamSource("测试", "ods_collect_log", "collectGroup");
OdsEventLogSourceAchi sourceEventLog = new OdsEventLogSourceAchi(); OdsEventLogSourceAchi sourceEventLog = new OdsEventLogSourceAchi();
sourceEventLog.handleDataStreamSource("事件日志聚合JOB", "ods_event_log", "eventLogGroup"); sourceEventLog.handleDataStreamSource("事件日志聚合JOB", "ods_event_log", "eventLogGroup");
} }
......
package com.flink.achieve.ods; package com.flink.achieve.ods;
import java.io.Serializable; import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.apache.flink.connector.jdbc.JdbcConnectionOptions;
import org.apache.flink.connector.jdbc.JdbcSink;
import org.apache.flink.streaming.api.datastream.DataStreamSource; import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
...@@ -16,9 +11,7 @@ import com.alibaba.fastjson.JSONObject; ...@@ -16,9 +11,7 @@ import com.alibaba.fastjson.JSONObject;
import com.alibaba.fastjson.TypeReference; import com.alibaba.fastjson.TypeReference;
import com.flink.common.SourceCommonBase; import com.flink.common.SourceCommonBase;
import com.flink.enums.AppTypeEnum; import com.flink.enums.AppTypeEnum;
import com.flink.util.LoadPropertiesFile;
import com.flink.vo.OdsCollectLog; import com.flink.vo.OdsCollectLog;
import com.flink.vo.EventIpConvert;
import com.flink.vo.android.AndroidCollectionBody; import com.flink.vo.android.AndroidCollectionBody;
import com.flink.vo.android.deviceInfo.AndroidA1; import com.flink.vo.android.deviceInfo.AndroidA1;
import com.flink.vo.android.envInfo.AndroidEnvInfo; import com.flink.vo.android.envInfo.AndroidEnvInfo;
...@@ -41,7 +34,7 @@ public class OdsCollectLogSourceAchi extends SourceCommonBase implements Seriali ...@@ -41,7 +34,7 @@ public class OdsCollectLogSourceAchi extends SourceCommonBase implements Seriali
private static final Logger logger = LoggerFactory.getLogger(OdsCollectLogSourceAchi.class); private static final Logger logger = LoggerFactory.getLogger(OdsCollectLogSourceAchi.class);
@Override @Override
public void parseSourceKafkaJson(String record) throws Exception { public JSONObject parseSourceKafkaJson(String record) throws Exception {
logger.info("record:{}",record); logger.info("record:{}",record);
// TODO 数据的 ETL 处理 // TODO 数据的 ETL 处理
OdsCollectLog odsCollectLog = JSONObject.parseObject(record,new TypeReference<OdsCollectLog>(){}); OdsCollectLog odsCollectLog = JSONObject.parseObject(record,new TypeReference<OdsCollectLog>(){});
...@@ -82,6 +75,7 @@ public class OdsCollectLogSourceAchi extends SourceCommonBase implements Seriali ...@@ -82,6 +75,7 @@ public class OdsCollectLogSourceAchi extends SourceCommonBase implements Seriali
iosBodyObj.setA1(a1); iosBodyObj.setA1(a1);
iosBodyObj.setG1(g1); iosBodyObj.setG1(g1);
} }
return null;
} }
@Override @Override
......
package com.flink.achieve.ods; package com.flink.achieve.ods;
import java.io.Serializable; import java.io.Serializable;
import java.net.Inet6Address;
import java.net.InetAddress;
import java.net.UnknownHostException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays;
import java.util.List; import java.util.List;
import java.util.stream.Collectors;
import org.apache.commons.collections.CollectionUtils; import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.apache.flink.api.common.io.ParseException; import org.apache.flink.api.common.io.ParseException;
import org.apache.flink.connector.jdbc.JdbcConnectionOptions;
import org.apache.flink.connector.jdbc.JdbcSink;
import org.apache.flink.streaming.api.datastream.DataStreamSource; import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
...@@ -23,9 +15,7 @@ import com.alibaba.fastjson.JSONObject; ...@@ -23,9 +15,7 @@ import com.alibaba.fastjson.JSONObject;
import com.alibaba.fastjson.TypeReference; import com.alibaba.fastjson.TypeReference;
import com.flink.common.SourceCommonBase; import com.flink.common.SourceCommonBase;
import com.flink.enums.AppTypeEnum; import com.flink.enums.AppTypeEnum;
import com.flink.util.LoadPropertiesFile;
import com.flink.util.ip2region.SearcherUtil; import com.flink.util.ip2region.SearcherUtil;
import com.flink.vo.EventIpConvert;
import com.flink.vo.OdsEventLog; import com.flink.vo.OdsEventLog;
import com.flink.vo.UserProperties; import com.flink.vo.UserProperties;
...@@ -43,7 +33,7 @@ public class OdsEventLogSourceAchi extends SourceCommonBase implements Serializa ...@@ -43,7 +33,7 @@ public class OdsEventLogSourceAchi extends SourceCommonBase implements Serializa
private static final Logger logger = LoggerFactory.getLogger(OdsEventLogSourceAchi.class); private static final Logger logger = LoggerFactory.getLogger(OdsEventLogSourceAchi.class);
@Override @Override
public void parseSourceKafkaJson(String record) throws ParseException, Exception { public JSONObject parseSourceKafkaJson(String record) throws ParseException, Exception {
logger.info("OdsEventLogSourceAchi record:{}",record); logger.info("OdsEventLogSourceAchi record:{}",record);
// TODO 数据的 ETL 处理 // TODO 数据的 ETL 处理
OdsEventLog odsEventLog = JSONObject.parseObject(record,new TypeReference<OdsEventLog>(){}); OdsEventLog odsEventLog = JSONObject.parseObject(record,new TypeReference<OdsEventLog>(){});
...@@ -66,7 +56,7 @@ public class OdsEventLogSourceAchi extends SourceCommonBase implements Serializa ...@@ -66,7 +56,7 @@ public class OdsEventLogSourceAchi extends SourceCommonBase implements Serializa
String routeIp = odsEventLog.getRoute_ip(); String routeIp = odsEventLog.getRoute_ip();
String userProperties = odsEventLog.getUser_properties(); String userProperties = odsEventLog.getUser_properties();
String uniqueId = odsEventLog.getUnique_id(); String uniqueId = odsEventLog.getUnique_id();
logger.info("组装数据 body:{}",odsEventLog.toString());
String cid = null; String cid = null;
String phone = null; String phone = null;
String nick = null; String nick = null;
...@@ -88,40 +78,37 @@ public class OdsEventLogSourceAchi extends SourceCommonBase implements Serializa ...@@ -88,40 +78,37 @@ public class OdsEventLogSourceAchi extends SourceCommonBase implements Serializa
} }
} }
} }
List<String> ips = convertStringToList(routeIp); List<String> ips = SearcherUtil.convertStringToList(routeIp);
if(CollectionUtils.isEmpty(ips)) { if(CollectionUtils.isEmpty(ips)) {
return; return null;
} }
List<String> ipList = new ArrayList<>(); List<String> ipList = new ArrayList<>();
List<String> areaNameList = new ArrayList<>(); List<String> areaNameList = new ArrayList<>();
for(String ip:ips) { for(String ip:ips) {
if(!ipv6(ip)) { if(!SearcherUtil.ipv6(ip)) {
String area_name = SearcherUtil.getCityInfoByFile(ip); String area_name = SearcherUtil.getCityInfoByFile(ip);
ipList.add(ip); ipList.add(ip);
areaNameList.add(area_name); areaNameList.add(area_name);
} }
} }
logger.info("组装数据开始"); logger.info("组装数据开始");
List<EventIpConvert> eventIpConvertList = new ArrayList<>();
EventIpConvert eventIpConvert = new EventIpConvert(); JSONObject jsonObj = new JSONObject();
eventIpConvert.setId(id); jsonObj.put("id", id);
eventIpConvert.setIp(ipList.toString()); jsonObj.put("ips", ips.toString());
eventIpConvert.setAreaName(areaNameList.toString()); jsonObj.put("areaNameList", areaNameList.toString());
eventIpConvert.setDeviceId(deviceId); jsonObj.put("deviceId", deviceId);
eventIpConvert.setCid(cid); jsonObj.put("cid", cid);
eventIpConvert.setPhone(phone); jsonObj.put("phone", phone);
eventIpConvert.setNick(nick); jsonObj.put("nick", nick);
eventIpConvert.setCreateTime(createTime); jsonObj.put("createTime", createTime);
eventIpConvert.setDt(createTime);
eventIpConvertList.add(eventIpConvert);
logger.info("组装数据结束");
this.insert(eventIpConvertList);
if(StringUtils.equals(appType, AppTypeEnum.ANDROID.getCode())) { if(StringUtils.equals(appType, AppTypeEnum.ANDROID.getCode())) {
}else if(StringUtils.equals(appType, AppTypeEnum.IOS.getCode())) { }else if(StringUtils.equals(appType, AppTypeEnum.IOS.getCode())) {
} }
return jsonObj;
} }
@Override @Override
...@@ -130,66 +117,6 @@ public class OdsEventLogSourceAchi extends SourceCommonBase implements Serializa ...@@ -130,66 +117,6 @@ public class OdsEventLogSourceAchi extends SourceCommonBase implements Serializa
} }
public static List<String> convertStringToList(String str) {
if (StringUtils.isEmpty(str) || str.trim().isEmpty()) {
return new ArrayList<>(); // 返回空列表
}
// 去除中括号和多余空格
String trimmedStr = str.replaceAll("^\\[|\\]$", "").trim();
// 如果去除中括号后字符串为空,返回空列表
if (trimmedStr.isEmpty()) {
return new ArrayList<>();
}
// 按逗号拆分并去除空格
return Arrays.stream(trimmedStr.split(","))
.map(String::trim)
.collect(Collectors.toList());
}
private static Boolean ipv6(String ip) {
Boolean flag = false;
try {
Inet6Address address = (Inet6Address) InetAddress.getByName(ip);
System.out.println(ip + " is an IPv6 address.");
flag = true;
} catch (UnknownHostException e) {
System.out.println(ip + " is not a valid IPv6 address.");
} catch (ClassCastException e) {
System.out.println(ip + " is not an IPv6 address.");
}
return flag;
}
public static void insert(List<EventIpConvert> eventIpConvertList) throws Exception {
logger.info("insert eventIpConvertList:{}",eventIpConvertList.toString());
final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(1);
env.fromData(eventIpConvertList)
.addSink(JdbcSink.sink(
"INSERT INTO `bi`.`event_ip_convert` " +
"(`id`, `ip`, `area_name`, `device_id`, `cid`, `phone`, `nick`, `create_time`, `dt`) " +
"VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?);",
(ps, data) -> {
ps.setString(1, data.getId());
ps.setString(2, data.getIp());
ps.setString(3, data.getAreaName());
ps.setString(4, data.getDeviceId());
ps.setString(5, data.getCid());
ps.setString(6, data.getPhone());
ps.setString(7, data.getNick());
ps.setString(8, data.getCreateTime());
ps.setString(9, data.getDt());
},
new JdbcConnectionOptions.JdbcConnectionOptionsBuilder()
.withUsername(LoadPropertiesFile.getPropertyFileValues("doris.username"))
.withPassword("")
.withDriverName(LoadPropertiesFile.getPropertyFileValues("doris.driver_class_name"))
.withUrl(LoadPropertiesFile.getPropertyFileValues("doris.jdbc_url"))
.build()
));
env.execute();
}
} }
package com.flink.common;
import java.util.Properties;
import org.apache.doris.flink.cfg.DorisExecutionOptions;
import org.apache.doris.flink.cfg.DorisOptions;
import org.apache.doris.flink.sink.DorisSink;
import org.apache.doris.flink.sink.writer.LoadConstants;
import org.apache.doris.flink.sink.writer.serializer.RowDataSerializer;
import org.apache.flink.table.data.RowData;
import org.apache.flink.table.types.DataType;
import com.flink.util.LoadPropertiesFile;
/**
* @author wjs
* @version 创建时间:2024-12-16 18:21:22
* 类说明 sink Kafka
*/
public class DorisConnector {
public static DorisSink<RowData> sinkDoris(String[] fields,DataType[] types,String tableName) {
//=================设置属性=========================================
Properties streamLoadProps = new Properties();
streamLoadProps.setProperty("format", "json");
streamLoadProps.setProperty("read_json_by_line", "true");
streamLoadProps.setProperty("strip_outer_array", "false");
streamLoadProps.setProperty("sink.enable-2pc", "true");
//=================Doris Sink 配置=========================================
DorisOptions dorisOptions = DorisOptions.builder()
.setFenodes(LoadPropertiesFile.getPropertyFileValues("doris.fe"))
.setTableIdentifier(tableName)
.setUsername(LoadPropertiesFile.getPropertyFileValues("doris.username"))
.setPassword("")
.build();
//=================流式处理=========================================
DorisSink<RowData> dorisSink = DorisSink.<RowData>builder()
.setDorisOptions(dorisOptions)
.setDorisExecutionOptions(DorisExecutionOptions.builder()
.setLabelPrefix("label-doris")
.setStreamLoadProp(streamLoadProps)
.build())
.setSerializer(RowDataSerializer.builder()
.setFieldNames(fields)
.setFieldType(types)
.setType(LoadConstants.JSON)
.build())
.build();
return dorisSink;
}
}
...@@ -19,23 +19,38 @@ public class EnvironmentSettings { ...@@ -19,23 +19,38 @@ public class EnvironmentSettings {
//环境设置 //环境设置
public static StreamExecutionEnvironment environmentSettings() { public static StreamExecutionEnvironment environmentSettings() {
Configuration conf = new Configuration(); Configuration config = new Configuration();
conf.setInteger("rest.port", 8081); // config.setString("parallelism.default", "4");
// config.setString("taskmanager.numberOfTaskSlots", "16");
// config.setString("taskmanager.memory.flink.size", "8192m");
// config.setString("taskmanager.memory.jvm-metaspace.size", "4096m");
config.setString("state.backend", "filesystem");
// 指定检查点目录(必须是持久化存储路径,如 HDFS)
config.setString("state.checkpoints.dir", "hdfs://140.245.112.44:8020/user/ck");
// 可选:指定保存点目录
config.setString("state.savepoints.dir", "hdfs://140.245.112.44:8020/user/savepoints");
// conf.setInteger("rest.port", 8081);
// StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(conf); // StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(conf);
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(config);
env.getCheckpointConfig().disableCheckpointing();
// env.setParallelism(4); // 调整并行度
//=================启动服务========================================= //=================启动服务=========================================
//开启flink的checkpoint功能:每隔5000ms启动一个检查点(设置checkpoint的声明周期) //开启flink的checkpoint功能:每隔5000ms启动一个检查点(设置checkpoint的声明周期)
//设置有且仅有一次模式 目前支持EXACTLY_ONCE/AT_LEAST_ONCE //设置有且仅有一次模式 目前支持EXACTLY_ONCE/AT_LEAST_ONCE
env.enableCheckpointing(60000L, CheckpointingMode.EXACTLY_ONCE); env.enableCheckpointing(180000L, CheckpointingMode.EXACTLY_ONCE);
// env.getCheckpointConfig().setCheckpointingConsistencyMode(CheckpointingMode.EXACTLY_ONCE); // env.getCheckpointConfig().setCheckpointingConsistencyMode(CheckpointingMode.EXACTLY_ONCE);
//checkpoint高级选项设置 //checkpoint高级选项设置
//设置checkpoint的模式为exactly-once(这也是默认值) //设置checkpoint的模式为exactly-once(这也是默认值)
//确保检查点之间至少有500ms间隔(即checkpoint的最小间隔) //确保检查点之间至少有500ms间隔(即checkpoint的最小间隔)
env.getCheckpointConfig().setMinPauseBetweenCheckpoints(500); env.getCheckpointConfig().setMinPauseBetweenCheckpoints(50000);
//确保检查必须在1min之内完成,否则就会被丢弃掉(即checkpoint的超时时间) //确保检查必须在1min之内完成,否则就会被丢弃掉(即checkpoint的超时时间)
env.getCheckpointConfig().setCheckpointTimeout(60000); env.getCheckpointConfig().setCheckpointInterval(30000); // 30秒检查点间隔
env.getCheckpointConfig().setCheckpointTimeout(600000);
//同一时间只允许操作一个检查点 //同一时间只允许操作一个检查点
env.getCheckpointConfig().setMaxConcurrentCheckpoints(1); env.getCheckpointConfig().setMaxConcurrentCheckpoints(1);
env.getCheckpointConfig().setTolerableCheckpointFailureNumber(3); // 允许3次失败
// 在这个基础之上,添加快照 // 在这个基础之上,添加快照
//开启在 job 中止后仍然保留的 externalizedcheckpoints 程序即使被cancel后,也会保留checkpoint数据,以便根据实际需要恢复到指定的checkpoint //开启在 job 中止后仍然保留的 externalizedcheckpoints 程序即使被cancel后,也会保留checkpoint数据,以便根据实际需要恢复到指定的checkpoint
env.getCheckpointConfig().setExternalizedCheckpointRetention(ExternalizedCheckpointRetention.RETAIN_ON_CANCELLATION); env.getCheckpointConfig().setExternalizedCheckpointRetention(ExternalizedCheckpointRetention.RETAIN_ON_CANCELLATION);
...@@ -43,6 +58,7 @@ public class EnvironmentSettings { ...@@ -43,6 +58,7 @@ public class EnvironmentSettings {
env.setRuntimeMode(RuntimeExecutionMode.STREAMING); env.setRuntimeMode(RuntimeExecutionMode.STREAMING);
//开启checkpoints //开启checkpoints
env.getCheckpointConfig().enableUnalignedCheckpoints(); env.getCheckpointConfig().enableUnalignedCheckpoints();
// env.disableOperatorChaining();
return env; return env;
} }
} }
...@@ -21,8 +21,8 @@ import com.flink.util.LoadPropertiesFile; ...@@ -21,8 +21,8 @@ import com.flink.util.LoadPropertiesFile;
* @version 创建时间:2024-12-16 15:57:41 * @version 创建时间:2024-12-16 15:57:41
* 类说明 source Kafka * 类说明 source Kafka
*/ */
public class KafkaSourceConnector { public class KafkaConnector {
private static final Logger logger = LoggerFactory.getLogger(KafkaSourceConnector.class); private static final Logger logger = LoggerFactory.getLogger(KafkaConnector.class);
//kafka资源配置文件信息的获取 //kafka资源配置文件信息的获取
public static DataStreamSource<String> sourceKafka(StreamExecutionEnvironment env,String topic,String group) { public static DataStreamSource<String> sourceKafka(StreamExecutionEnvironment env,String topic,String group) {
...@@ -30,11 +30,11 @@ public class KafkaSourceConnector { ...@@ -30,11 +30,11 @@ public class KafkaSourceConnector {
.setBootstrapServers(LoadPropertiesFile.getPropertyFileValues("kafka.bootstrapServers"))//设置kafka地址 .setBootstrapServers(LoadPropertiesFile.getPropertyFileValues("kafka.bootstrapServers"))//设置kafka地址
.setTopics(topic)//设置主题,支持多种主题组合 .setTopics(topic)//设置主题,支持多种主题组合
.setGroupId(group)//设置消费这组id .setGroupId(group)//设置消费这组id
.setValueOnlyDeserializer(new SafeStringDeserializer()) // 自定义容错反序列化器
.setStartingOffsets(OffsetsInitializer.earliest())//设置消费模式,支持多种消费模式 .setStartingOffsets(OffsetsInitializer.earliest())//设置消费模式,支持多种消费模式
.setValueOnlyDeserializer(new SimpleStringSchema())//设置反序列化器 .setProperty("partition.discovery.interval.ms", "10000")//动态检查新分区, 30 秒检查一次新分区
.setProperty("partition.discovery.interval.ms", "10000")//动态检查新分区, 10 秒检查一次新分区
.build();//构建全部参数 .build();//构建全部参数
return env.fromSource(kafkaSource, WatermarkStrategy.noWatermarks(), "odsCollectLog"); return env.fromSource(kafkaSource, WatermarkStrategy.noWatermarks(), topic);
} }
//发送kafka //发送kafka
......
package com.flink.common;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.connector.base.DeliveryGuarantee;
import org.apache.flink.connector.kafka.sink.KafkaRecordSerializationSchema;
import org.apache.flink.connector.kafka.sink.KafkaSink;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import com.flink.util.LoadPropertiesFile;
/**
* @author wjs
* @version 创建时间:2024-12-16 18:21:22
* 类说明 sink Kafka
*/
public class KafkaSinkConnector {
public static void sinkKafka(StreamExecutionEnvironment env,String topic) {
String bootstrapServers = LoadPropertiesFile.getPropertyFileValues("kafka.bootstrapServers");
KafkaSink<String> sink = KafkaSink.<String>builder()
//设置kafka地址
.setBootstrapServers(bootstrapServers)
//设置消息序列号方式
.setRecordSerializer(KafkaRecordSerializationSchema.builder()
.setTopic(topic)
.setValueSerializationSchema(new SimpleStringSchema())
.build()
)
//至少一次
.setDeliveryGuarantee(DeliveryGuarantee.AT_LEAST_ONCE)
.build();
}
}
package com.flink.common;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
/**
* @author wjs
* @version 创建时间:2025-4-25 19:59:53
* 类说明
*/
public class SafeStringDeserializer extends SimpleStringSchema {
/**
*
*/
private static final long serialVersionUID = 1L;
@Override
public String deserialize(byte[] message) {
try {
return super.deserialize(message);
} catch (Exception e) {
// 记录死信到指定Topic
return null;
}
}
}
package com.flink.common; package com.flink.common;
import org.apache.flink.api.common.functions.FlatMapFunction; import java.time.Instant;
import java.time.LocalDateTime;
import java.time.ZonedDateTime;
import java.time.format.DateTimeFormatter;
import java.util.Objects;
import org.apache.doris.flink.sink.DorisSink;
import org.apache.flink.api.common.io.ParseException; import org.apache.flink.api.common.io.ParseException;
import org.apache.flink.streaming.api.datastream.DataStreamSource; import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.util.Collector; import org.apache.flink.table.api.DataTypes;
import org.apache.flink.table.data.GenericRowData;
import org.apache.flink.table.data.RowData;
import org.apache.flink.table.data.StringData;
import org.apache.flink.table.data.TimestampData;
import org.apache.flink.table.types.DataType;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import com.alibaba.fastjson.JSONObject;
/** /**
* @author wjs * @author wjs
* @version 创建时间:2024-12-20 10:43:56 * @version 创建时间:2024-12-20 10:43:56
...@@ -20,35 +33,50 @@ public abstract class SourceCommonBase { ...@@ -20,35 +33,50 @@ public abstract class SourceCommonBase {
//1. 环境的设置 //1. 环境的设置
StreamExecutionEnvironment env = EnvironmentSettings.environmentSettings(); StreamExecutionEnvironment env = EnvironmentSettings.environmentSettings();
//2.资源配置文件信息的获取 //2.资源配置文件信息的获取
DataStreamSource<String> dataStreamSource = KafkaSourceConnector.sourceKafka(env, topic, group); DataStreamSource<String> dataStreamSource = KafkaConnector.sourceKafka(env, topic, group);
//3.消费者接收数据并做json的粗粒度解析
DataStreamSource<String> resultDataStreamSource = praseJson(dataStreamSource);
//4.将处理完之后的数据发往 kafka 队列
// sendToSinkKafka(resultDataStreamSource);
//=================6.启动服务=========================================
env.execute(jobName);
}
/** //=================配置入库字段=========================================
* 数据的 ETL 处理 String[] fields = {"id", "ip", "area_name", "device_id", "cid", "phone", "nick","create_time","dt","__DORIS_DELETE_SIGN__"};
* @return DataType[] types = {DataTypes.STRING(), DataTypes.STRING(), DataTypes.STRING(), DataTypes.STRING(),
*/ DataTypes.STRING(), DataTypes.STRING(), DataTypes.STRING(),DataTypes.STRING(),DataTypes.TIMESTAMP()};
public DataStreamSource<String> praseJson(DataStreamSource<String> dataStreamSource) { //=================流式处理=========================================
//=================5.数据简单处理====================== String tableName = "bi.event_ip_convert";
dataStreamSource.flatMap(new FlatMapFunction<String, String>() { DorisSink<RowData> dorisSink = DorisConnector.sinkDoris(fields, types, tableName);
/**
* // 数据处理流水线
*/ dataStreamSource
private static final long serialVersionUID = 1L; .map(value -> {
@Override try {
public void flatMap(String record, Collector<String> collector) throws Exception { JSONObject jsonObj = parseSourceKafkaJson(value);
logger.info("正在预处理源数据:{}", record); if(null == jsonObj) {
// 抽象方法的设置 return null;
parseSourceKafkaJson(record);
} }
}); // String[] parts = parsed.split(",");
GenericRowData row = new GenericRowData(9);
DateTimeFormatter formatter = DateTimeFormatter.ISO_INSTANT;
System.out.println("value" + value);
// 字段映射
row.setField(0, StringData.fromString((String)jsonObj.get("id")));
row.setField(1, StringData.fromString((String) jsonObj.get("ips")));
row.setField(2, StringData.fromString((String) jsonObj.get("areaNameList")));
row.setField(3, StringData.fromString((String) jsonObj.get("deviceId")));
row.setField(4, StringData.fromString((String) jsonObj.get("cid")));
row.setField(5, StringData.fromString((String) jsonObj.get("phone")));
row.setField(6, StringData.fromString((String) jsonObj.get("nick")));
row.setField(7, StringData.fromString((String) jsonObj.get("createTime")));
row.setField(8, TimestampData.fromInstant(Instant.now()));
return (RowData)row;
} catch (Exception e) {
System.err.println("解析失败: "+e.toString());
return null; return null;
} }
})
.filter(Objects::nonNull)
// .print(">>>>>>>>>>>>>>>");
.sinkTo(dorisSink);
env.execute(jobName);
}
/** /**
* 解析JSON数据(抽象方法的设置) * 解析JSON数据(抽象方法的设置)
...@@ -57,7 +85,7 @@ public abstract class SourceCommonBase { ...@@ -57,7 +85,7 @@ public abstract class SourceCommonBase {
* @throws ParseException * @throws ParseException
* @throws Exception * @throws Exception
*/ */
public abstract void parseSourceKafkaJson(String record) throws ParseException, Exception; public abstract JSONObject parseSourceKafkaJson(String record) throws ParseException, Exception;
/** /**
* 将处理完之后的数据发往 kafka 队列 供下游计算使用(抽象方法的设置) * 将处理完之后的数据发往 kafka 队列 供下游计算使用(抽象方法的设置)
......
package com.flink.util.ip2region; package com.flink.util.ip2region;
import java.io.IOException; import java.io.IOException;
import java.net.Inet6Address;
import java.net.InetAddress;
import java.net.UnknownHostException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;
import org.apache.commons.lang3.StringUtils;
import org.lionsoul.ip2region.xdb.Searcher; import org.lionsoul.ip2region.xdb.Searcher;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import lombok.extern.log4j.Log4j2;
/** /**
* @author wjs * @author wjs
* @version 创建时间:2025-2-14 14:38:41 * @version 创建时间:2025-2-14 14:38:41
* 类说明 https://blog.csdn.net/qq_37284798/article/details/130005988 * 类说明 https://blog.csdn.net/qq_37284798/article/details/130005988
*/ */
@Log4j2
public class SearcherUtil { public class SearcherUtil {
private static final Logger logger = LoggerFactory.getLogger(SearcherUtil.class);
public static String getCityInfoByFile(String ip) { public static String getCityInfoByFile(String ip) {
// 1、创建 searcher 对象 // 1、创建 searcher 对象
String dbPath = "D:\\gitEagleEye\\eagleEye-service\\src\\main\\resources\\ip2region.xdb"; // String dbPath = "D:\\gitEagleEye\\eagleEye-service\\src\\main\\resources\\ip2region.xdb";
// String dbPath = "/home/opc/ip2region.xdb"; String dbPath = "/home/opc/ip2region.xdb";
Searcher searcher; Searcher searcher;
try { try {
searcher = Searcher.newWithFileOnly(dbPath); searcher = Searcher.newWithFileOnly(dbPath);
} catch (IOException e) { } catch (IOException e) {
log.error("failed to create searcher with `{}`: ", dbPath, e); logger.error("failed to create searcher with `{}`: ", dbPath, e);
return null; return null;
} }
...@@ -32,21 +42,51 @@ public class SearcherUtil { ...@@ -32,21 +42,51 @@ public class SearcherUtil {
long sTime = System.nanoTime(); long sTime = System.nanoTime();
String region = searcher.search(ip); String region = searcher.search(ip);
long cost = TimeUnit.NANOSECONDS.toMicros(System.nanoTime() - sTime); long cost = TimeUnit.NANOSECONDS.toMicros(System.nanoTime() - sTime);
log.info("{region: {}, ioCount: {}, took: {} μs}", region, searcher.getIOCount(), cost); logger.info("{region: {}, ioCount: {}, took: {} μs}", region, searcher.getIOCount(), cost);
return region; return region;
} catch (Exception e) { } catch (Exception e) {
log.info("failed to search({}): ", ip, e); logger.info("failed to search({}): ", ip, e);
}finally { }finally {
try { try {
searcher.close(); searcher.close();
} catch (IOException e) { } catch (IOException e) {
log.info("failed to close({}): ", ip, e); logger.info("failed to close({}): ", ip, e);
} }
} }
return null; return null;
// 3、备注:并发使用,每个线程需要创建一个独立的 searcher 对象单独使用。 // 3、备注:并发使用,每个线程需要创建一个独立的 searcher 对象单独使用。
} }
/**
 * Splits a bracketed, comma-separated string (e.g. {@code "[a, b, c]"}) into a
 * list of trimmed tokens. Null, blank, or empty-bracket input yields an empty
 * list.
 *
 * @param str raw string, optionally wrapped in a single pair of brackets
 * @return list of trimmed comma-separated tokens; never null
 */
public static List<String> convertStringToList(String str) {
	// Null or whitespace-only input -> empty list.
	// (Equivalent to the previous StringUtils.isEmpty + trim().isEmpty() check.)
	if (str == null || str.trim().isEmpty()) {
		return new ArrayList<>();
	}
	// Strip one leading '[' and one trailing ']' (if present), then trim.
	String inner = str.replaceAll("^\\[|\\]$", "").trim();
	if (inner.isEmpty()) {
		return new ArrayList<>();
	}
	// Split on commas and trim each token.
	List<String> tokens = new ArrayList<>();
	for (String part : inner.split(",")) {
		tokens.add(part.trim());
	}
	return tokens;
}
/**
 * Returns {@code true} if the given string resolves to an IPv6 address.
 *
 * FIX: the original cast the result of {@code InetAddress.getByName} to
 * {@code Inet6Address} and relied on catching {@link ClassCastException} for
 * IPv4 input — exception-driven control flow. Replaced with {@code instanceof};
 * the printed messages and the {@code Boolean} return are unchanged.
 *
 * NOTE(review): for non-literal input (hostnames) getByName may perform a DNS
 * lookup — confirm callers only pass IP literals.
 *
 * @param ip candidate address string
 * @return true iff the string parses/resolves to an IPv6 address
 */
public static Boolean ipv6(String ip) {
	Boolean flag = false;
	try {
		if (InetAddress.getByName(ip) instanceof Inet6Address) {
			System.out.println(ip + " is an IPv6 address.");
			flag = true;
		} else {
			// IPv4 (or other) address: same message the original printed on ClassCastException.
			System.out.println(ip + " is not an IPv6 address.");
		}
	} catch (UnknownHostException e) {
		System.out.println(ip + " is not a valid IPv6 address.");
	}
	return flag;
}
public static void main(String[] args) throws Exception { public static void main(String[] args) throws Exception {
getCityInfoByFile("1.9.241.214"); getCityInfoByFile("1.9.241.214");
} }
......
package com.flink.vo;
import java.io.Serializable;
import lombok.Data;
/**
 * @author wjs
 * @version created 2025-4-24 13:06:19
 * Value object for an IP-conversion event record. Lombok {@code @Data}
 * generates getters/setters, equals/hashCode, and toString.
 * NOTE(review): field names appear to map to the Doris table columns
 * (id, ip, area_name, device_id, cid, phone, nick, create_time, dt) used by
 * the sink elsewhere in this project — confirm against the table schema.
 */
@Data
public class EventIpConvert implements Serializable{
	/**
	 * Serialization version, required since the class is Serializable.
	 */
	private static final long serialVersionUID = 1L;
	// Record identifier.
	private String id;
	// IP address (string form).
	private String ip;
	// Region/area name resolved from the IP — presumably via ip2region; verify.
	private String areaName;
	// Device identifier.
	private String deviceId;
	// Client/customer id — presumably; confirm semantics with producers.
	private String cid;
	// Phone number.
	private String phone;
	// Nickname.
	private String nick;
	// Creation timestamp kept as a string — format not shown here; confirm.
	private String createTime;
	// Date partition key — presumably yyyy-MM-dd; verify against the sink table.
	private String dt;
}
...@@ -6,6 +6,9 @@ kafka.topic=ods_collect_log ...@@ -6,6 +6,9 @@ kafka.topic=ods_collect_log
#kafka消费者组 #kafka消费者组
kafka.group=collectGroup kafka.group=collectGroup
doris.jdbc_url=jdbc:mysql://140.245.112.44:9030/bi?useUnicode=true&characterEncoding=utf8&zeroDateTimeBehavior=convertToNull&useSSL=true&serverTimezone=GMT%2B8&nullCatalogMeansCurrent=true #doris.jdbc_url=jdbc:mysql://10.0.0.105 9030/bi?useUnicode=true&characterEncoding=utf8&zeroDateTimeBehavior=convertToNull&useSSL=true&serverTimezone=GMT%2B8&nullCatalogMeansCurrent=true
#doris.jdbc_url=jdbc:mysql://140.245.112.44:9030/bi?useUnicode=true&characterEncoding=utf8&zeroDateTimeBehavior=convertToNull&useSSL=true&serverTimezone=GMT%2B8&nullCatalogMeansCurrent=true
#doris.fe=140.245.112.44:8030
doris.fe=10.0.0.105:8030
doris.username=root doris.username=root
doris.driver_class_name=com.mysql.cj.jdbc.Driver doris.driver_class_name=com.mysql.cj.jdbc.Driver
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment