当前位置:   article > 正文

Java 开发flink流/批处理程序_java flink

java flink

Java 开发flink 流/批处理程序


一、安装pom依赖,配置打包插件以及入口类

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.lemon</groupId>
    <artifactId>FlinkTutorial</artifactId>
    <version>1.0-SNAPSHOT</version>

    <properties>
        <maven.compiler.source>8</maven.compiler.source>
        <maven.compiler.target>8</maven.compiler.target>
        <flink.version>1.14.4</flink.version>
        <slf4j.version>1.7.36</slf4j.version>
        <scala.version>2.12</scala.libary.version>
    </properties>

    <dependencies>
        <!--引入flink相关依赖-->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-java</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-java_${scala.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-clients_${scala.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <!--引入日志管理相关依赖-->
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-api</artifactId>
            <version>${slf4j.version}</version>
        </dependency>
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-log4j12</artifactId>
            <version>${slf4j.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.logging.log4j</groupId>
            <artifactId>log4j-to-slf4j</artifactId>
            <version>2.17.2</version>
        </dependency>
        <!--引入redis相关依赖-->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-redis_2.11</artifactId>
            <version>1.1.5</version>
        </dependency>
    </dependencies>

    <!-- 下面包含两个打包插件:maven-assembly-plugin 、maven-shade-plugin (二选一使用)-->
    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-assembly-plugin</artifactId>
                <version>3.3.0</version>
                <configuration>
                    <descriptorRefs>
                        <descriptorRef>jar-with-dependencies</descriptorRef>
                    </descriptorRefs>
                    <!-- 设置jar包的入口类(可选) -->
                    <archive>
                        <manifest>
                            <mainClass>com.lemon.flink.StreamWordCount</mainClass>
                        </manifest>
                    </archive>
                </configuration>
                <executions>
                    <execution>
                        <id>make-assembly</id>
                        <phase>package</phase>
                        <goals>
                            <goal>single</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-shade-plugin</artifactId>
                <executions>
                    <execution>
                        <phase>package</phase>
                        <goals>
                            <goal>shade</goal>
                        </goals>
                        <configuration>
                            <filters>
                                <filter>
                                    <artifact>*:*</artifact>
                                    <excludes>
                                        <!-- zip -d learn_spark.jar META-INF/*.RSA META-INF/*.DSA META-INF/*.SF -->
                                        <exclude>META-INF/*.SF</exclude>
                                        <exclude>META-INF/*.DSA</exclude>
                                        <exclude>META-INF/*.RSA</exclude>
                                    </excludes>
                                </filter>
                            </filters>
                            <transformers>
                                <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
                                    <!-- 设置jar包的入口类(可选) -->
                                    <mainClass>com.lemon.flink.BatchWordCount</mainClass>
                                </transformer>
                            </transformers>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>

</project>
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19
  • 20
  • 21
  • 22
  • 23
  • 24
  • 25
  • 26
  • 27
  • 28
  • 29
  • 30
  • 31
  • 32
  • 33
  • 34
  • 35
  • 36
  • 37
  • 38
  • 39
  • 40
  • 41
  • 42
  • 43
  • 44
  • 45
  • 46
  • 47
  • 48
  • 49
  • 50
  • 51
  • 52
  • 53
  • 54
  • 55
  • 56
  • 57
  • 58
  • 59
  • 60
  • 61
  • 62
  • 63
  • 64
  • 65
  • 66
  • 67
  • 68
  • 69
  • 70
  • 71
  • 72
  • 73
  • 74
  • 75
  • 76
  • 77
  • 78
  • 79
  • 80
  • 81
  • 82
  • 83
  • 84
  • 85
  • 86
  • 87
  • 88
  • 89
  • 90
  • 91
  • 92
  • 93
  • 94
  • 95
  • 96
  • 97
  • 98
  • 99
  • 100
  • 101
  • 102
  • 103
  • 104
  • 105
  • 106
  • 107
  • 108
  • 109
  • 110
  • 111
  • 112
  • 113
  • 114
  • 115
  • 116
  • 117
  • 118
  • 119
  • 120
  • 121
  • 122

二、编写Flink程序

  1. 创建执行环境
/**
 * 获取flink执行环境(两种方式)ExecutionEnvironment 、StreamExecutionEnvironment
 * StreamExecutionEnvironment:默认就是流处理模式,但可以强制指定其他处理模式
 * 在flink中,有界与无界数据流都可以强指定为流式运行环境,但是,如果明知一个数据来源为流式数据,就必须设置环境为AUTOMATIC 或STREAMING,不可以指定     	    为BATCH否则程序会报错!
*/
// 方式一:获取flink的批处理执行环境
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
// 方式二:获取flink的流处理执行环境
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
// 指定数据处理模式 AUTOMATIC BATCH STREAMING
env.setRuntimeMode(RuntimeExecutionMode.AUTOMATIC);
env.setRuntimeMode(RuntimeExecutionMode.BATCH);
env.setRuntimeMode(RuntimeExecutionMode.STREAMING);
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  1. 获取数据

    //	批处理
    DataSet<String> linesData = env.readTextFile("src/main/resources/hello.txt");
    
    //	流处理(parameter工具获取参数)
    ParameterTool params = ParameterTool.fromArgs(args);
    String hostname = params.has("h") ? params.get("h") : "localhost";
    int port = params.has("p") ? params.getInt("p") : 9000;
    DataStreamSource<String> linesData = env.socketTextStream(hostname, port);
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
  2. flatMap算子

    //	批处理
    FlatMapOperator<String, Tuple2<String, Long>> streamOperator = linesData.flatMap((String line, Collector<Tuple2<String, Long>> collector) -> {
                String[] words = line.split(" ");
                for (String word : words) {
                    collector.collect(Tuple2.of(word, 1L));
                }
            }).returns(Types.TUPLE(Types.STRING, Types.LONG));
    
    //	流处理
    SingleOutputStreamOperator<Tuple2<String, Long>> streamOperator = linesData.flatMap((String line, Collector<Tuple2<String, Long>> collector) -> {
                String[] words = line.split(" ");
                for (String word : words) {
                    collector.collect(Tuple2.of(word, 1L));
                }
            }).returns(Types.TUPLE(Types.STRING, Types.LONG));
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
  3. 数据分组

    //	批处理
    UnsortedGrouping<Tuple2<String, Long>> group = streamOperator.groupBy(0);
    //	流处理
    KeyedStream<Tuple2<String, Long>, String> keyedStream = streamOperator.keyBy(data -> data.f0);
    
    • 1
    • 2
    • 3
    • 4
  4. 聚合

    //	批处理
    AggregateOperator<Tuple2<String, Long>> sum = group.sum(1);
    //	流处理
    SingleOutputStreamOperator<Tuple2<String, Long>> sum = keyedStream.sum(1);
    
    • 1
    • 2
    • 3
    • 4
  5. 创建自己的RedisSink类,实现 RedisMapper 接口

    public static final class MyRedisSink implements RedisMapper<Tuple2<String, Long>> {
        @Override
        public RedisCommandDescription getCommandDescription() {
            return new RedisCommandDescription(RedisCommand.SET, null);
        }
    
        @Override
        public String getKeyFromData(Tuple2<String, Long> data) {
            return data.f0;
        }
    
        @Override
        public String getValueFromData(Tuple2<String, Long> data) {
            return data.f1.toString();
        }
    }
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
  6. 写入redis

    //实例化Flink和Redis关联类FlinkJedisPoolConfig,设置Redis端口
    FlinkJedisPoolConfig conf = new FlinkJedisPoolConfig.Builder().setHost("127.0.0.1").setPassword("root").setPort(6379).build();
    //实例化RedisSink,并通过flink的addSink的方式将flink计算的结果插入到redis
    sum.addSink(new RedisSink<Tuple2<String, Long>>(conf, new MyRedisSink()));
    
    • 1
    • 2
    • 3
    • 4
  7. 打印

    //	流、批处理
    sum.print();
    
    • 1
    • 2
  8. 执行

    //	仅限流处理
    //	"stream_word_count" 定义当前工作的job名
    env.execute("stream_word_count");
    
    • 1
    • 2
    • 3

三、完整的java代码

package com.lemon.flink;

import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.utils.ParameterTool;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.redis.RedisSink;
import org.apache.flink.streaming.connectors.redis.common.config.FlinkJedisPoolConfig;
import org.apache.flink.streaming.connectors.redis.common.mapper.RedisCommand;
import org.apache.flink.streaming.connectors.redis.common.mapper.RedisCommandDescription;
import org.apache.flink.streaming.connectors.redis.common.mapper.RedisMapper;
import org.apache.flink.util.Collector;

/**
 * @description: 执行flink流处理计算,并将结果写入redis
 * @author: LemonCoder
 * @date: 4/12/2022
 */
public class StreamWordCount {

    /**
     * @description: main方法
     * @author: LemonCoder
     * @date: 4/12/2022
     */
    public static void main(String[] args) throws Exception {

        ParameterTool params = ParameterTool.fromArgs(args);
        String hostname = params.has("h") ? params.get("h") : "localhost";
        int port = params.has("p") ? params.getInt("p") : 9000;

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        DataStreamSource<String> linesData = env.socketTextStream(hostname, port);

        SingleOutputStreamOperator<Tuple2<String, Long>> streamOperator = linesData.flatMap((String line, Collector<Tuple2<String, Long>> collector) -> {
            String[] words = line.split(" ");
            for (String word : words) {
                collector.collect(Tuple2.of(word, 1L));
            }
        }).returns(Types.TUPLE(Types.STRING, Types.LONG));

        KeyedStream<Tuple2<String, Long>, String> keyedStream = streamOperator.keyBy(data -> data.f0);

        SingleOutputStreamOperator<Tuple2<String, Long>> sum = keyedStream.sum(1);

        sum.print();

        //实例化Flink和Redis关联类FlinkJedisPoolConfig,设置Redis服务的地址、端口、密码
        FlinkJedisPoolConfig conf = new FlinkJedisPoolConfig.Builder().setHost("127.0.0.1").setPassword("root").setPort(6379).build();
        //实例化RedisSink,并通过flink的addSink的方式将flink计算的结果写入到redis
        sum.addSink(new RedisSink<Tuple2<String, Long>>(conf, new MyRedisSink()));

        env.execute("stream_word_count");
    }


    /**
     * @description: 定义自己的RedisSink类,并实现RedisMapper接口
     * @author: LemonCoder
     * @date: 4/12/2022
     */
    public static final class MyRedisSink implements RedisMapper<Tuple2<String, Long>> {
        @Override
        public RedisCommandDescription getCommandDescription() {
            return new RedisCommandDescription(RedisCommand.SET, null);
        }

        @Override
        public String getKeyFromData(Tuple2<String, Long> data) {
            return data.f0;
        }

        @Override
        public String getValueFromData(Tuple2<String, Long> data) {
            return data.f1.toString();
        }
    }
}
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19
  • 20
  • 21
  • 22
  • 23
  • 24
  • 25
  • 26
  • 27
  • 28
  • 29
  • 30
  • 31
  • 32
  • 33
  • 34
  • 35
  • 36
  • 37
  • 38
  • 39
  • 40
  • 41
  • 42
  • 43
  • 44
  • 45
  • 46
  • 47
  • 48
  • 49
  • 50
  • 51
  • 52
  • 53
  • 54
  • 55
  • 56
  • 57
  • 58
  • 59
  • 60
  • 61
  • 62
  • 63
  • 64
  • 65
  • 66
  • 67
  • 68
  • 69
  • 70
  • 71
  • 72
  • 73
  • 74
  • 75
  • 76
  • 77
  • 78
  • 79
  • 80
  • 81
  • 82

四、测试

此处用hercules工具模拟socket通信tcp服务端
在这里插入图片描述

在这里插入图片描述

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/小桥流水78/article/detail/972314
推荐阅读
相关标签