赞
踩
<!-- Flink dependency configuration: start -->
<!-- Both entries below reference the ${flink.version} property, and the second
     also references ${scala.version}; neither property is defined in this
     snippet, so they must be declared elsewhere in the POM. -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-java</artifactId>
<version>${flink.version}</version>
<!-- The "provided" scope below is left commented out, so no explicit scope is
     set on this dependency. NOTE(review): presumably disabled so the example
     can run locally from an IDE; confirm before packaging for a cluster. -->
<!-- <scope>provided</scope> -->
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<!-- The artifact id carries a ${scala.version} suffix.
     NOTE(review): this is expected to be a Scala binary version such as 2.11
     or 2.12; verify against the property definition. -->
<artifactId>flink-clients_${scala.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<!-- Flink dependency configuration: end -->
1.2.1 代码编写
import org.apache.flink.api.common.functions.FlatMapFunction; import org.apache.flink.api.java.DataSet; import org.apache.flink.api.java.ExecutionEnvironment; import org.apache.flink.api.java.tuple.Tuple2; import org.apache.flink.core.fs.FileSystem.WriteMode; import org.apache.flink.util.Collector; /** * Flink实现离线数据DataSet版本的WordCount经典案例 * * */ public class FlinkWordCount4DataSet { public static void main(String[] args) throws Exception { // 创建Flink的代码执行离线数据流上下文环境变量 ExecutionEnvironment env = ExecutionEnvironment .getExecutionEnvironment(); // 定义从本地文件系统当中文件路径 String filePath = ""; if (args == null || args.length == 0) { filePath = "D:\\temp\\input.txt"; } else { filePath = args[0]; } // 获取输入文件对应的DataSet对象 DataSet<String> inputLineDataSet = env.readTextFile(filePath); // 对数据集进行多个算子处理,按空白符号分词展开,并转换成(word, 1)二元组进行统计 DataSet<Tuple2<String, Integer>> resultSet = inputLineDataSet .flatMap( new FlatMapFunction<String, Tuple2<String, Integer>>()
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。