赞
踩
如果是spark1.6.0请添加maven:
<dependency>
  <groupId>com.databricks</groupId>
  <artifactId>spark-csv_2.10</artifactId>
  <version>1.4.0</version>
  <scope>compile</scope>
</dependency>
如果是spark2.0+就不用添加maven了,因为spark2.0内部集成了读写csv文件。
package com.egridcloud.spark

import org.apache.spark.sql.{DataFrame, SQLContext}
import org.apache.spark.{SparkConf, SparkContext}

/**
 * Reads a CSV file into a DataFrame and writes it back out as a single CSV file,
 * using the spark-csv data source (Spark 1.6.x; built into Spark 2.0+).
 *
 * The input/output paths default to the original hard-coded locations but may be
 * overridden on the command line: args(0) = input CSV path, args(1) = output directory.
 *
 * Created by LHX on 2018/3/20 13:26.
 */
object SparkReadFile {
  def main(args: Array[String]): Unit = {
    // Allow callers to override the paths; fall back to the original defaults
    // so existing invocations keep working unchanged.
    val localpath = args.lift(0).getOrElse("D:\\input\\word.csv")
    val outpath   = args.lift(1).getOrElse("D:\\output\\word2")

    val conf = new SparkConf()
    conf.setAppName("SparkReadFile")
    conf.setMaster("local")
    val sparkContext = new SparkContext(conf)
    try {
      val sqlContext = new SQLContext(sparkContext)

      // Read the CSV file.
      val data: DataFrame = sqlContext.read.format("com.databricks.spark.csv")
        .option("header", "false")            // "true" if the first CSV row holds column names
        .option("inferSchema", true.toString) // infer each column's data type automatically
        .load(localpath)
      // data.show()

      // Write the CSV file; repartition(1) collapses the output to a single part file.
      data.repartition(1).write.format("com.databricks.spark.csv")
        .option("header", "false")  // "true" to emit a header row with column names
        .option("delimiter", ",")   // field separator; "," is the default
        .save(outpath)
    } finally {
      // Release the SparkContext even if the read or write fails.
      sparkContext.stop()
    }
  }
}
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。