赞
踩
读取文件的多种方式:
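1、Spark:先用 SparkContext.addFile 下载 HTTP 文件,再用 spark.read.textFile 读取本地副本,生成 Dataset
2、URLConnection + BufferedReader(InputStreamReader)逐行读取
3、HttpClient + Source.fromInputStream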
- package com.ku.test
-
- import java.io.{BufferedReader, InputStreamReader}
- import java.net.{URL, URLConnection}
-
- import org.apache.http.client.methods.HttpGet
- import org.apache.http.impl.client.DefaultHttpClient
-
- import scala.io.Source
-
- import org.apache.spark.SparkFiles
- import org.apache.spark.sql.SparkSession
-
-
/**
 * Demonstrates three ways of reading a text file served over HTTP and
 * printing its lines:
 *
 *  1. download it with `SparkContext.addFile` and load the local copy as a Dataset,
 *  2. `java.net.URLConnection` wrapped in a `BufferedReader`,
 *  3. Apache HttpClient combined with `scala.io.Source`.
 */
object TokenTest {

  /**
   * Entry point: runs the three approaches one after another.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    readWithSpark()
    readWithUrlConnection()
    readWithHttpClient()
  }

  /**
   * Approach 1: let Spark download the file, then read the local copy as a
   * Dataset, drop empty lines and show up to 20000 rows.
   */
  private def readWithSpark(): Unit = {
    val spark = SparkSession.builder()
      .appName("tokenTest")
      .master("local")
      .getOrCreate()
    try {
      spark.sparkContext.setLogLevel("WARN")

      val fileName = "test.txt"
      // addFile fetches the remote file; SparkFiles.get resolves the path of the downloaded copy.
      spark.sparkContext.addFile("http://127.0.0.1/test/" + fileName)
      spark.read
        .textFile("file:///" + SparkFiles.get(fileName))
        .filter(s => !s.isEmpty)
        .show(truncate = true, numRows = 20000)
    } finally {
      // Stop the session even when the read fails.
      spark.stop()
    }
  }

  /**
   * Approach 2: open a plain `URLConnection` and print the response line by
   * line until the end of the stream.
   */
  private def readWithUrlConnection(): Unit = {
    val url2: URL = new URL("http://127.0.0.1/test/test.txt")
    val urlConn: URLConnection = url2.openConnection()
    // NOTE(review): InputStreamReader uses the platform default charset here — confirm the
    // served file's encoding and pass it explicitly if it differs.
    val in = new BufferedReader(new InputStreamReader(urlConn.getInputStream))
    try {
      // readLine() returns null at end of stream. An assignment inside the loop condition
      // evaluates to Unit in Scala, so it can never be used as the termination test.
      Iterator
        .continually(in.readLine())
        .takeWhile(_ != null)
        .foreach(println)
    } finally {
      in.close()
    }
  }

  /**
   * Approach 3: fetch the file with Apache HttpClient and print the response
   * body line by line through `scala.io.Source`.
   */
  private def readWithHttpClient(): Unit = {
    val url3 = "http://127.0.0.1/test/test.txt"
    val httpclient = new DefaultHttpClient()
    try {
      val response = httpclient.execute(new HttpGet(url3))
      // getEntity returns null when the response carries no body.
      Option(response.getEntity).foreach { entity =>
        // NOTE(review): Source.fromInputStream decodes with the implicit default codec —
        // confirm it matches the served file's encoding.
        val file = Source.fromInputStream(entity.getContent)
        try {
          file.getLines().foreach(println)
        } finally {
          // Closing the Source also closes the underlying content stream.
          file.close()
        }
      }
    } finally {
      // Release the connections held by the client.
      httpclient.getConnectionManager.shutdown()
    }
  }
}
先记录下,后面作详细更新
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。