当前位置:   article > 正文

spark scala读取http数据信息_spark http datasource

spark http datasource

读取文件的多种方式:

1、Spark 通过 SparkContext.addFile 下载 HTTP 文件,再用 spark.read.textFile 读取,直接生成 Dataset

2、通过 URLConnection 打开连接,用 InputStreamReader + BufferedReader 逐行读取

3、通过 HttpClient 发起 GET 请求,用 Source.fromInputStream 读取响应内容

  1. package com.ku.test
  2. import java.io.{BufferedReader, InputStreamReader}
  3. import java.net.{URL, URLConnection}
  4. import org.apache.http.client.methods.HttpGet
  5. import org.apache.http.impl.client.DefaultHttpClient
  6. import scala.io.Source
  7. import org.apache.spark.SparkFiles
  8. import org.apache.spark.sql.SparkSession
  /**
   * Demonstrates three ways of fetching a text file over HTTP and printing its lines:
   *
   *  1. Spark: download the file with `SparkContext.addFile`, then read the local copy
   *     with `spark.read.textFile` and display the non-empty lines.
   *  2. Plain JDK: `URLConnection` wrapped in a `BufferedReader`.
   *  3. Apache HttpClient: execute an `HttpGet` and read the response body through
   *     `scala.io.Source`.
   */
  object TokenTest {

    /**
     * Runs the three approaches in order against the local test server.
     *
     * @param args command-line arguments (unused)
     */
    def main(args: Array[String]): Unit = {
      val spark = SparkSession.builder()
        .appName("tokenTest")
        .master("local")
        .getOrCreate()
      spark.sparkContext.setLogLevel("WARN")

      try {
        // Method 1
        showWithSpark(spark, "http://127.0.0.1/test/", "test.txt")
        // Method 2
        printWithUrlConnection("http://127.0.0.1/test/test.txt")
        // Method 3
        printWithHttpClient("http://127.0.0.1/test/test.txt")
      } finally {
        // Release the local Spark context even if one of the reads fails.
        spark.stop()
      }
    }

    /**
     * Downloads `baseUrl + fileName` through Spark's file distribution mechanism and
     * shows the non-empty lines of the downloaded copy.
     *
     * @param spark    active session used for both the download and the read
     * @param baseUrl  URL prefix ending with `/`
     * @param fileName name of the remote file; also the name under which
     *                 `SparkFiles.get` resolves the local copy
     */
    private def showWithSpark(spark: SparkSession, baseUrl: String, fileName: String): Unit = {
      spark.sparkContext.addFile(baseUrl + fileName)
      spark.read
        .textFile("file:///" + SparkFiles.get(fileName))
        .filter(s => !s.isEmpty)
        .show(numRows = 20000, truncate = true)
    }

    /**
     * Opens `url` with the JDK `URLConnection` API and prints every line of the response.
     *
     * @param url absolute URL of the resource to read
     */
    private def printWithUrlConnection(url: String): Unit = {
      val connection: URLConnection = new URL(url).openConnection()
      val reader = new BufferedReader(new InputStreamReader(connection.getInputStream))
      try {
        // readLine() returns null at end of stream. The Java idiom
        // `(line = reader.readLine()) != null` does not work in Scala because an
        // assignment evaluates to Unit, so the end-of-stream check is done explicitly.
        Iterator
          .continually(reader.readLine())
          .takeWhile(_ != null)
          .foreach(println)
      } finally {
        reader.close()
      }
    }

    /**
     * Fetches `url` with Apache HttpClient and prints every line of the response body.
     * Prints nothing when the response carries no entity.
     *
     * @param url absolute URL of the resource to read
     */
    private def printWithHttpClient(url: String): Unit = {
      val httpClient = new DefaultHttpClient()
      try {
        val response = httpClient.execute(new HttpGet(url))
        // getEntity is null for responses without a body.
        Option(response.getEntity).foreach { entity =>
          val body = Source.fromInputStream(entity.getContent)
          try body.getLines().foreach(println)
          finally body.close() // also closes the underlying content stream
        }
      } finally {
        // Releases pooled connections held by the client.
        httpClient.getConnectionManager.shutdown()
      }
    }
  }

先记录下,后面作详细更新

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/你好赵伟/article/detail/579718
推荐阅读
相关标签
  

闽ICP备14008679号