SparkSQL基本操作----作业二_并按“id:1,name:ella,age:36”的格式

作者：我家小花儿 | 2024-04-27 21:30:33

踩

并按“id:1,name:ella,age:36”的格式

题目：

编程实现将 RDD 转换为 DataFrame。源文件内容如下（每行依次包含 id、name、age 三个字段）：

1, Ella, 36

2, Bob, 29

3, Jack, 29

请将数据复制保存到 Linux 系统中，命名为 employee.txt，实现从 RDD 转换得到 DataFrame，并按“id:1,name:Ella,age:36”的格式打印出 DataFrame 的所有数据。请写出程序代码。

代码：


import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.types.StringType
import org.apache.spark.sql.types.StructField
import org.apache.spark.sql.types.StructType
import org.apache.spark.sql.Row
import org.apache.spark.sql.types.IntegerType
 
/**
 * Spark SQL exercise: load `employee.txt` (one `id, name, age` record per line) as an RDD,
 * convert it to a DataFrame using a programmatically built schema, and print every record
 * as `id:<id>,name:<name>,age:<age>`.
 *
 * Usage: `homework [inputPath]`. When no path is given, [[DefaultInputPath]] is read.
 */
object homework {

  // Not used by the programmatic-schema conversion below; kept so existing references
  // to `homework.Student` keep compiling.
  case class Student(id: Int, name: String, age: Int)

  /** Input file read when no path is passed on the command line. */
  private val DefaultInputPath = "E:/file/employee.txt"

  /**
   * Program entry point.
   *
   * @param args optional; `args(0)` is the path of the employee file
   */
  def main(args: Array[String]): Unit = {
    val inputPath = args.headOption.getOrElse(DefaultInputPath)
    val spark = SparkSession.builder().master("local").appName("RDD2Dataset").getOrCreate()
    // Always release the session, even when the conversion fails.
    try dynamicCreate(spark, inputPath)
    finally spark.stop()
  }

  /**
   * Dynamic conversion: builds the schema at runtime, turns the text RDD into a DataFrame,
   * registers it as the temporary view `people`, and prints all records to stdout.
   *
   * @param spark     active Spark session
   * @param inputPath path of the comma-separated employee file
   */
  private def dynamicCreate(spark: SparkSession, inputPath: String = DefaultInputPath): Unit = {
    val lines = spark.sparkContext.textFile(inputPath)

    // Every column is kept as a string, exactly as it is read from the file.
    val schemaString = "id,name,age"
    val fields = schemaString.split(",").map(fieldName => StructField(fieldName, StringType, nullable = true))
    val schema = StructType(fields)

    // Trim each field (the data is written as "1, Ella, 36") and drop lines that do not
    // carry at least three fields, such as blank lines, instead of failing on parts(1).
    // NOTE: this is RDD.collect(PartialFunction), a filter+map transformation, not the action.
    val rowRDD = lines
      .map(_.split(",").map(_.trim))
      .collect { case Array(id, name, age, _*) => Row(id, name, age) }

    val stuDf = spark.createDataFrame(rowRDD, schema)
    stuDf.printSchema()
    stuDf.createOrReplaceTempView("people")

    val results = spark.sql("SELECT id,name,age FROM people")

    // Print plain lines from the driver. The default show() renders a table, truncates
    // strings longer than 20 characters and lists only the first 20 rows.
    results.collect().map(formatRow).foreach(println)
  }

  /** Renders one `(id, name, age)` row as `id:<id>,name:<name>,age:<age>`. */
  private def formatRow(row: Row): String =
    s"id:${row(0)},name:${row(1)},age:${row(2)}"
}

声明：本文内容由网友自发贡献，不代表【wpsshop博客】立场，版权归原作者所有，本站不承担相应法律责任。如您发现有侵权的内容，请联系我们。转载请注明出处：https://www.wpsshop.cn/w/我家小花儿/article/detail/498786