- package com.sql
- import org.apache.spark.sql.SparkSession
- import org.apache.spark.sql.types._
- import org.junit.Test
- import java.util.Properties
/**
 * Batch Spark SQL jobs over the Beike rental-listing CSV.
 *
 * Each `@Test` method is an independently runnable job: `init` previews the
 * source data, and every `tablesNN` method computes one indicator and
 * overwrites the MySQL table of the same number.
 */
class Spark58_FixBug2024 {

  /** Spark session in local mode (`local[6]`), obtained via `getOrCreate()`. */
  val spark = SparkSession.builder()
    .master("local[6]")
    .appName("租房数据实时计算V1.0")
    .getOrCreate()

  // Columns of the header-less listing CSV, in file order, that are read as
  // plain strings. The trailing `sentiments` column is appended separately
  // because it is the only non-string field.
  private val stringColumns = List(
    "house_title", "house_pay", "house_pay_way", "rent_way", "house_type",
    "house_area", "house_decora", "toward", "floor", "floor_height",
    "house_estate", "area", "address", "pic", "time",
    "agent_name", "house_disposal", "house_spot", "house_desc", "url"
  )

  /** Explicit schema of the rental-listing CSV (20 string columns + a float `sentiments`). */
  val ods_house_Schema = StructType(
    stringColumns.map(name => StructField(name, StringType)) :+
      StructField("sentiments", FloatType)
  )

  /** Raw listing data read from HDFS with the explicit schema (the file has no header row). */
  val ods_house_Df = spark.read
    .option("header", "false")
    .schema(ods_house_Schema)
    .csv("hdfs://bigdata:9000/beike/house/beike.csv")

  // MySQL connection settings. Each value can be overridden through an
  // environment variable so credentials do not have to be edited in source;
  // the defaults are the values this class has always used.
  private val jdbcUrl =
    sys.env.getOrElse("BEIKE_MYSQL_URL", "jdbc:mysql://bigdata:3306/beike_hive?useSSL=false")
  private val jdbcUser = sys.env.getOrElse("BEIKE_MYSQL_USER", "root")
  private val jdbcPassword = sys.env.getOrElse("BEIKE_MYSQL_PASSWORD", "123456")

  // Number of most frequent values kept by each indicator.
  private val topN = 10

  /**
   * Counts the rows of the listing data per distinct value of `column`, keeps
   * the `topN` most frequent values and overwrites MySQL table `table` with
   * the result (columns: `column`, `num`).
   *
   * @param column name of a column in `ods_house_Schema`; interpolated into the
   *               SQL text, so it must be a trusted, hard-coded identifier
   * @param table  name of the target MySQL table, replaced on every run
   */
  private def exportTopCounts(column: String, table: String): Unit = {
    // Register the view on every call so each job can run on its own.
    ods_house_Df.createOrReplaceTempView("ods_house")

    val topCounts = spark.sql(
      s"""
         |select $column, count(1) num
         |from ods_house
         |group by $column
         |order by num desc
         |limit $topN
         |""".stripMargin)

    topCounts
      .coalesce(1) // the result is tiny; write it over a single JDBC connection
      .write
      .mode("overwrite")
      .option("driver", "com.mysql.cj.jdbc.Driver")
      .option("user", jdbcUser)
      .option("password", jdbcPassword)
      .jdbc(jdbcUrl, table, new Properties())
  }

  /** Smoke test: prints the first rows of the source data to verify the read. */
  @Test
  def init(): Unit = {
    ods_house_Df.show()
  }

  // ---- The remaining indicators are implemented with Spark SQL ----

  /** Indicator 8: distribution of rent payment methods, written to `table08`. */
  @Test
  def tables08(): Unit = {
    exportTopCounts("house_pay_way", "table08")
  }

  /** Indicator 9: address frequencies (word-cloud data), written to `table09`. */
  @Test
  def tables09(): Unit = {
    exportTopCounts("address", "table09")
  }
}

三、进度安排、应完成的工作量: 1. 2023年11月16日-2024年2月5日 完成开题报告、采集广州租房数据; 2. 2024年2月5日-3月31日 使用与集成协同过滤推荐算法; 3. 2024年4月1日-4月15日 开发数据可视化分析大屏; 4. 2024年4月16日-4月30日 业务代码编写,测试程序; 5. 2024年5月1日-5月20日 编写论文,进行毕业设计答辩。
四、主要参考文献 [1]杨俊锋.重庆市住宅价格预测模型研究[D].云南财经大学,2023. [2]朱慧明,刘智伟.时间序列向量自回归模型的贝叶斯推断理论[J].统计与决策,2022(01):11-12. [3]Guo J.Housing Price Forecasting based on Stochastic Time Series Model[J].International Journal of Business Management and Economic Research,2022,3(2). [4]Jane P. Brown,Haiyan Song & Alan McGillivray.Forecasting UK House Prices:A Time Varying Coefficient Approach[J].Economic Modelling,2022(04). [5]N Nghiep,C Al.Predicting housing value:A comparison of multiple regression analysis and artificial neural networks[J].Journal of Real Estate Research,2021. [6]Hasan Selim.Determinants of House Prices in Turkey: Hedonic Regression Versus Artificial Neural Network[J].Dogus University Journal,2021(01). [7]杨沐晞.基于随机森林模型的二手房价格评估研究[D].中南大学,2023. [8]Gu J, Zhu M, Jiang L. Housing Price Forecasting Based on Genetic Algorithm and Support Vector Machine[J]. Expert Systems with Applications, 2011,38(4): 3383-3386. [9]EA Antipov,EB Pokryshevskaya.Mass appraisal of residential apartments:An application of Random forest for valuation and a CART-based approach for model diagnostics[J].Expert Systems With Applications,2022. [10]Sarip A G,Hafez M B,Daud M N.Application of Fuzzy Regression Model for Real Estate Price Prediction[J].Malaysian Journal of Computer Science,2022,29(1): 15-27. [11]邱启荣,于婷.基于主成分分析的BP神经网络对房价的预测研究[J].湖南文理学院学报(自然科学版),2022,23(03):24-26+36. [12]温海珍,贾生华.住宅的特征与特征的价格——基于特征价格模型的分析[J].浙江大学学报(工学版),2022(10):101-105+112. [13]邵飞波,张鑫.基于Hedonic模型的上海住宅价格影响因素分析[J].经济论坛,2020(23):9-13. [14]王卓琳,秦伟伟.特征价格模型研究综述[J].经济论坛,2020(12):130-131. [15]高玉明,张仁津.基于遗传算法和BP神经网络的房价预测分析[J].计算机工程,2020,40(4):187-191. [16]陈世鹏,金升平.基于随机森林模型的房价预测[J].科技创新与应用,2022(04):52. [23]陶顾宇.美国埃姆斯市房价预测回归分析[J].通讯世界,2022(09):302-304. [17]Andy Liaw and Matthew Wiener.Classification and Regression by RandomForest[J].R News,2022,12(2/3):18-22. [18]刘艳丽.随机森林综述[D].南开大学,2022. [19]励嘉豪,曾丹.房价预测机器学习之集成学习[J].电子技术与软件工程,2022(20):90-93. [20]孙宪华,张臣曦.房屋质量及其对房地产价格指数的影响[J].统计与信息论坛,2022(9):43-47.
