赞
踩
# 命令行
start-all.sh
nohup hive --service metastore &
import org.apache.spark.sql.SparkSession import org.apache.spark.sql.functions.col import org.apache.spark.sql.functions._ object cleandata { def main(args: Array[String]): Unit = { //创建spark对象 val spark = SparkSession.builder().appName("HiveSupport").master("local[*]") .config("spark.sql.warehouse.dir", "hdfs://127.0.0.1:9000/opt/hive/warehouse") .config("hive.metastore.uris", "thrift://127.0.0.1:9083") .config("dfs.client.use.datanode.hostname", "true") .enableHiveSupport().getOrCreate() //############# Begin ############ //创建hive数据库daobidata spark.sql("create database daobidata") //创建hive数据表 spark.sql("use daobidata") //创建diedata表 spark.sql("create table if not exists diedata(bianh int,com_name string," + "com_addr string,cat string,se_cat string,com_des string,born_data string," + "death_data string,live_days int,financing string,total_money int,death_reason string,"+ "invest_name string,ceo_name string,ceo_des string"+ ")row format delimited fields terminated by ',';") //将本地datadie.csv文件导入至hive数据库diedata表中 spark.sql("load data local inpath '/data/workspace/myshixun/data/datadie.csv' into table diedata;") //进入diedata表进行清洗操作,删除为空的数据,根据倒闭原因切分出最主要原因,根据成立时间切分出,企业成立的年份,根据倒闭时间切分出,企业倒闭的年份 val c1 = spark.table("diedata").na.drop("any").distinct() .withColumn("death_reason",split(col("death_reason")," ")(0)) .withColumn("bornyear",split(col("born_data"),"/")(0)) .withColumn("deathyear",split(col("death_data"),"/")(0)) c1.createOrReplaceTempView("c1") //创建die_data表 spark.sql("create table if not exists die_data(bianh int,com_name string," + "com_addr string,cat string,se_cat string,com_des string,born_data string," + "death_data string,live_days int,financing string,total_money int,death_reason string,"+ "invest_name string,ceo_name string,ceo_des string,bornyear string,deathyear string"+ ")row format delimited fields terminated by ',';") //将清洗完的数据导入至die_data表中 spark.sql("insert overwrite table die_data select * from c1") //############# End ############## spark.stop() } }
import org.apache.spark.sql.{SaveMode, SparkSession} object citydiedata { def main(args: Array[String]): Unit = { val spark = SparkSession.builder().appName("SparkCleanJob") .master("local[*]").getOrCreate() //************* Begin ************** //读取数据,用逗号分隔,第一行不做为数据,做为标题 val df1 = spark.read.option("delimiter", ",").option("header",true).csv("/data/workspace/myshixun/die_data.csv") df1.createOrReplaceTempView("df1") //使用spark SQL语句,根据城市统计企业倒闭top5 val df=spark.sql("select df1.com_addr as com_addr,count(df1.com_addr) as saddr from df1 group by df1.com_addr order by saddr desc limit 5") .repartition(1) .write //连接数据库 .format("jdbc") .option("url", "jdbc:mysql://127.0.0.1:3306/diedata?useUnicode=true&characterEncoding=utf-8") .option("driver","com.mysql.jdbc.Driver") //保存至数据库的数据表名 .option("dbtable", "addr") //用户名 .option("user", "root") //连接数据库的密码 .option("password", "123123") //不破坏数据表结构,在后添加 .mode(SaveMode.Append) .save() //************ End *********** spark.stop() } }
import org.apache.spark.sql.{SaveMode, SparkSession} object industrydata { def main(args: Array[String]): Unit = { val spark = SparkSession.builder().appName("SparkCleanJob") .master("local[*]").getOrCreate() //########## Begin ############ //读取数据,用逗号分隔,第一行不做为数据,做为标题 val df1 = spark.read.option("delimiter", ",").option("header",true).csv("/data/workspace/myshixun/die_data.csv") df1.createOrReplaceTempView("df1") //使用spark SQL语句,根据行业统计企业倒闭top10 val df=spark.sql("select df1.cat as industry,count(df1.cat) as catindustry from df1 group by df1.cat order by catindustry desc limit 10 ") .repartition(1) .write //连接数据库 .format("jdbc") .option("url", "jdbc:mysql://127.0.0.1:3306/diedata?useUnicode=true&characterEncoding=utf-8") .option("driver","com.mysql.jdbc.Driver") //数据表名 .option("dbtable", "industry") .option("user", "root") .option("password", "123123") //不破坏数据表结构,在后添加 .mode(SaveMode.Append) .save() //############ End ########### spark.stop() } }
import org.apache.spark.sql.{SaveMode, SparkSession} object closedown { def main(args: Array[String]): Unit = { val spark = SparkSession.builder().appName("SparkCleanJob") .master("local[*]").getOrCreate() //############ Begin ########### //读取数据,用逗号分隔,第一行不做为数据,做为标题 val df1 = spark.read.option("delimiter", ",").option("header",true).csv("/data/workspace/myshixun/die_data.csv") df1.createOrReplaceTempView("df1") //使用spark SQL语句,根据倒闭原因字段,找到企业倒闭的主要原因,统计主要原因的个数 val df=spark.sql("select df1.death_reason as death_reason,count(df1.death_reason) as dreason from df1 group by df1.death_reason order by dreason desc") .repartition(1) .write //连接数据库 .format("jdbc") //数据库名 .option("url", "jdbc:mysql://127.0.0.1:3306/diedata?useUnicode=true&characterEncoding=utf-8") .option("driver","com.mysql.jdbc.Driver") //数据表名 .option("dbtable", "cldown") .option("user", "root") .option("password", "123123") //不破坏表结构,在后面添加 .mode(SaveMode.Append) .save() //############ End ########### spark.stop() } }
import org.apache.spark.sql.{SaveMode, SparkSession} object comfinanc { def main(args: Array[String]): Unit = { val spark = SparkSession.builder().appName("SparkCleanJob") .master("local[*]").getOrCreate() //############ Begin ########### //读取数据,用逗号分隔,去除表头,第一行不做为数据,做为标题 val df1 = spark.read.option("delimiter", ",").option("header",true).csv("/data/workspace/myshixun/die_data.csv") df1.createOrReplaceTempView("df1") //使用spark SQL语句,根据行业细分领域字段,统计企业倒闭分布情况top20 val df=spark.sql("select df1.se_cat as se_cat,count(df1.se_cat) as countsecat from df1 group by df1.se_cat order by countsecat desc limit 10") .repartition(1) .write //连接数据库 .format("jdbc") .option("url", "jdbc:mysql://127.0.0.1:3306/diedata?useUnicode=true&characterEncoding=utf-8") .option("driver","com.mysql.jdbc.Driver") //数据表名 .option("dbtable", "secat") .option("user", "root") .option("password", "123123") //不破坏表结构,在后面添加 .mode(SaveMode.Append) .save() //使用spark SQL语句,统计倒闭企业融资情况 val d1=spark.sql("select df1.financing as financing,count(df1.financing) as countfinanc from df1 group by df1.financing order by countfinanc desc") .repartition(1) .write //连接数据库 .format("jdbc") .option("url", "jdbc:mysql://127.0.0.1:3306/diedata?useUnicode=true&characterEncoding=utf-8") .option("driver","com.mysql.jdbc.Driver") //数据表名 .option("dbtable", "financing") .option("user", "root") .option("password", "123123") //不破坏表结构,在后面添加 .mode(SaveMode.Append) .save() //########## End ######### spark.stop() } }
import org.apache.spark.sql.{SaveMode, SparkSession} object yeardata { def main(args: Array[String]): Unit = { val spark = SparkSession.builder().appName("SparkCleanJob") .master("local[*]").getOrCreate() //############ Begin ########### //读取数据,用逗号分隔,第一行不做为数据,做为标题 val df1 = spark.read.option("delimiter", ",").option("header",true).csv("/data/workspace/myshixun/die_data.csv") df1.createOrReplaceTempView("df1") //根据企业成立时间字段,统计每年有多少成立的企业 val d1=spark.sql("select df1.bornyear as bornyear,count(df1.bornyear) as byear from df1 group by df1.bornyear order by bornyear desc limit 10") .repartition(1) .write //连接数据库 .format("jdbc") .option("url", "jdbc:mysql://127.0.0.1:3306/diedata?useUnicode=true&characterEncoding=utf-8") .option("driver","com.mysql.jdbc.Driver") //数据表名 .option("dbtable", "bornyear") .option("user", "root") .option("password", "123123") //不破坏表结构,在后面添加 .mode(SaveMode.Append) .save() //根据企业倒闭年份字段,统计企业每个年份倒闭的数量 val d2=spark.sql("select df1.deathyear as deathyear,count(df1.deathyear) as dyear from df1 group by df1.deathyear order by deathyear desc limit 10") .repartition(1) .write //连接数据库 .format("jdbc") //数据库名 .option("url", "jdbc:mysql://127.0.0.1:3306/diedata?useUnicode=true&characterEncoding=utf-8") .option("driver","com.mysql.jdbc.Driver") //数据表名 .option("dbtable", "deathyear") .option("user", "root") .option("password", "123123") //不破坏表结构,在后面添加 .mode(SaveMode.Append) .save() //############# End ############ spark.stop() } }
from app import db
class diedata(db.Model):
__tablename__ = "addr"
#**************** Begin ************#
ID = db.Column(db.Integer, primary_key=True) ##序号 主键
com_addr = db.Column(db.String(255)) ##城市
saddr = db.Column(db.Integer) ##统计企业倒闭数量
#************* End *************#
from flask import render_template from app.views import index from app import db from app.model.models import diedata @index.route("/city") def index1(): selectdata = db.session.query(diedata.com_addr).all() selectdata1 = db.session.query(diedata.saddr).all() list1 =[] list2=[] #********** Begin **********# #获取城市倒闭企业top5的数据 for k in selectdata: data = { "com_addr": k.com_addr, } list1.append(data) for i in selectdata1: list2.append(i[0]) return render_template("test3.html", com_addr=list1, saddr=list2) #*********** End ***********#
<!DOCTYPE html> <html lang="en"> <head> <meta charset="UTF-8"> <title>城市倒闭企业统计情况</title> <script type="text/javascript" src="../static/js/echarts-all-3.js" ></script> </head> <body> <!--准备一个DOM容器--> <div id="main" style="width: 1500px;height: 650px;"></div> <script> var myChart = echarts.init(document.getElementById('main')); //*********** Begin *************** com_addr=[] {% for a in com_addr %} com_addr.push('{{ a.com_addr }}'); {% endfor %} var saddr={{saddr|tojson}}; option = { title:{ text:'城市倒闭企业top5展示图', left:'center' }, legend: { data: ['城市倒闭企业个数'], //这里设置柱状图上面的方块,名称跟series里的name保持一致 align: 'right', //图例显示的位置:靠左,靠右还是居中的设置.不设置则居中 right: 10, }, xAxis: { type: 'category', data: com_addr }, yAxis: { type: 'value', name: '倒闭个数', axisLabel: { formatter: '{value} 个' } }, series: [ { data: saddr, type: 'bar', name: '城市倒闭企业个数', itemStyle: { normal: { color:'blue', lineStyle:{ color:'blue' }, label : {show: true} } } } ] }; myChart.setOption(option); //************ End *************** </script> </body> </html>
from app import db
class diedata(db.Model):
__tablename__ = "industrydata"
#************* Begin ************
ID = db.Column(db.Integer, primary_key=True) ##序号 主键
industry = db.Column(db.String(255)) ##行业名
catindustry = db.Column(db.Integer) ##行业倒闭数
#************* End ************
from flask import render_template from app.views import index from app import db from app.model.models import diedata @index.route("/industry") def index1(): #************* Begin ************ selectdata = db.session.query(diedata.industry).all() selectdata1 = db.session.query(diedata.catindustry).all() list1 =[] list2=[] for k in selectdata: data = { "industry": k.industry, } list1.append(data) for i in selectdata1: list2.append(i[0]) return render_template("test3.html", industry=list1, catindustry=list2) #************* End *************
<!DOCTYPE html> <html lang="en"> <head> <meta charset="UTF-8"> <title>城市倒闭企业统计情况</title> <script type="text/javascript" src="../static/js/echarts-all-3.js" ></script> </head> <body> <!--准备一个DOM容器--> <div id="main" style="width: 1500px;height: 650px;"></div> <script> var myChart = echarts.init(document.getElementById('main')); //************* Begin ************ industry=[] {% for a in industry %} industry.push('{{ a.industry }}'); {% endfor %} var catindustry={{catindustry|tojson}}; option = { title:{ text:'行业企业倒闭top10折线图', left:'center' }, legend: { data: ['行业企业倒闭数'], //这里设置柱状图上面的方块,名称跟series里的name保持一致 align: 'right', //图例显示的位置:靠左,靠右还是居中的设置.不设置则居中 right: 10, }, xAxis: { type: 'category', name: '行业分类', axisLabel: { formatter: '{value}' }, data: industry }, yAxis: { type: 'value', name: '行业企业倒闭数', axisLabel: { formatter: '{value} 个' } }, series: [ { name:'行业企业倒闭数', data: catindustry, type: 'line', smooth: true, label:{ show:true }, itemStyle: { normal: { color:'green', lineStyle:{ color:'green' }, label : {show: true} } } } ] }; myChart.setOption(option); //************* End ************ </script> </body> </html>
from app import db
class diedata(db.Model):
__tablename__ = "closedown"
############ Begin ###########
ID = db.Column(db.Integer, primary_key=True) ##序号 主键
death_reason = db.Column(db.String(255)) ##倒闭原因
dreason = db.Column(db.Integer) ##倒闭原因统计
############ End ###########
from flask import render_template from app.views import index from app import db from app.model.models import diedata @index.route("/deathreason") def index1(): selectdata = db.session.query(diedata.death_reason,diedata.dreason).all() list1 =[] ############# Begin ############ for k in selectdata: data = { "name": k.death_reason, "value":k.dreason } list1.append(data) return render_template("test3.html", datas=list1) ############# End ############
<!DOCTYPE html> <html lang="en"> <head> <meta charset="UTF-8"> <title>企业倒闭的原因</title> <script type="text/javascript" src="../static/js/echarts-all-3.js" ></script> </head> <body> <!--准备一个DOM容器--> <div id="main" style="width: 1500px;height: 650px;"></div> <script> var myChart = echarts.init(document.getElementById('main')); //########### Begin ############# var datas={{datas|tojson}}; option = { title: { text: '企业倒闭原因结果统计图', left: 'center' }, legend: { top: 'bottom', data:datas }, tooltip: { trigger: 'item', formatter: '{b} : {c} ({d}%)' }, toolbox: { show: true }, series: [ { type: 'pie', radius: [50, 250], center: ['50%', '50%'], roseType: 'area', itemStyle: { borderRadius: 8 }, data:datas } ] }; myChart.setOption(option); //########### End ############# </script> </body> </html>
from app import db
class diedata(db.Model):
__tablename__ = "secat"
############## Begin ###########
ID = db.Column(db.Integer, primary_key=True) ##序号 主键
se_cat = db.Column(db.String(255)) ##细分领域
countsecat = db.Column(db.Integer) ##细分领域企业倒闭数
############## End ############
class diedata1(db.Model):
__tablename__ = "financing"
############## Begin ###########
ID = db.Column(db.Integer, primary_key=True) ##序号 主键
financing = db.Column(db.String(255)) ##融资名
countfinanc = db.Column(db.Integer) ##融资个数
############## End ############
from flask import render_template from app.views import index from app import db from app.model.models import diedata from app.model.models import diedata1 @index.route("/fincat") def index1(): selectdata = db.session.query(diedata.se_cat).all() selectdata1 =db.session.query(diedata.countsecat).all() selectdata2=db.session.query(diedata1.financing).all() selectdata3=db.session.query(diedata1.countfinanc).all() list1 =[] list2 = [] list3 = [] list4 = [] ############## Begin ########### for i in selectdata: data = { "se_cat": i.se_cat, } list1.append(data) for j in selectdata1: list2.append(j[0]) for x in selectdata2: data = { "financing": x.financing, } list3.append(data) for y in selectdata3: list4.append(y[0]) return render_template("test3.html", se_cat=list1,countsecat=list2,financing=list3,countfinanc=list4) ############## End ###########
<!DOCTYPE html> <html lang="en"> <head> <meta charset="UTF-8"> <title>企业融资以及细分领域倒闭企业数据</title> <script type="text/javascript" src="../static/js/echarts-all-3.js" ></script> </head> <body> <!--准备一个DOM容器--> <div id="main" style="width: 1500px;height: 650px;"></div> <script> var myChart = echarts.init(document.getElementById('main')); //############## Begin ########### se_cat=[] {% for a in se_cat %} se_cat.push('{{ a.se_cat }}'); {% endfor %} var countsecat={{countsecat|tojson}}; financing=[] {% for b in financing %} financing.push('{{ b.financing }}'); {% endfor %} var countfinanc={{countfinanc|tojson}}; option = { title: [ { left: 'center', text: '细分领域企业倒闭数' }, { top: '55%', left: 'center', text: '企业融资情况' } ], tooltip: { trigger: 'axis' }, legend: { data: ['细分领域', '融资'], left: 10 }, xAxis: [ { data: se_cat }, { data: financing, gridIndex: 1 } ], yAxis: [ {}, { gridIndex: 1 } ], grid: [ { bottom: '60%' }, { top: '60%' } ], series: [ { name:'细分领域', type: 'bar', showSymbol: true, data: countsecat, label:{ show:true }, itemStyle: { normal: { color:'red', lineStyle:{ color:'red' }, label : {show: true} } } }, { name:'融资', type: 'line', showSymbol: true, data: countfinanc, xAxisIndex: 1, yAxisIndex: 1, label:{ show:true }, itemStyle: { normal: { color:'green', lineStyle:{ color:'green' }, label : {show: true} } } } ] }; myChart.setOption(option); //############## End ########### </script> </body> </html>
from app import db
class diedata(db.Model):
__tablename__ = "bornyear"
########### Begin ##########
ID = db.Column(db.Integer, primary_key=True) ##序号 主键
bornyear = db.Column(db.String(255)) ##成立年份
byear = db.Column(db.Integer) ##计数
########### End ##########
class diedata1(db.Model):
__tablename__ = "deathyear"
########### Begin ##########
ID = db.Column(db.Integer, primary_key=True) ##序号 主键
deathyear = db.Column(db.String(255)) ##倒闭年份
dyear = db.Column(db.Integer) ##计数
########### End ##########
from flask import render_template from app.views import index from app import db from app.model.models import diedata from app.model.models import diedata1 @index.route("/ydata") def index1(): ########### Begin ########## selectdata = db.session.query(diedata.bornyear,diedata.byear).all() selectdata1 =db.session.query(diedata1.deathyear,diedata1.dyear).all() list1 =[] list2 = [] list3 = [] list4 = [] for x in selectdata: list1.append(str(x[0])+'年') list2.append(x[1]) for j in selectdata1: list3.append(str(j[0])+'年') list4.append(j[1]) ############ End ############ return render_template("test3.html", bornyear=list1,byear=list2,deathyear=list3,dyear=list4)
<!DOCTYPE html> <html lang="en"> <head> <meta charset="UTF-8"> <title>企业成立年份和倒闭年份</title> <script type="text/javascript" src="../static/js/echarts-all-3.js" ></script> </head> <body> <!--准备一个DOM容器--> <div id="main" style="width: 1500px;height: 650px;"></div> <script> //########### Begin ########### var myChart = echarts.init(document.getElementById('main')); var bornyear={{bornyear|tojson}}; var byear={{byear|tojson}}; var deathyear={{deathyear|tojson}}; var dyear={{dyear|tojson}}; option = { title: [ { left: 'center', text: '企业成立年份柱状图' }, { top: '55%', left: 'center', text: '企业倒闭年份柱状图' } ], tooltip: { trigger: 'axis' }, legend: { data: ['成立年份', '倒闭年份'], left: 10 }, xAxis: [ { data: bornyear }, { data: deathyear, gridIndex: 1 } ], yAxis: [ {}, { gridIndex: 1 } ], grid: [ { bottom: '60%' }, { top: '60%' } ], series: [ { name:'成立年份', type: 'bar', showSymbol: true, data: byear, label:{ show:true }, itemStyle: { normal: { color:'red', lineStyle:{ color:'red' }, label : {show: true} } } }, { name:'倒闭年份', type: 'bar', showSymbol: true, data: dyear, xAxisIndex: 1, yAxisIndex: 1, label:{ show:true }, itemStyle: { normal: { color:'green', lineStyle:{ color:'green' }, label : {show: true} } } } ] }; myChart.setOption(option); //########### End ########### </script> </body> </html>
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。