赞
踩
目录
参考内容https://www.bilibili.com/read/cv15490959/
数据文件、jar包、插件
https://pan.baidu.com/s/1Mpquo0EgkyZtLHrCPIK2Qg?pwd=7w0k
在FineBI6.0\webapps\webroot\WEB-INF\lib下放置jar包
启动FineBI服务器
安装hive隔离插件
选择该文件
重启服务器
在虚拟机后台启动metastore和hiveserver2服务(在hive目录下)
进入beeline客户端
- --hive2://后可以是主机名--
- !connect jdbc:hive2://192.168.224.112:10000
回车然后输入用户名,我的是root,再回车
密码根据自己的填(我没有),回车
如果不成功,就先配置虚拟机中/hadoop父文件夹/hadoop/etc/hadoop/core-site.xml文件
和/hive父文件夹/hive/conf/hive-site.xml文件
然后重启sh,后台挂起metastore,hiveserver2,启动beeline。
在Fine BI上新建hive数据库连接
数据库名称为自己在hive中创建的数据库,主机为虚拟机IP,端口10000,用户名root
创建dgy_30w表(myhive为我自己的数据库),操作在hive和beeline中都可以
- create table myhive.dgy_30w (
- msg_time string comment "消息发送时间",
- sender_name string comment "发送人昵称",
- sender_account string comment "发送人账号",
- sender_sex string comment "发送人性别",
- sender_ip string comment "发送人ip地址",
- sender_os string comment "发送人操作系统",
- sender_phonetype string comment "发送人手机型号",
- sender_network string comment "发送人网络类型",
- sender_gps string comment "发送人的GPS定位",
- receiver_name string comment "接收人呢称",
- receiver_ip string comment "接收人IP",
- receiver_account string comment "接收人账号",
- receiver_os string comment "接收人操作系统",
- receiver_phonetype string comment"接收人手机型号",
- receiver_network string comment "接收人网络类型",
- receiver_gps string comment"接收人的GPS定位",
- receiver_sex string comment"接收人性别",
- msg_type string comment"消息类型",
- distance string comment"双方距离",
- message string comment"消息内容"
- );
上传数据
方法一:
通过Xshell的Xftp把csv文件上传到虚拟机opt目录下
把csv文件数据上传到dgy_30w表中
LOAD DATA LOCAL INPATH '/opt/chat_data-30W.csv' OVERWRITE INTO TABLE dgy_30w;
方法二:
HDFS数据加载
将csv文件上传到hdfs /data下
hdfs dfs -put /opt/chat_data-30W.csv /data
在终端beeline中输入load data inpath '/data/chat_data-30W.csv' into table dgy_30w;
LOAD DATA INPATH '/data/chat_data-30W.csv' OVERWRITE INTO TABLE dgy_30w;
导入成功。
建立dgy_30w_etl表
- create table myhive.dgy_30w_etl (
- msg_time string comment "消息发送时间",
- sender_name string comment "发送人昵称",
- sender_account string comment "发送人账号",
- sender_sex string comment "发送人性别",
- sender_ip string comment "发送人ip地址",
- sender_os string comment "发送人操作系统",
- sender_phonetype string comment "发送人手机型号",
- sender_network string comment "发送人网络类型",
- sender_gps string comment "发送人的GPS定位",
- receiver_name string comment "接收人呢称",
- receiver_ip string comment "接收人IP",
- receiver_account string comment "接收人账号",
- receiver_os string comment "接收人操作系统",
- receiver_phonetype string comment"接收人手机型号",
- receiver_network string comment "接收人网络类型",
- receiver_gps string comment"接收人的GPS定位",
- receiver_sex string comment"接收人性别",
- msg_type string comment"消息类型",
- distance string comment"双方距离",
- message string comment"消息内容",
- msg_day string comment"消息日期(日)",
- msg_hour string comment"消息时间(小时)",
- sender_lng double comment"经度",
- sender_lat double comment"纬度"
- );
开始清洗
- INSERT OVERWRITE TABLE myhive.dgy_30w_etl
- SELECT *,
- to_date(msg_time) As msg_day,
- HOUR(msg_time) As msg_hour,
- SPLIT(sender_gps,',')[0] As sender_lng,
- SPLIT(sender_gps,',')[1] As sender_lat
- FROM myhive. dgy_30w
- WHERE LENGTH(sender_gps)>0;
运行成功,查询
统计今日消息总量
- CREATE TABLE IF NOT EXISTS myhive.tb_rs_total_msg_cnt
- COMMENT"每日消息总量" AS
- SELECT msg_day,COUNT(*) AS total_msg_cnt
- FROM myhive.dgy_30w_etl
- GROUP BY msg_day;
统计每小时消息量、发送和接收用户数
- CREATE TABLE IF NOT EXISTS myhive.tb_rs_hour_msg_cnt
- COMMENT"每小时消息量趋势" AS
- SELECT msg_hour,
- COUNT(*)AS total_msg_cnt,
- COUNT(DISTINCT sender_account)AS sender_user_cnt,
- COUNT(DISTINCT receiver_account)AS receiver_user_cnt
- FROM myhive.dgy_30w_etl GROUP BY msg_hour;
统计今日各地区发送消息总量
- CREATE TABLE IF NOT EXISTS myhive.tb_rs_loc_cnt
- COMMENT"今日各地区发送消息总量"AS
- SELECT
- msg_day,sender_lng,sender_lat,sender_gps,
- COUNT(*)AS total_msg_cnt FROM myhive.dgy_30w_etl
- GROUP BY msg_day,sender_lng,sender_lat,sender_gps;
统计今日发送和接收用户人数
- CREATE TABLE IF NOT EXISTS myhive.tb_rs_user_cnt
- COMMENT"今日发送消息人数、接收消息人数"AS
- SELECT msg_day,
- COUNT(DISTINCT sender_account)AS sender_user_cnt,
- COUNT(DISTINCT receiver_account)AS receiver_user_cnt
- FROM myhive.dgy_30w_etl
- GROUP BY msg_day;
统计发送消息条数最多的Top10用户
- CREATE TABLE IF NOT EXISTS myhive.tb_rs_s_user_top10
- COMMENT"发送消息条数最多的Top10用户"AS
- SELECT sender_name AS username,
- COUNT(*)AS sender_msg_cnt
- FROM myhive.dgy_30w_etl
- GROUP BY sender_name
- ORDER BY sender_msg_cnt DESC
- LIMIT 10;
统计接收消息条数最多的Top10用户
- CREATE TABLE IF NOT EXISTS myhive.tb_rs_r_user_top10
- COMMENT"接收消息条数最多的Top10用户" AS
- SELECT receiver_name AS username,
- COUNT(*)AS receiver_msg_cnt
- FROM myhive.dgy_30w_etl
- GROUP BY receiver_name
- ORDER BY receiver_msg_cnt DESC
- LIMIT 10;
统计发送人的手机型号分布情况
- CREATE TABLE IF NOT EXISTS myhive.tb_rs_sender_phone
- COMMENT"发送人的手机型号分布"AS
- SELECT sender_phonetype,
- COUNT(sender_account)AS cnt
- FROM myhive.dgy_30w_etl
- GROUP BY sender_phonetype;
统计发送人的手机操作系统分布
- CREATE TABLE IF NOT EXISTS myhive.tb_rs_sender_os
- COMMENT"发送人的手机操作系统分布"AS
- SELECT sender_os,
- COUNT(sender_account)AS cnt
- FROM myhive.dgy_30w_etl
- GROUP BY sender_os;
进入myhive数据库,查看创建的十个表
- use myhive;
-
- show tables;
启动服务器
进入FineBI
新建数据集,把数据库表导入FinBI中
更新数据
新建分析主题
选择数据表
底栏选择组件,对相应表选择合适的图表,添加仪表板
在组件中给每个表选择合适的图例,适当调整样式
最终展示
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。