赞
踩
下方有数据可免费下载
原始数据为csv格式,列数比较多
create database if not exists car;
use car; create external table cars( province string, --省份 month int, --月 city string, --市 district string, --区县 year int, --年 model string,--车辆型号 manufacturer string,--制造商 brand string, --品牌 vehicletype string,--车辆类型 ownership string,--所有权 nature string, --使用性质 quantity int,--数量 enginemodel string,--发动机型号 displacement int,--排量 power double, --功率 fuel string,--燃料种类 length1 int,--车长 width1 int,--车宽 height1 int,--车高 length2 int,--厢长 width2 int,--厢宽 height2 int,--厢高 numberofaxles int,--轴数 wheelbase int,--轴距 frontwheelbase int,--前轮距 tirespecification string,--轮胎规格 tirenumber int,--轮胎数 totalquality int,--总质量 completequality int,--整备质量 approvedquality int,--核定载质量 approvedpassenger string,--核定载客 tractionquality int,--准牵引质量 chassisenterprise string,--底盘企业 chassisbrand string,--底盘品牌 chassismodel string,--底盘型号 engineenterprise string,--发动机企业 vehiclename string,--车辆名称 age int,--年龄 gender string --性别 ) row format delimited fields terminated by ',' location '/cars' tblproperties("skip.header.line.count"="1"); --跳过文件行首1行; desc cars;
[hadoop@hadoop000 hive_data]$ hadoop fs -put ./cars.csv /cars
select * from cars limit 10;
select '非营运',sum(if(a.nature='非营运',a.cnt,0)),'营运',sum(if(a.nature!='非营运',a.cnt,0))
from
(select nature,count(*) as cnt
from cars group by nature having nature is not null and nature!='') a;
Total MapReduce CPU Time Spent: 6 seconds 810 msec
OK
非营运 66478 营运 3884
Time taken: 32.365 seconds, Fetched: 1 row(s)
select month,c1.ss/c2.sum
from
(select month,sum(quantity) as ss
from cars where province = '山西省' and year = '2013' group by month ) c1,
(select sum(quantity) as sum
from cars where province = '山西省' and year = '2013') c2;
Total MapReduce CPU Time Spent: 9 seconds 150 msec
OK
1 0.14799181376311077
2 0.05831272561894204
3 0.09306159574770473
4 0.06587362496802251
5 0.0732071288479577
6 0.05547028225462608
7 0.06323015263920867
8 0.06378442909525028
9 0.06948352804070379
10 0.1044882180722549
11 0.10053722179585571
12 0.1045592791563628
Time taken: 53.486 seconds, Fetched: 12 row(s)
select '男性',B.man/(B.man+B.woman),'女性', B.woman/(B.man+B.woman)
from
(select '男性',sum(if(A.gender='男性',A.cnt,0)) as man,'女性',sum(if(A.gender='女性',A.cnt,0)) as woman
from
(select gender,count(*) as cnt
from cars where gender is not null and gender != '' group by gender) A) B;
Total MapReduce CPU Time Spent: 5 seconds 880 msec
OK
男性 0.7010659323952227 女性 0.29893406760477725
Time taken: 34.488 seconds, Fetched: 1 row(s)
select gender,brand,count(*) as cnt
from cars
where gender is not null and gender != '' and age is not null
group by gender,brand
having brand is not null and brand !=' '
order by cnt desc
limit 5;
Total MapReduce CPU Time Spent: 5 seconds 330 msec
OK
男性 五菱 28208
女性 五菱 12004
男性 长安 3679
男性 东风 3214
男性 五菱宏光 2331
Time taken: 33.615 seconds, Fetched: 5 row(s)
select a.cnt,count(*)
from
(select concat(model,ownership,vehicletype) as cnt from cars) a
group by a.cnt;
ZK6726DX3单位大型专用校车 1
ZK6726DXA9单位大型专用校车 4
ZK6729D2单位大型普通客车 2
ZK6729DB单位大型普通客车 16
ZK6731DG1单位大型普通客车 6
ZK6731NG1单位大型普通客车 24
ZK6750D2单位大型普通客车 17
......
select month,vehicletype,count(*) from cars group by vehicletype,month having month is not null and vehicletype is null and vehicletype != '';
select brand,enginemodel,fuel,count(*) from cars group by brand,enginemodel,fuel;
select brand,month,count(*) from cars group by brand,month having brand='五菱';
Total MapReduce CPU Time Spent: 3 seconds 940 msec
OK
五菱 1 5589
五菱 2 2226
五菱 3 3557
五菱 4 2389
五菱 5 3351
五菱 6 2302
五菱 7 2893
五菱 8 2980
五菱 9 3422
五菱 10 5278
五菱 11 4809
五菱 12 4963
Time taken: 16.416 seconds, Fetched: 12 row(s)
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。