
Hive hands-on development, part 2 (creating tables, writing data, loading data into HDFS, dynamic partitions, column-to-row conversion)

Creating a Hive table over a specified HDFS file directory and loading partition data.

1. Creating the table

-- Define the new table
create EXTERNAL table if not exists hdw_psi${g_he_dbpostfix}.psi_nginx_access_log_cnt(
statistics_dt string comment 'statistics date'
,data_dt string comment 'data date'
,d_cnt bigint comment 'daily call count'
,m_cnt bigint comment 'monthly call count'
)
partitioned by (pdate string comment 'partition')
stored as orc  -- store the table in ORC format
tblproperties('creater'='22033799');
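Once the table exists, a quick sanity check is to write a single row into one partition and list the partitions. The sketch below is not from the original article; the hdw_psi_dev database name and the values are placeholders.

-- Hypothetical smoke test for the new table; database name and values are placeholders
insert overwrite table hdw_psi_dev.psi_nginx_access_log_cnt partition(pdate='20230314')
select
    '2023-03-14' as statistics_dt,
    '2023-03-13' as data_dt,
    cast(120 as bigint) as d_cnt,
    cast(3600 as bigint) as m_cnt;

-- Confirm the write landed in the expected partition
show partitions hdw_psi_dev.psi_nginx_access_log_cnt;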

2. Writing data

insert into hdw_psi_dev.test1(mac_id, start_time, end_time, wind_speed) 
select '001', '07:20', '07:49', '2,3' union all
select '001', '11:25', '12:30', '1,2,3' union all
select '002', '10:35', '11:13', '2' union all
select '002', '13:30', '14:15', '3,4' union all
select '002', '20:45', '21:00', '1,2,3';
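The insert above assumes a table hdw_psi_dev.test1 already exists; its DDL is not shown in the article, so the sketch below is only a guess at a compatible definition, with every column typed as string.

-- Assumed definition of hdw_psi_dev.test1 (not from the original article)
create table if not exists hdw_psi_dev.test1(
    mac_id     string comment 'device mac id',
    start_time string comment 'start time HH:mm',
    end_time   string comment 'end time HH:mm',
    wind_speed string comment 'comma-separated wind speed levels'
)
stored as orc;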

3. Loading data into the table from HDFS

-- Load a file that already sits on HDFS into a table partition
-- (without the LOCAL keyword, LOAD DATA moves the file from its HDFS location into the table directory)
load data inpath '/user/udhuangj/spark/test_data_less.csv' into table hdw_psi_dev.opn_learn_day partition(pdate='20230314');
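If the source file lives on the local filesystem of the machine running the Hive client rather than on HDFS, add the LOCAL keyword, which copies the file instead of moving it. The local path below is illustrative only.

-- Local-file variant (path is a placeholder, not from the original article)
load data local inpath '/home/udhuangj/test_data_less.csv'
into table hdw_psi_dev.opn_learn_day partition(pdate='20230314');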

4. Dynamic partitioning

create table hdw_psi_dev.opn_learn_day(
    statistics_dt            string COMMENT 'usage date yyyy-MM-dd',
    mac_id                   string COMMENT 'macid',
    wifi_type                string COMMENT 'WiFitypeId',
    warmup_starttime         string COMMENT 'warm-up start time HH:mm',
    warmup_endtime           string COMMENT 'warm-up end time HH:mm',
    target_temperature       string COMMENT 'target temperature'
) comment 'water-usage detail test table'
PARTITIONED BY (pdate string)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS TEXTFILE;

create table hdw_psi_dev.opn_learn_day_test(
    statistics_dt            string COMMENT 'usage date yyyy-MM-dd',
    mac_id                   string COMMENT 'macid',
    wifi_type                string COMMENT 'WiFitypeId',
    warmup_starttime         string COMMENT 'warm-up start time HH:mm',
    warmup_endtime           string COMMENT 'warm-up end time HH:mm',
    target_temperature       string COMMENT 'target temperature'
) comment 'water-usage detail test table'
PARTITIONED BY (wifi string)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS TEXTFILE;

-- Feed the selected column's values into the partition column dynamically
SET hive.exec.dynamic.partition=true;
SET hive.exec.dynamic.partition.mode=nonstrict;
insert into hdw_psi_dev.opn_learn_day_test partition(wifi) 
select 
    statistics_dt,
    mac_id,
    wifi_type,
    warmup_starttime,
    warmup_endtime,
    target_temperature,
    wifi_type
from hdw_psi_dev.opn_learn_day
where pdate='20230313';
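For comparison, the same rows can be written with a static partition value, in which case the dynamic-partition settings are not needed; the wifi value '2' below is a placeholder and not from the original article. A show partitions afterwards confirms which partitions the dynamic insert actually produced.

-- Static-partition variant for comparison (wifi value is a placeholder)
insert into hdw_psi_dev.opn_learn_day_test partition(wifi='2')
select
    statistics_dt,
    mac_id,
    wifi_type,
    warmup_starttime,
    warmup_endtime,
    target_temperature
from hdw_psi_dev.opn_learn_day
where pdate='20230313' and wifi_type='2';

-- List the partitions created by the dynamic insert
show partitions hdw_psi_dev.opn_learn_day_test;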

5. Aggregating data by mac_id with group by + collect_list

For small data volumes, concat_ws is not strictly needed; with larger volumes, wrap collect_list in concat_ws so the collected array is flattened into a single delimited string.

-- Small batch: collect_list only
insert overwrite table hdw_psi_dev.opn_learn_day partition(pdate='20230330')
select 
    '20230330' as statistics_dt,
    t1.mac_id,
    t1.wifi_type,
    t1.warmup_starttime,
    t1.warmup_endtime,
    t1.target_temperature
from (
    select 
        mac_id,
        collect_list(distinct wifi_type) as wifi_type,
        collect_list(warmup_starttime) as warmup_starttime,
        collect_list(warmup_endtime) as warmup_endtime,
        collect_list(target_temperature) as target_temperature
    from hdw_psi_dev.opn_learn_day
    where pdate = '20230313'
    group by mac_id
    ) as t1;

-- Large batch: collect_list wrapped in concat_ws
insert overwrite table hdw_psi_dev.opn_learn_day partition(pdate='20230330')
select 
    '20230330' as statistics_dt,
    t1.mac_id,
    t1.wifi_type,
    t1.warmup_starttime,
    t1.warmup_endtime,
    t1.target_temperature
from (
    select 
        mac_id,
        concat_ws('-', collect_list(distinct wifi_type)) as wifi_type,
        concat_ws('-', collect_list(warmup_starttime)) as warmup_starttime,
        concat_ws('-', collect_list(warmup_endtime)) as warmup_endtime,
        concat_ws('-', collect_list(target_temperature)) as target_temperature
    from hdw_psi_dev.opn_learn_day
    where pdate = '20230313'
    group by mac_id
    ) as t1;
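The "column to row" direction mentioned in the title is the reverse of this aggregation: lateral view explode(split(...)) expands a delimited string back into one row per value. The query below is a sketch against the data written above; the alias names are illustrative.

-- Expand the '-'-joined warmup_starttime values back into individual rows
select
    t.mac_id,
    single_starttime
from hdw_psi_dev.opn_learn_day t
lateral view explode(split(t.warmup_starttime, '-')) tmp as single_starttime
where t.pdate = '20230330';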