赞
踩
- CREATE TABLE IF NOT EXISTS test_dim_douyin_users
- (
- user_id INT COMMENT '用户ID',
- username STRING COMMENT '用户名',
- fans_cnt INT COMMENT '粉丝数',
- follow_cnt INT COMMENT '关注数',
- video_cnt INT COMMENT '视频数量',
- create_time STRING COMMENT '创建时间',
- update_time STRING COMMENT '修改时间',
- start_date STRING COMMENT '用户表同步日期和失效日期',
- end_date STRING COMMENT '最新用户表日期和更新日期'
- )
- COMMENT '抖音用户表'
- PARTITIONED BY (part_date STRING)
- STORED AS ORC;
-
- INSERT OVERWRITE TABLE test_dim_douyin_users PARTITION (part_date = '9999-12-31')
- VALUES (1, '大狼狗夫妇', 12, 5, 1, '2023-09-25 10:00:00', '2023-09-26 11:00:00', '2023-09-25', '9999-12-31');
-
-
- --创建增量表
- CREATE TABLE IF NOT EXISTS test_dim_douyin_users_incr
- (
- user_id INT COMMENT '用户ID',
- username STRING COMMENT '用户名',
- fans_cnt INT COMMENT '粉丝数',
- follow_cnt INT COMMENT '关注数',
- video_cnt INT COMMENT '视频数量',
- create_time STRING COMMENT '创建时间',
- update_time STRING COMMENT '修改时间'
- )
- COMMENT '抖音用户表-每日增量表'
- STORED AS ORC;
-
- INSERT OVERWRITE TABLE test_dim_douyin_users_incr
- VALUES (1, '大狼狗夫妇', 300, 5, 11, '2023-09-25 10:00:00', '2023-09-27 15:36:00');
- --可以将'2023-09-28'替换为${part_date},然后使用动态分区
- --SET hive.exec.dynamic.partition.mode=nonstrict;
-
-
- WITH temp1 AS (
- SELECT
- user_id,
- username,
- fans_cnt,
- follow_cnt,
- video_cnt,
- create_time,
- update_time,
- start_date,
- end_date
- FROM
- test_dim_douyin_users
- WHERE
- part_date = '9999-12-31'
-
- UNION ALL
- SELECT
- user_id,
- username,
- fans_cnt,
- follow_cnt,
- video_cnt,
- create_time,
- update_time,
- '2023-09-27' start_date,
- '9999-12-31' end_date
- FROM
- (
- SELECT
- user_id,
- username,
- fans_cnt,
- follow_cnt,
- video_cnt,
- create_time,
- update_time,
- row_number() over (partition by user_id order by update_time desc) as rn
- FROM
- test_dim_douyin_users_incr
- WHERE
- date_format(create_time, 'yyyy-MM-dd') = '2023-09-27'
- OR date_format(update_time, 'yyyy-MM-dd') = '2023-09-27'
- ) t1
- WHERE
- rn = 1
- ), temp2 AS (
- SELECT
- user_id,
- username,
- fans_cnt,
- follow_cnt,
- video_cnt,
- create_time,
- update_time,
- start_date,
- end_date,
- row_number() over (partition by user_id order by update_time desc) rn
- FROM
- temp1
- )
-
-
- INSERT OVERWRITE TABLE test_dim_douyin_users PARTITION (part_date)
- SELECT
- user_id,
- username,
- fans_cnt,
- follow_cnt,
- video_cnt,
- create_time,
- update_time,
- start_date,
- if(rn = 1, '9999-12-31', date_sub('2023-09-27', 1)) end_date,
- if(rn = 1, '9999-12-31', date_sub('2023-09-27', 1)) part_date
- FROM
- temp2;
-
- INSERT OVERWRITE TABLE test_dim_douyin_users_incr
- VALUES (1, '大狼狗夫妇', 1000, 5, 14, '2023-09-25 10:00:00', '2023-09-28 15:37:00');
-
-
-
-
- WITH temp1 AS (
- SELECT
- user_id,
- username,
- fans_cnt,
- follow_cnt,
- video_cnt,
- create_time,
- update_time,
- start_date,
- end_date
- FROM
- test_dim_douyin_users
- WHERE
- part_date = '9999-12-31'
-
- UNION ALL
- SELECT
- user_id,
- username,
- fans_cnt,
- follow_cnt,
- video_cnt,
- create_time,
- update_time,
- '2023-09-28' start_date,
- '9999-12-31' end_date
- FROM
- (
- SELECT
- user_id,
- username,
- fans_cnt,
- follow_cnt,
- video_cnt,
- create_time,
- update_time,
- row_number() over (partition by user_id order by update_time desc) as rn
- FROM
- test_dim_douyin_users_incr
- WHERE
- date_format(create_time, 'yyyy-MM-dd') = '2023-09-28'
- OR date_format(update_time, 'yyyy-MM-dd') = '2023-09-28'
- ) t1
- WHERE
- rn = 1
- ), temp2 AS (
- SELECT
- user_id,
- username,
- fans_cnt,
- follow_cnt,
- video_cnt,
- create_time,
- update_time,
- start_date,
- end_date,
- row_number() over (partition by user_id order by update_time desc) rn
- FROM
- temp1
- )
-
-
- INSERT OVERWRITE TABLE test_dim_douyin_users PARTITION (part_date)
- SELECT
- user_id,
- username,
- fans_cnt,
- follow_cnt,
- video_cnt,
- create_time,
- update_time,
- start_date,
- if(rn = 1, '9999-12-31', date_sub('2023-09-28', 1)) end_date,
- if(rn = 1, '9999-12-31', date_sub('2023-09-28', 1)) part_date
- FROM
- temp2;
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。