赞
踩
拉链表制作
从拉链表取出快照
1、建库
DROP DATABASE IF EXISTS b0 CASCADE;
CREATE DATABASE b0;
USE b0;
2、新增和变化表(按天分区)
DROP TABLE IF EXISTS ods_user;
CREATE TABLE ods_user(i STRING,balance INT) PARTITIONED BY (ymd DATE);
3、拉链表
DROP TABLE IF EXISTS dwd_user;
CREATE TABLE dwd_user(i STRING,balance INT,start_date DATE,end_date DATE);
此处为一次性插入,实际场景是每天插入
INSERT INTO ods_user (i,balance,ymd) VALUES
('a1',100,'2020-01-01'),
('a2',0,'2020-01-01'),
('a1',300,'2020-01-02'),
('a3',200,'2020-01-02'),
('a1',400,'2020-01-03'),
('a3',500,'2020-01-03');
SELECT * FROM ods_user;
SET ymd='2020-01-01';
INSERT OVERWRITE TABLE dwd_user
SELECT
i,
balance,
${hiveconf:ymd} start_date,
'9999-12-31' end_date
FROM ods_user WHERE ymd=${hiveconf:ymd};
SELECT * FROM dwd_user;
查看数据
SET ymd=TO_DATE('2020-01-02'); INSERT OVERWRITE TABLE dwd_user SELECT * FROM ( SELECT i, balance, ${hiveconf:ymd} start_date, TO_DATE('9999-12-31') end_date FROM ods_user WHERE ymd=${hiveconf:ymd} UNION ALL SELECT a.i, a.balance , a.start_date, IF(b.i IS NULL,a.end_date,b.ymd)end_date FROM dwd_user a LEFT JOIN( SELECT * FROM ods_user WHERE ymd=${hiveconf:ymd} )b ON CONCAT(a.i,a.end_date)=CONCAT(b.i,'9999-12-31') )c;
-- 全表
SELECT * FROM dwd_user;
-- 最新数据
SELECT * FROM dwd_user WHERE end_date='9999-12-31';
-- 快照2020-01-01
SELECT * FROM dwd_user WHERE start_date<='2020-01-01' AND end_date>'2020-01-01';
查看数据
SET ymd=TO_DATE('2020-01-03'); INSERT OVERWRITE TABLE dwd_user SELECT * FROM ( SELECT i, balance, ${hiveconf:ymd} start_date, TO_DATE('9999-12-31') end_date FROM ods_user WHERE ymd=${hiveconf:ymd} UNION ALL SELECT a.i, a.balance , a.start_date, IF(b.i IS NULL,a.end_date,b.ymd)end_date FROM dwd_user a LEFT JOIN( SELECT * FROM ods_user WHERE ymd=${hiveconf:ymd} )b ON CONCAT(a.i,a.end_date)=CONCAT(b.i,'9999-12-31') )c;
-- 全表
SELECT * FROM dwd_user;
-- 最新数据
SELECT * FROM dwd_user WHERE end_date='9999-12-31';
-- 快照2020-01-02
SELECT * FROM dwd_user WHERE start_date<='2020-01-02' AND end_date>'2020-01-02';
-- 快照2020-01-01
SELECT * FROM dwd_user WHERE start_date<='2020-01-01' AND end_date>'2020-01-01';
查看数据
UNION ALL
时,两个表的数据类型要一致DATE
,用STRING
也行JOIN ON
多字段,也可单字段2021-02-02
最新数据写到9999-12-31
分区,过期数据写到-1
的2021-02-01
分区静态分区的写法
INSERT OVERWRITE TABLE xxx_t1 PARTITION(dt='2021-06-15')
SELECT
xxx
FROM xxx_t0
WHERE dt='2021-06-15';
INSERT OVERWRITE TABLE xxx_t1 PARTITION(dt='2021-06-16')
SELECT
xxx
FROM xxx_t0
WHERE dt='2021-06-16';
动态分区写法(被SELECT
的dt
要在最后一行)
INSERT OVERWRITE TABLE xxx_t1 PARTITION(dt)
SELECT
xxx,
dt
FROM xxx_t0
WHERE dt='2021-06-15' OR dt='2021-06-16';
使用动态分区,需要提前设置参数
SET hive.exec.dynamici.partition=true;
SET hive.exec.dynamic.partition.mode=nonstrict;
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。