赞
踩
要保证所需要的数据全部到达数仓。
etl,和数据的计算校验,确保输出的数据准确。
确保输出端口一致,防止输出数据不准。
每天的定时调度。
表名,字段名要进行规范化处理。
确保数仓稳定。
1:建表语句
-- 建表 --drop table if exists dm.dim_dk_vehicle_info_dqc; create table if not exists dim.xxxx ( chk_diff string comment '僵尸表校验' ,duplicate_pk_cnt string comment '重复主键数' ,null_cnt string comment '非空数量' ,false_value_cnt string comment '内容判断' ,active_num string comment '同一车辆激活时间俩天有不同的数量' ,produce_num string comment '同一车辆生产时间俩天有不同的数量' ) comment 'aaaa' partitioned by ( p_dt string comment '分区日期(yyyy-MM-dd)') row format delimited fields terminated by '\t' collection items terminated by '\n' stored as orc ; -- 建表 --drop table if exists ibd_dw_dm.dws_data_monitor_dim_dk_vehicle; create table ibd_dw_dm.dim_dk_vehicle_info_dqc (p_dt varchar(20) comment '时间' ,chk_diff bigint comment '僵尸表校验' ,duplicate_pk_cnt bigint comment '重复主键数' ,null_cnt bigint comment'非空数量' ,false_value_cnt bigint comment '内容判断' ,active_num bigint comment '同一车辆激活时间俩天有不同的数量' ,produce_num bigint comment '同一车辆生产时间俩天有不同的数量' ,PRIMARY KEY (p_dt) ) comment '监控dim层车辆信息表' ;
2。sql
--当天 drop table if exists temp_db.temp_data_monitor_table_info_day_02; create table temp_db.temp_data_monitor_table_info_day_02 as select * from dim.aaaa where p_dt = '${dealDate}' ; -- 前一天 drop table if exists temp_db.temp_data_monitor_table_info_day_02_yesterday; create table temp_db.temp_data_monitor_table_info_day_02_yesterday as select * from dim.aaaa where p_dt = date_sub('${dealDate}', 1) ; -- 主键 with tmp_pk as ( select '${dealDate}' as p_dt , count(1) as duplicate_pk_cnt -- 重复主键数 from ( select vehicle_id , ble_id , manufacturer_id , manufacturer_name ,model_brand ,produce_time , count(1) as cnt from temp_db.temp_data_monitor_table_info_day_02 group by vehicle_id , ble_id , manufacturer_id , manufacturer_name ,model_brand ,produce_time having cnt > 1 ) t ) , -- 关键字段非空 tmp_null as ( select '${dealDate}' as p_dt , count(1) - count(concat(vehicle_id,ble_id,produce_time,manufacturer_id)) as null_cnt -- 非空数量 from temp_db.temp_data_monitor_table_info_day_02 ) , -- 数据量和僵尸表--数据条数是否多于昨天 tmp_cnt as ( select '${dealDate}' as p_dt , count(a.vehicle_id) as last_cnt -- 前一天数据量 , count(b.vehicle_id) as cur_cnt -- 当天数据量 , case when count(case when a.vehicle_id is null then b.vehicle_id end) > 0 or count(case when b.vehicle_id is null then a.vehicle_id end) > 0 then 0 else 1 end as chk_diff -- 僵尸表校验 from temp_db.temp_data_monitor_table_info_day_02_yesterday a -- 前一天 full join temp_db.temp_data_monitor_table_info_day_02 b -- 当天 on a.vehicle_id = b.vehicle_id and a.ble_id = b.ble_id and a.manufacturer_id = b.manufacturer_id and a.manufacturer_name = b.manufacturer_name and a.model_brand = b.model_brand and a.produce_time = b.produce_time ) , -- 字段值--车厂id和name是否匹配 tmp_value as ( select '${dealDate}' as p_dt ,'dim_dk_vehicle_info' as table_name ,sum(case when a.manufacturer_name =name then 0 else 1 end) as false_value_cnt from temp_db.temp_data_monitor_table_info_day_02 as a left join dim.dim_dk_common_manufacturer_info as b on a.manufacturer_id=b.id ) , -- 对激活日期进行校验 tmp_active as ( select '${dealDate}' as p_dt ,count(*) as active_num from temp_db.temp_data_monitor_table_info_day_02_yesterday as a left join temp_db.temp_data_monitor_table_info_day_02 as b on a.vehicle_id=b.vehicle_id where a.first_active_time <> b.first_active_time ) , -- 对生产日期进行校验 tmp_produce as ( select '${dealDate}' as p_dt ,count(*) as produce_num from temp_db.temp_data_monitor_table_info_day_02_yesterday as a left join temp_db.temp_data_monitor_table_info_day_02 as b on a.vehicle_id=b.vehicle_id where a.produce_time <> b.produce_time ) insert overwrite table dim.bbbb partition(p_dt='${dealDate}') select case when a.chk_diff = 0 then 1 else 0 end as chk_diff -- 数据条数是否多于昨天 ,case when b.duplicate_pk_cnt=0 then 1 else 0 end as duplicate_pk_cnt -- 重复主键数 ,case when c.null_cnt-3 = 0 then 1 else 0 end as null_cnt -- 非空数量 ,case when d.false_value_cnt = 0 then 1 else 0 end as false_value_cnt --车厂id和name是否匹配 ,case when e.active_num = 0 then 1 else 0 end as active_num --同一车辆激活时间俩天有不同的数量 ,case when f.produce_num = 0 then 1 else 0 end as produce_num --同一车辆生产时间俩天有不同的数量 from tmp_cnt a left join tmp_pk b on a.p_dt = b.p_dt left join tmp_null c on a.p_dt = c.p_dt left join tmp_value d on a.p_dt = d.p_dt left join tmp_active e on a.p_dt = e.p_dt left join tmp_produce f on a.p_dt = f.p_dt ;
给提需求方做报表展示。
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。