赞
踩
DWD要处理的表有两张:订单表、订单产品表。
订单表是周期性事实表;为保留订单状态,可以使用拉链表进行处理;
订单产品表普通的事实表,用常规的方法进行处理;
1. 如果有数据清洗、数据转换的业务需求,ODS => DWD
2. 如果没有数据清洗、数据转换的业务需求,保留在ODS,不做任何变化【没有业务处理了】
订单状态:
订单从创建到最终完成,是有时间限制的;业务上也不允许订单在一个月之后,状态仍然在发生变化;
订单表建表:
与维表不同,订单事实表的记录数非常多
订单有生命周期;订单的状态不可能永远处于变化之中(订单的生命周期一般在15天左右)
订单是一个拉链表,而且是分区表
分区的目的:订单一旦终止,不会重复计算
分区的条件:订单创建日期;保证相同的订单在用一个分区
-- 订单事实表(拉链表) create table dwd.dwd_trade_orders( `orderId` int, `orderNo` string, `userId` bigint, `status` tinyint, `productMoney` decimal, `totalMoney` decimal, `payMethod` tinyint, `isPay` tinyint, `areaId` int, `tradeSrc` tinyint, `tradeType` int, `isRefund` tinyint, `dataFlag` tinyint, `createTime` string, `payTime` string, `modifiedTime` string, `start_date` string, `end_date` string ) COMMENT '订单事实拉链表' partitioned by (dt string) STORED AS PARQUET;
订单表数据加载:
#!/bin/bash source /etc/profile if [ -n "$1" ] then do_date=$1 else do_date=`date -d "-1 day" +%F` fi sql=" set hive.exec.dynamic.partition.mode=nonstrict; set hive.exec.dynamic.partition=true; INSERT OVERWRITE TABLE dwd.dwd_trade_orders partition(dt) SELECT orderId, orderNo, userId, status, productMoney, totalMoney, payMethod, isPay, areaId, tradeSrc, tradeType, isRefund, dataFlag, createTime, payTime, modifiedTime, case when modifiedTime is not null then from_unixtime(unix_timestamp(modifiedTime,'yyyy-MM-dd HH:mm:ss'),'yyyy-MM-dd') else from_unixtime(unix_timestamp(createTime,'yyyy-MM-dd HH:mm:ss'), 'yyyy-MM-dd') end as start_date, '9999-12-31' as end_date, from_unixtime(unix_timestamp(createTime, 'yyyy-MM-dd HH:mm:ss'), 'yyyy-MM-dd') as dt FROM ods.ods_trade_orders WHERE dt='$do_date' union all SELECT A.orderId, A.orderNo, A.userId, A.status, A.productMoney, A.totalMoney, A.payMethod, A.isPay, A.areaId, A.tradeSrc, A.tradeType, A.isRefund, A.dataFlag, A.createTime, A.payTime, A.modifiedTime, A.start_date, CASE WHEN B.orderid IS NOT NULL AND A.end_date > '$do_date' THEN date_add('$do_date', -1) ELSE A.end_date END AS end_date, from_unixtime(unix_timestamp(A.createTime, 'yyyy-MM-dd HH:mm:ss'), 'yyyy-MM-dd') as dt FROM (SELECT * FROM dwd.dwd_trade_orders WHERE dt>date_add('$do_date', -15)) A left outer join (SELECT * FROM ods.ods_trade_orders WHERE dt='$do_date') B ON A.orderId = B.orderId; " hive -e "$sql"
dws_trade_orders(订单明细)由以下表轻微聚合而成
dws_trade_orders_w(订单明细宽表)由以下表组成:
-- 订单明细表(轻度汇总事实表)。每笔订单的明细 DROP TABLE IF EXISTS dws.dws_trade_orders; create table if not exists dws.dws_trade_orders( orderid string, -- 订单id cat_3rd_id string, -- 商品三级分类id shopid string, -- 店铺id paymethod tinyint, -- 支付方式 productsnum bigint, -- 商品数量 paymoney double, -- 订单商品明细金额 paytime string -- 订单时间 ) partitioned by (dt string) STORED AS PARQUET; -- 订单明细表宽表 DROP TABLE IF EXISTS dws.dws_trade_orders_w; create table if not exists dws.dws_trade_orders_w( orderid string, -- 订单id cat_3rd_id string, -- 商品三级分类id thirdname string, -- 商品三级分类名称 secondname string, -- 商品二级分类名称 firstname string, -- 商品一级分类名称 shopid string, -- 店铺id shopname string, -- 店铺名 regionname string, -- 店铺所在大区 cityname string, -- 店铺所在城市 paymethod tinyint, -- 支付方式 productsnum bigint, -- 商品数量 paymoney double, -- 订单明细金额 paytime string -- 订单时间 ) partitioned by (dt string) STORED AS PARQUET;
DWS层订单明细表及订单明细宽表加载数据:
#!/bin/bash source /etc/profile if [ -n "$1" ] then do_date=$1 else do_date=`date -d "-1 day" +%F` fi sql=" insert overwrite table dws.dws_trade_orders partition(dt='$do_date') select t1.orderid as orderid, t3.categoryid as cat_3rd_id, t3.shopid as shopid, t1.paymethod as paymethod, t2.productnum as productsnum, t2.productnum*t2.productprice as pay_money, t1.paytime as paytime from (select orderid, paymethod, paytime from dwd.dwd_trade_orders where dt='$do_date') T1 left join (select orderid, productid, productnum, productprice from ods.ods_trade_order_product where dt='$do_date') T2 on t1.orderid = t2.orderid left join (select productid, shopid, categoryid from dim.dim_trade_product_info where start_dt <= '$do_date' and end_dt >= '$do_date' ) T3 on t2.productid=t3.productid; insert overwrite table dws.dws_trade_orders_w partition(dt='$do_date') select t1.orderid, t1.cat_3rd_id, t2.thirdname, t2.secondname, t2.firstname, t1.shopid, t3.shopname, t3.regionname, t3.cityname, t1.paymethod, t1.productsnum, t1.paymoney, t1.paytime from (select orderid, cat_3rd_id, shopid, paymethod, productsnum, paymoney, paytime from dws.dws_trade_orders where dt='$do_date') T1 join (select thirdid, thirdname, secondid, secondname,firstid, firstname from dim.dim_trade_product_cat where dt='$do_date') T2 on T1.cat_3rd_id = T2.thirdid join (select shopid, shopname, regionname, cityname from dim.dim_trade_shops_org where dt='$do_date') T3 on T1.shopid = T3.shopid " hive -e "$sql"
ADS层订单分析表需求:计算当天
ADS层订单分析表
create table if not exists ads.ads_trade_order_analysis(
areatype string, -- 区域范围:区域类型(全国、大区、城市)
regionname string, -- 区域名称
cityname string, -- 城市名称
categorytype string, -- 商品分类类型(一级、二级)
category1 string, -- 商品一级分类名称
category2 string, -- 商品二级分类名称
totalcount bigint, -- 订单数量
total_productnum bigint, -- 商品数量
totalmoney double -- 支付金额
)
partitioned by (dt string)
row format delimited fields terminated by ',';
ADS层订单分析表加载数据
#!/bin/bash source /etc/profile if [ -n "$1" ] then do_date=$1 else do_date=`date -d "-1 day" +%F` fi sql=" with mid_orders as ( select regionname, cityname, firstname category1, secondname category2, count(distinct orderid) as totalcount, sum(productsnum) as total_productnum, sum(paymoney) as totalmoney from dws.dws_trade_orders_w where dt='$do_date' group by regionname, cityname, firstname, secondname ) insert overwrite table ads.ads_trade_order_analysis partition(dt='$do_date') select '全国' as areatype, '' as regionname, '' as cityname, '' as categorytype, '' as category1, '' as category2, sum(totalcount), sum(total_productnum), sum(totalmoney) from mid_orders union all select '全国' as areatype, '' as regionname, '' as cityname, '一级' as categorytype, category1, '' as category2, sum(totalcount), sum(total_productnum), sum(totalmoney) from mid_orders group by category1 union all select '全国' as areatype, '' as regionname, '' as cityname, '二级' as categorytype, '' as category1, category2, sum(totalcount), sum(total_productnum), sum(totalmoney) from mid_orders group by category2 union all select '大区' as areatype, regionname, '' as cityname, '' as categorytype, '' as category1, '' as category2, sum(totalcount), sum(total_productnum), sum(totalmoney) from mid_orders group by regionname union all select '大区' as areatype, regionname, '' as cityname, '一级' as categorytype, category1, '' as category2, sum(totalcount), sum(total_productnum), sum(totalmoney) from mid_orders group by regionname, category1 union all select '大区' as areatype, regionname, '' as cityname, '二级' as categorytype, '' as category1, category2, sum(totalcount), sum(total_productnum), sum(totalmoney) from mid_orders group by regionname, category2 union all select '城市' as areatype, '' as regionname, cityname, '' as categorytype, '' as category1, '' as category2, sum(totalcount), sum(total_productnum), sum(totalmoney) from mid_orders group by cityname union all select '城市' as areatype, '' as regionname, cityname, '一级' as categorytype, category1, '' as category2, sum(totalcount), sum(total_productnum), sum(totalmoney) from mid_orders group by cityname, category1 union all select '城市' as areatype, '' as regionname, cityname, '二级' as categorytype, '' as category1, category2, sum(totalcount), sum(total_productnum), sum(totalmoney) from mid_orders group by cityname, category2; " hive -e "$sql"
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。