离线数仓-8-数据仓库开发DWD层-工具域&互动域&流量域&用户域相关事实表
- 离线数仓-8-数据仓库开发DWD层设计要点-工具域&互动域&流量域&用户域相关事实表
- 一、工具域相关事实表
- 1.工具域优惠券领取事务事实表&使用(下单)事务事实表&使用(支付)事务事实表
- 1.事务事实表 前期梳理
- 2.事务事实表 DDL表设计分析
- 1.优惠券领取事务事实表 DDL
- 2.优惠券使用(下单)事务事实表 DDL
- 3.优惠券使用(支付)事务事实表 DDL
- 3.事务事实表 加载数据分析
- 1.优惠券领取事务事实表 加载数据
- 2.优惠券使用(下单)事务事实表 加载数据
- 3.优惠券使用(支付)事务事实表 加载数据
- 二、互动域相关事实表
- 1.互动域收藏商品事务事实表&评价事务事实表
- 1.事务事实表 前期梳理
- 2.事务事实表 DDL表设计分析
- 3.事务事实表 加载数据分析
- 2.互动域评价事务事实表
- 1.事务事实表 前期梳理
- 2.事务事实表 DDL表设计分析
- 3.事务事实表 加载数据分析
- 三、流量域相关事实表
- 1.流量域页面浏览事务事实表
- 1.事务事实表 前期梳理
- 2.事务事实表 DDL表设计分析
- 3.事务事实表 加载数据分析
- 2.流量域启动事务事实表
- 1.启动事务事实表 前期梳理
- 2.启动事务事实表 DDL表设计分析
- 3.启动事务事实表 加载数据分析
- 3.流量域动作事务事实表
- 1.动作事务事实表 前期梳理
- 2.动作事务事实表 DDL表设计分析
- 3.动作事务事实表 加载数据分析
- 4.流量域错误事务事实表
- 1.错误事务事实表 前期梳理
- 2.错误事务事实表 DDL表设计分析
- 3.错误事务事实表 加载数据分析
- 四、用户域相关事实表
- 1.用户域用户注册事务事实表
- 1.用户注册事务事实表 前期梳理
- 2.用户注册事务事实表 DDL表设计分析
- 3.用户注册事务事实表 加载数据分析
- 1.首日全量装载数据
- 2.每日增量装载数据
- 2.用户域用户登录事务事实表
- 1.用户登录事务事实表 前期梳理
- 2.用户登录事务事实表 DDL表设计分析
- 3.用户登录事务事实表 加载数据分析
- 五、建表语句脚本
- 六、首日装载脚本
- 七、每日装载脚本
离线数仓-8-数据仓库开发DWD层设计要点-工具域&互动域&流量域&用户域相关事实表
一、工具域相关事实表
1.工具域优惠券领取事务事实表&使用(下单)事务事实表&使用(支付)事务事实表
1.事务事实表 前期梳理
- 关联的表格coupon_info
- 领取后,会新增一条数据
- 下单后,会更新状态,以及更新时间
- 支付后,会更新状态,以及更新时间
- 有的业务过程没有明显的度量值,所以就不需要在创建表格的时候硬要体现出来
2.事务事实表 DDL表设计分析
1.优惠券领取事务事实表 DDL
-- Tool domain: coupon-receive transaction fact table.
-- External ORC table with snappy compression, one partition per dt.
DROP TABLE IF EXISTS dwd_tool_coupon_get_inc;
CREATE EXTERNAL TABLE dwd_tool_coupon_get_inc
(
    `id`        STRING COMMENT '编号',
    `coupon_id` STRING COMMENT '优惠券ID',
    `user_id`   STRING COMMENT 'userid',
    `date_id`   STRING COMMENT '日期ID',
    `get_time`  STRING COMMENT '领取时间'
)
    COMMENT '优惠券领取事务事实表'
    PARTITIONED BY (`dt` STRING)
    STORED AS ORC
    LOCATION '/warehouse/gmall/dwd/dwd_tool_coupon_get_inc/'
    TBLPROPERTIES ("orc.compress" = "snappy");
2.优惠券使用(下单)事务事实表 DDL
-- Tool domain: coupon-used-at-order transaction fact table.
-- External ORC table with snappy compression, one partition per dt.
DROP TABLE IF EXISTS dwd_tool_coupon_order_inc;
CREATE EXTERNAL TABLE dwd_tool_coupon_order_inc
(
    `id`         STRING COMMENT '编号',
    `coupon_id`  STRING COMMENT '优惠券ID',
    `user_id`    STRING COMMENT 'user_id',
    `order_id`   STRING COMMENT 'order_id',
    `date_id`    STRING COMMENT '日期ID',
    `order_time` STRING COMMENT '使用下单时间'
)
    COMMENT '优惠券使用下单事务事实表'
    PARTITIONED BY (`dt` STRING)
    STORED AS ORC
    LOCATION '/warehouse/gmall/dwd/dwd_tool_coupon_order_inc/'
    TBLPROPERTIES ("orc.compress" = "snappy");
3.优惠券使用(支付)事务事实表 DDL
-- Tool domain: coupon-used-at-payment transaction fact table.
-- External ORC table with snappy compression, one partition per dt.
-- Fix: the payment_time column comment used to repeat the order-time text
-- and now describes the payment time.
DROP TABLE IF EXISTS dwd_tool_coupon_pay_inc;
CREATE EXTERNAL TABLE dwd_tool_coupon_pay_inc
(
    `id`           STRING COMMENT '编号',
    `coupon_id`    STRING COMMENT '优惠券ID',
    `user_id`      STRING COMMENT 'user_id',
    `order_id`     STRING COMMENT 'order_id',
    `date_id`      STRING COMMENT '日期ID',
    `payment_time` STRING COMMENT '使用支付时间'
)
    COMMENT '优惠券使用支付事务事实表'
    PARTITIONED BY (`dt` STRING)
    STORED AS ORC
    LOCATION '/warehouse/gmall/dwd/dwd_tool_coupon_pay_inc/'
    TBLPROPERTIES ("orc.compress" = "snappy");
3.事务事实表 加载数据分析
1.优惠券领取事务事实表 加载数据
- 首日全量数据加载
-- First-day load of coupon-receive facts from the 2020-06-14 bootstrap snapshot.
-- The last select column feeds the dynamic dt partition, so every record is
-- written to the partition of the day the coupon was received.
-- Every partition column is dynamic here, which Hive rejects in strict mode,
-- so nonstrict mode is enabled first.
set hive.exec.dynamic.partition.mode=nonstrict;
INSERT OVERWRITE TABLE dwd_tool_coupon_get_inc PARTITION (dt)
SELECT
    data.id,
    data.coupon_id,
    data.user_id,
    date_format(data.get_time, 'yyyy-MM-dd') AS date_id,
    data.get_time,
    date_format(data.get_time, 'yyyy-MM-dd') AS dt
FROM ods_coupon_use_inc
WHERE dt = '2020-06-14'
  AND type = 'bootstrap-insert';
- 每日增量数据加载
-- Daily load of coupon-receive facts for 2020-06-15.
-- Only insert-type change records of that day are taken and written to the
-- static dt partition.
INSERT OVERWRITE TABLE dwd_tool_coupon_get_inc PARTITION (dt = '2020-06-15')
SELECT
    data.id,
    data.coupon_id,
    data.user_id,
    date_format(data.get_time, 'yyyy-MM-dd') AS date_id,
    data.get_time
FROM ods_coupon_use_inc
WHERE dt = '2020-06-15'
  AND type = 'insert';
2.优惠券使用(下单)事务事实表 加载数据
- 首日全量数据加载
-- First-day load of coupon-used-at-order facts from the 2020-06-14 bootstrap snapshot.
-- Only coupons that already carry a using_time are kept. The last select
-- column feeds the dynamic dt partition.
-- Every partition column is dynamic here, which Hive rejects in strict mode,
-- so nonstrict mode is enabled first.
set hive.exec.dynamic.partition.mode=nonstrict;
INSERT OVERWRITE TABLE dwd_tool_coupon_order_inc PARTITION (dt)
SELECT
    data.id,
    data.coupon_id,
    data.user_id,
    data.order_id,
    date_format(data.using_time, 'yyyy-MM-dd') AS date_id,
    data.using_time,
    date_format(data.using_time, 'yyyy-MM-dd') AS dt
FROM ods_coupon_use_inc
WHERE dt = '2020-06-14'
  AND type = 'bootstrap-insert'
  AND data.using_time IS NOT NULL;
- 每日增量数据加载
-- Daily load of coupon-used-at-order facts for 2020-06-15.
-- A row qualifies when it is an update record whose old-value map contains
-- the using_time key, meaning using_time was changed by that update.
INSERT OVERWRITE TABLE dwd_tool_coupon_order_inc PARTITION (dt = '2020-06-15')
SELECT
    data.id,
    data.coupon_id,
    data.user_id,
    data.order_id,
    date_format(data.using_time, 'yyyy-MM-dd') AS date_id,
    data.using_time
FROM ods_coupon_use_inc
WHERE dt = '2020-06-15'
  AND type = 'update'
  AND array_contains(map_keys(old), 'using_time');
3.优惠券使用(支付)事务事实表 加载数据
- 首日全量数据加载
-- First-day load of coupon-used-at-payment facts from the 2020-06-14 bootstrap snapshot.
-- Only coupons that already carry a used_time are kept. The last select
-- column feeds the dynamic dt partition.
-- Every partition column is dynamic here, which Hive rejects in strict mode,
-- so nonstrict mode is enabled first.
set hive.exec.dynamic.partition.mode=nonstrict;
INSERT OVERWRITE TABLE dwd_tool_coupon_pay_inc PARTITION (dt)
SELECT
    data.id,
    data.coupon_id,
    data.user_id,
    data.order_id,
    date_format(data.used_time, 'yyyy-MM-dd') AS date_id,
    data.used_time,
    date_format(data.used_time, 'yyyy-MM-dd') AS dt
FROM ods_coupon_use_inc
WHERE dt = '2020-06-14'
  AND type = 'bootstrap-insert'
  AND data.used_time IS NOT NULL;
- 每日增量数据加载
-- Daily load of coupon-used-at-payment facts for 2020-06-15.
-- A row qualifies when it is an update record whose old-value map contains
-- the used_time key, meaning used_time was changed by that update.
INSERT OVERWRITE TABLE dwd_tool_coupon_pay_inc PARTITION (dt = '2020-06-15')
SELECT
    data.id,
    data.coupon_id,
    data.user_id,
    data.order_id,
    date_format(data.used_time, 'yyyy-MM-dd') AS date_id,
    data.used_time
FROM ods_coupon_use_inc
WHERE dt = '2020-06-15'
  AND type = 'update'
  AND array_contains(map_keys(old), 'used_time');
二、互动域相关事实表
1.互动域收藏商品事务事实表&评价事务事实表
1.事务事实表 前期梳理
- 关联的表格:favor_info
- 字段分析:xx用户收藏xx商品,xx时间收藏的。
- 收藏商品事务事实表 一条记录代表 用户收藏一个商品
- 收藏商品这个业务过程对表格影响:收藏商品:业务表新增一条记录,取消收藏:业务表中更新业务状态为取消,并且取消时间更新上。
2.事务事实表 DDL表设计分析
-- Interaction domain: add-to-favorites transaction fact table.
-- External ORC table with snappy compression, one partition per dt.
DROP TABLE IF EXISTS dwd_interaction_favor_add_inc;
CREATE EXTERNAL TABLE dwd_interaction_favor_add_inc
(
    `id`          STRING COMMENT '编号',
    `user_id`     STRING COMMENT '用户id',
    `sku_id`      STRING COMMENT 'sku_id',
    `date_id`     STRING COMMENT '日期id',
    `create_time` STRING COMMENT '收藏时间'
)
    COMMENT '收藏事实表'
    PARTITIONED BY (`dt` STRING)
    STORED AS ORC
    LOCATION '/warehouse/gmall/dwd/dwd_interaction_favor_add_inc/'
    TBLPROPERTIES ("orc.compress" = "snappy");
3.事务事实表 加载数据分析
- 首日装载全量数据
-- First-day load of add-to-favorites facts from the 2020-06-14 bootstrap snapshot.
-- Nonstrict mode is required because dt is the only partition column and it
-- is dynamic. The last select column supplies the dt value.
set hive.exec.dynamic.partition.mode=nonstrict;
INSERT OVERWRITE TABLE dwd_interaction_favor_add_inc PARTITION (dt)
SELECT
    data.id,
    data.user_id,
    data.sku_id,
    date_format(data.create_time, 'yyyy-MM-dd') AS date_id,
    data.create_time,
    date_format(data.create_time, 'yyyy-MM-dd') AS dt
FROM ods_favor_info_inc
WHERE dt = '2020-06-14'
  AND type = 'bootstrap-insert';
- 每日装载增量数据
-- Daily load of add-to-favorites facts for 2020-06-15.
-- Only insert-type change records of that day are written to the static
-- dt partition.
INSERT OVERWRITE TABLE dwd_interaction_favor_add_inc PARTITION (dt = '2020-06-15')
SELECT
    data.id,
    data.user_id,
    data.sku_id,
    date_format(data.create_time, 'yyyy-MM-dd') AS date_id,
    data.create_time
FROM ods_favor_info_inc
WHERE dt = '2020-06-15'
  AND type = 'insert';
2.互动域评价事务事实表
1.事务事实表 前期梳理
- 关联的表格:comment_info
- 字段分析:xx用户xx时间评价了xx订单中的xx商品。
- 在某些条件下,某些表格中 度量 和 维度 之间的界限比较模糊,既可以是度量又可以是维度,根据业务需求来进行判断使用。
2.事务事实表 DDL表设计分析
-- Interaction domain: product-review transaction fact table.
-- External ORC table with snappy compression, one partition per dt.
DROP TABLE IF EXISTS dwd_interaction_comment_inc;
CREATE EXTERNAL TABLE dwd_interaction_comment_inc
(
    `id`            STRING COMMENT '编号',
    `user_id`       STRING COMMENT '用户ID',
    `sku_id`        STRING COMMENT 'sku_id',
    `order_id`      STRING COMMENT '订单ID',
    `date_id`       STRING COMMENT '日期ID',
    `create_time`   STRING COMMENT '评价时间',
    `appraise_code` STRING COMMENT '评价编码',
    `appraise_name` STRING COMMENT '评价名称'
)
    COMMENT '评价事务事实表'
    PARTITIONED BY (`dt` STRING)
    STORED AS ORC
    LOCATION '/warehouse/gmall/dwd/dwd_interaction_comment_inc/'
    TBLPROPERTIES ("orc.compress" = "snappy");
3.事务事实表 加载数据分析
- 首日全量
-- First-day load of review facts from the 2020-06-14 bootstrap snapshot.
-- The appraise code is resolved to its name through the dictionary rows with
-- parent_code 12. The left join keeps reviews whose code has no dictionary entry.
-- The last select column feeds the dynamic dt partition. Every partition
-- column is dynamic here, which Hive rejects in strict mode, so nonstrict
-- mode is enabled first.
set hive.exec.dynamic.partition.mode=nonstrict;
INSERT OVERWRITE TABLE dwd_interaction_comment_inc PARTITION (dt)
SELECT
    ci.id,
    ci.user_id,
    ci.sku_id,
    ci.order_id,
    date_format(ci.create_time, 'yyyy-MM-dd') AS date_id,
    ci.create_time,
    ci.appraise,
    dic.dic_name,
    date_format(ci.create_time, 'yyyy-MM-dd') AS dt
FROM
(
    SELECT
        data.id,
        data.user_id,
        data.sku_id,
        data.order_id,
        data.create_time,
        data.appraise
    FROM ods_comment_info_inc
    WHERE dt = '2020-06-14'
      AND type = 'bootstrap-insert'
) ci
LEFT JOIN
(
    SELECT
        dic_code,
        dic_name
    FROM ods_base_dic_full
    WHERE dt = '2020-06-14'
      AND parent_code = '12'
) dic
    ON ci.appraise = dic.dic_code;
- 每日装载
-- Daily load of review facts for 2020-06-15.
-- Insert-type change records of that day are joined to the same-day dictionary
-- snapshot (parent_code 12) to resolve the appraise name. The left join keeps
-- reviews whose code has no dictionary entry.
INSERT OVERWRITE TABLE dwd_interaction_comment_inc PARTITION (dt = '2020-06-15')
SELECT
    ci.id,
    ci.user_id,
    ci.sku_id,
    ci.order_id,
    date_format(ci.create_time, 'yyyy-MM-dd') AS date_id,
    ci.create_time,
    ci.appraise,
    dic.dic_name
FROM
(
    SELECT
        data.id,
        data.user_id,
        data.sku_id,
        data.order_id,
        data.create_time,
        data.appraise
    FROM ods_comment_info_inc
    WHERE dt = '2020-06-15'
      AND type = 'insert'
) ci
LEFT JOIN
(
    SELECT
        dic_code,
        dic_name
    FROM ods_base_dic_full
    WHERE dt = '2020-06-15'
      AND parent_code = '12'
) dic
    ON ci.appraise = dic.dic_code;
三、流量域相关事实表
- 流量域事务事实表数据通常都来自于用户行为日志数据
1.流量域页面浏览事务事实表
1.事务事实表 前期梳理
- 关联的表格:用户行为日志数据-页面日志数据 ods_log_inc
- 行分析:一行代表一条浏览记录,xx用户在xx时间浏览了xx页面。
- xx用户:使用设备id作为用户标识
- xx时间:日期时间
- xx页面:page_id
- 流量相关的日志数据,最后对应dwd层的事务事实表中,将维度信息全部退化到了事实表中,这样做的目的:因为用户行为日志数据都是将维度信息和业务过程数据全部封装到一起,使用flume采集上来,存放到ods层,与业务库中表格数据不同,业务库中已经把各维度信息存放到不同业务表中了,采集后存放到ods层不同表格中,使用的时候直接根据关联关系获取即可,但是用户行为日志中,没有维护这些关联关系,原始日志数据中的字段直接落地到dwd层作为维度数据即可。
- 总之:业务库采集上来的数据,创建对应的dim层维度表 ;埋点采集上来的数据,不需要创建对应的dim层,因为维度数据和事实一并存放在一条记录上报,如果拆开,后面dwd层以及dws层使用的时候,还需要再join关联,这样损耗了大量时间,所以直接维度退化,放在一条记录中即可。
2.事务事实表 DDL表设计分析
-- Traffic domain: page-view transaction fact table.
-- External ORC table with snappy compression, one partition per dt.
-- Device and page attributes are stored directly on each row.
DROP TABLE IF EXISTS dwd_traffic_page_view_inc;
CREATE EXTERNAL TABLE dwd_traffic_page_view_inc
(
    `province_id`    STRING COMMENT '省份id',
    `brand`          STRING COMMENT '手机品牌',
    `channel`        STRING COMMENT '渠道',
    `is_new`         STRING COMMENT '是否首次启动',
    `model`          STRING COMMENT '手机型号',
    `mid_id`         STRING COMMENT '设备id',
    `operate_system` STRING COMMENT '操作系统',
    `user_id`        STRING COMMENT '会员id',
    `version_code`   STRING COMMENT 'app版本号',
    `page_item`      STRING COMMENT '目标id ',
    `page_item_type` STRING COMMENT '目标类型',
    `last_page_id`   STRING COMMENT '上页类型',
    `page_id`        STRING COMMENT '页面ID ',
    `source_type`    STRING COMMENT '来源类型',
    `date_id`        STRING COMMENT '日期id',
    `view_time`      STRING COMMENT '跳入时间',
    `session_id`     STRING COMMENT '所属会话id',
    `during_time`    BIGINT COMMENT '持续时间毫秒'
)
    COMMENT '页面日志表'
    PARTITIONED BY (`dt` STRING)
    STORED AS ORC
    LOCATION '/warehouse/gmall/dwd/dwd_traffic_page_view_inc'
    TBLPROPERTIES ('orc.compress' = 'snappy');
3.事务事实表 加载数据分析
-
用户行为日志数据只存在当天的数据,不存在历史数据,因此不需要像之前的业务表那样单独编写首日装载脚本
-
以2020-06-14数据为准,实现每日装载
-
hive中sql语句:
- struct is not null 问题:这个是hive的bug,需要注意,
- 结构体中的字段,然后书写sql判断里面字段 is not null ,最终执行计划的时候,不执行,失效。
- 问题的原因:CBO导致的,基于性能开销的优化策略,选择最小性能开销的优化策略去执行。
- 处理方式:
- 1.设置hive中cbo为关闭状态,set hive.cbo.enable = false;
- 2.使用struct结构体中任意字段进行过滤,不使用结构体本身,即可实现过滤操作。
- hive中开窗函数的复习:
- FIRST_VALUE:按照限制条件开窗后,取第一个值,如果参数里面设置了true,就跳过null值,否则不跳过null值。
- LAST_VALUE:按照限制条件开窗后,取最后一个值,如果参数里面设置了true,就跳过null值,否则不跳过null值。
- struct is not null 问题:这个是hive的bug,需要注意,
-
字段中的session_id如何处理:
- 下图可以看出,红框标注的是一个会话id
- 怎样设计sessionId
- 涉及到跨行操作,使用Hive中开窗函数 ,hive官网上开窗函数介绍文档:https://cwiki.apache.org/confluence/display/Hive/LanguageManual+WindowingAndAnalytics
- sessionId的设置,需要关注三点:
- 1.sessionId是什么:用户访问一个网站,从开始到最后浏览关闭网站的一个会话。
- 2.由于用户行为日志中没有相关字段,需要自己去设置,这里就使用:用户+会话开始时间作为一次会话的sessionId
- 3.怎样去确认开始时间,使用开窗函数,last_value(时间字段,true) ,时间字段需要处理一下,设置一个新的字段,只使用当前用户登录第一个页面的那个时间,然后下面记录中此字段都是null,下一个会话开始的时候,又是一个新的时间,登录其他页面这个字段的时间也都是null。这样使用last_value开窗函数,并且跳过null值的话,就完全可以实现
- sessionid设置的第二种方案,新开一个字段,判断last_page_id是否为空,为空则值为1,否则值为0,然后对该字段开窗累加求和即可,用户开启了第几次会话,累加后的值就是几,这样就可区分不同会话;但是后面需要再拼接上天维度,因为到了第二天累加值会重新从1开始,不加日期就会和前一天的会话标识重复。
- 下图可以看出,红框标注的是一个会话id
-
最终流量域页面浏览事务事实表装载数据的sql
-- Daily load of page-view facts for 2020-06-14 from the raw log table.
-- CBO is switched off before the struct-level filter on page is used.
-- session_start_point holds ts only on rows without a last_page_id, i.e. the
-- first page of a session. last_value with skip-nulls over each device ordered
-- by ts carries that start forward, and session_id is mid_id plus that start.
-- The province id comes from a left join on area_code, so log rows without a
-- matching province are kept with a null province_id.
set hive.cbo.enable=false;
INSERT OVERWRITE TABLE dwd_traffic_page_view_inc PARTITION (dt = '2020-06-14')
SELECT
    bp.province_id,
    log.brand,
    log.channel,
    log.is_new,
    log.model,
    log.mid_id,
    log.operate_system,
    log.user_id,
    log.version_code,
    log.page_item,
    log.page_item_type,
    log.last_page_id,
    log.page_id,
    log.source_type,
    date_format(from_utc_timestamp(log.ts, 'GMT+8'), 'yyyy-MM-dd')          AS date_id,
    date_format(from_utc_timestamp(log.ts, 'GMT+8'), 'yyyy-MM-dd HH:mm:ss') AS view_time,
    concat(log.mid_id, '-', last_value(log.session_start_point, true) OVER (PARTITION BY log.mid_id ORDER BY log.ts)) AS session_id,
    log.during_time
FROM
(
    SELECT
        common.ar                                 AS area_code,
        common.ba                                 AS brand,
        common.ch                                 AS channel,
        common.is_new                             AS is_new,
        common.md                                 AS model,
        common.mid                                AS mid_id,
        common.os                                 AS operate_system,
        common.uid                                AS user_id,
        common.vc                                 AS version_code,
        page.during_time,
        page.item                                 AS page_item,
        page.item_type                            AS page_item_type,
        page.last_page_id,
        page.page_id,
        page.source_type,
        ts,
        if(page.last_page_id IS NULL, ts, NULL)   AS session_start_point
    FROM ods_log_inc
    WHERE dt = '2020-06-14'
      AND page IS NOT NULL
) log
LEFT JOIN
(
    SELECT
        id AS province_id,
        area_code
    FROM ods_base_province_full
    WHERE dt = '2020-06-14'
) bp
    ON log.area_code = bp.area_code;
2.流量域启动事务事实表
1.启动事务事实表 前期梳理
- 关联的表格:用户行为日志数据-启动日志数据 ods_log_inc
- 行分析:一行代表一条启动日志,xx用户在xx时间启动了xx程序。
- 列分析:具体字段来源于ods_log_inc里面的启动日志
2.启动事务事实表 DDL表设计分析
-- Traffic domain: app-start transaction fact table.
-- External ORC table with snappy compression, one partition per dt.
DROP TABLE IF EXISTS dwd_traffic_start_inc;
CREATE EXTERNAL TABLE dwd_traffic_start_inc
(
    `province_id`     STRING COMMENT '省份id',
    `brand`           STRING COMMENT '手机品牌',
    `channel`         STRING COMMENT '渠道',
    `is_new`          STRING COMMENT '是否首次启动',
    `model`           STRING COMMENT '手机型号',
    `mid_id`          STRING COMMENT '设备id',
    `operate_system`  STRING COMMENT '操作系统',
    `user_id`         STRING COMMENT '会员id',
    `version_code`    STRING COMMENT 'app版本号',
    `entry`           STRING COMMENT 'icon手机图标 notice 通知',
    `open_ad_id`      STRING COMMENT '广告页ID ',
    `date_id`         STRING COMMENT '日期id',
    `start_time`      STRING COMMENT '启动时间',
    `loading_time_ms` BIGINT COMMENT '启动加载时间',
    `open_ad_ms`      BIGINT COMMENT '广告总共播放时间',
    `open_ad_skip_ms` BIGINT COMMENT '用户跳过广告时点'
)
    COMMENT '启动日志表'
    PARTITIONED BY (`dt` STRING)
    STORED AS ORC
    LOCATION '/warehouse/gmall/dwd/dwd_traffic_start_inc'
    TBLPROPERTIES ('orc.compress' = 'snappy');
3.启动事务事实表 加载数据分析
- hive的sql处理:
- start 在hive中是一个关键字,需要使用反引号包裹,写成 `start` 来进行处理
-- Daily load of app-start facts for 2020-06-14 from the raw log table.
-- CBO is switched off before the struct-level filter on start is used.
-- start is a Hive keyword, so the struct column is always written in backticks.
-- Fix: the output aliases now match the target columns start_time and
-- loading_time_ms. They were action_time and loading_time before. The insert
-- is positional, so the written data is unchanged.
-- The province id comes from a left join on area_code, so log rows without a
-- matching province are kept with a null province_id.
set hive.cbo.enable=false;
INSERT OVERWRITE TABLE dwd_traffic_start_inc PARTITION (dt = '2020-06-14')
SELECT
    bp.province_id,
    log.brand,
    log.channel,
    log.is_new,
    log.model,
    log.mid_id,
    log.operate_system,
    log.user_id,
    log.version_code,
    log.entry,
    log.open_ad_id,
    date_format(from_utc_timestamp(log.ts, 'GMT+8'), 'yyyy-MM-dd')          AS date_id,
    date_format(from_utc_timestamp(log.ts, 'GMT+8'), 'yyyy-MM-dd HH:mm:ss') AS start_time,
    log.loading_time                                                        AS loading_time_ms,
    log.open_ad_ms,
    log.open_ad_skip_ms
FROM
(
    SELECT
        common.ar  AS area_code,
        common.ba  AS brand,
        common.ch  AS channel,
        common.is_new,
        common.md  AS model,
        common.mid AS mid_id,
        common.os  AS operate_system,
        common.uid AS user_id,
        common.vc  AS version_code,
        `start`.entry,
        `start`.loading_time,
        `start`.open_ad_id,
        `start`.open_ad_ms,
        `start`.open_ad_skip_ms,
        ts
    FROM ods_log_inc
    WHERE dt = '2020-06-14'
      AND `start` IS NOT NULL
) log
LEFT JOIN
(
    SELECT
        id AS province_id,
        area_code
    FROM ods_base_province_full
    WHERE dt = '2020-06-14'
) bp
    ON log.area_code = bp.area_code;
3.流量域动作事务事实表
1.动作事务事实表 前期梳理
- 关联的表格:用户行为日志数据-用户行为日志数据 ods_log_inc
- 行分析:一行代表一条动作信息,xx用户在xx时间在xx页面触发了xx动作。
- 列分析:具体字段来源于ods_log_inc里面的用户行为
2.动作事务事实表 DDL表设计分析
-- Traffic domain: user-action transaction fact table.
-- External ORC table with snappy compression, one partition per dt.
DROP TABLE IF EXISTS dwd_traffic_action_inc;
CREATE EXTERNAL TABLE dwd_traffic_action_inc
(
    `province_id`      STRING COMMENT '省份id',
    `brand`            STRING COMMENT '手机品牌',
    `channel`          STRING COMMENT '渠道',
    `is_new`           STRING COMMENT '是否首次启动',
    `model`            STRING COMMENT '手机型号',
    `mid_id`           STRING COMMENT '设备id',
    `operate_system`   STRING COMMENT '操作系统',
    `user_id`          STRING COMMENT '会员id',
    `version_code`     STRING COMMENT 'app版本号',
    `during_time`      BIGINT COMMENT '持续时间毫秒',
    `page_item`        STRING COMMENT '目标id ',
    `page_item_type`   STRING COMMENT '目标类型',
    `last_page_id`     STRING COMMENT '上页类型',
    `page_id`          STRING COMMENT '页面id ',
    `source_type`      STRING COMMENT '来源类型',
    `action_id`        STRING COMMENT '动作id',
    `action_item`      STRING COMMENT '目标id ',
    `action_item_type` STRING COMMENT '目标类型',
    `date_id`          STRING COMMENT '日期id',
    `action_time`      STRING COMMENT '动作发生时间'
)
    COMMENT '动作日志表'
    PARTITIONED BY (`dt` STRING)
    STORED AS ORC
    LOCATION '/warehouse/gmall/dwd/dwd_traffic_action_inc'
    TBLPROPERTIES ('orc.compress' = 'snappy');
3.动作事务事实表 加载数据分析
-
因为用户行为日志中,动作actions字段对应的数组结构,但是最终装载到动作事务事实表中的时候,需要展开为一个一个的动作,这里牵扯到了一进多出的sql处理方式。
-
hive中sql实现:一进多出的逻辑
- udf:一进一出
- udaf:多进一出
- udtf: 一进多出
- udtf中**炸裂函数**
- 炸裂数组 :explode(arr) tmp as item :其中tmp 是炸裂出来的item字段组成的表,可以使用tmp.item,来获取item的字段信息
- 语法:select * from table lateral view explode(arr) tmp as item
- 最后炸裂完毕,多出来一列为item,其他两列跟原来数据保持一致。
- 炸裂Map集合: explode(map) tmp as key,value
- 最终炸裂效果如下图:
- 炸裂数组 :explode(arr) tmp as item :其中tmp 是炸裂出来的item字段组成的表,可以使用tmp.item,来获取item的字段信息
-
最终装载数据如下:
-- Daily load of user-action facts for 2020-06-14 from the raw log table.
-- The actions array of each log row is expanded with lateral view explode, so
-- one output row is produced per action. The timestamp used is the one stored
-- on the action itself.
-- The province id comes from a left join on area_code, so rows without a
-- matching province are kept with a null province_id.
set hive.cbo.enable=false;
INSERT OVERWRITE TABLE dwd_traffic_action_inc PARTITION (dt = '2020-06-14')
SELECT
    bp.province_id,
    log.brand,
    log.channel,
    log.is_new,
    log.model,
    log.mid_id,
    log.operate_system,
    log.user_id,
    log.version_code,
    log.during_time,
    log.page_item,
    log.page_item_type,
    log.last_page_id,
    log.page_id,
    log.source_type,
    log.action_id,
    log.action_item,
    log.action_item_type,
    date_format(from_utc_timestamp(log.ts, 'GMT+8'), 'yyyy-MM-dd')          AS date_id,
    date_format(from_utc_timestamp(log.ts, 'GMT+8'), 'yyyy-MM-dd HH:mm:ss') AS action_time
FROM
(
    SELECT
        common.ar        AS area_code,
        common.ba        AS brand,
        common.ch        AS channel,
        common.is_new,
        common.md        AS model,
        common.mid       AS mid_id,
        common.os        AS operate_system,
        common.uid       AS user_id,
        common.vc        AS version_code,
        page.during_time,
        page.item        AS page_item,
        page.item_type   AS page_item_type,
        page.last_page_id,
        page.page_id,
        page.source_type,
        action.action_id,
        action.item      AS action_item,
        action.item_type AS action_item_type,
        action.ts
    FROM ods_log_inc LATERAL VIEW explode(actions) tmp AS action
    WHERE dt = '2020-06-14'
      AND actions IS NOT NULL
) log
LEFT JOIN
(
    SELECT
        id AS province_id,
        area_code
    FROM ods_base_province_full
    WHERE dt = '2020-06-14'
) bp
    ON log.area_code = bp.area_code;
4.流量域错误事务事实表
1.错误事务事实表 前期梳理
- 关联的表格:用户行为日志数据-用户行为日志数据+启动日志 ods_log_inc,这两部分都有可能产生错误日志
- 行分析:一行代表一条错误信息。
- 列分析:错误表格中需要包含:common信息、page信息、start信息、actions信息、曝光信息和最终的错误信息,前面的几个都作为维度信息,方便定位错误问题
2.错误事务事实表 DDL表设计分析
-- Traffic domain: error transaction fact table.
-- External ORC table with snappy compression, one partition per dt.
-- Each row keeps the page, start, actions and displays context next to the
-- error code and message.
-- Fix: the province_id column comment used to say area code. The column
-- stores the province id, so the comment now matches the other traffic tables.
DROP TABLE IF EXISTS dwd_traffic_error_inc;
CREATE EXTERNAL TABLE dwd_traffic_error_inc
(
    `province_id`     STRING COMMENT '省份id',
    `brand`           STRING COMMENT '手机品牌',
    `channel`         STRING COMMENT '渠道',
    `is_new`          STRING COMMENT '是否首次启动',
    `model`           STRING COMMENT '手机型号',
    `mid_id`          STRING COMMENT '设备id',
    `operate_system`  STRING COMMENT '操作系统',
    `user_id`         STRING COMMENT '会员id',
    `version_code`    STRING COMMENT 'app版本号',
    `page_item`       STRING COMMENT '目标id ',
    `page_item_type`  STRING COMMENT '目标类型',
    `last_page_id`    STRING COMMENT '上页类型',
    `page_id`         STRING COMMENT '页面ID ',
    `source_type`     STRING COMMENT '来源类型',
    `entry`           STRING COMMENT 'icon手机图标 notice 通知',
    `loading_time`    STRING COMMENT '启动加载时间',
    `open_ad_id`      STRING COMMENT '广告页ID ',
    `open_ad_ms`      STRING COMMENT '广告总共播放时间',
    `open_ad_skip_ms` STRING COMMENT '用户跳过广告时点',
    `actions`         ARRAY<STRUCT<action_id:STRING, item:STRING, item_type:STRING, ts:BIGINT>> COMMENT '动作信息',
    `displays`        ARRAY<STRUCT<display_type:STRING, item:STRING, item_type:STRING, `order`:STRING, pos_id:STRING>> COMMENT '曝光信息',
    `date_id`         STRING COMMENT '日期id',
    `error_time`      STRING COMMENT '错误时间',
    `error_code`      STRING COMMENT '错误码',
    `error_msg`       STRING COMMENT '错误信息'
)
    COMMENT '错误日志表'
    PARTITIONED BY (`dt` STRING)
    STORED AS ORC
    LOCATION '/warehouse/gmall/dwd/dwd_traffic_error_inc'
    TBLPROPERTIES ('orc.compress' = 'snappy');
3.错误事务事实表 加载数据分析
- 执行导入数据的时候,会出现数据导入失败的问题:
- 是由于查询的数据时候存在数组类型,在使用了hive on spark引擎的时候,不支持这样的操作,切换为hive on MR,命令如下:set hive.execution.engine=mr;
- 使用hive on spark的时候,有时候sql正常,但是运行时一直报错,切换为mr引擎以后,查看是否能正常运行,正常运行的话,就代表hive on spark有bug。可以临时切换执行引擎为mr,然后在程序结尾再切换回spark即可,具体sql如下。
-- Daily load of error facts for 2020-06-14 from the raw log table.
-- CBO is switched off before the struct-level filter on err is used.
-- The engine is switched to mr for this statement because the query carries
-- array columns, which the surrounding notes report as failing on Hive on Spark.
-- Fixes in this version:
--   1. The province lookup is a left join like every other traffic load. The
--      inner join silently dropped error events whose area code had no match.
--   2. The engine is switched back to spark after the insert, as the notes
--      describe. Adjust the last line if the session default is not spark.
--   3. The unused page.during_time column is no longer selected.
set hive.cbo.enable=false;
set hive.execution.engine=mr;
INSERT OVERWRITE TABLE dwd_traffic_error_inc PARTITION (dt = '2020-06-14')
SELECT
    bp.province_id,
    log.brand,
    log.channel,
    log.is_new,
    log.model,
    log.mid_id,
    log.operate_system,
    log.user_id,
    log.version_code,
    log.page_item,
    log.page_item_type,
    log.last_page_id,
    log.page_id,
    log.source_type,
    log.entry,
    log.loading_time,
    log.open_ad_id,
    log.open_ad_ms,
    log.open_ad_skip_ms,
    log.actions,
    log.displays,
    date_format(from_utc_timestamp(log.ts, 'GMT+8'), 'yyyy-MM-dd')          AS date_id,
    date_format(from_utc_timestamp(log.ts, 'GMT+8'), 'yyyy-MM-dd HH:mm:ss') AS error_time,
    log.error_code,
    log.error_msg
FROM
(
    SELECT
        common.ar      AS area_code,
        common.ba      AS brand,
        common.ch      AS channel,
        common.is_new,
        common.md      AS model,
        common.mid     AS mid_id,
        common.os      AS operate_system,
        common.uid     AS user_id,
        common.vc      AS version_code,
        page.item      AS page_item,
        page.item_type AS page_item_type,
        page.last_page_id,
        page.page_id,
        page.source_type,
        `start`.entry,
        `start`.loading_time,
        `start`.open_ad_id,
        `start`.open_ad_ms,
        `start`.open_ad_skip_ms,
        actions,
        displays,
        err.error_code,
        err.msg        AS error_msg,
        ts
    FROM ods_log_inc
    WHERE dt = '2020-06-14'
      AND err IS NOT NULL
) log
LEFT JOIN
(
    SELECT
        id AS province_id,
        area_code
    FROM ods_base_province_full
    WHERE dt = '2020-06-14'
) bp
    ON log.area_code = bp.area_code;
set hive.execution.engine=spark;
四、用户域相关事实表
1.用户域用户注册事务事实表
1.用户注册事务事实表 前期梳理
- 用户注册 特别之处:一个用户只对应表格中一条注册信息,不存在多条的情况。
- 关联的表格:业务过程:注册成功,会对哪些表格产生影响:用户表 user_info,注册成功,插入数据。
- 行分析:一条代表一个用户注册成功的信息。
- 列分析:
- 只从业务系统用户注册表获取维度信息的话,此张事实表的维度很少,不符合多维的原则,需要再添加一些其他维度;
- 还需要从用户行为日志表中获取用户注册的信息以及用户注册成功后生成的用户id,然后添加到用户注册事务事实表中。
- 两边数据,以业务系统用户注册表数据为主。
2.用户注册事务事实表 DDL表设计分析
-- User domain: user-registration transaction fact table.
-- External ORC table with snappy compression, one partition per dt.
DROP TABLE IF EXISTS dwd_user_register_inc;
CREATE EXTERNAL TABLE dwd_user_register_inc
(
    `user_id`        STRING COMMENT '用户ID',
    `date_id`        STRING COMMENT '日期ID',
    `create_time`    STRING COMMENT '注册时间',
    `channel`        STRING COMMENT '应用下载渠道',
    `province_id`    STRING COMMENT '省份id',
    `version_code`   STRING COMMENT '应用版本',
    `mid_id`         STRING COMMENT '设备id',
    `brand`          STRING COMMENT '设备品牌',
    `model`          STRING COMMENT '设备型号',
    `operate_system` STRING COMMENT '设备操作系统'
)
    COMMENT '用户域用户注册事务事实表'
    PARTITIONED BY (`dt` STRING)
    STORED AS ORC
    LOCATION '/warehouse/gmall/dwd/dwd_user_register_inc/'
    TBLPROPERTIES ("orc.compress" = "snappy");
3.用户注册事务事实表 加载数据分析
1.首日全量装载数据
-- First-day load of registration facts from the 2020-06-14 bootstrap snapshot.
-- The user table drives the result. Device attributes are attached from log
-- rows of the register page that carry a user id, and the province id is then
-- looked up by area code. Both joins are left joins, so users without a
-- matching log row are kept with null device columns.
-- The last select column feeds the dynamic dt partition, which is why
-- nonstrict mode is enabled.
set hive.exec.dynamic.partition.mode=nonstrict;
INSERT OVERWRITE TABLE dwd_user_register_inc PARTITION (dt)
SELECT
    ui.user_id,
    date_format(ui.create_time, 'yyyy-MM-dd') AS date_id,
    ui.create_time,
    log.channel,
    bp.province_id,
    log.version_code,
    log.mid_id,
    log.brand,
    log.model,
    log.operate_system,
    date_format(ui.create_time, 'yyyy-MM-dd') AS dt
FROM
(
    SELECT
        data.id AS user_id,
        data.create_time
    FROM ods_user_info_inc
    WHERE dt = '2020-06-14'
      AND type = 'bootstrap-insert'
) ui
LEFT JOIN
(
    SELECT
        common.ar  AS area_code,
        common.ba  AS brand,
        common.ch  AS channel,
        common.md  AS model,
        common.mid AS mid_id,
        common.os  AS operate_system,
        common.uid AS user_id,
        common.vc  AS version_code
    FROM ods_log_inc
    WHERE dt = '2020-06-14'
      AND page.page_id = 'register'
      AND common.uid IS NOT NULL
) log
    ON ui.user_id = log.user_id
LEFT JOIN
(
    SELECT
        id AS province_id,
        area_code
    FROM ods_base_province_full
    WHERE dt = '2020-06-14'
) bp
    ON log.area_code = bp.area_code;
2.每日增量装载数据
-- Daily load of registration facts for 2020-06-15.
-- Insert-type user records of that day drive the result. Device attributes
-- are attached from same-day log rows of the register page that carry a user
-- id, and the province id is then looked up by area code. Both joins are left
-- joins, so users without a matching log row are kept with null device columns.
INSERT OVERWRITE TABLE dwd_user_register_inc PARTITION (dt = '2020-06-15')
SELECT
    ui.user_id,
    date_format(ui.create_time, 'yyyy-MM-dd') AS date_id,
    ui.create_time,
    log.channel,
    bp.province_id,
    log.version_code,
    log.mid_id,
    log.brand,
    log.model,
    log.operate_system
FROM
(
    SELECT
        data.id AS user_id,
        data.create_time
    FROM ods_user_info_inc
    WHERE dt = '2020-06-15'
      AND type = 'insert'
) ui
LEFT JOIN
(
    SELECT
        common.ar  AS area_code,
        common.ba  AS brand,
        common.ch  AS channel,
        common.md  AS model,
        common.mid AS mid_id,
        common.os  AS operate_system,
        common.uid AS user_id,
        common.vc  AS version_code
    FROM ods_log_inc
    WHERE dt = '2020-06-15'
      AND page.page_id = 'register'
      AND common.uid IS NOT NULL
) log
    ON ui.user_id = log.user_id
LEFT JOIN
(
    SELECT
        id AS province_id,
        area_code
    FROM ods_base_province_full
    WHERE dt = '2020-06-15'
) bp
    ON log.area_code = bp.area_code;
2.用户域用户登录事务事实表
1.用户登录事务事实表 前期梳理
- 关联的表格:业务过程:登录成功,会对哪些表格产生影响:用户行为日志表,用户登录会产生登录日志。
- 行分析:一条代表一个用户登录信息。
- 列分析:
- 从用户行为日志中 启动日志 和 页面日志获取相关列字段
- 用户登录情况分为三种情况:
- 1.前期浏览页面未登录,后面登录后再浏览
- 2.登录后再浏览页面
- 3.只浏览页面,不登录
2.用户登录事务事实表 DDL表设计分析
-- User domain: user-login transaction fact table.
-- External ORC table with snappy compression, one partition per dt.
DROP TABLE IF EXISTS dwd_user_login_inc;
CREATE EXTERNAL TABLE dwd_user_login_inc
(
    `user_id`        STRING COMMENT '用户ID',
    `date_id`        STRING COMMENT '日期ID',
    `login_time`     STRING COMMENT '登录时间',
    `channel`        STRING COMMENT '应用下载渠道',
    `province_id`    STRING COMMENT '省份id',
    `version_code`   STRING COMMENT '应用版本',
    `mid_id`         STRING COMMENT '设备id',
    `brand`          STRING COMMENT '设备品牌',
    `model`          STRING COMMENT '设备型号',
    `operate_system` STRING COMMENT '设备操作系统'
)
    COMMENT '用户域用户登录事务事实表'
    PARTITIONED BY (`dt` STRING)
    STORED AS ORC
    LOCATION '/warehouse/gmall/dwd/dwd_user_login_inc/'
    TBLPROPERTIES ("orc.compress" = "snappy");
3.用户登录事务事实表 加载数据分析
- 怎样获取登录信息,在一次会话中的登录信息。
- 1.前期浏览页面未登录,后面登录后再浏览
- 2.登录后再浏览页面
- 1.首先绘制sessionId,使用开窗函数
- 2.获取每个会话的第一个页面,分组取TopN
-- Daily load of login facts for 2020-06-14, derived from page logs.
-- Steps, innermost first:
--   page_log    page rows with session_start_point set to ts only where
--               last_page_id is null, i.e. the first page of a session
--   sessionized session_id is mid_id plus the latest non-null start point
--               per device, ordered by ts
--   ranked      rows that carry a user id, numbered by ts inside each session
--   first_login the earliest such row of every session, taken as the login
-- Sessions in which no row carries a user id produce no output.
-- Fix: CBO is switched off explicitly. The innermost filter tests the page
-- struct itself, and the surrounding notes report that CBO drops such a
-- filter. The other struct-filter loads already set this option.
set hive.cbo.enable=false;
INSERT OVERWRITE TABLE dwd_user_login_inc PARTITION (dt = '2020-06-14')
SELECT
    first_login.user_id,
    date_format(from_utc_timestamp(first_login.ts, 'GMT+8'), 'yyyy-MM-dd')          AS date_id,
    date_format(from_utc_timestamp(first_login.ts, 'GMT+8'), 'yyyy-MM-dd HH:mm:ss') AS login_time,
    first_login.channel,
    bp.province_id,
    first_login.version_code,
    first_login.mid_id,
    first_login.brand,
    first_login.model,
    first_login.operate_system
FROM
(
    SELECT
        user_id,
        channel,
        area_code,
        version_code,
        mid_id,
        brand,
        model,
        operate_system,
        ts
    FROM
    (
        SELECT
            user_id,
            channel,
            area_code,
            version_code,
            mid_id,
            brand,
            model,
            operate_system,
            ts,
            row_number() OVER (PARTITION BY session_id ORDER BY ts) AS rn
        FROM
        (
            SELECT
                user_id,
                channel,
                area_code,
                version_code,
                mid_id,
                brand,
                model,
                operate_system,
                ts,
                concat(mid_id, '-', last_value(session_start_point, true) OVER (PARTITION BY mid_id ORDER BY ts)) AS session_id
            FROM
            (
                SELECT
                    common.uid                              AS user_id,
                    common.ch                               AS channel,
                    common.ar                               AS area_code,
                    common.vc                               AS version_code,
                    common.mid                              AS mid_id,
                    common.ba                               AS brand,
                    common.md                               AS model,
                    common.os                               AS operate_system,
                    ts,
                    if(page.last_page_id IS NULL, ts, NULL) AS session_start_point
                FROM ods_log_inc
                WHERE dt = '2020-06-14'
                  AND page IS NOT NULL
            ) page_log
        ) sessionized
        WHERE user_id IS NOT NULL
    ) ranked
    WHERE rn = 1
) first_login
LEFT JOIN
(
    SELECT
        id AS province_id,
        area_code
    FROM ods_base_province_full
    WHERE dt = '2020-06-14'
) bp
    ON first_login.area_code = bp.area_code;
- 总结:
- 事实表,每天有一个分区,对应数据写入到对应分区上面,首日全量同步的时候,也是写到对应分区上,每天分区里面放的是当天的操作记录,在进行查询数据的时候,查询对应分区数据即可。
- 维度表,大多数是每日全量快照表,每天一个分区,此分区内部是全量数据,查询时候,直接查询最新分区即可获取全量数据,如果查询历史数据,可对应查询历史某天的维度数据。
五、建表语句脚本
-- Trade domain: add-to-cart transaction fact table.
-- External ORC table with snappy compression, one partition per dt.
-- NOTE(review): the delimited row format is kept as written. It looks
-- unnecessary next to STORED AS ORC, confirm before removing it.
DROP TABLE IF EXISTS dwd_trade_cart_add_inc;
CREATE EXTERNAL TABLE dwd_trade_cart_add_inc
(
    `id`               STRING COMMENT '编号',
    `user_id`          STRING COMMENT '用户id',
    `sku_id`           STRING COMMENT '商品id',
    `date_id`          STRING COMMENT '时间id',
    `create_time`      STRING COMMENT '加购时间',
    `source_id`        STRING COMMENT '来源类型ID',
    `source_type_code` STRING COMMENT '来源类型编码',
    `source_type_name` STRING COMMENT '来源类型名称',
    `sku_num`          BIGINT COMMENT '加购物车件数'
)
    COMMENT '交易域加购物车事务事实表'
    PARTITIONED BY (`dt` STRING)
    ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
    STORED AS ORC
    LOCATION '/warehouse/gmall/dwd/dwd_trade_cart_add_inc/'
    TBLPROPERTIES ('orc.compress' = 'snappy');
-- Trade domain: order-detail transaction fact table.
-- External ORC table with snappy compression, one partition per dt.
-- Fix: the activity_id column comment used to repeat the activity_rule_id
-- text and now describes the activity id.
-- NOTE(review): the delimited row format is kept as written. It looks
-- unnecessary next to STORED AS ORC, confirm before removing it.
DROP TABLE IF EXISTS dwd_trade_order_detail_inc;
CREATE EXTERNAL TABLE dwd_trade_order_detail_inc
(
    `id`                    STRING COMMENT '编号',
    `order_id`              STRING COMMENT '订单id',
    `user_id`               STRING COMMENT '用户id',
    `sku_id`                STRING COMMENT '商品id',
    `province_id`           STRING COMMENT '省份id',
    `activity_id`           STRING COMMENT '参与活动id',
    `activity_rule_id`      STRING COMMENT '参与活动规则id',
    `coupon_id`             STRING COMMENT '使用优惠券id',
    `date_id`               STRING COMMENT '下单日期id',
    `create_time`           STRING COMMENT '下单时间',
    `source_id`             STRING COMMENT '来源编号',
    `source_type_code`      STRING COMMENT '来源类型编码',
    `source_type_name`      STRING COMMENT '来源类型名称',
    `sku_num`               BIGINT COMMENT '商品数量',
    `split_original_amount` DECIMAL(16, 2) COMMENT '原始价格',
    `split_activity_amount` DECIMAL(16, 2) COMMENT '活动优惠分摊',
    `split_coupon_amount`   DECIMAL(16, 2) COMMENT '优惠券优惠分摊',
    `split_total_amount`    DECIMAL(16, 2) COMMENT '最终价格分摊'
)
    COMMENT '交易域下单明细事务事实表'
    PARTITIONED BY (`dt` STRING)
    ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
    STORED AS ORC
    LOCATION '/warehouse/gmall/dwd/dwd_trade_order_detail_inc/'
    TBLPROPERTIES ('orc.compress' = 'snappy');
-- Trade domain: order-cancel detail transaction fact table.
-- External ORC table with snappy compression, one partition per dt.
-- Fix: the activity_id column comment used to repeat the activity_rule_id
-- text and now describes the activity id.
-- NOTE(review): the delimited row format is kept as written. It looks
-- unnecessary next to STORED AS ORC, confirm before removing it.
DROP TABLE IF EXISTS dwd_trade_cancel_detail_inc;
CREATE EXTERNAL TABLE dwd_trade_cancel_detail_inc
(
    `id`                    STRING COMMENT '编号',
    `order_id`              STRING COMMENT '订单id',
    `user_id`               STRING COMMENT '用户id',
    `sku_id`                STRING COMMENT '商品id',
    `province_id`           STRING COMMENT '省份id',
    `activity_id`           STRING COMMENT '参与活动id',
    `activity_rule_id`      STRING COMMENT '参与活动规则id',
    `coupon_id`             STRING COMMENT '使用优惠券id',
    `date_id`               STRING COMMENT '取消订单日期id',
    `cancel_time`           STRING COMMENT '取消订单时间',
    `source_id`             STRING COMMENT '来源编号',
    `source_type_code`      STRING COMMENT '来源类型编码',
    `source_type_name`      STRING COMMENT '来源类型名称',
    `sku_num`               BIGINT COMMENT '商品数量',
    `split_original_amount` DECIMAL(16, 2) COMMENT '原始价格',
    `split_activity_amount` DECIMAL(16, 2) COMMENT '活动优惠分摊',
    `split_coupon_amount`   DECIMAL(16, 2) COMMENT '优惠券优惠分摊',
    `split_total_amount`    DECIMAL(16, 2) COMMENT '最终价格分摊'
)
    COMMENT '交易域取消订单明细事务事实表'
    PARTITIONED BY (`dt` STRING)
    ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
    STORED AS ORC
    LOCATION '/warehouse/gmall/dwd/dwd_trade_cancel_detail_inc/'
    TBLPROPERTIES ('orc.compress' = 'snappy');
-- Trade domain: successful-payment detail transaction fact table.
-- External ORC table with snappy compression, one partition per dt.
-- Fix: the activity_id column comment used to repeat the activity_rule_id
-- text and now describes the activity id.
-- NOTE(review): the delimited row format is kept as written. It looks
-- unnecessary next to STORED AS ORC, confirm before removing it.
DROP TABLE IF EXISTS dwd_trade_pay_detail_suc_inc;
CREATE EXTERNAL TABLE dwd_trade_pay_detail_suc_inc
(
    `id`                    STRING COMMENT '编号',
    `order_id`              STRING COMMENT '订单id',
    `user_id`               STRING COMMENT '用户id',
    `sku_id`                STRING COMMENT '商品id',
    `province_id`           STRING COMMENT '省份id',
    `activity_id`           STRING COMMENT '参与活动id',
    `activity_rule_id`      STRING COMMENT '参与活动规则id',
    `coupon_id`             STRING COMMENT '使用优惠券id',
    `payment_type_code`     STRING COMMENT '支付类型编码',
    `payment_type_name`     STRING COMMENT '支付类型名称',
    `date_id`               STRING COMMENT '支付日期id',
    `callback_time`         STRING COMMENT '支付成功时间',
    `source_id`             STRING COMMENT '来源编号',
    `source_type_code`      STRING COMMENT '来源类型编码',
    `source_type_name`      STRING COMMENT '来源类型名称',
    `sku_num`               BIGINT COMMENT '商品数量',
    `split_original_amount` DECIMAL(16, 2) COMMENT '应支付原始金额',
    `split_activity_amount` DECIMAL(16, 2) COMMENT '支付活动优惠分摊',
    `split_coupon_amount`   DECIMAL(16, 2) COMMENT '支付优惠券优惠分摊',
    `split_payment_amount`  DECIMAL(16, 2) COMMENT '支付金额'
)
    COMMENT '交易域成功支付事务事实表'
    PARTITIONED BY (`dt` STRING)
    ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
    STORED AS ORC
    LOCATION '/warehouse/gmall/dwd/dwd_trade_pay_detail_suc_inc/'
    TBLPROPERTIES ('orc.compress' = 'snappy');
-- Trade domain: order-refund-request transaction fact table.
-- External ORC table with snappy compression, one partition per dt.
DROP TABLE IF EXISTS dwd_trade_order_refund_inc;
CREATE EXTERNAL TABLE dwd_trade_order_refund_inc
(
    `id`                      STRING COMMENT '编号',
    `user_id`                 STRING COMMENT '用户ID',
    `order_id`                STRING COMMENT '订单ID',
    `sku_id`                  STRING COMMENT '商品ID',
    `province_id`             STRING COMMENT '地区ID',
    `date_id`                 STRING COMMENT '日期ID',
    `create_time`             STRING COMMENT '退单时间',
    `refund_type_code`        STRING COMMENT '退单类型编码',
    `refund_type_name`        STRING COMMENT '退单类型名称',
    `refund_reason_type_code` STRING COMMENT '退单原因类型编码',
    `refund_reason_type_name` STRING COMMENT '退单原因类型名称',
    `refund_reason_txt`       STRING COMMENT '退单原因描述',
    `refund_num`              BIGINT COMMENT '退单件数',
    `refund_amount`           DECIMAL(16, 2) COMMENT '退单金额'
)
    COMMENT '交易域退单事务事实表'
    PARTITIONED BY (`dt` STRING)
    STORED AS ORC
    LOCATION '/warehouse/gmall/dwd/dwd_trade_order_refund_inc/'
    TBLPROPERTIES ("orc.compress" = "snappy");
-- Trade domain: successful-refund-payment transaction fact table.
-- External ORC table with snappy compression, one partition per dt.
-- NOTE(review): refund_num is DECIMAL here but BIGINT in
-- dwd_trade_order_refund_inc. Confirm which type is intended.
DROP TABLE IF EXISTS dwd_trade_refund_pay_suc_inc;
CREATE EXTERNAL TABLE dwd_trade_refund_pay_suc_inc
(
    `id`                STRING COMMENT '编号',
    `user_id`           STRING COMMENT '用户ID',
    `order_id`          STRING COMMENT '订单编号',
    `sku_id`            STRING COMMENT 'SKU编号',
    `province_id`       STRING COMMENT '地区ID',
    `payment_type_code` STRING COMMENT '支付类型编码',
    `payment_type_name` STRING COMMENT '支付类型名称',
    `date_id`           STRING COMMENT '日期ID',
    `callback_time`     STRING COMMENT '支付成功时间',
    `refund_num`        DECIMAL(16, 2) COMMENT '退款件数',
    `refund_amount`     DECIMAL(16, 2) COMMENT '退款金额'
)
    COMMENT '交易域提交退款成功事务事实表'
    PARTITIONED BY (`dt` STRING)
    STORED AS ORC
    LOCATION '/warehouse/gmall/dwd/dwd_trade_refund_pay_suc_inc/'
    TBLPROPERTIES ("orc.compress" = "snappy");
-- Trade domain: shopping-cart periodic snapshot fact table.
-- External ORC table with snappy compression, one partition per dt.
-- NOTE(review): the delimited row format is kept as written. It looks
-- unnecessary next to STORED AS ORC, confirm before removing it.
DROP TABLE IF EXISTS dwd_trade_cart_full;
CREATE EXTERNAL TABLE dwd_trade_cart_full
(
    `id`       STRING COMMENT '编号',
    `user_id`  STRING COMMENT '用户id',
    `sku_id`   STRING COMMENT '商品id',
    `sku_name` STRING COMMENT '商品名称',
    `sku_num`  BIGINT COMMENT '加购物车件数'
)
    COMMENT '交易域购物车周期快照事实表'
    PARTITIONED BY (`dt` STRING)
    ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
    STORED AS ORC
    LOCATION '/warehouse/gmall/dwd/dwd_trade_cart_full/'
    TBLPROPERTIES ('orc.compress' = 'snappy');
-- Tool domain: coupon-receive transaction fact table.
-- External ORC table with snappy compression, one partition per dt.
DROP TABLE IF EXISTS dwd_tool_coupon_get_inc;
CREATE EXTERNAL TABLE dwd_tool_coupon_get_inc
(
    `id`        STRING COMMENT '编号',
    `coupon_id` STRING COMMENT '优惠券ID',
    `user_id`   STRING COMMENT 'userid',
    `date_id`   STRING COMMENT '日期ID',
    `get_time`  STRING COMMENT '领取时间'
)
    COMMENT '优惠券领取事务事实表'
    PARTITIONED BY (`dt` STRING)
    STORED AS ORC
    LOCATION '/warehouse/gmall/dwd/dwd_tool_coupon_get_inc/'
    TBLPROPERTIES ("orc.compress" = "snappy");
-- Tool domain: coupon-used-at-order transaction fact table.
-- External ORC table with snappy compression, one partition per dt.
DROP TABLE IF EXISTS dwd_tool_coupon_order_inc;
CREATE EXTERNAL TABLE dwd_tool_coupon_order_inc
(
    `id`         STRING COMMENT '编号',
    `coupon_id`  STRING COMMENT '优惠券ID',
    `user_id`    STRING COMMENT 'user_id',
    `order_id`   STRING COMMENT 'order_id',
    `date_id`    STRING COMMENT '日期ID',
    `order_time` STRING COMMENT '使用下单时间'
)
    COMMENT '优惠券使用下单事务事实表'
    PARTITIONED BY (`dt` STRING)
    STORED AS ORC
    LOCATION '/warehouse/gmall/dwd/dwd_tool_coupon_order_inc/'
    TBLPROPERTIES ("orc.compress" = "snappy");
-- Tool domain: coupon-used-at-payment transaction fact table.
-- External ORC table with snappy compression, one partition per dt.
-- Fix: the payment_time column comment used to repeat the order-time text
-- and now describes the payment time.
DROP TABLE IF EXISTS dwd_tool_coupon_pay_inc;
CREATE EXTERNAL TABLE dwd_tool_coupon_pay_inc
(
    `id`           STRING COMMENT '编号',
    `coupon_id`    STRING COMMENT '优惠券ID',
    `user_id`      STRING COMMENT 'user_id',
    `order_id`     STRING COMMENT 'order_id',
    `date_id`      STRING COMMENT '日期ID',
    `payment_time` STRING COMMENT '使用支付时间'
)
    COMMENT '优惠券使用支付事务事实表'
    PARTITIONED BY (`dt` STRING)
    STORED AS ORC
    LOCATION '/warehouse/gmall/dwd/dwd_tool_coupon_pay_inc/'
    TBLPROPERTIES ("orc.compress" = "snappy");
-- Interaction domain: add-to-favorites transaction fact table.
-- External ORC table with snappy compression, one partition per dt.
DROP TABLE IF EXISTS dwd_interaction_favor_add_inc;
CREATE EXTERNAL TABLE dwd_interaction_favor_add_inc
(
    `id`          STRING COMMENT '编号',
    `user_id`     STRING COMMENT '用户id',
    `sku_id`      STRING COMMENT 'sku_id',
    `date_id`     STRING COMMENT '日期id',
    `create_time` STRING COMMENT '收藏时间'
)
    COMMENT '收藏事实表'
    PARTITIONED BY (`dt` STRING)
    STORED AS ORC
    LOCATION '/warehouse/gmall/dwd/dwd_interaction_favor_add_inc/'
    TBLPROPERTIES ("orc.compress" = "snappy");
-- Interaction domain: product-review transaction fact table.
-- External ORC table with snappy compression, one partition per dt.
DROP TABLE IF EXISTS dwd_interaction_comment_inc;
CREATE EXTERNAL TABLE dwd_interaction_comment_inc
(
    `id`            STRING COMMENT '编号',
    `user_id`       STRING COMMENT '用户ID',
    `sku_id`        STRING COMMENT 'sku_id',
    `order_id`      STRING COMMENT '订单ID',
    `date_id`       STRING COMMENT '日期ID',
    `create_time`   STRING COMMENT '评价时间',
    `appraise_code` STRING COMMENT '评价编码',
    `appraise_name` STRING COMMENT '评价名称'
)
    COMMENT '评价事务事实表'
    PARTITIONED BY (`dt` STRING)
    STORED AS ORC
    LOCATION '/warehouse/gmall/dwd/dwd_interaction_comment_inc/'
    TBLPROPERTIES ("orc.compress" = "snappy");
-- Traffic domain: page-view transaction fact table.
-- Device/app attributes, page attributes, a session id and the view duration.
-- External ORC table (snappy-compressed), partitioned by dt.
DROP TABLE IF EXISTS dwd_traffic_page_view_inc;
CREATE EXTERNAL TABLE dwd_traffic_page_view_inc
(
    `province_id` STRING COMMENT '省份id',
    `brand` STRING COMMENT '手机品牌',
    `channel` STRING COMMENT '渠道',
    `is_new` STRING COMMENT '是否首次启动',
    `model` STRING COMMENT '手机型号',
    `mid_id` STRING COMMENT '设备id',
    `operate_system` STRING COMMENT '操作系统',
    `user_id` STRING COMMENT '会员id',
    `version_code` STRING COMMENT 'app版本号',
    `page_item` STRING COMMENT '目标id ',
    `page_item_type` STRING COMMENT '目标类型',
    `last_page_id` STRING COMMENT '上页类型',
    `page_id` STRING COMMENT '页面ID ',
    `source_type` STRING COMMENT '来源类型',
    `date_id` STRING COMMENT '日期id',
    `view_time` STRING COMMENT '跳入时间',
    `session_id` STRING COMMENT '所属会话id',
    `during_time` BIGINT COMMENT '持续时间毫秒'
)
    COMMENT '页面日志表'
    PARTITIONED BY (`dt` STRING)
    STORED AS ORC
    LOCATION '/warehouse/gmall/dwd/dwd_traffic_page_view_inc'
    TBLPROPERTIES ('orc.compress' = 'snappy');
-- Traffic domain: app-start transaction fact table.
-- Device/app attributes plus start entry, ad id and three BIGINT timing measures.
-- External ORC table (snappy-compressed), partitioned by dt.
DROP TABLE IF EXISTS dwd_traffic_start_inc;
CREATE EXTERNAL TABLE dwd_traffic_start_inc
(
    `province_id` STRING COMMENT '省份id',
    `brand` STRING COMMENT '手机品牌',
    `channel` STRING COMMENT '渠道',
    `is_new` STRING COMMENT '是否首次启动',
    `model` STRING COMMENT '手机型号',
    `mid_id` STRING COMMENT '设备id',
    `operate_system` STRING COMMENT '操作系统',
    `user_id` STRING COMMENT '会员id',
    `version_code` STRING COMMENT 'app版本号',
    `entry` STRING COMMENT 'icon手机图标 notice 通知',
    `open_ad_id` STRING COMMENT '广告页ID ',
    `date_id` STRING COMMENT '日期id',
    `start_time` STRING COMMENT '启动时间',
    `loading_time_ms` BIGINT COMMENT '启动加载时间',
    `open_ad_ms` BIGINT COMMENT '广告总共播放时间',
    `open_ad_skip_ms` BIGINT COMMENT '用户跳过广告时点'
)
    COMMENT '启动日志表'
    PARTITIONED BY (`dt` STRING)
    STORED AS ORC
    LOCATION '/warehouse/gmall/dwd/dwd_traffic_start_inc'
    TBLPROPERTIES ('orc.compress' = 'snappy');
-- Traffic domain: user-action transaction fact table.
-- Device/app attributes, the page the action happened on, and the action itself.
-- External ORC table (snappy-compressed), partitioned by dt.
DROP TABLE IF EXISTS dwd_traffic_action_inc;
CREATE EXTERNAL TABLE dwd_traffic_action_inc
(
    `province_id` STRING COMMENT '省份id',
    `brand` STRING COMMENT '手机品牌',
    `channel` STRING COMMENT '渠道',
    `is_new` STRING COMMENT '是否首次启动',
    `model` STRING COMMENT '手机型号',
    `mid_id` STRING COMMENT '设备id',
    `operate_system` STRING COMMENT '操作系统',
    `user_id` STRING COMMENT '会员id',
    `version_code` STRING COMMENT 'app版本号',
    `during_time` BIGINT COMMENT '持续时间毫秒',
    `page_item` STRING COMMENT '目标id ',
    `page_item_type` STRING COMMENT '目标类型',
    `last_page_id` STRING COMMENT '上页类型',
    `page_id` STRING COMMENT '页面id ',
    `source_type` STRING COMMENT '来源类型',
    `action_id` STRING COMMENT '动作id',
    `action_item` STRING COMMENT '目标id ',
    `action_item_type` STRING COMMENT '目标类型',
    `date_id` STRING COMMENT '日期id',
    `action_time` STRING COMMENT '动作发生时间'
)
    COMMENT '动作日志表'
    PARTITIONED BY (`dt` STRING)
    STORED AS ORC
    LOCATION '/warehouse/gmall/dwd/dwd_traffic_action_inc'
    TBLPROPERTIES ('orc.compress' = 'snappy');
-- Traffic domain: display (exposure) transaction fact table.
-- Device/app attributes, the page the exposure happened on, and the exposed item.
-- External ORC table (snappy-compressed), partitioned by dt.
-- The comments on during_time and display_item_type used to be copy-pasted
-- from version_code; they now describe the columns they belong to.
DROP TABLE IF EXISTS dwd_traffic_display_inc;
CREATE EXTERNAL TABLE dwd_traffic_display_inc
(
    `province_id` STRING COMMENT '省份id',
    `brand` STRING COMMENT '手机品牌',
    `channel` STRING COMMENT '渠道',
    `is_new` STRING COMMENT '是否首次启动',
    `model` STRING COMMENT '手机型号',
    `mid_id` STRING COMMENT '设备id',
    `operate_system` STRING COMMENT '操作系统',
    `user_id` STRING COMMENT '会员id',
    `version_code` STRING COMMENT 'app版本号',
    `during_time` BIGINT COMMENT '持续时间毫秒',
    `page_item` STRING COMMENT '目标id ',
    `page_item_type` STRING COMMENT '目标类型',
    `last_page_id` STRING COMMENT '上页类型',
    `page_id` STRING COMMENT '页面ID ',
    `source_type` STRING COMMENT '来源类型',
    `date_id` STRING COMMENT '日期id',
    `display_time` STRING COMMENT '曝光时间',
    `display_type` STRING COMMENT '曝光类型',
    `display_item` STRING COMMENT '曝光对象id ',
    `display_item_type` STRING COMMENT '曝光对象类型',
    `display_order` BIGINT COMMENT '曝光顺序',
    `display_pos_id` BIGINT COMMENT '曝光位置'
)
    COMMENT '曝光日志表'
    PARTITIONED BY (`dt` STRING)
    STORED AS ORC
    LOCATION '/warehouse/gmall/dwd/dwd_traffic_display_inc'
    TBLPROPERTIES ('orc.compress' = 'snappy');
-- Traffic domain: error transaction fact table.
-- Keeps the page/start context of the failing log line, the raw actions and
-- displays arrays, and the error code and message.
-- External ORC table (snappy-compressed), partitioned by dt.
-- province_id is commented as a province id (not an area code) to match the
-- other traffic tables and the loader, which stores ods_base_province_full.id.
DROP TABLE IF EXISTS dwd_traffic_error_inc;
CREATE EXTERNAL TABLE dwd_traffic_error_inc
(
    `province_id` STRING COMMENT '省份id',
    `brand` STRING COMMENT '手机品牌',
    `channel` STRING COMMENT '渠道',
    `is_new` STRING COMMENT '是否首次启动',
    `model` STRING COMMENT '手机型号',
    `mid_id` STRING COMMENT '设备id',
    `operate_system` STRING COMMENT '操作系统',
    `user_id` STRING COMMENT '会员id',
    `version_code` STRING COMMENT 'app版本号',
    `page_item` STRING COMMENT '目标id ',
    `page_item_type` STRING COMMENT '目标类型',
    `last_page_id` STRING COMMENT '上页类型',
    `page_id` STRING COMMENT '页面ID ',
    `source_type` STRING COMMENT '来源类型',
    `entry` STRING COMMENT 'icon手机图标 notice 通知',
    `loading_time` STRING COMMENT '启动加载时间',
    `open_ad_id` STRING COMMENT '广告页ID ',
    `open_ad_ms` STRING COMMENT '广告总共播放时间',
    `open_ad_skip_ms` STRING COMMENT '用户跳过广告时点',
    `actions` ARRAY<STRUCT<action_id:STRING,item:STRING,item_type:STRING,ts:BIGINT>> COMMENT '动作信息',
    `displays` ARRAY<STRUCT<display_type :STRING,item :STRING,item_type :STRING,`order` :STRING,pos_id :STRING>> COMMENT '曝光信息',
    `date_id` STRING COMMENT '日期id',
    `error_time` STRING COMMENT '错误时间',
    `error_code` STRING COMMENT '错误码',
    `error_msg` STRING COMMENT '错误信息'
)
    COMMENT '错误日志表'
    PARTITIONED BY (`dt` STRING)
    STORED AS ORC
    LOCATION '/warehouse/gmall/dwd/dwd_traffic_error_inc'
    TBLPROPERTIES ('orc.compress' = 'snappy');
-- User domain: user-registration transaction fact table.
-- External ORC table (snappy-compressed), partitioned by dt.
DROP TABLE IF EXISTS dwd_user_register_inc;
CREATE EXTERNAL TABLE dwd_user_register_inc
(
    `user_id` STRING COMMENT '用户ID',
    `date_id` STRING COMMENT '日期ID',
    `create_time` STRING COMMENT '注册时间',
    `channel` STRING COMMENT '应用下载渠道',
    `province_id` STRING COMMENT '省份id',
    `version_code` STRING COMMENT '应用版本',
    `mid_id` STRING COMMENT '设备id',
    `brand` STRING COMMENT '设备品牌',
    `model` STRING COMMENT '设备型号',
    `operate_system` STRING COMMENT '设备操作系统'
)
    COMMENT '用户域用户注册事务事实表'
    PARTITIONED BY (`dt` STRING)
    STORED AS ORC
    LOCATION '/warehouse/gmall/dwd/dwd_user_register_inc/'
    TBLPROPERTIES ("orc.compress" = "snappy");
-- User domain: user-login transaction fact table.
-- External ORC table (snappy-compressed), partitioned by dt.
DROP TABLE IF EXISTS dwd_user_login_inc;
CREATE EXTERNAL TABLE dwd_user_login_inc
(
    `user_id` STRING COMMENT '用户ID',
    `date_id` STRING COMMENT '日期ID',
    `login_time` STRING COMMENT '登录时间',
    `channel` STRING COMMENT '应用下载渠道',
    `province_id` STRING COMMENT '省份id',
    `version_code` STRING COMMENT '应用版本',
    `mid_id` STRING COMMENT '设备id',
    `brand` STRING COMMENT '设备品牌',
    `model` STRING COMMENT '设备型号',
    `operate_system` STRING COMMENT '设备操作系统'
)
    COMMENT '用户域用户登录事务事实表'
    PARTITIONED BY (`dt` STRING)
    STORED AS ORC
    LOCATION '/warehouse/gmall/dwd/dwd_user_login_inc/'
    TBLPROPERTIES ("orc.compress" = "snappy");
六、首日装载脚本
#!/bin/bash
# First-day ODS -> DWD load script.
#   $1  name of the DWD table to load, or "all" (dispatched by the case statement below)
#   $2  business date used in the dt filters; mandatory here
APP=gmall

if [ -n "$2" ]; then
    do_date=$2
else
    # Report on stderr and exit non-zero so that a caller or scheduler does not
    # mistake a missing date argument for a successful run.
    echo "请传入日期参数" >&2
    exit 1
fi
# First-day load of the comment fact table.
# Takes the bootstrap-insert rows of ods_comment_info_inc and resolves the
# appraise code to its name through ods_base_dic_full (parent_code '12').
# The last select column feeds the dynamic dt partition (comment date).
# Hive rejects a fully dynamic partition(dt) insert in strict mode, so nonstrict
# mode is enabled inside the statement; it then also works when run on its own.
dwd_interaction_comment_inc="
set hive.exec.dynamic.partition.mode=nonstrict;
insert overwrite table ${APP}.dwd_interaction_comment_inc partition(dt)
select
    id,
    user_id,
    sku_id,
    order_id,
    date_format(create_time,'yyyy-MM-dd') as date_id,
    create_time,
    appraise,
    dic_name,
    date_format(create_time,'yyyy-MM-dd')
from
(
    select
        data.id,
        data.user_id,
        data.sku_id,
        data.order_id,
        data.create_time,
        data.appraise
    from ${APP}.ods_comment_info_inc
    where dt='$do_date'
      and type='bootstrap-insert'
) ci
left join
(
    select
        dic_code,
        dic_name
    from ${APP}.ods_base_dic_full
    where dt='$do_date'
      and parent_code='12'
) dic
on ci.appraise=dic.dic_code;
"
# First-day load of the add-to-favorites fact table from the bootstrap-insert
# rows of ods_favor_info_inc. The last select column feeds the dynamic dt
# partition (favorite creation date).
# Hive rejects a fully dynamic partition(dt) insert in strict mode, so nonstrict
# mode is enabled inside the statement; it then also works when run on its own.
dwd_interaction_favor_add_inc="
set hive.exec.dynamic.partition.mode=nonstrict;
insert overwrite table ${APP}.dwd_interaction_favor_add_inc partition(dt)
select
    data.id,
    data.user_id,
    data.sku_id,
    date_format(data.create_time,'yyyy-MM-dd') as date_id,
    data.create_time,
    date_format(data.create_time,'yyyy-MM-dd')
from ${APP}.ods_favor_info_inc
where dt='$do_date'
  and type = 'bootstrap-insert';
"
# First-day load of the coupon-claim fact table from the bootstrap-insert rows
# of ods_coupon_use_inc. The last select column feeds the dynamic dt partition
# (claim date).
# Hive rejects a fully dynamic partition(dt) insert in strict mode, so nonstrict
# mode is enabled inside the statement; it then also works when run on its own.
dwd_tool_coupon_get_inc="
set hive.exec.dynamic.partition.mode=nonstrict;
insert overwrite table ${APP}.dwd_tool_coupon_get_inc partition(dt)
select
    data.id,
    data.coupon_id,
    data.user_id,
    date_format(data.get_time,'yyyy-MM-dd') as date_id,
    data.get_time,
    date_format(data.get_time,'yyyy-MM-dd')
from ${APP}.ods_coupon_use_inc
where dt='$do_date'
  and type='bootstrap-insert';
"
# First-day load of the coupon-used-at-order fact table.
# Keeps only bootstrap-insert rows of ods_coupon_use_inc whose using_time is
# set. The last select column feeds the dynamic dt partition (using_time date).
# Hive rejects a fully dynamic partition(dt) insert in strict mode, so nonstrict
# mode is enabled inside the statement; it then also works when run on its own.
dwd_tool_coupon_order_inc="
set hive.exec.dynamic.partition.mode=nonstrict;
insert overwrite table ${APP}.dwd_tool_coupon_order_inc partition(dt)
select
    data.id,
    data.coupon_id,
    data.user_id,
    data.order_id,
    date_format(data.using_time,'yyyy-MM-dd') as date_id,
    data.using_time,
    date_format(data.using_time,'yyyy-MM-dd')
from ${APP}.ods_coupon_use_inc
where dt='$do_date'
  and type='bootstrap-insert'
  and data.using_time is not null;
"
# First-day load of the coupon-used-at-payment fact table.
# Keeps only bootstrap-insert rows of ods_coupon_use_inc whose used_time is set.
# The last select column feeds the dynamic dt partition (used_time date).
# Hive rejects a fully dynamic partition(dt) insert in strict mode, so nonstrict
# mode is enabled inside the statement; it then also works when run on its own.
dwd_tool_coupon_pay_inc="
set hive.exec.dynamic.partition.mode=nonstrict;
insert overwrite table ${APP}.dwd_tool_coupon_pay_inc partition(dt)
select
    data.id,
    data.coupon_id,
    data.user_id,
    data.order_id,
    date_format(data.used_time,'yyyy-MM-dd') as date_id,
    data.used_time,
    date_format(data.used_time,'yyyy-MM-dd')
from ${APP}.ods_coupon_use_inc
where dt='$do_date'
  and type='bootstrap-insert'
  and data.used_time is not null;
"
# First-day load of the order-cancellation detail fact table.
# Order details are inner-joined to the orders whose status is '1003', using
# operate_time as the cancel time. They are then left-joined to the activity
# and coupon detail tables and to the source-type dictionary (parent_code '24').
# The last select column feeds the dynamic dt partition (cancel date).
# Hive rejects a fully dynamic partition(dt) insert in strict mode, so nonstrict
# mode is enabled inside the statement; it then also works when run on its own.
dwd_trade_cancel_detail_inc="
set hive.exec.dynamic.partition.mode=nonstrict;
insert overwrite table ${APP}.dwd_trade_cancel_detail_inc partition (dt)
select
    od.id,
    order_id,
    user_id,
    sku_id,
    province_id,
    activity_id,
    activity_rule_id,
    coupon_id,
    date_format(cancel_time,'yyyy-MM-dd') as date_id,
    cancel_time,
    source_id,
    source_type,
    dic_name,
    sku_num,
    split_original_amount,
    split_activity_amount,
    split_coupon_amount,
    split_total_amount,
    date_format(cancel_time,'yyyy-MM-dd')
from
(
    select
        data.id,
        data.order_id,
        data.sku_id,
        data.source_id,
        data.source_type,
        data.sku_num,
        data.sku_num * data.order_price as split_original_amount,
        data.split_total_amount,
        data.split_activity_amount,
        data.split_coupon_amount
    from ${APP}.ods_order_detail_inc
    where dt = '$do_date'
      and type = 'bootstrap-insert'
) od
join
(
    select
        data.id,
        data.user_id,
        data.province_id,
        data.operate_time as cancel_time
    from ${APP}.ods_order_info_inc
    where dt = '$do_date'
      and type = 'bootstrap-insert'
      and data.order_status='1003'
) oi
on od.order_id = oi.id
left join
(
    select
        data.order_detail_id,
        data.activity_id,
        data.activity_rule_id
    from ${APP}.ods_order_detail_activity_inc
    where dt = '$do_date'
      and type = 'bootstrap-insert'
) act
on od.id = act.order_detail_id
left join
(
    select
        data.order_detail_id,
        data.coupon_id
    from ${APP}.ods_order_detail_coupon_inc
    where dt = '$do_date'
      and type = 'bootstrap-insert'
) cou
on od.id = cou.order_detail_id
left join
(
    select
        dic_code,
        dic_name
    from ${APP}.ods_base_dic_full
    where dt='$do_date'
      and parent_code='24'
) dic
on od.source_type=dic.dic_code;
"
# First-day load of the add-to-cart fact table.
# Takes the bootstrap-insert rows of ods_cart_info_inc and resolves the source
# type to its name through ods_base_dic_full (parent_code '24').
# The last select column feeds the dynamic dt partition (cart creation date).
# Hive rejects a fully dynamic partition(dt) insert in strict mode, so nonstrict
# mode is enabled inside the statement; it then also works when run on its own.
dwd_trade_cart_add_inc="
set hive.exec.dynamic.partition.mode=nonstrict;
insert overwrite table ${APP}.dwd_trade_cart_add_inc partition (dt)
select
    id,
    user_id,
    sku_id,
    date_format(create_time,'yyyy-MM-dd') as date_id,
    create_time,
    source_id,
    source_type,
    dic.dic_name,
    sku_num,
    date_format(create_time, 'yyyy-MM-dd')
from
(
    select
        data.id,
        data.user_id,
        data.sku_id,
        data.create_time,
        data.source_id,
        data.source_type,
        data.sku_num
    from ${APP}.ods_cart_info_inc
    where dt = '$do_date'
      and type = 'bootstrap-insert'
) ci
left join
(
    select
        dic_code,
        dic_name
    from ${APP}.ods_base_dic_full
    where dt='$do_date'
      and parent_code='24'
) dic
on ci.source_type=dic.dic_code;
"
# Snapshot of the cart rows with is_ordered='0' for do_date, written to the
# static partition dt='$do_date' of dwd_trade_cart_full.
dwd_trade_cart_full="
insert overwrite table ${APP}.dwd_trade_cart_full partition(dt='$do_date')
select
    id,
    user_id,
    sku_id,
    sku_name,
    sku_num
from ${APP}.ods_cart_info_full
where dt='$do_date'
  and is_ordered='0';
"
# First-day load of the order detail fact table.
# Order details (bootstrap-insert) are left-joined to the order header for
# user_id and province_id, to the activity and coupon detail tables, and to the
# source-type dictionary (parent_code '24').
# The last select column feeds the dynamic dt partition (detail creation date).
# Hive rejects a fully dynamic partition(dt) insert in strict mode, so nonstrict
# mode is enabled inside the statement; it then also works when run on its own.
dwd_trade_order_detail_inc="
set hive.exec.dynamic.partition.mode=nonstrict;
insert overwrite table ${APP}.dwd_trade_order_detail_inc partition (dt)
select
    od.id,
    order_id,
    user_id,
    sku_id,
    province_id,
    activity_id,
    activity_rule_id,
    coupon_id,
    date_format(create_time, 'yyyy-MM-dd') as date_id,
    create_time,
    source_id,
    source_type,
    dic_name,
    sku_num,
    split_original_amount,
    split_activity_amount,
    split_coupon_amount,
    split_total_amount,
    date_format(create_time,'yyyy-MM-dd')
from
(
    select
        data.id,
        data.order_id,
        data.sku_id,
        data.create_time,
        data.source_id,
        data.source_type,
        data.sku_num,
        data.sku_num * data.order_price as split_original_amount,
        data.split_total_amount,
        data.split_activity_amount,
        data.split_coupon_amount
    from ${APP}.ods_order_detail_inc
    where dt = '$do_date'
      and type = 'bootstrap-insert'
) od
left join
(
    select
        data.id,
        data.user_id,
        data.province_id
    from ${APP}.ods_order_info_inc
    where dt = '$do_date'
      and type = 'bootstrap-insert'
) oi
on od.order_id = oi.id
left join
(
    select
        data.order_detail_id,
        data.activity_id,
        data.activity_rule_id
    from ${APP}.ods_order_detail_activity_inc
    where dt = '$do_date'
      and type = 'bootstrap-insert'
) act
on od.id = act.order_detail_id
left join
(
    select
        data.order_detail_id,
        data.coupon_id
    from ${APP}.ods_order_detail_coupon_inc
    where dt = '$do_date'
      and type = 'bootstrap-insert'
) cou
on od.id = cou.order_detail_id
left join
(
    select
        dic_code,
        dic_name
    from ${APP}.ods_base_dic_full
    where dt='$do_date'
      and parent_code='24'
) dic
on od.source_type=dic.dic_code;
"
# First-day load of the order-refund fact table.
# Refund rows (bootstrap-insert) are left-joined to the order header for
# province_id and to two dictionary lookups: refund type (parent_code '15') and
# refund reason (parent_code '13').
# The last select column feeds the dynamic dt partition (refund creation date).
# Hive rejects a fully dynamic partition(dt) insert in strict mode, so nonstrict
# mode is enabled inside the statement; it then also works when run on its own.
dwd_trade_order_refund_inc="
set hive.exec.dynamic.partition.mode=nonstrict;
insert overwrite table ${APP}.dwd_trade_order_refund_inc partition(dt)
select
    ri.id,
    user_id,
    order_id,
    sku_id,
    province_id,
    date_format(create_time,'yyyy-MM-dd') as date_id,
    create_time,
    refund_type,
    type_dic.dic_name,
    refund_reason_type,
    reason_dic.dic_name,
    refund_reason_txt,
    refund_num,
    refund_amount,
    date_format(create_time,'yyyy-MM-dd')
from
(
    select
        data.id,
        data.user_id,
        data.order_id,
        data.sku_id,
        data.refund_type,
        data.refund_num,
        data.refund_amount,
        data.refund_reason_type,
        data.refund_reason_txt,
        data.create_time
    from ${APP}.ods_order_refund_info_inc
    where dt='$do_date'
      and type='bootstrap-insert'
) ri
left join
(
    select
        data.id,
        data.province_id
    from ${APP}.ods_order_info_inc
    where dt='$do_date'
      and type='bootstrap-insert'
) oi
on ri.order_id=oi.id
left join
(
    select
        dic_code,
        dic_name
    from ${APP}.ods_base_dic_full
    where dt='$do_date'
      and parent_code = '15'
) type_dic
on ri.refund_type=type_dic.dic_code
left join
(
    select
        dic_code,
        dic_name
    from ${APP}.ods_base_dic_full
    where dt='$do_date'
      and parent_code = '13'
) reason_dic
on ri.refund_reason_type=reason_dic.dic_code;
"
# First-day load of the successful-payment detail fact table.
# Order details are inner-joined to payments with payment_status '1602'. They
# are then left-joined to the order header (province_id), the activity and
# coupon detail tables, and two dictionary lookups: payment type
# (parent_code '11') and source type (parent_code '24').
# The last select column feeds the dynamic dt partition (callback date).
# Hive rejects a fully dynamic partition(dt) insert in strict mode, so nonstrict
# mode is enabled inside the statement; it then also works when run on its own.
dwd_trade_pay_detail_suc_inc="
set hive.exec.dynamic.partition.mode=nonstrict;
insert overwrite table ${APP}.dwd_trade_pay_detail_suc_inc partition (dt)
select
    od.id,
    od.order_id,
    user_id,
    sku_id,
    province_id,
    activity_id,
    activity_rule_id,
    coupon_id,
    payment_type,
    pay_dic.dic_name,
    date_format(callback_time,'yyyy-MM-dd') as date_id,
    callback_time,
    source_id,
    source_type,
    src_dic.dic_name,
    sku_num,
    split_original_amount,
    split_activity_amount,
    split_coupon_amount,
    split_total_amount,
    date_format(callback_time,'yyyy-MM-dd')
from
(
    select
        data.id,
        data.order_id,
        data.sku_id,
        data.source_id,
        data.source_type,
        data.sku_num,
        data.sku_num * data.order_price as split_original_amount,
        data.split_total_amount,
        data.split_activity_amount,
        data.split_coupon_amount
    from ${APP}.ods_order_detail_inc
    where dt = '$do_date'
      and type = 'bootstrap-insert'
) od
join
(
    select
        data.user_id,
        data.order_id,
        data.payment_type,
        data.callback_time
    from ${APP}.ods_payment_info_inc
    where dt='$do_date'
      and type='bootstrap-insert'
      and data.payment_status='1602'
) pi
on od.order_id=pi.order_id
left join
(
    select
        data.id,
        data.province_id
    from ${APP}.ods_order_info_inc
    where dt = '$do_date'
      and type = 'bootstrap-insert'
) oi
on od.order_id = oi.id
left join
(
    select
        data.order_detail_id,
        data.activity_id,
        data.activity_rule_id
    from ${APP}.ods_order_detail_activity_inc
    where dt = '$do_date'
      and type = 'bootstrap-insert'
) act
on od.id = act.order_detail_id
left join
(
    select
        data.order_detail_id,
        data.coupon_id
    from ${APP}.ods_order_detail_coupon_inc
    where dt = '$do_date'
      and type = 'bootstrap-insert'
) cou
on od.id = cou.order_detail_id
left join
(
    select
        dic_code,
        dic_name
    from ${APP}.ods_base_dic_full
    where dt='$do_date'
      and parent_code='11'
) pay_dic
on pi.payment_type=pay_dic.dic_code
left join
(
    select
        dic_code,
        dic_name
    from ${APP}.ods_base_dic_full
    where dt='$do_date'
      and parent_code='24'
) src_dic
on od.source_type=src_dic.dic_code;
"
# First-day load of the successful-refund-payment fact table.
# Refund payments with refund_status '1602' are left-joined to the order header
# (user_id, province_id), to the refund request on (order_id, sku_id) for
# refund_num, and to the payment-type dictionary (parent_code '11').
# The last select column feeds the dynamic dt partition (callback date).
# Hive rejects a fully dynamic partition(dt) insert in strict mode, so nonstrict
# mode is enabled inside the statement; it then also works when run on its own.
dwd_trade_refund_pay_suc_inc="
set hive.exec.dynamic.partition.mode=nonstrict;
insert overwrite table ${APP}.dwd_trade_refund_pay_suc_inc partition(dt)
select
    rp.id,
    user_id,
    rp.order_id,
    rp.sku_id,
    province_id,
    payment_type,
    dic_name,
    date_format(callback_time,'yyyy-MM-dd') as date_id,
    callback_time,
    refund_num,
    total_amount,
    date_format(callback_time,'yyyy-MM-dd')
from
(
    select
        data.id,
        data.order_id,
        data.sku_id,
        data.payment_type,
        data.callback_time,
        data.total_amount
    from ${APP}.ods_refund_payment_inc
    where dt='$do_date'
      and type = 'bootstrap-insert'
      and data.refund_status='1602'
) rp
left join
(
    select
        data.id,
        data.user_id,
        data.province_id
    from ${APP}.ods_order_info_inc
    where dt='$do_date'
      and type='bootstrap-insert'
) oi
on rp.order_id=oi.id
left join
(
    select
        data.order_id,
        data.sku_id,
        data.refund_num
    from ${APP}.ods_order_refund_info_inc
    where dt='$do_date'
      and type='bootstrap-insert'
) ri
on rp.order_id=ri.order_id
and rp.sku_id=ri.sku_id
left join
(
    select
        dic_code,
        dic_name
    from ${APP}.ods_base_dic_full
    where dt='$do_date'
      and parent_code='11'
) dic
on rp.payment_type=dic.dic_code;
"
# Loads the action fact table for dt='$do_date'.
# Explodes the actions array of ods_log_inc into one row per action, stamps
# each row with the action's own ts (shifted to GMT+8), and maps the log's area
# code to a province id through ods_base_province_full.
# hive.cbo.enable is switched off for this statement, as in the original script.
dwd_traffic_action_inc="
set hive.cbo.enable=false;
insert overwrite table ${APP}.dwd_traffic_action_inc partition(dt='$do_date')
select
    province_id,
    brand,
    channel,
    is_new,
    model,
    mid_id,
    operate_system,
    user_id,
    version_code,
    during_time,
    page_item,
    page_item_type,
    last_page_id,
    page_id,
    source_type,
    action_id,
    action_item,
    action_item_type,
    date_format(from_utc_timestamp(ts,'GMT+8'),'yyyy-MM-dd') as date_id,
    date_format(from_utc_timestamp(ts,'GMT+8'),'yyyy-MM-dd HH:mm:ss') as action_time
from
(
    select
        common.ar as area_code,
        common.ba as brand,
        common.ch as channel,
        common.is_new,
        common.md as model,
        common.mid as mid_id,
        common.os as operate_system,
        common.uid as user_id,
        common.vc as version_code,
        page.during_time,
        page.item as page_item,
        page.item_type as page_item_type,
        page.last_page_id,
        page.page_id,
        page.source_type,
        action.action_id,
        action.item as action_item,
        action.item_type as action_item_type,
        action.ts
    from ${APP}.ods_log_inc lateral view explode(actions) tmp as action
    where dt='$do_date'
      and actions is not null
) log
left join
(
    select
        id as province_id,
        area_code
    from ${APP}.ods_base_province_full
    where dt='$do_date'
) bp
on log.area_code=bp.area_code;
"
# Loads the display (exposure) fact table for dt='$do_date'.
# Explodes the displays array of ods_log_inc into one row per exposure, uses
# the log line's ts (shifted to GMT+8) as the display time, and maps the area
# code to a province id through ods_base_province_full.
# The backticks around the struct field order are escaped because the statement
# lives inside a double-quoted shell string.
dwd_traffic_display_inc="
set hive.cbo.enable=false;
insert overwrite table ${APP}.dwd_traffic_display_inc partition(dt='$do_date')
select
    province_id,
    brand,
    channel,
    is_new,
    model,
    mid_id,
    operate_system,
    user_id,
    version_code,
    during_time,
    page_item,
    page_item_type,
    last_page_id,
    page_id,
    source_type,
    date_format(from_utc_timestamp(ts,'GMT+8'),'yyyy-MM-dd') as date_id,
    date_format(from_utc_timestamp(ts,'GMT+8'),'yyyy-MM-dd HH:mm:ss') as display_time,
    display_type,
    display_item,
    display_item_type,
    display_order,
    display_pos_id
from
(
    select
        common.ar as area_code,
        common.ba as brand,
        common.ch as channel,
        common.is_new,
        common.md as model,
        common.mid as mid_id,
        common.os as operate_system,
        common.uid as user_id,
        common.vc as version_code,
        page.during_time,
        page.item as page_item,
        page.item_type as page_item_type,
        page.last_page_id,
        page.page_id,
        page.source_type,
        display.display_type,
        display.item as display_item,
        display.item_type as display_item_type,
        display.\`order\` as display_order,
        display.pos_id as display_pos_id,
        ts
    from ${APP}.ods_log_inc lateral view explode(displays) tmp as display
    where dt='$do_date'
      and displays is not null
) log
left join
(
    select
        id as province_id,
        area_code
    from ${APP}.ods_base_province_full
    where dt='$do_date'
) bp
on log.area_code=bp.area_code;
"
# Loads the error fact table for dt='$do_date'.
# Keeps the log lines whose err struct is not null, together with their page
# and start context and the raw actions and displays arrays. The area code is
# mapped to a province id through ods_base_province_full.
# The statement runs on the mr engine and switches back to spark afterwards,
# exactly as the original script does (the reason is not stated there).
dwd_traffic_error_inc="
set hive.cbo.enable=false;
set hive.execution.engine=mr;
insert overwrite table ${APP}.dwd_traffic_error_inc partition(dt='$do_date')
select
    province_id,
    brand,
    channel,
    is_new,
    model,
    mid_id,
    operate_system,
    user_id,
    version_code,
    page_item,
    page_item_type,
    last_page_id,
    page_id,
    source_type,
    entry,
    loading_time,
    open_ad_id,
    open_ad_ms,
    open_ad_skip_ms,
    actions,
    displays,
    date_format(from_utc_timestamp(ts,'GMT+8'),'yyyy-MM-dd') as date_id,
    date_format(from_utc_timestamp(ts,'GMT+8'),'yyyy-MM-dd HH:mm:ss') as error_time,
    error_code,
    error_msg
from
(
    select
        common.ar as area_code,
        common.ba as brand,
        common.ch as channel,
        common.is_new,
        common.md as model,
        common.mid as mid_id,
        common.os as operate_system,
        common.uid as user_id,
        common.vc as version_code,
        page.during_time,
        page.item as page_item,
        page.item_type as page_item_type,
        page.last_page_id,
        page.page_id,
        page.source_type,
        \`start\`.entry,
        \`start\`.loading_time,
        \`start\`.open_ad_id,
        \`start\`.open_ad_ms,
        \`start\`.open_ad_skip_ms,
        actions,
        displays,
        err.error_code,
        err.msg as error_msg,
        ts
    from ${APP}.ods_log_inc
    where dt='$do_date'
      and err is not null
) log
left join
(
    select
        id as province_id,
        area_code
    from ${APP}.ods_base_province_full
    where dt='$do_date'
) bp
on log.area_code=bp.area_code;
set hive.execution.engine=spark;
"
# Loads the page-view fact table for dt='$do_date'.
# A page log with no last_page_id marks a session start. session_id is mid_id
# joined by '-' to the most recent non-null session start ts for that device,
# taken with last_value(..., true) over the device's rows ordered by ts.
# The area code is mapped to a province id through ods_base_province_full.
dwd_traffic_page_view_inc="
set hive.cbo.enable=false;
insert overwrite table ${APP}.dwd_traffic_page_view_inc partition (dt='$do_date')
select
    province_id,
    brand,
    channel,
    is_new,
    model,
    mid_id,
    operate_system,
    user_id,
    version_code,
    page_item,
    page_item_type,
    last_page_id,
    page_id,
    source_type,
    date_format(from_utc_timestamp(ts,'GMT+8'),'yyyy-MM-dd') as date_id,
    date_format(from_utc_timestamp(ts,'GMT+8'),'yyyy-MM-dd HH:mm:ss') as view_time,
    concat(mid_id,'-',last_value(session_start_point,true) over (partition by mid_id order by ts)) as session_id,
    during_time
from
(
    select
        common.ar as area_code,
        common.ba as brand,
        common.ch as channel,
        common.is_new as is_new,
        common.md as model,
        common.mid as mid_id,
        common.os as operate_system,
        common.uid as user_id,
        common.vc as version_code,
        page.during_time,
        page.item as page_item,
        page.item_type as page_item_type,
        page.last_page_id,
        page.page_id,
        page.source_type,
        ts,
        if(page.last_page_id is null,ts,null) as session_start_point
    from ${APP}.ods_log_inc
    where dt='$do_date'
      and page is not null
) log
left join
(
    select
        id as province_id,
        area_code
    from ${APP}.ods_base_province_full
    where dt='$do_date'
) bp
on log.area_code=bp.area_code;
"
# Loads the app-start fact table for dt='$do_date'.
# Keeps the log lines whose start struct is not null and maps the area code to
# a province id through ods_base_province_full.
# The formatted timestamp is aliased start_time, the name of the target column.
# It was previously aliased action_time, a leftover from the action loader. The
# insert is positional, so the stored data is unchanged.
dwd_traffic_start_inc="
set hive.cbo.enable=false;
insert overwrite table ${APP}.dwd_traffic_start_inc partition(dt='$do_date')
select
    province_id,
    brand,
    channel,
    is_new,
    model,
    mid_id,
    operate_system,
    user_id,
    version_code,
    entry,
    open_ad_id,
    date_format(from_utc_timestamp(ts,'GMT+8'),'yyyy-MM-dd') as date_id,
    date_format(from_utc_timestamp(ts,'GMT+8'),'yyyy-MM-dd HH:mm:ss') as start_time,
    loading_time,
    open_ad_ms,
    open_ad_skip_ms
from
(
    select
        common.ar as area_code,
        common.ba as brand,
        common.ch as channel,
        common.is_new,
        common.md as model,
        common.mid as mid_id,
        common.os as operate_system,
        common.uid as user_id,
        common.vc as version_code,
        \`start\`.entry,
        \`start\`.loading_time,
        \`start\`.open_ad_id,
        \`start\`.open_ad_ms,
        \`start\`.open_ad_skip_ms,
        ts
    from ${APP}.ods_log_inc
    where dt='$do_date'
      and \`start\` is not null
) log
left join
(
    select
        id as province_id,
        area_code
    from ${APP}.ods_base_province_full
    where dt='$do_date'
) bp
on log.area_code=bp.area_code;
"
# Loads the user-login fact table for dt='$do_date'.
# Steps, innermost first:
#   t1  page logs, with ts kept as session_start_point when last_page_id is null
#   t2  session_id = mid_id + '-' + latest non-null session_start_point per device
#   t3  rows with a user_id, numbered by ts within each session
#   t4  the first such row of every session (rn=1), taken as the login event
# The area code is finally mapped to a province id via ods_base_province_full.
dwd_user_login_inc="
insert overwrite table ${APP}.dwd_user_login_inc partition(dt='$do_date')
select
    user_id,
    date_format(from_utc_timestamp(ts,'GMT+8'),'yyyy-MM-dd') as date_id,
    date_format(from_utc_timestamp(ts,'GMT+8'),'yyyy-MM-dd HH:mm:ss') as login_time,
    channel,
    province_id,
    version_code,
    mid_id,
    brand,
    model,
    operate_system
from
(
    select
        user_id,
        channel,
        area_code,
        version_code,
        mid_id,
        brand,
        model,
        operate_system,
        ts
    from
    (
        select
            user_id,
            channel,
            area_code,
            version_code,
            mid_id,
            brand,
            model,
            operate_system,
            ts,
            row_number() over (partition by session_id order by ts) as rn
        from
        (
            select
                user_id,
                channel,
                area_code,
                version_code,
                mid_id,
                brand,
                model,
                operate_system,
                ts,
                concat(mid_id,'-',last_value(session_start_point,true) over(partition by mid_id order by ts)) as session_id
            from
            (
                select
                    common.uid as user_id,
                    common.ch as channel,
                    common.ar as area_code,
                    common.vc as version_code,
                    common.mid as mid_id,
                    common.ba as brand,
                    common.md as model,
                    common.os as operate_system,
                    ts,
                    if(page.last_page_id is null,ts,null) as session_start_point
                from ${APP}.ods_log_inc
                where dt='$do_date'
                  and page is not null
            ) t1
        ) t2
        where user_id is not null
    ) t3
    where rn=1
) t4
left join
(
    select
        id as province_id,
        area_code
    from ${APP}.ods_base_province_full
    where dt='$do_date'
) bp
on t4.area_code=bp.area_code;
"
# First-day load of the user-registration fact table.
# Users (bootstrap-insert rows of ods_user_info_inc) are left-joined to the
# page logs whose page_id is 'register' and whose uid is set, which supply the
# device and channel attributes. The area code is then mapped to a province id
# through ods_base_province_full.
# The last select column feeds the dynamic dt partition (registration date).
# Hive rejects a fully dynamic partition(dt) insert in strict mode, so nonstrict
# mode is enabled inside the statement; it then also works when run on its own.
dwd_user_register_inc="
set hive.exec.dynamic.partition.mode=nonstrict;
insert overwrite table ${APP}.dwd_user_register_inc partition(dt)
select
    ui.user_id,
    date_format(create_time,'yyyy-MM-dd') as date_id,
    create_time,
    channel,
    province_id,
    version_code,
    mid_id,
    brand,
    model,
    operate_system,
    date_format(create_time,'yyyy-MM-dd')
from
(
    select
        data.id as user_id,
        data.create_time
    from ${APP}.ods_user_info_inc
    where dt='$do_date'
      and type='bootstrap-insert'
) ui
left join
(
    select
        common.ar as area_code,
        common.ba as brand,
        common.ch as channel,
        common.md as model,
        common.mid as mid_id,
        common.os as operate_system,
        common.uid as user_id,
        common.vc as version_code
    from ${APP}.ods_log_inc
    where dt='$do_date'
      and page.page_id='register'
      and common.uid is not null
) log
on ui.user_id=log.user_id
left join
(
    select
        id as province_id,
        area_code
    from ${APP}.ods_base_province_full
    where dt='$do_date'
) bp
on log.area_code=bp.area_code;
"
# Dispatch on $1: run the statement of one table, or every statement for "all".
# Each statement is stored in a shell variable named after its table, so the
# single-table branch expands that variable indirectly with ${!1}.
# An unrecognised name now fails loudly; previously it did nothing and exited 0.
case $1 in
    dwd_interaction_comment_inc | dwd_interaction_favor_add_inc | \
    dwd_tool_coupon_get_inc | dwd_tool_coupon_order_inc | dwd_tool_coupon_pay_inc | \
    dwd_trade_cancel_detail_inc | dwd_trade_cart_add_inc | dwd_trade_cart_full | \
    dwd_trade_order_detail_inc | dwd_trade_order_refund_inc | \
    dwd_trade_pay_detail_suc_inc | dwd_trade_refund_pay_suc_inc | \
    dwd_traffic_action_inc | dwd_traffic_display_inc | dwd_traffic_error_inc | \
    dwd_traffic_page_view_inc | dwd_traffic_start_inc | \
    dwd_user_login_inc | dwd_user_register_inc )
        hive -e "${!1}"
        ;;
    "all" )
        hive -e "$dwd_interaction_comment_inc$dwd_interaction_favor_add_inc$dwd_tool_coupon_get_inc$dwd_tool_coupon_order_inc$dwd_tool_coupon_pay_inc$dwd_trade_cancel_detail_inc$dwd_trade_cart_add_inc$dwd_trade_cart_full$dwd_trade_order_detail_inc$dwd_trade_order_refund_inc$dwd_trade_pay_detail_suc_inc$dwd_trade_refund_pay_suc_inc$dwd_traffic_action_inc$dwd_traffic_display_inc$dwd_traffic_error_inc$dwd_traffic_page_view_inc$dwd_traffic_start_inc$dwd_user_login_inc$dwd_user_register_inc"
        ;;
    * )
        echo "未知的表名: $1" >&2
        exit 1
        ;;
esac
七、每日装载脚本
#!/bin/bash
# Daily ODS -> DWD load script.
APP=gmall

# Use the date passed as $2 when it is non-empty; otherwise fall back to yesterday.
do_date=${2:-$(date -d "-1 day" +%F)}
# Daily load of the comment fact table into dt='$do_date'.
# Takes the day's insert rows of ods_comment_info_inc and resolves the appraise
# code to its name through ods_base_dic_full (parent_code '12').
dwd_interaction_comment_inc="
insert overwrite table ${APP}.dwd_interaction_comment_inc partition(dt='$do_date')
select
    id,
    user_id,
    sku_id,
    order_id,
    date_format(create_time,'yyyy-MM-dd') as date_id,
    create_time,
    appraise,
    dic_name
from
(
    select
        data.id,
        data.user_id,
        data.sku_id,
        data.order_id,
        data.create_time,
        data.appraise
    from ${APP}.ods_comment_info_inc
    where dt='$do_date'
      and type='insert'
) ci
left join
(
    select
        dic_code,
        dic_name
    from ${APP}.ods_base_dic_full
    where dt='$do_date'
      and parent_code='12'
) dic
on ci.appraise=dic.dic_code;
"
# Daily load of the add-to-favorites fact table into dt='$do_date' from the
# day's insert rows of ods_favor_info_inc.
dwd_interaction_favor_add_inc="
insert overwrite table ${APP}.dwd_interaction_favor_add_inc partition(dt='$do_date')
select
    data.id,
    data.user_id,
    data.sku_id,
    date_format(data.create_time,'yyyy-MM-dd') as date_id,
    data.create_time
from ${APP}.ods_favor_info_inc
where dt='$do_date'
  and type = 'insert';
"
# Daily load of the coupon-claim fact table into dt='$do_date' from the day's
# insert rows of ods_coupon_use_inc.
dwd_tool_coupon_get_inc="
insert overwrite table ${APP}.dwd_tool_coupon_get_inc partition (dt='$do_date')
select
    data.id,
    data.coupon_id,
    data.user_id,
    date_format(data.get_time,'yyyy-MM-dd') as date_id,
    data.get_time
from ${APP}.ods_coupon_use_inc
where dt='$do_date'
  and type='insert';
"
# Daily load of the coupon-used-at-order fact table into dt='$do_date'.
# Keeps the update rows of ods_coupon_use_inc whose old map has a using_time
# key, i.e. the updates in which using_time changed.
dwd_tool_coupon_order_inc="
insert overwrite table ${APP}.dwd_tool_coupon_order_inc partition(dt='$do_date')
select
    data.id,
    data.coupon_id,
    data.user_id,
    data.order_id,
    date_format(data.using_time,'yyyy-MM-dd') as date_id,
    data.using_time
from ${APP}.ods_coupon_use_inc
where dt='$do_date'
  and type='update'
  and array_contains(map_keys(old),'using_time');
"
# Daily load of the coupon-used-at-payment fact table into dt='$do_date'.
# Keeps the update rows of ods_coupon_use_inc whose old map has a used_time
# key, i.e. the updates in which used_time changed.
dwd_tool_coupon_pay_inc="
insert overwrite table ${APP}.dwd_tool_coupon_pay_inc partition(dt='$do_date')
select
    data.id,
    data.coupon_id,
    data.user_id,
    data.order_id,
    date_format(data.used_time,'yyyy-MM-dd') as date_id,
    data.used_time
from ${APP}.ods_coupon_use_inc
where dt='$do_date'
  and type='update'
  and array_contains(map_keys(old),'used_time');
"
# Daily load of the order-cancellation detail fact table into dt='$do_date'.
# The cancellation is an update row of ods_order_info_inc on do_date whose
# order_status became '1003'; operate_time is used as the cancel time.
# Detail, activity and coupon rows are read from do_date and the day before,
# for both insert and bootstrap-insert types. This is presumably so an order
# placed the previous day and cancelled on do_date still matches. TODO confirm.
dwd_trade_cancel_detail_inc="
insert overwrite table ${APP}.dwd_trade_cancel_detail_inc partition (dt='$do_date')
select
    od.id,
    order_id,
    user_id,
    sku_id,
    province_id,
    activity_id,
    activity_rule_id,
    coupon_id,
    date_format(cancel_time,'yyyy-MM-dd') as date_id,
    cancel_time,
    source_id,
    source_type,
    dic_name,
    sku_num,
    split_original_amount,
    split_activity_amount,
    split_coupon_amount,
    split_total_amount
from
(
    select
        data.id,
        data.order_id,
        data.sku_id,
        data.source_id,
        data.source_type,
        data.sku_num,
        data.sku_num * data.order_price as split_original_amount,
        data.split_total_amount,
        data.split_activity_amount,
        data.split_coupon_amount
    from ${APP}.ods_order_detail_inc
    where (dt='$do_date' or dt=date_add('$do_date',-1))
      and (type = 'insert' or type= 'bootstrap-insert')
) od
join
(
    select
        data.id,
        data.user_id,
        data.province_id,
        data.operate_time as cancel_time
    from ${APP}.ods_order_info_inc
    where dt = '$do_date'
      and type = 'update'
      and data.order_status='1003'
      and array_contains(map_keys(old),'order_status')
) oi
on order_id = oi.id
left join
(
    select
        data.order_detail_id,
        data.activity_id,
        data.activity_rule_id
    from ${APP}.ods_order_detail_activity_inc
    where (dt='$do_date' or dt=date_add('$do_date',-1))
      and (type = 'insert' or type= 'bootstrap-insert')
) act
on od.id = act.order_detail_id
left join
(
    select
        data.order_detail_id,
        data.coupon_id
    from ${APP}.ods_order_detail_coupon_inc
    where (dt='$do_date' or dt=date_add('$do_date',-1))
      and (type = 'insert' or type= 'bootstrap-insert')
) cou
on od.id = cou.order_detail_id
left join
(
    select
        dic_code,
        dic_name
    from ${APP}.ods_base_dic_full
    where dt='$do_date'
      and parent_code='24'
) dic
on od.source_type=dic.dic_code;
"
# Daily load of the add-to-cart fact table into dt='$do_date'.
# A row counts as an add-to-cart when it is an insert, or an update whose
# sku_num grew compared with old['sku_num']. For updates only the increase is
# stored as sku_num.
# The event time comes from the change record's ts, multiplied by 1000 and
# shifted to GMT+8, rather than from data.create_time.
dwd_trade_cart_add_inc="
insert overwrite table ${APP}.dwd_trade_cart_add_inc partition(dt='$do_date')
select
    id,
    user_id,
    sku_id,
    date_id,
    create_time,
    source_id,
    source_type_code,
    source_type_name,
    sku_num
from
(
    select
        data.id,
        data.user_id,
        data.sku_id,
        date_format(from_utc_timestamp(ts*1000,'GMT+8'),'yyyy-MM-dd') as date_id,
        date_format(from_utc_timestamp(ts*1000,'GMT+8'),'yyyy-MM-dd HH:mm:ss') as create_time,
        data.source_id,
        data.source_type as source_type_code,
        if(type='insert',data.sku_num,data.sku_num-old['sku_num']) as sku_num
    from ${APP}.ods_cart_info_inc
    where dt='$do_date'
      and (type='insert'
           or (type='update' and old['sku_num'] is not null and data.sku_num>cast(old['sku_num'] as int)))
) cart
left join
(
    select
        dic_code,
        dic_name as source_type_name
    from ${APP}.ods_base_dic_full
    where dt='$do_date'
      and parent_code='24'
) dic
on cart.source_type_code=dic.dic_code;
"
# Snapshot of the cart rows with is_ordered='0' for do_date, written to the
# static partition dt='$do_date' of dwd_trade_cart_full.
dwd_trade_cart_full="
insert overwrite table ${APP}.dwd_trade_cart_full partition(dt='$do_date')
select
    id,
    user_id,
    sku_id,
    sku_name,
    sku_num
from ${APP}.ods_cart_info_full
where dt='$do_date'
  and is_ordered='0';
"
# Daily load of the order detail fact table into dt='$do_date'.
# The day's inserted order details are left-joined to the day's inserted order
# headers (user_id, province_id), to the activity and coupon detail rows, and
# to the source-type dictionary (parent_code '24').
dwd_trade_order_detail_inc="
insert overwrite table ${APP}.dwd_trade_order_detail_inc partition (dt='$do_date')
select
    od.id,
    order_id,
    user_id,
    sku_id,
    province_id,
    activity_id,
    activity_rule_id,
    coupon_id,
    date_id,
    create_time,
    source_id,
    source_type,
    dic_name,
    sku_num,
    split_original_amount,
    split_activity_amount,
    split_coupon_amount,
    split_total_amount
from
(
    select
        data.id,
        data.order_id,
        data.sku_id,
        date_format(data.create_time, 'yyyy-MM-dd') as date_id,
        data.create_time,
        data.source_id,
        data.source_type,
        data.sku_num,
        data.sku_num * data.order_price as split_original_amount,
        data.split_total_amount,
        data.split_activity_amount,
        data.split_coupon_amount
    from ${APP}.ods_order_detail_inc
    where dt = '$do_date'
      and type = 'insert'
) od
left join
(
    select
        data.id,
        data.user_id,
        data.province_id
    from ${APP}.ods_order_info_inc
    where dt = '$do_date'
      and type = 'insert'
) oi
on od.order_id = oi.id
left join
(
    select
        data.order_detail_id,
        data.activity_id,
        data.activity_rule_id
    from ${APP}.ods_order_detail_activity_inc
    where dt = '$do_date'
      and type = 'insert'
) act
on od.id = act.order_detail_id
left join
(
    select
        data.order_detail_id,
        data.coupon_id
    from ${APP}.ods_order_detail_coupon_inc
    where dt = '$do_date'
      and type = 'insert'
) cou
on od.id = cou.order_detail_id
left join
(
    select
        dic_code,
        dic_name
    from ${APP}.ods_base_dic_full
    where dt='$do_date'
      and parent_code='24'
) dic
on od.source_type=dic.dic_code;
"
# Hive SQL for the daily load of dwd_trade_order_refund_inc (partition dt=$do_date).
#   ri         : rows of ods_order_refund_info_inc with type='insert' in the
#                $do_date partition; one output row per such refund record.
#   oi         : same-day 'update' rows of ods_order_info_inc whose new
#                order_status is '1005' and whose old-value map contains an
#                order_status key (presumably: the status was changed by this
#                update - confirm against the change-log format). Supplies
#                province_id only.
#   type_dic   : dictionary entries with parent_code='15', name for refund_type.
#   reason_dic : dictionary entries with parent_code='13', name for
#                refund_reason_type.
# date_id is derived from the refund record's create_time (yyyy-MM-dd).
# All joins are left joins, so refund rows without a matching order update or
# dictionary entry are still written, with NULL in the looked-up columns.
dwd_trade_order_refund_inc="
insert overwrite table ${APP}.dwd_trade_order_refund_inc partition(dt='$do_date')
select
ri.id,
user_id,
order_id,
sku_id,
province_id,
date_format(create_time,'yyyy-MM-dd') date_id,
create_time,
refund_type,
type_dic.dic_name,
refund_reason_type,
reason_dic.dic_name,
refund_reason_txt,
refund_num,
refund_amount
from
(
select
data.id,
data.user_id,
data.order_id,
data.sku_id,
data.refund_type,
data.refund_num,
data.refund_amount,
data.refund_reason_type,
data.refund_reason_txt,
data.create_time
from ${APP}.ods_order_refund_info_inc
where dt='$do_date'
and type='insert'
)ri
left join
(
select
data.id,
data.province_id
from ${APP}.ods_order_info_inc
where dt='$do_date'
and type='update'
and data.order_status='1005'
and array_contains(map_keys(old),'order_status')
)oi
on ri.order_id=oi.id
left join
(
select
dic_code,
dic_name
from ${APP}.ods_base_dic_full
where dt='$do_date'
and parent_code = '15'
)type_dic
on ri.refund_type=type_dic.dic_code
left join
(
select
dic_code,
dic_name
from ${APP}.ods_base_dic_full
where dt='$do_date'
and parent_code = '13'
)reason_dic
on ri.refund_reason_type=reason_dic.dic_code;
"
# Hive SQL for the daily load of dwd_trade_pay_detail_suc_inc (partition dt=$do_date).
#   od      : order detail rows from ods_order_detail_inc in the $do_date
#             partition AND the partition one day earlier, with type 'insert'
#             or 'bootstrap-insert' (presumably because an order created the
#             previous day can be paid on $do_date - confirm the payment window).
#   pi      : same-day 'update' rows of ods_payment_info_inc whose old-value
#             map contains a payment_status key and whose new payment_status
#             is '1602'. This is an INNER join, so only order details that
#             have such a payment row are written.
#   oi/act/cou : order info (province_id), activity and coupon lookups, each
#             read from the same two partitions and row types as od.
#   pay_dic : dictionary entries with parent_code='11', name for payment_type.
#   src_dic : dictionary entries with parent_code='24', name for source_type.
# date_id is derived from the payment callback_time (yyyy-MM-dd);
# split_original_amount is computed as sku_num * order_price.
# user_id is taken from the payment row (pi), not from the order info row.
dwd_trade_pay_detail_suc_inc="
insert overwrite table ${APP}.dwd_trade_pay_detail_suc_inc partition (dt='$do_date')
select
od.id,
od.order_id,
user_id,
sku_id,
province_id,
activity_id,
activity_rule_id,
coupon_id,
payment_type,
pay_dic.dic_name,
date_format(callback_time,'yyyy-MM-dd') date_id,
callback_time,
source_id,
source_type,
src_dic.dic_name,
sku_num,
split_original_amount,
split_activity_amount,
split_coupon_amount,
split_total_amount
from
(
select
data.id,
data.order_id,
data.sku_id,
data.source_id,
data.source_type,
data.sku_num,
data.sku_num * data.order_price split_original_amount,
data.split_total_amount,
data.split_activity_amount,
data.split_coupon_amount
from ${APP}.ods_order_detail_inc
where (dt = '$do_date' or dt = date_add('$do_date',-1))
and (type = 'insert' or type = 'bootstrap-insert')
) od
join
(
select
data.user_id,
data.order_id,
data.payment_type,
data.callback_time
from ${APP}.ods_payment_info_inc
where dt='$do_date'
and type='update'
and array_contains(map_keys(old),'payment_status')
and data.payment_status='1602'
) pi
on od.order_id=pi.order_id
left join
(
select
data.id,
data.province_id
from ${APP}.ods_order_info_inc
where (dt = '$do_date' or dt = date_add('$do_date',-1))
and (type = 'insert' or type = 'bootstrap-insert')
) oi
on od.order_id = oi.id
left join
(
select
data.order_detail_id,
data.activity_id,
data.activity_rule_id
from ${APP}.ods_order_detail_activity_inc
where (dt = '$do_date' or dt = date_add('$do_date',-1))
and (type = 'insert' or type = 'bootstrap-insert')
) act
on od.id = act.order_detail_id
left join
(
select
data.order_detail_id,
data.coupon_id
from ${APP}.ods_order_detail_coupon_inc
where (dt = '$do_date' or dt = date_add('$do_date',-1))
and (type = 'insert' or type = 'bootstrap-insert')
) cou
on od.id = cou.order_detail_id
left join
(
select
dic_code,
dic_name
from ${APP}.ods_base_dic_full
where dt='$do_date'
and parent_code='11'
) pay_dic
on pi.payment_type=pay_dic.dic_code
left join
(
select
dic_code,
dic_name
from ${APP}.ods_base_dic_full
where dt='$do_date'
and parent_code='24'
)src_dic
on od.source_type=src_dic.dic_code;
"
# Hive SQL for the daily load of dwd_trade_refund_pay_suc_inc (partition dt=$do_date).
#   rp  : same-day 'update' rows of ods_refund_payment_inc whose old-value map
#         contains a refund_status key and whose new refund_status is '1602';
#         one output row per such refund payment.
#   oi  : same-day 'update' rows of ods_order_info_inc whose new order_status
#         is '1006' and whose old-value map contains order_status; supplies
#         user_id and province_id.
#   ri  : same-day 'update' rows of ods_order_refund_info_inc whose new
#         refund_status is '0705' and whose old-value map contains
#         refund_status; supplies refund_num, matched on order_id AND sku_id.
#   dic : dictionary entries with parent_code='11', name for payment_type.
# date_id is derived from the refund payment callback_time (yyyy-MM-dd).
# All joins are left joins: a refund payment row is written even when the
# order or refund-info update is absent from the $do_date partition.
dwd_trade_refund_pay_suc_inc="
insert overwrite table ${APP}.dwd_trade_refund_pay_suc_inc partition(dt='$do_date')
select
rp.id,
user_id,
rp.order_id,
rp.sku_id,
province_id,
payment_type,
dic_name,
date_format(callback_time,'yyyy-MM-dd') date_id,
callback_time,
refund_num,
total_amount
from
(
select
data.id,
data.order_id,
data.sku_id,
data.payment_type,
data.callback_time,
data.total_amount
from ${APP}.ods_refund_payment_inc
where dt='$do_date'
and type = 'update'
and array_contains(map_keys(old),'refund_status')
and data.refund_status='1602'
)rp
left join
(
select
data.id,
data.user_id,
data.province_id
from ${APP}.ods_order_info_inc
where dt='$do_date'
and type='update'
and data.order_status='1006'
and array_contains(map_keys(old),'order_status')
)oi
on rp.order_id=oi.id
left join
(
select
data.order_id,
data.sku_id,
data.refund_num
from ${APP}.ods_order_refund_info_inc
where dt='$do_date'
and type='update'
and data.refund_status='0705'
and array_contains(map_keys(old),'refund_status')
)ri
on rp.order_id=ri.order_id
and rp.sku_id=ri.sku_id
left join
(
select
dic_code,
dic_name
from ${APP}.ods_base_dic_full
where dt='$do_date'
and parent_code='11'
)dic
on rp.payment_type=dic.dic_code;
"
# Hive SQL for the daily load of dwd_traffic_action_inc (partition dt=$do_date).
# The statement first sets hive.cbo.enable=false for the session (the reason
# is not stated in this script).
#   log : rows of ods_log_inc for $do_date whose actions array is not null,
#         expanded with LATERAL VIEW explode(actions) so that each element of
#         the array becomes one row. The ts column used here is the one
#         carried by the individual action (action.ts), not the log-level ts.
#   bp  : ods_base_province_full for $do_date, mapping area_code to the
#         province id (left join, so logs with an unknown area code are kept).
# date_id / action_time are produced by from_utc_timestamp(ts,'GMT+8')
# formatted as a date and as a full timestamp; ts is passed through unchanged
# (presumably epoch milliseconds - confirm against the log schema).
dwd_traffic_action_inc="
set hive.cbo.enable=false;
insert overwrite table ${APP}.dwd_traffic_action_inc partition(dt='$do_date')
select
province_id,
brand,
channel,
is_new,
model,
mid_id,
operate_system,
user_id,
version_code,
during_time,
page_item,
page_item_type,
last_page_id,
page_id,
source_type,
action_id,
action_item,
action_item_type,
date_format(from_utc_timestamp(ts,'GMT+8'),'yyyy-MM-dd') date_id,
date_format(from_utc_timestamp(ts,'GMT+8'),'yyyy-MM-dd HH:mm:ss') action_time
from
(
select
common.ar area_code,
common.ba brand,
common.ch channel,
common.is_new,
common.md model,
common.mid mid_id,
common.os operate_system,
common.uid user_id,
common.vc version_code,
page.during_time,
page.item page_item,
page.item_type page_item_type,
page.last_page_id,
page.page_id,
page.source_type,
action.action_id,
action.item action_item,
action.item_type action_item_type,
action.ts
from ${APP}.ods_log_inc lateral view explode(actions) tmp as action
where dt='$do_date'
and actions is not null
)log
left join
(
select
id province_id,
area_code
from ${APP}.ods_base_province_full
where dt='$do_date'
)bp
on log.area_code=bp.area_code;
"
# Hive SQL for the daily load of dwd_traffic_display_inc (partition dt=$do_date).
# The statement first sets hive.cbo.enable=false for the session (the reason
# is not stated in this script).
#   log : rows of ods_log_inc for $do_date whose displays array is not null,
#         expanded with LATERAL VIEW explode(displays) so that each element
#         becomes one row. Unlike the action load, the timestamp used is the
#         log-level ts column.
#   bp  : ods_base_province_full for $do_date, mapping area_code to the
#         province id (left join).
# The struct field named order is quoted with backticks for Hive; the
# backticks are backslash-escaped so the shell does not treat them as command
# substitution inside this double-quoted string.
dwd_traffic_display_inc="
set hive.cbo.enable=false;
insert overwrite table ${APP}.dwd_traffic_display_inc partition(dt='$do_date')
select
province_id,
brand,
channel,
is_new,
model,
mid_id,
operate_system,
user_id,
version_code,
during_time,
page_item,
page_item_type,
last_page_id,
page_id,
source_type,
date_format(from_utc_timestamp(ts,'GMT+8'),'yyyy-MM-dd') date_id,
date_format(from_utc_timestamp(ts,'GMT+8'),'yyyy-MM-dd HH:mm:ss') display_time,
display_type,
display_item,
display_item_type,
display_order,
display_pos_id
from
(
select
common.ar area_code,
common.ba brand,
common.ch channel,
common.is_new,
common.md model,
common.mid mid_id,
common.os operate_system,
common.uid user_id,
common.vc version_code,
page.during_time,
page.item page_item,
page.item_type page_item_type,
page.last_page_id,
page.page_id,
page.source_type,
display.display_type,
display.item display_item,
display.item_type display_item_type,
display.\`order\` display_order,
display.pos_id display_pos_id,
ts
from ${APP}.ods_log_inc lateral view explode(displays) tmp as display
where dt='$do_date'
and displays is not null
)log
left join
(
select
id province_id,
area_code
from ${APP}.ods_base_province_full
where dt='$do_date'
)bp
on log.area_code=bp.area_code;
"
# Hive SQL for the daily load of dwd_traffic_error_inc (partition dt=$do_date).
# Session settings: hive.cbo.enable is set to false, and the execution engine
# is switched to mr before the insert and back to spark after it (the reason
# for running this one statement on mr is not stated in this script).
#   log : rows of ods_log_inc for $do_date whose err struct is not null. Page
#         fields, start fields and the raw actions / displays arrays are
#         carried through as-is together with err.error_code and err.msg.
#   bp  : ods_base_province_full for $do_date, mapping area_code to the
#         province id (left join).
# NOTE(review): the inner query selects page.during_time, but the outer select
# list does not use it - confirm whether the target table is meant to hold it.
# The backticks around start are backslash-escaped so the shell does not treat
# them as command substitution inside this double-quoted string.
dwd_traffic_error_inc="
set hive.cbo.enable=false;
set hive.execution.engine=mr;
insert overwrite table ${APP}.dwd_traffic_error_inc partition(dt='$do_date')
select
province_id,
brand,
channel,
is_new,
model,
mid_id,
operate_system,
user_id,
version_code,
page_item,
page_item_type,
last_page_id,
page_id,
source_type,
entry,
loading_time,
open_ad_id,
open_ad_ms,
open_ad_skip_ms,
actions,
displays,
date_format(from_utc_timestamp(ts,'GMT+8'),'yyyy-MM-dd') date_id,
date_format(from_utc_timestamp(ts,'GMT+8'),'yyyy-MM-dd HH:mm:ss') error_time,
error_code,
error_msg
from
(
select
common.ar area_code,
common.ba brand,
common.ch channel,
common.is_new,
common.md model,
common.mid mid_id,
common.os operate_system,
common.uid user_id,
common.vc version_code,
page.during_time,
page.item page_item,
page.item_type page_item_type,
page.last_page_id,
page.page_id,
page.source_type,
\`start\`.entry,
\`start\`.loading_time,
\`start\`.open_ad_id,
\`start\`.open_ad_ms,
\`start\`.open_ad_skip_ms,
actions,
displays,
err.error_code,
err.msg error_msg,
ts
from ${APP}.ods_log_inc
where dt='$do_date'
and err is not null
)log
left join
(
select
id province_id,
area_code
from ${APP}.ods_base_province_full
where dt='$do_date'
)bp
on log.area_code=bp.area_code;
set hive.execution.engine=spark;
"
# Hive SQL for the daily load of dwd_traffic_page_view_inc (partition dt=$do_date).
# The statement first sets hive.cbo.enable=false for the session (the reason
# is not stated in this script).
#   log : rows of ods_log_inc for $do_date whose page struct is not null.
#         session_start_point is set to ts when page.last_page_id is null and
#         to NULL otherwise, i.e. it marks page views that have no previous page.
#   bp  : ods_base_province_full for $do_date, mapping area_code to the
#         province id (left join).
# session_id is built as mid_id + '-' + last_value(session_start_point, true)
# over rows of the same mid_id ordered by ts; the second argument true makes
# last_value skip NULLs, so each page view receives the most recent
# session_start_point at or before its own ts for that device.
# Page views that precede the first start point of their device within the
# partition get a NULL session_id, because concat with a NULL yields NULL.
dwd_traffic_page_view_inc="
set hive.cbo.enable=false;
insert overwrite table ${APP}.dwd_traffic_page_view_inc partition (dt='$do_date')
select
province_id,
brand,
channel,
is_new,
model,
mid_id,
operate_system,
user_id,
version_code,
page_item,
page_item_type,
last_page_id,
page_id,
source_type,
date_format(from_utc_timestamp(ts,'GMT+8'),'yyyy-MM-dd') date_id,
date_format(from_utc_timestamp(ts,'GMT+8'),'yyyy-MM-dd HH:mm:ss') view_time,
concat(mid_id,'-',last_value(session_start_point,true) over (partition by mid_id order by ts)) session_id,
during_time
from
(
select
common.ar area_code,
common.ba brand,
common.ch channel,
common.is_new is_new,
common.md model,
common.mid mid_id,
common.os operate_system,
common.uid user_id,
common.vc version_code,
page.during_time,
page.item page_item,
page.item_type page_item_type,
page.last_page_id,
page.page_id,
page.source_type,
ts,
if(page.last_page_id is null,ts,null) session_start_point
from ${APP}.ods_log_inc
where dt='$do_date'
and page is not null
)log
left join
(
select
id province_id,
area_code
from ${APP}.ods_base_province_full
where dt='$do_date'
)bp
on log.area_code=bp.area_code;
"
# Hive SQL for the daily load of dwd_traffic_start_inc (partition dt=$do_date).
# The statement first sets hive.cbo.enable=false for the session (the reason
# is not stated in this script).
#   log : rows of ods_log_inc for $do_date whose start struct is not null,
#         keeping the common device/user fields, the start fields (entry,
#         loading_time, open_ad_id, open_ad_ms, open_ad_skip_ms) and ts.
#   bp  : ods_base_province_full for $do_date, mapping area_code to the
#         province id (left join).
# NOTE(review): the formatted timestamp is aliased action_time here; Hive
# INSERT ... SELECT assigns columns by position, so the alias does not decide
# which target column receives it - confirm the target column name in the DDL.
# The backticks around start are backslash-escaped so the shell does not treat
# them as command substitution inside this double-quoted string.
dwd_traffic_start_inc="
set hive.cbo.enable=false;
insert overwrite table ${APP}.dwd_traffic_start_inc partition(dt='$do_date')
select
province_id,
brand,
channel,
is_new,
model,
mid_id,
operate_system,
user_id,
version_code,
entry,
open_ad_id,
date_format(from_utc_timestamp(ts,'GMT+8'),'yyyy-MM-dd') date_id,
date_format(from_utc_timestamp(ts,'GMT+8'),'yyyy-MM-dd HH:mm:ss') action_time,
loading_time,
open_ad_ms,
open_ad_skip_ms
from
(
select
common.ar area_code,
common.ba brand,
common.ch channel,
common.is_new,
common.md model,
common.mid mid_id,
common.os operate_system,
common.uid user_id,
common.vc version_code,
\`start\`.entry,
\`start\`.loading_time,
\`start\`.open_ad_id,
\`start\`.open_ad_ms,
\`start\`.open_ad_skip_ms,
ts
from ${APP}.ods_log_inc
where dt='$do_date'
and \`start\` is not null
)log
left join
(
select
id province_id,
area_code
from ${APP}.ods_base_province_full
where dt='$do_date'
)bp
on log.area_code=bp.area_code;
"
# Hive SQL for the daily load of dwd_user_login_inc (partition dt=$do_date).
# The nested subqueries work from the inside out:
#   t1 : page logs of ods_log_inc for $do_date (page is not null), with
#        session_start_point = ts when page.last_page_id is null, else NULL.
#   t2 : adds session_id = mid_id + '-' + the latest non-NULL
#        session_start_point in ts order for the same mid_id
#        (last_value(..., true) skips NULLs).
#   t3 : keeps only rows of t2 whose user_id is not null and numbers them per
#        session_id in ts order (row_number).
#   t4 : keeps rn = 1, i.e. the earliest page log carrying a user_id in each
#        session. This is the row written out as one login record.
#   bp : ods_base_province_full for $do_date, mapping area_code to the
#        province id (left join).
# The user_id filter is applied after session_id is computed, so sessions are
# delimited using all page logs, including those without a user id.
# Rows whose session_id is NULL (no start point seen yet for the device) all
# fall into one row_number partition, because PARTITION BY groups NULLs together.
dwd_user_login_inc="
insert overwrite table ${APP}.dwd_user_login_inc partition(dt='$do_date')
select
user_id,
date_format(from_utc_timestamp(ts,'GMT+8'),'yyyy-MM-dd') date_id,
date_format(from_utc_timestamp(ts,'GMT+8'),'yyyy-MM-dd HH:mm:ss') login_time,
channel,
province_id,
version_code,
mid_id,
brand,
model,
operate_system
from
(
select
user_id,
channel,
area_code,
version_code,
mid_id,
brand,
model,
operate_system,
ts
from
(
select
user_id,
channel,
area_code,
version_code,
mid_id,
brand,
model,
operate_system,
ts,
row_number() over (partition by session_id order by ts) rn
from
(
select
user_id,
channel,
area_code,
version_code,
mid_id,
brand,
model,
operate_system,
ts,
concat(mid_id,'-',last_value(session_start_point,true) over(partition by mid_id order by ts)) session_id
from
(
select
common.uid user_id,
common.ch channel,
common.ar area_code,
common.vc version_code,
common.mid mid_id,
common.ba brand,
common.md model,
common.os operate_system,
ts,
if(page.last_page_id is null,ts,null) session_start_point
from ${APP}.ods_log_inc
where dt='$do_date'
and page is not null
)t1
)t2
where user_id is not null
)t3
where rn=1
)t4
left join
(
select
id province_id,
area_code
from ${APP}.ods_base_province_full
where dt='$do_date'
)bp
on t4.area_code=bp.area_code;
"
# Hive SQL for the daily load of dwd_user_register_inc (partition dt=$do_date).
#   ui  : rows of ods_user_info_inc with type='insert' in the $do_date
#         partition; data.id becomes user_id and data.create_time supplies
#         both date_id (yyyy-MM-dd) and create_time.
#   log : rows of ods_log_inc for $do_date where page.page_id='register' and
#         common.uid is not null, supplying channel, device and area fields
#         for the user (joined on user_id).
#   bp  : ods_base_province_full for $do_date, mapping the log's area_code to
#         the province id.
# Both joins are left joins, so a newly inserted user with no matching
# register page log is still written, with NULL device/province columns.
# NOTE(review): if a user has more than one matching register page log on
# $do_date, this join yields one output row per log - confirm that the log
# source guarantees at most one.
dwd_user_register_inc="
insert overwrite table ${APP}.dwd_user_register_inc partition(dt='$do_date')
select
ui.user_id,
date_format(create_time,'yyyy-MM-dd') date_id,
create_time,
channel,
province_id,
version_code,
mid_id,
brand,
model,
operate_system
from
(
select
data.id user_id,
data.create_time
from ${APP}.ods_user_info_inc
where dt='$do_date'
and type='insert'
)ui
left join
(
select
common.ar area_code,
common.ba brand,
common.ch channel,
common.md model,
common.mid mid_id,
common.os operate_system,
common.uid user_id,
common.vc version_code
from ${APP}.ods_log_inc
where dt='$do_date'
and page.page_id='register'
and common.uid is not null
)log
on ui.user_id=log.user_id
left join
(
select
id province_id,
area_code
from ${APP}.ods_base_province_full
where dt='$do_date'
)bp
on log.area_code=bp.area_code;
"
# Dispatch on the first script argument.
#   <table name> : run the load statement(s) held in the shell variable of the
#                  same name in one hive session.
#   all          : concatenate every load statement and run them in a single
#                  hive session, in the order listed below. Session-level
#                  "set" commands inside earlier statements therefore stay in
#                  effect for later ones unless they are reset.
#   anything else (including no argument): report the problem on stderr and
#                  exit with status 1, so that a scheduler does not record a
#                  run that loaded nothing as a success.
# For the valid branches the script's exit status is that of the hive command.
case "$1" in
"dwd_interaction_comment_inc" )
hive -e "$dwd_interaction_comment_inc"
;;
"dwd_interaction_favor_add_inc" )
hive -e "$dwd_interaction_favor_add_inc"
;;
"dwd_tool_coupon_get_inc" )
hive -e "$dwd_tool_coupon_get_inc"
;;
"dwd_tool_coupon_order_inc" )
hive -e "$dwd_tool_coupon_order_inc"
;;
"dwd_tool_coupon_pay_inc" )
hive -e "$dwd_tool_coupon_pay_inc"
;;
"dwd_trade_cancel_detail_inc" )
hive -e "$dwd_trade_cancel_detail_inc"
;;
"dwd_trade_cart_add_inc" )
hive -e "$dwd_trade_cart_add_inc"
;;
"dwd_trade_cart_full" )
hive -e "$dwd_trade_cart_full"
;;
"dwd_trade_order_detail_inc" )
hive -e "$dwd_trade_order_detail_inc"
;;
"dwd_trade_order_refund_inc" )
hive -e "$dwd_trade_order_refund_inc"
;;
"dwd_trade_pay_detail_suc_inc" )
hive -e "$dwd_trade_pay_detail_suc_inc"
;;
"dwd_trade_refund_pay_suc_inc" )
hive -e "$dwd_trade_refund_pay_suc_inc"
;;
"dwd_traffic_action_inc" )
hive -e "$dwd_traffic_action_inc"
;;
"dwd_traffic_display_inc" )
hive -e "$dwd_traffic_display_inc"
;;
"dwd_traffic_error_inc" )
hive -e "$dwd_traffic_error_inc"
;;
"dwd_traffic_page_view_inc" )
hive -e "$dwd_traffic_page_view_inc"
;;
"dwd_traffic_start_inc" )
hive -e "$dwd_traffic_start_inc"
;;
"dwd_user_login_inc" )
hive -e "$dwd_user_login_inc"
;;
"dwd_user_register_inc" )
hive -e "$dwd_user_register_inc"
;;
"all" )
hive -e "$dwd_interaction_comment_inc$dwd_interaction_favor_add_inc$dwd_tool_coupon_get_inc$dwd_tool_coupon_order_inc$dwd_tool_coupon_pay_inc$dwd_trade_cancel_detail_inc$dwd_trade_cart_add_inc$dwd_trade_cart_full$dwd_trade_order_detail_inc$dwd_trade_order_refund_inc$dwd_trade_pay_detail_suc_inc$dwd_trade_refund_pay_suc_inc$dwd_traffic_action_inc$dwd_traffic_display_inc$dwd_traffic_error_inc$dwd_traffic_page_view_inc$dwd_traffic_start_inc$dwd_user_login_inc$dwd_user_register_inc"
;;
* )
# No branch matched: fail loudly instead of exiting 0 without loading anything.
echo "Unknown or missing table name: '$1' (expected a dwd_* table name or 'all')" >&2
exit 1
;;
esac