拉链表
- 创建外部表
- 将编写的orders.txt上传到hdfs
- 创建一个增量分区表
- 将orders表的数据传入ods_orders_inc
- 查看分区
- 创建历史表
- 插入数据
- 操作
创建外部表
-- Create the demo database and switch to it. "if not exists" keeps the
-- script re-runnable instead of failing when the object is already there.
create database if not exists lalian;
use lalian;
-- Raw orders source. Declared as an external table over the HDFS directory
-- below, so Hive reads whatever tab-delimited files are placed there.
create external table if not exists orders(
    orderId int,
    createDate string,
    modifiedTime string,
    status string
)
row format delimited fields terminated by '\t'
location '/tmp/lalian/orders';
将编写的orders.txt上传到hdfs
[root@cp145 ~]# vim ./orders.txt
1	2023-03-03	2023-03-03	创建
2	2023-03-03	2023-03-03	创建
3	2023-03-03	2023-03-03	创建
[root@cp145 ~]# hdfs dfs -mkdir -p /tmp/lalian/orders/
[root@cp145 ~]# hdfs dfs -put ./orders.txt /tmp/lalian/orders/
上传后即可用select查到数据,例如:select * from orders;
创建一个增量分区表
-- Incremental (delta) staging table for orders, partitioned by load day.
-- "if not exists" keeps the script re-runnable.
create table if not exists ods_orders_inc(
    orderId int,
    createDate string,
    modifiedTime string,
    status string
)
partitioned by (day string) -- partition key, e.g. day = '2023-03-03'
row format delimited fields terminated by '\t';
将orders表的数据传入ods_orders_inc
-- Initial load: copy every row currently in orders into the 2023-03-03 partition.
-- The partition value is static, so day is not part of the select list.
insert overwrite table ods_orders_inc partition (day = '2023-03-03')
select
    orderId,
    createDate,
    modifiedTime,
    status
from orders;
-- Inspect what was loaded.
select *
from ods_orders_inc;
查看分区
-- List the partitions of ods_orders_inc (each loaded day shows up as day=...).
show partitions ods_orders_inc;
创建历史表
-- Zipper (history) table: one row per version of an order.
-- "if not exists" keeps the script re-runnable.
create table if not exists dws_orders_his(
    orderId int,
    createDate string,
    modifiedTime string,
    status string,
    start_time string, -- date this version became effective (loaded from modifiedTime)
    end_time string -- date this version was closed, '9999-12-31' while it is still current
)
row format delimited fields terminated by '\t';
插入数据
-- Seed the history table from the 2023-03-03 partition: every row becomes an
-- open version that starts at its modifiedTime.
insert overwrite table dws_orders_his
select
    orderId,
    createDate,
    modifiedTime,
    status,
    modifiedTime as start_time,
    '9999-12-31' as end_time -- sentinel meaning the version is still current
from ods_orders_inc
where day = '2023-03-03';
查看数据
-- Inspect the current content of the history table.
select *
from dws_orders_his;
操作
将hdfs中的orders.txt删除,上传增加了记录(modifiedTime为2023-03-04的新增或变更订单)的新orders.txt
覆写分区表
-- Load the 2023-03-04 increment: only rows whose modifiedTime is that day
-- go into the matching partition.
insert overwrite table ods_orders_inc partition (day = '2023-03-04')
select
    orderId,
    createDate,
    modifiedTime,
    status
from orders
where modifiedTime = '2023-03-04';
查看分区,出现03-04分区
-- List the partitions of ods_orders_inc to confirm that day=2023-03-04 now exists.
show partitions ods_orders_inc;
查看 day='2023-03-04' 分区
-- Inspect the rows loaded into the 2023-03-04 partition.
select *
from ods_orders_inc
where day = '2023-03-04';
将03-04分区的数据整合覆盖到历史表中
-- Merge the 2023-03-04 increment into the zipper table (CTE form).
-- NOTE: not idempotent. Each run appends the increment again, so execute it
-- once per day partition, and run either this form or the subquery form, not both.
with
-- New versions: every row of the increment, opened with end_time '9999-12-31'.
new_versions as (
    select
        orderId,
        createDate,
        modifiedTime,
        status,
        modifiedTime as start_time,
        '9999-12-31' as end_time
    from ods_orders_inc
    where day = '2023-03-04'
),
-- Order ids touched by the increment. Deduplicated so that an order appearing
-- more than once in the partition cannot multiply history rows in the join.
changed_orders as (
    select distinct orderId
    from ods_orders_inc
    where day = '2023-03-04'
),
-- Existing history: versions that are still open and whose order changed are
-- closed with end_time '2023-03-04', every other row is kept unchanged.
closed_history as (
    select
        his.orderId,
        his.createDate,
        his.modifiedTime,
        his.status,
        his.start_time,
        case
            when chg.orderId is not null and his.end_time > '2023-03-04' then '2023-03-04'
            else his.end_time
        end as end_time
    from dws_orders_his his
    left join changed_orders chg
        on his.orderId = chg.orderId
),
combined as (
    select orderId, createDate, modifiedTime, status, start_time, end_time
    from new_versions
    union all
    select orderId, createDate, modifiedTime, status, start_time, end_time
    from closed_history
)
insert overwrite table dws_orders_his
select
    orderId,
    createDate,
    modifiedTime,
    status,
    start_time,
    end_time
from combined
order by orderId, modifiedTime;
-- Alternative to the CTE form above, written with an inline subquery.
-- Run only one of the two forms: each run appends the increment again.
insert overwrite table dws_orders_his
select
    combined.orderId,
    combined.createDate,
    combined.modifiedTime,
    combined.status,
    combined.start_time,
    combined.end_time
from (
    -- New versions: every row of the increment, opened with end_time '9999-12-31'.
    select
        orderId,
        createDate,
        modifiedTime,
        status,
        modifiedTime as start_time,
        '9999-12-31' as end_time
    from ods_orders_inc
    where day = '2023-03-04'
    union all
    -- Existing history: close the open versions of orders present in the
    -- increment, keep every other row unchanged.
    select
        his.orderId,
        his.createDate,
        his.modifiedTime,
        his.status,
        his.start_time,
        case
            when chg.orderId is not null and his.end_time > '2023-03-04' then '2023-03-04'
            else his.end_time
        end as end_time
    from dws_orders_his his
    left join (
        -- Deduplicated so that an order appearing more than once in the
        -- partition cannot multiply history rows in the join.
        select distinct orderId
        from ods_orders_inc
        where day = '2023-03-04'
    ) chg
        on his.orderId = chg.orderId
) combined
order by combined.orderId, combined.start_time;