sql专场练习（一）（最后五题 21-25）

news2025/2/22 20:09:16

第21题：找出恶意购买用户

-- Question 21 source table.
-- Rows are parsed from whitespace-separated text by RegexSerDe; the four
-- capture groups of input.regex map positionally onto the four columns.
-- operate_time stays a string (sample rows look like '2023-01-01 10:00:00').
create table sql1_21 (
    order_id      int,
    user_id       string,
    order_status  string,
    operate_time  string
)
row format serde 'org.apache.hadoop.hive.serde2.RegexSerDe'
with serdeproperties ('input.regex'='(\\d+)\\s+(.+?)\\s+(.+?)\\s+(.+?)');

-- Load the sample file from the local filesystem (appends to the table).
load data local inpath '/home/homedata/sql_1/sql1_21.txt'
into table sql1_21;

order_id    user_id    order_status     operate_time
1101         a         已支付        2023-01-01 10:00:00
1102         a         已取消        2023-01-01 10:10:00
1103         a         待支付        2023-01-01 10:20:00
1104         b         已取消        2023-01-01 10:30:00
1105         a         待确认        2023-01-01 10:50:00
1106         a         已取消        2023-01-01 11:00:00
1107         b         已取消        2023-01-01 11:40:00
1108         b         已取消        2023-01-01 11:50:00
1109         b         已支付        2023-01-01 12:00:00
1110         b         已取消        2023-01-01 12:11:00
1111         c         已取消        2023-01-01 12:20:00
1112         c         已取消        2023-01-01 12:30:00
1113         c         已取消        2023-01-01 12:55:00
1114         c         已取消        2023-01-01 13:00:00

恶意购买的用户定义是：同一个用户，在任意半小时内（含边界），取消订单次数 >= 3 次的就被视为恶意买家。——这道题的关键在于 range 窗口的使用：range 按排序列的取值范围来确定窗口边界（例如把时间转成秒级时间戳后，半小时即 1800 秒），而 rows 则按行数来确定窗口边界。

-- Question 21: users with at least 3 cancelled orders inside any half-hour
-- span (boundaries included).
with cancelled as (
    -- Keep cancellations only; convert the timestamp string to epoch seconds
    -- so the window frame can be written as a numeric RANGE.
    select user_id,
           unix_timestamp(operate_time) as cancel_ts
    from sql1_21
    where order_status = "已取消"
),
windowed as (
    -- For every cancellation, count the same user's cancellations in the
    -- closed interval [cancel_ts - 1800, cancel_ts]. 1800 s = 30 minutes.
    -- Any half-hour span holding >= 3 cancellations is detected at the row
    -- of the latest cancellation inside it.
    select user_id,
           count(*) over (
               partition by user_id
               order by cancel_ts
               range between 1800 preceding and current row
           ) as cancel_cnt
    from cancelled
)
-- One output row per offending user.
select distinct user_id
from windowed
where cancel_cnt >= 3;

第22题：取每个保单的最大保单版本编号的证件号以及客户名称

-- Question 22 source table: comma-delimited text, one row per policy version.
create table sql1_22 (
    x1 String,  -- policy number
    x2 String,  -- customer name
    x3 String,  -- ID document number
    x4 Int      -- policy version number
)
row format delimited
    fields terminated by ",";

-- Sample rows: (policy number, customer name, ID number, version).
insert into sql1_22 values
    ('01', 'a', '001', 3),
    ('02', 'b', '002', 2),
    ('03', 'c', '003', 1),
    ('01', 'd', '004', 2),
    ('02', 'e', '005', 1),
    ('01', 'f', '006', 1);

思路：按保单编号 x1 分组处理。注意：仅对 x1 分组后分别聚合 x2、x3，并不能保证取到的是最大保单版本编号（x4）所在行的值；严格的做法是用 row_number() 按 x4 降序排序后，取每个保单的第一条记录。

-- Question 22: for every policy (x1) return the customer name (x2) and ID
-- number (x3) that belong to the row with the highest version number (x4).
-- Aggregating x2 and x3 independently under GROUP BY x1 does not tie them to
-- the max-version row, so the rows are ranked per policy instead.
with ranked as (
    select x1,
           x2,
           x3,
           row_number() over (partition by x1 order by x4 desc) as version_rank
    from sql1_22
)
select x1,
       x2,
       x3
from ranked
where version_rank = 1;

第23题：获取班级前3名，以及他们的分差

-- Question 23 source table: student number, class and score.
-- Rows are parsed from whitespace-separated text by RegexSerDe; the three
-- capture groups map positionally onto the three columns.
create table sql1_23 (
    Stu_no  int,
    class   string,
    score   int
)
row format serde 'org.apache.hadoop.hive.serde2.RegexSerDe'
with serdeproperties ('input.regex'='(\\d+)\\s+(.+?)\\s+(\\d+)');

-- Load the sample file from the local filesystem (appends to the table).
load data local inpath '/home/homedata/sql_1/sql1_23.txt'
into table sql1_23;


Stu_no class score 
1        1901        90
2        1901        90
3        1901        83
4        1901        60
5        1902        66
6        1902        23
7        1902        99
8        1902        67
9        1902        87

实现每班前三名，分数一样不并列，同时求出前三名按名次排序后依次的分差。

-- Question 23: top 3 students per class (ties are NOT shared) plus the score
-- difference to the student ranked directly above.
with ranked as (
    -- Stu_no is added as a tie-breaker: with ORDER BY score DESC alone, the
    -- rank assigned to students with equal scores is not deterministic.
    select Stu_no,
           class,
           score,
           row_number() over (partition by class order by score desc, Stu_no) as rowNumber
    from sql1_23
)
select Stu_no,
       class,
       score,
       rowNumber,
       -- Rank 1 has no predecessor, so it reports its own score (same as the
       -- original solution); ranks 2 and 3 report score minus the previous
       -- rank's score, i.e. zero or a negative number.
       case
           when rowNumber = 1 then score
           else score - lag(score, 1, score) over (partition by class order by rowNumber)
       end as diff_score
from ranked
where rowNumber <= 3;

第24题：间隔连续问题

-- Question 24 source table: one login record per row (user id, login date).
-- Text format with a single space between the two fields.
create table sql1_24 (
    id  int,
    dt  string
)
row format delimited
    fields terminated by ' ';

-- Load the sample file from the local filesystem (appends to the table).
load data local inpath '/home/homedata/sql_1/sql1_24.txt'
into table sql1_24;


id dt
1001 2021-12-12
1002 2021-12-12
1001 2021-12-13
1001 2021-12-14
1001 2021-12-16
1002 2021-12-16
1001 2021-12-19
1002 2021-12-17
1001 2021-12-20

计算每个用户最大的连续登录天数，可以间隔一天。解释：如果一个用户在 1,3,5,6 登录游戏，则视为连续 6 天登录。

按照分组的思想：相邻两次登录的日期差不超过 2 天（即最多间隔一天）就划分为同一组，然后取每组的首、尾日期，相减后加 1 得到该组的连续天数，最后取最大值即可。

-- Question 24: longest login streak per user, where a single missing day
-- between two logins does not break the streak.
with gaps as (
    -- Days since the same user's previous login; NULL on the first login.
    select id,
           dt,
           datediff(dt, lag(dt, 1) over (partition by id order by dt)) as gap_days
    from sql1_24
),
flagged as (
    -- A new streak starts on the first login or when more than one day was
    -- skipped (gap > 2). The NULL test must be IS NULL: "= null" never
    -- evaluates to true.
    select id,
           dt,
           case when gap_days is null or gap_days > 2 then 1 else 0 end as is_new_streak
    from gaps
),
streaks as (
    -- Running sum of the start flags gives each streak its own id.
    select id,
           dt,
           sum(is_new_streak) over (partition by id order by dt) as streak_id
    from flagged
),
spans as (
    -- First and last day of every streak. min/max are used rather than
    -- first_value/last_value because last_value under the default window
    -- frame yields the current row, not the end of the partition.
    -- dt is a 'yyyy-MM-dd' string in the sample data, so string order is
    -- also date order.
    select id,
           streak_id,
           min(dt) as sdt,
           max(dt) as edt
    from streaks
    group by id, streak_id
)
-- Streak length counts both end days, hence the + 1.
select id,
       max(datediff(edt, sdt) + 1) as days
from spans
group by id;

第25题：行列转换

-- Question 25 source table (long format): a, b, c.
-- Rows are parsed from whitespace-separated text by RegexSerDe; the three
-- capture groups map positionally onto the three columns.
-- NOTE(review): 'output.format.string' is carried over unchanged; it is
-- presumably not used by serde2.RegexSerDe when reading - confirm.
create table sql1_25 (
    a  string,
    b  string,
    c  int
)
row format serde 'org.apache.hadoop.hive.serde2.RegexSerDe'
with serdeproperties (
    'input.regex'='(.+?)\\s+(.+?)\\s+(\\d+)',
    'output.format.string'='%1$s %2$s %3$s'
);

-- Load the sample file from the local filesystem (appends to the table).
load data local inpath '/home/homedata/sql_1/sql1_25.txt'
into table sql1_25;

-- Show what was loaded.
select a, b, c
from sql1_25;


2014    B       9
2015    A       8
2014    A       10
2015    B       7

问题1：行转列问题

将上面的数据转为下面这种格式

-- Pivot: one output row per value of a, with the c totals for b = "A" and
-- b = "B" spread into two columns (0 when a has no row for that b).
select
    a,
    sum(case when b = "A" then c else 0 end) as col_A,
    sum(case when b = "B" then c else 0 end) as col_B
from sql1_25
group by a;

问题2：再转回去

-- Materialise the pivoted (wide) form of sql1_25 as sql1_25_2 so it can be
-- used as the input for the reverse transformation.
create table sql1_25_2 as
select
    a,
    sum(case when b = 'A' then c else 0 end) as col_A,
    sum(case when b = 'B' then c else 0 end) as col_B
from sql1_25
group by a;

-- Unpivot: turn each wide row (a, col_A, col_B) back into two long rows
-- (a, 'A', col_A) and (a, 'B', col_B). The two columns are packed into a
-- map and exploded, which emits one (key, value) row per entry.
select
    a,
    b,
    c
from sql1_25_2
lateral view explode(map('A', col_A, 'B', col_B)) kv as b, c;

问题3：将上面的数据转为下面的这种

创建表格

-- Input table for part 3. score is declared as string; the solutions below
-- pass it through collect_list into concat_ws.
create table sql1_25_3 (
    year    int,
    deptno  string,
    score   string
);

-- Sample rows; note that (2014, "B") appears twice.
insert into sql1_25_3 values
    (2014, "B", 9),
    (2015, "A", 8),
    (2014, "A", 10),
    (2015, "B", 7),
    (2014, "B", 6);

两种写法思路是一样的，只是步骤换了一下，实际上就是一种解法不同写法

写法一：

-- Solution 1: split score into one column per department first, then
-- collect and join the values per year.
with split_by_dept as (
    -- Each row keeps its score only in the column of its own department;
    -- the other column is NULL.
    select
        year as a,
        case when deptno = "A" then score end as col_A,
        case when deptno = "B" then score end as col_B
    from sql1_25_3
)
select
    a,
    concat_ws(",", collect_list(col_A)) as col_A,
    concat_ws(",", collect_list(col_B)) as col_B
from split_by_dept
group by a;

写法二：

-- Solution 2: join the scores per (year, deptno) first, then spread the
-- joined strings into one column per department.
with per_dept as (
    select
        year as a,
        deptno,
        concat_ws(',', collect_list(score)) as joined_scores
    from sql1_25_3
    group by year, deptno
)
select
    a,
    concat_ws(',', collect_list(case when deptno = 'A' then joined_scores end)) as col_A,
    concat_ws(',', collect_list(case when deptno = 'B' then joined_scores end)) as col_B
from per_dept
group by a;

本文来自互联网用户投稿，该文观点仅代表作者本人，不代表本站立场。本站仅提供信息存储空间服务，不拥有所有权，不承担相关法律责任。如若转载，请注明出处：http://www.coloradmin.cn/o/2230697.html

如若内容造成侵权/违法违规/事实不符，请联系多彩编程网进行投诉反馈，一经查实，立即删除！