目录
一、连续7天登录用户
1.数据准备ulogin.dat
2.建表与加载数据
3. 使用 row_number 在组内给数据编号(rownum)
4.用登录日期 dt 减去 rownum 得到 gid:连续登录的日期会得到相同的 gid,可作为后面分组计算的依据
5.根据求得的gid,作为分组条件,求最终结果
二、求TopN
1.需求
2.数据准备stu.dat
3.建表与加载数据
4、上排名函数,分数一样并列,所以用dense_rank
5、将上一行数据下移,相减即得到分数差
6、处理 NULL
一、连续7天登录用户
1.数据准备ulogin.dat
1 2019-07-11 1
1 2019-07-12 1
1 2019-07-13 1
1 2019-07-14 1
1 2019-07-15 1
1 2019-07-16 1
1 2019-07-17 1
1 2019-07-18 1
2 2019-07-11 1
2 2019-07-12 1
2 2019-07-13 0
2 2019-07-14 1
2 2019-07-15 1
2 2019-07-16 0
2 2019-07-17 1
2 2019-07-18 0
3 2019-07-11 1
3 2019-07-12 1
3 2019-07-13 1
3 2019-07-14 0
3 2019-07-15 1
3 2019-07-16 1
3 2019-07-17 1
3 2019-07-18 1
2.建表与加载数据
-- Login-log table backing the consecutive-login example.
-- One space-separated record per line: uid, date (yyyy-MM-dd), status.
-- "if not exists" lets this setup be re-run without failing on an existing table.
create table if not exists ulogin
(
    uid    int,
    dt     date,
    status int  -- the queries in this section keep only rows with status = 1
) row format delimited fields terminated by " ";

-- "overwrite" replaces the table contents instead of appending, so loading the
-- file a second time cannot duplicate (uid, dt) rows; duplicates would throw
-- off the row_number()-based streak calculation.
load data local inpath "/opt/stufile/ulogin.dat" overwrite into table ulogin;

-- Sanity check of the loaded rows, with columns named explicitly.
select uid,
       dt,
       status
from ulogin;
3. 使用 row_number 在组内给数据编号(rownum)
-- Number each user's status = 1 rows in date order.
-- The counter restarts at 1 for every uid.
select
    uid,
    dt,
    row_number() over login_days as rownum
from ulogin
where status = 1
window login_days as (partition by uid order by dt);
4.用登录日期 dt 减去 rownum 得到 gid:连续登录的日期会得到相同的 gid,可作为后面分组计算的依据
-- Derive a streak key (gid) per status = 1 row: the date minus its per-user
-- row number. Within an unbroken run of days both advance by one, so every
-- row of the run maps to the same gid.
-- NOTE(review): this relies on at most one row per (uid, dt) -- confirm.
select
    uid,
    date_sub(dt, row_number() over login_days) as gid
from ulogin
where status = 1
window login_days as (partition by uid order by dt);
5.根据求得的gid,作为分组条件,求最终结果
-- Users with a run of at least 7 consecutive status = 1 days.
-- Emits one row per qualifying run, so a uid can appear more than once.
with streak_key as (
    -- gid is identical for all rows belonging to one unbroken run of days
    select
        uid,
        date_sub(dt, row_number() over (partition by uid order by dt)) as gid
    from ulogin
    where status = 1
)
select
    sk.uid,
    count(*) as contlogin
from streak_key sk
group by sk.uid, sk.gid
having count(*) >= 7;
二、求TopN
1.需求
编写 SQL 语句,求出每班成绩前三名(分数相同则名次并列),并求出前三名按名次排序后、每一行与上一行之间的分数差。
2.数据准备stu.dat
1 1901 90
2 1901 90
3 1901 83
4 1901 60
5 1902 66
6 1902 23
7 1902 99
8 1902 67
9 1902 87
3.建表与加载数据
-- Score table backing the per-class Top-N example.
-- One space-separated record per line: sno, class, score.
-- "if not exists" lets this setup be re-run without failing on an existing table.
create table if not exists stu
(
    sno   int,
    class string,
    score int
) row format delimited fields terminated by " ";

-- "overwrite" replaces the table contents instead of appending, so loading the
-- file a second time cannot duplicate rows and add phantom ties to the ranking.
load data local inpath '/opt/stufile/stu.dat' overwrite into table stu;
4、上排名函数,分数一样并列,所以用dense_rank
-- Rank scores inside each class, highest first.
-- dense_rank gives equal scores the same rank and leaves no gaps afterwards.
select
    class,
    score,
    dense_rank() over class_by_score as rank
from stu
window class_by_score as (partition by class order by score desc);
5、将上一行数据下移,相减即得到分数差
-- Top three ranks per class, plus the gap to the previous (higher-or-equal)
-- score. The first row of each class has no predecessor, so lagscore is NULL.
with ranked as (
    select
        class,
        score,
        dense_rank() over (partition by class order by score desc) as rank
    from stu
)
select
    r.class,
    r.score,
    r.rank,
    -- the rank filter below is applied before this window is evaluated,
    -- so lag() only sees the rows that survive it
    r.score - lag(r.score) over (partition by r.class order by r.score desc) as lagscore
from ranked r
where r.rank <= 3;
6、处理 NULL
-- Top three ranks per class, plus the gap to the previous (higher-or-equal)
-- score. The first row of each class has no predecessor; its NULL gap is
-- reported as 0.
with ranked as (
    select
        class,
        score,
        dense_rank() over (partition by class order by score desc) as rank
    from stu
)
select
    r.class,
    r.score,
    r.rank,
    coalesce(
        r.score - lag(r.score) over (partition by r.class order by r.score desc),
        0
    ) as lagscore
from ranked r
where r.rank <= 3;