有用户点击日志记录表 t2_click_log,包含user_id(用户ID),click_time(点击时间),请查询出连续点击三次的用户数,
连续点击三次:指点击记录中同一用户连续点击,中间无其他用户点击;
CREATE TABLE t2_click_log (
user_id BIGINT,
click_time BIGINT
);
insert into t2_click_log (user_id,click_time)
values
(1,1736337600),
(2,1736337670),
(1,1736337710),
(1,1736337715),
(1,1736337750),
(2,1736337760),
(3,1736337820),
(3,1736337840),
(3,1736337850),
(3,1736337910),
(4,1736337915)
1.增加一列is_same_user,判断是否与上一行是同一用户点击,是取0,否取1,第一行默认为0;
select user_id,click_time,
case
when lag(user_id)over(order by click_time) = user_id then 0
when lag(user_id)over(order by click_time) is null then 0
else 1 end as flag
from t2_click_log
2.为了判断是否是同一用户分组而进行累积求和
累加求和值不变说明是同一用户
select user_id,click_time,sum(flag) over(order by click_time) n
from (
select user_id,click_time,
case
when lag(user_id)over(order by click_time) = user_id then 0
when lag(user_id)over(order by click_time) is null then 0
else 1 end as flag
from t2_click_log
) temp
3.查询相同n值个数>3的用户
select user_id
from(
select user_id,click_time,sum(flag) over(order by click_time) n
from (
select user_id,click_time,
case
when lag(user_id)over(order by click_time) = user_id then 0
when lag(user_id)over(order by click_time) is null then 0
else 1 end as flag
from t2_click_log
) temp
)temp2
group by user_id,n
having count(1)>=3
4.查询最终用户数量
select count(1)
from (
select user_id
from(
select user_id,click_time,sum(flag) over(order by click_time) n
from (
select user_id,click_time,
case
when lag(user_id)over(order by click_time) = user_id then 0
when lag(user_id)over(order by click_time) is null then 0
else 1 end as flag
from t2_click_log
) temp
)temp2
group by user_id,n
having count(1)>=3) tt
方法2:
双重排序差值法
1.分别按照时间,按照不分组和按照用户分组进行排序;
select user_id,
click_time,
row_number() over (order by click_time asc) as row_num1,
row_number() over (partition by user_id order by click_time asc) as row_num2
from t2_click_log
2.计算差值并按照用户和差值进行分组
2.1计算差值
diff=row_num1-row_num2
select user_id,
row_number() over(order by click_time)-row_number() over(partition by user_id order by click_time) diff
from t2_click_log
同一个用户,同样的差值,表示连续登录,
2.2分组统计连续登录次数
select user_id,
diff,
count(1) as aa
from (select user_id,
click_time,
row_number() over (order by click_time asc) -
row_number() over (partition by user_id order by click_time asc) as diff
from t2_click_log) t
group by user_id, diff
3.查询分组行数>=3的用户差值分组
select user_id,count(1) times
from(
select user_id,
row_number() over(order by click_time)-row_number() over(partition by user_id order by click_time) diff
from t2_click_log) temp
group by user_id,diff
having times>=3
4.计算用户数
记得给用户去重哦!
select count(distinct user_id) count
from (
select user_id,count(1) times
from(
select user_id,
row_number() over(order by click_time)-row_number() over(partition by user_id order by click_time) diff
from t2_click_log) temp
group by user_id,diff
having times>=3)
temp1