本文基于前段时间学习总结的 MySQL 相关的查询语法,在牛客网找了相应的 MySQL 题目进行练习,以便加强对于 MySQL 查询语法的理解和应用。
由于涉及到的数据库表较多,因此本文不再展示,只提供 MySQL 代码与示例输出。
以下内容是牛客题霸-SQL大厂面试真题(抖音短视频 1-6 题、百度信息流 1-5 题)的 MySQL 代码答案。
SQL 156:查询 2021 年里有播放记录的每个视频的完播率(结果保留三位小数),并按照完播率降序排列
-- SQL 156: completion rate per video for plays that started in 2021,
-- rounded to 3 decimals, highest rate first.
-- A play counts as complete when the watched span (end_time - start_time,
-- in seconds) is at least the video's duration.
select
    v.video_id,
    round(
        avg(
            case
                when timestampdiff(second, start_time, end_time) >= duration then 1
                else 0
            end
        ),
        3
    ) as avg_comp_play_rate
from tb_user_video_log as l
-- inner join: a log row with no matching video has no duration to compare
-- against; under the previous left join such rows were collected into a
-- NULL video_id group with a completion rate of 0.
inner join tb_video_info as v
    on l.video_id = v.video_id
-- Half-open range instead of year(start_time) = 2021, so that an index on
-- start_time stays usable.
where start_time >= '2021-01-01'
  and start_time < '2022-01-01'
group by v.video_id
order by avg_comp_play_rate desc;
SQL 157:查询各类视频的平均播放进度,将进度大于 60% 的类别输出(结果保留两位小数),并按照播放进度降序排列
-- SQL 157: average play progress per tag, keeping only tags above 60%,
-- formatted as a percentage with 2 decimals, highest progress first.
with play_progress as (
    -- Progress of a single play: watched seconds / duration, capped at 1
    -- once the watched span reaches the full duration.
    select
        tag,
        case
            when timestampdiff(second, start_time, end_time) >= duration then 1
            else timestampdiff(second, start_time, end_time) / duration
        end as progress
    from tb_user_video_log as l
    inner join tb_video_info as v
        on l.video_id = v.video_id
)
select
    tag,
    concat(round(avg(progress) * 100, 2), '%') as avg_play_progress
from play_progress
group by tag
having avg(progress) > 0.6
-- Sort on the numeric average, not on the formatted text: as strings,
-- '100.00%' would be ordered below '99.99%'.
order by avg(progress) desc;
SQL 158:查询在有用户互动的最近一个月(按包含当天在内的近30天算,比如10月31日的近30天为10.2~10.31之间的数据)中,每类视频的转发量和转发率(保留3位小数),并按照转发率降序排列
-- SQL 158: retweet count and retweet rate (3 decimals) per tag over the
-- most recent 30 days of activity, highest rate first.
with latest as (
    -- Most recent play start in the whole log; anchors the 30-day window.
    select max(start_time) as latest_start
    from tb_user_video_log
)
select
    tag,
    -- Output alias kept unchanged so consumers of the result still see
    -- the same column name.
    sum(if_retweet) as retweet_cut,
    round(sum(if_retweet) / count(l.video_id), 3) as retweet_rate
from tb_user_video_log as l
inner join tb_video_info as v
    on l.video_id = v.video_id
cross join latest
-- A day difference of 0..29 covers 30 calendar days including the latest one.
where datediff(latest_start, start_time) <= 29
group by tag
order by retweet_rate desc;
SQL 159:查询 2021 年里每个创作者每月的涨粉率及截止当月的总粉丝量,并按照创作者 ID,总粉丝量升序排列
-- SQL 159: per author and month of 2021, the fan growth rate
-- (net fan change / number of plays, 3 decimals) and the running total of
-- fans up to that month; ordered by author, then total fans ascending.
with fan_events as (
    select
        author,
        left(start_time, 7) as `month`,
        -- if_follow: 1 = gained a fan, 2 = lost a fan. Any other value
        -- (including 0 and NULL) is treated as no change; the previous
        -- -1000 sentinel would silently distort the sums instead.
        case if_follow
            when 1 then 1
            when 2 then -1
            else 0
        end as follow_fans_change
    from tb_user_video_log as l
    inner join tb_video_info as v
        on l.video_id = v.video_id
    -- Filter before aggregating; the half-open range keeps an index on
    -- start_time usable.
    where start_time >= '2021-01-01'
      and start_time < '2022-01-01'
)
select
    author,
    `month`,
    round(sum(follow_fans_change) / count(1), 3) as fans_growth_rate,
    -- Window over the grouped rows: cumulative net change per author.
    sum(sum(follow_fans_change)) over (
        partition by author
        order by `month`
    ) as total_fans
from fan_events
group by author, `month`
order by author, total_fans;
SQL 160:查询 2021 年国庆头 3 天每类视频每天的近一周总点赞量和一周内最大单天转发量,并按照视频类别降序排列,日期升序排列
-- SQL 160: for each tag and each of 2021-10-01..03, the total likes over
-- the trailing 7 days and the largest single-day retweet count in those
-- 7 days; ordered by tag descending, date ascending.
with daily as (
    -- One row per tag per calendar day that has at least one play.
    select
        tag,
        date(start_time) as dt,
        sum(if_like) as like_cnt,
        sum(if_retweet) as retweet_cnt
    from tb_user_video_log as l
    -- inner join: a play without video info has no tag to report under.
    inner join tb_video_info as v
        on l.video_id = v.video_id
    group by tag, date(start_time)
),
rolling as (
    select
        tag,
        dt,
        sum(like_cnt) over w as sum_like_cnt_7d,
        max(retweet_cnt) over w as max_retweet_cnt_7d
    from daily
    -- The frame must be ordered by date, otherwise "6 preceding" refers to
    -- an arbitrary set of rows. A date RANGE frame (instead of ROWS) keeps
    -- the window at exactly 7 calendar days even when a tag has no plays
    -- on some of them.
    window w as (
        partition by tag
        order by dt
        range between interval 6 day preceding and current row
    )
)
select
    tag,
    dt,
    sum_like_cnt_7d,
    max_retweet_cnt_7d
from rolling
where dt in ('2021-10-01', '2021-10-02', '2021-10-03')
order by tag desc, dt;
SQL 161:查询近一个月发布的视频中热度最高的 top3 视频(热度 =(100 × 完播率 + 5 × 点赞数 + 3 × 评论数 + 2 × 转发数)× 新鲜度,新鲜度 = 1 /(最近无播放天数 + 1),结果取整后按热度降序排列)
-- SQL 161: top 3 videos by hot index among videos released within the
-- 30 days up to the latest end_time in the log.
-- hot index = (100 * completion rate + 5 * likes + 3 * comments
--              + 2 * retweets) * freshness, rounded to an integer,
-- freshness = 1 / (1 + days since the video was last played).
with latest as (
    -- Latest end_time in the log; used as "today".
    select max(end_time) as latest_end
    from tb_user_video_log
),
plays as (
    select
        l.video_id as video_id,
        case
            when timestampdiff(second, start_time, end_time) >= duration then 1
            else 0
        end as if_complete,
        if_like,
        if_retweet,
        case when comment_id is null then 0 else 1 end as if_comment,
        -- Days between this play's end and the latest end_time in the log.
        datediff(latest_end, end_time) as diff_time
    from tb_user_video_log as l
    -- NOTE(review): if release_time comes from tb_video_info, the where
    -- clause below already drops unmatched rows, so this behaves like an
    -- inner join -- confirm against the schema.
    left join tb_video_info as v
        on l.video_id = v.video_id
    cross join latest
    where datediff(latest_end, release_time) <= 29
)
select
    video_id,
    round(
        (
            avg(if_complete) * 100
            + sum(if_like) * 5
            + sum(if_comment) * 3
            + sum(if_retweet) * 2
        ) * (1 / (1 + min(diff_time)))
    ) as hot_index
from plays
group by video_id
order by hot_index desc
limit 3;
SQL 162:查询 2021 年 11 月每天的人均浏览文章时长(秒数),结果保留 1 位小数,并按时长由短到长升序排列
-- SQL 162: for each day of November 2021, total article viewing seconds
-- divided by the number of distinct viewers (1 decimal), shortest first.
-- NOTE(review): assumes in_time is a DATETIME (or an ISO-formatted string)
-- so the range filter matches the same rows as a '2021-11' prefix -- confirm.
select
    date_format(in_time, '%Y-%m-%d') as dt,
    round(
        sum(timestampdiff(second, in_time, out_time)) / count(distinct uid),
        1
    ) as avg_viiew_len_sec
from tb_user_log
where artical_id <> 0  -- rows with artical_id = 0 are excluded from article viewing
  and in_time >= '2021-11-01'
  and in_time < '2021-12-01'
group by dt
order by avg_viiew_len_sec;
SQL 163:查询每篇文章同一时刻最大在看人数,如果同一时刻有进入也有离开时,先记录用户数增加再记录减少,结果按最大人数降序排列
-- SQL 163: peak number of simultaneous readers per article, highest first.
with events as (
    -- Every visit becomes two events: +1 when the reader enters and -1
    -- when the reader leaves. Rows with artical_id = 0 are filtered out
    -- before the union so they are never expanded.
    select artical_id, in_time as dt, 1 as delta
    from tb_user_log
    where artical_id <> 0
    union all
    select artical_id, out_time as dt, -1 as delta
    from tb_user_log
    where artical_id <> 0
),
running as (
    select
        artical_id,
        -- Running head count. At identical timestamps the +1 events sort
        -- ahead of the -1 events, so entries are counted before exits.
        sum(delta) over (
            partition by artical_id
            order by dt, delta desc
        ) as uv
    from events
)
select
    artical_id,
    -- Named explicitly; the column was previously exposed as "max(uv)".
    max(uv) as max_uv
from running
group by artical_id
order by max_uv desc;
SQL 164:统计2021年11月每天新用户的次日留存率(保留2位小数)
-- SQL 164: next-day retention rate (2 decimals) of users first seen on
-- each day of November 2021, ordered by date.
with first_seen as (
    -- Date of each user's first visit. Kept as a DATE (not a string
    -- prefix) so it compares with the activity dates without implicit
    -- type conversion.
    select
        uid,
        date(min(in_time)) as reg_date
    from tb_user_log
    group by uid
),
active_day as (
    -- Distinct (user, date) pairs; a visit counts for both the date it
    -- started and the date it ended. union removes duplicates, so a user
    -- matches a given date at most once.
    select uid, date(in_time) as log_date
    from tb_user_log
    union
    select uid, date(out_time) as log_date
    from tb_user_log
)
select
    f.reg_date as dt,
    -- count(a.uid): new users active again on the following day;
    -- count(f.uid): all new users of that day (never 0 within a group).
    round(count(a.uid) / count(f.uid), 2) as uv_left_rate
from first_seen as f
left join active_day as a
    on a.uid = f.uid
   and a.log_date = date_add(f.reg_date, interval 1 day)
where f.reg_date >= '2021-11-01'
  and f.reg_date < '2021-12-01'
group by f.reg_date
order by dt;
SQL 165:按活跃间隔对用户分级,统计各活跃等级的用户占比,结果保留两位小数,并按占比降序排列
-- SQL 165: share of users in each activity grade (2 decimals), largest
-- share first. "Today" is the latest date present in the log.
with user_span as (
    select
        uid,
        date(min(in_time)) as first_date,  -- date of the user's first visit
        -- Date of the user's latest activity. A visit's end date counts as
        -- activity too, so out_time is considered in addition to in_time.
        date(greatest(max(in_time), coalesce(max(out_time), max(in_time)))) as last_date
    from tb_user_log
    group by uid
),
anchor as (
    -- Latest date in the whole log, computed the same way as last_date.
    select
        date(greatest(max(in_time), coalesce(max(out_time), max(in_time)))) as today
    from tb_user_log
),
graded as (
    select
        uid,
        -- A gap of 0..6 days means "within the last 7 days"; a gap above
        -- 29 days means "not within the last 30 days". Branch order
        -- matters: the > 29 check must run before the > 6 check.
        case
            when datediff(today, first_date) <= 6 then '新晋用户'
            when datediff(today, first_date) > 6 and datediff(today, last_date) <= 6 then '忠实用户'
            when datediff(today, first_date) > 6 and datediff(today, last_date) > 29 then '流失用户'
            when datediff(today, first_date) > 6 and datediff(today, last_date) > 6 then '沉睡用户'
            else '其他'
        end as user_grade
    from user_span
    cross join anchor
)
select
    user_grade,
    round(
        count(distinct uid) / (select count(distinct uid) from tb_user_log),
        2
    ) as ratio
from graded
group by user_grade
-- The task asks for the grades sorted by share, descending; this ordering
-- was missing before.
order by ratio desc;
SQL 166:统计每天的日活数及新用户占比
-- SQL 166: daily active users and the share of them who are new that day
-- (2 decimals), ordered by date.
with first_seen as (
    -- Date of each user's first visit.
    select
        uid,
        date(min(in_time)) as reg_date
    from tb_user_log
    group by uid
),
active_day as (
    -- Distinct (user, date) pairs; a visit counts for both the date it
    -- started and the date it ended.
    select uid, date(in_time) as login_date
    from tb_user_log
    union
    select uid, date(out_time) as login_date
    from tb_user_log
)
select
    d.login_date,
    count(distinct d.uid) as dau,
    -- f.uid is non-NULL only when the active date is the user's first date.
    round(count(distinct f.uid) / count(distinct d.uid), 2) as uv_new_ratio
from active_day as d
left join first_seen as f
    on f.uid = d.uid
   and f.reg_date = d.login_date
group by d.login_date
order by d.login_date;