知乎问答最大连续回答问题天数大于等于3天的用户及其对应等级
1.描述
现有某乎问答创作者信息表author_tb如下(其中author_id表示创作者编号、author_level表示创作者级别,共1-6六个级别、sex表示创作者性别):
author_id | author_level | sex |
101 | 6 | m |
102 | 1 | f |
103 | 1 | m |
104 | 3 | m |
105 | 4 | f |
106 | 2 | f |
107 | 2 | m |
108 | 5 | f |
109 | 6 | f |
110 | 5 | m |
创作者回答情况表answer_tb如下(其中answer_date表示创作日期、author_id指创作者编号、issue_id指回答问题编号、char_len表示回答字数):
answer_date | author_id | issue_id | char_len |
2023-11-01 | 101 | E001 | 150 |
2023-11-01 | 101 | E002 | 200 |
2023-11-01 | 102 | C003 | 50 |
2023-11-01 | 103 | P001 | 35 |
2023-11-01 | 104 | C003 | 120 |
2023-11-01 | 105 | P001 | 125 |
2023-11-01 | 102 | P002 | 105 |
2023-11-02 | 101 | P001 | 201 |
2023-11-02 | 110 | C002 | 200 |
2023-11-02 | 110 | C001 | 225 |
2023-11-02 | 110 | C002 | 220 |
2023-11-03 | 101 | C002 | 180 |
2023-11-04 | 109 | E003 | 130 |
2023-11-04 | 109 | E001 | 123 |
2023-11-05 | 108 | C001 | 160 |
2023-11-05 | 108 | C002 | 120 |
2023-11-05 | 110 | P001 | 180 |
2023-11-05 | 106 | P002 | 45 |
2023-11-05 | 107 | E003 | 56 |
请你统计最大连续回答问题的天数大于等于3天的用户及其等级(若有多条符合条件的数据,按author_id升序排序),以上例子的输出结果如下:
author_id | author_level | days_cnt |
101 | 6 | 3 |
数据导入
drop database if exists db_1;
create database db_1;
use db_1;
drop table if exists author_tb;
CREATE TABLE author_tb
(
author_id int(10) NOT NULL,
author_level int(10) NOT NULL,
sex char(10) NOT NULL
);
INSERT INTO author_tb
VALUES
(101, 6, 'm'),
(102, 1, 'f'),
(103, 1, 'm'),
(104, 3, 'm'),
(105, 4, 'f'),
(106, 2, 'f'),
(107, 2, 'm'),
(108, 5, 'f'),
(109, 6, 'f'),
(110, 5, 'm');
drop table if exists answer_tb;
CREATE TABLE answer_tb
(
answer_date date NOT NULL,
author_id int(10) NOT NULL,
issue_id char(10) NOT NULL,
char_len int(10) NOT NULL
);
INSERT INTO answer_tb
VALUES
('2021-11-1', 101, 'E001', 150),
('2021-11-1', 101, 'E002', 200),
('2021-11-1', 102, 'C003', 50),
('2021-11-1', 103, 'P001', 35),
('2021-11-1', 104, 'C003', 120),
('2021-11-1', 105, 'P001', 125),
('2021-11-1', 102, 'P002', 105),
('2021-11-2', 101, 'P001', 201),
('2021-11-2', 110, 'C002', 200),
('2021-11-2', 110, 'C001', 225),
('2021-11-2', 110, 'C002', 220),
('2021-11-3', 101, 'C002', 180),
('2021-11-4', 109, 'E003', 130),
('2021-11-4', 109, 'E001', 123),
('2021-11-5', 108, 'C001', 160),
('2021-11-5', 108, 'C002', 120),
('2021-11-5', 110, 'P001', 180),
('2021-11-5', 106, 'P002', 45),
('2021-11-5', 107, 'E003', 56);
select * from author_tb;
select * from answer_tb;
解题思路
实现代码
with t1 as (
-- 1 去重
select
author_id,
answer_date,
-- 2 排名
row_number() over (partition by author_id order by answer_date) as rn,
-- 3 差值 = 日期 - 排名
date_sub(answer_date, interval (row_number() over (partition by author_id order by answer_date)) day) as diff
from answer_tb
group by answer_date, author_id
)
select
author_id,
(select t2.author_level from author_tb t2 where t2.author_id=t1.author_id) author_level,
count(1) days
from t1
group by author_id, diff
having count(1)>=3
order by author_id asc
;