目录
题目
准备数据
分析数据
实现
总结
题目
如果一个账户在 连续两个及以上 月份的 总收入 超过最大收入(max_income
),那么认为这个账户 可疑。 账户当月 总收入 是当月存入资金总数(即 transactions 表中 type 字段的 'Creditor'
)。
编写一个解决方案,报告所有的 可疑 账户。
准备数据
Create table If Not Exists Accounts (account_id int, max_income int);
Create table If Not Exists Transactions (transaction_id int, account_id int, type ENUM('creditor', 'debtor'), amount int, day datetime);
Truncate table Accounts;
insert into Accounts (account_id, max_income) values ('3', '21000');
insert into Accounts (account_id, max_income) values ('4', '10400');
Truncate table Transactions;
insert into Transactions (transaction_id, account_id, type, amount, day) values ('2', '3', 'Creditor', '107100', '2021-06-02 11:38:14');
insert into Transactions (transaction_id, account_id, type, amount, day) values ('4', '4', 'Creditor', '10400', '2021-06-20 12:39:18');
insert into Transactions (transaction_id, account_id, type, amount, day) values ('11', '4', 'Debtor', '58800', '2021-07-23 12:41:55');
insert into Transactions (transaction_id, account_id, type, amount, day) values ('1', '4', 'Creditor', '49300', '2021-05-03 16:11:04');
insert into Transactions (transaction_id, account_id, type, amount, day) values ('15', '3', 'Debtor', '75500', '2021-05-23 14:40:20');
insert into Transactions (transaction_id, account_id, type, amount, day) values ('10', '3', 'Creditor', '102100', '2021-06-15 10:37:16');
insert into Transactions (transaction_id, account_id, type, amount, day) values ('14', '4', 'Creditor', '56300', '2021-07-21 12:12:25');
insert into Transactions (transaction_id, account_id, type, amount, day) values ('19', '4', 'Debtor', '101100', '2021-05-09 15:21:49');
insert into Transactions (transaction_id, account_id, type, amount, day) values ('8', '3', 'Creditor', '64900', '2021-07-26 15:09:56');
insert into Transactions (transaction_id, account_id, type, amount, day) values ('7', '3', 'Creditor', '90900', '2021-06-14 11:23:07');
accounts表
transactions表
分析数据
第一步:将日期的月提取出来,并且过滤出type = 'creditor'
select
account_id,type,amount,
substr(day,1,4) year,
substr(day,6,2) month
from Transactions
where type = 'creditor'
order by account_id,month;
第二步:将两张表进行关联,根据month,t1.account_id,max_income分组
with t1 as (
select
account_id,type,amount,
substr(day,1,4) year,
substr(day,6,2) month
from Transactions
where type = 'creditor'
order by account_id,month
),t2 as (
select
t1.account_id,sum(amount) max,month,max_income
from t1,Accounts
where t1.account_id = Accounts.account_id
group by month,t1.account_id,max_income
order by t1.account_id)
select * from t2;
第三步:利用开窗函数row_number()形成一列排名
with t1 as (
select
account_id,type,amount,
substr(day,1,4) year,
substr(day,6,2) month
from Transactions
where type = 'creditor'
order by account_id,month
),t2 as (
select
t1.account_id,sum(amount) max,month,max_income
from t1,Accounts
where t1.account_id = Accounts.account_id
group by month,t1.account_id,max_income
order by t1.account_id
),t3 as (
select t2.account_id,t2.max,month,
row_number() over(partition by t2.account_id order by t2.month) rn1
from t2
where max > max_income)select * from t3;
第四步:再得出一个等差数列month2,若是差值相同,则连续
with t1 as (
select
account_id,type,amount,
substr(day,1,4) year,
substr(day,6,2) month
from Transactions
where type = 'creditor'
order by account_id,month
),t2 as (
select
t1.account_id,sum(amount) max,month,max_income
from t1,Accounts
where t1.account_id = Accounts.account_id
group by month,t1.account_id,max_income
order by t1.account_id
),t3 as (
select t2.account_id,t2.max,month,
row_number() over(partition by t2.account_id order by t2.month) rn1
from t2
where max > max_income
),t4 as (
select account_id,(month - rn1) month2,
count(month-rn1) as r2
from t3
group by account_id,(month - rn1)
)select * from t4;
实现
with t1 as (
select
account_id,type,amount,
substr(day,1,4) year,
substr(day,6,2) month
from Transactions
where type = 'creditor'
order by account_id,month
),t2 as (
select
t1.account_id,sum(amount) max,month,max_income
from t1,Accounts
where t1.account_id = Accounts.account_id
group by month,t1.account_id,max_income
order by t1.account_id
),t3 as (
select t2.account_id,t2.max,month,
row_number() over(partition by t2.account_id order by t2.month) rn1
from t2
where max > max_income
),t4 as (
select account_id,(month - rn1) month2,
count(month-rn1) as r2
from t3
group by account_id,(month - rn1)
)select
distinct account_id
from t4
where r2 >=2;
总结
遇见连续性问题,需要两列差值相同的,最后进行相减,相同的即为连续。