问题背景
最近公司有个项目需要用到某种特殊的统计用法, 例如从所有的数据中找出每个账号最新的一条余额信息(根据某个关键信息进行排序并获取排序值最高的记录)。
当时用的是非常普通的写法:用多层子查询嵌套 select 出每个账号最新一条记录的 id,然后 left join 回主表获取完整的记录。
数据准备
执行以下sql来初始化表格和数据
-- ----------------------------
-- Table structure for test_cust_acct
-- Links an account_number to a customer_id.
-- ----------------------------
DROP TABLE IF EXISTS "public"."test_cust_acct";
CREATE TABLE "public"."test_cust_acct" (
"account_number" varchar(32) COLLATE "pg_catalog"."default",
"customer_id" varchar(32) COLLATE "pg_catalog"."default"
)
;
-- ----------------------------
-- Records of test_cust_acct
-- Columns are named explicitly so the inserts do not depend on the
-- physical column order of the table.
-- ----------------------------
INSERT INTO "public"."test_cust_acct" ("account_number", "customer_id") VALUES ('18522222', 'CNC_10010');
INSERT INTO "public"."test_cust_acct" ("account_number", "customer_id") VALUES ('804461110', 'WOM_10086');
/*
Navicat Premium Data Transfer
by https://zhengkai.blog.csdn.net/
*/
-- ----------------------------
-- Table structure for test_intraday
-- One row per balance record: account, amount, currency and balance date.
-- ----------------------------
DROP TABLE IF EXISTS "public"."test_intraday";
CREATE TABLE "public"."test_intraday" (
"_id" int4 NOT NULL,
"account_number" varchar(32) COLLATE "pg_catalog"."default",
"amount" numeric(12,2),
"currency" varchar(12) COLLATE "pg_catalog"."default",
"balance_date" date
)
;
-- ----------------------------
-- Records of test_intraday
-- Columns are named explicitly so the inserts do not depend on the
-- physical column order of the table.
-- ----------------------------
INSERT INTO "public"."test_intraday" ("_id", "account_number", "amount", "currency", "balance_date") VALUES (1, '804461110', 12315.00, 'CNY', '2023-05-27');
INSERT INTO "public"."test_intraday" ("_id", "account_number", "amount", "currency", "balance_date") VALUES (2, '18522222', 45611.11, 'HKD', '2023-05-27');
INSERT INTO "public"."test_intraday" ("_id", "account_number", "amount", "currency", "balance_date") VALUES (3, '18522222', 45622.22, 'HKD', '2023-05-28');
INSERT INTO "public"."test_intraday" ("_id", "account_number", "amount", "currency", "balance_date") VALUES (4, '18522222', 45633.33, 'HKD', '2023-06-01');
INSERT INTO "public"."test_intraday" ("_id", "account_number", "amount", "currency", "balance_date") VALUES (5, '804461110', -54321.00, 'CNY', '2023-06-01');
-- ----------------------------
-- Primary Key structure for table test_intraday
-- ----------------------------
ALTER TABLE "public"."test_intraday" ADD CONSTRAINT "test_intraday_pkey" PRIMARY KEY ("_id");
补习班
什么是partition by 和 rank ?
-
一旦各种开窗函数使用partition by后,需要明确的是,函数会对每个分区独立处理。
avg(page) over(partition by book)
假如数据是哈利波特七部曲,则该函数求出的是每一部哈利波特的平均页码数。
-
rank 是标准 SQL 的窗口函数(PostgreSQL、T-SQL 等都支持)。rank 函数返回当前行在其分区内、按 order by 指定字段排序后的排名:某一行的排名等于排在它之前的行数加一,因此并列的行排名相同,其后的排名会出现跳号。
解决方案与SQL实战
- 第一步,先查出所有的记录
-- START
-- Step 1: list every intraday row of customer CNC_10010 together with the
-- matching test_cust_acct columns.
-- account_number is emitted once per table, so it appears twice in the output.
WITH customer_rows AS (
    SELECT
        acct.account_number,
        acct.customer_id,
        bal._id,
        bal.account_number,
        bal.amount,
        bal.currency,
        bal.balance_date
    FROM test_cust_acct AS acct
    INNER JOIN test_intraday AS bal
        ON bal.account_number = acct.account_number
    WHERE acct.customer_id = 'CNC_10010'
)
SELECT *
FROM customer_rows;
让我们explain一下
- 第二步,加上partition by 和 rank 获取排序
-- AFTER
-- Step 2: rank every intraday row inside its account, newest balance first.
--   * PARTITION BY already separates the accounts, so account_number is not
--     repeated in the window ORDER BY.
--   * NULLS LAST keeps rows without a balance_date from ranking first
--     (PostgreSQL sorts NULLs first under DESC by default).
--   * _id DESC breaks ties between different intraday rows that share a
--     balance_date.
--   * The window column is aliased explicitly instead of relying on the
--     implicit output name.
WITH myresult AS (
    SELECT
        i.*,
        rank() OVER (
            PARTITION BY i.account_number
            ORDER BY i.balance_date DESC NULLS LAST, i._id DESC
        ) AS rank
    FROM test_cust_acct AS ca
    INNER JOIN test_intraday AS i
        ON ca.account_number = i.account_number
    WHERE ca.customer_id IN ('CNC_10010', 'WOM_10086')
)
SELECT *
FROM myresult;
让我们继续explain一下,
- 第三步,只找到 rank 为 1 的记录,也就是每个 account 最新的一条数据。
-- FINAL
-- Step 3: keep only the newest intraday row of each account (rank = 1).
--   * NULLS LAST prevents a row without a balance_date from being picked as
--     the "latest" one (PostgreSQL sorts NULLs first under DESC by default).
--   * _id DESC breaks ties between different intraday rows that share the
--     newest balance_date, so they do not all come back with rank = 1.
--   * PARTITION BY already separates the accounts, so account_number is not
--     repeated in the window ORDER BY.
WITH myresult AS (
    SELECT
        i.*,
        rank() OVER (
            PARTITION BY i.account_number
            ORDER BY i.balance_date DESC NULLS LAST, i._id DESC
        ) AS rank
    FROM test_cust_acct AS ca
    INNER JOIN test_intraday AS i
        ON ca.account_number = i.account_number
    WHERE ca.customer_id IN ('CNC_10010', 'WOM_10086')
)
SELECT *
FROM myresult
WHERE rank = 1;
对比
--old
-- Pre-window-function approach: nested subqueries locate the newest row of
-- each account, then join back to test_intraday for the full record.
--   1. latest      : newest balance_date per selected account.
--   2. finalresult : highest _id among the rows carrying that date
--                    (tie-break when several rows share the newest date).
--   3. outer query : fetch the complete row for each chosen _id.
-- The row is chosen by balance_date first instead of by MAX(_id) alone,
-- because _id order is not guaranteed to follow balance_date order.
-- IS NOT DISTINCT FROM lets an account whose rows all lack a balance_date
-- still match (NULL = NULL would not).
SELECT ii.*
FROM (
    SELECT
        i.account_number,
        MAX(i._id) AS _id
    FROM test_intraday AS i
    INNER JOIN (
        SELECT
            src.account_number,
            MAX(src.balance_date) AS balance_date
        FROM test_cust_acct AS ca
        INNER JOIN test_intraday AS src
            ON ca.account_number = src.account_number
        WHERE ca.customer_id IN ('CNC_10010', 'WOM_10086')
        GROUP BY src.account_number
    ) AS latest
        ON latest.account_number = i.account_number
        AND latest.balance_date IS NOT DISTINCT FROM i.balance_date
    GROUP BY i.account_number
) AS finalresult
-- Every finalresult._id comes from test_intraday, so an inner join is enough.
INNER JOIN test_intraday AS ii
    ON ii._id = finalresult._id;