0 概述
分析三种类型的insert在parse的各个阶段的差异:
insert into TAB_IS SELECT * FROM STUDENT a WHERE a.sno > ANY (SELECT b.sno from STUDENT b);
insert into TAB_IS values(10, 'AAA');
insert into TAB_IS values(20, 'CCC'),(30, 'DDD'),(40, 'EEE');
不同insert的计划树type
# T_NestLoopState
insert into TAB_IS SELECT * FROM STUDENT a WHERE a.sno > ANY (SELECT b.sno from STUDENT b);
# T_ResultState
insert into TAB_IS values(10, 'AAA');
# T_ValuesScanState
insert into TAB_IS values(20, 'CCC'),(30, 'DDD'),(40, 'EEE');
# T_FunctionScanState
insert into TAB_IS select i, 'QQQ', i % 10 from generate_series(1, 1000) t(i);
# T_ProjectSetState
insert into TAB_IS values(generate_series(1,10), 'DDD', 1);
1 语义分析差异
下面三种SQL在语义分析结果来看有什么区别?
insert into TAB_IS SELECT * FROM STUDENT a WHERE a.sno > ANY (SELECT b.sno from STUDENT b);
insert into TAB_IS values(10, 'AAA');
insert into TAB_IS values(20, 'CCC'),(30, 'DDD'),(40, 'EEE');
语义分析结果来看,insert语句都会构造插入表和数据表两张表(RangeTblEntry),数据表可能是值构造出来的,或者是select查询出来的。
核心流程都是构造数据表的RangeTblEntry。
代码位置:
transformInsertStmt
SelectStmt *selectStmt = (SelectStmt *) stmt->selectStmt;
// 如果selectStmt非空,表示存在select子句
if (selectStmt == NULL)
... // 普通insert
else if (isGeneralSelect)
... // 带select子句
else if (list_length(selectStmt->valuesLists) > 1)
... // 多values
1 insert select语义分析结果
pg_analyze_and_rewrite_fixedparams
insert into TAB_IS SELECT * FROM STUDENT a WHERE a.sno > ANY (SELECT b.sno from STUDENT b);
语义分析结果
2 insert values语义分析结果
insert into TAB_IS values(10, 'AAA');
语义分析结果
3 insert values values语义分析结果
insert into TAB_IS values(20, 'CCC'),(30, 'DDD'),(40, 'EEE');
语义分析结果
2 优化结果差异
一定存在ModifyTable节点,因为这是一个写表操作,也就是会进入ExecModifyTable函数。
ExecModifyTable函数loop下层节点每次拿一条数据,然后执行insert操作。知道下层节点没数据为止。
从ExecModifyTable节点的lefttree可以知道具体是哪种insert。
3 执行阶段
从执行阶段来看,下面三种SQL有什么区别?
insert into TAB_IS SELECT * FROM STUDENT a WHERE a.sno > ANY (SELECT b.sno from STUDENT b);
QUERY PLAN
-------------------------------------------------------------------------------------------------
Insert on tab_is (cost=0.15..208.42 rows=0 width=0)
-> Nested Loop Semi Join (cost=0.15..208.42 rows=367 width=46)
-> Seq Scan on student a (cost=0.00..21.00 rows=1100 width=46)
-> Index Only Scan using student_pkey on student b (cost=0.15..6.62 rows=367 width=4)
Index Cond: (sno < a.sno)
insert into TAB_IS values(10, 'AAA');
QUERY PLAN
----------------------------------------------------
Insert on tab_is (cost=0.00..0.01 rows=0 width=0)
-> Result (cost=0.00..0.01 rows=1 width=46)
insert into TAB_IS values(20, 'CCC'),(30, 'DDD'),(40, 'EEE');
QUERY PLAN
--------------------------------------------------------------------
Insert on tab_is (cost=0.00..0.04 rows=0 width=0)
-> Values Scan on "*VALUES*" (cost=0.00..0.04 rows=3 width=46)
执行阶段没什么区别,都是走ExecModifyTable内部循环搞定。
- 每次从lefttree中执行一把拿到一条,
subplanstate = outerPlanState(node);context.planSlot = ExecProcNode(subplanstate);
。 - 根据operation类型(insert)执行具体insert操作ExecInsert,比较简单,中间会有slot到tuple的转换。执行器的元组都是包装在slot中的。现在PG的存储引擎提供了AM接口,代码更清晰了。
(执行器层ExecInsert→存储层入口table_tuple_insert)
PortalRun
PortalRunMulti
ProcessQuery
CreateQueryDesc
ExecutorStart
ExecutorRun
standard_ExecutorRun
ExecutePlan
ExecProcNode
ExecProcNodeFirst
ExecModifyTable
-----> ExecProcNode(subplanstate) ---
/ switch (operation) \
\ case CMD_INSERT: /
------------- ExecInsert <----------
ps. 测试数据
drop table student;
create table student(sno int primary key, sname varchar(10), ssex int);
insert into student values(1, 'stu1', 0);
insert into student values(2, 'stu2', 1);
insert into student values(3, 'stu3', 1);
insert into student values(4, 'stu4', 0);
drop table course;
create table course(cno int primary key, cname varchar(10), tno int);
insert into course values(10, 'meth', 1);
insert into course values(11, 'english', 2);
drop table teacher;
create table teacher(tno int primary key, tname varchar(10), tsex int);
insert into teacher values(1, 'te1', 1);
insert into teacher values(2, 'te2', 0);
drop table score;
create table score (sno int, cno int, degree int);
insert into score values (1, 10, 100);
insert into score values (1, 11, 89);
insert into score values (2, 10, 99);
insert into score values (2, 11, 90);
insert into score values (3, 10, 87);
insert into score values (3, 11, 20);
insert into score values (4, 10, 60);
insert into score values (4, 11, 70);
SELECT * FROM STUDENT a WHERE a.sno > ANY (SELECT b.sno from STUDENT b);
drop table TAB_IS;
create table TAB_IS(sno int, sname varchar(10), ssex int);
insert into TAB_IS SELECT * FROM STUDENT a WHERE a.sno > ANY (SELECT b.sno from STUDENT b);
insert into TAB_IS values(10, 'AAA');
insert into TAB_IS values(20, 'CCC'),(30, 'DDD'),(40, 'EEE');