首先我们需要看CGPOptimizer类(src/include/gpopt/CGPOptimizer.h)为Greenplum数据库提供ORCA优化器export出来的函数的封装。Greenplum数据库主流程调用extern "C"中提供的函数,比如初始化ORCA优化器的函数InitGPOPT,优化查询树的函数GPOPTOptimizedPlan,explain流程中调用的SerializeDXLPlan函数。
class CGPOptimizer{
public:
static PlannedStmt *GPOPTOptimizedPlan( Query *query, bool * had_unexpected_failure // output : set to true if optimizer unexpectedly failed to produce plan ); // optimize given query using GP optimizer
static char *SerializeDXLPlan(Query *query); // serialize planned statement into DXL
static void InitGPOPT(); // gpopt initialize and terminate
static void TerminateGPOPT();
};
// C-linkage declarations of the optimizer entry points; the parameter lists
// mirror the static members of CGPOptimizer with the same names.
extern "C" {
// optimize a query; *had_unexpected_failure is an output flag
PlannedStmt *GPOPTOptimizedPlan(Query *query, bool *had_unexpected_failure);

// serialize the plan for a query into DXL
char *SerializeDXLPlan(Query *query);

// initialize / terminate gpopt
void InitGPOPT();
void TerminateGPOPT();
}
CGPOptimizer为GP提供ORCA优化器初始化函数
InitGPOPT函数用于初始化GPOPT及其依赖库(Initialize GPOPT and dependent libraries),最终调用的是gpos_init(&params)、gpdxl_init()、gpopt_init()和CMemoryPoolPallocManager::Init()。该函数由src/backend/utils/init/postinit.c中的InitPostgres函数调用。
extern "C" {
// C-linkage wrapper that forwards to CGPOptimizer::InitGPOPT().
// A caught exception matching gpdxl::ExmaGPDB / gpdxl::ExmiGPDBError is
// re-thrown through PG_RE_THROW(); the handler does nothing else.
void
InitGPOPT()
{
	GPOS_TRY
	{
		return CGPOptimizer::InitGPOPT();
	}
	GPOS_CATCH_EX(ex)
	{
		if (GPOS_MATCH_EX(ex, gpdxl::ExmaGPDB, gpdxl::ExmiGPDBError))
		{
			PG_RE_THROW();
		}
	}
	GPOS_CATCH_END;
}
}
// Initialize GPOPT and the libraries it depends on.
// If optimizer_use_gpdb_allocators is set, CMemoryPoolPallocManager is
// initialized first; gpos, gpdxl and gpopt are then initialized in that order.
void
CGPOptimizer::InitGPOPT()
{
	if (optimizer_use_gpdb_allocators)
	{
		CMemoryPoolPallocManager::Init();
	}

	// gpdb::IsAbortRequested is passed to gpos as the first init parameter
	struct gpos_init_params params = {gpdb::IsAbortRequested};
	gpos_init(&params);
	gpdxl_init();
	gpopt_init();
}
CGPOptimizer为GP提供ORCA优化器优化函数
GP有两种优化器:PG优化器和ORCA优化器。要确定执行计划来自PG优化器还是ORCA优化器,可以查看PlanGenerator的值:typedef enum PlanGenerator {PLANGEN_PLANNER, /* plan produced by the planner*/ PLANGEN_OPTIMIZER, /* plan produced by the optimizer*/ } PlanGenerator;
Master端简单查询的入口函数exec_simple_query开始生成执行计划并进行分发,如下图所示。在standard_planner函数中分为ORCA优化器和PG优化器2个分支产生执行计划。产生执行计划后,由PortalStart函数开始调用standard_ExecutorStart,进而执行分发执行计划的函数CdbDispatchPlan,将执行计划从master分发到各个segment。摘自https://blog.51cto.com/yanzongshuai/5675056
extern "C" {
// C-linkage wrapper: passes both arguments straight through to
// CGPOptimizer::GPOPTOptimizedPlan and returns its result.
PlannedStmt *
GPOPTOptimizedPlan(Query *query, bool *had_unexpected_failure)
{
	return CGPOptimizer::GPOPTOptimizedPlan(query, had_unexpected_failure);
}
}
//---------------------------------------------------------------------------
// @function: CGPOptimizer::GPOPTOptimizedPlan
// @doc: Optimize given query using GP optimizer
//   query                  - query to optimize
//   had_unexpected_failure - output: set to true if optimizer unexpectedly
//                            failed to produce plan; reset to false on entry
//   returns the plan from COptTasks::GPOPTOptimizedPlan, or NULL if plStmt
//   was never assigned
//---------------------------------------------------------------------------
PlannedStmt *
CGPOptimizer::GPOPTOptimizedPlan(Query *query, bool *had_unexpected_failure)
{
	SOptContext gpopt_context;
	PlannedStmt *plStmt = NULL;
	*had_unexpected_failure = false;

	GPOS_TRY
	{
		plStmt = COptTasks::GPOPTOptimizedPlan(query, &gpopt_context);
		// clean up context
		gpopt_context.Free(gpopt_context.epinQuery, gpopt_context.epinPlStmt);
	}
	GPOS_CATCH_EX(ex)
	{
		// ... (exception handling elided in this excerpt)
	}
	GPOS_CATCH_END;

	return plStmt;
}
//---------------------------------------------------------------------------
// @function: COptTasks::GPOPTOptimizedPlan
// @doc: optimizes a query to plannedstmt
//   Stores the query in the context, flags that a PlannedStmt should be
//   generated, runs OptimizeTask via Execute(), and returns the context's
//   m_plan_stmt.
//---------------------------------------------------------------------------
PlannedStmt *
COptTasks::GPOPTOptimizedPlan(Query *query, SOptContext *gpopt_context)
{
	gpopt_context->m_query = query;
	gpopt_context->m_should_generate_plan_stmt = true;

	Execute(&OptimizeTask, gpopt_context);

	return gpopt_context->m_plan_stmt;
}
CGPOptimizer为GP提供ORCA优化器explain函数
SerializeDXLPlan函数用于将planned statement序列化为DXL(Serialize planned statement into DXL),其最终调用COptTasks::Optimize(query)函数。
extern "C" {
// C-linkage wrapper: forwards the query to CGPOptimizer::SerializeDXLPlan
// and returns its result.
char *
SerializeDXLPlan(Query *query)
{
	return CGPOptimizer::SerializeDXLPlan(query);
}
}
// Serialize planned statement into DXL.
// Returns the result of COptTasks::Optimize(query). If an exception is
// caught, an ERROR is reported through errstart/errfinish using the
// exception's file name and line; the trailing return yields NULL.
char *
CGPOptimizer::SerializeDXLPlan(Query *query)
{
	GPOS_TRY
	{
		return COptTasks::Optimize(query);
	}
	GPOS_CATCH_EX(ex)
	{
		errstart(ERROR, ex.Filename(), ex.Line(), NULL, TEXTDOMAIN);
		errfinish(errcode(ERRCODE_INTERNAL_ERROR),
				  errmsg("optimizer failed to produce plan"));
	}
	GPOS_CATCH_END;

	return NULL;
}
//---------------------------------------------------------------------------
// @function: COptTasks::Optimize
// @doc: optimizes a query to physical DXL
//   Builds a local context for the query, flags that the plan DXL should be
//   serialized, runs OptimizeTask via Execute(), and returns the context's
//   m_plan_dxl.
//---------------------------------------------------------------------------
char *
COptTasks::Optimize(Query *query)
{
	SOptContext gpopt_context;
	gpopt_context.m_query = query;
	gpopt_context.m_should_serialize_plan_dxl = true;

	Execute(&OptimizeTask, &gpopt_context);

	// clean up context
	gpopt_context.Free(gpopt_context.epinQuery, gpopt_context.epinPlanDXL);

	return gpopt_context.m_plan_dxl;
}
COptTasks::GPOPTOptimizedPlan和COptTasks::Optimize函数都是调用OptimizeTask函数,不同的是分别设置SOptContext的m_should_generate_plan_stmt或m_should_serialize_plan_dxl,从而达到不同的作用:optimizes a query to plannedstmt、optimizes a query to physical DXL。
SerializeDXLPlan函数用于explain流程:ExplainQuery/ExplainOneUtility --> ExplainOneQuery --> ExplainDXL --> SerializeDXLPlan。
CGPOptimizer为GP提供ORCA优化器结束函数
TerminateGPOPT函数用于Terminate GPOPT and dependent libraries,主要调用gpopt_terminate()、gpdxl_terminate()、gpos_terminate()。
extern "C" {
// C-linkage wrapper that forwards to CGPOptimizer::TerminateGPOPT().
// A caught exception matching gpdxl::ExmaGPDB / gpdxl::ExmiGPDBError is
// re-thrown through PG_RE_THROW(); the handler does nothing else.
void
TerminateGPOPT()
{
	GPOS_TRY
	{
		return CGPOptimizer::TerminateGPOPT();
	}
	GPOS_CATCH_EX(ex)
	{
		if (GPOS_MATCH_EX(ex, gpdxl::ExmaGPDB, gpdxl::ExmiGPDBError))
		{
			PG_RE_THROW();
		}
	}
	GPOS_CATCH_END;
}
}
// Terminate GPOPT and the libraries it depends on, calling
// gpopt_terminate(), gpdxl_terminate() and gpos_terminate() in that order.
void
CGPOptimizer::TerminateGPOPT()
{
	gpopt_terminate();
	gpdxl_terminate();
	gpos_terminate();
}
ShutdownPostgres函数调用TerminateGPOPT,并销毁OptimizerMemoryContext内存上下文。