PostgreSQL查询引擎——transform expressions之AEXPR

/*
 * transformAExprOp
 *		Transform an operator-style A_Expr into its analyzed expression tree.
 *
 * The operand shapes are tested in the following order:
 *	1. "foo = NULL" / "NULL = foo" while Transform_null_equals is set:
 *	   rewritten as a NullTest (IS NULL) and transformed recursively.
 *	2. RowExpr op <EXPR_SUBLINK subselect>: the SubLink node is converted in
 *	   place into a ROWCOMPARE_SUBLINK and transformed recursively.
 *	3. RowExpr op RowExpr: both rows are transformed and then combined by
 *	   make_row_comparison_op.
 *	4. Everything else: an ordinary scalar operator built by make_op.
 *
 * pstate: current parse state.
 * a: the raw operator expression (name, lexpr, rexpr, location are read).
 *
 * Returns the transformed expression node.
 */
static Node *
transformAExprOp(ParseState *pstate, A_Expr *a)
{
	Node	   *left = a->lexpr;	/* left operand */
	Node	   *right = a->rexpr;	/* right operand */

	/*
	 * Special-case "foo = NULL" and "NULL = foo" for compatibility with
	 * standards-broken products (like Microsoft's): turn them into IS NULL
	 * tests.  If either side is a CaseTestExpr, the expression was generated
	 * internally from a CASE-WHEN expression, and transform_null_equals does
	 * not apply.
	 */
	if (Transform_null_equals &&
		list_length(a->name) == 1 &&
		strcmp(strVal(linitial(a->name)), "=") == 0 &&
		(exprIsNullConstant(left) || exprIsNullConstant(right)) &&
		!(IsA(left, CaseTestExpr) || IsA(right, CaseTestExpr)))
	{
		NullTest   *nulltest = makeNode(NullTest);

		nulltest->nulltesttype = IS_NULL;
		nulltest->location = a->location;
		/* test whichever side is not the NULL constant */
		nulltest->arg = (Expr *) (exprIsNullConstant(left) ? right : left);

		return transformExprRecurse(pstate, (Node *) nulltest);
	}

	/*
	 * Convert "row op subselect" into a ROWCOMPARE sublink.  Formerly the
	 * grammar did this, but now that a row construct is allowed anywhere in
	 * expressions, it's easier to do it here.
	 */
	if (left && IsA(left, RowExpr) &&
		right && IsA(right, SubLink) &&
		((SubLink *) right)->subLinkType == EXPR_SUBLINK)
	{
		SubLink    *sublink = (SubLink *) right;

		sublink->subLinkType = ROWCOMPARE_SUBLINK;
		sublink->testexpr = left;
		sublink->operName = a->name;
		sublink->location = a->location;

		return transformExprRecurse(pstate, (Node *) sublink);
	}

	/* ROW() op ROW() is handled specially */
	if (left && IsA(left, RowExpr) &&
		right && IsA(right, RowExpr))
	{
		left = transformExprRecurse(pstate, left);
		right = transformExprRecurse(pstate, right);

		return make_row_comparison_op(pstate,
									  a->name,
									  castNode(RowExpr, left)->args,
									  castNode(RowExpr, right)->args,
									  a->location);
	}

	/* Ordinary scalar operator */
	{
		/* captured before the operands are transformed, then handed to make_op */
		Node	   *srf_before_args = pstate->p_last_srf;

		left = transformExprRecurse(pstate, left);
		right = transformExprRecurse(pstate, right);

		return (Node *) make_op(pstate,
								a->name,
								left,
								right,
								srf_before_args,
								a->location);
	}
}

transformAExprOp函数将对AEXPR_OP、AEXPR_LIKE、AEXPR_ILIKE和AEXPR_SIMILAR类型的A_Expr表达式进行转换，分为四种类型。

“foo = NULL” and “NULL = foo” 转为IS NULL表达式
"row op subselect"转为ROWCOMPARE sublink表达式
ROW() op ROW()调用make_row_comparison_op进行处理
scalar operator调用make_op进行处理

scalar operator

make_op函数用于转换运算符表达式，确保类型兼容性。首先获取左表达式、右表达式类型，加上操作符名，调用oper函数（一元前缀运算符则调用left_oper）获取pg_operator中匹配运算符表达式的operator记录；然后强制实现多态参数和返回类型的一致性，可能会调整返回类型或declared_arg_types（make_fn_arguments将其用作强制转换目标）；接着对各个参数执行必要的类型强制转换；最后构造OpExpr节点并返回。

/*
 * make_op()
 *		Operator expression construction.
 *
 * Transform an operator expression ensuring type compatibility.  This is
 * where some type conversion happens.
 *
 * pstate: current parse state.
 * opname: operator name list, used to look up the operator.
 * ltree: left operand, or NULL for a prefix (unary) operator.
 * rtree: right operand.
 * last_srf: should be a copy of pstate->p_last_srf from just before we
 *		started transforming the operator's arguments; this is used for
 *		nested-SRF detection.  If the caller will throw an error anyway for a
 *		set-returning expression, it's okay to cheat and just pass
 *		pstate->p_last_srf.
 * location: parse location stored in the node and passed to the lookups.
 *
 * Returns the newly built OpExpr, cast to Expr *.
 *
 * NOTE(review): opform->oprcode is used without any validity check in this
 * function -- confirm that undefined (shell) operators are rejected elsewhere.
 */
Expr *
make_op(ParseState *pstate, List *opname, Node *ltree, Node *rtree,
		Node *last_srf, int location)
{
	Oid			left_type;		/* type of left operand, if any */
	Oid			right_type;		/* type of right operand */
	Oid			result_type;	/* resolved result type */
	Operator	optup;
	Form_pg_operator opform;
	Oid			actual_arg_types[2];
	Oid			declared_arg_types[2];
	int			nargs;
	List	   *args;
	OpExpr	   *op;

	/* Select the operator */
	if (ltree != NULL)
	{
		/* binary operator */
		left_type = exprType(ltree);
		right_type = exprType(rtree);
		optup = oper(pstate, opname, left_type, right_type, false, location);
	}
	else
	{
		/* prefix operator: only a right operand exists */
		right_type = exprType(rtree);
		left_type = InvalidOid;
		optup = left_oper(pstate, opname, right_type, false, location);
	}

	opform = (Form_pg_operator) GETSTRUCT(optup);

	/* Collect the operands plus their actual and declared types */
	if (ltree != NULL)
	{
		args = list_make2(ltree, rtree);
		actual_arg_types[0] = left_type;
		actual_arg_types[1] = right_type;
		declared_arg_types[0] = opform->oprleft;
		declared_arg_types[1] = opform->oprright;
		nargs = 2;
	}
	else
	{
		args = list_make1(rtree);
		actual_arg_types[0] = right_type;
		declared_arg_types[0] = opform->oprright;
		nargs = 1;
	}

	/*
	 * Enforce consistency with polymorphic argument and return types,
	 * possibly adjusting return type or declared_arg_types (which will be
	 * used as the cast destination by make_fn_arguments).
	 */
	result_type = enforce_generic_type_consistency(actual_arg_types,
												   declared_arg_types,
												   nargs,
												   opform->oprresult,
												   false);

	/* Perform the necessary typecasting of arguments */
	make_fn_arguments(pstate, args, actual_arg_types, declared_arg_types);

	/* Build the expression node */
	op = makeNode(OpExpr);
	op->opno = oprid(optup);
	op->opfuncid = opform->oprcode;
	op->opresulttype = result_type;
	op->opretset = get_func_retset(opform->oprcode);
	/* opcollid and inputcollid will be set by parse_collate.c */
	op->args = args;
	op->location = location;

	if (op->opretset)
	{
		/* It returns a set: check that's OK here ... */
		check_srf_call_placement(pstate, last_srf, location);
		/* ... and remember it for error checks at higher levels */
		pstate->p_last_srf = (Node *) op;
	}

	ReleaseSysCache(optup);

	return (Expr *) op;
}

ROW() op ROW()

c_expr中指明了表达式为Row表达式的规则，而Explicit row production的规则如下所示，语法中有ROW关键字或括号：

/*
 * Row constructor: either the explicit ROW(...) form (possibly empty), or a
 * parenthesized list containing at least two expressions.  The semantic value
 * is the list of element expressions (NIL for "ROW()").
 */
row:		ROW '(' expr_list ')'					{ $$ = $3; }
			| ROW '(' ')'							{ $$ = NIL; }
			| '(' expr_list ',' a_expr ')'			{ $$ = lappend($2, $4); }
		;
/* Only the forms spelled with the ROW keyword. */
explicit_row:	ROW '(' expr_list ')'				{ $$ = $3; }
			| ROW '(' ')'							{ $$ = NIL; }
		;
/* Only the keyword-less parenthesized form; it always has two or more elements. */
implicit_row:	'(' expr_list ',' a_expr ')'		{ $$ = lappend($2, $4); }
		;

/* Comma-separated list of one or more a_expr, accumulated left to right. */
expr_list:	a_expr { $$ = list_make1($1); }
			| expr_list ',' a_expr { $$ = lappend($1, $3); }

（原文此处为一张配图，图片内容缺失。）
对于ROW() op ROW()调用make_row_comparison_op(pstate, a->name, castNode(RowExpr, lexpr)->args, castNode(RowExpr, rexpr)->args, a->location)进行转换处理。该函数输入是已转换表达式的列表。与coerce_type一样，如果不需要特殊的未知Param处理，pstate可能为NULL。输出可以是单个OpExpr、OpExpr的AND或OR组合或RowCompareExpr。在所有情况下，都保证返回布尔值。AND、OR和RowCompareExpr的情况进一步暗示了运算符的行为（即，它们的行为等同于 =、<>，或 <、<=、>、>=）。The inputs are lists of already-transformed expressions. As with coerce_type, pstate may be NULL if no special unknown-Param processing is wanted. The output may be a single OpExpr, an AND or OR combination of OpExprs, or a RowCompareExpr. In all cases it is guaranteed to return boolean. The AND, OR, and RowCompareExpr cases further imply things about the behavior of the operators (ie, they behave as =, <>, or < <= > >=). 首先取出左右RowExpr表达式args中的子表达式，通过make_op函数为每一对子表达式创建OpExpr结构。如果row()的长度为1，则只返回单个运算符OpExpr。否则需要为每个OpExpr中的运算符寻找包含该运算符的btree操作族；对于=和<>，分别创建BoolExpr（AND_EXPR、OR_EXPR）；对于其他运算符，查找到其对应的opfamily，创建RowCompareExpr，并关联各OpExpr中的左右表达式。

/*
 * make_row_comparison_op
 *		Build the expression tree for "ROW(...) op ROW(...)".
 *
 * pstate: current parse state (its p_last_srf is passed on to make_op).
 * opname: operator name list; its last element is quoted in error messages.
 * largs, rargs: the element expressions of the left and right rows.  The two
 *		lists must have the same, nonzero length.
 * location: parse location used for error cursor positions and BoolExprs.
 *
 * Returns:
 *	- the single pairwise OpExpr when the rows have exactly one element;
 *	- an AND BoolExpr of the pairwise OpExprs for ROWCOMPARE_EQ;
 *	- an OR BoolExpr of the pairwise OpExprs for ROWCOMPARE_NE;
 *	- otherwise a RowCompareExpr carrying the operator OIDs, the chosen btree
 *	  opfamilies, and the (possibly coerced) left/right arguments.
 *
 * Raises an error if the rows differ in length, if they are empty, or if the
 * pairwise operators share no common btree strategy.
 *
 * NOTE(review): upstream PostgreSQL additionally rejects pairwise operators
 * that do not yield boolean or that return a set.  Those checks are not part
 * of this excerpt and would need identifiers that are not visible here.
 */
static Node *
make_row_comparison_op(ParseState *pstate, List *opname,
					   List *largs, List *rargs, int location)
{
	RowCompareExpr *rcexpr;
	RowCompareType rctype;
	List	   *opexprs = NIL;
	List	   *opfamilies = NIL;
	List	   *opnos = NIL;
	List	  **opinfo_lists;
	Bitmapset  *strats = NULL;
	ListCell   *l;
	ListCell   *r;
	int			nopers;
	int			i;

	/*
	 * Both rows must have the same number of entries.  Without this check,
	 * forboth() would silently stop at the shorter list while nopers still
	 * counted the left list, leaving trailing opinfo_lists[] slots
	 * uninitialized.
	 */
	nopers = list_length(largs);
	if (nopers != list_length(rargs))
		ereport(ERROR,
				(errcode(ERRCODE_SYNTAX_ERROR),
				 errmsg("unequal number of entries in row expressions"),
				 parser_errposition(pstate, location)));

	/*
	 * We can't compare zero-length rows because there is no principled basis
	 * for figuring out what the operator is.
	 */
	if (nopers == 0)
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("cannot compare rows of zero length"),
				 parser_errposition(pstate, location)));

	/*
	 * Identify all the pairwise operators, using make_op so that behavior is
	 * the same as in the simple scalar case.
	 */
	forboth(l, largs, r, rargs)
	{
		Node	   *larg = (Node *) lfirst(l);
		Node	   *rarg = (Node *) lfirst(r);
		OpExpr	   *cmp;

		cmp = castNode(OpExpr, make_op(pstate, opname, larg, rarg,
									   pstate->p_last_srf, location));
		opexprs = lappend(opexprs, cmp);
	}

	/*
	 * If rows are length 1, just return the single operator.  In this case we
	 * don't insist on identifying btree semantics for the operator.
	 */
	if (nopers == 1)
		return (Node *) linitial(opexprs);

	/*
	 * Now we must determine which row comparison semantics (= <> < <= > >=)
	 * apply to this set of operators.  We look for btree opfamilies
	 * containing the operators, and see which interpretations (strategy
	 * numbers) exist for each operator.
	 */
	opinfo_lists = (List **) palloc(nopers * sizeof(List *));
	i = 0;
	foreach(l, opexprs)
	{
		Oid			opno = ((OpExpr *) lfirst(l))->opno;
		Bitmapset  *this_strats = NULL;
		ListCell   *j;

		opinfo_lists[i] = get_op_btree_interpretation(opno);

		/*
		 * Convert strategy numbers into a Bitmapset to make the intersection
		 * calculation easy.
		 */
		foreach(j, opinfo_lists[i])
		{
			OpBtreeInterpretation *opinfo = lfirst(j);

			this_strats = bms_add_member(this_strats, opinfo->strategy);
		}

		if (i == 0)
			strats = this_strats;
		else
			strats = bms_int_members(strats, this_strats);
		i++;
	}

	/*
	 * If there are multiple common interpretations, we may use any one of
	 * them ... this coding arbitrarily picks the lowest btree strategy
	 * number.
	 */
	i = bms_first_member(strats);
	if (i < 0)
	{
		/* No common interpretation, so fail rather than use a bogus rctype */
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("could not determine interpretation of row comparison operator %s",
						strVal(llast(opname))),
				 errhint("Row comparison operators must be associated with btree operator families."),
				 parser_errposition(pstate, location)));
	}
	rctype = (RowCompareType) i;

	/*
	 * For = and <> cases, we just combine the pairwise operators with AND or
	 * OR respectively.
	 */
	if (rctype == ROWCOMPARE_EQ)
		return (Node *) makeBoolExpr(AND_EXPR, opexprs, location);
	if (rctype == ROWCOMPARE_NE)
		return (Node *) makeBoolExpr(OR_EXPR, opexprs, location);

	/*
	 * Otherwise we need to choose exactly which opfamily to associate with
	 * each operator: the first one whose strategy matches rctype.
	 */
	for (i = 0; i < nopers; i++)
	{
		Oid			opfamily = InvalidOid;
		ListCell   *j;

		foreach(j, opinfo_lists[i])
		{
			OpBtreeInterpretation *opinfo = lfirst(j);

			if (opinfo->strategy == rctype)
			{
				opfamily = opinfo->opfamily_id;
				break;
			}
		}

		if (OidIsValid(opfamily))
			opfamilies = lappend_oid(opfamilies, opfamily);
		else					/* should not happen */
			ereport(ERROR,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
					 errmsg("could not determine interpretation of row comparison operator %s",
							strVal(llast(opname))),
					 errdetail("There are multiple equally-plausible candidates."),
					 parser_errposition(pstate, location)));
	}

	/*
	 * Now deconstruct the OpExprs and create a RowCompareExpr.
	 *
	 * Note: can't just reuse the passed largs/rargs lists, because of
	 * possibility that make_op inserted coercion operations.
	 */
	largs = NIL;
	rargs = NIL;
	foreach(l, opexprs)
	{
		OpExpr	   *cmp = (OpExpr *) lfirst(l);

		opnos = lappend_oid(opnos, cmp->opno);
		largs = lappend(largs, linitial(cmp->args));
		rargs = lappend(rargs, lsecond(cmp->args));
	}

	rcexpr = makeNode(RowCompareExpr);
	rcexpr->rctype = rctype;
	rcexpr->opnos = opnos;
	rcexpr->opfamilies = opfamilies;
	rcexpr->inputcollids = NIL; /* assign_expr_collations will fix this */
	rcexpr->largs = largs;
	rcexpr->rargs = rargs;

	return (Node *) rcexpr;
}