citus/src/backend/distributed/planner/distributed_planner.c

/*-------------------------------------------------------------------------
 *
 * distributed_planner.c
 *	  General Citus planner code.
 *
 * Copyright (c) Citus Data, Inc.
 *-------------------------------------------------------------------------
 */

#include "postgres.h"
#include "funcapi.h"

#include <float.h>
#include <limits.h>

#include "access/htup_details.h"
#include "catalog/pg_class.h"
#include "catalog/pg_proc.h"
#include "catalog/pg_type.h"
#include "distributed/citus_nodefuncs.h"
#include "distributed/citus_nodes.h"
#include "distributed/cte_inline.h"
#include "distributed/function_call_delegation.h"
#include "distributed/insert_select_planner.h"
#include "distributed/intermediate_result_pruning.h"
#include "distributed/intermediate_results.h"
#include "distributed/listutils.h"
#include "distributed/master_protocol.h"
#include "distributed/metadata_cache.h"
#include "distributed/multi_executor.h"
#include "distributed/distributed_planner.h"
#include "distributed/query_pushdown_planning.h"
#include "distributed/multi_logical_optimizer.h"
#include "distributed/multi_logical_planner.h"
#include "distributed/multi_partitioning_utils.h"
#include "distributed/multi_physical_planner.h"
#include "distributed/multi_master_planner.h"
#include "distributed/multi_router_planner.h"
#include "distributed/query_utils.h"
#include "distributed/recursive_planning.h"
#include "distributed/shardinterval_utils.h"
#include "distributed/version_compat.h"
#include "distributed/worker_shard_visibility.h"
#include "executor/executor.h"
#include "nodes/makefuncs.h"
#include "nodes/nodeFuncs.h"
#include "parser/parsetree.h"
#include "parser/parse_type.h"
#if PG_VERSION_NUM >= 120000
#include "optimizer/optimizer.h"
#include "optimizer/plancat.h"
#else
#include "optimizer/cost.h"
#endif
#include "optimizer/pathnode.h"
#include "optimizer/planner.h"
#include "optimizer/planmain.h"
#include "utils/builtins.h"
#include "utils/datum.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
#include "utils/syscache.h"


static List *plannerRestrictionContextList = NIL;
int MultiTaskQueryLogLevel = MULTI_TASK_QUERY_INFO_OFF; /* multi-task query log level */
static uint64 NextPlanId = 1;

/* keep track of planner call stack levels */
int PlannerLevel = 0;

static bool ListContainsDistributedTableRTE(List *rangeTableList);
static bool IsUpdateOrDelete(Query *query);
static PlannedStmt * CreateDistributedPlannedStmt(
	DistributedPlanningContext *planContext);
static PlannedStmt * InlineCtesAndCreateDistributedPlannedStmt(uint64 planId,
															   DistributedPlanningContext
															   *planContext);
static PlannedStmt * TryCreateDistributedPlannedStmt(PlannedStmt *localPlan,
													 Query *originalQuery,
													 Query *query, ParamListInfo
													 boundParams,
													 PlannerRestrictionContext *
													 plannerRestrictionContext);
static DistributedPlan * CreateDistributedPlan(uint64 planId, Query *originalQuery,
											   Query *query, ParamListInfo boundParams,
											   bool hasUnresolvedParams,
											   PlannerRestrictionContext *
											   plannerRestrictionContext);
static void FinalizeDistributedPlan(DistributedPlan *plan, Query *originalQuery);
static void RecordSubPlansUsedInPlan(DistributedPlan *plan, Query *originalQuery);
static DeferredErrorMessage * DeferErrorIfPartitionTableNotSingleReplicated(Oid
																			relationId);

static int AssignRTEIdentities(List *rangeTableList, int rteIdCounter);
static void AssignRTEIdentity(RangeTblEntry *rangeTableEntry, int rteIdentifier);
static void AdjustPartitioningForDistributedPlanning(List *rangeTableList,
													 bool setPartitionedTablesInherited);
static PlannedStmt * FinalizeNonRouterPlan(PlannedStmt *localPlan,
										   DistributedPlan *distributedPlan,
										   CustomScan *customScan);
static PlannedStmt * FinalizeRouterPlan(PlannedStmt *localPlan, CustomScan *customScan);
static List * makeTargetListFromCustomScanList(List *custom_scan_tlist);
static List * makeCustomScanTargetlistFromExistingTargetList(List *existingTargetlist);
static int32 BlessRecordExpressionList(List *exprs);
static void CheckNodeIsDumpable(Node *node);
static Node * CheckNodeCopyAndSerialization(Node *node);
static void AdjustReadIntermediateResultCost(RangeTblEntry *rangeTableEntry,
											 RelOptInfo *relOptInfo);
static void AdjustReadIntermediateResultArrayCost(RangeTblEntry *rangeTableEntry,
												  RelOptInfo *relOptInfo);
static void AdjustReadIntermediateResultsCostInternal(RelOptInfo *relOptInfo,
													  List *columnTypes,
													  int resultIdCount,
													  Datum *resultIds,
													  Const *resultFormatConst);
static List * OuterPlanParamsList(PlannerInfo *root);
static List * CopyPlanParamList(List *originalPlanParamList);
static PlannerRestrictionContext * CreateAndPushPlannerRestrictionContext(void);
static PlannerRestrictionContext * CurrentPlannerRestrictionContext(void);
static void PopPlannerRestrictionContext(void);
static void ResetPlannerRestrictionContext(
	PlannerRestrictionContext *plannerRestrictionContext);
static bool HasUnresolvedExternParamsWalker(Node *expression, ParamListInfo boundParams);
static bool IsLocalReferenceTableJoin(Query *parse, List *rangeTableList);
static bool QueryIsNotSimpleSelect(Node *node);
static bool UpdateReferenceTablesWithShard(Node *node, void *context);
static PlannedStmt * PlanFastPathDistributedStmt(DistributedPlanningContext *planContext,
												 Node *distributionKeyValue);
static PlannedStmt * PlanDistributedStmt(DistributedPlanningContext *planContext,
										 List *rangeTableList, int rteIdCounter);


/* Distributed planner hook */
PlannedStmt *
distributed_planner(Query *parse, int cursorOptions, ParamListInfo boundParams)
{
	PlannedStmt *result = NULL;
	bool needsDistributedPlanning = false;
	bool setPartitionedTablesInherited = false;
	List *rangeTableList = ExtractRangeTableEntryList(parse);
	int rteIdCounter = 1;
	bool fastPathRouterQuery = false;
	Node *distributionKeyValue = NULL;
	DistributedPlanningContext planContext = {
		.query = parse,
		.cursorOptions = cursorOptions,
		.boundParams = boundParams,
	};


	if (cursorOptions & CURSOR_OPT_FORCE_DISTRIBUTED)
	{
		/* this cursor flag could only be set when Citus has been loaded */
		Assert(CitusHasBeenLoaded());

		needsDistributedPlanning = true;
	}
	else if (CitusHasBeenLoaded())
	{
		if (IsLocalReferenceTableJoin(parse, rangeTableList))
		{
			/*
			 * For joins between reference tables and local tables, we replace
			 * reference table names with shard tables names in the query, so
			 * we can use the standard_planner for planning it locally.
			 */
			needsDistributedPlanning = false;
			UpdateReferenceTablesWithShard((Node *) parse, NULL);
		}
		else
		{
			needsDistributedPlanning = ListContainsDistributedTableRTE(rangeTableList);
			if (needsDistributedPlanning)
			{
				fastPathRouterQuery = FastPathRouterQuery(parse, &distributionKeyValue);
			}
		}
	}

	if (fastPathRouterQuery)
	{
		/*
		 *  We need to copy the parse tree because the FastPathPlanner modifies
		 *  it. In the next branch we do the same for other distributed queries
		 *  too, but for those it needs to be done AFTER calling
		 *  AssignRTEIdentities.
		 */
		planContext.originalQuery = copyObject(parse);
	}
	else if (needsDistributedPlanning)
	{
		/*
		 * Inserting into a local table needs to go through the regular postgres
		 * planner/executor, but the SELECT needs to go through Citus. We currently
		 * don't have a way of doing both things and therefore error out, but do
		 * have a handy tip for users.
		 */
		if (InsertSelectIntoLocalTable(parse))
		{
			ereport(ERROR, (errmsg("cannot INSERT rows from a distributed query into a "
								   "local table"),
							errhint("Consider using CREATE TEMPORARY TABLE tmp AS "
									"SELECT ... and inserting from the temporary "
									"table.")));
		}

		/*
		 * standard_planner scribbles on it's input, but for deparsing we need the
		 * unmodified form. Note that before copying we call
		 * AssignRTEIdentities, which is needed because these identities need
		 * to be present in the copied query too.
		 */
		rteIdCounter = AssignRTEIdentities(rangeTableList, rteIdCounter);
		planContext.originalQuery = copyObject(parse);

		setPartitionedTablesInherited = false;
		AdjustPartitioningForDistributedPlanning(rangeTableList,
												 setPartitionedTablesInherited);
	}

	/*
	 * Make sure that we hide shard names on the Citus MX worker nodes. See comments in
	 * ReplaceTableVisibleFunction() for the details.
	 */
	ReplaceTableVisibleFunction((Node *) parse);

	/* create a restriction context and put it at the end if context list */
	planContext.plannerRestrictionContext = CreateAndPushPlannerRestrictionContext();

	/*
	 * We keep track of how many times we've recursed into the planner, primarily
	 * to detect whether we are in a function call. We need to make sure that the
	 * PlannerLevel is decremented exactly once at the end of the next PG_TRY
	 * block, both in the happy case and when an error occurs.
	 */
	PlannerLevel++;


	PG_TRY();
	{
		if (fastPathRouterQuery)
		{
			result = PlanFastPathDistributedStmt(&planContext, distributionKeyValue);
		}
		else
		{
			/*
			 * Call into standard_planner because the Citus planner relies on both the
			 * restriction information per table and parse tree transformations made by
			 * postgres' planner.
			 */
			planContext.plan = standard_planner(planContext.query,
												planContext.cursorOptions,
												planContext.boundParams);
			if (needsDistributedPlanning)
			{
				result = PlanDistributedStmt(&planContext, rangeTableList, rteIdCounter);
			}
			else if ((result = TryToDelegateFunctionCall(&planContext)) == NULL)
			{
				result = planContext.plan;
			}
		}
	}
	PG_CATCH();
	{
		PopPlannerRestrictionContext();

		PlannerLevel--;

		PG_RE_THROW();
	}
	PG_END_TRY();

	PlannerLevel--;

	/* remove the context from the context list */
	PopPlannerRestrictionContext();

	/*
	 * In some cases, for example; parameterized SQL functions, we may miss that
	 * there is a need for distributed planning. Such cases only become clear after
	 * standard_planner performs some modifications on parse tree. In such cases
	 * we will simply error out.
	 */
	if (!needsDistributedPlanning && NeedsDistributedPlanning(parse))
	{
		ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
						errmsg("cannot perform distributed planning on this "
							   "query because parameterized queries for SQL "
							   "functions referencing distributed tables are "
							   "not supported"),
						errhint("Consider using PL/pgSQL functions instead.")));
	}

	return result;
}


/*
 * ExtractRangeTableEntryList is a wrapper around ExtractRangeTableEntryWalker.
 * The function traverses the input query and returns all the range table
 * entries that are in the query tree.
 */
List *
ExtractRangeTableEntryList(Query *query)
{
	List *rangeTblList = NIL;

	ExtractRangeTableEntryWalker((Node *) query, &rangeTblList);

	return rangeTblList;
}


/*
 * NeedsDistributedPlanning returns true if the Citus extension is loaded and
 * the query contains a distributed table.
 *
 * This function allows queries containing local tables to pass through the
 * distributed planner. How to handle local tables is a decision that should
 * be made within the planner
 */
bool
NeedsDistributedPlanning(Query *query)
{
	List *allRTEs = NIL;
	CmdType commandType = query->commandType;

	if (!CitusHasBeenLoaded())
	{
		return false;
	}

	if (commandType != CMD_SELECT && commandType != CMD_INSERT &&
		commandType != CMD_UPDATE && commandType != CMD_DELETE)
	{
		return false;
	}

	ExtractRangeTableEntryWalker((Node *) query, &allRTEs);

	return ListContainsDistributedTableRTE(allRTEs);
}


/*
 * ListContainsDistributedTableRTE gets a list of range table entries
 * and returns true if there is at least one distributed relation range
 * table entry in the list.
 */
static bool
ListContainsDistributedTableRTE(List *rangeTableList)
{
	ListCell *rangeTableCell = NULL;

	foreach(rangeTableCell, rangeTableList)
	{
		RangeTblEntry *rangeTableEntry = (RangeTblEntry *) lfirst(rangeTableCell);

		if (rangeTableEntry->rtekind != RTE_RELATION)
		{
			continue;
		}

		if (IsDistributedTable(rangeTableEntry->relid))
		{
			return true;
		}
	}

	return false;
}


/*
 * AssignRTEIdentities function modifies query tree by adding RTE identities to the
 * RTE_RELATIONs.
 *
 * Please note that, we want to avoid modifying query tree as much as possible
 * because if PostgreSQL changes the way it uses modified fields, that may break
 * our logic.
 *
 * Returns the next id. This can be used to call on a rangeTableList that may've
 * been partially assigned. Should be set to 1 initially.
 */
static int
AssignRTEIdentities(List *rangeTableList, int rteIdCounter)
{
	ListCell *rangeTableCell = NULL;

	foreach(rangeTableCell, rangeTableList)
	{
		RangeTblEntry *rangeTableEntry = (RangeTblEntry *) lfirst(rangeTableCell);

		/*
		 * To be able to track individual RTEs through PostgreSQL's query
		 * planning, we need to be able to figure out whether an RTE is
		 * actually a copy of another, rather than a different one. We
		 * simply number the RTEs starting from 1.
		 *
		 * Note that we're only interested in RTE_RELATIONs and thus assigning
		 * identifiers to those RTEs only.
		 */
		if (rangeTableEntry->rtekind == RTE_RELATION &&
			rangeTableEntry->values_lists == NIL)
		{
			AssignRTEIdentity(rangeTableEntry, rteIdCounter++);
		}
	}

	return rteIdCounter;
}


/*
 * AdjustPartitioningForDistributedPlanning function modifies query tree by
 * changing inh flag and relkind of partitioned tables. We want Postgres to
 * treat partitioned tables as regular relations (i.e. we do not want to
 * expand them to their partitions) since it breaks Citus planning in different
 * ways. We let anything related to partitioning happen on the shards.
 *
 * Please note that, we want to avoid modifying query tree as much as possible
 * because if PostgreSQL changes the way it uses modified fields, that may break
 * our logic.
 */
static void
AdjustPartitioningForDistributedPlanning(List *rangeTableList,
										 bool setPartitionedTablesInherited)
{
	ListCell *rangeTableCell = NULL;

	foreach(rangeTableCell, rangeTableList)
	{
		RangeTblEntry *rangeTableEntry = (RangeTblEntry *) lfirst(rangeTableCell);

		/*
		 * We want Postgres to behave partitioned tables as regular relations
		 * (i.e. we do not want to expand them to their partitions). To do this
		 * we set each distributed partitioned table's inh flag to appropriate
		 * value before and after dropping to the standart_planner.
		 */
		if (rangeTableEntry->rtekind == RTE_RELATION &&
			IsDistributedTable(rangeTableEntry->relid) &&
			PartitionedTable(rangeTableEntry->relid))
		{
			rangeTableEntry->inh = setPartitionedTablesInherited;

			if (setPartitionedTablesInherited)
			{
				rangeTableEntry->relkind = RELKIND_PARTITIONED_TABLE;
			}
			else
			{
				rangeTableEntry->relkind = RELKIND_RELATION;
			}
		}
	}
}


/*
 * AssignRTEIdentity assigns the given rteIdentifier to the given range table
 * entry.
 *
 * To be able to track RTEs through postgres' query planning, which copies and
 * duplicate, and modifies them, we sometimes need to figure out whether two
 * RTEs are copies of the same original RTE. For that we, hackishly, use a
 * field normally unused in RTE_RELATION RTEs.
 *
 * The assigned identifier better be unique within a plantree.
 */
static void
AssignRTEIdentity(RangeTblEntry *rangeTableEntry, int rteIdentifier)
{
	Assert(rangeTableEntry->rtekind == RTE_RELATION);

	rangeTableEntry->values_lists = list_make1_int(rteIdentifier);
}


/* GetRTEIdentity returns the identity assigned with AssignRTEIdentity. */
int
GetRTEIdentity(RangeTblEntry *rte)
{
	Assert(rte->rtekind == RTE_RELATION);
	Assert(rte->values_lists != NIL);
	Assert(IsA(rte->values_lists, IntList));
	Assert(list_length(rte->values_lists) == 1);

	return linitial_int(rte->values_lists);
}


/*
 * IsModifyCommand returns true if the query performs modifications, false
 * otherwise.
 */
bool
IsModifyCommand(Query *query)
{
	CmdType commandType = query->commandType;

	if (commandType == CMD_INSERT || commandType == CMD_UPDATE ||
		commandType == CMD_DELETE)
	{
		return true;
	}

	return false;
}


/*
 * IsMultiTaskPlan returns true if job contains multiple tasks.
 */
bool
IsMultiTaskPlan(DistributedPlan *distributedPlan)
{
	Job *workerJob = distributedPlan->workerJob;

	if (workerJob != NULL && list_length(workerJob->taskList) > 1)
	{
		return true;
	}

	return false;
}


/*
 * IsUpdateOrDelete returns true if the query performs an update or delete.
 */
bool
IsUpdateOrDelete(Query *query)
{
	return query->commandType == CMD_UPDATE ||
		   query->commandType == CMD_DELETE;
}


/*
 * IsModifyDistributedPlan returns true if the multi plan performs modifications,
 * false otherwise.
 */
bool
IsModifyDistributedPlan(DistributedPlan *distributedPlan)
{
	return distributedPlan->modLevel > ROW_MODIFY_READONLY;
}


/*
 * PlanFastPathDistributedStmt creates a distributed planned statement using
 * the FastPathPlanner.
 */
static PlannedStmt *
PlanFastPathDistributedStmt(DistributedPlanningContext *planContext,
							Node *distributionKeyValue)
{
	FastPathRestrictionContext *fastPathContext =
		planContext->plannerRestrictionContext->fastPathRestrictionContext;

	planContext->plannerRestrictionContext->fastPathRestrictionContext->
	fastPathRouterQuery = true;

	if (distributionKeyValue == NULL)
	{
		/* nothing to record */
	}
	else if (IsA(distributionKeyValue, Const))
	{
		fastPathContext->distributionKeyValue = (Const *) distributionKeyValue;
	}
	else if (IsA(distributionKeyValue, Param))
	{
		fastPathContext->distributionKeyHasParam = true;
	}

	planContext->plan = FastPathPlanner(planContext->originalQuery, planContext->query,
										planContext->boundParams);

	return CreateDistributedPlannedStmt(planContext);
}


/*
 * PlanDistributedStmt creates a distributed planned statement using the PG
 * planner.
 */
static PlannedStmt *
PlanDistributedStmt(DistributedPlanningContext *planContext,
					List *rangeTableList,
					int rteIdCounter)
{
	/* may've inlined new relation rtes */
	rangeTableList = ExtractRangeTableEntryList(planContext->query);
	rteIdCounter = AssignRTEIdentities(rangeTableList, rteIdCounter);


	PlannedStmt *result = CreateDistributedPlannedStmt(planContext);

	bool setPartitionedTablesInherited = true;
	AdjustPartitioningForDistributedPlanning(rangeTableList,
											 setPartitionedTablesInherited);

	return result;
}


/*
 * DissuadePlannerFromUsingPlan try dissuade planner when planning a plan that
 * potentially failed due to unresolved prepared statement parameters.
 */
void
DissuadePlannerFromUsingPlan(PlannedStmt *plan)
{
	/*
	 * Arbitrarily high cost, but low enough that it can be added up
	 * without overflowing by choose_custom_plan().
	 */
	plan->planTree->total_cost = FLT_MAX / 100000000;
}


/*
 * CreateDistributedPlannedStmt encapsulates the logic needed to transform a particular
 * query into a distributed plan that is encapsulated by a PlannedStmt.
 */
static PlannedStmt *
CreateDistributedPlannedStmt(DistributedPlanningContext *planContext)
{
	uint64 planId = NextPlanId++;
	bool hasUnresolvedParams = false;
	JoinRestrictionContext *joinRestrictionContext =
		planContext->plannerRestrictionContext->joinRestrictionContext;

	PlannedStmt *resultPlan = NULL;

	if (QueryTreeContainsInlinableCTE(planContext->originalQuery))
	{
		/*
		 * Inlining CTEs as subqueries in the query can avoid recursively
		 * planning some (or all) of the CTEs. In other words, the inlined
		 * CTEs could become part of query pushdown planning, which is much
		 * more efficient than recursively planning. So, first try distributed
		 * planning on the inlined CTEs in the query tree.
		 *
		 * We also should fallback to distributed planning with non-inlined CTEs
		 * if the distributed planning fails with inlined CTEs, because recursively
		 * planning CTEs can provide full SQL coverage, although it might be slow.
		 */
		resultPlan = InlineCtesAndCreateDistributedPlannedStmt(planId, planContext);
		if (resultPlan != NULL)
		{
			return resultPlan;
		}
	}

	if (HasUnresolvedExternParamsWalker((Node *) planContext->originalQuery,
										planContext->boundParams))
	{
		hasUnresolvedParams = true;
	}

	planContext->plannerRestrictionContext->joinRestrictionContext =
		RemoveDuplicateJoinRestrictions(joinRestrictionContext);

	DistributedPlan *distributedPlan =
		CreateDistributedPlan(planId, planContext->originalQuery, planContext->query,
							  planContext->boundParams,
							  hasUnresolvedParams,
							  planContext->plannerRestrictionContext);

	/*
	 * If no plan was generated, prepare a generic error to be emitted.
	 * Normally this error message will never returned to the user, as it's
	 * usually due to unresolved prepared statement parameters - in that case
	 * the logic below will force a custom plan (i.e. with parameters bound to
	 * specific values) to be generated.  But sql (not plpgsql) functions
	 * unfortunately don't go through a codepath supporting custom plans - so
	 * we still need to have an error prepared.
	 */
	if (!distributedPlan)
	{
		/* currently always should have a more specific error otherwise */
		Assert(hasUnresolvedParams);
		distributedPlan = CitusMakeNode(DistributedPlan);
		distributedPlan->planningError =
			DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
						  "could not create distributed plan",
						  "Possibly this is caused by the use of parameters in SQL "
						  "functions, which is not supported in Citus.",
						  "Consider using PL/pgSQL functions instead.");
	}

	/*
	 * Error out if none of the planners resulted in a usable plan, unless the
	 * error was possibly triggered by missing parameters.  In that case we'll
	 * not error out here, but instead rely on postgres' custom plan logic.
	 * Postgres re-plans prepared statements the first five executions
	 * (i.e. it produces custom plans), after that the cost of a generic plan
	 * is compared with the average custom plan cost.  We support otherwise
	 * unsupported prepared statement parameters by assigning an exorbitant
	 * cost to the unsupported query.  That'll lead to the custom plan being
	 * chosen.  But for that to be possible we can't error out here, as
	 * otherwise that logic is never reached.
	 */
	if (distributedPlan->planningError && !hasUnresolvedParams)
	{
		RaiseDeferredError(distributedPlan->planningError, ERROR);
	}

	/* remember the plan's identifier for identifying subplans */
	distributedPlan->planId = planId;

	/* create final plan by combining local plan with distributed plan */
	resultPlan = FinalizePlan(planContext->plan, distributedPlan);

	/*
	 * As explained above, force planning costs to be unrealistically high if
	 * query planning failed (possibly) due to prepared statement parameters or
	 * if it is planned as a multi shard modify query.
	 */
	if ((distributedPlan->planningError ||
		 (IsUpdateOrDelete(planContext->originalQuery) && IsMultiTaskPlan(
			  distributedPlan))) &&
		hasUnresolvedParams)
	{
		DissuadePlannerFromUsingPlan(resultPlan);
	}

	return resultPlan;
}


/*
 * InlineCtesAndCreateDistributedPlannedStmt gets all the parameters required
 * for creating a distributed planned statement. The function is primarily a
 * wrapper on top of CreateDistributedPlannedStmt(), by first inlining the
 * CTEs and calling CreateDistributedPlannedStmt() in PG_TRY() block. The
 * function returns NULL if the planning fails on the query where eligable
 * CTEs are inlined.
 */
static PlannedStmt *
InlineCtesAndCreateDistributedPlannedStmt(uint64 planId,
										  DistributedPlanningContext *planContext)
{
	if (!EnableCTEInlining)
	{
		/*
		 * In Postgres 12+, users can adjust whether to inline/not inline CTEs
		 * by [NOT] MATERIALIZED keywords. However, in PG 11, that's not possible.
		 * So, with this we provide a way to prevent CTE inlining on Postgres 11.
		 *
		 * The main use-case for this is not to have divergent test outputs between
		 * PG 11 vs PG 12, so not very much intended for users.
		 */
		return NULL;
	}

	/*
	 * We'll inline the CTEs and try distributed planning, preserve the original
	 * query in case the planning fails and we fallback to recursive planning of
	 * CTEs.
	 */
	Query *copyOfOriginalQuery = copyObject(planContext->originalQuery);

	RecursivelyInlineCtesInQueryTree(copyOfOriginalQuery);

	/* after inlining, we shouldn't have any inlinable CTEs */
	Assert(!QueryTreeContainsInlinableCTE(copyOfOriginalQuery));

	#if PG_VERSION_NUM < 120000
	Query *query = planContext->query;

	/*
	 * We had to implement this hack because on Postgres11 and below, the originalQuery
	 * and the query would have significant differences in terms of CTEs where CTEs
	 * would not be inlined on the query (as standard_planner() wouldn't inline CTEs
	 * on PG 11 and below).
	 *
	 * Instead, we prefer to pass the inlined query to the distributed planning. We rely
	 * on the fact that the query includes subqueries, and it'd definitely go through
	 * query pushdown planning. During query pushdown planning, the only relevant query
	 * tree is the original query.
	 */
	planContext->query = copyObject(copyOfOriginalQuery);
#endif


	/* simply recurse into CreateDistributedPlannedStmt() in a PG_TRY() block */
	PlannedStmt *result = TryCreateDistributedPlannedStmt(planContext->plan,
														  copyOfOriginalQuery,
														  planContext->query,
														  planContext->boundParams,
														  planContext->
														  plannerRestrictionContext);

#if PG_VERSION_NUM < 120000

	/*
	 * Set back the original query, in case the planning failed and we need to go
	 * into distributed planning again.
	 */
	planContext->query = query;
#endif

	return result;
}


/*
 * TryCreateDistributedPlannedStmt is a wrapper around CreateDistributedPlannedStmt, simply
 * calling it in PG_TRY()/PG_CATCH() block. The function returns a PlannedStmt if the input
 * query can be planned by Citus. If not, the function returns NULL and generates a DEBUG4
 * message with the reason for the failure.
 */
static PlannedStmt *
TryCreateDistributedPlannedStmt(PlannedStmt *localPlan,
								Query *originalQuery,
								Query *query, ParamListInfo boundParams,
								PlannerRestrictionContext *plannerRestrictionContext)
{
	MemoryContext savedContext = CurrentMemoryContext;
	PlannedStmt *result = NULL;

	DistributedPlanningContext *planContext = palloc0(sizeof(DistributedPlanningContext));

	planContext->plan = localPlan;
	planContext->boundParams = boundParams;
	planContext->originalQuery = originalQuery;
	planContext->query = query;
	planContext->plannerRestrictionContext = plannerRestrictionContext;


	PG_TRY();
	{
		result = CreateDistributedPlannedStmt(planContext);
	}
	PG_CATCH();
	{
		MemoryContextSwitchTo(savedContext);
		ErrorData *edata = CopyErrorData();
		FlushErrorState();

		/* don't try to intercept PANIC or FATAL, let those breeze past us */
		if (edata->elevel != ERROR)
		{
			PG_RE_THROW();
		}

		ereport(DEBUG4, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
						 errmsg("Planning after CTEs inlined failed with "
								"\nmessage: %s\ndetail: %s\nhint: %s",
								edata->message ? edata->message : "",
								edata->detail ? edata->detail : "",
								edata->hint ? edata->hint : "")));

		/* leave the error handling system */
		FreeErrorData(edata);

		result = NULL;
	}
	PG_END_TRY();

	return result;
}


/*
 * CreateDistributedPlan generates a distributed plan for a query.
 * It goes through 3 steps:
 *
 * 1. Try router planner
 * 2. Generate subplans for CTEs and complex subqueries
 *    - If any, go back to step 1 by calling itself recursively
 * 3. Logical planner
 */
static DistributedPlan *
CreateDistributedPlan(uint64 planId, Query *originalQuery, Query *query, ParamListInfo
					  boundParams, bool hasUnresolvedParams,
					  PlannerRestrictionContext *plannerRestrictionContext)
{
	DistributedPlan *distributedPlan = NULL;
	bool hasCtes = originalQuery->cteList != NIL;

	if (IsModifyCommand(originalQuery))
	{
		EnsureModificationsCanRun();

		Oid targetRelationId = ModifyQueryResultRelationId(query);
		EnsurePartitionTableNotReplicated(targetRelationId);

		if (InsertSelectIntoDistributedTable(originalQuery))
		{
			if (hasUnresolvedParams)
			{
				/*
				 * Unresolved parameters can cause performance regressions in
				 * INSERT...SELECT when the partition column is a parameter
				 * because we don't perform any additional pruning in the executor.
				 */
				return NULL;
			}

			distributedPlan =
				CreateInsertSelectPlan(planId, originalQuery, plannerRestrictionContext);
		}
		else
		{
			/* modifications are always routed through the same planner/executor */
			distributedPlan =
				CreateModifyPlan(originalQuery, query, plannerRestrictionContext);
		}

		/* the functions above always return a plan, possibly with an error */
		Assert(distributedPlan);

		if (distributedPlan->planningError == NULL)
		{
			FinalizeDistributedPlan(distributedPlan, originalQuery);

			return distributedPlan;
		}
		else
		{
			RaiseDeferredError(distributedPlan->planningError, DEBUG2);
		}
	}
	else
	{
		/*
		 * For select queries we, if router executor is enabled, first try to
		 * plan the query as a router query. If not supported, otherwise try
		 * the full blown plan/optimize/physical planning process needed to
		 * produce distributed query plans.
		 */

		distributedPlan = CreateRouterPlan(originalQuery, query,
										   plannerRestrictionContext);
		if (distributedPlan->planningError == NULL)
		{
			FinalizeDistributedPlan(distributedPlan, originalQuery);

			return distributedPlan;
		}
		else
		{
			/*
			 * For debugging it's useful to display why query was not
			 * router plannable.
			 */
			RaiseDeferredError(distributedPlan->planningError, DEBUG2);
		}
	}

	if (hasUnresolvedParams)
	{
		/*
		 * There are parameters that don't have a value in boundParams.
		 *
		 * The remainder of the planning logic cannot handle unbound
		 * parameters. We return a NULL plan, which will have an
		 * extremely high cost, such that postgres will replan with
		 * bound parameters.
		 */
		return NULL;
	}

	/* force evaluation of bound params */
	boundParams = copyParamList(boundParams);

	/*
	 * If there are parameters that do have a value in boundParams, replace
	 * them in the original query. This allows us to more easily cut the
	 * query into pieces (during recursive planning) or deparse parts of
	 * the query (during subquery pushdown planning).
	 */
	originalQuery = (Query *) ResolveExternalParams((Node *) originalQuery,
													boundParams);

	/*
	 * Plan subqueries and CTEs that cannot be pushed down by recursively
	 * calling the planner and return the resulting plans to subPlanList.
	 */
	List *subPlanList = GenerateSubplansForSubqueriesAndCTEs(planId, originalQuery,
															 plannerRestrictionContext);

	/*
	 * If subqueries were recursively planned then we need to replan the query
	 * to get the new planner restriction context and apply planner transformations.
	 *
	 * We could simplify this code if the logical planner was capable of dealing
	 * with an original query. In that case, we would only have to filter the
	 * planner restriction context.
	 *
	 * Note that we check both for subplans and whether the query had CTEs
	 * prior to calling GenerateSubplansForSubqueriesAndCTEs. If none of
	 * the CTEs are referenced then there are no subplans, but we still want
	 * to retry the router planner.
	 */
	if (list_length(subPlanList) > 0 || hasCtes)
	{
		Query *newQuery = copyObject(originalQuery);
		bool setPartitionedTablesInherited = false;
		PlannerRestrictionContext *currentPlannerRestrictionContext =
			CurrentPlannerRestrictionContext();

		/* reset the current planner restrictions context */
		ResetPlannerRestrictionContext(currentPlannerRestrictionContext);

		/*
		 * We force standard_planner to treat partitioned tables as regular tables
		 * by clearing the inh flag on RTEs. We already did this at the start of
		 * distributed_planner, but on a copy of the original query, so we need
		 * to do it again here.
		 */
		AdjustPartitioningForDistributedPlanning(ExtractRangeTableEntryList(newQuery),
												 setPartitionedTablesInherited);

		/*
		 * Some relations may have been removed from the query, but we can skip
		 * AssignRTEIdentities since we currently do not rely on RTE identities
		 * being contiguous.
		 */

		standard_planner(newQuery, 0, boundParams);

		/* overwrite the old transformed query with the new transformed query */
		*query = *newQuery;

		/* recurse into CreateDistributedPlan with subqueries/CTEs replaced */
		distributedPlan = CreateDistributedPlan(planId, originalQuery, query, NULL, false,
												plannerRestrictionContext);
		distributedPlan->subPlanList = subPlanList;

		FinalizeDistributedPlan(distributedPlan, originalQuery);

		return distributedPlan;
	}

	/*
	 * DML command returns a planning error, even after recursive planning. The
	 * logical planner cannot handle DML commands so return the plan with the
	 * error.
	 */
	if (IsModifyCommand(originalQuery))
	{
		FinalizeDistributedPlan(distributedPlan, originalQuery);

		return distributedPlan;
	}

	/*
	 * CTEs are stripped from the original query by RecursivelyPlanSubqueriesAndCTEs.
	 * If we get here and there are still CTEs that means that none of the CTEs are
	 * referenced. We therefore also strip the CTEs from the rewritten query.
	 */
	query->cteList = NIL;
	Assert(originalQuery->cteList == NIL);

	MultiTreeRoot *logicalPlan = MultiLogicalPlanCreate(originalQuery, query,
														plannerRestrictionContext);
	MultiLogicalPlanOptimize(logicalPlan);

	/*
	 * This check is here to make it likely that all node types used in
	 * Citus are dumpable. Explain can dump logical and physical plans
	 * using the extended outfuncs infrastructure, but it's infeasible to
	 * test most plans. MultiQueryContainerNode always serializes the
	 * physical plan, so there's no need to check that separately
	 */
	CheckNodeIsDumpable((Node *) logicalPlan);

	/* Create the physical plan */
	distributedPlan = CreatePhysicalDistributedPlan(logicalPlan,
													plannerRestrictionContext);

	/* distributed plan currently should always succeed or error out */
	Assert(distributedPlan && distributedPlan->planningError == NULL);

	FinalizeDistributedPlan(distributedPlan, originalQuery);

	return distributedPlan;
}


/*
 * FinalizeDistributedPlan is the final step of distributed planning. The function
 * currently only implements some optimizations for intermediate result(s) pruning.
 */
static void
FinalizeDistributedPlan(DistributedPlan *plan, Query *originalQuery)
{
	/*
	 * Fast path queries, we cannot have any subplans by their definition,
	 * so skip expensive traversals.
	 */
	if (!plan->fastPathRouterPlan)
	{
		RecordSubPlansUsedInPlan(plan, originalQuery);
	}
}


/*
 * RecordSubPlansUsedInPlan gets a distributed plan a queryTree, and
 * updates the usedSubPlanNodeList of the distributed plan.
 *
 * The function simply pulls all the subPlans that are used in the queryTree
 * with one exception: subPlans in the HAVING clause. The reason is explained
 * in the function.
 */
static void
RecordSubPlansUsedInPlan(DistributedPlan *plan, Query *originalQuery)
{
	Node *havingQual = originalQuery->havingQual;

	/* temporarily set to NULL, we're going to restore before the function returns */
	originalQuery->havingQual = NULL;

	/*
	 * Mark the subplans as needed on remote side. Note that this decision is revisited
	 * on execution, when the query only consists of intermediate results.
	 */
	List *subplansExceptHaving = FindSubPlansUsedInNode((Node *) originalQuery);
	UpdateUsedPlanListLocation(subplansExceptHaving, SUBPLAN_ACCESS_REMOTE);

	/* do the same for HAVING part of the query */
	List *subplansInHaving = NIL;
	if (originalQuery->hasSubLinks &&
		FindNodeCheck(havingQual, IsNodeSubquery))
	{
		subplansInHaving = FindSubPlansUsedInNode(havingQual);
		if (plan->masterQuery)
		{
			/*
			 * If we have the master query, we're sure that the result is needed locally.
			 * Otherwise, such as router queries, the plan may not be required locally.
			 * Note that if the query consists of only intermediate results, the executor
			 * may still prefer to write locally.
			 *
			 * If any of the subplansInHaving is used in other parts of the query,
			 * we'll later merge those subPlans and send to remote.
			 */
			UpdateUsedPlanListLocation(subplansInHaving, SUBPLAN_ACCESS_LOCAL);
		}
		else
		{
			UpdateUsedPlanListLocation(subplansInHaving, SUBPLAN_ACCESS_REMOTE);
		}
	}

	/* set back the havingQual and the calculated subplans */
	originalQuery->havingQual = havingQual;

	/* merge the used subplans */
	plan->usedSubPlanNodeList =
		MergeUsedSubPlanLists(subplansExceptHaving, subplansInHaving);
}


/*
 * EnsurePartitionTableNotReplicated errors out if the infput relation is
 * a partition table and the table has a replication factor greater than
 * one.
 *
 * If the table is not a partition or replication factor is 1, the function
 * becomes a no-op.
 */
void
EnsurePartitionTableNotReplicated(Oid relationId)
{
	DeferredErrorMessage *deferredError =
		DeferErrorIfPartitionTableNotSingleReplicated(relationId);
	if (deferredError != NULL)
	{
		RaiseDeferredError(deferredError, ERROR);
	}
}


/*
 * DeferErrorIfPartitionTableNotSingleReplicated defers error if the input relation
 * is a partition table with replication factor > 1. Otherwise, the function returns
 * NULL.
 */
static DeferredErrorMessage *
DeferErrorIfPartitionTableNotSingleReplicated(Oid relationId)
{
	if (PartitionTableNoLock(relationId) && !SingleReplicatedTable(relationId))
	{
		Oid parentOid = PartitionParentOid(relationId);
		char *parentRelationTest = get_rel_name(parentOid);
		StringInfo errorHint = makeStringInfo();

		appendStringInfo(errorHint, "Run the query on the parent table "
									"\"%s\" instead.", parentRelationTest);

		return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
							 "modifications on partitions when replication "
							 "factor is greater than 1 is not supported",
							 NULL, errorHint->data);
	}

	return NULL;
}


/*
 * ResolveExternalParams replaces the external parameters that appears
 * in the query with the corresponding entries in the boundParams.
 *
 * Note that this function is inspired by eval_const_expr() on Postgres.
 * We cannot use that function because it requires access to PlannerInfo.
 */
Node *
ResolveExternalParams(Node *inputNode, ParamListInfo boundParams)
{
	/* consider resolving external parameters only when boundParams exists */
	if (!boundParams)
	{
		return inputNode;
	}

	if (inputNode == NULL)
	{
		return NULL;
	}

	if (IsA(inputNode, Param))
	{
		Param *paramToProcess = (Param *) inputNode;
		int numberOfParameters = boundParams->numParams;
		int parameterId = paramToProcess->paramid;
		int16 typeLength = 0;
		bool typeByValue = false;
		Datum constValue = 0;

		if (paramToProcess->paramkind != PARAM_EXTERN)
		{
			return inputNode;
		}

		if (parameterId < 0)
		{
			return inputNode;
		}

		/* parameterId starts from 1 */
		int parameterIndex = parameterId - 1;
		if (parameterIndex >= numberOfParameters)
		{
			return inputNode;
		}

		ParamExternData *correspondingParameterData =
			&boundParams->params[parameterIndex];

		if (!(correspondingParameterData->pflags & PARAM_FLAG_CONST))
		{
			return inputNode;
		}

		get_typlenbyval(paramToProcess->paramtype, &typeLength, &typeByValue);

		bool paramIsNull = correspondingParameterData->isnull;
		if (paramIsNull)
		{
			constValue = 0;
		}
		else if (typeByValue)
		{
			constValue = correspondingParameterData->value;
		}
		else
		{
			/*
			 * Out of paranoia ensure that datum lives long enough,
			 * although bind params currently should always live
			 * long enough.
			 */
			constValue = datumCopy(correspondingParameterData->value, typeByValue,
								   typeLength);
		}

		return (Node *) makeConst(paramToProcess->paramtype, paramToProcess->paramtypmod,
								  paramToProcess->paramcollid, typeLength, constValue,
								  paramIsNull, typeByValue);
	}
	else if (IsA(inputNode, Query))
	{
		return (Node *) query_tree_mutator((Query *) inputNode, ResolveExternalParams,
										   boundParams, 0);
	}

	return expression_tree_mutator(inputNode, ResolveExternalParams, boundParams);
}


/*
 * GetDistributedPlan returns the associated DistributedPlan for a CustomScan.
 *
 * Callers should only read from the returned data structure, since it may be
 * the plan of a prepared statement and may therefore be reused.
 */
DistributedPlan *
GetDistributedPlan(CustomScan *customScan)
{
	Assert(list_length(customScan->custom_private) == 1);

	Node *node = (Node *) linitial(customScan->custom_private);
	Assert(CitusIsA(node, DistributedPlan));

	CheckNodeCopyAndSerialization(node);

	DistributedPlan *distributedPlan = (DistributedPlan *) node;

	return distributedPlan;
}


/*
 * FinalizePlan combines local plan with distributed plan and creates a plan
 * which can be run by the PostgreSQL executor.
 */
PlannedStmt *
FinalizePlan(PlannedStmt *localPlan, DistributedPlan *distributedPlan)
{
	PlannedStmt *finalPlan = NULL;
	CustomScan *customScan = makeNode(CustomScan);
	MultiExecutorType executorType = MULTI_EXECUTOR_INVALID_FIRST;

	if (!distributedPlan->planningError)
	{
		executorType = JobExecutorType(distributedPlan);
	}

	switch (executorType)
	{
		case MULTI_EXECUTOR_ADAPTIVE:
		{
			customScan->methods = &AdaptiveExecutorCustomScanMethods;
			break;
		}

		case MULTI_EXECUTOR_TASK_TRACKER:
		{
			customScan->methods = &TaskTrackerCustomScanMethods;
			break;
		}

		case MULTI_EXECUTOR_COORDINATOR_INSERT_SELECT:
		{
			customScan->methods = &CoordinatorInsertSelectCustomScanMethods;
			break;
		}

		default:
		{
			customScan->methods = &DelayedErrorCustomScanMethods;
			break;
		}
	}

	if (IsMultiTaskPlan(distributedPlan))
	{
		/* if it is not a single task executable plan, inform user according to the log level */
		if (MultiTaskQueryLogLevel != MULTI_TASK_QUERY_INFO_OFF)
		{
			ereport(MultiTaskQueryLogLevel, (errmsg(
												 "multi-task query about to be executed"),
											 errhint(
												 "Queries are split to multiple tasks "
												 "if they have to be split into several"
												 " queries on the workers.")));
		}
	}

	distributedPlan->relationIdList = localPlan->relationOids;
	distributedPlan->queryId = localPlan->queryId;

	Node *distributedPlanData = (Node *) distributedPlan;

	customScan->custom_private = list_make1(distributedPlanData);
	customScan->flags = CUSTOMPATH_SUPPORT_BACKWARD_SCAN;

	if (distributedPlan->masterQuery)
	{
		finalPlan = FinalizeNonRouterPlan(localPlan, distributedPlan, customScan);
	}
	else
	{
		finalPlan = FinalizeRouterPlan(localPlan, customScan);
	}

	return finalPlan;
}


/*
 * FinalizeNonRouterPlan gets the distributed custom scan plan, and creates the
 * final master select plan on the top of this distributed plan for adaptive
 * and task-tracker executors.
 */
static PlannedStmt *
FinalizeNonRouterPlan(PlannedStmt *localPlan, DistributedPlan *distributedPlan,
					  CustomScan *customScan)
{
	PlannedStmt *finalPlan = MasterNodeSelectPlan(distributedPlan, customScan);
	finalPlan->queryId = localPlan->queryId;
	finalPlan->utilityStmt = localPlan->utilityStmt;

	/* add original range table list for access permission checks */
	finalPlan->rtable = list_concat(finalPlan->rtable, localPlan->rtable);

	return finalPlan;
}


/*
 * FinalizeRouterPlan gets a CustomScan node which already wrapped distributed
 * part of a router plan and sets it as the direct child of the router plan
 * because we don't run any query on master node for router executable queries.
 * Here, we also rebuild the column list to read from the remote scan.
 */
static PlannedStmt *
FinalizeRouterPlan(PlannedStmt *localPlan, CustomScan *customScan)
{
	List *columnNameList = NIL;

	customScan->custom_scan_tlist =
		makeCustomScanTargetlistFromExistingTargetList(localPlan->planTree->targetlist);
	customScan->scan.plan.targetlist =
		makeTargetListFromCustomScanList(customScan->custom_scan_tlist);

	/* extract the column names from the final targetlist*/
	TargetEntry *targetEntry = NULL;
	foreach_ptr(targetEntry, customScan->scan.plan.targetlist)
	{
		Value *columnName = makeString(targetEntry->resname);
		columnNameList = lappend(columnNameList, columnName);
	}

	PlannedStmt *routerPlan = makeNode(PlannedStmt);
	routerPlan->planTree = (Plan *) customScan;

	RangeTblEntry *remoteScanRangeTableEntry = RemoteScanRangeTableEntry(columnNameList);
	routerPlan->rtable = list_make1(remoteScanRangeTableEntry);

	/* add original range table list for access permission checks */
	routerPlan->rtable = list_concat(routerPlan->rtable, localPlan->rtable);

	routerPlan->canSetTag = true;
	routerPlan->relationOids = NIL;

	routerPlan->queryId = localPlan->queryId;
	routerPlan->utilityStmt = localPlan->utilityStmt;
	routerPlan->commandType = localPlan->commandType;
	routerPlan->hasReturning = localPlan->hasReturning;

	return routerPlan;
}


/*
 * makeCustomScanTargetlistFromExistingTargetList rebuilds the targetlist from the remote
 * query into a list that can be used as the custom_scan_tlist for our Citus Custom Scan.
 */
static List *
makeCustomScanTargetlistFromExistingTargetList(List *existingTargetlist)
{
	List *custom_scan_tlist = NIL;

	/* we will have custom scan range table entry as the first one in the list */
	const int customScanRangeTableIndex = 1;

	/* build a targetlist to read from the custom scan output */
	TargetEntry *targetEntry = NULL;
	foreach_ptr(targetEntry, existingTargetlist)
	{
		Assert(IsA(targetEntry, TargetEntry));

		/*
		 * This is unlikely to be hit because we would not need resjunk stuff
		 * at the toplevel of a router query - all things needing it have been
		 * pushed down.
		 */
		if (targetEntry->resjunk)
		{
			continue;
		}

		/* build target entry pointing to remote scan range table entry */
		Var *newVar = makeVarFromTargetEntry(customScanRangeTableIndex, targetEntry);

		if (newVar->vartype == RECORDOID || newVar->vartype == RECORDARRAYOID)
		{
			/*
			 * Add the anonymous composite type to the type cache and store
			 * the key in vartypmod. Eventually this makes its way into the
			 * TupleDesc used by the executor, which uses it to parse the
			 * query results from the workers in BuildTupleFromCStrings.
			 */
			newVar->vartypmod = BlessRecordExpression(targetEntry->expr);
		}

		TargetEntry *newTargetEntry = flatCopyTargetEntry(targetEntry);
		newTargetEntry->expr = (Expr *) newVar;
		custom_scan_tlist = lappend(custom_scan_tlist, newTargetEntry);
	}

	return custom_scan_tlist;
}


/*
 * makeTargetListFromCustomScanList based on a custom_scan_tlist create the target list to
 * use on the Citus Custom Scan Node. The targetlist differs from the custom_scan_tlist in
 * a way that the expressions in the targetlist all are references to the index (resno) in
 * the custom_scan_tlist in their varattno while the varno is replaced with INDEX_VAR
 * instead of the range table entry index.
 */
static List *
makeTargetListFromCustomScanList(List *custom_scan_tlist)
{
	List *targetList = NIL;
	TargetEntry *targetEntry = NULL;
	int resno = 1;
	foreach_ptr(targetEntry, custom_scan_tlist)
	{
		/*
		 * INDEX_VAR is used to reference back to the TargetEntry in custom_scan_tlist by
		 * its resno (index)
		 */
		Var *newVar = makeVarFromTargetEntry(INDEX_VAR, targetEntry);
		TargetEntry *newTargetEntry = makeTargetEntry((Expr *) newVar, resno,
													  targetEntry->resname,
													  targetEntry->resjunk);
		targetList = lappend(targetList, newTargetEntry);
		resno++;
	}
	return targetList;
}


/*
 * BlessRecordExpression ensures we can parse an anonymous composite type on the
 * target list of a query that is sent to the worker.
 *
 * We cannot normally parse record types coming from the workers unless we
 * "bless" the tuple descriptor, which adds a transient type to the type cache
 * and assigns it a type mod value, which is the key in the type cache.
 */
int32
BlessRecordExpression(Expr *expr)
{
	int32 typeMod = -1;

	if (IsA(expr, FuncExpr) || IsA(expr, OpExpr))
	{
		/*
		 * Handle functions that return records on the target
		 * list, e.g. SELECT function_call(1,2);
		 */
		Oid resultTypeId = InvalidOid;
		TupleDesc resultTupleDesc = NULL;

		/* get_expr_result_type blesses the tuple descriptor */
		TypeFuncClass typeClass = get_expr_result_type((Node *) expr, &resultTypeId,
													   &resultTupleDesc);

		if (typeClass == TYPEFUNC_COMPOSITE)
		{
			typeMod = resultTupleDesc->tdtypmod;
		}
	}
	else if (IsA(expr, RowExpr))
	{
		/*
		 * Handle row expressions, e.g. SELECT (1,2);
		 */
		RowExpr *rowExpr = (RowExpr *) expr;
		TupleDesc rowTupleDesc = NULL;
		ListCell *argCell = NULL;
		int currentResno = 1;

#if PG_VERSION_NUM >= 120000
		rowTupleDesc = CreateTemplateTupleDesc(list_length(rowExpr->args));
#else
		rowTupleDesc = CreateTemplateTupleDesc(list_length(rowExpr->args), false);
#endif

		foreach(argCell, rowExpr->args)
		{
			Node *rowArg = (Node *) lfirst(argCell);
			Oid rowArgTypeId = exprType(rowArg);
			int rowArgTypeMod = exprTypmod(rowArg);

			if (rowArgTypeId == RECORDOID || rowArgTypeId == RECORDARRAYOID)
			{
				/* ensure nested rows are blessed as well */
				rowArgTypeMod = BlessRecordExpression((Expr *) rowArg);
			}

			TupleDescInitEntry(rowTupleDesc, currentResno, NULL,
							   rowArgTypeId, rowArgTypeMod, 0);
			TupleDescInitEntryCollation(rowTupleDesc, currentResno,
										exprCollation(rowArg));

			currentResno++;
		}

		BlessTupleDesc(rowTupleDesc);

		typeMod = rowTupleDesc->tdtypmod;
	}
	else if (IsA(expr, ArrayExpr))
	{
		/*
		 * Handle row array expressions, e.g. SELECT ARRAY[(1,2)];
		 * Postgres allows ARRAY[(1,2),(1,2,3)]. We do not.
		 */
		ArrayExpr *arrayExpr = (ArrayExpr *) expr;

		typeMod = BlessRecordExpressionList(arrayExpr->elements);
	}
	else if (IsA(expr, NullIfExpr))
	{
		NullIfExpr *nullIfExpr = (NullIfExpr *) expr;

		typeMod = BlessRecordExpressionList(nullIfExpr->args);
	}
	else if (IsA(expr, MinMaxExpr))
	{
		MinMaxExpr *minMaxExpr = (MinMaxExpr *) expr;

		typeMod = BlessRecordExpressionList(minMaxExpr->args);
	}
	else if (IsA(expr, CoalesceExpr))
	{
		CoalesceExpr *coalesceExpr = (CoalesceExpr *) expr;

		typeMod = BlessRecordExpressionList(coalesceExpr->args);
	}
	else if (IsA(expr, CaseExpr))
	{
		CaseExpr *caseExpr = (CaseExpr *) expr;
		List *results = NIL;
		ListCell *whenCell = NULL;

		foreach(whenCell, caseExpr->args)
		{
			CaseWhen *whenArg = (CaseWhen *) lfirst(whenCell);

			results = lappend(results, whenArg->result);
		}

		if (caseExpr->defresult != NULL)
		{
			results = lappend(results, caseExpr->defresult);
		}

		typeMod = BlessRecordExpressionList(results);
	}

	return typeMod;
}


/*
 * BlessRecordExpressionList maps BlessRecordExpression over a list.
 * Returns typmod of all expressions, or -1 if they are not all the same.
 * Ignores expressions with a typmod of -1.
 */
static int32
BlessRecordExpressionList(List *exprs)
{
	int32 finalTypeMod = -1;
	ListCell *exprCell = NULL;
	foreach(exprCell, exprs)
	{
		Node *exprArg = (Node *) lfirst(exprCell);
		int32 exprTypeMod = BlessRecordExpression((Expr *) exprArg);

		if (exprTypeMod == -1)
		{
			continue;
		}
		else if (finalTypeMod == -1)
		{
			finalTypeMod = exprTypeMod;
		}
		else if (finalTypeMod != exprTypeMod)
		{
			return -1;
		}
	}
	return finalTypeMod;
}


/*
 * RemoteScanRangeTableEntry creates a range table entry from given column name
 * list to represent a remote scan.
 */
RangeTblEntry *
RemoteScanRangeTableEntry(List *columnNameList)
{
	RangeTblEntry *remoteScanRangeTableEntry = makeNode(RangeTblEntry);

	/* we use RTE_VALUES for custom scan because we can't look up relation */
	remoteScanRangeTableEntry->rtekind = RTE_VALUES;
	remoteScanRangeTableEntry->eref = makeAlias("remote_scan", columnNameList);
	remoteScanRangeTableEntry->inh = false;
	remoteScanRangeTableEntry->inFromCl = true;

	return remoteScanRangeTableEntry;
}


/*
 * CheckNodeIsDumpable checks that the passed node can be dumped using
 * nodeToString(). As this checks is expensive, it's only active when
 * assertions are enabled.
 */
static void
CheckNodeIsDumpable(Node *node)
{
#ifdef USE_ASSERT_CHECKING
	char *out = nodeToString(node);
	pfree(out);
#endif
}


/*
 * CheckNodeCopyAndSerialization checks copy/dump/read functions
 * for nodes and returns copy of the input.
 *
 * It is only active when assertions are enabled, otherwise it returns
 * the input directly. We use this to confirm that our serialization
 * and copy logic produces the correct plan during regression tests.
 *
 * It does not check string equality on node dumps due to differences
 * in some Postgres types.
 */
static Node *
CheckNodeCopyAndSerialization(Node *node)
{
#ifdef USE_ASSERT_CHECKING
	char *out = nodeToString(node);
	Node *nodeCopy = copyObject(node);
	char *outCopy = nodeToString(nodeCopy);

	pfree(out);
	pfree(outCopy);

	return nodeCopy;
#else
	return node;
#endif
}


/*
 * multi_join_restriction_hook is a hook called by postgresql standard planner
 * to notify us about various planning information regarding joins. We use
 * it to learn about the joining column.
 */
void
multi_join_restriction_hook(PlannerInfo *root,
							RelOptInfo *joinrel,
							RelOptInfo *outerrel,
							RelOptInfo *innerrel,
							JoinType jointype,
							JoinPathExtraData *extra)
{
	/*
	 * Use a memory context that's guaranteed to live long enough, could be
	 * called in a more shortly lived one (e.g. with GEQO).
	 */
	PlannerRestrictionContext *plannerRestrictionContext =
		CurrentPlannerRestrictionContext();
	MemoryContext restrictionsMemoryContext = plannerRestrictionContext->memoryContext;
	MemoryContext oldMemoryContext = MemoryContextSwitchTo(restrictionsMemoryContext);

	/*
	 * We create a copy of restrictInfoList because it may be created in a memory
	 * context which will be deleted when we still need it, thus we create a copy
	 * of it in our memory context.
	 */
	List *restrictInfoList = copyObject(extra->restrictlist);

	JoinRestrictionContext *joinRestrictionContext =
		plannerRestrictionContext->joinRestrictionContext;
	Assert(joinRestrictionContext != NULL);

	JoinRestriction *joinRestriction = palloc0(sizeof(JoinRestriction));
	joinRestriction->joinType = jointype;
	joinRestriction->joinRestrictInfoList = restrictInfoList;
	joinRestriction->plannerInfo = root;
	joinRestriction->innerrel = innerrel;
	joinRestriction->outerrel = outerrel;

	joinRestrictionContext->joinRestrictionList =
		lappend(joinRestrictionContext->joinRestrictionList, joinRestriction);

	/*
	 * Keep track if we received any semi joins here. If we didn't we can
	 * later safely convert any semi joins in the rewritten query to inner
	 * joins.
	 */
	plannerRestrictionContext->hasSemiJoin = plannerRestrictionContext->hasSemiJoin ||
											 extra->sjinfo->jointype == JOIN_SEMI;

	MemoryContextSwitchTo(oldMemoryContext);
}


/*
 * multi_relation_restriction_hook is a hook called by postgresql standard planner
 * to notify us about various planning information regarding a relation. We use
 * it to retrieve restrictions on relations.
 */
void
multi_relation_restriction_hook(PlannerInfo *root, RelOptInfo *relOptInfo,
								Index restrictionIndex, RangeTblEntry *rte)
{
	DistTableCacheEntry *cacheEntry = NULL;

	if (ReplaceCitusExtraDataContainer && IsCitusExtraDataContainerRelation(rte))
	{
		/*
		 * We got here by planning the query part that needs to be executed on the query
		 * coordinator node.
		 * We have verified the occurrence of the citus_extra_datacontainer function
		 * encoding the remote scan we plan to execute here. We will replace all paths
		 * with a path describing our custom scan.
		 */
		Path *path = CreateCitusCustomScanPath(root, relOptInfo, restrictionIndex, rte,
											   ReplaceCitusExtraDataContainerWithCustomScan);

		/* replace all paths with our custom scan and recalculate cheapest */
		relOptInfo->pathlist = list_make1(path);
		set_cheapest(relOptInfo);

		return;
	}

	AdjustReadIntermediateResultCost(rte, relOptInfo);
	AdjustReadIntermediateResultArrayCost(rte, relOptInfo);

	if (rte->rtekind != RTE_RELATION)
	{
		return;
	}

	/*
	 * Use a memory context that's guaranteed to live long enough, could be
	 * called in a more shortly lived one (e.g. with GEQO).
	 */
	PlannerRestrictionContext *plannerRestrictionContext =
		CurrentPlannerRestrictionContext();
	MemoryContext restrictionsMemoryContext = plannerRestrictionContext->memoryContext;
	MemoryContext oldMemoryContext = MemoryContextSwitchTo(restrictionsMemoryContext);

	bool distributedTable = IsDistributedTable(rte->relid);
	bool localTable = !distributedTable;

	RelationRestriction *relationRestriction = palloc0(sizeof(RelationRestriction));
	relationRestriction->index = restrictionIndex;
	relationRestriction->relationId = rte->relid;
	relationRestriction->rte = rte;
	relationRestriction->relOptInfo = relOptInfo;
	relationRestriction->distributedRelation = distributedTable;
	relationRestriction->plannerInfo = root;
	relationRestriction->prunedShardIntervalList = NIL;

	/* see comments on GetVarFromAssignedParam() */
	relationRestriction->outerPlanParamsList = OuterPlanParamsList(root);

	RelationRestrictionContext *relationRestrictionContext =
		plannerRestrictionContext->relationRestrictionContext;
	relationRestrictionContext->hasDistributedRelation |= distributedTable;
	relationRestrictionContext->hasLocalRelation |= localTable;

	/*
	 * We're also keeping track of whether all participant
	 * tables are reference tables.
	 */
	if (distributedTable)
	{
		cacheEntry = DistributedTableCacheEntry(rte->relid);

		relationRestrictionContext->allReferenceTables &=
			(cacheEntry->partitionMethod == DISTRIBUTE_BY_NONE);
	}

	relationRestrictionContext->relationRestrictionList =
		lappend(relationRestrictionContext->relationRestrictionList, relationRestriction);

	MemoryContextSwitchTo(oldMemoryContext);
}


/*
 * AdjustReadIntermediateResultCost adjusts the row count and total cost
 * of a read_intermediate_result call based on the file size.
 */
static void
AdjustReadIntermediateResultCost(RangeTblEntry *rangeTableEntry, RelOptInfo *relOptInfo)
{
	if (rangeTableEntry->rtekind != RTE_FUNCTION ||
		list_length(rangeTableEntry->functions) != 1)
	{
		/* avoid more expensive checks below for non-functions */
		return;
	}

	if (!CitusHasBeenLoaded() || !CheckCitusVersion(DEBUG5))
	{
		/* read_intermediate_result may not exist */
		return;
	}

	if (!ContainsReadIntermediateResultFunction((Node *) rangeTableEntry->functions))
	{
		return;
	}

	RangeTblFunction *rangeTableFunction = (RangeTblFunction *) linitial(
		rangeTableEntry->functions);
	FuncExpr *funcExpression = (FuncExpr *) rangeTableFunction->funcexpr;
	Const *resultIdConst = (Const *) linitial(funcExpression->args);
	if (!IsA(resultIdConst, Const))
	{
		/* not sure how to interpret non-const */
		return;
	}

	Datum resultIdDatum = resultIdConst->constvalue;

	Const *resultFormatConst = (Const *) lsecond(funcExpression->args);
	if (!IsA(resultFormatConst, Const))
	{
		/* not sure how to interpret non-const */
		return;
	}

	AdjustReadIntermediateResultsCostInternal(relOptInfo,
											  rangeTableFunction->funccoltypes,
											  1, &resultIdDatum, resultFormatConst);
}


/*
 * AdjustReadIntermediateResultArrayCost adjusts the row count and total cost
 * of a read_intermediate_results(resultIds, format) call based on the file size.
 */
static void
AdjustReadIntermediateResultArrayCost(RangeTblEntry *rangeTableEntry,
									  RelOptInfo *relOptInfo)
{
	Datum *resultIdArray = NULL;
	int resultIdCount = 0;

	if (rangeTableEntry->rtekind != RTE_FUNCTION ||
		list_length(rangeTableEntry->functions) != 1)
	{
		/* avoid more expensive checks below for non-functions */
		return;
	}

	if (!CitusHasBeenLoaded() || !CheckCitusVersion(DEBUG5))
	{
		/* read_intermediate_result may not exist */
		return;
	}

	if (!ContainsReadIntermediateResultArrayFunction((Node *) rangeTableEntry->functions))
	{
		return;
	}

	RangeTblFunction *rangeTableFunction =
		(RangeTblFunction *) linitial(rangeTableEntry->functions);
	FuncExpr *funcExpression = (FuncExpr *) rangeTableFunction->funcexpr;
	Const *resultIdConst = (Const *) linitial(funcExpression->args);
	if (!IsA(resultIdConst, Const))
	{
		/* not sure how to interpret non-const */
		return;
	}

	Datum resultIdArrayDatum = resultIdConst->constvalue;
	deconstruct_array(DatumGetArrayTypeP(resultIdArrayDatum), TEXTOID, -1, false,
					  'i', &resultIdArray, NULL, &resultIdCount);

	Const *resultFormatConst = (Const *) lsecond(funcExpression->args);
	if (!IsA(resultFormatConst, Const))
	{
		/* not sure how to interpret non-const */
		return;
	}

	AdjustReadIntermediateResultsCostInternal(relOptInfo,
											  rangeTableFunction->funccoltypes,
											  resultIdCount, resultIdArray,
											  resultFormatConst);
}


/*
 * AdjustReadIntermediateResultsCostInternal adjusts the row count and total cost
 * of reading intermediate results based on file sizes.
 */
static void
AdjustReadIntermediateResultsCostInternal(RelOptInfo *relOptInfo, List *columnTypes,
										  int resultIdCount, Datum *resultIds,
										  Const *resultFormatConst)
{
	PathTarget *reltarget = relOptInfo->reltarget;
	List *pathList = relOptInfo->pathlist;
	Path *path = NULL;
	double rowCost = 0.;
	double rowSizeEstimate = 0;
	double rowCountEstimate = 0.;
	double ioCost = 0.;
#if PG_VERSION_NUM >= 120000
	QualCost funcCost = { 0., 0. };
#else
	double funcCost = 0.;
#endif
	int64 totalResultSize = 0;
	ListCell *typeCell = NULL;

	Datum resultFormatDatum = resultFormatConst->constvalue;
	Oid resultFormatId = DatumGetObjectId(resultFormatDatum);
	bool binaryFormat = (resultFormatId == BinaryCopyFormatId());

	for (int index = 0; index < resultIdCount; index++)
	{
		char *resultId = TextDatumGetCString(resultIds[index]);
		int64 resultSize = IntermediateResultSize(resultId);
		if (resultSize < 0)
		{
			/* result does not exist, will probably error out later on */
			return;
		}

		if (binaryFormat)
		{
			/* subtract 11-byte signature + 8 byte header + 2-byte footer */
			totalResultSize -= 21;
		}

		totalResultSize += resultSize;
	}

	/* start with the cost of evaluating quals */
	rowCost += relOptInfo->baserestrictcost.per_tuple;

	/* postgres' estimate for the width of the rows */
	rowSizeEstimate += reltarget->width;

	/* add 2 bytes for column count (binary) or line separator (text) */
	rowSizeEstimate += 2;

	foreach(typeCell, columnTypes)
	{
		Oid columnTypeId = lfirst_oid(typeCell);
		Oid inputFunctionId = InvalidOid;
		Oid typeIOParam = InvalidOid;

		if (binaryFormat)
		{
			getTypeBinaryInputInfo(columnTypeId, &inputFunctionId, &typeIOParam);

			/* binary format: 4 bytes for field size */
			rowSizeEstimate += 4;
		}
		else
		{
			getTypeInputInfo(columnTypeId, &inputFunctionId, &typeIOParam);

			/* text format: 1 byte for tab separator */
			rowSizeEstimate += 1;
		}


		/* add the cost of parsing a column */
#if PG_VERSION_NUM >= 120000
		add_function_cost(NULL, inputFunctionId, NULL, &funcCost);
#else
		funcCost += get_func_cost(inputFunctionId);
#endif
	}
#if PG_VERSION_NUM >= 120000
	rowCost += funcCost.per_tuple;
#else
	rowCost += funcCost * cpu_operator_cost;
#endif

	/* estimate the number of rows based on the file size and estimated row size */
	rowCountEstimate = Max(1, (double) totalResultSize / rowSizeEstimate);

	/* cost of reading the data */
	ioCost = seq_page_cost * totalResultSize / BLCKSZ;

	Assert(pathList != NIL);

	/* tell the planner about the cost and row count of the function */
	path = (Path *) linitial(pathList);
	path->rows = rowCountEstimate;
	path->total_cost = rowCountEstimate * rowCost + ioCost;

#if PG_VERSION_NUM >= 120000
	path->startup_cost = funcCost.startup + relOptInfo->baserestrictcost.startup;
#endif
}


/*
 * OuterPlanParamsList creates a list of RootPlanParams for outer nodes of the
 * given root. The first item in the list corresponds to parent_root, and the
 * last item corresponds to the outer most node.
 */
static List *
OuterPlanParamsList(PlannerInfo *root)
{
	List *planParamsList = NIL;

	for (PlannerInfo *outerNodeRoot = root->parent_root; outerNodeRoot != NULL;
		 outerNodeRoot = outerNodeRoot->parent_root)
	{
		RootPlanParams *rootPlanParams = palloc0(sizeof(RootPlanParams));
		rootPlanParams->root = outerNodeRoot;

		/*
		 * TODO: In SearchPlannerParamList() we are only interested in Var plan
		 * params, consider copying just them here.
		 */
		rootPlanParams->plan_params = CopyPlanParamList(outerNodeRoot->plan_params);

		planParamsList = lappend(planParamsList, rootPlanParams);
	}

	return planParamsList;
}


/*
 * CopyPlanParamList deep copies the input PlannerParamItem list and returns the newly
 * allocated list.
 * Note that we cannot use copyObject() function directly since there is no support for
 * copying PlannerParamItem structs.
 */
static List *
CopyPlanParamList(List *originalPlanParamList)
{
	ListCell *planParamCell = NULL;
	List *copiedPlanParamList = NIL;

	foreach(planParamCell, originalPlanParamList)
	{
		PlannerParamItem *originalParamItem = lfirst(planParamCell);
		PlannerParamItem *copiedParamItem = makeNode(PlannerParamItem);

		copiedParamItem->paramId = originalParamItem->paramId;
		copiedParamItem->item = copyObject(originalParamItem->item);

		copiedPlanParamList = lappend(copiedPlanParamList, copiedParamItem);
	}

	return copiedPlanParamList;
}


/*
 * CreateAndPushPlannerRestrictionContext creates a new relation restriction context
 * and a new join context, inserts it to the beginning of the
 * plannerRestrictionContextList. Finally, the planner restriction context is
 * inserted to the beginning of the plannerRestrictionContextList and it is returned.
 */
static PlannerRestrictionContext *
CreateAndPushPlannerRestrictionContext(void)
{
	PlannerRestrictionContext *plannerRestrictionContext =
		palloc0(sizeof(PlannerRestrictionContext));

	plannerRestrictionContext->relationRestrictionContext =
		palloc0(sizeof(RelationRestrictionContext));

	plannerRestrictionContext->joinRestrictionContext =
		palloc0(sizeof(JoinRestrictionContext));

	plannerRestrictionContext->fastPathRestrictionContext =
		palloc0(sizeof(FastPathRestrictionContext));

	plannerRestrictionContext->memoryContext = CurrentMemoryContext;

	/* we'll apply logical AND as we add tables */
	plannerRestrictionContext->relationRestrictionContext->allReferenceTables = true;

	plannerRestrictionContextList = lcons(plannerRestrictionContext,
										  plannerRestrictionContextList);

	return plannerRestrictionContext;
}


/*
 * CurrentRestrictionContext returns the most recently added
 * PlannerRestrictionContext from the plannerRestrictionContextList list.
 */
static PlannerRestrictionContext *
CurrentPlannerRestrictionContext(void)
{
	Assert(plannerRestrictionContextList != NIL);

	PlannerRestrictionContext *plannerRestrictionContext =
		(PlannerRestrictionContext *) linitial(plannerRestrictionContextList);

	if (plannerRestrictionContext == NULL)
	{
		ereport(ERROR, (errcode(ERRCODE_INTERNAL_ERROR),
						errmsg("planner restriction context stack was empty"),
						errdetail("Please report this to the Citus core team.")));
	}

	return plannerRestrictionContext;
}


/*
 * PopPlannerRestrictionContext removes the most recently added restriction contexts from
 * the planner restriction context list. The function assumes the list is not empty.
 */
static void
PopPlannerRestrictionContext(void)
{
	plannerRestrictionContextList = list_delete_first(plannerRestrictionContextList);
}


/*
 * ResetPlannerRestrictionContext resets the element of the given planner
 * restriction context.
 */
static void
ResetPlannerRestrictionContext(PlannerRestrictionContext *plannerRestrictionContext)
{
	plannerRestrictionContext->relationRestrictionContext =
		palloc0(sizeof(RelationRestrictionContext));

	plannerRestrictionContext->joinRestrictionContext =
		palloc0(sizeof(JoinRestrictionContext));

	plannerRestrictionContext->fastPathRestrictionContext =
		palloc0(sizeof(FastPathRestrictionContext));


	/* we'll apply logical AND as we add tables */
	plannerRestrictionContext->relationRestrictionContext->allReferenceTables = true;
}


/*
 * HasUnresolvedExternParamsWalker returns true if the passed in expression
 * has external parameters that are not contained in boundParams, false
 * otherwise.
 */
static bool
HasUnresolvedExternParamsWalker(Node *expression, ParamListInfo boundParams)
{
	if (expression == NULL)
	{
		return false;
	}

	if (IsA(expression, Param))
	{
		Param *param = (Param *) expression;
		int paramId = param->paramid;

		/* only care about user supplied parameters */
		if (param->paramkind != PARAM_EXTERN)
		{
			return false;
		}

		/* check whether parameter is available (and valid) */
		if (boundParams && paramId > 0 && paramId <= boundParams->numParams)
		{
			ParamExternData *externParam = NULL;

			/* give hook a chance in case parameter is dynamic */
			if (boundParams->paramFetch != NULL)
			{
				ParamExternData externParamPlaceholder;
				externParam = (*boundParams->paramFetch)(boundParams, paramId, false,
														 &externParamPlaceholder);
			}
			else
			{
				externParam = &boundParams->params[paramId - 1];
			}

			Oid paramType = externParam->ptype;
			if (OidIsValid(paramType))
			{
				return false;
			}
		}

		return true;
	}

	/* keep traversing */
	if (IsA(expression, Query))
	{
		return query_tree_walker((Query *) expression,
								 HasUnresolvedExternParamsWalker,
								 boundParams,
								 0);
	}
	else
	{
		return expression_tree_walker(expression,
									  HasUnresolvedExternParamsWalker,
									  boundParams);
	}
}


/*
 * IsLocalReferenceTableJoin returns if the given query is a join between
 * reference tables and local tables.
 */
static bool
IsLocalReferenceTableJoin(Query *parse, List *rangeTableList)
{
	bool hasReferenceTable = false;
	bool hasLocalTable = false;
	ListCell *rangeTableCell = false;

	bool hasReferenceTableReplica = false;

	/*
	 * We only allow join between reference tables and local tables in the
	 * coordinator.
	 */
	if (!IsCoordinator())
	{
		return false;
	}

	/*
	 * All groups that have pg_dist_node entries, also have reference
	 * table replicas.
	 */
	PrimaryNodeForGroup(COORDINATOR_GROUP_ID, &hasReferenceTableReplica);

	/*
	 * If reference table doesn't have replicas on the coordinator, we don't
	 * allow joins with local tables.
	 */
	if (!hasReferenceTableReplica)
	{
		return false;
	}

	if (FindNodeCheck((Node *) parse, QueryIsNotSimpleSelect))
	{
		return false;
	}

	foreach(rangeTableCell, rangeTableList)
	{
		RangeTblEntry *rangeTableEntry = (RangeTblEntry *) lfirst(rangeTableCell);

		/*
		 * Don't plan joins involving functions locally since we are not sure if
		 * they do distributed accesses or not, and defaulting to local planning
		 * might break transactional semantics.
		 *
		 * For example, access to the reference table in the function might go
		 * over a connection, but access to the same reference table outside
		 * the function will go over the current backend. The snapshot for the
		 * connection in the function is taken after the statement snapshot,
		 * so they can see two different views of data.
		 *
		 * Looking at gram.y, RTE_TABLEFUNC is used only for XMLTABLE() which
		 * is okay to be planned locally, so allowing that.
		 */
		if (rangeTableEntry->rtekind == RTE_FUNCTION)
		{
			return false;
		}

		if (rangeTableEntry->rtekind != RTE_RELATION)
		{
			continue;
		}

		/*
		 * We only allow local join for the relation kinds for which we can
		 * determine deterministically that access to them are local or distributed.
		 * For this reason, we don't allow non-materialized views.
		 */
		if (rangeTableEntry->relkind == RELKIND_VIEW)
		{
			return false;
		}

		if (!IsDistributedTable(rangeTableEntry->relid))
		{
			hasLocalTable = true;
			continue;
		}

		DistTableCacheEntry *cacheEntry = DistributedTableCacheEntry(
			rangeTableEntry->relid);
		if (cacheEntry->partitionMethod == DISTRIBUTE_BY_NONE)
		{
			hasReferenceTable = true;
		}
		else
		{
			return false;
		}
	}

	return hasLocalTable && hasReferenceTable;
}


/*
 * QueryIsNotSimpleSelect returns true if node is a query which modifies or
 * marks for modifications.
 */
static bool
QueryIsNotSimpleSelect(Node *node)
{
	if (!IsA(node, Query))
	{
		return false;
	}

	Query *query = (Query *) node;
	return (query->commandType != CMD_SELECT) || (query->rowMarks != NIL);
}


/*
 * UpdateReferenceTablesWithShard recursively replaces the reference table names
 * in the given query with the shard table names.
 */
static bool
UpdateReferenceTablesWithShard(Node *node, void *context)
{
	if (node == NULL)
	{
		return false;
	}

	/* want to look at all RTEs, even in subqueries, CTEs and such */
	if (IsA(node, Query))
	{
		return query_tree_walker((Query *) node, UpdateReferenceTablesWithShard,
								 NULL, QTW_EXAMINE_RTES_BEFORE);
	}

	if (!IsA(node, RangeTblEntry))
	{
		return expression_tree_walker(node, UpdateReferenceTablesWithShard,
									  NULL);
	}

	RangeTblEntry *newRte = (RangeTblEntry *) node;

	if (newRte->rtekind != RTE_RELATION)
	{
		return false;
	}

	Oid relationId = newRte->relid;
	if (!IsDistributedTable(relationId))
	{
		return false;
	}

	DistTableCacheEntry *cacheEntry = DistributedTableCacheEntry(relationId);
	if (cacheEntry->partitionMethod != DISTRIBUTE_BY_NONE)
	{
		return false;
	}

	ShardInterval *shardInterval = cacheEntry->sortedShardIntervalArray[0];
	uint64 shardId = shardInterval->shardId;

	char *relationName = get_rel_name(relationId);
	AppendShardIdToName(&relationName, shardId);

	Oid schemaId = get_rel_namespace(relationId);
	newRte->relid = get_relname_relid(relationName, schemaId);

	/*
	 * Parser locks relations in addRangeTableEntry(). So we should lock the
	 * modified ones too.
	 */
	LockRelationOid(newRte->relid, AccessShareLock);

	return false;
}