/*
 * Created by Nils Dijk on 17/01/2020.
 */
#include "postgres.h"

#include "catalog/pg_aggregate.h"
#include "catalog/pg_proc.h"
#include "catalog/pg_type_d.h"
#include "distributed/citus_custom_scan.h"
#include "distributed/citus_ruleutils.h"
#include "distributed/colocation_utils.h"
#include "distributed/deparse_shard_query.h"
#include "distributed/intermediate_result_pruning.h"
#include "distributed/listutils.h"
#include "distributed/metadata_cache.h"
#include "distributed/multi_physical_planner.h"
#include "distributed/path_based_planner.h"
#include "nodes/extensible.h"
#include "nodes/makefuncs.h"
#include "nodes/nodeFuncs.h"
#include "nodes/nodes.h"
#include "nodes/pathnodes.h"
#include "nodes/pg_list.h"
#include "nodes/plannodes.h"
#include "optimizer/paramassign.h"
#include "optimizer/pathnode.h"
#include "optimizer/restrictinfo.h"
#include "optimizer/tlist.h"
#include "utils/builtins.h"
#include "utils/syscache.h"
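
/*
 * A join optimization is a function of this signature: it receives the planner root and
 * a candidate path, and returns a list of alternative paths (NIL when it does not
 * apply). As a minimal illustration (hypothetical, not part of this file), an
 * optimization that never applies would be:
 *
 *     static List *
 *     NoopOptimization(PlannerInfo *root, Path *originalPath)
 *     {
 *         return NIL;
 *     }
 *
 * The entries of joinOptimizations below implement this contract.
 */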
typedef List *(*optimizeFn)(PlannerInfo *root, Path *originalPath);
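
/*
 * A DistributedUnionPath (rendered as "Distributed Union", also called Collect in the
 * diagrams below) represents gathering the results of executing worker_path on every
 * relevant shard group of a colocation group back onto the coordinator.
 */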
typedef struct DistributedUnionPath
{
	CustomPath custom_path;

	/* path to be executed on the worker */
	Path *worker_path;

	uint32 colocationId;
	Expr *partitionValue;

	/*
	 * Due to a misabstraction in citus we need to keep track of a relation id that
	 * this union maps to. Ideally we would perform our pruning actions on the
	 * colocation id, but we need a shard.
	 */
	Oid sampleRelid;
} DistributedUnionPath;

typedef struct RepartitionPath
{
	CustomPath custom_path;

	uint32 targetColocationId;
} RepartitionPath;

typedef struct GeoScanPath
{
	CustomPath custom_path;

	RangeTblEntry *rte;
} GeoScanPath;

static Plan * CreateDistributedUnionPlan(PlannerInfo *root, RelOptInfo *rel,
										 struct CustomPath *best_path, List *tlist,
										 List *clauses, List *custom_plans);
static List * ReparameterizeDistributedUnion(PlannerInfo *root, List *custom_private,
											 RelOptInfo *child_rel);
static CustomPath * WrapTableAccessWithDistributedUnion(Path *originalPath,
														uint32 colocationId,
														Expr *partitionValue,
														Oid sampleRelid,
														List *subPaths);
static Path * CreateRepartitionNode(uint32 colocationId, Path *worker_path);
static Query * GetQueryFromPath(PlannerInfo *root, Path *path, List *tlist,
								List *clauses, Index **varnoMapping);
static List * ShardIntervalListToRelationShardList(List *shardIntervalList);
static List * OptimizeJoinPath(PlannerInfo *root, Path *originalPath);
static List * OptimizeRepartitionInnerJoinPath(PlannerInfo *root, Path *originalPath);
static List * BroadcastOuterJoinPath(PlannerInfo *root, Path *originalPath);
static List * BroadcastInnerJoinPath(PlannerInfo *root, Path *originalPath);
static List * GeoOverlapJoin(PlannerInfo *root, Path *originalPath);
static Path * CreateReadIntermediateResultPath(const Path *originalPath);
static bool IsDistributedUnion(Path *path, bool recurseTransparent,
							   DistributedUnionPath **out);
static Expr * ExtractPartitionValue(List *restrictionList, Var *partitionKey);
static List * ShardIntervalListForRelationPartitionValue(Oid relationId,
														 Expr *partitionValue);
static void PathBasedPlannerGroupAgg(PlannerInfo *root, RelOptInfo *input_rel,
									 RelOptInfo *output_rel, void *extra);
static Path * OptimizeGroupAgg(PlannerInfo *root, Path *originalPath);
static bool CanOptimizeAggPath(PlannerInfo *root, AggPath *apath);
static GeoScanPath * makeGeoScanPath(Relation rel, RelOptInfo *parent,
									 PathTarget *pathtarget, double rows);
static bool IsGeoScanPath(CustomPath *path);
static RangeTblEntry * makeRangeTableEntryForRelation(Relation rel,
													  int lockmode,
													  Alias *alias,
													  bool inh,
													  bool inFromCl);


/*
 * TODO some optimizations are useless if others are already provided. This might cause
 * excessive path creation, causing performance problems. Depending on the number of
 * optimizations to be added we could keep a bitmask indicating, for every entry, to
 * skip it if the index of a preceding successful optimization is in the bitmap.
 */
bool EnableBroadcastJoin = true;

/* list of functions that will be called to optimize paths in the join hook */
static optimizeFn joinOptimizations[] = {
	OptimizeJoinPath,
	OptimizeRepartitionInnerJoinPath,
	/* BroadcastOuterJoinPath, */
	/* BroadcastInnerJoinPath, */
	/* GeoOverlapJoin, */
};

const CustomPathMethods geoScanMethods = {
	.CustomName = "GeoScan",
};

const CustomPathMethods distributedUnionMethods = {
	.CustomName = "Distributed Union",
	.PlanCustomPath = CreateDistributedUnionPlan,
	.ReparameterizeCustomPathByChild = ReparameterizeDistributedUnion
};

const CustomPathMethods repartitionMethods = {
	.CustomName = "Repartition",
};
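

/*
 * Note: geoScanMethods and repartitionMethods only provide a CustomName; they have no
 * PlanCustomPath callback. Presumably such paths are placeholders during path
 * generation and must be transformed away (e.g. into a worker query via
 * ApplyPathToQuery) before the planner would try to turn them into a plan directly.
 */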
static CustomPath *
WrapTableAccessWithDistributedUnion(Path *originalPath, uint32 colocationId,
									Expr *partitionValue, Oid sampleRelid,
									List *subPaths)
{
	DistributedUnionPath *distUnion = (DistributedUnionPath *)
									  newNode(sizeof(DistributedUnionPath),
											  T_CustomPath);

	distUnion->custom_path.path.pathtype = T_CustomScan;
	distUnion->custom_path.path.parent = originalPath->parent;
	distUnion->custom_path.path.pathtarget = originalPath->pathtarget;
	distUnion->custom_path.path.param_info = originalPath->param_info;

	/* TODO use a better cost model */
	distUnion->custom_path.path.rows = originalPath->rows;
	distUnion->custom_path.path.startup_cost = originalPath->startup_cost + 1000;
	distUnion->custom_path.path.total_cost = originalPath->total_cost + 1000;

	distUnion->custom_path.methods = &distributedUnionMethods;

	distUnion->worker_path = originalPath;
	distUnion->custom_path.custom_private = list_make1(originalPath);
	distUnion->colocationId = colocationId;
	distUnion->partitionValue = partitionValue;
	distUnion->sampleRelid = sampleRelid;
	distUnion->custom_path.custom_paths = subPaths;

	return (CustomPath *) distUnion;
}
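

/*
 * Context for TransformVarToParamExternMutator. varnoMapping maps a varno in the worker
 * query back to the varno of the source query (see GetQueryFromPath), so the mutator
 * can check whether a Var originates from an outer rel of a nested loop. Vars that do
 * are replaced with PARAM_EXTERN Params; the deparser later renders extern params as
 * $1, $2, ... in the worker query text. This is a sketch of the intent, not a guarantee
 * of the final parameter numbering (see the TODOs in the mutator).
 */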
typedef struct TransformVarToParamExternMutatorContext
{
	PlannerInfo *root;
	Index *varnoMapping;
} TransformVarToParamExternMutatorContext;


static Node *
TransformVarToParamExternMutator(Node *node,
								 TransformVarToParamExternMutatorContext *context)
{
	if (node == NULL)
	{
		return NULL;
	}

	if (IsA(node, Var))
	{
		Var *var = castNode(Var, node);

		Index originalVarNo = context->varnoMapping[var->varno];
		if (originalVarNo == 0)
		{
			/* no mapping was required */
			originalVarNo = var->varno;
		}
		Assert(originalVarNo > 0);

		/* if not to be replaced, we can just return the Var unmodified */
		if (!bms_is_member(originalVarNo, context->root->curOuterRels))
		{
			return (Node *) var;
		}

		Param *paramExec = replace_nestloop_param_var(context->root, var);

		/* TODO: figure out which Var's to replace by which parameters */
		/* TODO: hack - insert param 1 for now */
		Param *paramExtern = makeNode(Param);
		paramExtern->paramkind = PARAM_EXTERN;

		/* Exec params are 0-indexed, Extern params are 1-indexed */
		paramExtern->paramid = paramExec->paramid + 1;

		paramExtern->paramtype = paramExec->paramtype;
		paramExtern->paramtypmod = paramExec->paramtypmod;
		paramExtern->paramcollid = paramExec->paramcollid;
		paramExtern->location = paramExec->location;

		return (Node *) paramExtern;
	}
	else if (IsA(node, Query))
	{
		return (Node *) query_tree_mutator((Query *) node,
										   TransformVarToParamExternMutator,
										   (void *) context,
										   0);
	}

	return expression_tree_mutator(node, TransformVarToParamExternMutator,
								   (void *) context);
}


static Query *
TransformVarToParamExtern(Query *query, PlannerInfo *root, Index *varnoMapping)
{
	TransformVarToParamExternMutatorContext context = {
		root,
		varnoMapping
	};

	return castNode(Query, TransformVarToParamExternMutator((Node *) query, &context));
}
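

/*
 * CreateDistributedUnionPlan implements PlanCustomPath for the Distributed Union
 * (Collect) node. It turns the worker path into a Query and creates one Task per
 * colocated shard group by rewriting the relation names in a copy of that query to the
 * corresponding shard names. For illustration only: for a worker path that seq-scans a
 * distributed table `t` with a filter, every task would carry SQL roughly of the shape
 *
 *     SELECT ... FROM public.t_<shardid> WHERE ...
 *
 * where `t_<shardid>` stands for the shard the task is anchored on (a hypothetical
 * name; the real names come from UpdateRelationToShardNames).
 */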
static Plan *
CreateDistributedUnionPlan(PlannerInfo *root,
						   RelOptInfo *rel,
						   struct CustomPath *best_path,
						   List *tlist,
						   List *clauses,
						   List *custom_plans)
{
	DistributedUnionPath *distUnion = (DistributedUnionPath *) best_path;

	Job *workerJob = CitusMakeNode(Job);
	workerJob->jobId = UniqueJobId();

	ShardInterval *shardInterval = NULL;

	Index *varnoMapping = NULL; /* stores the mapping back for outerrel checks */
	Query *q = GetQueryFromPath(root, distUnion->worker_path, tlist, clauses,
								&varnoMapping);

	/*
	 * Assume shards are colocated; any shard should suffice for now to find the
	 * initial interval list.
	 */

	/*
	 * TODO track colocation information on the Distributed Union node to fetch the
	 * required information in a more optimal setting
	 */
	List *shardIntervalList = ShardIntervalListForRelationPartitionValue(
		distUnion->sampleRelid,
		distUnion->partitionValue);

	int i = 0;
	foreach_ptr(shardInterval, shardIntervalList)
	{
		List *colocatedShards = ColocatedShardIntervalList(shardInterval);
		List *relationShardList = ShardIntervalListToRelationShardList(colocatedShards);

		Query *qc = copyObject(q);
		UpdateRelationToShardNames((Node *) qc, relationShardList);

		/* transform Var's for other varno's to parameters */
		qc = TransformVarToParamExtern(qc, root, varnoMapping);

		StringInfoData buf;
		initStringInfo(&buf);
		pg_get_query_def(qc, &buf);

		Task *sqlTask = CreateBasicTask(workerJob->jobId, i, READ_TASK, buf.data);
		sqlTask->anchorShardId = shardInterval->shardId;
		sqlTask->taskPlacementList = ActiveShardPlacementList(shardInterval->shardId);
		workerJob->taskList = lappend(workerJob->taskList, sqlTask);
		i++;
	}
	workerJob->jobQuery = q;

	DistributedPlan *distributedPlan = CitusMakeNode(DistributedPlan);
	distributedPlan->workerJob = workerJob;
	distributedPlan->modLevel = ROW_MODIFY_READONLY;
	distributedPlan->relationIdList = list_make1_oid(distUnion->sampleRelid);
	distributedPlan->expectResults = true;

	Plan *subPlan = NULL;
	int subPlanCount = 0;
	foreach_ptr(subPlan, custom_plans)
	{
		PlannedStmt *result = makeNode(PlannedStmt);
		result->commandType = CMD_SELECT;
		result->planTree = subPlan;
		List *rtable = NIL;
		for (i = 1; i < root->simple_rel_array_size; i++)
		{
			RangeTblEntry *rte = root->simple_rte_array[i];
			rtable = lappend(rtable, rte);
		}
		rtable = lappend(rtable, root->simple_rte_array[1]);
		result->rtable = rtable;

		/* 1 indexed */
		subPlanCount++;
		DistributedSubPlan *dsubPlan = CitusMakeNode(DistributedSubPlan);
		dsubPlan->plan = result;
		dsubPlan->subPlanId = subPlanCount;

		distributedPlan->subPlanList = lappend(distributedPlan->subPlanList, dsubPlan);
	}

	distributedPlan->usedSubPlanNodeList = FindSubPlanUsages(distributedPlan);

	CustomScan *plan = makeNode(CustomScan);
	plan->scan.scanrelid = 0;
	plan->custom_scan_tlist = tlist;
	plan->flags = best_path->flags;
	plan->methods = &AdaptiveExecutorCustomScanMethods;
	plan->custom_private = list_make1(distributedPlan);
	plan->custom_plans = custom_plans;

	plan->scan.plan.targetlist = tlist;

	/* reduce RestrictInfo list to bare expressions; ignore pseudoconstants */
	clauses = extract_actual_clauses(clauses, false);

	/* plan->scan.plan.qual = clauses; */
	/* plan->custom_exprs = clauses; */

	return (Plan *) plan;
}


static List *
ShardIntervalListForRelationPartitionValue(Oid relationId, Expr *partitionValue)
{
	if (partitionValue && IsA(partitionValue, Const))
	{
		/* prune shard list to target */
		Const *partitionValueConst = castNode(Const, partitionValue);

		/* TODO assert the constant is of the correct type */
		CitusTableCacheEntry *cacheEntry = GetCitusTableCacheEntry(relationId);
		return list_make1(FindShardInterval(partitionValueConst->constvalue,
											cacheEntry));
	}

	/* all shards */
	return LoadShardIntervalList(relationId);
}


static List *
ShardIntervalListToRelationShardList(List *shardIntervalList)
{
	List *shardRelationList = NIL;
	ShardInterval *shardInterval = NULL;

	/* map the shard intervals to RelationShard */
	foreach_ptr(shardInterval, shardIntervalList)
	{
		RelationShard *rs = CitusMakeNode(RelationShard);
		rs->relationId = shardInterval->relationId;
		rs->shardId = shardInterval->shardId;
		shardRelationList = lappend(shardRelationList, rs);
	}

	return shardRelationList;
}


static List *
ReparameterizeDistributedUnion(PlannerInfo *root,
							   List *custom_private,
							   RelOptInfo *child_rel)
{
	return NIL;
}


/*
 * IsDistributedUnion returns whether the pathnode is a distributed union.
 *
 * If recurseTransparent is set it recurses into transparent nodes like Materialize.
 *
 * If out is not NULL the pointer to the union is written to the location it specifies.
 */
static bool
IsDistributedUnion(Path *path, bool recurseTransparent, DistributedUnionPath **out)
{
	if (recurseTransparent)
	{
		switch (nodeTag(path))
		{
			case T_MaterialPath:
			{
				MaterialPath *materialPath = castNode(MaterialPath, path);
				return IsDistributedUnion(materialPath->subpath, recurseTransparent,
										  out);
			}

			default:
			{
				break;
			}
		}
	}

	if (!IsA(path, CustomPath))
	{
		return false;
	}

	CustomPath *cpath = castNode(CustomPath, path);
	if (cpath->methods != &distributedUnionMethods)
	{
		return false;
	}

	if (out != NULL)
	{
		*out = (DistributedUnionPath *) cpath;
	}

	return true;
}
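

/*
 * PathBasedPlannerRelationHook wraps every path on a distributed base relation in a
 * Distributed Union (Collect) node, so that, as a sketch, a plain
 *
 *     SeqScan on t
 *
 * becomes
 *
 *     Custom Scan ("Distributed Union")
 *       -> SeqScan on t
 *
 * When the filters contain an equality on the distribution column, the partition value
 * is recorded on the Collect node so planning can later prune to a single shard group.
 */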
void
PathBasedPlannerRelationHook(PlannerInfo *root,
							 RelOptInfo *relOptInfo,
							 Index restrictionIndex,
							 RangeTblEntry *rte)
{
	if (!IsCitusTable(rte->relid))
	{
		/* table accessed is not distributed, no paths to change */
		return;
	}

	Var *partitionKey = DistPartitionKey(rte->relid);
	Expr *partitionValue = NULL;

	/* the distributed table has a partition key, let's check the filters for a value */
	if (partitionKey != NULL)
	{
		/* use the first rel id included in this relation */
		partitionKey->varno = bms_next_member(relOptInfo->relids, -1);
		Assert(bms_num_members(relOptInfo->relids) == 1);

		partitionValue = ExtractPartitionValue(relOptInfo->baserestrictinfo,
											   partitionKey);
	}

	/* wrap every path with a distributed union custom path */
	ListCell *pathCell = NULL;
	foreach(pathCell, relOptInfo->pathlist)
	{
		Path *originalPath = lfirst(pathCell);
		CustomPath *wrappedPath = WrapTableAccessWithDistributedUnion(
			originalPath,
			TableColocationId(rte->relid),
			partitionValue,
			rte->relid,
			NIL);
		SetListCellPtr(pathCell, wrappedPath);
	}

	/* hardcoded hack for adding geo distributed tables as an alternative path */
	Relation rel = relation_open(rte->relid, AccessShareLock);
	if (UseGeoPartitioning && strcmp(RelationGetRelationName(rel),
									 "belgium_planet_osm_roads_dist") == 0)
	{
		if (OnlyGeoPartitioning)
		{
			/* makes the geo path the only path to access the relation */
			relOptInfo->pathlist = NIL;
		}

		Oid geoRelid = RelnameGetRelid("belgium_planet_osm_roads_geo");
		Relation georel = relation_open(geoRelid, AccessShareLock);

		Path *geoPath = (Path *) makeGeoScanPath(georel,
												 relOptInfo,
												 relOptInfo->reltarget,
												 relOptInfo->rows);
		geoPath = (Path *)
				  WrapTableAccessWithDistributedUnion(geoPath,
													  TableColocationId(geoRelid),
													  NULL,
													  geoRelid,
													  NIL);

		if (EnableGeoPartitioningGrouping)
		{
			/* very much just an int4 at the moment */
			SortGroupClause *sgc = makeNode(SortGroupClause);
			sgc->tleSortGroupRef = 1; /* should be the first field */
			sgc->eqop = 96; /* int4eq */
			sgc->sortop = 97; /* int4lt */
			sgc->nulls_first = false;
			sgc->hashable = true; /* just assume an int4 can be hashed */

			List *groupClause = list_make1(sgc);

			/* creating the target list */
			PathTarget *groupPathTarget = create_empty_pathtarget();
			int numAggs = 0;
			Expr *expr = NULL;
			foreach_ptr(expr, relOptInfo->reltarget->exprs)
			{
				if (!IsA(expr, Var))
				{
					continue;
				}
				Var *var = castNode(Var, expr);

				switch (var->varattno)
				{
					case 1: /* k */
					{
						/* transparently add grouping keys */
						add_column_to_pathtarget(groupPathTarget, expr, 0);

						break;
					}

					case 2: /* osm_id */
					{
						/* wrap non-partitioned, non-primary-key columns in any_value */
						Aggref *aggref = makeNode(Aggref);
						aggref->aggfnoid = 18333; /* any_value */
						aggref->aggtype = var->vartype;
						aggref->aggtranstype = var->vartype;
						aggref->aggfilter = NULL;
						aggref->aggstar = false;
						aggref->aggvariadic = false;
						aggref->aggkind = AGGKIND_NORMAL;
						aggref->aggsplit = AGGSPLIT_SIMPLE;
						aggref->location = 0;

						aggref->args = list_make1(
							makeTargetEntry((Expr *) var, 1, NULL, false));
						TargetEntry *argTLE = NULL;
						foreach_ptr(argTLE, aggref->args)
						{
							aggref->aggargtypes =
								lappend_oid(aggref->aggargtypes,
											exprType((Node *) argTLE->expr));
						}
						add_column_to_pathtarget(groupPathTarget, (Expr *) aggref, 0);
						numAggs++;

						break;
					}

					case 3: /* way */
					{
						/* reconstruct partitioned values via ST_Union() */

						Aggref *aggref = makeNode(Aggref);
						aggref->aggfnoid = 16861; /* ST_Union */
						aggref->aggtype = 16390; /* geometry, hardcoded OID */
						aggref->aggtranstype = 2281; /* internal */
						aggref->aggfilter = NULL;
						aggref->aggstar = false;
						aggref->aggvariadic = false;
						aggref->aggkind = AGGKIND_NORMAL;
						aggref->aggsplit = AGGSPLIT_SIMPLE;
						aggref->location = 0;

						aggref->args = list_make1(makeTargetEntry(expr, 1, NULL,
																  false));
						TargetEntry *argTLE = NULL;
						foreach_ptr(argTLE, aggref->args)
						{
							aggref->aggargtypes =
								lappend_oid(aggref->aggargtypes,
											exprType((Node *) argTLE->expr));
						}
						add_column_to_pathtarget(groupPathTarget, (Expr *) aggref, 0);
						numAggs++;

						break;
					}
				}
			}

			/* TODO figure out costing for our grouping */
			AggClauseCosts costs = {
#if PG_VERSION_NUM < 140000
				.numAggs = numAggs,
				.numOrderedAggs = 0,
				.hasNonPartial = false,
				.hasNonSerial = false,
#endif

				.transCost.startup = 0,
				.transCost.per_tuple = 0,
				.finalCost.startup = 0,
				.finalCost.per_tuple = 0,

				.transitionSpace = 0,
			};

			geoPath = (Path *) create_agg_path(root,
											   relOptInfo,
											   geoPath,
											   groupPathTarget,
											   AGG_HASHED,
											   AGGSPLIT_SIMPLE,
											   groupClause,
											   NIL, &costs,
											   2);
		}

		add_path(relOptInfo, geoPath);

		relation_close(georel, AccessShareLock);
	}
	relation_close(rel, AccessShareLock);
}
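

/*
 * makeGeoScanPath builds a placeholder scan path over the geo partitioned companion
 * relation. The pathtarget is copied from the original relation; primary-key columns
 * (assumed to be the first attribute, see the TODO below) get sortgroupref 1 so a later
 * grouping step can reconstruct rows that geo partitioning has cut into pieces.
 */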
static GeoScanPath *
makeGeoScanPath(Relation rel, RelOptInfo *parent, PathTarget *pathtarget, double rows)
{
	GeoScanPath *geoPath = (GeoScanPath *) newNode(sizeof(GeoScanPath), T_CustomPath);
	CustomPath *cpath = (CustomPath *) geoPath;
	Path *path = (Path *) geoPath;

	path->pathtype = T_CustomScan;
	path->parent = parent;

	PathTarget *targetCopy = create_empty_pathtarget();
	Expr *expr = NULL;
	foreach_ptr(expr, pathtarget->exprs)
	{
		bool isPrimaryKey = false;
		if (IsA(expr, Var))
		{
			/* TODO assume the first attribute of a relation as its PK */
			Var *var = (Var *) expr;
			isPrimaryKey = var->varattno == 1;
		}

		/*
		 * Geo partitioning cuts the geometry of the distribution column into pieces;
		 * they need to be reconstructed by grouping on the primary key. Add the
		 * primary keys to a grouping set with reference 1.
		 */
		add_column_to_pathtarget(targetCopy, expr,
								 isPrimaryKey ? 1 : 0);
	}

	path->pathtarget = targetCopy;
	path->param_info = NULL;

	path->rows = rows * 1.2; /* add 20% for the duplication */
	path->startup_cost = 0;
	path->total_cost = 0;

	cpath->methods = &geoScanMethods;

	geoPath->rte = makeRangeTableEntryForRelation(rel, AccessShareLock, NULL, false,
												  true);

	return geoPath;
}


static bool
IsGeoScanPath(CustomPath *path)
{
	return path->methods == &geoScanMethods;
}


static RangeTblEntry *
makeRangeTableEntryForRelation(Relation rel,
							   int lockmode,
							   Alias *alias,
							   bool inh,
							   bool inFromCl)
{
	RangeTblEntry *rte = makeNode(RangeTblEntry);
	char *refname = alias ? alias->aliasname : RelationGetRelationName(rel);

	Assert(lockmode == AccessShareLock ||
		   lockmode == RowShareLock ||
		   lockmode == RowExclusiveLock);
	Assert(CheckRelationLockedByMe(rel, lockmode, true));

	rte->rtekind = RTE_RELATION;
	rte->alias = alias;
	rte->relid = RelationGetRelid(rel);
	rte->relkind = rel->rd_rel->relkind;
	rte->rellockmode = lockmode;

	/*
	 * Build the list of effective column names using user-supplied aliases
	 * and/or actual column names.
	 */
	rte->eref = makeAlias(refname, NIL);
	rte->eref->colnames = list_make3(makeString("k"),
									 makeString("osm_id"),
									 makeString("way"));

	/*
	 * Set flags and access permissions.
	 *
	 * The initial default on access checks is always check-for-READ-access,
	 * which is the right thing for all except target tables.
	 */
	rte->lateral = false;
	rte->inh = inh;
	rte->inFromCl = inFromCl;

	rte->requiredPerms = ACL_SELECT;
	rte->checkAsUser = InvalidOid; /* not set-uid by default, either */
	rte->selectedCols = NULL;
	rte->insertedCols = NULL;
	rte->updatedCols = NULL;
	rte->extraUpdatedCols = NULL;

	return rte;
}
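

/*
 * ExtractPartitionValue scans the base restrictions for an equality between the
 * partition column and some expression and returns that expression. As a sketch: given
 * WHERE partition_col = 42 it returns the Const node for 42; when no such filter exists
 * it returns NULL.
 */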
static Expr *
ExtractPartitionValue(List *restrictionList, Var *partitionKey)
{
	RestrictInfo *info = NULL;
	foreach_ptr(info, restrictionList)
	{
		if (!NodeIsEqualsOpExpr((Node *) info->clause))
		{
			continue;
		}

		/* equality operator, check for partition column */
		OpExpr *eq = castNode(OpExpr, info->clause);
		Expr *left = list_nth(eq->args, 0);
		Expr *right = list_nth(eq->args, 1);

		if (IsA(left, Var))
		{
			Var *leftVar = castNode(Var, left);
			if (leftVar->varno == partitionKey->varno &&
				leftVar->varattno == partitionKey->varattno)
			{
				/* partition column, return right */
				return right;
			}
		}

		if (IsA(right, Var))
		{
			/* note: this used to cast the left side, which could never match here */
			Var *rightVar = castNode(Var, right);
			if (rightVar->varno == partitionKey->varno &&
				rightVar->varattno == partitionKey->varattno)
			{
				/* partition column, return left */
				return left;
			}
		}
	}

	return NULL;
}


static NameData
GetFunctionNameData(Oid funcid)
{
	HeapTuple proctup = NULL;
	Form_pg_proc procform = NULL;

	proctup = SearchSysCache1(PROCOID, ObjectIdGetDatum(funcid));
	if (!HeapTupleIsValid(proctup))
	{
		elog(ERROR, "cache lookup failed for function %u", funcid);
	}
	procform = (Form_pg_proc) GETSTRUCT(proctup);

	/* copy name by value */
	NameData result = procform->proname;

	ReleaseSysCache(proctup);

	return result;
}
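

/*
 * GeoJoinPathMatch captures the parts of a geo overlap join recognized by the pattern
 * matchers in GeoOverlapJoin below. pattern_match.h is included mid-file, presumably
 * because its name-based matchers rely on helpers defined above (such as
 * GetFunctionNameData). The commented-out grouping fields mirror the two matcher
 * variants toggled by EnableGeoPartitioningGrouping.
 */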
typedef struct GeoJoinPathMatch
{
	Const *stdwithinDistanceConst;

	/* AggPath *innerGrouping; */
	DistributedUnionPath *innerDistUnion;
	GeoScanPath *innerPath;

	/* AggPath *outerGrouping; */
	DistributedUnionPath *outerDistUnion;
	GeoScanPath *outerPath;
} GeoJoinPathMatch;


#include "distributed/planner/pattern_match.h"
static List *
GeoOverlapJoin(PlannerInfo *root, Path *originalPath)
{
	GeoJoinPathMatch match = { 0 };

	bool didMatch = false;

	/*
	 * Temporarily nest the matcher till we figure out the final grouping; for now we
	 * need to be able to toggle between the two shapes.
	 */
	if (EnableGeoPartitioningGrouping)
	{
		IfPathMatch(
			originalPath,
			MatchJoin(
				NoCapture,
				JOIN_INNER,
				/* match on join restriction info */
				MatchJoinRestrictions(
					NoCapture,
					MatchExprNamedOperation(
						NoCapture,
						geometry_overlaps,
						MatchVar(NoCapture),
						MatchExprNamedFunction(
							NoCapture,
							st_expand,
							MatchVar(NoCapture),
							MatchConst(
								&match.stdwithinDistanceConst,
								MatchFields(consttype == FLOAT8OID))))),
				/* match inner path in join */
				SkipReadThrough(
					NoCapture,
					MatchGrouping(
						NoCapture, /* &match.innerGrouping, */
						MatchDistributedUnion(
							&match.innerDistUnion,
							MatchGeoScan(
								&match.innerPath)))),
				/* match outer path in join */
				SkipReadThrough(
					NoCapture,
					MatchGrouping(
						NoCapture, /* &match.outerGrouping, */
						MatchDistributedUnion(&match.outerDistUnion,
											  MatchGeoScan(
												  &match.outerPath))))))
		{
			didMatch = true;
		}
	}
	else
	{
		IfPathMatch(
			originalPath,
			MatchJoin(
				NoCapture,
				JOIN_INNER,
				/* match on join restriction info */
				MatchJoinRestrictions(
					NoCapture,
					MatchExprNamedOperation(
						NoCapture,
						geometry_overlaps,
						MatchVar(NoCapture),
						MatchExprNamedFunction(
							NoCapture,
							st_expand,
							MatchVar(NoCapture),
							MatchConst(
								&match.stdwithinDistanceConst,
								MatchFields(consttype == FLOAT8OID))))),
				/* match inner path in join */
				SkipReadThrough(
					NoCapture,
					MatchDistributedUnion(
						&match.innerDistUnion,
						MatchGeoScan(
							&match.innerPath))),
				/* match outer path in join */
				SkipReadThrough(
					NoCapture,
					MatchDistributedUnion(&match.outerDistUnion,
										  MatchGeoScan(
											  &match.outerPath)))))
		{
			didMatch = true;
		}
	}

	if (didMatch)
	{
		/* have a match on the geo join pattern, all fields are stored in `match` */
		ereport(DEBUG1, (errmsg("distance join with distance: %f",
								DatumGetFloat8(
									match.stdwithinDistanceConst->constvalue))));

		JoinPath *jpath = makeNode(NestPath);
		*jpath = *((JoinPath *) originalPath); /* copy basic join settings */
		jpath->path.type = T_NestPath;

		jpath->innerjoinpath = (Path *) match.innerPath;
		jpath->outerjoinpath = (Path *) match.outerPath;

		/*
		 * (Path *) create_append_path(
		 *     root,
		 *     match.outerPath->custom_path.path.parent,
		 *     list_make1(match.outerPath), (TODO add the result of the shuffled job)
		 *     NIL,
		 *     NIL,
		 *     NULL,
		 *     0,
		 *     false,
		 *     NIL,
		 *     match.outerPath->custom_path.path.rows + 0);
		 */

		jpath->path.startup_cost -= 2000; /* remove the double dist union cost */
		jpath->path.total_cost -= 2000; /* remove the double dist union cost */

		/* TODO add grouping */

		Path *newPath = (Path *) WrapTableAccessWithDistributedUnion(
			(Path *) jpath,
			match.innerDistUnion->colocationId,
			match.innerDistUnion->partitionValue,
			match.innerDistUnion->sampleRelid,
			NIL); /* TODO is this ok? */

		return list_make1(newPath);
	}

	return NIL;
}
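

/*
 * OptimizeJoinPath detects a join between two Collect (Distributed Union) nodes on the
 * same colocation group with the same partition value and pushes the join below a
 * single Collect, e.g. (sketch):
 *
 *            +--------+                         +---------+
 *            |  Join  |                         | Collect |
 *            +--------+             =>          +---------+
 *             /      \                               |
 *     +---------+  +---------+                  +--------+
 *     | Collect |  | Collect |                  |  Join  |
 *     +---------+  +---------+                  +--------+
 *          |            |                        /      \
 *        inner        outer                   inner    outer
 *     worker_path  worker_path            worker_path worker_path
 */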
static List *
OptimizeJoinPath(PlannerInfo *root, Path *originalPath)
{
	DistributedUnionPath *innerDU = NULL;
	DistributedUnionPath *outerDU = NULL;
	JoinPath *jpath = NULL;

	IfPathMatch(
		originalPath,
		MatchJoin(
			&jpath,
			JOIN_INNER,
			/* match on join restriction info */
			MatchAny,
			/* match inner path in join */
			SkipReadThrough(
				NoCapture,
				MatchDistributedUnion(
					&innerDU,
					MatchAny)),
			/* match outer path in join */
			SkipReadThrough(
				NoCapture,
				MatchDistributedUnion(
					&outerDU,
					MatchAny))))
	{
		if (innerDU->colocationId != outerDU->colocationId)
		{
			/* Distributed Union is not on the same colocation group */
			return NIL;
		}

		if (!equal(innerDU->partitionValue, outerDU->partitionValue))
		{
			/*
			 * TODO this is most likely too strict, but if the values are strictly the
			 * same we can easily take one during merging
			 */
			return NIL;
		}

		const DistributedUnionPath *baseDistUnion = innerDU;

		JoinPath *jcpath = makeNode(NestPath);
		*jcpath = *jpath;
		jcpath->path.type = T_NestPath;

		jcpath->innerjoinpath = innerDU->worker_path;
		jcpath->outerjoinpath = outerDU->worker_path;

		/* TODO update costs of hashjoin, very naive removal of DU cost for now */
		jcpath->path.startup_cost -= 2000; /* remove the double dist union cost */
		jcpath->path.total_cost -= 2000; /* remove the double dist union cost */

		Path *newPath = (Path *) WrapTableAccessWithDistributedUnion(
			(Path *) jcpath,
			baseDistUnion->colocationId,
			baseDistUnion->partitionValue,
			baseDistUnion->sampleRelid,
			baseDistUnion->custom_path.custom_paths);

		return list_make1(newPath);
	}

	return NIL;
}


static List *
OptimizeRepartitionInnerJoinPath(PlannerInfo *root, Path *originalPath)
{
	DistributedUnionPath *innerDU = NULL;
	DistributedUnionPath *outerDU = NULL;
	JoinPath *joinPath = NULL;

	/*
	 * Match the following shape:
	 *
	 *                     +---------+
	 *                     |  Join   |
	 *                     +---------+
	 *                      /       \
	 *  +---------------------+   +---------------------+
	 *  | Collect             |   | Collect             |
	 *  | - ColocationID: $1  |   | - ColocationID: !$1 |
	 *  +---------------------+   +---------------------+
	 */
	IfPathMatch(
		originalPath,
		MatchJoin(
			&joinPath,
			JOIN_INNER,
			/* match on join restriction info */
			MatchAny,
			/* match inner path in join */
			SkipReadThrough(
				NoCapture,
				MatchDistributedUnion(
					&innerDU,
					MatchAny)),
			/* match outer path in join */
			SkipReadThrough(
				NoCapture,
				MatchDistributedUnion(
					&outerDU,
					MatchAny))))
	{
		/*
		 * We matched the shape of our join. Next we need to verify the join is not
		 * already colocated, because a colocated join can always be pushed down. To
		 * verify colocatedness of the join we need to verify the following:
		 * - the join is happening in the same colocation id
		 * - there is an equivalence on the list of distribution columns on both the
		 *   inner and the outer part of the join.
		 * If any of the above are not satisfied the join is not colocated.
		 */

		if (innerDU->colocationId == outerDU->colocationId)
		{
			/* TODO this skips the equivalence check between distribution attributes */
			return NIL;
		}

		/*
		 * We want to repartition the inner part of the join to the colocation of the
		 * outer part. For this we need to understand which attribute on the inner
		 * part has an equivalence condition on any of the attributes in the outer
		 * part of the join.
		 *
		 * Once we know on which attribute to repartition the inner part we can create
		 * a new tree in the following shape:
		 *
		 *  +------------------------------------+
		 *  | Collect                            |
		 *  | - ColocationID: outer.ColocationID |
		 *  +------------------------------------+
		 *                    |
		 *               +---------+
		 *               |  Join   |
		 *               +---------+
		 *                /       \
		 *  +-------------------+  +------------------------------------+
		 *  | outer.worker_path |  | Repartition                        |
		 *  +-------------------+  | - ColocationID: outer.colocationID |
		 *                         +------------------------------------+
		 *                                           |
		 *                              +-------------------+
		 *                              | inner.worker_path |
		 *                              +-------------------+
		 */

		/* create new Join node */
		JoinPath *newJoinPath = makeNode(NestPath);
		*newJoinPath = *joinPath;
		newJoinPath->path.type = T_NestPath; /* reset type after copying join data */

		/* populate outer path */
		newJoinPath->outerjoinpath = outerDU->worker_path;

		/*
		 * TODO understand how to describe on which attribute the Repartition needs to
		 * happen
		 */
		newJoinPath->innerjoinpath = CreateRepartitionNode(outerDU->colocationId,
														   innerDU->worker_path);

		/* TODO find a good way to calculate join costs based on its inner/outer paths */
		/* subtract the double collect cost */
		newJoinPath->path.startup_cost -= 2000;
		newJoinPath->path.total_cost -= 2000;

		/* add the costs for the repartition */
		newJoinPath->path.startup_cost += 500;
		newJoinPath->path.total_cost += 500;

		/* create Collect on top of new join, base Collect on matched outer Collect */
		const DistributedUnionPath *baseDistUnion = outerDU;
		Path *newPath = (Path *) WrapTableAccessWithDistributedUnion(
			(Path *) newJoinPath,
			baseDistUnion->colocationId,
			baseDistUnion->partitionValue,
			baseDistUnion->sampleRelid,
			baseDistUnion->custom_path.custom_paths);

		return list_make1(newPath);
	}

	return NIL;
}


static Path *
CreateRepartitionNode(uint32 colocationId, Path *worker_path)
{
	RepartitionPath *repartition = (RepartitionPath *)
								   newNode(sizeof(RepartitionPath), T_CustomPath);

	repartition->custom_path.path.pathtype = T_CustomScan;
	repartition->custom_path.path.parent = worker_path->parent;
	repartition->custom_path.path.pathtarget = worker_path->pathtarget;
	repartition->custom_path.path.param_info = worker_path->param_info;

	/* TODO use a better cost model */
	repartition->custom_path.path.rows = worker_path->rows;
	repartition->custom_path.path.startup_cost = worker_path->startup_cost + 500;
	repartition->custom_path.path.total_cost = worker_path->total_cost + 500;

	repartition->custom_path.methods = &repartitionMethods;

	repartition->custom_path.custom_private = list_make1(worker_path);
	repartition->targetColocationId = colocationId;

	return (Path *) repartition;
}
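

/*
 * BroadcastOuterJoinPath and BroadcastInnerJoinPath keep one side of the join
 * distributed and broadcast the other side as an intermediate result, modelled by
 * CreateReadIntermediateResultPath below. The broadcast side's original path is
 * recorded as an extra subpath on the Collect node, which CreateDistributedUnionPlan
 * later turns into a DistributedSubPlan that is executed and shipped before the worker
 * queries run.
 */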
static List *
BroadcastOuterJoinPath(PlannerInfo *root, Path *originalPath)
{
	if (!EnableBroadcastJoin)
	{
		return NIL;
	}

	switch (originalPath->pathtype)
	{
		case T_NestLoop:
		case T_HashJoin:
		{
			const JoinPath *jpath = (JoinPath *) originalPath;
			List *newPaths = NIL;

			if (IsDistributedUnion(jpath->outerjoinpath, false, NULL))
			{
				/* keep the outer side distributed, broadcast the inner side */
				DistributedUnionPath *baseDistUnion =
					(DistributedUnionPath *) jpath->outerjoinpath;

				/*
				 * Shallow copy of any join node; this does not imply executing a
				 * nested join, but the nested join contains all the information we
				 * need to send the join to the worker.
				 */
				JoinPath *jcpath = makeNode(NestPath);
				*jcpath = *jpath;
				jcpath->path.type = T_NestPath;

				jcpath->outerjoinpath = baseDistUnion->worker_path;
				Path *subPath = jcpath->innerjoinpath;
				jcpath->innerjoinpath = CreateReadIntermediateResultPath(subPath);

				/* TODO update costs of hashjoin, very naive removal of DU cost for now */
				jcpath->path.startup_cost -= 1500;
				jcpath->path.total_cost -= 1500;

				Path *newPath = (Path *) WrapTableAccessWithDistributedUnion(
					(Path *) jcpath,
					baseDistUnion->colocationId,
					baseDistUnion->partitionValue,
					baseDistUnion->sampleRelid,
					lappend(list_copy(baseDistUnion->custom_path.custom_paths),
							subPath));
				newPaths = lappend(newPaths, newPath);
			}

			return newPaths;
		}

		default:
		{
			return NIL;
		}
	}
}


static List *
BroadcastInnerJoinPath(PlannerInfo *root, Path *originalPath)
{
	if (!EnableBroadcastJoin)
	{
		return NIL;
	}

	switch (originalPath->pathtype)
	{
		case T_NestLoop:
		case T_HashJoin:
		{
			const JoinPath *jpath = (JoinPath *) originalPath;
			List *newPaths = NIL;

			if (IsDistributedUnion(jpath->innerjoinpath, false, NULL))
			{
				/* keep the inner side distributed, broadcast the outer side */
				DistributedUnionPath *baseDistUnion =
					(DistributedUnionPath *) jpath->innerjoinpath;

				/*
				 * Shallow copy of any join node; this does not imply executing a
				 * nested join, but the nested join contains all the information we
				 * need to send the join to the worker.
				 */
				JoinPath *jcpath = makeNode(NestPath);
				*jcpath = *jpath;
				jcpath->path.type = T_NestPath;

				jcpath->innerjoinpath = baseDistUnion->worker_path;
				Path *subPath = jcpath->outerjoinpath;
				jcpath->outerjoinpath = CreateReadIntermediateResultPath(subPath);

				/* TODO update costs of hashjoin, very naive removal of DU cost for now */
				jcpath->path.startup_cost -= 1500;
				jcpath->path.total_cost -= 1500;

				Path *newPath = (Path *) WrapTableAccessWithDistributedUnion(
					(Path *) jcpath,
					baseDistUnion->colocationId,
					baseDistUnion->partitionValue,
					baseDistUnion->sampleRelid,
					lappend(list_copy(baseDistUnion->custom_path.custom_paths),
							subPath));
				newPaths = lappend(newPaths, newPath);
			}

			return newPaths;
		}

		default:
		{
			return NIL;
		}
	}
}


static Path *
CreateReadIntermediateResultPath(const Path *originalPath)
{
	/* TODO might require a custom path for read intermediate result */
	Path *path = makeNode(Path);
	path->pathtype = T_FunctionScan;
	path->parent = originalPath->parent;
	path->pathtarget = originalPath->pathtarget;

	/* TODO some network cost to be modelled */
	path->total_cost = originalPath->total_cost + 500;
	path->startup_cost = originalPath->startup_cost + 500;

	return path;
}


void
PathBasedPlannerJoinHook(PlannerInfo *root,
						 RelOptInfo *joinrel,
						 RelOptInfo *outerrel,
						 RelOptInfo *innerrel,
						 JoinType jointype,
						 JoinPathExtraData *extra)
{
	/*
	 * Adding a path to a list uses lappend, which might be destructive. Since we are
	 * looping over the paths we are adding to, we keep a list of new paths and only
	 * add them after we have found all the paths we want to add.
	 */
	List *newPaths = NIL;

	ListCell *pathCell = NULL;
	foreach(pathCell, joinrel->pathlist)
	{
		Path *originalPath = lfirst(pathCell);
		for (int i = 0; i < sizeof(joinOptimizations) / sizeof(joinOptimizations[0]);
			 i++)
		{
			List *alternativePaths = joinOptimizations[i](root, originalPath);
			newPaths = list_concat(newPaths, alternativePaths);
		}
	}

	Path *path = NULL;
	foreach_ptr(path, newPaths)
	{
		add_path(joinrel, path);
	}
}


/*
 * varno_mapping is an array indexed by the varno in the original query; the value is
 * the varno in the new query, or 0 if no mapping is required.
 */
static Node *
VarNoMutator(Node *expr, Index *varno_mapping)
{
	if (expr == NULL)
	{
		return NULL;
	}

	switch (nodeTag(expr))
	{
		case T_Var:
		{
			Var *var = castNode(Var, expr);
			Index newVarNo = varno_mapping[var->varno];
			if (newVarNo == 0)
			{
				/* no mapping required */
				return (Node *) var;
			}

			return (Node *) makeVar(
				newVarNo,
				var->varattno,
				var->vartype,
				var->vartypmod,
				var->varcollid,
				var->varlevelsup);
		}

		default:
		{
			return expression_tree_mutator(expr, (void *) VarNoMutator,
										   varno_mapping);
		}
	}
}


typedef struct PathQueryInfo
{
	/*
	 * Keep track of the mapping of varno's from the original query to the new query.
	 * This will be used to update the varno attributes of Var's in the quals and
	 * target list.
	 */
	Index *varno_mapping;
} PathQueryInfo;

static void
ApplyPathToQuery(PlannerInfo *root, Query *query, Path *path, PathQueryInfo *info)
{
	switch (path->pathtype)
	{
		case T_Agg:
		{
			AggPath *apath = castNode(AggPath, path);

			/* the subpath needs to be applied before we can apply the grouping clause */
			ApplyPathToQuery(root, query, apath->subpath, info);

			query->groupClause = apath->groupClause;
			break;
		}

		case T_BitmapHeapScan:
		{
			BitmapHeapPath *bpath = castNode(BitmapHeapPath, path);
			ApplyPathToQuery(root, query, bpath->bitmapqual, info);
			return;
		}

		case T_IndexScan:
		case T_IndexOnlyScan:
		case T_SeqScan:
		{
			/*
			 * Add the table as a source to the range table and keep track of the
			 * mapping with the original query.
			 */
			Index scan_relid = path->parent->relid;
			Index rteIndex = info->varno_mapping[scan_relid];

			if (rteIndex == 0)
			{
				/* not added before; add and keep a reference to the entry it got */
				RangeTblEntry *rte = root->simple_rte_array[scan_relid];
				query->rtable = lappend(query->rtable, rte);
				rteIndex = list_length(query->rtable);
				info->varno_mapping[scan_relid] = rteIndex;
			}

			/* add to from list */
			RangeTblRef *rr = makeNode(RangeTblRef);
			rr->rtindex = rteIndex;
			query->jointree->fromlist = lappend(query->jointree->fromlist, rr);

			List *quals = NIL;
			RestrictInfo *rinfo = NULL;
			foreach_ptr(rinfo, path->parent->baserestrictinfo)
			{
				Node *clause = (Node *) rinfo->clause;
				quals = lappend(quals, clause);
			}
			if (list_length(quals) > 0)
			{
				Node *qualsAnd = (Node *) make_ands_explicit(quals);
				query->jointree->quals = make_and_qual(query->jointree->quals,
													   qualsAnd);
			}

			break;
		}

		case T_NestLoop:
		case T_HashJoin:
		{
			JoinPath *jpath = (JoinPath *) path;

			/* add both join paths to the query */
			ApplyPathToQuery(root, query, jpath->outerjoinpath, info);
			ApplyPathToQuery(root, query, jpath->innerjoinpath, info);

			List *quals = NIL;
			RestrictInfo *rinfo = NULL;
			foreach_ptr(rinfo, jpath->joinrestrictinfo)
			{
				Node *clause = (Node *) rinfo->clause;
				quals = lappend(quals, clause);
			}
			if (list_length(quals) > 0)
			{
				Node *qualsAnd = (Node *) make_ands_explicit(quals);
				query->jointree->quals = make_and_qual(query->jointree->quals,
													   qualsAnd);
			}

			break;
		}

		/* TODO temporary placeholder for read_intermediate_result */
		case T_FunctionScan:
		{
			Oid functionOid = CitusReadIntermediateResultFuncId();

			/* result_id text */
			Const *resultIdConst = makeNode(Const);
			resultIdConst->consttype = TEXTOID;
			resultIdConst->consttypmod = -1;
			resultIdConst->constlen = -1;
			resultIdConst->constvalue = CStringGetTextDatum("0_1");
			resultIdConst->constbyval = false;
			resultIdConst->constisnull = false;
			resultIdConst->location = -1;

			/* format citus_copy_format DEFAULT 'csv'::citus_copy_format */
			Oid copyFormatId = BinaryCopyFormatId();
			Const *resultFormatConst = makeNode(Const);
			resultFormatConst->consttype = CitusCopyFormatTypeId();
			resultFormatConst->consttypmod = -1;
			resultFormatConst->constlen = 4;
			resultFormatConst->constvalue = ObjectIdGetDatum(copyFormatId);
			resultFormatConst->constbyval = true;
			resultFormatConst->constisnull = false;
			resultFormatConst->location = -1;

			/* build the call to read_intermediate_result */
			FuncExpr *funcExpr = makeNode(FuncExpr);
			funcExpr->funcid = functionOid;
			funcExpr->funcretset = true;
			funcExpr->funcvariadic = false;
			funcExpr->funcformat = 0;
			funcExpr->funccollid = 0;
			funcExpr->inputcollid = 0;
			funcExpr->location = -1;
			funcExpr->args = list_make2(resultIdConst, resultFormatConst);

			List *funcColNames = NIL;
			List *funcColTypes = NIL;
			List *funcColTypMods = NIL;
			List *funcColCollations = NIL;
			Node *expr = NULL;
			foreach_ptr(expr, path->pathtarget->exprs)
			{
				Oid colType = exprType(expr);
				Oid colCollation = exprCollation(expr);
				int32 colTypeMod = exprTypmod(expr);

				/* TODO resolve the actual column name */
				funcColNames = lappend(funcColNames, makeString("t1.b"));
				funcColTypes = lappend_oid(funcColTypes, colType);

				/* typmods are int32, collations are Oids; use the matching appenders */
				funcColTypMods = lappend_int(funcColTypMods, colTypeMod);
				funcColCollations = lappend_oid(funcColCollations, colCollation);
			}

			/* build the RTE for the call to read_intermediate_result */
			RangeTblFunction *rangeTableFunction = makeNode(RangeTblFunction);
			rangeTableFunction->funccolcount = list_length(funcColNames);
			rangeTableFunction->funccolnames = funcColNames;
			rangeTableFunction->funccoltypes = funcColTypes;
			rangeTableFunction->funccoltypmods = funcColTypMods;
			rangeTableFunction->funccolcollations = funcColCollations;
			rangeTableFunction->funcparams = NULL;
			rangeTableFunction->funcexpr = (Node *) funcExpr;

			Alias *funcAlias = makeNode(Alias);
			funcAlias->aliasname = "Distributed Subplan 0_1";
			funcAlias->colnames = funcColNames;

			RangeTblEntry *rangeTableEntry = makeNode(RangeTblEntry);
			rangeTableEntry->rtekind = RTE_FUNCTION;
			rangeTableEntry->functions = list_make1(rangeTableFunction);
			rangeTableEntry->inFromCl = true;
			rangeTableEntry->eref = funcAlias;

			/* add the RangeTblEntry */
			query->rtable = lappend(query->rtable, rangeTableEntry);
			Index rteIndex = list_length(query->rtable);
			Index scan_relid = path->parent->relid;
			info->varno_mapping[scan_relid] = rteIndex;

			RangeTblRef *rr = makeNode(RangeTblRef);
			rr->rtindex = rteIndex;
			query->jointree->fromlist = lappend(query->jointree->fromlist, rr);

			break;
		}

		case T_CustomScan:
		{
			if (IsGeoScanPath(castNode(CustomPath, path)))
			{
				GeoScanPath *geoPath = (GeoScanPath *) path;

				Index scan_relid = path->parent->relid;
				Index rteIndex = info->varno_mapping[scan_relid];

				if (rteIndex == 0)
				{
					RangeTblEntry *rte = geoPath->rte;
					query->rtable = lappend(query->rtable, rte);
					rteIndex = list_length(query->rtable);
					info->varno_mapping[scan_relid] = rteIndex;
				}

				/* add to from list */
				RangeTblRef *rr = makeNode(RangeTblRef);
				rr->rtindex = rteIndex;
				query->jointree->fromlist = lappend(query->jointree->fromlist, rr);

				break;
			}
		}
		/* fallthrough to error */

		default:
		{
			ereport(ERROR, (errmsg("unknown path type in worker query"),
							errdetail("cannot turn worker path into query due to "
									  "unknown path type in plan. pathtype: %d",
									  path->pathtype)));
		}
	}
}
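

/*
 * GetQueryFromPath turns a worker path into an executable Query. As an illustration
 * (sketch, with hypothetical names): for a SeqScan path on rel `t` with a base
 * restriction `a = 1` and a target list containing `t.b`, the produced query is
 * equivalent to `SELECT t.b FROM t WHERE a = 1`, modulo the varno remapping described
 * below.
 */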
/*
 * When varnoMapping is set it receives an array mapping varno's in the new query back
 * to the varno's of the source query. This can later be used to understand whether the
 * Var's used in this query come from an outer rel in a nested loop.
 */
static Query *
GetQueryFromPath(PlannerInfo *root, Path *path, List *tlist, List *clauses,
				 Index **varnoMapping)
{
	PathQueryInfo info = { 0 };
	info.varno_mapping = palloc0(sizeof(Index) * root->simple_rel_array_size);

	Query *q = makeNode(Query);
	q->commandType = CMD_SELECT;
	q->jointree = makeNode(FromExpr);
	ApplyPathToQuery(root, q, path, &info);

	/* copy the target list with mapped varno values to reflect the tables we select */
	List *newTargetList = (List *) VarNoMutator((Node *) tlist, info.varno_mapping);

	q->targetList = newTargetList;

	List *quals = NIL;

	RestrictInfo *rinfo = NULL;
	foreach_ptr(rinfo, clauses)
	{
		Node *clause = (Node *) rinfo->clause;
		quals = lappend(quals, clause);
	}

	if (list_length(quals) > 0)
	{
		Node *qualsAnd = (Node *) make_ands_explicit(quals);
		q->jointree->quals = make_and_qual(q->jointree->quals, qualsAnd);
	}
	q->jointree->quals = VarNoMutator(q->jointree->quals, info.varno_mapping);

	if (varnoMapping)
	{
		/* export the reverse varno mapping */
		*varnoMapping = palloc0(sizeof(Index) * root->simple_rel_array_size);
		for (int i = 0; i < root->simple_rel_array_size; i++)
		{
			Index varno = info.varno_mapping[i];
			if (varno == 0)
			{
				continue;
			}
			(*varnoMapping)[varno] = i;
		}
	}

	return q;
}


void
PathBasedPlannedUpperPathHook(PlannerInfo *root,
							  UpperRelationKind stage,
							  RelOptInfo *input_rel,
							  RelOptInfo *output_rel,
							  void *extra)
{
	if (!UseCustomPath)
	{
		/* path based planner is turned off, don't do anything here */
		return;
	}

	switch (stage)
	{
		case UPPERREL_GROUP_AGG:
		{
			PathBasedPlannerGroupAgg(root, input_rel, output_rel, extra);
			return;
		}

		default:
		{
			/*
			 * no optimizations implemented; beers for the one that removes this due
			 * to it being unreachable
			 */
			return;
		}
	}
}


static void
PathBasedPlannerGroupAgg(PlannerInfo *root,
						 RelOptInfo *input_rel,
						 RelOptInfo *output_rel,
						 void *extra)
{
	/*
	 * Here we want to find proof that the group by is right above a distributed union
	 * that is partitioned by the grouping key. If that is the case we can pull the
	 * distributed union above the aggregate, which causes it to optimize the plan.
	 *
	 * TODO we just replace the plans for now, but during development we have
	 * encountered a plan that would be better if the grouping were not pushed down.
	 * When the grouping is solely on a primary key the number of rows stays the same,
	 * while the width increases due to any aggregates that could be performed on the
	 * data. Such a plan has lower network traffic if the grouping is not pushed down.
	 * Instead of replacing, it would benefit the planner to add a new path according
	 * to the potential optimization of pushing down. If <no. rows> * <row width> were
	 * taken into account in the cost of the plan, this would cause magic to happen
	 * which we currently could not support.
	 */

	ListCell *pathCell = NULL;
	foreach(pathCell, output_rel->pathlist)
	{
		Path *originalPath = lfirst(pathCell);
		Path *optimizedGroupAdd = OptimizeGroupAgg(root, originalPath);
		SetListCellPtr(pathCell, optimizedGroupAdd);
	}
}


static Path *
OptimizeGroupAgg(PlannerInfo *root, Path *originalPath)
{
	switch (originalPath->pathtype)
	{
		case T_Agg:
		{
			AggPath *apath = castNode(AggPath, originalPath);
			if (CanOptimizeAggPath(root, apath))
			{
				DistributedUnionPath *distUnion =
					(DistributedUnionPath *) apath->subpath;
				apath->subpath = distUnion->worker_path;

				/* TODO better cost model; for now subtract the DU costs */
				apath->path.startup_cost -= 1000;
				apath->path.total_cost -= 1000;

				return (Path *) WrapTableAccessWithDistributedUnion(
					(Path *) apath,
					distUnion->colocationId,
					distUnion->partitionValue,
					distUnion->sampleRelid,
					distUnion->custom_path.custom_paths);
			}
		}
		/* fallthrough: an aggregate we cannot optimize */

		default:
		{
			/* no optimizations to be performed */
			return originalPath;
		}
	}
}


static bool
CanOptimizeAggPath(PlannerInfo *root, AggPath *apath)
{
	if (apath->groupClause == NULL)
	{
		return false;
	}

	if (!IsDistributedUnion(apath->subpath, false, NULL))
	{
		/*
		 * we can only optimize if the path below is a distributed union that we can
		 * pull up; if the path below is not a distributed union we cannot optimize
		 */
		return false;
	}

	SortGroupClause *sgc = NULL;

	/*
	 * TODO verify what the purpose of this list is; if we find any of the
	 * distribution columns somewhere in it we optimize, which might be wrong
	 */
	foreach_ptr(sgc, apath->groupClause)
	{
		PathTarget *target = apath->path.pathtarget;
		Expr *targetExpr = NULL;
		Index i = 0;
		foreach_ptr(targetExpr, target->exprs)
		{
			Index targetSortGroupRef = target->sortgrouprefs[i];
			i++;

			if (targetSortGroupRef != sgc->tleSortGroupRef)
			{
				continue;
			}

			if (!IsA(targetExpr, Var))
			{
				continue;
			}

			Var *targetVar = castNode(Var, targetExpr);
			Index rteIndex = targetVar->varno;
			RangeTblEntry *rte = root->simple_rte_array[rteIndex];

			CitusTableCacheEntry *cacheEntry = GetCitusTableCacheEntry(rte->relid);
			if (cacheEntry->partitionColumn == NULL)
			{
				/* not distributed by a particular column; a reference table? */
				continue;
			}

			if (cacheEntry->partitionColumn->varattno == targetVar->varattno)
			{
				/*
				 * the grouping column contains the distribution column of a
				 * distributed table, safe to optimize
				 */
				return true;
			}
		}
	}

	return false;
}