diff --git a/src/backend/distributed/executor/multi_executor.c b/src/backend/distributed/executor/multi_executor.c index 3a07aecb2..13359d3d5 100644 --- a/src/backend/distributed/executor/multi_executor.c +++ b/src/backend/distributed/executor/multi_executor.c @@ -57,23 +57,18 @@ multi_ExecutorStart(QueryDesc *queryDesc, int eflags) executorType = JobExecutorType(multiPlan); if (executorType == MULTI_EXECUTOR_ROUTER) { - Task *task = NULL; - List *taskList = workerJob->taskList; TupleDesc tupleDescriptor = ExecCleanTypeFromTL( planStatement->planTree->targetlist, false); List *dependendJobList PG_USED_FOR_ASSERTS_ONLY = workerJob->dependedJobList; - /* router executor can only execute distributed plans with a single task */ - Assert(list_length(taskList) == 1); + /* router executor cannot execute tasks with dependencies */ Assert(dependendJobList == NIL); - task = (Task *) linitial(taskList); - /* we need to set tupleDesc in executorStart */ queryDesc->tupDesc = tupleDescriptor; /* drop into the router executor */ - RouterExecutorStart(queryDesc, eflags, task); + RouterExecutorStart(queryDesc, eflags); } else { diff --git a/src/backend/distributed/executor/multi_router_executor.c b/src/backend/distributed/executor/multi_router_executor.c index 071038024..40f9df47f 100644 --- a/src/backend/distributed/executor/multi_router_executor.c +++ b/src/backend/distributed/executor/multi_router_executor.c @@ -100,8 +100,8 @@ static int64 ExecuteModifyTasks(List *taskList, bool expectResults, TupleDesc tupleDescriptor); static List * TaskShardIntervalList(List *taskList); static void AcquireExecutorShardLock(Task *task, CmdType commandType); -static void AcquireExecutorMultiShardLocks(List *shardIntervalList); -static bool IsReplicated(List *shardIntervalList); +static void AcquireExecutorMultiShardLocks(List *taskList); +static bool RequiresConsistentSnapshot(Task *task); static uint64 ReturnRowsFromTuplestore(uint64 tupleCount, TupleDesc tupleDescriptor, DestReceiver 
*destination, Tuplestorestate *tupleStore); @@ -133,14 +133,11 @@ static void MarkRemainingInactivePlacements(void); * execution. */ void -RouterExecutorStart(QueryDesc *queryDesc, int eflags, Task *task) +RouterExecutorStart(QueryDesc *queryDesc, int eflags) { EState *executorState = NULL; CmdType commandType = queryDesc->operation; - /* ensure that the task is not NULL */ - Assert(task != NULL); - /* disallow triggers during distributed modify commands */ if (commandType != CMD_SELECT) { @@ -314,15 +311,38 @@ AcquireExecutorShardLock(Task *task, CmdType commandType) { LockShardResource(shardId, lockMode); } + + /* + * If the task has a subselect, then we may need to lock the shards from which + * the query selects as well to prevent the subselects from seeing different + * results on different replicas. In particular this prevents INSERT.. SELECT + * commands from having a different effect on different placements. + */ + if (RequiresConsistentSnapshot(task)) + { + /* + * ExclusiveLock conflicts with all lock types used by modifications + * and therefore prevents other modifications from running + * concurrently. + */ + + LockShardListResources(task->selectShardList, ExclusiveLock); + } } /* - * AcquireExecutorMultiShardLocks acquires shard locks need for execution - * of writes on multiple shards. + * AcquireExecutorMultiShardLocks acquires shard locks needed for execution + * of writes on multiple shards. In addition to honouring commutativity + * rules, we currently only allow a single multi-shard command on a shard at + * a time. Otherwise, concurrent multi-shard commands may take row-level + * locks on the shard placements in a different order and create a distributed + * deadlock. This applies even when writes are commutative and/or there is + * no replication. * * 1. If citus.all_modifications_commutative is set to true, then all locks * are acquired as ShareUpdateExclusiveLock. + * * 2. 
If citus.all_modifications_commutative is false, then only the shards * with 2 or more replicas are locked with ExclusiveLock. Otherwise, the * lock is acquired with ShareUpdateExclusiveLock. @@ -330,65 +350,121 @@ AcquireExecutorShardLock(Task *task, CmdType commandType) * ShareUpdateExclusiveLock conflicts with itself such that only one * multi-shard modification at a time is allowed on a shard. It also conflicts * with ExclusiveLock, which ensures that updates/deletes/upserts are applied - * in the same order on all placements. It does not conflict with ShareLock, - * which is normally obtained by single-shard commutative writes. + * in the same order on all placements. It does not conflict with + * RowExclusiveLock, which is normally obtained by single-shard, commutative + * writes. */ static void -AcquireExecutorMultiShardLocks(List *shardIntervalList) +AcquireExecutorMultiShardLocks(List *taskList) { - LOCKMODE lockMode = NoLock; + ListCell *taskCell = NULL; - if (AllModificationsCommutative || !IsReplicated(shardIntervalList)) + foreach(taskCell, taskList) { + Task *task = (Task *) lfirst(taskCell); + LOCKMODE lockMode = NoLock; + + if (AllModificationsCommutative || list_length(task->taskPlacementList) == 1) + { + /* + * When all writes are commutative then we only need to prevent multi-shard + * commands from running concurrently with each other and with commands + * that are explicitly non-commutative. When there is no replication then + * we only need to prevent concurrent multi-shard commands. + * + * In either case, ShareUpdateExclusive has the desired effect, since + * it conflicts with itself and ExclusiveLock (taken by non-commutative + * writes). + */ + + lockMode = ShareUpdateExclusiveLock; + } + else + { + /* + * When there is replication, prevent all concurrent writes to the same + * shards to ensure the writes are ordered. 
+ */ + + lockMode = ExclusiveLock; + } + + LockShardResource(task->anchorShardId, lockMode); + /* - * When all writes are commutative then we only need to prevent multi-shard - * commands from running concurrently with each other and with commands - * that are explicitly non-commutative. When there is not replication then - * we only need to prevent concurrent multi-shard commands. - * - * In either case, ShareUpdateExclusive has the desired effect, since - * it conflicts with itself and ExclusiveLock (taken by non-commutative - * writes). + * If the task has a subselect, then we may need to lock the shards from which + * the query selects as well to prevent the subselects from seeing different + * results on different replicas. In particular this prevents INSERT..SELECT + * commands from having different effects on different placements. */ - lockMode = ShareUpdateExclusiveLock; - } - else - { - /* - * When there is replication, prevent all concurrent writes to the same - * shards to ensure the writes are ordered. - */ - lockMode = ExclusiveLock; - } + if (RequiresConsistentSnapshot(task)) + { + /* + * ExclusiveLock conflicts with all lock types used by modifications + * and therefore prevents other modifications from running + * concurrently. + */ - LockShardListResources(shardIntervalList, lockMode); + LockShardListResources(task->selectShardList, ExclusiveLock); + } + } } /* - * IsReplicated checks whether any of the shards in the given list has more - * than one replica. + * RequiresConsistentSnapshot returns true if the given task needs to take + * the necessary locks to ensure that a subquery in the INSERT ... SELECT + * query returns the same output for all task placements. 
*/ static bool -IsReplicated(List *shardIntervalList) +RequiresConsistentSnapshot(Task *task) { - ListCell *shardIntervalCell; - bool hasReplication = false; + bool requiresIsolation = false; - foreach(shardIntervalCell, shardIntervalList) + if (!task->insertSelectQuery) { - ShardInterval *shardInterval = (ShardInterval *) lfirst(shardIntervalCell); - uint64 shardId = shardInterval->shardId; - List *shardPlacementList = FinalizedShardPlacementList(shardId); - if (shardPlacementList->length > 1) - { - hasReplication = true; - break; - } + /* + * Only INSERT/SELECT commands currently require SELECT isolation. + * Other commands do not read from other shards. + */ + + requiresIsolation = false; + } + else if (list_length(task->taskPlacementList) == 1) + { + /* + * If there is only one replica then we fully rely on PostgreSQL to + * provide SELECT isolation. In this case, we do not provide isolation + * across the shards, but that was never our intention. + */ + + requiresIsolation = false; + } + else if (AllModificationsCommutative) + { + /* + * An INSERT/SELECT is commutative with other writes if it excludes + * any ongoing writes based on the filter conditions. Without knowing + * whether this is true, we assume the user took this into account + * when enabling citus.all_modifications_commutative. This option + * gives users an escape from aggressive locking during INSERT/SELECT. + */ + + requiresIsolation = false; + } + else + { + /* + * If this is a non-commutative write, then we need to block ongoing + * writes to make sure that the subselect returns the same result + * on all placements. 
+ */ + + requiresIsolation = true; } - return hasReplication; + return requiresIsolation; } @@ -812,7 +888,7 @@ ExecuteModifyTasks(List *taskList, bool expectResults, ParamListInfo paramListIn shardIntervalList = TaskShardIntervalList(taskList); /* ensure that there are no concurrent modifications on the same shards */ - AcquireExecutorMultiShardLocks(shardIntervalList); + AcquireExecutorMultiShardLocks(taskList); /* open connection to all relevant placements, if not already open */ OpenTransactionsToAllShardPlacements(shardIntervalList, userName); diff --git a/src/backend/distributed/executor/multi_server_executor.c b/src/backend/distributed/executor/multi_server_executor.c index 703fa4449..81e12e91b 100644 --- a/src/backend/distributed/executor/multi_server_executor.c +++ b/src/backend/distributed/executor/multi_server_executor.c @@ -49,7 +49,7 @@ JobExecutorType(MultiPlan *multiPlan) double tasksPerNode = taskCount / ((double) workerNodeCount); int dependedJobCount = list_length(job->dependedJobList); MultiExecutorType executorType = TaskExecutorType; - bool routerExecutablePlan = RouterExecutablePlan(multiPlan, executorType); + bool routerExecutablePlan = multiPlan->routerExecutable; /* check if can switch to router executor */ if (routerExecutablePlan) @@ -109,78 +109,6 @@ JobExecutorType(MultiPlan *multiPlan) } -/* - * RouterExecutablePlan returns whether a multi-plan can be executed using the - * router executor. Modify queries are always router executable, select queries - * are router executable only if executorType is real time. 
- */ -bool -RouterExecutablePlan(MultiPlan *multiPlan, MultiExecutorType executorType) -{ - Job *job = multiPlan->workerJob; - TaskType taskType = TASK_TYPE_INVALID_FIRST; - Query *masterQuery = multiPlan->masterQuery; - List *workerTaskList = job->taskList; - int taskCount = list_length(workerTaskList); - int dependedJobCount = list_length(job->dependedJobList); - Task *workerTask = NULL; - List *workerDependentTaskList = NIL; - bool masterQueryHasAggregates = false; - - /* router executor cannot execute queries that hit more than one shard */ - if (taskCount != 1) - { - return false; - } - - /* check if the first task is a modify or a router task, short-circuit if so */ - workerTask = (Task *) linitial(workerTaskList); - taskType = workerTask->taskType; - if (taskType == MODIFY_TASK || taskType == ROUTER_TASK) - { - return true; - } - - if (executorType == MULTI_EXECUTOR_TASK_TRACKER) - { - return false; - } - - /* router executor cannot execute repartition jobs */ - if (dependedJobCount > 0) - { - return false; - } - - /* router executor cannot execute queries with dependent data fetch tasks */ - workerDependentTaskList = workerTask->dependedTaskList; - if (list_length(workerDependentTaskList) > 0) - { - return false; - } - - /* router executor cannot execute queries with order by */ - if (masterQuery != NULL && list_length(masterQuery->sortClause) > 0) - { - return false; - } - - /* - * Router executor cannot execute queries with aggregates. - * Note that worker query having an aggregate means that the master query should - * have either an aggregate or a function expression which has to be executed for - * the correct results. - */ - masterQueryHasAggregates = job->jobQuery->hasAggs; - if (masterQueryHasAggregates) - { - return false; - } - - return true; -} - - /* * MaxMasterConnectionCount returns the number of connections a master can open. * A master cannot create more than a certain number of file descriptors (FDs). 
diff --git a/src/backend/distributed/planner/multi_explain.c b/src/backend/distributed/planner/multi_explain.c index f9c3a9473..0e3e90b33 100644 --- a/src/backend/distributed/planner/multi_explain.c +++ b/src/backend/distributed/planner/multi_explain.c @@ -228,7 +228,7 @@ MultiExplainOneQuery(Query *query, IntoClause *into, ExplainState *es, es->indent += 1; } - routerExecutablePlan = RouterExecutablePlan(multiPlan, TaskExecutorType); + routerExecutablePlan = multiPlan->routerExecutable; if (routerExecutablePlan) { diff --git a/src/backend/distributed/planner/multi_logical_optimizer.c b/src/backend/distributed/planner/multi_logical_optimizer.c index 5242e3cdc..e56a00ab0 100644 --- a/src/backend/distributed/planner/multi_logical_optimizer.c +++ b/src/backend/distributed/planner/multi_logical_optimizer.c @@ -152,7 +152,6 @@ static void ErrorIfCannotPushdownSubquery(Query *subqueryTree, bool outerQueryHa static void ErrorIfUnsupportedTableCombination(Query *queryTree); static void ErrorIfUnsupportedUnionQuery(Query *unionQuery); static bool TargetListOnPartitionColumn(Query *query, List *targetEntryList); -static bool IsPartitionColumnRecursive(Expr *columnExpression, Query *query); static FieldSelect * CompositeFieldRecursive(Expr *expression, Query *query); static bool FullCompositeFieldList(List *compositeFieldList); static Query * LateralQuery(Query *query); @@ -3318,7 +3317,7 @@ TargetListOnPartitionColumn(Query *query, List *targetEntryList) * Note that if the given expression is a field of a composite type, then this * function checks if this composite column is a partition column. */ -static bool +bool IsPartitionColumnRecursive(Expr *columnExpression, Query *query) { bool isPartitionColumn = false; @@ -3863,7 +3862,7 @@ ErrorIfUnsupportedShardDistribution(Query *query) /* * RelationIdList returns list of unique relation ids in query tree. 
*/ -List * +static List * RelationIdList(Query *query) { List *rangeTableList = NIL; diff --git a/src/backend/distributed/planner/multi_physical_planner.c b/src/backend/distributed/planner/multi_physical_planner.c index 70e819d6d..fdfc38c8e 100644 --- a/src/backend/distributed/planner/multi_physical_planner.c +++ b/src/backend/distributed/planner/multi_physical_planner.c @@ -36,6 +36,7 @@ #include "distributed/multi_logical_optimizer.h" #include "distributed/multi_logical_planner.h" #include "distributed/multi_physical_planner.h" +#include "distributed/multi_server_executor.h" #include "distributed/pg_dist_partition.h" #include "distributed/pg_dist_shard.h" #include "distributed/shardinterval_utils.h" @@ -115,7 +116,8 @@ static uint32 HashPartitionCount(void); static ArrayType * SplitPointObject(ShardInterval **shardIntervalArray, uint32 shardIntervalCount); -/* Local functions forward declarations for task list creation */ +/* Local functions forward declarations for task list creation and helper functions */ +static bool MultiPlanRouterExecutable(MultiPlan *multiPlan); static Job * BuildJobTreeTaskList(Job *jobTree); static List * SubquerySqlTaskList(Job *job); static List * SqlTaskList(Job *job); @@ -130,8 +132,6 @@ static OperatorCacheEntry * LookupOperatorByType(Oid typeId, Oid accessMethodId, int16 strategyNumber); static Oid GetOperatorByType(Oid typeId, Oid accessMethodId, int16 strategyNumber); static Node * HashableClauseMutator(Node *originalNode, Var *partitionColumn); -static Var * MakeInt4Column(void); -static Const * MakeInt4Constant(Datum constantValue); static OpExpr * MakeHashedOperatorExpression(OpExpr *operatorExpression); static List * BuildRestrictInfoList(List *qualList); static List * FragmentCombinationList(List *rangeTableFragmentsList, Query *jobQuery, @@ -150,8 +150,6 @@ static List * DataFetchTaskList(uint64 jobId, uint32 taskIdIndex, List *fragment static StringInfo NodeNameArrayString(List *workerNodeList); static StringInfo 
NodePortArrayString(List *workerNodeList); static StringInfo DatumArrayString(Datum *datumArray, uint32 datumCount, Oid datumTypeId); -static Task * CreateBasicTask(uint64 jobId, uint32 taskId, TaskType taskType, - char *queryString); static void UpdateRangeTableAlias(List *rangeTableList, List *fragmentList); static Alias * FragmentAlias(RangeTblEntry *rangeTableEntry, RangeTableFragment *fragment); @@ -222,11 +220,73 @@ MultiPhysicalPlanCreate(MultiTreeRoot *multiTree) multiPlan->workerJob = workerJob; multiPlan->masterQuery = masterQuery; multiPlan->masterTableName = jobSchemaName->data; + multiPlan->routerExecutable = MultiPlanRouterExecutable(multiPlan); return multiPlan; } +/* + * MultiPlanRouterExecutable returns true if the input multiPlan is + * router executable. + * + * Note that all the multi plans that are created by router planner are + * already router executable. Thus, this function should only be called + * for multi plans that are not generated by router planner. + */ +static bool +MultiPlanRouterExecutable(MultiPlan *multiPlan) +{ + Query *masterQuery = multiPlan->masterQuery; + Job *job = multiPlan->workerJob; + List *workerTaskList = job->taskList; + int taskCount = list_length(workerTaskList); + int dependedJobCount = list_length(job->dependedJobList); + bool masterQueryHasAggregates = false; + + /* router executor cannot execute SELECT queries that hit more than one shard */ + if (taskCount != 1) + { + return false; + } + + /* router executor cannot execute repartition jobs */ + if (dependedJobCount > 0) + { + return false; + } + + /* + * Router executor does not run master query. This means that aggregation and + * sorting on the master query wouldn't be executed. Thus, such plans shouldn't be + * qualified as router executable. 
+ */ + if (masterQuery != NULL && list_length(masterQuery->sortClause) > 0) + { + return false; + } + + /* + * Note that worker query having an aggregate means that the master query should + * have either an aggregate or a function expression which has to be executed for + * the correct results. + */ + masterQueryHasAggregates = job->jobQuery->hasAggs; + if (masterQueryHasAggregates) + { + return false; + } + + /* FIXME: I tend to think it's time to remove this */ + if (TaskExecutorType != MULTI_EXECUTOR_REAL_TIME) + { + return false; + } + + return true; +} + + /* * BuildJobTree builds the physical job tree from the given logical plan tree. * The function walks over the logical plan from the bottom up, finds boundaries @@ -3017,7 +3077,7 @@ MakeHashedOperatorExpression(OpExpr *operatorExpression) * MakeInt4Column creates a column of int4 type with invalid table id and max * attribute number. */ -static Var * +Var * MakeInt4Column() { Index tableId = 0; @@ -3037,7 +3097,7 @@ MakeInt4Column() * MakeInt4Constant creates a new constant of int4 type and assigns the given * value as a constant value. */ -static Const * +Const * MakeInt4Constant(Datum constantValue) { Oid constantType = INT4OID; @@ -3939,7 +3999,7 @@ DatumArrayString(Datum *datumArray, uint32 datumCount, Oid datumTypeId) * CreateBasicTask creates a task, initializes fields that are common to each task, * and returns the created task. 
*/ -static Task * +Task * CreateBasicTask(uint64 jobId, uint32 taskId, TaskType taskType, char *queryString) { Task *task = CitusMakeNode(Task); diff --git a/src/backend/distributed/planner/multi_planner.c b/src/backend/distributed/planner/multi_planner.c index 8cb91cd28..338b74998 100644 --- a/src/backend/distributed/planner/multi_planner.c +++ b/src/backend/distributed/planner/multi_planner.c @@ -59,6 +59,27 @@ multi_planner(Query *parse, int cursorOptions, ParamListInfo boundParams) if (needsDistributedPlanning) { originalQuery = copyObject(parse); + + /* + * We implement INSERT INTO .. SELECT by pushing down the SELECT to + * each shard. To compute that we use the router planner, by adding + * an "uninstantiated" constraint that the partition column be equal to a + * certain value. standard_planner() distributes that constraint to + * the baserestrictinfos to all the tables where it knows how to push + * the restriction safely. An example is that the tables that are + * connected via equi joins. + * + * The router planner then iterates over the target table's shards, + * for each we replace the "uninstantiated" restriction, with one that + * PruneShardList() handles, and then generate a query for that + * individual shard. If any of the involved tables don't prune down + * to a single shard, or if the pruned shards aren't colocated, + * we error out. 
+ */ + if (InsertSelectQuery(parse)) + { + AddUninstantiatedPartitionRestriction(parse); + } } /* create a restriction context and put it at the end if context list */ diff --git a/src/backend/distributed/planner/multi_router_planner.c b/src/backend/distributed/planner/multi_router_planner.c index 46acf055e..686a56594 100644 --- a/src/backend/distributed/planner/multi_router_planner.c +++ b/src/backend/distributed/planner/multi_router_planner.c @@ -17,14 +17,17 @@ #include "access/stratnum.h" #include "access/xact.h" +#include "catalog/pg_opfamily.h" #include "distributed/citus_clauses.h" #include "catalog/pg_type.h" +#include "distributed/colocation_utils.h" #include "distributed/citus_nodes.h" #include "distributed/citus_nodefuncs.h" #include "distributed/master_metadata_utility.h" #include "distributed/metadata_cache.h" #include "distributed/multi_join_order.h" #include "distributed/multi_logical_planner.h" +#include "distributed/multi_logical_optimizer.h" #include "distributed/multi_physical_planner.h" #include "distributed/multi_router_executor.h" #include "distributed/multi_router_planner.h" @@ -46,11 +49,14 @@ #include "optimizer/restrictinfo.h" #include "optimizer/var.h" #include "parser/parsetree.h" +#include "parser/parse_oper.h" #include "storage/lock.h" +#include "utils/builtins.h" #include "utils/elog.h" #include "utils/errcodes.h" #include "utils/lsyscache.h" #include "utils/rel.h" +#include "utils/typcache.h" #include "catalog/pg_proc.h" #include "optimizer/planmain.h" @@ -65,6 +71,21 @@ typedef struct WalkerState /* planner functions forward declarations */ +static MultiPlan * CreateSingleTaskRouterPlan(Query *originalQuery, Query *query, + RelationRestrictionContext * + restrictionContext); +static MultiPlan * CreateInsertSelectRouterPlan(Query *originalQuery, + RelationRestrictionContext * + restrictionContext); +static Task * RouterModifyTaskForShardInterval(Query *originalQuery, + ShardInterval *shardInterval, + RelationRestrictionContext * + 
restrictionContext, + uint32 taskIdIndex); +static void AddShardIntervalRestrictionToSelect(Query *subqery, + ShardInterval *shardInterval); +static RangeTblEntry * ExtractSelectRangeTableEntry(Query *query); +static RangeTblEntry * ExtractInsertRangeTableEntry(Query *query); static bool MasterIrreducibleExpression(Node *expression, bool *varArgument, bool *badCoalesce); static bool MasterIrreducibleExpressionWalker(Node *expression, WalkerState *state); @@ -79,9 +100,13 @@ static ShardInterval * FastShardPruning(Oid distributedTableId, Const *partionColumnValue); static Oid ExtractFirstDistributedTableId(Query *query); static Const * ExtractInsertPartitionValue(Query *query, Var *partitionColumn); -static Task * RouterSelectTask(Query *originalQuery, Query *query, +static Task * RouterSelectTask(Query *originalQuery, RelationRestrictionContext *restrictionContext, List **placementList); +static bool RouterSelectQuery(Query *originalQuery, + RelationRestrictionContext *restrictionContext, + List **placementList, uint64 *anchorShardId, + List **selectShardList); static List * TargetShardIntervalsForSelect(Query *query, RelationRestrictionContext *restrictionContext); static List * WorkersContainingAllShards(List *prunedShardIntervalsList); @@ -91,26 +116,38 @@ static bool UpdateRelationNames(Node *node, static Job * RouterQueryJob(Query *query, Task *task, List *placementList); static bool MultiRouterPlannableQuery(Query *query, MultiExecutorType taskExecutorType, RelationRestrictionContext *restrictionContext); -static bool InsertSelectQuery(Query *query); +static RelationRestrictionContext * CopyRelationRestrictionContext( + RelationRestrictionContext *oldContext); +static Node * InstantiatePartitionQual(Node *node, void *context); +static void ErrorIfInsertSelectQueryNotSupported(Query *queryTree, + RangeTblEntry *insertRte, + RangeTblEntry *subqueryRte); +static void ErrorIfMultiTaskRouterSelectQueryUnsupported(Query *query); +static void 
ErrorIfInsertPartitionColumnDoesNotMatchSelect(Query *query, + RangeTblEntry *insertRte, + RangeTblEntry *subqueryRte, + Oid * + selectPartitionColumnTableId); +static void AddUninstantiatedEqualityQual(Query *query, Var *targetPartitionColumnVar); + /* - * MultiRouterPlanCreate creates a physical plan for given query. The created plan is - * either a modify task that changes a single shard, or a router task that returns - * query results from a single shard. Supported modify queries (insert/update/delete) - * are router plannable by default. If query is not router plannable then the function - * returns NULL. + * MultiRouterPlanCreate creates a multi plan for the queries + * that include the following: + * (i) modification queries that hit a single shard + * (ii) select queries that can be executed on a single worker + * node and do not require any operations on the master node. + * (iii) INSERT INTO .... SELECT queries + * + * The function returns NULL if it cannot create the plan for SELECT + * queries and errors out if it cannot plan the modify queries. 
*/ MultiPlan * MultiRouterPlanCreate(Query *originalQuery, Query *query, MultiExecutorType taskExecutorType, RelationRestrictionContext *restrictionContext) { - Task *task = NULL; - Job *job = NULL; MultiPlan *multiPlan = NULL; - CmdType commandType = query->commandType; - bool modifyTask = false; - List *placementList = NIL; bool routerPlannable = MultiRouterPlannableQuery(query, taskExecutorType, restrictionContext); @@ -119,6 +156,42 @@ MultiRouterPlanCreate(Query *originalQuery, Query *query, return NULL; } + if (InsertSelectQuery(originalQuery)) + { + multiPlan = CreateInsertSelectRouterPlan(originalQuery, restrictionContext); + } + else + { + multiPlan = CreateSingleTaskRouterPlan(originalQuery, query, restrictionContext); + } + + /* plans created by router planner are always router executable */ + if (multiPlan != NULL) + { + multiPlan->routerExecutable = true; + } + + return multiPlan; +} + + +/* + * CreateSingleTaskRouterPlan creates a physical plan for given query. The created plan is + * either a modify task that changes a single shard, or a router task that returns + * query results from a single worker. Supported modify queries (insert/update/delete) + * are router plannable by default. If query is not router plannable then the function + * returns NULL. 
+ */ +static MultiPlan * +CreateSingleTaskRouterPlan(Query *originalQuery, Query *query, + RelationRestrictionContext *restrictionContext) +{ + CmdType commandType = query->commandType; + bool modifyTask = false; + Job *job = NULL; + Task *task = NULL; + List *placementList = NIL; + MultiPlan *multiPlan = NULL; if (commandType == CMD_INSERT || commandType == CMD_UPDATE || commandType == CMD_DELETE) @@ -135,7 +208,7 @@ MultiRouterPlanCreate(Query *originalQuery, Query *query, { Assert(commandType == CMD_SELECT); - task = RouterSelectTask(originalQuery, query, restrictionContext, &placementList); + task = RouterSelectTask(originalQuery, restrictionContext, &placementList); } if (task == NULL) @@ -156,6 +229,709 @@ MultiRouterPlanCreate(Query *originalQuery, Query *query, } +/* + * Creates a router plan for INSERT ... SELECT queries which could consist of + * multiple tasks. + * + * The function never returns NULL; it errors out if it cannot create the multi plan. + */ +static MultiPlan * +CreateInsertSelectRouterPlan(Query *originalQuery, + RelationRestrictionContext *restrictionContext) +{ + int shardOffset = 0; + List *sqlTaskList = NIL; + uint32 taskIdIndex = 1; /* 0 is reserved for invalid taskId */ + Job *workerJob = NULL; + uint64 jobId = INVALID_JOB_ID; + MultiPlan *multiPlan = NULL; + RangeTblEntry *insertRte = ExtractInsertRangeTableEntry(originalQuery); + RangeTblEntry *subqueryRte = ExtractSelectRangeTableEntry(originalQuery); + Oid targetRelationId = insertRte->relid; + DistTableCacheEntry *targetCacheEntry = DistributedTableCacheEntry(targetRelationId); + int shardCount = targetCacheEntry->shardIntervalArrayLength; + + /* + * Error semantics for INSERT ... SELECT queries are different than regular + * modify queries. Thus, handle separately. + */ + ErrorIfInsertSelectQueryNotSupported(originalQuery, insertRte, subqueryRte); + + /* + * Plan select query for each shard in the target table. 
Do so by replacing the + * partitioning qual parameter added in multi_planner() using the current shard's + * actual boundary values. Also, add the current shard's boundary values to the + * top level subquery to ensure that even if the partitioning qual is not distributed + * to all the tables, we never run the queries on the shards that don't match with + * the current shard boundaries. Finally, perform the normal shard pruning to + * decide on whether to push the query to the current shard or not. + */ + for (shardOffset = 0; shardOffset < shardCount; shardOffset++) + { + ShardInterval *targetShardInterval = + targetCacheEntry->sortedShardIntervalArray[shardOffset]; + Task *modifyTask = NULL; + + modifyTask = RouterModifyTaskForShardInterval(originalQuery, targetShardInterval, + restrictionContext, taskIdIndex); + + /* add the task if it could be created */ + if (modifyTask != NULL) + { + modifyTask->insertSelectQuery = true; + + sqlTaskList = lappend(sqlTaskList, modifyTask); + } + + ++taskIdIndex; + } + + /* Create the worker job */ + workerJob = CitusMakeNode(Job); + workerJob->taskList = sqlTaskList; + workerJob->subqueryPushdown = false; + workerJob->dependedJobList = NIL; + workerJob->jobId = jobId; + workerJob->jobQuery = originalQuery; + + /* for now we do not support any function evaluation */ + workerJob->requiresMasterEvaluation = false; + + /* and finally the multi plan */ + multiPlan = CitusMakeNode(MultiPlan); + multiPlan->workerJob = workerJob; + multiPlan->masterTableName = NULL; + multiPlan->masterQuery = NULL; + + return multiPlan; +} + + +/* + * RouterModifyTaskForShardInterval creates a modify task by + * replacing the partitioning qual parameter added in multi_planner() + * with the shardInterval's boundary value. Then perform the normal + * shard pruning on the subquery. Finally, checks if the target shardInterval + * has exactly same placements with the select task's available anchor + * placements. 
+ *
+ * The function errors out if the subquery is not router select query (i.e.,
+ * subqueries with non equi-joins.).
+ *
+ * The function returns NULL, after logging at DEBUG2, when the list returned
+ * by IntersectPlacementList() is shorter than the target shard's finalized
+ * placement list. In that case no task is generated for the shard interval.
+ * Otherwise it returns a MODIFY_TASK whose query string is the deparsed
+ * INSERT ... SELECT for the given shard interval.
+ */
+static Task *
+RouterModifyTaskForShardInterval(Query *originalQuery, ShardInterval *shardInterval,
+								 RelationRestrictionContext *restrictionContext,
+								 uint32 taskIdIndex)
+{
+	Query *copiedQuery = copyObject(originalQuery);
+	RangeTblEntry *copiedInsertRte = ExtractInsertRangeTableEntry(copiedQuery);
+	RangeTblEntry *copiedSubqueryRte = ExtractSelectRangeTableEntry(copiedQuery);
+	Query *copiedSubquery = (Query *) copiedSubqueryRte->subquery;
+
+	uint64 shardId = shardInterval->shardId;
+	Oid distributedTableId = shardInterval->relationId;
+
+	RelationRestrictionContext *copiedRestrictionContext =
+		CopyRelationRestrictionContext(restrictionContext);
+
+	StringInfo queryString = makeStringInfo();
+	ListCell *restrictionCell = NULL;
+	Task *modifyTask = NULL;
+	List *selectPlacementList = NIL;
+	uint64 selectAnchorShardId = INVALID_SHARD_ID;
+	List *selectShardList = NIL;
+	uint64 jobId = INVALID_JOB_ID;
+	List *insertShardPlacementList = NULL;
+	List *intersectedPlacementList = NULL;
+	bool routerPlannable = false;
+	bool upsertQuery = false;
+
+	/* grab shared metadata lock to stop concurrent placement additions */
+	LockShardDistributionMetadata(shardId, ShareLock);
+
+	/*
+	 * Replace the partitioning qual parameter value in all baserestrictinfos.
+	 * Note that this has to be done on a copy, as the mutator overwrites the
+	 * clause of each RestrictInfo in place. The returned list is stored back
+	 * into the copied RelOptInfo instead of being dropped in a local variable.
+	 */
+	foreach(restrictionCell, copiedRestrictionContext->relationRestrictionList)
+	{
+		RelationRestriction *restriction = lfirst(restrictionCell);
+		RelOptInfo *copiedRelOptInfo = restriction->relOptInfo;
+
+		copiedRelOptInfo->baserestrictinfo =
+			(List *) InstantiatePartitionQual((Node *) copiedRelOptInfo->baserestrictinfo,
+											  shardInterval);
+	}
+
+	/*
+	 * We also need to add the shard interval range to the subquery in case
+	 * the partition qual is not distributed to all tables, such as some
+	 * subqueries in the WHERE clause.
+	 *
+	 * Note that we need to add the ranges before the shard pruning, because
+	 * the shard pruning logic (namely UpdateRelationNames()) modifies the
+	 * range table entries, which makes it hard to add the quals afterwards.
+	 */
+	AddShardIntervalRestrictionToSelect(copiedSubquery, shardInterval);
+
+	/*
+	 * Use router select planner to decide on whether we can push down the query
+	 * or not. If we can, we also rely on the side-effects that all RTEs have been
+	 * updated to point to the relevant nodes and selectPlacementList is determined.
+	 */
+	routerPlannable = RouterSelectQuery(copiedSubquery, copiedRestrictionContext,
+										&selectPlacementList, &selectAnchorShardId,
+										&selectShardList);
+
+	if (!routerPlannable)
+	{
+		ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+						errmsg("cannot perform distributed planning for the given "
+							   "modification"),
+						errdetail("Select query cannot be pushed down to the worker.")));
+	}
+
+	/* ensure that the INSERTed table's placements exist on the same workers */
+	insertShardPlacementList = FinalizedShardPlacementList(shardId);
+	intersectedPlacementList = IntersectPlacementList(insertShardPlacementList,
+													  selectPlacementList);
+
+	if (list_length(insertShardPlacementList) != list_length(intersectedPlacementList))
+	{
+		/* shardId is uint64, so it must be printed with UINT64_FORMAT, not %ld */
+		ereport(DEBUG2, (errmsg("could not generate task for target shardId: "
+								UINT64_FORMAT, shardId),
+						 errdetail("Insert query hits %d placements, Select query "
+								   "hits %d placements and only %d of those placements match.",
+								   list_length(insertShardPlacementList),
+								   list_length(selectPlacementList),
+								   list_length(intersectedPlacementList))));
+
+		return NULL;
+	}
+
+	/* this is required for correct deparsing of the query */
+	ReorderInsertSelectTargetLists(copiedQuery, copiedInsertRte, copiedSubqueryRte);
+
+	/* set the upsert flag */
+	if (originalQuery->onConflict != NULL)
+	{
+		upsertQuery = true;
+	}
+
+	/* setting an alias simplifies deparsing of RETURNING */
+	if (copiedInsertRte->alias == NULL)
+	{
+		Alias *alias = makeAlias(CITUS_TABLE_ALIAS, NIL);
+		copiedInsertRte->alias = alias;
+	}
+
+	/* and generate the full query string */
+	deparse_shard_query(copiedQuery, distributedTableId, shardInterval->shardId,
+						queryString);
+	ereport(DEBUG4, (errmsg("distributed statement: %s", queryString->data)));
+
+	modifyTask = CreateBasicTask(jobId, taskIdIndex, MODIFY_TASK, queryString->data);
+	modifyTask->dependedTaskList = NULL;
+	modifyTask->anchorShardId = shardId;
+	modifyTask->taskPlacementList = insertShardPlacementList;
+	modifyTask->upsertQuery = upsertQuery;
+	modifyTask->selectShardList = selectShardList;
+
+	return modifyTask;
+}
+
+
+/*
+ * AddShardIntervalRestrictionToSelect adds the following range boundaries
+ * to the given subquery, using the bounds of the given shardInterval:
+ *
+ *    hashfunc(partitionColumn) >= $lower_bound AND
+ *    hashfunc(partitionColumn) <= $upper_bound
+ *
+ * The function expects and asserts that subquery's target list contains a partition
+ * column value. The generated expression is ANDed with any quals that already
+ * exist on the subquery's jointree; the subquery is modified in place.
+ */
+static void
+AddShardIntervalRestrictionToSelect(Query *subqery, ShardInterval *shardInterval)
+{
+	List *targetList = subqery->targetList;
+	ListCell *targetEntryCell = NULL;
+	Var *targetPartitionColumnVar = NULL;
+	Oid integer4GEoperatorId = InvalidOid;
+	Oid integer4LEoperatorId = InvalidOid;
+	TypeCacheEntry *typeEntry = NULL;
+	FuncExpr *hashFunctionExpr = NULL;
+	OpExpr *greaterThanAndEqualsBoundExpr = NULL;
+	OpExpr *lessThanAndEqualsBoundExpr = NULL;
+	List *boundExpressionList = NIL;
+	Expr *andedBoundExpressions = NULL;
+
+	/* find the first target entry that is a bare partition column Var */
+	foreach(targetEntryCell, targetList)
+	{
+		TargetEntry *targetEntry = lfirst(targetEntryCell);
+
+		if (IsPartitionColumnRecursive(targetEntry->expr, subqery) &&
+			IsA(targetEntry->expr, Var))
+		{
+			targetPartitionColumnVar = (Var *) targetEntry->expr;
+			break;
+		}
+	}
+
+	/* we should have found target partition column */
+	Assert(targetPartitionColumnVar != NULL);
+
+	integer4GEoperatorId = get_opfamily_member(INTEGER_BTREE_FAM_OID, INT4OID,
+											   INT4OID,
+											   BTGreaterEqualStrategyNumber);
+	integer4LEoperatorId = get_opfamily_member(INTEGER_BTREE_FAM_OID, INT4OID,
+											   INT4OID,
+											   BTLessEqualStrategyNumber);
+
+	/* ensure that we find the correct operators */
+	Assert(integer4GEoperatorId != InvalidOid);
+	Assert(integer4LEoperatorId != InvalidOid);
+
+	/* look up the type cache */
+	typeEntry = lookup_type_cache(targetPartitionColumnVar->vartype,
+								  TYPECACHE_HASH_PROC_FINFO);
+
+	/* probably never possible given that the tables are already hash partitioned */
+	if (!OidIsValid(typeEntry->hash_proc_finfo.fn_oid))
+	{
+		ereport(ERROR, (errcode(ERRCODE_UNDEFINED_FUNCTION),
+						errmsg("could not identify a hash function for type %s",
+							   format_type_be(targetPartitionColumnVar->vartype))));
+	}
+
+	/* generate hashfunc(partCol) expression */
+	hashFunctionExpr = makeNode(FuncExpr);
+	hashFunctionExpr->funcid = typeEntry->hash_proc_finfo.fn_oid;
+	hashFunctionExpr->args = list_make1(targetPartitionColumnVar);
+
+	/* hash functions always return INT4 */
+	hashFunctionExpr->funcresulttype = INT4OID;
+
+	/*
+	 * generate hashfunc(partCol) >= shardMinValue OpExpr
+	 *
+	 * NOTE(review): the Var's collation is passed as both opcollid and
+	 * inputcollid although both operands are int4, while
+	 * InstantiatePartitionQual() passes InvalidOid for the same operators.
+	 * Confirm which one is intended.
+	 */
+	greaterThanAndEqualsBoundExpr =
+		(OpExpr *) make_opclause(integer4GEoperatorId,
+								 InvalidOid, false,
+								 (Expr *) hashFunctionExpr,
+								 (Expr *) MakeInt4Constant(shardInterval->minValue),
+								 targetPartitionColumnVar->varcollid,
+								 targetPartitionColumnVar->varcollid);
+
+	/* update the operators with correct operator numbers and function ids */
+	greaterThanAndEqualsBoundExpr->opfuncid =
+		get_opcode(greaterThanAndEqualsBoundExpr->opno);
+	greaterThanAndEqualsBoundExpr->opresulttype =
+		get_func_rettype(greaterThanAndEqualsBoundExpr->opfuncid);
+
+	/*
+	 * generate hashfunc(partCol) <= shardMaxValue OpExpr; note that the same
+	 * hashFunctionExpr node is the left operand of both comparisons
+	 */
+	lessThanAndEqualsBoundExpr =
+		(OpExpr *) make_opclause(integer4LEoperatorId,
+								 InvalidOid, false,
+								 (Expr *) hashFunctionExpr,
+								 (Expr *) MakeInt4Constant(shardInterval->maxValue),
+								 targetPartitionColumnVar->varcollid,
+								 targetPartitionColumnVar->varcollid);
+
+	/* update the operators with correct operator numbers and function ids */
+	lessThanAndEqualsBoundExpr->opfuncid = get_opcode(lessThanAndEqualsBoundExpr->opno);
+	lessThanAndEqualsBoundExpr->opresulttype =
+		get_func_rettype(lessThanAndEqualsBoundExpr->opfuncid);
+
+	/* finally add the operators to a list and make them explicitly anded */
+	boundExpressionList = lappend(boundExpressionList, greaterThanAndEqualsBoundExpr);
+	boundExpressionList = lappend(boundExpressionList, lessThanAndEqualsBoundExpr);
+
+	andedBoundExpressions = make_ands_explicit(boundExpressionList);
+
+	/* finally add the quals */
+	if (subqery->jointree->quals == NULL)
+	{
+		subqery->jointree->quals = (Node *) andedBoundExpressions;
+	}
+	else
+	{
+		subqery->jointree->quals = make_and_qual(subqery->jointree->quals,
+												 (Node *) andedBoundExpressions);
+	}
+}
+
+
+/*
+ * ExtractSelectRangeTableEntry returns the range table entry of the subquery,
+ * i.e. the entry referenced by the first item of the query's FROM list.
+ * Note that the function expects and asserts that the input query be
+ * an INSERT...SELECT query.
+ */
+static RangeTblEntry *
+ExtractSelectRangeTableEntry(Query *query)
+{
+	List *fromList = NULL;
+	RangeTblRef *reference = NULL;
+	RangeTblEntry *subqueryRte = NULL;
+
+	Assert(InsertSelectQuery(query));
+
+	/* since we already asserted InsertSelectQuery() it is safe to access both lists */
+	fromList = query->jointree->fromlist;
+	reference = linitial(fromList);
+	subqueryRte = rt_fetch(reference->rtindex, query->rtable);
+
+	return subqueryRte;
+}
+
+
+/*
+ * ExtractInsertRangeTableEntry returns the INSERT'ed table's range table entry.
+ * Note that the function expects and asserts that the input query be
+ * an INSERT...SELECT query.
+ */
+static RangeTblEntry *
+ExtractInsertRangeTableEntry(Query *query)
+{
+	AssertArg(InsertSelectQuery(query));
+
+	/* resultRelation is used as the range table index of the INSERT target */
+	return rt_fetch(query->resultRelation, query->rtable);
+}
+
+
+/*
+ * ErrorIfInsertSelectQueryNotSupported raises an ERROR for INSERT ... SELECT
+ * queries that cannot be planned: queries containing stable or volatile
+ * functions, SELECTs rejected by ErrorIfMultiTaskRouterSelectQueryUnsupported(),
+ * partition column mismatches between the INSERT and the SELECT, and target
+ * and source tables that are not colocated.
+ */
+static void
+ErrorIfInsertSelectQueryNotSupported(Query *queryTree, RangeTblEntry *insertRte,
+									 RangeTblEntry *subqueryRte)
+{
+	Oid selectSourceRelationId = InvalidOid;
+	Query *selectQuery = NULL;
+	bool targetAndSourceColocated = false;
+
+	/* callers are expected to pass INSERT ... SELECT queries only */
+	AssertArg(InsertSelectQuery(queryTree));
+
+	selectQuery = subqueryRte->subquery;
+
+	/* the check covers the whole query tree, not only the SELECT part */
+	if (contain_mutable_functions((Node *) queryTree))
+	{
+		ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+						errmsg("cannot perform distributed planning for the given "
+							   "modification"),
+						errdetail(
+							"Stable and volatile functions are not allowed in INSERT ... "
+							"SELECT queries")));
+	}
+
+	/* clauses such as LIMIT, OFFSET and window functions are rejected here */
+	ErrorIfMultiTaskRouterSelectQueryUnsupported(selectQuery);
+
+	/* errors out on mismatch; otherwise reports the SELECT side relation */
+	ErrorIfInsertPartitionColumnDoesNotMatchSelect(queryTree, insertRte, subqueryRte,
+												   &selectSourceRelationId);
+
+	/* the partition column value has to come from a colocated table */
+	targetAndSourceColocated = TablesColocated(insertRte->relid,
+												selectSourceRelationId);
+	if (!targetAndSourceColocated)
+	{
+		ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+						errmsg("INSERT target table and the source relation "
+							   "of the SELECT partition column value "
+							   "must be colocated")));
+	}
+}
+
+
+/*
+ * ErrorIfMultiTaskRouterSelectQueryUnsupported errors out on queries that we
+ * support for single task router queries, but cannot allow for multi task
+ * router queries. We do these checks recursively to prevent any wrong results.
+ */
+static void
+ErrorIfMultiTaskRouterSelectQueryUnsupported(Query *query)
+{
+	List *queryList = NIL;
+	ListCell *queryCell = NULL;
+
+	ExtractQueryWalker((Node *) query, &queryList);
+	foreach(queryCell, queryList)
+	{
+		Query *subquery = (Query *) lfirst(queryCell);
+
+		Assert(subquery->commandType == CMD_SELECT);
+
+		/*
+		 * Every query that ExtractQueryWalker() placed into queryList above is
+		 * checked in turn, and the first unsupported clause that is found
+		 * raises an ERROR. All errors share the same errmsg and differ only in
+		 * their errdetail.
+		 *
+		 * Pushing down LIMIT per shard would yield wrong results.
+		 */
+		if (subquery->limitCount != NULL)
+		{
+			ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+							errmsg("cannot perform distributed planning for the given "
+								   "modification"),
+							errdetail("LIMIT clauses are not allowed in "
+									  "INSERT ... SELECT queries")));
+		}
+
+		/* pushing down OFFSET per shard would yield wrong results as well */
+		if (subquery->limitOffset != NULL)
+		{
+			ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+							errmsg("cannot perform distributed planning for the given "
+								   "modification"),
+							errdetail("OFFSET clauses are not allowed in "
+									  "INSERT ... SELECT queries")));
+		}
+
+		/*
+		 * We could potentially support window clauses where the data is partitioned
+		 * over the distribution column. For simplicity, we currently do not support
+		 * window clauses at all.
+		 */
+		if (subquery->windowClause != NULL)
+		{
+			ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+							errmsg("cannot perform distributed planning for the given "
+								   "modification"),
+							errdetail("Window functions are not allowed in "
+									  "INSERT ... SELECT queries")));
+		}
+
+		/*
+		 * Set operations are rejected; see the comment inside
+		 * AddUninstantiatedPartitionRestriction() for the reason.
+		 */
+		if (subquery->setOperations != NULL)
+		{
+			ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+							errmsg("cannot perform distributed planning for the given "
+								   "modification"),
+							errdetail("Set operations are not allowed in "
+									  "INSERT ... SELECT queries")));
+		}
+
+		/*
+		 * We currently do not support grouping sets since they could generate NULL
+		 * results even after the restrictions are applied to the query. A solution
+		 * would be to add the whole query into a subquery and add the restrictions
+		 * on that subquery.
+		 */
+		if (subquery->groupingSets != NULL)
+		{
+			ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+							errmsg("cannot perform distributed planning for the given "
+								   "modification"),
+							errdetail("Grouping sets are not allowed in "
+									  "INSERT ... SELECT queries")));
+		}
+
+		/*
+		 * We cannot support DISTINCT ON clauses since they could be on a
+		 * non-partition column. In that case, there is no way that Citus can
+		 * support this.
+		 */
+		if (subquery->hasDistinctOn)
+		{
+			ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+							errmsg("cannot perform distributed planning for the given "
+								   "modification"),
+							errdetail("DISTINCT ON clauses are not allowed in "
+									  "INSERT ... SELECT queries")));
+		}
+	}
+}
+
+
+/*
+ * ErrorIfInsertPartitionColumnDoesNotMatchSelect checks whether the INSERTed table's
+ * partition column value matches with the any of the SELECTed table's partition column.
+ *
+ * On return without error (i.e., if partition columns match), the function also sets
+ * selectPartitionColumnTableId.
+ */
+static void
+ErrorIfInsertPartitionColumnDoesNotMatchSelect(Query *query, RangeTblEntry *insertRte,
+											   RangeTblEntry *subqueryRte,
+											   Oid *selectPartitionColumnTableId)
+{
+	Oid targetRelationId = insertRte->relid;
+	uint32 targetRangeTableIndex = 1;
+	Var *targetPartitionColumn = PartitionColumn(targetRelationId,
+												 targetRangeTableIndex);
+	Query *selectQuery = subqueryRte->subquery;
+	bool matchFound = false;
+	ListCell *insertTargetCell = NULL;
+
+	foreach(insertTargetCell, query->targetList)
+	{
+		TargetEntry *insertTargetEntry = (TargetEntry *) lfirst(insertTargetCell);
+		TargetEntry *selectTargetEntry = NULL;
+		Var *insertVar = NULL;
+
+		/* only bare Vars are considered */
+		if (!IsA(insertTargetEntry->expr, Var))
+		{
+			continue;
+		}
+
+		/* skip target entries that do not feed the INSERT's partition column */
+		if (get_attnum(targetRelationId, insertTargetEntry->resname) !=
+			targetPartitionColumn->varattno)
+		{
+			continue;
+		}
+
+		/* the Var's attribute number is the 1-based position in the SELECT list */
+		insertVar = (Var *) insertTargetEntry->expr;
+		selectTargetEntry = list_nth(selectQuery->targetList, insertVar->varattno - 1);
+
+		/* the SELECT side has to be a bare Var that is itself a partition column */
+		if (IsA(selectTargetEntry->expr, Var) &&
+			IsPartitionColumnRecursive(selectTargetEntry->expr, selectQuery))
+		{
+			*selectPartitionColumnTableId = selectTargetEntry->resorigtbl;
+			matchFound = true;
+		}
+
+		/* the partition column entry has been examined, no need to look further */
+		break;
+	}
+
+	if (!matchFound)
+	{
+		ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+						errmsg("SELECT query should return bare partition column on "
+							   "the same ordinal position as the INSERT's partition "
+							   "column")));
+	}
+}
+
+
+/*
+ * AddUninstantiatedPartitionRestriction() can only be used with
+ * INSERT ... SELECT queries.
+ *
+ * AddUninstantiatedPartitionRestriction adds an equality qual
+ * to the SELECT query of the given originalQuery. The function currently
+ * does NOT add the quals if
+ *   (i)  Set operations are present on the top level query
+ *   (ii) Target list does not include a bare partition column.
+ *
+ * Note that if the input query is not an INSERT .. SELECT the assertion fails.
+ */
+void
+AddUninstantiatedPartitionRestriction(Query *originalQuery)
+{
+	RangeTblEntry *selectRte = NULL;
+	Query *selectQuery = NULL;
+	Var *partitionColumnVar = NULL;
+	ListCell *selectTargetCell = NULL;
+
+	Assert(InsertSelectQuery(originalQuery));
+
+	selectRte = ExtractSelectRangeTableEntry(originalQuery);
+	selectQuery = selectRte->subquery;
+
+	/*
+	 * Subqueries with set operations are left untouched. The standard planner
+	 * has an "Assert(parse->jointree->quals == NULL);" on the set operation
+	 * path (i.e., plan_set_operations), and adding the uninstantiated equality
+	 * qual could make the query hit that assertion.
+	 */
+	if (selectQuery->setOperations != NULL)
+	{
+		return;
+	}
+
+	/* pick the first target entry that is a bare partition column */
+	foreach(selectTargetCell, selectQuery->targetList)
+	{
+		TargetEntry *selectTargetEntry = lfirst(selectTargetCell);
+		Expr *targetExpression = selectTargetEntry->expr;
+
+		if (IsPartitionColumnRecursive(targetExpression, selectQuery) &&
+			IsA(targetExpression, Var))
+		{
+			partitionColumnVar = (Var *) targetExpression;
+			break;
+		}
+	}
+
+	/*
+	 * Without a bare partition column there is nothing to restrict; the multi
+	 * planner is expected to error out for such queries later on.
+	 */
+	if (partitionColumnVar != NULL)
+	{
+		AddUninstantiatedEqualityQual(selectQuery, partitionColumnVar);
+	}
+}
+
+
+/*
+ * AddUninstantiatedEqualityQual adds a qual in the following form
+ * ($1 = partitionColumn) on the input query and partitionColumn.
+ */
+static void
+AddUninstantiatedEqualityQual(Query *query, Var *partitionColumn)
+{
+	Param *placeholderParam = makeNode(Param);
+	OpExpr *placeholderEquality = NULL;
+	Node *existingQuals = NULL;
+	Oid columnCollation = InvalidOid;
+	Oid ltOperator = InvalidOid;
+	Oid eqOperator = InvalidOid;
+	Oid gtOperator = InvalidOid;
+	bool isHashable = false;
+
+	AssertArg(query->commandType == CMD_SELECT);
+
+	/* only the equality operator is requested, the rest are plain out-params */
+	get_sort_group_operators(partitionColumn->vartype, false, true, false,
+							 &ltOperator, &eqOperator, &gtOperator,
+							 &isHashable);
+
+	columnCollation = partitionColumn->varcollid;
+
+	/* the parameter takes its type, typmod and collation from the column */
+	placeholderParam->paramkind = PARAM_EXTERN;
+	placeholderParam->paramid = UNINSTANTIATED_PARAMETER_ID;
+	placeholderParam->paramtype = partitionColumn->vartype;
+	placeholderParam->paramtypmod = partitionColumn->vartypmod;
+	placeholderParam->paramcollid = columnCollation;
+	placeholderParam->location = -1;
+
+	/* build (partitionColumn = $param) */
+	placeholderEquality = (OpExpr *) make_opclause(eqOperator, InvalidOid,
+												   false,
+												   (Expr *) partitionColumn,
+												   (Expr *) placeholderParam,
+												   columnCollation,
+												   columnCollation);
+
+	/* fill in the function id and the result type that belong to the operator */
+	placeholderEquality->opfuncid = get_opcode(placeholderEquality->opno);
+	placeholderEquality->opresulttype =
+		get_func_rettype(placeholderEquality->opfuncid);
+
+	/* either install the qual as is, or AND it with the existing quals */
+	existingQuals = query->jointree->quals;
+	query->jointree->quals = (existingQuals == NULL) ?
+							 (Node *) placeholderEquality :
+							 make_and_qual(existingQuals, (Node *) placeholderEquality);
+}
+
+
 /*
  * ErrorIfModifyQueryNotSupported checks if the query contains unsupported features,
  * and errors out if it does.
@@ -773,7 +1549,7 @@ RouterModifyTask(Query *originalQuery, Query *query) rangeTableEntry = linitial(originalQuery->rtable); if (rangeTableEntry->alias == NULL) { - Alias *alias = makeAlias(UPSERT_ALIAS, NIL); + Alias *alias = makeAlias(CITUS_TABLE_ALIAS, NIL); rangeTableEntry->alias = alias; } } @@ -1034,17 +1810,57 @@ ExtractInsertPartitionValue(Query *query, Var *partitionColumn) /* RouterSelectTask builds a Task to represent a single shard select query */ static Task * -RouterSelectTask(Query *originalQuery, Query *query, - RelationRestrictionContext *restrictionContext, +RouterSelectTask(Query *originalQuery, RelationRestrictionContext *restrictionContext, List **placementList) { Task *task = NULL; - List *prunedRelationShardList = TargetShardIntervalsForSelect(query, - restrictionContext); + bool queryRoutable = false; StringInfo queryString = makeStringInfo(); - uint64 shardId = INVALID_SHARD_ID; bool upsertQuery = false; - CmdType commandType PG_USED_FOR_ASSERTS_ONLY = query->commandType; + uint64 shardId = INVALID_SHARD_ID; + List *selectShardList = NIL; + + queryRoutable = RouterSelectQuery(originalQuery, restrictionContext, + placementList, &shardId, &selectShardList); + + + if (!queryRoutable) + { + return NULL; + } + + pg_get_query_def(originalQuery, queryString); + + task = CitusMakeNode(Task); + task->jobId = INVALID_JOB_ID; + task->taskId = INVALID_TASK_ID; + task->taskType = ROUTER_TASK; + task->queryString = queryString->data; + task->anchorShardId = shardId; + task->dependedTaskList = NIL; + task->upsertQuery = upsertQuery; + + return task; +} + + +/* + * RouterSelectQuery returns true if the input query can be pushed down to the + * worker node as it is. Otherwise, the function returns false. + * + * On return true, all RTEs have been updated to point to the relevant shards in + * the originalQuery. Also, placementList is filled with the list of worker nodes + * that has all the required shard placements for the query execution. 
Finally, + * anchorShardId is set to the first pruned shardId of the given query. + */ +static bool +RouterSelectQuery(Query *originalQuery, RelationRestrictionContext *restrictionContext, + List **placementList, uint64 *anchorShardId, List **selectShardList) +{ + List *prunedRelationShardList = TargetShardIntervalsForSelect(originalQuery, + restrictionContext); + uint64 shardId = INVALID_SHARD_ID; + CmdType commandType PG_USED_FOR_ASSERTS_ONLY = originalQuery->commandType; ListCell *prunedRelationShardListCell = NULL; List *workerList = NIL; bool shardsPresent = false; @@ -1053,7 +1869,7 @@ RouterSelectTask(Query *originalQuery, Query *query, if (prunedRelationShardList == NULL) { - return NULL; + return false; } Assert(commandType == CMD_SELECT); @@ -1074,12 +1890,15 @@ RouterSelectTask(Query *originalQuery, Query *query, /* all relations are now pruned down to 0 or 1 shards */ Assert(list_length(prunedShardList) <= 1); + shardInterval = (ShardInterval *) linitial(prunedShardList); + /* anchor shard id */ if (shardId == INVALID_SHARD_ID) { - shardInterval = (ShardInterval *) linitial(prunedShardList); shardId = shardInterval->shardId; } + + *selectShardList = lappend(*selectShardList, shardInterval); } /* @@ -1110,25 +1929,15 @@ RouterSelectTask(Query *originalQuery, Query *query, { ereport(DEBUG2, (errmsg("Found no worker with all shard placements"))); - return NULL; + return false; } UpdateRelationNames((Node *) originalQuery, restrictionContext); - pg_get_query_def(originalQuery, queryString); - - task = CitusMakeNode(Task); - task->jobId = INVALID_JOB_ID; - task->taskId = INVALID_TASK_ID; - task->taskType = ROUTER_TASK; - task->queryString = queryString->data; - task->anchorShardId = shardId; - task->dependedTaskList = NIL; - task->upsertQuery = upsertQuery; - *placementList = workerList; + *anchorShardId = shardId; - return task; + return true; } @@ -1562,20 +2371,20 @@ MultiRouterPlannableQuery(Query *query, MultiExecutorType taskExecutorType, /* - * 
ReorderInsertSelectTargetListsIfExists reorders the target lists of INSERT/SELECT + * ReorderInsertSelectTargetLists reorders the target lists of INSERT/SELECT * query which is required for deparsing purposes. The reordered query is returned. * - * The necessity for this function comes from the fact that ruleutils.c is not supposed to be - * used on "rewritten" queries (i.e. ones that have been passed through QueryRewrite()). - * Query rewriting is the process in which views and such are expanded, + * The necessity for this function comes from the fact that ruleutils.c is not supposed + * to be used on "rewritten" queries (i.e. ones that have been passed through + * QueryRewrite()). Query rewriting is the process in which views and such are expanded, * and, INSERT/UPDATE targetlists are reordered to match the physical order, - * defaults etc. For the details of reordeing, see transformInsertRow(). + * defaults etc. For the details of reordeing, see transformInsertRow() and + * rewriteTargetListIU(). */ Query * -ReorderInsertSelectTargetListsIfExists(Query *originalQuery) +ReorderInsertSelectTargetLists(Query *originalQuery, RangeTblEntry *insertRte, + RangeTblEntry *subqueryRte) { - RangeTblEntry *insertRte = NULL; - RangeTblEntry *subqueryRte = NULL; Query *subquery = NULL; ListCell *insertTargetEntryCell; List *newSubqueryTargetlist = NIL; @@ -1586,14 +2395,8 @@ ReorderInsertSelectTargetListsIfExists(Query *originalQuery) Oid insertRelationId = InvalidOid; int subqueryTargetLength = 0; - /* we only apply the reording for INSERT ... 
SELECT queries */ - if (!InsertSelectQuery(originalQuery)) - { - return originalQuery; - } + AssertArg(InsertSelectQuery(originalQuery)); - insertRte = linitial(originalQuery->rtable); - subqueryRte = lsecond(originalQuery->rtable); subquery = subqueryRte->subquery; insertRelationId = insertRte->relid; @@ -1634,9 +2437,15 @@ ReorderInsertSelectTargetListsIfExists(Query *originalQuery) * It is safe to pull Var clause and ignore the coercions since that * are already going to be added on the workers implicitly. */ +#if (PG_VERSION_NUM >= 90600) + targetVarList = pull_var_clause((Node *) oldInsertTargetEntry->expr, + PVC_RECURSE_AGGREGATES); +#else targetVarList = pull_var_clause((Node *) oldInsertTargetEntry->expr, PVC_RECURSE_AGGREGATES, PVC_RECURSE_PLACEHOLDERS); +#endif + targetVarCount = list_length(targetVarList); /* a single INSERT target entry cannot have more than one Var */ @@ -1667,12 +2476,9 @@ ReorderInsertSelectTargetListsIfExists(Query *originalQuery) } newInsertVar = makeVar(insertTableId, originalAttrNo, - exprType( - (Node *) newSubqueryTargetEntry->expr), - exprTypmod( - (Node *) newSubqueryTargetEntry->expr), - exprCollation( - (Node *) newSubqueryTargetEntry->expr), + exprType((Node *) newSubqueryTargetEntry->expr), + exprTypmod((Node *) newSubqueryTargetEntry->expr), + exprCollation((Node *) newSubqueryTargetEntry->expr), 0); newInsertTargetEntry = makeTargetEntry((Expr *) newInsertVar, originalAttrNo, oldInsertTargetEntry->resname, @@ -1717,32 +2523,228 @@ ReorderInsertSelectTargetListsIfExists(Query *originalQuery) /* * InsertSelectQuery returns true when the input query * is INSERT INTO ... SELECT kind of query. + * + * Note that the input query should be the original parsetree of + * the query (i.e., not passed trough the standard planner). + * + * This function is inspired from getInsertSelectQuery() on + * rewrite/rewriteManip.c. 
*/ -static bool +bool InsertSelectQuery(Query *query) { CmdType commandType = query->commandType; - List *rangeTableList = query->rtable; + List *fromList = NULL; + RangeTblRef *rangeTableReference = NULL; RangeTblEntry *subqueryRte = NULL; - Query *subquery = NULL; if (commandType != CMD_INSERT) { return false; } - rangeTableList = query->rtable; - if (list_length(rangeTableList) < 2) + if (query->jointree == NULL || !IsA(query->jointree, FromExpr)) { return false; } - subqueryRte = lsecond(query->rtable); - subquery = subqueryRte->subquery; - if (subquery == NULL) + fromList = query->jointree->fromlist; + if (list_length(fromList) != 1) { return false; } + rangeTableReference = linitial(fromList); + Assert(IsA(rangeTableReference, RangeTblRef)); + + subqueryRte = rt_fetch(rangeTableReference->rtindex, query->rtable); + if (subqueryRte->rtekind != RTE_SUBQUERY) + { + return false; + } + + /* ensure that there is a query */ + Assert(IsA(subqueryRte->subquery, Query)); + return true; } + + +/* + * Copy a RelationRestrictionContext. Note that several subfields are copied + * shallowly, for lack of copyObject support. + * + * Note that CopyRelationRestrictionContext copies the following fields per relation + * context: index, relationId, distributedRelation, rte, relOptInfo->baserestrictinfo, + * relOptInfo->joininfo and prunedShardIntervalList. Also, the function shallowly copies + * plannerInfo which is read-only. All other parts of the relOptInfo is also shallowly + * copied. 
+ */
+static RelationRestrictionContext *
+CopyRelationRestrictionContext(RelationRestrictionContext *oldContext)
+{
+	RelationRestrictionContext *contextCopy = palloc(sizeof(*contextCopy));
+	List *restrictionListCopy = NIL;
+	ListCell *restrictionCell = NULL;
+
+	foreach(restrictionCell, oldContext->relationRestrictionList)
+	{
+		RelationRestriction *sourceRestriction = lfirst(restrictionCell);
+		RelationRestriction *restrictionCopy = palloc0(sizeof(*restrictionCopy));
+		RelOptInfo *relOptInfoCopy = palloc(sizeof(*relOptInfoCopy));
+
+		/*
+		 * RelOptInfo is copied flat, after which the two lists that get their
+		 * own copy are decoupled from the original; every other field still
+		 * points to the original's data.
+		 */
+		*relOptInfoCopy = *sourceRestriction->relOptInfo;
+		relOptInfoCopy->baserestrictinfo =
+			copyObject(sourceRestriction->relOptInfo->baserestrictinfo);
+		relOptInfoCopy->joininfo =
+			copyObject(sourceRestriction->relOptInfo->joininfo);
+
+		restrictionCopy->index = sourceRestriction->index;
+		restrictionCopy->relationId = sourceRestriction->relationId;
+		restrictionCopy->distributedRelation = sourceRestriction->distributedRelation;
+		restrictionCopy->rte = copyObject(sourceRestriction->rte);
+		restrictionCopy->relOptInfo = relOptInfoCopy;
+
+		/* plannerInfo is not copyable, but it is only read, so it is shared */
+		restrictionCopy->plannerInfo = sourceRestriction->plannerInfo;
+		restrictionCopy->prunedShardIntervalList =
+			copyObject(sourceRestriction->prunedShardIntervalList);
+
+		restrictionListCopy = lappend(restrictionListCopy, restrictionCopy);
+	}
+
+	contextCopy->hasDistributedRelation = oldContext->hasDistributedRelation;
+	contextCopy->hasLocalRelation = oldContext->hasLocalRelation;
+	contextCopy->relationRestrictionList = restrictionListCopy;
+
+	return contextCopy;
+}
+
+
+/*
+ * InstantiatePartitionQual replaces the "uninstantiated" partition
+ * restriction clause with the current shard's (passed in context)
+ * boundary value.
+ *
+ * Once we see ($1 = partition column), we replace it with
+ * (partCol >= shardMinValue && partCol <= shardMaxValue).
+ */
+static Node *
+InstantiatePartitionQual(Node *node, void *context)
+{
+	ShardInterval *shardInterval = (ShardInterval *) context;
+	Assert(shardInterval->minValueExists);
+	Assert(shardInterval->maxValueExists);
+
+	if (node == NULL)
+	{
+		return NULL;
+	}
+
+	/*
+	 * Look for operator expressions with two arguments; only those can be
+	 * the uninstantiated partition qual.
+	 *
+	 * Once the uninstantiated qual is found (one of the two operands is a
+	 * Param whose paramid is UNINSTANTIATED_PARAMETER_ID), replace it with the
+	 * appropriate boundaries for the current shard interval. Any other
+	 * two-argument operator expression is returned unchanged, without
+	 * recursing into its arguments.
+	 *
+	 * The boundaries are replaced in the following manner:
+	 * (partCol >= shardMinValue && partCol <= shardMaxValue)
+	 *
+	 * The replacement is returned as a two element List of OpExprs rather
+	 * than as a single expression node.
+	 */
+	if (IsA(node, OpExpr) && list_length(((OpExpr *) node)->args) == 2)
+	{
+		OpExpr *op = (OpExpr *) node;
+		Node *leftop = get_leftop((Expr *) op);
+		Node *rightop = get_rightop((Expr *) op);
+		Param *param = NULL;
+
+		Var *hashedGEColumn = NULL;
+		OpExpr *hashedGEOpExpr = NULL;
+		Datum shardMinValue = shardInterval->minValue;
+
+		Var *hashedLEColumn = NULL;
+		OpExpr *hashedLEOpExpr = NULL;
+		Datum shardMaxValue = shardInterval->maxValue;
+
+		List *hashedOperatorList = NIL;
+
+		Oid integer4GEoperatorId = InvalidOid;
+		Oid integer4LEoperatorId = InvalidOid;
+
+		/* look for the Params; the left operand is checked before the right one */
+		if (IsA(leftop, Param))
+		{
+			param = (Param *) leftop;
+		}
+		else if (IsA(rightop, Param))
+		{
+			param = (Param *) rightop;
+		}
+
+		/*
+		 * not an interesting param for our purpose (no Param operand at all,
+		 * or a Param with a different paramid), so return the node as it is
+		 */
+		if (!(param && param->paramid == UNINSTANTIATED_PARAMETER_ID))
+		{
+			return node;
+		}
+
+		/* get the integer >=, <= operators from the catalog */
+		integer4GEoperatorId = get_opfamily_member(INTEGER_BTREE_FAM_OID, INT4OID,
+												   INT4OID,
+												   BTGreaterEqualStrategyNumber);
+		integer4LEoperatorId = get_opfamily_member(INTEGER_BTREE_FAM_OID, INT4OID,
+												   INT4OID,
+												   BTLessEqualStrategyNumber);
+
+		/*
+		 * generate hashed columns, one per comparison
+		 *
+		 * NOTE(review): MakeInt4Column() presumably returns a placeholder int4
+		 * Var that stands for the hashed partition column; confirm against its
+		 * definition and the shard pruning code.
+		 */
+		hashedGEColumn = MakeInt4Column();
+		hashedLEColumn = MakeInt4Column();
+
+		/*
+		 * generate the necessary operators: (column >= shardMinValue) and
+		 * (column <= shardMaxValue), both built with InvalidOid collations
+		 */
+		hashedGEOpExpr = (OpExpr *) make_opclause(integer4GEoperatorId,
+												  InvalidOid, false,
+												  (Expr *) hashedGEColumn,
+												  (Expr *) MakeInt4Constant(
+													  shardMinValue),
+												  InvalidOid, InvalidOid);
+
+		hashedLEOpExpr = (OpExpr *) make_opclause(integer4LEoperatorId,
+												  InvalidOid, false,
+												  (Expr *) hashedLEColumn,
+												  (Expr *) MakeInt4Constant(
+													  shardMaxValue),
+												  InvalidOid, InvalidOid);
+
+		/* update the operators with correct operator numbers and function ids */
+		hashedGEOpExpr->opfuncid = get_opcode(hashedGEOpExpr->opno);
+		hashedGEOpExpr->opresulttype = get_func_rettype(hashedGEOpExpr->opfuncid);
+
+		hashedLEOpExpr->opfuncid = get_opcode(hashedLEOpExpr->opno);
+		hashedLEOpExpr->opresulttype = get_func_rettype(hashedLEOpExpr->opfuncid);
+
+		/*
+		 * finally add the hashed operators to a list and return it; when this
+		 * branch is reached through the RestrictInfo case below, the list
+		 * becomes the clause of that RestrictInfo
+		 */
+		hashedOperatorList = lappend(hashedOperatorList, hashedGEOpExpr);
+		hashedOperatorList = lappend(hashedOperatorList, hashedLEOpExpr);
+
+		return (Node *) hashedOperatorList;
+	}
+
+	/* ensure that it is not a query */
+	Assert(!IsA(node, Query));
+
+	/*
+	 * recurse into restrict info: the clause of the given RestrictInfo is
+	 * overwritten in place and the very same node is returned, which is why
+	 * callers have to pass a copy they are allowed to modify
+	 */
+	if (IsA(node, RestrictInfo))
+	{
+		RestrictInfo *restrictInfo = (RestrictInfo *) node;
+		restrictInfo->clause = (Expr *) InstantiatePartitionQual(
+			(Node *) restrictInfo->clause, context);
+
+		return (Node *) restrictInfo;
+	}
+
+	return expression_tree_mutator(node, InstantiatePartitionQual, context);
+}
diff --git a/src/backend/distributed/test/deparse_shard_query.c b/src/backend/distributed/test/deparse_shard_query.c
index 4d586d3a0..883f59258 100644
--- a/src/backend/distributed/test/deparse_shard_query.c
+++ b/src/backend/distributed/test/deparse_shard_query.c
@@ -59,7 +59,16 @@ deparse_shard_query_test(PG_FUNCTION_ARGS)
 		Query *query = lfirst(queryTreeCell);
 		StringInfo buffer = makeStringInfo();
 
-		ReorderInsertSelectTargetListsIfExists(query);
+		/* reoreder the target list only for INSERT .. 
SELECT queries */ + if (InsertSelectQuery(query)) + { + RangeTblEntry *insertRte = linitial(query->rtable); + RangeTblEntry *subqueryRte = lsecond(query->rtable); + + + ReorderInsertSelectTargetLists(query, insertRte, subqueryRte); + } + deparse_shard_query(query, InvalidOid, 0, buffer); elog(INFO, "query: %s", buffer->data); diff --git a/src/backend/distributed/test/test_helper_functions.c b/src/backend/distributed/test/test_helper_functions.c index e98a1f7f3..43f1a8fcb 100644 --- a/src/backend/distributed/test/test_helper_functions.c +++ b/src/backend/distributed/test/test_helper_functions.c @@ -1,6 +1,6 @@ /*------------------------------------------------------------------------- * - * le * test/src/test_helper_functions.c + * test/src/test_helper_functions.c * * This file contains helper functions used in many Citus tests. * diff --git a/src/backend/distributed/utils/citus_outfuncs.c b/src/backend/distributed/utils/citus_outfuncs.c index 49b41db27..b4006db91 100644 --- a/src/backend/distributed/utils/citus_outfuncs.c +++ b/src/backend/distributed/utils/citus_outfuncs.c @@ -277,6 +277,7 @@ OutMultiPlan(OUTFUNC_ARGS) WRITE_NODE_FIELD(workerJob); WRITE_NODE_FIELD(masterQuery); WRITE_STRING_FIELD(masterTableName); + WRITE_BOOL_FIELD(routerExecutable); } @@ -493,6 +494,8 @@ OutTask(OUTFUNC_ARGS) WRITE_BOOL_FIELD(assignmentConstrained); WRITE_NODE_FIELD(taskExecution); WRITE_BOOL_FIELD(upsertQuery); + WRITE_BOOL_FIELD(insertSelectQuery); + WRITE_NODE_FIELD(selectShardList); } #if (PG_VERSION_NUM < 90600) diff --git a/src/backend/distributed/utils/citus_readfuncs.c b/src/backend/distributed/utils/citus_readfuncs.c index 256dc20a2..8d647d3f7 100644 --- a/src/backend/distributed/utils/citus_readfuncs.c +++ b/src/backend/distributed/utils/citus_readfuncs.c @@ -184,6 +184,7 @@ ReadMultiPlan(READFUNC_ARGS) READ_NODE_FIELD(workerJob); READ_NODE_FIELD(masterQuery); READ_STRING_FIELD(masterTableName); + READ_BOOL_FIELD(routerExecutable); READ_DONE(); } @@ -289,6 +290,8 @@ 
ReadTask(READFUNC_ARGS) READ_BOOL_FIELD(assignmentConstrained); READ_NODE_FIELD(taskExecution); READ_BOOL_FIELD(upsertQuery); + READ_BOOL_FIELD(insertSelectQuery); + READ_NODE_FIELD(selectShardList); READ_DONE(); } diff --git a/src/include/distributed/multi_logical_optimizer.h b/src/include/distributed/multi_logical_optimizer.h index 3b840f935..810cc31bd 100644 --- a/src/include/distributed/multi_logical_optimizer.h +++ b/src/include/distributed/multi_logical_optimizer.h @@ -122,6 +122,7 @@ extern bool ExtractQueryWalker(Node *node, List **queryList); extern bool LeafQuery(Query *queryTree); extern List * PartitionColumnOpExpressionList(Query *query); extern List * ReplaceColumnsInOpExpressionList(List *opExpressionList, Var *newColumn); +extern bool IsPartitionColumnRecursive(Expr *columnExpression, Query *query); #endif /* MULTI_LOGICAL_OPTIMIZER_H */ diff --git a/src/include/distributed/multi_physical_planner.h b/src/include/distributed/multi_physical_planner.h index 3d12fa3aa..325c6513d 100644 --- a/src/include/distributed/multi_physical_planner.h +++ b/src/include/distributed/multi_physical_planner.h @@ -169,6 +169,9 @@ typedef struct Task uint64 shardId; /* only applies to shard fetch tasks */ TaskExecution *taskExecution; /* used by task tracker executor */ bool upsertQuery; /* only applies to modify tasks */ + + bool insertSelectQuery; + List *selectShardList; /* only applies INSERT/SELECT tasks */ } Task; @@ -205,6 +208,7 @@ typedef struct MultiPlan Job *workerJob; Query *masterQuery; char *masterTableName; + bool routerExecutable; } MultiPlan; @@ -227,6 +231,8 @@ extern int TaskAssignmentPolicy; /* Function declarations for building physical plans and constructing queries */ extern MultiPlan * MultiPhysicalPlanCreate(MultiTreeRoot *multiTree); extern StringInfo ShardFetchQueryString(uint64 shardId); +extern Task * CreateBasicTask(uint64 jobId, uint32 taskId, TaskType taskType, + char *queryString); /* Function declarations for shard pruning */ extern List 
* PruneShardList(Oid relationId, Index tableId, List *whereClauseList, @@ -243,9 +249,10 @@ extern void UpdateConstraint(Node *baseConstraint, ShardInterval *shardInterval) extern bool SimpleOpExpression(Expr *clause); extern bool OpExpressionContainsColumn(OpExpr *operatorExpression, Var *partitionColumn); +/* helper functions */ +extern Var * MakeInt4Column(void); +extern Const * MakeInt4Constant(Datum constantValue); extern int CompareShardPlacements(const void *leftElement, const void *rightElement); - -/* Function declarations for sorting shards. */ extern bool ShardIntervalsOverlap(ShardInterval *firstInterval, ShardInterval *secondInterval); diff --git a/src/include/distributed/multi_router_executor.h b/src/include/distributed/multi_router_executor.h index 3abbef272..fd3cc1c5e 100644 --- a/src/include/distributed/multi_router_executor.h +++ b/src/include/distributed/multi_router_executor.h @@ -33,7 +33,7 @@ typedef struct XactShardConnSet extern bool AllModificationsCommutative; -extern void RouterExecutorStart(QueryDesc *queryDesc, int eflags, Task *task); +extern void RouterExecutorStart(QueryDesc *queryDesc, int eflags); extern void RouterExecutorRun(QueryDesc *queryDesc, ScanDirection direction, long count); extern void RouterExecutorFinish(QueryDesc *queryDesc); extern void RouterExecutorEnd(QueryDesc *queryDesc); diff --git a/src/include/distributed/multi_router_planner.h b/src/include/distributed/multi_router_planner.h index 5d380575a..343f60ed7 100644 --- a/src/include/distributed/multi_router_planner.h +++ b/src/include/distributed/multi_router_planner.h @@ -21,14 +21,21 @@ #include "nodes/parsenodes.h" +/* reserved parameter id, we chose a negative number since it is not assigned by postgres */ +#define UNINSTANTIATED_PARAMETER_ID INT_MIN + /* reserved alias name for UPSERTs */ -#define UPSERT_ALIAS "citus_table_alias" +#define CITUS_TABLE_ALIAS "citus_table_alias" extern MultiPlan * MultiRouterPlanCreate(Query *originalQuery, Query *query, 
MultiExecutorType taskExecutorType, RelationRestrictionContext *restrictionContext); +extern void AddUninstantiatedPartitionRestriction(Query *originalQuery); extern void ErrorIfModifyQueryNotSupported(Query *queryTree); -extern Query * ReorderInsertSelectTargetListsIfExists(Query *originalQuery); +extern Query * ReorderInsertSelectTargetLists(Query *originalQuery, + RangeTblEntry *insertRte, + RangeTblEntry *subqueryRte); +extern bool InsertSelectQuery(Query *query); #endif /* MULTI_ROUTER_PLANNER_H */ diff --git a/src/include/distributed/multi_server_executor.h b/src/include/distributed/multi_server_executor.h index 741829857..9067125bf 100644 --- a/src/include/distributed/multi_server_executor.h +++ b/src/include/distributed/multi_server_executor.h @@ -191,7 +191,6 @@ extern void MultiRealTimeExecute(Job *job); extern void MultiTaskTrackerExecute(Job *job); /* Function declarations common to more than one executor */ -extern bool RouterExecutablePlan(MultiPlan *multiPlan, MultiExecutorType executorType); extern MultiExecutorType JobExecutorType(MultiPlan *multiPlan); extern void RemoveJobDirectory(uint64 jobId); extern TaskExecution * InitTaskExecution(Task *task, TaskExecStatus initialStatus); diff --git a/src/test/regress/expected/multi_insert_select.out b/src/test/regress/expected/multi_insert_select.out new file mode 100644 index 000000000..f3f067f7c --- /dev/null +++ b/src/test/regress/expected/multi_insert_select.out @@ -0,0 +1,1477 @@ +-- +-- MULTI_INSERT_SELECT +-- +ALTER SEQUENCE pg_catalog.pg_dist_shardid_seq RESTART 13300000; +ALTER SEQUENCE pg_catalog.pg_dist_jobid_seq RESTART 13300000; +-- create co-located tables +SET citus.shard_count = 4; +SET citus.shard_replication_factor = 2; +CREATE TABLE raw_events_first (user_id int, time timestamp, value_1 int, value_2 int, value_3 float, value_4 bigint, UNIQUE(user_id, value_1)); +SELECT create_distributed_table('raw_events_first', 'user_id'); + create_distributed_table +-------------------------- + +(1 
row) + +CREATE TABLE raw_events_second (user_id int, time timestamp, value_1 int, value_2 int, value_3 float, value_4 bigint, UNIQUE(user_id, value_1)); +SELECT create_distributed_table('raw_events_second', 'user_id'); + create_distributed_table +-------------------------- + +(1 row) + +CREATE TABLE agg_events (user_id int, value_1_agg int, value_2_agg int, value_3_agg float, value_4_agg bigint, agg_time timestamp, UNIQUE(user_id, value_1_agg)); +SELECT create_distributed_table('agg_events', 'user_id');; + create_distributed_table +-------------------------- + +(1 row) + +-- create the reference table as well +CREATE TABLE reference_table (user_id int); +SELECT create_reference_table('reference_table'); + create_reference_table +------------------------ + +(1 row) + +-- set back to the defaults +SET citus.shard_count = DEFAULT; +SET citus.shard_replication_factor = DEFAULT; +INSERT INTO raw_events_first (user_id, time, value_1, value_2, value_3, value_4) VALUES + (1, now(), 10, 100, 1000.1, 10000); +INSERT INTO raw_events_first (user_id, time, value_1, value_2, value_3, value_4) VALUES + (2, now(), 20, 200, 2000.1, 20000); +INSERT INTO raw_events_first (user_id, time, value_1, value_2, value_3, value_4) VALUES + (3, now(), 30, 300, 3000.1, 30000); +INSERT INTO raw_events_first (user_id, time, value_1, value_2, value_3, value_4) VALUES + (4, now(), 40, 400, 4000.1, 40000); +INSERT INTO raw_events_first (user_id, time, value_1, value_2, value_3, value_4) VALUES + (5, now(), 50, 500, 5000.1, 50000); +INSERT INTO raw_events_first (user_id, time, value_1, value_2, value_3, value_4) VALUES + (6, now(), 60, 600, 6000.1, 60000); +SET client_min_messages TO DEBUG4; +DEBUG: CommitTransactionCommand +DEBUG: CommitTransaction +DEBUG: name: unnamed; blockState: STARTED; state: INPROGR, xid/subid/cid: 0/1/0, nestlvl: 1, children: +-- raw table to raw table +INSERT INTO raw_events_second SELECT * FROM raw_events_first; +DEBUG: StartTransactionCommand +DEBUG: StartTransaction 
+DEBUG: name: unnamed; blockState: DEFAULT; state: INPROGR, xid/subid/cid: 0/1/0, nestlvl: 1, children: +DEBUG: predicate pruning for shardId 13300001 +DEBUG: predicate pruning for shardId 13300002 +DEBUG: predicate pruning for shardId 13300003 +DEBUG: distributed statement: INSERT INTO public.raw_events_second_13300004 AS citus_table_alias (user_id, "time", value_1, value_2, value_3, value_4) SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.raw_events_first_13300000 raw_events_first WHERE ((hashint4(user_id) >= '-2147483648'::integer) AND (hashint4(user_id) <= '-1073741825'::integer)) +DEBUG: predicate pruning for shardId 13300000 +DEBUG: predicate pruning for shardId 13300002 +DEBUG: predicate pruning for shardId 13300003 +DEBUG: distributed statement: INSERT INTO public.raw_events_second_13300005 AS citus_table_alias (user_id, "time", value_1, value_2, value_3, value_4) SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.raw_events_first_13300001 raw_events_first WHERE ((hashint4(user_id) >= '-1073741824'::integer) AND (hashint4(user_id) <= '-1'::integer)) +DEBUG: predicate pruning for shardId 13300000 +DEBUG: predicate pruning for shardId 13300001 +DEBUG: predicate pruning for shardId 13300003 +DEBUG: distributed statement: INSERT INTO public.raw_events_second_13300006 AS citus_table_alias (user_id, "time", value_1, value_2, value_3, value_4) SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.raw_events_first_13300002 raw_events_first WHERE ((hashint4(user_id) >= 0) AND (hashint4(user_id) <= 1073741823)) +DEBUG: predicate pruning for shardId 13300000 +DEBUG: predicate pruning for shardId 13300001 +DEBUG: predicate pruning for shardId 13300002 +DEBUG: distributed statement: INSERT INTO public.raw_events_second_13300007 AS citus_table_alias (user_id, "time", value_1, value_2, value_3, value_4) SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.raw_events_first_13300003 
raw_events_first WHERE ((hashint4(user_id) >= 1073741824) AND (hashint4(user_id) <= 2147483647)) +DEBUG: ProcessQuery +DEBUG: Plan is router executable +DEBUG: CommitTransactionCommand +DEBUG: CommitTransaction +DEBUG: name: unnamed; blockState: STARTED; state: INPROGR, xid/subid/cid: 0/1/0, nestlvl: 1, children: +DEBUG: sent COMMIT over connection 13300007 +DEBUG: sent COMMIT over connection 13300007 +DEBUG: sent COMMIT over connection 13300005 +DEBUG: sent COMMIT over connection 13300005 +DEBUG: sent COMMIT over connection 13300006 +DEBUG: sent COMMIT over connection 13300006 +DEBUG: sent COMMIT over connection 13300004 +DEBUG: sent COMMIT over connection 13300004 +-- see that our first multi shard INSERT...SELECT works expected +SET client_min_messages TO INFO; +DEBUG: StartTransactionCommand +DEBUG: StartTransaction +DEBUG: name: unnamed; blockState: DEFAULT; state: INPROGR, xid/subid/cid: 0/1/0, nestlvl: 1, children: +DEBUG: ProcessUtility +SELECT + raw_events_first.user_id +FROM + raw_events_first, raw_events_second +WHERE + raw_events_first.user_id = raw_events_second.user_id; + user_id +--------- + 1 + 5 + 3 + 4 + 6 + 2 +(6 rows) + +-- see that we get unique vialitons +INSERT INTO raw_events_second SELECT * FROM raw_events_first; +ERROR: duplicate key value violates unique constraint "raw_events_second_user_id_value_1_key_13300004" +DETAIL: Key (user_id, value_1)=(1, 10) already exists. 
+CONTEXT: while executing command on localhost:57637 +-- add one more row +INSERT INTO raw_events_first (user_id, time) VALUES + (7, now()); +-- try a single shard query +SET client_min_messages TO DEBUG4; +DEBUG: CommitTransactionCommand +DEBUG: CommitTransaction +DEBUG: name: unnamed; blockState: STARTED; state: INPROGR, xid/subid/cid: 0/1/0, nestlvl: 1, children: +INSERT INTO raw_events_second (user_id, time) SELECT user_id, time FROM raw_events_first WHERE user_id = 7; +DEBUG: StartTransactionCommand +DEBUG: StartTransaction +DEBUG: name: unnamed; blockState: DEFAULT; state: INPROGR, xid/subid/cid: 0/1/0, nestlvl: 1, children: +DEBUG: predicate pruning for shardId 13300000 +DEBUG: predicate pruning for shardId 13300001 +DEBUG: predicate pruning for shardId 13300002 +DEBUG: predicate pruning for shardId 13300003 +DEBUG: could not generate task for target shardId: 13300004 +DETAIL: Insert query hits 2 placements, Select query hits 1 placements and only 1 of those placements match. +DEBUG: predicate pruning for shardId 13300000 +DEBUG: predicate pruning for shardId 13300002 +DEBUG: predicate pruning for shardId 13300003 +DEBUG: distributed statement: INSERT INTO public.raw_events_second_13300005 AS citus_table_alias (user_id, "time") SELECT user_id, "time" FROM public.raw_events_first_13300001 raw_events_first WHERE ((user_id = 7) AND ((hashint4(user_id) >= '-1073741824'::integer) AND (hashint4(user_id) <= '-1'::integer))) +DEBUG: predicate pruning for shardId 13300000 +DEBUG: predicate pruning for shardId 13300001 +DEBUG: predicate pruning for shardId 13300002 +DEBUG: predicate pruning for shardId 13300003 +DEBUG: could not generate task for target shardId: 13300006 +DETAIL: Insert query hits 2 placements, Select query hits 1 placements and only 1 of those placements match. 
+DEBUG: predicate pruning for shardId 13300000 +DEBUG: predicate pruning for shardId 13300001 +DEBUG: predicate pruning for shardId 13300002 +DEBUG: predicate pruning for shardId 13300003 +DEBUG: could not generate task for target shardId: 13300007 +DETAIL: Insert query hits 2 placements, Select query hits 1 placements and only 1 of those placements match. +DEBUG: ProcessQuery +DEBUG: Plan is router executable +DEBUG: CommitTransactionCommand +DEBUG: CommitTransaction +DEBUG: name: unnamed; blockState: STARTED; state: INPROGR, xid/subid/cid: 0/1/0, nestlvl: 1, children: +SET client_min_messages TO INFO; +DEBUG: StartTransactionCommand +DEBUG: StartTransaction +DEBUG: name: unnamed; blockState: DEFAULT; state: INPROGR, xid/subid/cid: 0/1/0, nestlvl: 1, children: +DEBUG: ProcessUtility +-- add one more row +INSERT INTO raw_events_first (user_id, time, value_1, value_2, value_3, value_4) VALUES + (8, now(), 80, 800, 8000, 80000); +-- reorder columns +SET client_min_messages TO DEBUG4; +DEBUG: CommitTransactionCommand +DEBUG: CommitTransaction +DEBUG: name: unnamed; blockState: STARTED; state: INPROGR, xid/subid/cid: 0/1/0, nestlvl: 1, children: +INSERT INTO raw_events_second (value_2, value_1, value_3, value_4, user_id, time) +SELECT + value_2, value_1, value_3, value_4, user_id, time +FROM + raw_events_first +WHERE + user_id = 8; +DEBUG: StartTransactionCommand +DEBUG: StartTransaction +DEBUG: name: unnamed; blockState: DEFAULT; state: INPROGR, xid/subid/cid: 0/1/0, nestlvl: 1, children: +DEBUG: predicate pruning for shardId 13300001 +DEBUG: predicate pruning for shardId 13300002 +DEBUG: predicate pruning for shardId 13300003 +DEBUG: distributed statement: INSERT INTO public.raw_events_second_13300004 AS citus_table_alias (user_id, "time", value_1, value_2, value_3, value_4) SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.raw_events_first_13300000 raw_events_first WHERE ((user_id = 8) AND ((hashint4(user_id) >= '-2147483648'::integer) AND 
(hashint4(user_id) <= '-1073741825'::integer))) +DEBUG: predicate pruning for shardId 13300000 +DEBUG: predicate pruning for shardId 13300001 +DEBUG: predicate pruning for shardId 13300002 +DEBUG: predicate pruning for shardId 13300003 +DEBUG: could not generate task for target shardId: 13300005 +DETAIL: Insert query hits 2 placements, Select query hits 1 placements and only 1 of those placements match. +DEBUG: predicate pruning for shardId 13300000 +DEBUG: predicate pruning for shardId 13300001 +DEBUG: predicate pruning for shardId 13300002 +DEBUG: predicate pruning for shardId 13300003 +DEBUG: could not generate task for target shardId: 13300006 +DETAIL: Insert query hits 2 placements, Select query hits 1 placements and only 1 of those placements match. +DEBUG: predicate pruning for shardId 13300000 +DEBUG: predicate pruning for shardId 13300001 +DEBUG: predicate pruning for shardId 13300002 +DEBUG: predicate pruning for shardId 13300003 +DEBUG: could not generate task for target shardId: 13300007 +DETAIL: Insert query hits 2 placements, Select query hits 1 placements and only 1 of those placements match. +DEBUG: ProcessQuery +DEBUG: Plan is router executable +DEBUG: CommitTransactionCommand +DEBUG: CommitTransaction +DEBUG: name: unnamed; blockState: STARTED; state: INPROGR, xid/subid/cid: 0/1/0, nestlvl: 1, children: +-- a zero shard select +INSERT INTO raw_events_second (value_2, value_1, value_3, value_4, user_id, time) +SELECT + value_2, value_1, value_3, value_4, user_id, time +FROM + raw_events_first +WHERE + false; +DEBUG: StartTransactionCommand +DEBUG: StartTransaction +DEBUG: name: unnamed; blockState: DEFAULT; state: INPROGR, xid/subid/cid: 0/1/0, nestlvl: 1, children: +DEBUG: could not generate task for target shardId: 13300004 +DETAIL: Insert query hits 2 placements, Select query hits 1 placements and only 1 of those placements match. 
+DEBUG: could not generate task for target shardId: 13300005 +DETAIL: Insert query hits 2 placements, Select query hits 1 placements and only 1 of those placements match. +DEBUG: could not generate task for target shardId: 13300006 +DETAIL: Insert query hits 2 placements, Select query hits 1 placements and only 1 of those placements match. +DEBUG: could not generate task for target shardId: 13300007 +DETAIL: Insert query hits 2 placements, Select query hits 1 placements and only 1 of those placements match. +DEBUG: ProcessQuery +DEBUG: Plan is router executable +DEBUG: CommitTransactionCommand +DEBUG: CommitTransaction +DEBUG: name: unnamed; blockState: STARTED; state: INPROGR, xid/subid/cid: 0/1/0, nestlvl: 1, children: +-- another zero shard select +INSERT INTO raw_events_second (value_2, value_1, value_3, value_4, user_id, time) +SELECT + value_2, value_1, value_3, value_4, user_id, time +FROM + raw_events_first +WHERE + 0 != 0; +DEBUG: StartTransactionCommand +DEBUG: StartTransaction +DEBUG: name: unnamed; blockState: DEFAULT; state: INPROGR, xid/subid/cid: 0/1/0, nestlvl: 1, children: +DEBUG: could not generate task for target shardId: 13300004 +DETAIL: Insert query hits 2 placements, Select query hits 1 placements and only 1 of those placements match. +DEBUG: could not generate task for target shardId: 13300005 +DETAIL: Insert query hits 2 placements, Select query hits 1 placements and only 1 of those placements match. +DEBUG: could not generate task for target shardId: 13300006 +DETAIL: Insert query hits 2 placements, Select query hits 1 placements and only 1 of those placements match. +DEBUG: could not generate task for target shardId: 13300007 +DETAIL: Insert query hits 2 placements, Select query hits 1 placements and only 1 of those placements match. 
+DEBUG: ProcessQuery +DEBUG: Plan is router executable +DEBUG: CommitTransactionCommand +DEBUG: CommitTransaction +DEBUG: name: unnamed; blockState: STARTED; state: INPROGR, xid/subid/cid: 0/1/0, nestlvl: 1, children: +-- add one more row +SET client_min_messages TO INFO; +DEBUG: StartTransactionCommand +DEBUG: StartTransaction +DEBUG: name: unnamed; blockState: DEFAULT; state: INPROGR, xid/subid/cid: 0/1/0, nestlvl: 1, children: +DEBUG: ProcessUtility +INSERT INTO raw_events_first (user_id, time, value_1, value_2, value_3, value_4) VALUES + (9, now(), 90, 900, 9000, 90000); +-- show that RETURNING also works +SET client_min_messages TO DEBUG4; +DEBUG: CommitTransactionCommand +DEBUG: CommitTransaction +DEBUG: name: unnamed; blockState: STARTED; state: INPROGR, xid/subid/cid: 0/1/0, nestlvl: 1, children: +INSERT INTO raw_events_second (user_id, value_1, value_3) +SELECT + user_id, value_1, value_3 +FROM + raw_events_first +WHERE + value_3 = 9000 +RETURNING *; +DEBUG: StartTransactionCommand +DEBUG: StartTransaction +DEBUG: name: unnamed; blockState: DEFAULT; state: INPROGR, xid/subid/cid: 0/1/0, nestlvl: 1, children: +DEBUG: predicate pruning for shardId 13300001 +DEBUG: predicate pruning for shardId 13300002 +DEBUG: predicate pruning for shardId 13300003 +DEBUG: distributed statement: INSERT INTO public.raw_events_second_13300004 AS citus_table_alias (user_id, value_1, value_3) SELECT user_id, value_1, value_3 FROM public.raw_events_first_13300000 raw_events_first WHERE ((value_3 = (9000)::double precision) AND ((hashint4(user_id) >= '-2147483648'::integer) AND (hashint4(user_id) <= '-1073741825'::integer))) RETURNING citus_table_alias.user_id, citus_table_alias."time", citus_table_alias.value_1, citus_table_alias.value_2, citus_table_alias.value_3, citus_table_alias.value_4 +DEBUG: predicate pruning for shardId 13300000 +DEBUG: predicate pruning for shardId 13300002 +DEBUG: predicate pruning for shardId 13300003 +DEBUG: distributed statement: INSERT INTO 
public.raw_events_second_13300005 AS citus_table_alias (user_id, value_1, value_3) SELECT user_id, value_1, value_3 FROM public.raw_events_first_13300001 raw_events_first WHERE ((value_3 = (9000)::double precision) AND ((hashint4(user_id) >= '-1073741824'::integer) AND (hashint4(user_id) <= '-1'::integer))) RETURNING citus_table_alias.user_id, citus_table_alias."time", citus_table_alias.value_1, citus_table_alias.value_2, citus_table_alias.value_3, citus_table_alias.value_4 +DEBUG: predicate pruning for shardId 13300000 +DEBUG: predicate pruning for shardId 13300001 +DEBUG: predicate pruning for shardId 13300003 +DEBUG: distributed statement: INSERT INTO public.raw_events_second_13300006 AS citus_table_alias (user_id, value_1, value_3) SELECT user_id, value_1, value_3 FROM public.raw_events_first_13300002 raw_events_first WHERE ((value_3 = (9000)::double precision) AND ((hashint4(user_id) >= 0) AND (hashint4(user_id) <= 1073741823))) RETURNING citus_table_alias.user_id, citus_table_alias."time", citus_table_alias.value_1, citus_table_alias.value_2, citus_table_alias.value_3, citus_table_alias.value_4 +DEBUG: predicate pruning for shardId 13300000 +DEBUG: predicate pruning for shardId 13300001 +DEBUG: predicate pruning for shardId 13300002 +DEBUG: distributed statement: INSERT INTO public.raw_events_second_13300007 AS citus_table_alias (user_id, value_1, value_3) SELECT user_id, value_1, value_3 FROM public.raw_events_first_13300003 raw_events_first WHERE ((value_3 = (9000)::double precision) AND ((hashint4(user_id) >= 1073741824) AND (hashint4(user_id) <= 2147483647))) RETURNING citus_table_alias.user_id, citus_table_alias."time", citus_table_alias.value_1, citus_table_alias.value_2, citus_table_alias.value_3, citus_table_alias.value_4 +DEBUG: ProcessQuery +DEBUG: Plan is router executable +DEBUG: CommitTransactionCommand +DEBUG: CommitTransaction +DEBUG: name: unnamed; blockState: STARTED; state: INPROGR, xid/subid/cid: 0/1/0, nestlvl: 1, children: +DEBUG: sent 
COMMIT over connection 13300007 +DEBUG: sent COMMIT over connection 13300007 +DEBUG: sent COMMIT over connection 13300005 +DEBUG: sent COMMIT over connection 13300005 +DEBUG: sent COMMIT over connection 13300006 +DEBUG: sent COMMIT over connection 13300006 +DEBUG: sent COMMIT over connection 13300004 +DEBUG: sent COMMIT over connection 13300004 + user_id | time | value_1 | value_2 | value_3 | value_4 +---------+------+---------+---------+---------+--------- + 9 | | 90 | | 9000 | +(1 row) + +-- hits two shards +INSERT INTO raw_events_second (user_id, value_1, value_3) +SELECT + user_id, value_1, value_3 +FROM + raw_events_first +WHERE + user_id = 9 OR user_id = 16 +RETURNING *; +DEBUG: StartTransactionCommand +DEBUG: StartTransaction +DEBUG: name: unnamed; blockState: DEFAULT; state: INPROGR, xid/subid/cid: 0/1/0, nestlvl: 1, children: +DEBUG: predicate pruning for shardId 13300000 +DEBUG: predicate pruning for shardId 13300001 +DEBUG: predicate pruning for shardId 13300002 +DEBUG: predicate pruning for shardId 13300003 +DEBUG: could not generate task for target shardId: 13300004 +DETAIL: Insert query hits 2 placements, Select query hits 1 placements and only 1 of those placements match. 
+DEBUG: predicate pruning for shardId 13300000 +DEBUG: predicate pruning for shardId 13300002 +DEBUG: predicate pruning for shardId 13300003 +DEBUG: distributed statement: INSERT INTO public.raw_events_second_13300005 AS citus_table_alias (user_id, value_1, value_3) SELECT user_id, value_1, value_3 FROM public.raw_events_first_13300001 raw_events_first WHERE (((user_id = 9) OR (user_id = 16)) AND ((hashint4(user_id) >= '-1073741824'::integer) AND (hashint4(user_id) <= '-1'::integer))) RETURNING citus_table_alias.user_id, citus_table_alias."time", citus_table_alias.value_1, citus_table_alias.value_2, citus_table_alias.value_3, citus_table_alias.value_4 +DEBUG: predicate pruning for shardId 13300000 +DEBUG: predicate pruning for shardId 13300001 +DEBUG: predicate pruning for shardId 13300002 +DEBUG: predicate pruning for shardId 13300003 +DEBUG: could not generate task for target shardId: 13300006 +DETAIL: Insert query hits 2 placements, Select query hits 1 placements and only 1 of those placements match. +DEBUG: predicate pruning for shardId 13300000 +DEBUG: predicate pruning for shardId 13300001 +DEBUG: predicate pruning for shardId 13300002 +DEBUG: distributed statement: INSERT INTO public.raw_events_second_13300007 AS citus_table_alias (user_id, value_1, value_3) SELECT user_id, value_1, value_3 FROM public.raw_events_first_13300003 raw_events_first WHERE (((user_id = 9) OR (user_id = 16)) AND ((hashint4(user_id) >= 1073741824) AND (hashint4(user_id) <= 2147483647))) RETURNING citus_table_alias.user_id, citus_table_alias."time", citus_table_alias.value_1, citus_table_alias.value_2, citus_table_alias.value_3, citus_table_alias.value_4 +DEBUG: ProcessQuery +DEBUG: Plan is router executable +ERROR: duplicate key value violates unique constraint "raw_events_second_user_id_value_1_key_13300007" +DETAIL: Key (user_id, value_1)=(9, 90) already exists. 
+CONTEXT: while executing command on localhost:57638 +-- now do some aggregations +INSERT INTO agg_events +SELECT + user_id, sum(value_1), avg(value_2), sum(value_3), count(value_4) +FROM + raw_events_first +GROUP BY + user_id; +DEBUG: StartTransactionCommand +DEBUG: StartTransaction +DEBUG: name: unnamed; blockState: DEFAULT; state: INPROGR, xid/subid/cid: 0/1/0, nestlvl: 1, children: +DEBUG: predicate pruning for shardId 13300001 +DEBUG: predicate pruning for shardId 13300002 +DEBUG: predicate pruning for shardId 13300003 +DEBUG: distributed statement: INSERT INTO public.agg_events_13300008 AS citus_table_alias (user_id, value_1_agg, value_2_agg, value_3_agg, value_4_agg) SELECT user_id, sum(value_1) AS sum, avg(value_2) AS avg, sum(value_3) AS sum, count(value_4) AS count FROM public.raw_events_first_13300000 raw_events_first WHERE ((hashint4(user_id) >= '-2147483648'::integer) AND (hashint4(user_id) <= '-1073741825'::integer)) GROUP BY user_id +DEBUG: predicate pruning for shardId 13300000 +DEBUG: predicate pruning for shardId 13300002 +DEBUG: predicate pruning for shardId 13300003 +DEBUG: distributed statement: INSERT INTO public.agg_events_13300009 AS citus_table_alias (user_id, value_1_agg, value_2_agg, value_3_agg, value_4_agg) SELECT user_id, sum(value_1) AS sum, avg(value_2) AS avg, sum(value_3) AS sum, count(value_4) AS count FROM public.raw_events_first_13300001 raw_events_first WHERE ((hashint4(user_id) >= '-1073741824'::integer) AND (hashint4(user_id) <= '-1'::integer)) GROUP BY user_id +DEBUG: predicate pruning for shardId 13300000 +DEBUG: predicate pruning for shardId 13300001 +DEBUG: predicate pruning for shardId 13300003 +DEBUG: distributed statement: INSERT INTO public.agg_events_13300010 AS citus_table_alias (user_id, value_1_agg, value_2_agg, value_3_agg, value_4_agg) SELECT user_id, sum(value_1) AS sum, avg(value_2) AS avg, sum(value_3) AS sum, count(value_4) AS count FROM public.raw_events_first_13300002 raw_events_first WHERE 
((hashint4(user_id) >= 0) AND (hashint4(user_id) <= 1073741823)) GROUP BY user_id +DEBUG: predicate pruning for shardId 13300000 +DEBUG: predicate pruning for shardId 13300001 +DEBUG: predicate pruning for shardId 13300002 +DEBUG: distributed statement: INSERT INTO public.agg_events_13300011 AS citus_table_alias (user_id, value_1_agg, value_2_agg, value_3_agg, value_4_agg) SELECT user_id, sum(value_1) AS sum, avg(value_2) AS avg, sum(value_3) AS sum, count(value_4) AS count FROM public.raw_events_first_13300003 raw_events_first WHERE ((hashint4(user_id) >= 1073741824) AND (hashint4(user_id) <= 2147483647)) GROUP BY user_id +DEBUG: ProcessQuery +DEBUG: Plan is router executable +DEBUG: CommitTransactionCommand +DEBUG: CommitTransaction +DEBUG: name: unnamed; blockState: STARTED; state: INPROGR, xid/subid/cid: 0/1/0, nestlvl: 1, children: +DEBUG: sent COMMIT over connection 13300008 +DEBUG: sent COMMIT over connection 13300008 +DEBUG: sent COMMIT over connection 13300011 +DEBUG: sent COMMIT over connection 13300011 +DEBUG: sent COMMIT over connection 13300009 +DEBUG: sent COMMIT over connection 13300009 +DEBUG: sent COMMIT over connection 13300010 +DEBUG: sent COMMIT over connection 13300010 +-- group by column not exists on the SELECT target list +INSERT INTO agg_events (value_3_agg, value_4_agg, value_1_agg, user_id) +SELECT + sum(value_3), count(value_4), sum(value_1), user_id +FROM + raw_events_first +GROUP BY + value_2, user_id +RETURNING *; +DEBUG: StartTransactionCommand +DEBUG: StartTransaction +DEBUG: name: unnamed; blockState: DEFAULT; state: INPROGR, xid/subid/cid: 0/1/0, nestlvl: 1, children: +DEBUG: predicate pruning for shardId 13300001 +DEBUG: predicate pruning for shardId 13300002 +DEBUG: predicate pruning for shardId 13300003 +DEBUG: distributed statement: INSERT INTO public.agg_events_13300008 AS citus_table_alias (user_id, value_1_agg, value_3_agg, value_4_agg) SELECT user_id, sum(value_1) AS sum, sum(value_3) AS sum, count(value_4) AS count FROM 
public.raw_events_first_13300000 raw_events_first WHERE ((hashint4(user_id) >= '-2147483648'::integer) AND (hashint4(user_id) <= '-1073741825'::integer)) GROUP BY value_2, user_id RETURNING citus_table_alias.user_id, citus_table_alias.value_1_agg, citus_table_alias.value_2_agg, citus_table_alias.value_3_agg, citus_table_alias.value_4_agg, citus_table_alias.agg_time +DEBUG: predicate pruning for shardId 13300000 +DEBUG: predicate pruning for shardId 13300002 +DEBUG: predicate pruning for shardId 13300003 +DEBUG: distributed statement: INSERT INTO public.agg_events_13300009 AS citus_table_alias (user_id, value_1_agg, value_3_agg, value_4_agg) SELECT user_id, sum(value_1) AS sum, sum(value_3) AS sum, count(value_4) AS count FROM public.raw_events_first_13300001 raw_events_first WHERE ((hashint4(user_id) >= '-1073741824'::integer) AND (hashint4(user_id) <= '-1'::integer)) GROUP BY value_2, user_id RETURNING citus_table_alias.user_id, citus_table_alias.value_1_agg, citus_table_alias.value_2_agg, citus_table_alias.value_3_agg, citus_table_alias.value_4_agg, citus_table_alias.agg_time +DEBUG: predicate pruning for shardId 13300000 +DEBUG: predicate pruning for shardId 13300001 +DEBUG: predicate pruning for shardId 13300003 +DEBUG: distributed statement: INSERT INTO public.agg_events_13300010 AS citus_table_alias (user_id, value_1_agg, value_3_agg, value_4_agg) SELECT user_id, sum(value_1) AS sum, sum(value_3) AS sum, count(value_4) AS count FROM public.raw_events_first_13300002 raw_events_first WHERE ((hashint4(user_id) >= 0) AND (hashint4(user_id) <= 1073741823)) GROUP BY value_2, user_id RETURNING citus_table_alias.user_id, citus_table_alias.value_1_agg, citus_table_alias.value_2_agg, citus_table_alias.value_3_agg, citus_table_alias.value_4_agg, citus_table_alias.agg_time +DEBUG: predicate pruning for shardId 13300000 +DEBUG: predicate pruning for shardId 13300001 +DEBUG: predicate pruning for shardId 13300002 +DEBUG: distributed statement: INSERT INTO 
public.agg_events_13300011 AS citus_table_alias (user_id, value_1_agg, value_3_agg, value_4_agg) SELECT user_id, sum(value_1) AS sum, sum(value_3) AS sum, count(value_4) AS count FROM public.raw_events_first_13300003 raw_events_first WHERE ((hashint4(user_id) >= 1073741824) AND (hashint4(user_id) <= 2147483647)) GROUP BY value_2, user_id RETURNING citus_table_alias.user_id, citus_table_alias.value_1_agg, citus_table_alias.value_2_agg, citus_table_alias.value_3_agg, citus_table_alias.value_4_agg, citus_table_alias.agg_time +DEBUG: ProcessQuery +DEBUG: Plan is router executable +ERROR: duplicate key value violates unique constraint "agg_events_user_id_value_1_agg_key_13300008" +DETAIL: Key (user_id, value_1_agg)=(1, 10) already exists. +CONTEXT: while executing command on localhost:57638 +-- some subquery tests +INSERT INTO agg_events + (value_1_agg, + user_id) +SELECT SUM(value_1), + id +FROM (SELECT raw_events_second.user_id AS id, + raw_events_second.value_1 + FROM raw_events_first, + raw_events_second + WHERE raw_events_first.user_id = raw_events_second.user_id) AS foo +GROUP BY id; +DEBUG: StartTransactionCommand +DEBUG: StartTransaction +DEBUG: name: unnamed; blockState: DEFAULT; state: INPROGR, xid/subid/cid: 0/1/0, nestlvl: 1, children: +DEBUG: predicate pruning for shardId 13300001 +DEBUG: predicate pruning for shardId 13300002 +DEBUG: predicate pruning for shardId 13300003 +DEBUG: predicate pruning for shardId 13300005 +DEBUG: predicate pruning for shardId 13300006 +DEBUG: predicate pruning for shardId 13300007 +DEBUG: distributed statement: INSERT INTO public.agg_events_13300008 AS citus_table_alias (user_id, value_1_agg) SELECT id, sum(value_1) AS sum FROM (SELECT raw_events_second.user_id AS id, raw_events_second.value_1 FROM public.raw_events_first_13300000 raw_events_first, public.raw_events_second_13300004 raw_events_second WHERE (raw_events_first.user_id = raw_events_second.user_id)) foo WHERE ((hashint4(id) >= '-2147483648'::integer) AND 
(hashint4(id) <= '-1073741825'::integer)) GROUP BY id +DEBUG: predicate pruning for shardId 13300000 +DEBUG: predicate pruning for shardId 13300002 +DEBUG: predicate pruning for shardId 13300003 +DEBUG: predicate pruning for shardId 13300004 +DEBUG: predicate pruning for shardId 13300006 +DEBUG: predicate pruning for shardId 13300007 +DEBUG: distributed statement: INSERT INTO public.agg_events_13300009 AS citus_table_alias (user_id, value_1_agg) SELECT id, sum(value_1) AS sum FROM (SELECT raw_events_second.user_id AS id, raw_events_second.value_1 FROM public.raw_events_first_13300001 raw_events_first, public.raw_events_second_13300005 raw_events_second WHERE (raw_events_first.user_id = raw_events_second.user_id)) foo WHERE ((hashint4(id) >= '-1073741824'::integer) AND (hashint4(id) <= '-1'::integer)) GROUP BY id +DEBUG: predicate pruning for shardId 13300000 +DEBUG: predicate pruning for shardId 13300001 +DEBUG: predicate pruning for shardId 13300003 +DEBUG: predicate pruning for shardId 13300004 +DEBUG: predicate pruning for shardId 13300005 +DEBUG: predicate pruning for shardId 13300007 +DEBUG: distributed statement: INSERT INTO public.agg_events_13300010 AS citus_table_alias (user_id, value_1_agg) SELECT id, sum(value_1) AS sum FROM (SELECT raw_events_second.user_id AS id, raw_events_second.value_1 FROM public.raw_events_first_13300002 raw_events_first, public.raw_events_second_13300006 raw_events_second WHERE (raw_events_first.user_id = raw_events_second.user_id)) foo WHERE ((hashint4(id) >= 0) AND (hashint4(id) <= 1073741823)) GROUP BY id +DEBUG: predicate pruning for shardId 13300000 +DEBUG: predicate pruning for shardId 13300001 +DEBUG: predicate pruning for shardId 13300002 +DEBUG: predicate pruning for shardId 13300004 +DEBUG: predicate pruning for shardId 13300005 +DEBUG: predicate pruning for shardId 13300006 +DEBUG: distributed statement: INSERT INTO public.agg_events_13300011 AS citus_table_alias (user_id, value_1_agg) SELECT id, sum(value_1) AS sum 
FROM (SELECT raw_events_second.user_id AS id, raw_events_second.value_1 FROM public.raw_events_first_13300003 raw_events_first, public.raw_events_second_13300007 raw_events_second WHERE (raw_events_first.user_id = raw_events_second.user_id)) foo WHERE ((hashint4(id) >= 1073741824) AND (hashint4(id) <= 2147483647)) GROUP BY id +DEBUG: ProcessQuery +DEBUG: Plan is router executable +ERROR: duplicate key value violates unique constraint "agg_events_user_id_value_1_agg_key_13300008" +DETAIL: Key (user_id, value_1_agg)=(5, 50) already exists. +CONTEXT: while executing command on localhost:57638 +-- subquery one more level depth +INSERT INTO agg_events + (value_4_agg, + value_1_agg, + user_id) +SELECT v4, + v1, + id +FROM (SELECT SUM(raw_events_second.value_4) AS v4, + SUM(raw_events_first.value_1) AS v1, + raw_events_second.user_id AS id + FROM raw_events_first, + raw_events_second + WHERE raw_events_first.user_id = raw_events_second.user_id + GROUP BY raw_events_second.user_id) AS foo; +DEBUG: StartTransactionCommand +DEBUG: StartTransaction +DEBUG: name: unnamed; blockState: DEFAULT; state: INPROGR, xid/subid/cid: 0/1/0, nestlvl: 1, children: +DEBUG: predicate pruning for shardId 13300001 +DEBUG: predicate pruning for shardId 13300002 +DEBUG: predicate pruning for shardId 13300003 +DEBUG: predicate pruning for shardId 13300005 +DEBUG: predicate pruning for shardId 13300006 +DEBUG: predicate pruning for shardId 13300007 +DEBUG: distributed statement: INSERT INTO public.agg_events_13300008 AS citus_table_alias (user_id, value_1_agg, value_4_agg) SELECT id, v1, v4 FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM public.raw_events_first_13300000 raw_events_first, public.raw_events_second_13300004 raw_events_second WHERE (raw_events_first.user_id = raw_events_second.user_id) GROUP BY raw_events_second.user_id) foo WHERE ((hashint4(id) >= '-2147483648'::integer) AND (hashint4(id) <= 
'-1073741825'::integer)) +DEBUG: predicate pruning for shardId 13300000 +DEBUG: predicate pruning for shardId 13300002 +DEBUG: predicate pruning for shardId 13300003 +DEBUG: predicate pruning for shardId 13300004 +DEBUG: predicate pruning for shardId 13300006 +DEBUG: predicate pruning for shardId 13300007 +DEBUG: distributed statement: INSERT INTO public.agg_events_13300009 AS citus_table_alias (user_id, value_1_agg, value_4_agg) SELECT id, v1, v4 FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM public.raw_events_first_13300001 raw_events_first, public.raw_events_second_13300005 raw_events_second WHERE (raw_events_first.user_id = raw_events_second.user_id) GROUP BY raw_events_second.user_id) foo WHERE ((hashint4(id) >= '-1073741824'::integer) AND (hashint4(id) <= '-1'::integer)) +DEBUG: predicate pruning for shardId 13300000 +DEBUG: predicate pruning for shardId 13300001 +DEBUG: predicate pruning for shardId 13300003 +DEBUG: predicate pruning for shardId 13300004 +DEBUG: predicate pruning for shardId 13300005 +DEBUG: predicate pruning for shardId 13300007 +DEBUG: distributed statement: INSERT INTO public.agg_events_13300010 AS citus_table_alias (user_id, value_1_agg, value_4_agg) SELECT id, v1, v4 FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM public.raw_events_first_13300002 raw_events_first, public.raw_events_second_13300006 raw_events_second WHERE (raw_events_first.user_id = raw_events_second.user_id) GROUP BY raw_events_second.user_id) foo WHERE ((hashint4(id) >= 0) AND (hashint4(id) <= 1073741823)) +DEBUG: predicate pruning for shardId 13300000 +DEBUG: predicate pruning for shardId 13300001 +DEBUG: predicate pruning for shardId 13300002 +DEBUG: predicate pruning for shardId 13300004 +DEBUG: predicate pruning for shardId 13300005 +DEBUG: predicate pruning for shardId 13300006 +DEBUG: distributed statement: 
INSERT INTO public.agg_events_13300011 AS citus_table_alias (user_id, value_1_agg, value_4_agg) SELECT id, v1, v4 FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM public.raw_events_first_13300003 raw_events_first, public.raw_events_second_13300007 raw_events_second WHERE (raw_events_first.user_id = raw_events_second.user_id) GROUP BY raw_events_second.user_id) foo WHERE ((hashint4(id) >= 1073741824) AND (hashint4(id) <= 2147483647)) +DEBUG: ProcessQuery +DEBUG: Plan is router executable +ERROR: duplicate key value violates unique constraint "agg_events_user_id_value_1_agg_key_13300008" +DETAIL: Key (user_id, value_1_agg)=(5, 50) already exists. +CONTEXT: while executing command on localhost:57638 +-- join between subqueries +INSERT INTO agg_events + (user_id) +SELECT f2.id FROM +(SELECT + id +FROM (SELECT reference_table.user_id AS id + FROM raw_events_first, + reference_table + WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f +INNER JOIN +(SELECT v4, + v1, + id +FROM (SELECT SUM(raw_events_second.value_4) AS v4, + SUM(raw_events_first.value_1) AS v1, + raw_events_second.user_id AS id + FROM raw_events_first, + raw_events_second + WHERE raw_events_first.user_id = raw_events_second.user_id + GROUP BY raw_events_second.user_id + HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2 +ON (f.id = f2.id); +DEBUG: StartTransactionCommand +DEBUG: StartTransaction +DEBUG: name: unnamed; blockState: DEFAULT; state: INPROGR, xid/subid/cid: 0/1/0, nestlvl: 1, children: +DEBUG: predicate pruning for shardId 13300001 +DEBUG: predicate pruning for shardId 13300002 +DEBUG: predicate pruning for shardId 13300003 +DEBUG: predicate pruning for shardId 13300005 +DEBUG: predicate pruning for shardId 13300006 +DEBUG: predicate pruning for shardId 13300007 +DEBUG: predicate pruning for shardId 13300001 +DEBUG: predicate pruning for shardId 13300002 +DEBUG: predicate pruning for 
shardId 13300003 +DEBUG: distributed statement: INSERT INTO public.agg_events_13300008 AS citus_table_alias (user_id) SELECT f2.id FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM public.raw_events_first_13300000 raw_events_first, public.reference_table_13300012 reference_table WHERE (raw_events_first.user_id = reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM public.raw_events_first_13300000 raw_events_first, public.raw_events_second_13300004 raw_events_second WHERE (raw_events_first.user_id = raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) > (10)::numeric)) foo2) f2 ON ((f.id = f2.id))) WHERE ((hashint4(f2.id) >= '-2147483648'::integer) AND (hashint4(f2.id) <= '-1073741825'::integer)) +DEBUG: predicate pruning for shardId 13300000 +DEBUG: predicate pruning for shardId 13300002 +DEBUG: predicate pruning for shardId 13300003 +DEBUG: predicate pruning for shardId 13300004 +DEBUG: predicate pruning for shardId 13300006 +DEBUG: predicate pruning for shardId 13300007 +DEBUG: predicate pruning for shardId 13300000 +DEBUG: predicate pruning for shardId 13300002 +DEBUG: predicate pruning for shardId 13300003 +DEBUG: distributed statement: INSERT INTO public.agg_events_13300009 AS citus_table_alias (user_id) SELECT f2.id FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM public.raw_events_first_13300001 raw_events_first, public.reference_table_13300012 reference_table WHERE (raw_events_first.user_id = reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM public.raw_events_first_13300001 raw_events_first, public.raw_events_second_13300005 raw_events_second WHERE (raw_events_first.user_id = 
raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) > (10)::numeric)) foo2) f2 ON ((f.id = f2.id))) WHERE ((hashint4(f2.id) >= '-1073741824'::integer) AND (hashint4(f2.id) <= '-1'::integer)) +DEBUG: predicate pruning for shardId 13300000 +DEBUG: predicate pruning for shardId 13300001 +DEBUG: predicate pruning for shardId 13300003 +DEBUG: predicate pruning for shardId 13300004 +DEBUG: predicate pruning for shardId 13300005 +DEBUG: predicate pruning for shardId 13300007 +DEBUG: predicate pruning for shardId 13300000 +DEBUG: predicate pruning for shardId 13300001 +DEBUG: predicate pruning for shardId 13300003 +DEBUG: distributed statement: INSERT INTO public.agg_events_13300010 AS citus_table_alias (user_id) SELECT f2.id FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM public.raw_events_first_13300002 raw_events_first, public.reference_table_13300012 reference_table WHERE (raw_events_first.user_id = reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM public.raw_events_first_13300002 raw_events_first, public.raw_events_second_13300006 raw_events_second WHERE (raw_events_first.user_id = raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) > (10)::numeric)) foo2) f2 ON ((f.id = f2.id))) WHERE ((hashint4(f2.id) >= 0) AND (hashint4(f2.id) <= 1073741823)) +DEBUG: predicate pruning for shardId 13300000 +DEBUG: predicate pruning for shardId 13300001 +DEBUG: predicate pruning for shardId 13300002 +DEBUG: predicate pruning for shardId 13300004 +DEBUG: predicate pruning for shardId 13300005 +DEBUG: predicate pruning for shardId 13300006 +DEBUG: predicate pruning for shardId 13300000 +DEBUG: predicate pruning for shardId 13300001 +DEBUG: predicate pruning for shardId 13300002 +DEBUG: distributed statement: INSERT INTO 
public.agg_events_13300011 AS citus_table_alias (user_id) SELECT f2.id FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM public.raw_events_first_13300003 raw_events_first, public.reference_table_13300012 reference_table WHERE (raw_events_first.user_id = reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM public.raw_events_first_13300003 raw_events_first, public.raw_events_second_13300007 raw_events_second WHERE (raw_events_first.user_id = raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) > (10)::numeric)) foo2) f2 ON ((f.id = f2.id))) WHERE ((hashint4(f2.id) >= 1073741824) AND (hashint4(f2.id) <= 2147483647)) +DEBUG: ProcessQuery +DEBUG: Plan is router executable +DEBUG: CommitTransactionCommand +DEBUG: CommitTransaction +DEBUG: name: unnamed; blockState: STARTED; state: INPROGR, xid/subid/cid: 0/1/0, nestlvl: 1, children: +DEBUG: sent COMMIT over connection 13300008 +DEBUG: sent COMMIT over connection 13300008 +DEBUG: sent COMMIT over connection 13300011 +DEBUG: sent COMMIT over connection 13300011 +DEBUG: sent COMMIT over connection 13300009 +DEBUG: sent COMMIT over connection 13300009 +DEBUG: sent COMMIT over connection 13300010 +DEBUG: sent COMMIT over connection 13300010 +-- add one more level subqueris on top of subquery JOINs +INSERT INTO agg_events + (user_id, value_4_agg) +SELECT + outer_most.id, max(outer_most.value) +FROM +( + SELECT f2.id as id, f2.v4 as value FROM + (SELECT + id + FROM (SELECT reference_table.user_id AS id + FROM raw_events_first, + reference_table + WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f + INNER JOIN + (SELECT v4, + v1, + id + FROM (SELECT SUM(raw_events_second.value_4) AS v4, + SUM(raw_events_first.value_1) AS v1, + raw_events_second.user_id AS id + FROM raw_events_first, + 
raw_events_second + WHERE raw_events_first.user_id = raw_events_second.user_id + GROUP BY raw_events_second.user_id + HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2 +ON (f.id = f2.id)) as outer_most +GROUP BY + outer_most.id; +DEBUG: StartTransactionCommand +DEBUG: StartTransaction +DEBUG: name: unnamed; blockState: DEFAULT; state: INPROGR, xid/subid/cid: 0/1/0, nestlvl: 1, children: +DEBUG: predicate pruning for shardId 13300001 +DEBUG: predicate pruning for shardId 13300002 +DEBUG: predicate pruning for shardId 13300003 +DEBUG: predicate pruning for shardId 13300005 +DEBUG: predicate pruning for shardId 13300006 +DEBUG: predicate pruning for shardId 13300007 +DEBUG: predicate pruning for shardId 13300001 +DEBUG: predicate pruning for shardId 13300002 +DEBUG: predicate pruning for shardId 13300003 +DEBUG: distributed statement: INSERT INTO public.agg_events_13300008 AS citus_table_alias (user_id, value_4_agg) SELECT id, max(value) AS max FROM (SELECT f2.id, f2.v4 AS value FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM public.raw_events_first_13300000 raw_events_first, public.reference_table_13300012 reference_table WHERE (raw_events_first.user_id = reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM public.raw_events_first_13300000 raw_events_first, public.raw_events_second_13300004 raw_events_second WHERE (raw_events_first.user_id = raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) > (10)::numeric)) foo2) f2 ON ((f.id = f2.id)))) outer_most WHERE ((hashint4(id) >= '-2147483648'::integer) AND (hashint4(id) <= '-1073741825'::integer)) GROUP BY id +DEBUG: predicate pruning for shardId 13300000 +DEBUG: predicate pruning for shardId 13300002 +DEBUG: predicate pruning for shardId 13300003 +DEBUG: predicate pruning for shardId 13300004 +DEBUG: 
predicate pruning for shardId 13300006 +DEBUG: predicate pruning for shardId 13300007 +DEBUG: predicate pruning for shardId 13300000 +DEBUG: predicate pruning for shardId 13300002 +DEBUG: predicate pruning for shardId 13300003 +DEBUG: distributed statement: INSERT INTO public.agg_events_13300009 AS citus_table_alias (user_id, value_4_agg) SELECT id, max(value) AS max FROM (SELECT f2.id, f2.v4 AS value FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM public.raw_events_first_13300001 raw_events_first, public.reference_table_13300012 reference_table WHERE (raw_events_first.user_id = reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM public.raw_events_first_13300001 raw_events_first, public.raw_events_second_13300005 raw_events_second WHERE (raw_events_first.user_id = raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) > (10)::numeric)) foo2) f2 ON ((f.id = f2.id)))) outer_most WHERE ((hashint4(id) >= '-1073741824'::integer) AND (hashint4(id) <= '-1'::integer)) GROUP BY id +DEBUG: predicate pruning for shardId 13300000 +DEBUG: predicate pruning for shardId 13300001 +DEBUG: predicate pruning for shardId 13300003 +DEBUG: predicate pruning for shardId 13300004 +DEBUG: predicate pruning for shardId 13300005 +DEBUG: predicate pruning for shardId 13300007 +DEBUG: predicate pruning for shardId 13300000 +DEBUG: predicate pruning for shardId 13300001 +DEBUG: predicate pruning for shardId 13300003 +DEBUG: distributed statement: INSERT INTO public.agg_events_13300010 AS citus_table_alias (user_id, value_4_agg) SELECT id, max(value) AS max FROM (SELECT f2.id, f2.v4 AS value FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM public.raw_events_first_13300002 raw_events_first, public.reference_table_13300012 reference_table WHERE (raw_events_first.user_id 
= reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM public.raw_events_first_13300002 raw_events_first, public.raw_events_second_13300006 raw_events_second WHERE (raw_events_first.user_id = raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) > (10)::numeric)) foo2) f2 ON ((f.id = f2.id)))) outer_most WHERE ((hashint4(id) >= 0) AND (hashint4(id) <= 1073741823)) GROUP BY id +DEBUG: predicate pruning for shardId 13300000 +DEBUG: predicate pruning for shardId 13300001 +DEBUG: predicate pruning for shardId 13300002 +DEBUG: predicate pruning for shardId 13300004 +DEBUG: predicate pruning for shardId 13300005 +DEBUG: predicate pruning for shardId 13300006 +DEBUG: predicate pruning for shardId 13300000 +DEBUG: predicate pruning for shardId 13300001 +DEBUG: predicate pruning for shardId 13300002 +DEBUG: distributed statement: INSERT INTO public.agg_events_13300011 AS citus_table_alias (user_id, value_4_agg) SELECT id, max(value) AS max FROM (SELECT f2.id, f2.v4 AS value FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM public.raw_events_first_13300003 raw_events_first, public.reference_table_13300012 reference_table WHERE (raw_events_first.user_id = reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM public.raw_events_first_13300003 raw_events_first, public.raw_events_second_13300007 raw_events_second WHERE (raw_events_first.user_id = raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) > (10)::numeric)) foo2) f2 ON ((f.id = f2.id)))) outer_most WHERE ((hashint4(id) >= 1073741824) AND (hashint4(id) <= 2147483647)) GROUP BY id +DEBUG: ProcessQuery +DEBUG: Plan is router 
executable +DEBUG: CommitTransactionCommand +DEBUG: CommitTransaction +DEBUG: name: unnamed; blockState: STARTED; state: INPROGR, xid/subid/cid: 0/1/0, nestlvl: 1, children: +DEBUG: sent COMMIT over connection 13300008 +DEBUG: sent COMMIT over connection 13300008 +DEBUG: sent COMMIT over connection 13300011 +DEBUG: sent COMMIT over connection 13300011 +DEBUG: sent COMMIT over connection 13300009 +DEBUG: sent COMMIT over connection 13300009 +DEBUG: sent COMMIT over connection 13300010 +DEBUG: sent COMMIT over connection 13300010 +-- subqueries in WHERE clause +INSERT INTO raw_events_second + (user_id) +SELECT user_id +FROM raw_events_first +WHERE user_id IN (SELECT user_id + FROM raw_events_second + WHERE user_id = 2); +DEBUG: StartTransactionCommand +DEBUG: StartTransaction +DEBUG: name: unnamed; blockState: DEFAULT; state: INPROGR, xid/subid/cid: 0/1/0, nestlvl: 1, children: +DEBUG: predicate pruning for shardId 13300000 +DEBUG: predicate pruning for shardId 13300001 +DEBUG: predicate pruning for shardId 13300002 +DEBUG: predicate pruning for shardId 13300004 +DEBUG: predicate pruning for shardId 13300005 +DEBUG: predicate pruning for shardId 13300006 +DEBUG: distributed statement: INSERT INTO public.raw_events_second_13300004 AS citus_table_alias (user_id) SELECT user_id FROM public.raw_events_first_13300003 raw_events_first WHERE ((user_id IN (SELECT raw_events_second.user_id FROM public.raw_events_second_13300007 raw_events_second WHERE (raw_events_second.user_id = 2))) AND ((hashint4(user_id) >= '-2147483648'::integer) AND (hashint4(user_id) <= '-1073741825'::integer))) +DEBUG: predicate pruning for shardId 13300000 +DEBUG: predicate pruning for shardId 13300001 +DEBUG: predicate pruning for shardId 13300002 +DEBUG: predicate pruning for shardId 13300004 +DEBUG: predicate pruning for shardId 13300005 +DEBUG: predicate pruning for shardId 13300006 +DEBUG: distributed statement: INSERT INTO public.raw_events_second_13300005 AS citus_table_alias (user_id) SELECT 
user_id FROM public.raw_events_first_13300003 raw_events_first WHERE ((user_id IN (SELECT raw_events_second.user_id FROM public.raw_events_second_13300007 raw_events_second WHERE (raw_events_second.user_id = 2))) AND ((hashint4(user_id) >= '-1073741824'::integer) AND (hashint4(user_id) <= '-1'::integer))) +DEBUG: predicate pruning for shardId 13300000 +DEBUG: predicate pruning for shardId 13300001 +DEBUG: predicate pruning for shardId 13300002 +DEBUG: predicate pruning for shardId 13300004 +DEBUG: predicate pruning for shardId 13300005 +DEBUG: predicate pruning for shardId 13300006 +DEBUG: distributed statement: INSERT INTO public.raw_events_second_13300006 AS citus_table_alias (user_id) SELECT user_id FROM public.raw_events_first_13300003 raw_events_first WHERE ((user_id IN (SELECT raw_events_second.user_id FROM public.raw_events_second_13300007 raw_events_second WHERE (raw_events_second.user_id = 2))) AND ((hashint4(user_id) >= 0) AND (hashint4(user_id) <= 1073741823))) +DEBUG: predicate pruning for shardId 13300000 +DEBUG: predicate pruning for shardId 13300001 +DEBUG: predicate pruning for shardId 13300002 +DEBUG: predicate pruning for shardId 13300004 +DEBUG: predicate pruning for shardId 13300005 +DEBUG: predicate pruning for shardId 13300006 +DEBUG: distributed statement: INSERT INTO public.raw_events_second_13300007 AS citus_table_alias (user_id) SELECT user_id FROM public.raw_events_first_13300003 raw_events_first WHERE ((user_id IN (SELECT raw_events_second.user_id FROM public.raw_events_second_13300007 raw_events_second WHERE (raw_events_second.user_id = 2))) AND ((hashint4(user_id) >= 1073741824) AND (hashint4(user_id) <= 2147483647))) +DEBUG: ProcessQuery +DEBUG: Plan is router executable +DEBUG: CommitTransactionCommand +DEBUG: CommitTransaction +DEBUG: name: unnamed; blockState: STARTED; state: INPROGR, xid/subid/cid: 0/1/0, nestlvl: 1, children: +DEBUG: sent COMMIT over connection 13300007 +DEBUG: sent COMMIT over connection 13300007 +DEBUG: sent 
COMMIT over connection 13300005 +DEBUG: sent COMMIT over connection 13300005 +DEBUG: sent COMMIT over connection 13300006 +DEBUG: sent COMMIT over connection 13300006 +DEBUG: sent COMMIT over connection 13300004 +DEBUG: sent COMMIT over connection 13300004 +-- some UPSERTS +INSERT INTO agg_events AS ae + ( + user_id, + value_1_agg, + agg_time + ) +SELECT user_id, + value_1, + time +FROM raw_events_first +ON conflict (user_id, value_1_agg) +DO UPDATE + SET agg_time = EXCLUDED.agg_time + WHERE ae.agg_time < EXCLUDED.agg_time; +DEBUG: StartTransactionCommand +DEBUG: StartTransaction +DEBUG: name: unnamed; blockState: DEFAULT; state: INPROGR, xid/subid/cid: 0/1/0, nestlvl: 1, children: +DEBUG: predicate pruning for shardId 13300001 +DEBUG: predicate pruning for shardId 13300002 +DEBUG: predicate pruning for shardId 13300003 +DEBUG: distributed statement: INSERT INTO public.agg_events_13300008 AS ae (user_id, value_1_agg, agg_time) SELECT user_id, value_1, "time" FROM public.raw_events_first_13300000 raw_events_first WHERE ((hashint4(user_id) >= '-2147483648'::integer) AND (hashint4(user_id) <= '-1073741825'::integer)) ON CONFLICT(user_id, value_1_agg) DO UPDATE SET agg_time = excluded.agg_time WHERE (ae.agg_time < excluded.agg_time) +DEBUG: predicate pruning for shardId 13300000 +DEBUG: predicate pruning for shardId 13300002 +DEBUG: predicate pruning for shardId 13300003 +DEBUG: distributed statement: INSERT INTO public.agg_events_13300009 AS ae (user_id, value_1_agg, agg_time) SELECT user_id, value_1, "time" FROM public.raw_events_first_13300001 raw_events_first WHERE ((hashint4(user_id) >= '-1073741824'::integer) AND (hashint4(user_id) <= '-1'::integer)) ON CONFLICT(user_id, value_1_agg) DO UPDATE SET agg_time = excluded.agg_time WHERE (ae.agg_time < excluded.agg_time) +DEBUG: predicate pruning for shardId 13300000 +DEBUG: predicate pruning for shardId 13300001 +DEBUG: predicate pruning for shardId 13300003 +DEBUG: distributed statement: INSERT INTO 
public.agg_events_13300010 AS ae (user_id, value_1_agg, agg_time) SELECT user_id, value_1, "time" FROM public.raw_events_first_13300002 raw_events_first WHERE ((hashint4(user_id) >= 0) AND (hashint4(user_id) <= 1073741823)) ON CONFLICT(user_id, value_1_agg) DO UPDATE SET agg_time = excluded.agg_time WHERE (ae.agg_time < excluded.agg_time) +DEBUG: predicate pruning for shardId 13300000 +DEBUG: predicate pruning for shardId 13300001 +DEBUG: predicate pruning for shardId 13300002 +DEBUG: distributed statement: INSERT INTO public.agg_events_13300011 AS ae (user_id, value_1_agg, agg_time) SELECT user_id, value_1, "time" FROM public.raw_events_first_13300003 raw_events_first WHERE ((hashint4(user_id) >= 1073741824) AND (hashint4(user_id) <= 2147483647)) ON CONFLICT(user_id, value_1_agg) DO UPDATE SET agg_time = excluded.agg_time WHERE (ae.agg_time < excluded.agg_time) +DEBUG: ProcessQuery +DEBUG: Plan is router executable +DEBUG: CommitTransactionCommand +DEBUG: CommitTransaction +DEBUG: name: unnamed; blockState: STARTED; state: INPROGR, xid/subid/cid: 0/1/0, nestlvl: 1, children: +DEBUG: sent COMMIT over connection 13300008 +DEBUG: sent COMMIT over connection 13300008 +DEBUG: sent COMMIT over connection 13300011 +DEBUG: sent COMMIT over connection 13300011 +DEBUG: sent COMMIT over connection 13300009 +DEBUG: sent COMMIT over connection 13300009 +DEBUG: sent COMMIT over connection 13300010 +DEBUG: sent COMMIT over connection 13300010 +-- upserts with returning +INSERT INTO agg_events AS ae + ( + user_id, + value_1_agg, + agg_time + ) +SELECT user_id, + value_1, + time +FROM raw_events_first +ON conflict (user_id, value_1_agg) +DO UPDATE + SET agg_time = EXCLUDED.agg_time + WHERE ae.agg_time < EXCLUDED.agg_time +RETURNING user_id, value_1_agg; +DEBUG: StartTransactionCommand +DEBUG: StartTransaction +DEBUG: name: unnamed; blockState: DEFAULT; state: INPROGR, xid/subid/cid: 0/1/0, nestlvl: 1, children: +DEBUG: predicate pruning for shardId 13300001 +DEBUG: predicate 
pruning for shardId 13300002 +DEBUG: predicate pruning for shardId 13300003 +DEBUG: distributed statement: INSERT INTO public.agg_events_13300008 AS ae (user_id, value_1_agg, agg_time) SELECT user_id, value_1, "time" FROM public.raw_events_first_13300000 raw_events_first WHERE ((hashint4(user_id) >= '-2147483648'::integer) AND (hashint4(user_id) <= '-1073741825'::integer)) ON CONFLICT(user_id, value_1_agg) DO UPDATE SET agg_time = excluded.agg_time WHERE (ae.agg_time < excluded.agg_time) RETURNING ae.user_id, ae.value_1_agg +DEBUG: predicate pruning for shardId 13300000 +DEBUG: predicate pruning for shardId 13300002 +DEBUG: predicate pruning for shardId 13300003 +DEBUG: distributed statement: INSERT INTO public.agg_events_13300009 AS ae (user_id, value_1_agg, agg_time) SELECT user_id, value_1, "time" FROM public.raw_events_first_13300001 raw_events_first WHERE ((hashint4(user_id) >= '-1073741824'::integer) AND (hashint4(user_id) <= '-1'::integer)) ON CONFLICT(user_id, value_1_agg) DO UPDATE SET agg_time = excluded.agg_time WHERE (ae.agg_time < excluded.agg_time) RETURNING ae.user_id, ae.value_1_agg +DEBUG: predicate pruning for shardId 13300000 +DEBUG: predicate pruning for shardId 13300001 +DEBUG: predicate pruning for shardId 13300003 +DEBUG: distributed statement: INSERT INTO public.agg_events_13300010 AS ae (user_id, value_1_agg, agg_time) SELECT user_id, value_1, "time" FROM public.raw_events_first_13300002 raw_events_first WHERE ((hashint4(user_id) >= 0) AND (hashint4(user_id) <= 1073741823)) ON CONFLICT(user_id, value_1_agg) DO UPDATE SET agg_time = excluded.agg_time WHERE (ae.agg_time < excluded.agg_time) RETURNING ae.user_id, ae.value_1_agg +DEBUG: predicate pruning for shardId 13300000 +DEBUG: predicate pruning for shardId 13300001 +DEBUG: predicate pruning for shardId 13300002 +DEBUG: distributed statement: INSERT INTO public.agg_events_13300011 AS ae (user_id, value_1_agg, agg_time) SELECT user_id, value_1, "time" FROM public.raw_events_first_13300003 
raw_events_first WHERE ((hashint4(user_id) >= 1073741824) AND (hashint4(user_id) <= 2147483647)) ON CONFLICT(user_id, value_1_agg) DO UPDATE SET agg_time = excluded.agg_time WHERE (ae.agg_time < excluded.agg_time) RETURNING ae.user_id, ae.value_1_agg +DEBUG: ProcessQuery +DEBUG: Plan is router executable +DEBUG: CommitTransactionCommand +DEBUG: CommitTransaction +DEBUG: name: unnamed; blockState: STARTED; state: INPROGR, xid/subid/cid: 0/1/0, nestlvl: 1, children: +DEBUG: sent COMMIT over connection 13300008 +DEBUG: sent COMMIT over connection 13300008 +DEBUG: sent COMMIT over connection 13300011 +DEBUG: sent COMMIT over connection 13300011 +DEBUG: sent COMMIT over connection 13300009 +DEBUG: sent COMMIT over connection 13300009 +DEBUG: sent COMMIT over connection 13300010 +DEBUG: sent COMMIT over connection 13300010 + user_id | value_1_agg +---------+------------- + 7 | +(1 row) + +INSERT INTO agg_events (user_id, value_1_agg) +SELECT + user_id, sum(value_1 + value_2) +FROM + raw_events_first GROUP BY user_id; +DEBUG: StartTransactionCommand +DEBUG: StartTransaction +DEBUG: name: unnamed; blockState: DEFAULT; state: INPROGR, xid/subid/cid: 0/1/0, nestlvl: 1, children: +DEBUG: predicate pruning for shardId 13300001 +DEBUG: predicate pruning for shardId 13300002 +DEBUG: predicate pruning for shardId 13300003 +DEBUG: distributed statement: INSERT INTO public.agg_events_13300008 AS citus_table_alias (user_id, value_1_agg) SELECT user_id, sum((value_1 + value_2)) AS sum FROM public.raw_events_first_13300000 raw_events_first WHERE ((hashint4(user_id) >= '-2147483648'::integer) AND (hashint4(user_id) <= '-1073741825'::integer)) GROUP BY user_id +DEBUG: predicate pruning for shardId 13300000 +DEBUG: predicate pruning for shardId 13300002 +DEBUG: predicate pruning for shardId 13300003 +DEBUG: distributed statement: INSERT INTO public.agg_events_13300009 AS citus_table_alias (user_id, value_1_agg) SELECT user_id, sum((value_1 + value_2)) AS sum FROM 
public.raw_events_first_13300001 raw_events_first WHERE ((hashint4(user_id) >= '-1073741824'::integer) AND (hashint4(user_id) <= '-1'::integer)) GROUP BY user_id +DEBUG: predicate pruning for shardId 13300000 +DEBUG: predicate pruning for shardId 13300001 +DEBUG: predicate pruning for shardId 13300003 +DEBUG: distributed statement: INSERT INTO public.agg_events_13300010 AS citus_table_alias (user_id, value_1_agg) SELECT user_id, sum((value_1 + value_2)) AS sum FROM public.raw_events_first_13300002 raw_events_first WHERE ((hashint4(user_id) >= 0) AND (hashint4(user_id) <= 1073741823)) GROUP BY user_id +DEBUG: predicate pruning for shardId 13300000 +DEBUG: predicate pruning for shardId 13300001 +DEBUG: predicate pruning for shardId 13300002 +DEBUG: distributed statement: INSERT INTO public.agg_events_13300011 AS citus_table_alias (user_id, value_1_agg) SELECT user_id, sum((value_1 + value_2)) AS sum FROM public.raw_events_first_13300003 raw_events_first WHERE ((hashint4(user_id) >= 1073741824) AND (hashint4(user_id) <= 2147483647)) GROUP BY user_id +DEBUG: ProcessQuery +DEBUG: Plan is router executable +DEBUG: CommitTransactionCommand +DEBUG: CommitTransaction +DEBUG: name: unnamed; blockState: STARTED; state: INPROGR, xid/subid/cid: 0/1/0, nestlvl: 1, children: +DEBUG: sent COMMIT over connection 13300008 +DEBUG: sent COMMIT over connection 13300008 +DEBUG: sent COMMIT over connection 13300011 +DEBUG: sent COMMIT over connection 13300011 +DEBUG: sent COMMIT over connection 13300009 +DEBUG: sent COMMIT over connection 13300009 +DEBUG: sent COMMIT over connection 13300010 +DEBUG: sent COMMIT over connection 13300010 +-- FILTER CLAUSE +INSERT INTO agg_events (user_id, value_1_agg) +SELECT + user_id, sum(value_1 + value_2) FILTER (where value_3 = 15) +FROM + raw_events_first GROUP BY user_id; +DEBUG: StartTransactionCommand +DEBUG: StartTransaction +DEBUG: name: unnamed; blockState: DEFAULT; state: INPROGR, xid/subid/cid: 0/1/0, nestlvl: 1, children: +DEBUG: predicate 
pruning for shardId 13300001 +DEBUG: predicate pruning for shardId 13300002 +DEBUG: predicate pruning for shardId 13300003 +DEBUG: distributed statement: INSERT INTO public.agg_events_13300008 AS citus_table_alias (user_id, value_1_agg) SELECT user_id, sum((value_1 + value_2)) FILTER (WHERE (value_3 = (15)::double precision)) AS sum FROM public.raw_events_first_13300000 raw_events_first WHERE ((hashint4(user_id) >= '-2147483648'::integer) AND (hashint4(user_id) <= '-1073741825'::integer)) GROUP BY user_id +DEBUG: predicate pruning for shardId 13300000 +DEBUG: predicate pruning for shardId 13300002 +DEBUG: predicate pruning for shardId 13300003 +DEBUG: distributed statement: INSERT INTO public.agg_events_13300009 AS citus_table_alias (user_id, value_1_agg) SELECT user_id, sum((value_1 + value_2)) FILTER (WHERE (value_3 = (15)::double precision)) AS sum FROM public.raw_events_first_13300001 raw_events_first WHERE ((hashint4(user_id) >= '-1073741824'::integer) AND (hashint4(user_id) <= '-1'::integer)) GROUP BY user_id +DEBUG: predicate pruning for shardId 13300000 +DEBUG: predicate pruning for shardId 13300001 +DEBUG: predicate pruning for shardId 13300003 +DEBUG: distributed statement: INSERT INTO public.agg_events_13300010 AS citus_table_alias (user_id, value_1_agg) SELECT user_id, sum((value_1 + value_2)) FILTER (WHERE (value_3 = (15)::double precision)) AS sum FROM public.raw_events_first_13300002 raw_events_first WHERE ((hashint4(user_id) >= 0) AND (hashint4(user_id) <= 1073741823)) GROUP BY user_id +DEBUG: predicate pruning for shardId 13300000 +DEBUG: predicate pruning for shardId 13300001 +DEBUG: predicate pruning for shardId 13300002 +DEBUG: distributed statement: INSERT INTO public.agg_events_13300011 AS citus_table_alias (user_id, value_1_agg) SELECT user_id, sum((value_1 + value_2)) FILTER (WHERE (value_3 = (15)::double precision)) AS sum FROM public.raw_events_first_13300003 raw_events_first WHERE ((hashint4(user_id) >= 1073741824) AND (hashint4(user_id) 
<= 2147483647)) GROUP BY user_id +DEBUG: ProcessQuery +DEBUG: Plan is router executable +DEBUG: CommitTransactionCommand +DEBUG: CommitTransaction +DEBUG: name: unnamed; blockState: STARTED; state: INPROGR, xid/subid/cid: 0/1/0, nestlvl: 1, children: +DEBUG: sent COMMIT over connection 13300008 +DEBUG: sent COMMIT over connection 13300008 +DEBUG: sent COMMIT over connection 13300011 +DEBUG: sent COMMIT over connection 13300011 +DEBUG: sent COMMIT over connection 13300009 +DEBUG: sent COMMIT over connection 13300009 +DEBUG: sent COMMIT over connection 13300010 +DEBUG: sent COMMIT over connection 13300010 +-- a test with reference table JOINs +INSERT INTO + agg_events (user_id, value_1_agg) +SELECT + raw_events_first.user_id, sum(value_1) +FROM + reference_table, raw_events_first +WHERE + raw_events_first.user_id = reference_table.user_id +GROUP BY + raw_events_first.user_id; +DEBUG: StartTransactionCommand +DEBUG: StartTransaction +DEBUG: name: unnamed; blockState: DEFAULT; state: INPROGR, xid/subid/cid: 0/1/0, nestlvl: 1, children: +DEBUG: predicate pruning for shardId 13300001 +DEBUG: predicate pruning for shardId 13300002 +DEBUG: predicate pruning for shardId 13300003 +DEBUG: distributed statement: INSERT INTO public.agg_events_13300008 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, sum(raw_events_first.value_1) AS sum FROM public.reference_table_13300012 reference_table, public.raw_events_first_13300000 raw_events_first WHERE ((raw_events_first.user_id = reference_table.user_id) AND ((hashint4(raw_events_first.user_id) >= '-2147483648'::integer) AND (hashint4(raw_events_first.user_id) <= '-1073741825'::integer))) GROUP BY raw_events_first.user_id +DEBUG: predicate pruning for shardId 13300000 +DEBUG: predicate pruning for shardId 13300002 +DEBUG: predicate pruning for shardId 13300003 +DEBUG: distributed statement: INSERT INTO public.agg_events_13300009 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, 
sum(raw_events_first.value_1) AS sum FROM public.reference_table_13300012 reference_table, public.raw_events_first_13300001 raw_events_first WHERE ((raw_events_first.user_id = reference_table.user_id) AND ((hashint4(raw_events_first.user_id) >= '-1073741824'::integer) AND (hashint4(raw_events_first.user_id) <= '-1'::integer))) GROUP BY raw_events_first.user_id +DEBUG: predicate pruning for shardId 13300000 +DEBUG: predicate pruning for shardId 13300001 +DEBUG: predicate pruning for shardId 13300003 +DEBUG: distributed statement: INSERT INTO public.agg_events_13300010 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, sum(raw_events_first.value_1) AS sum FROM public.reference_table_13300012 reference_table, public.raw_events_first_13300002 raw_events_first WHERE ((raw_events_first.user_id = reference_table.user_id) AND ((hashint4(raw_events_first.user_id) >= 0) AND (hashint4(raw_events_first.user_id) <= 1073741823))) GROUP BY raw_events_first.user_id +DEBUG: predicate pruning for shardId 13300000 +DEBUG: predicate pruning for shardId 13300001 +DEBUG: predicate pruning for shardId 13300002 +DEBUG: distributed statement: INSERT INTO public.agg_events_13300011 AS citus_table_alias (user_id, value_1_agg) SELECT raw_events_first.user_id, sum(raw_events_first.value_1) AS sum FROM public.reference_table_13300012 reference_table, public.raw_events_first_13300003 raw_events_first WHERE ((raw_events_first.user_id = reference_table.user_id) AND ((hashint4(raw_events_first.user_id) >= 1073741824) AND (hashint4(raw_events_first.user_id) <= 2147483647))) GROUP BY raw_events_first.user_id +DEBUG: ProcessQuery +DEBUG: Plan is router executable +DEBUG: CommitTransactionCommand +DEBUG: CommitTransaction +DEBUG: name: unnamed; blockState: STARTED; state: INPROGR, xid/subid/cid: 0/1/0, nestlvl: 1, children: +DEBUG: sent COMMIT over connection 13300008 +DEBUG: sent COMMIT over connection 13300008 +DEBUG: sent COMMIT over connection 13300011 +DEBUG: sent COMMIT 
over connection 13300011 +DEBUG: sent COMMIT over connection 13300009 +DEBUG: sent COMMIT over connection 13300009 +DEBUG: sent COMMIT over connection 13300010 +DEBUG: sent COMMIT over connection 13300010 +-- a note on the outer joins is that +-- we filter out outer join results +-- where partition column returns +-- NULL. Thus, we could INSERT less rows +-- than we expect from subquery result. +-- see the following tests +SET client_min_messages TO INFO; +DEBUG: StartTransactionCommand +DEBUG: StartTransaction +DEBUG: name: unnamed; blockState: DEFAULT; state: INPROGR, xid/subid/cid: 0/1/0, nestlvl: 1, children: +DEBUG: ProcessUtility +-- we don't want to see constraint vialotions, so truncate first +TRUNCATE agg_events; +-- add a row to first table to make table contents different +INSERT INTO raw_events_second (user_id, time, value_1, value_2, value_3, value_4) VALUES + (10, now(), 100, 10000, 10000, 100000); +DELETE FROM raw_events_second WHERE user_id = 2; +-- we select 11 rows +SELECT t1.user_id AS col1, + t2.user_id AS col2 + FROM raw_events_first t1 + FULL JOIN raw_events_second t2 + ON t1.user_id = t2.user_id + ORDER BY t1.user_id, + t2.user_id; + col1 | col2 +------+------ + 1 | 1 + 2 | + 3 | 3 + 4 | 4 + 5 | 5 + 6 | 6 + 7 | 7 + 8 | 8 + 9 | 9 + | 10 +(10 rows) + +SET client_min_messages TO DEBUG4; +DEBUG: CommitTransactionCommand +DEBUG: CommitTransaction +DEBUG: name: unnamed; blockState: STARTED; state: INPROGR, xid/subid/cid: 0/1/0, nestlvl: 1, children: +-- we insert 10 rows since we filtered out +-- NULL partition column values +INSERT INTO agg_events (user_id, value_1_agg) +SELECT t1.user_id AS col1, + t2.user_id AS col2 +FROM raw_events_first t1 + FULL JOIN raw_events_second t2 + ON t1.user_id = t2.user_id; +DEBUG: StartTransactionCommand +DEBUG: StartTransaction +DEBUG: name: unnamed; blockState: DEFAULT; state: INPROGR, xid/subid/cid: 0/1/0, nestlvl: 1, children: +DEBUG: predicate pruning for shardId 13300001 +DEBUG: predicate pruning for shardId 
13300002 +DEBUG: predicate pruning for shardId 13300003 +DEBUG: predicate pruning for shardId 13300005 +DEBUG: predicate pruning for shardId 13300006 +DEBUG: predicate pruning for shardId 13300007 +DEBUG: distributed statement: INSERT INTO public.agg_events_13300008 AS citus_table_alias (user_id, value_1_agg) SELECT t1.user_id AS col1, t2.user_id AS col2 FROM (public.raw_events_first_13300000 t1 FULL JOIN public.raw_events_second_13300004 t2 ON ((t1.user_id = t2.user_id))) WHERE ((hashint4(t1.user_id) >= '-2147483648'::integer) AND (hashint4(t1.user_id) <= '-1073741825'::integer)) +DEBUG: predicate pruning for shardId 13300000 +DEBUG: predicate pruning for shardId 13300002 +DEBUG: predicate pruning for shardId 13300003 +DEBUG: predicate pruning for shardId 13300004 +DEBUG: predicate pruning for shardId 13300006 +DEBUG: predicate pruning for shardId 13300007 +DEBUG: distributed statement: INSERT INTO public.agg_events_13300009 AS citus_table_alias (user_id, value_1_agg) SELECT t1.user_id AS col1, t2.user_id AS col2 FROM (public.raw_events_first_13300001 t1 FULL JOIN public.raw_events_second_13300005 t2 ON ((t1.user_id = t2.user_id))) WHERE ((hashint4(t1.user_id) >= '-1073741824'::integer) AND (hashint4(t1.user_id) <= '-1'::integer)) +DEBUG: predicate pruning for shardId 13300000 +DEBUG: predicate pruning for shardId 13300001 +DEBUG: predicate pruning for shardId 13300003 +DEBUG: predicate pruning for shardId 13300004 +DEBUG: predicate pruning for shardId 13300005 +DEBUG: predicate pruning for shardId 13300007 +DEBUG: distributed statement: INSERT INTO public.agg_events_13300010 AS citus_table_alias (user_id, value_1_agg) SELECT t1.user_id AS col1, t2.user_id AS col2 FROM (public.raw_events_first_13300002 t1 FULL JOIN public.raw_events_second_13300006 t2 ON ((t1.user_id = t2.user_id))) WHERE ((hashint4(t1.user_id) >= 0) AND (hashint4(t1.user_id) <= 1073741823)) +DEBUG: predicate pruning for shardId 13300000 +DEBUG: predicate pruning for shardId 13300001 +DEBUG: 
predicate pruning for shardId 13300002 +DEBUG: predicate pruning for shardId 13300004 +DEBUG: predicate pruning for shardId 13300005 +DEBUG: predicate pruning for shardId 13300006 +DEBUG: distributed statement: INSERT INTO public.agg_events_13300011 AS citus_table_alias (user_id, value_1_agg) SELECT t1.user_id AS col1, t2.user_id AS col2 FROM (public.raw_events_first_13300003 t1 FULL JOIN public.raw_events_second_13300007 t2 ON ((t1.user_id = t2.user_id))) WHERE ((hashint4(t1.user_id) >= 1073741824) AND (hashint4(t1.user_id) <= 2147483647)) +DEBUG: ProcessQuery +DEBUG: Plan is router executable +DEBUG: CommitTransactionCommand +DEBUG: CommitTransaction +DEBUG: name: unnamed; blockState: STARTED; state: INPROGR, xid/subid/cid: 0/1/0, nestlvl: 1, children: +DEBUG: sent COMMIT over connection 13300008 +DEBUG: sent COMMIT over connection 13300008 +DEBUG: sent COMMIT over connection 13300011 +DEBUG: sent COMMIT over connection 13300011 +DEBUG: sent COMMIT over connection 13300009 +DEBUG: sent COMMIT over connection 13300009 +DEBUG: sent COMMIT over connection 13300010 +DEBUG: sent COMMIT over connection 13300010 +SET client_min_messages TO INFO; +DEBUG: StartTransactionCommand +DEBUG: StartTransaction +DEBUG: name: unnamed; blockState: DEFAULT; state: INPROGR, xid/subid/cid: 0/1/0, nestlvl: 1, children: +DEBUG: ProcessUtility +-- see that the results are different from the SELECT query +SELECT + user_id, value_1_agg +FROM + agg_events +ORDER BY + user_id, value_1_agg; + user_id | value_1_agg +---------+------------- + 1 | 1 + 2 | + 3 | 3 + 4 | 4 + 5 | 5 + 6 | 6 + 7 | 7 + 8 | 8 + 9 | 9 +(9 rows) + +-- we don't want to see constraint vialotions, so truncate first +SET client_min_messages TO INFO; +TRUNCATE agg_events; +SET client_min_messages TO DEBUG4; +DEBUG: CommitTransactionCommand +DEBUG: CommitTransaction +DEBUG: name: unnamed; blockState: STARTED; state: INPROGR, xid/subid/cid: 0/1/0, nestlvl: 1, children: +-- DISTINCT clause +INSERT INTO agg_events (value_1_agg, 
user_id) + SELECT + DISTINCT value_1, user_id + FROM + raw_events_first; +DEBUG: StartTransactionCommand +DEBUG: StartTransaction +DEBUG: name: unnamed; blockState: DEFAULT; state: INPROGR, xid/subid/cid: 0/1/0, nestlvl: 1, children: +DEBUG: predicate pruning for shardId 13300001 +DEBUG: predicate pruning for shardId 13300002 +DEBUG: predicate pruning for shardId 13300003 +DEBUG: distributed statement: INSERT INTO public.agg_events_13300008 AS citus_table_alias (user_id, value_1_agg) SELECT DISTINCT user_id, value_1 FROM public.raw_events_first_13300000 raw_events_first WHERE ((hashint4(user_id) >= '-2147483648'::integer) AND (hashint4(user_id) <= '-1073741825'::integer)) +DEBUG: predicate pruning for shardId 13300000 +DEBUG: predicate pruning for shardId 13300002 +DEBUG: predicate pruning for shardId 13300003 +DEBUG: distributed statement: INSERT INTO public.agg_events_13300009 AS citus_table_alias (user_id, value_1_agg) SELECT DISTINCT user_id, value_1 FROM public.raw_events_first_13300001 raw_events_first WHERE ((hashint4(user_id) >= '-1073741824'::integer) AND (hashint4(user_id) <= '-1'::integer)) +DEBUG: predicate pruning for shardId 13300000 +DEBUG: predicate pruning for shardId 13300001 +DEBUG: predicate pruning for shardId 13300003 +DEBUG: distributed statement: INSERT INTO public.agg_events_13300010 AS citus_table_alias (user_id, value_1_agg) SELECT DISTINCT user_id, value_1 FROM public.raw_events_first_13300002 raw_events_first WHERE ((hashint4(user_id) >= 0) AND (hashint4(user_id) <= 1073741823)) +DEBUG: predicate pruning for shardId 13300000 +DEBUG: predicate pruning for shardId 13300001 +DEBUG: predicate pruning for shardId 13300002 +DEBUG: distributed statement: INSERT INTO public.agg_events_13300011 AS citus_table_alias (user_id, value_1_agg) SELECT DISTINCT user_id, value_1 FROM public.raw_events_first_13300003 raw_events_first WHERE ((hashint4(user_id) >= 1073741824) AND (hashint4(user_id) <= 2147483647)) +DEBUG: ProcessQuery +DEBUG: Plan is router 
executable +DEBUG: CommitTransactionCommand +DEBUG: CommitTransaction +DEBUG: name: unnamed; blockState: STARTED; state: INPROGR, xid/subid/cid: 0/1/0, nestlvl: 1, children: +DEBUG: sent COMMIT over connection 13300008 +DEBUG: sent COMMIT over connection 13300008 +DEBUG: sent COMMIT over connection 13300011 +DEBUG: sent COMMIT over connection 13300011 +DEBUG: sent COMMIT over connection 13300009 +DEBUG: sent COMMIT over connection 13300009 +DEBUG: sent COMMIT over connection 13300010 +DEBUG: sent COMMIT over connection 13300010 +-- we don't want to see constraint vialotions, so truncate first +SET client_min_messages TO INFO; +DEBUG: StartTransactionCommand +DEBUG: StartTransaction +DEBUG: name: unnamed; blockState: DEFAULT; state: INPROGR, xid/subid/cid: 0/1/0, nestlvl: 1, children: +DEBUG: ProcessUtility +truncate agg_events; +SET client_min_messages TO DEBUG4; +DEBUG: CommitTransactionCommand +DEBUG: CommitTransaction +DEBUG: name: unnamed; blockState: STARTED; state: INPROGR, xid/subid/cid: 0/1/0, nestlvl: 1, children: +-- we do not support DISTINCT ON clauses +INSERT INTO agg_events (value_1_agg, user_id) + SELECT + DISTINCT ON (value_1) value_1, user_id + FROM + raw_events_first; +DEBUG: StartTransactionCommand +DEBUG: StartTransaction +DEBUG: name: unnamed; blockState: DEFAULT; state: INPROGR, xid/subid/cid: 0/1/0, nestlvl: 1, children: +ERROR: cannot perform distributed planning for the given modification +DETAIL: DISTINCT ON clauses are not allowed in INSERT ... 
SELECT queries +-- We do not support some CTEs +WITH fist_table_agg AS + (SELECT sum(value_1) as v1_agg, user_id FROM raw_events_first GROUP BY user_id) +INSERT INTO agg_events + (value_1_agg, user_id) + SELECT + v1_agg, user_id + FROM + fist_table_agg; +DEBUG: StartTransactionCommand +DEBUG: StartTransaction +DEBUG: name: unnamed; blockState: DEFAULT; state: INPROGR, xid/subid/cid: 0/1/0, nestlvl: 1, children: +ERROR: SELECT query should return bare partition column on the same ordinal position as the INSERT's partition column +-- We do support some CTEs +INSERT INTO agg_events + WITH sub_cte AS (SELECT 1) + SELECT + raw_events_first.user_id, (SELECT * FROM sub_cte) + FROM + raw_events_first; +DEBUG: StartTransactionCommand +DEBUG: StartTransaction +DEBUG: name: unnamed; blockState: DEFAULT; state: INPROGR, xid/subid/cid: 0/1/0, nestlvl: 1, children: +DEBUG: predicate pruning for shardId 13300001 +DEBUG: predicate pruning for shardId 13300002 +DEBUG: predicate pruning for shardId 13300003 +DEBUG: distributed statement: INSERT INTO public.agg_events_13300008 AS citus_table_alias (user_id, value_1_agg) WITH sub_cte AS (SELECT 1) SELECT user_id, (SELECT sub_cte."?column?" FROM sub_cte) FROM public.raw_events_first_13300000 raw_events_first WHERE ((hashint4(user_id) >= '-2147483648'::integer) AND (hashint4(user_id) <= '-1073741825'::integer)) +DEBUG: predicate pruning for shardId 13300000 +DEBUG: predicate pruning for shardId 13300002 +DEBUG: predicate pruning for shardId 13300003 +DEBUG: distributed statement: INSERT INTO public.agg_events_13300009 AS citus_table_alias (user_id, value_1_agg) WITH sub_cte AS (SELECT 1) SELECT user_id, (SELECT sub_cte."?column?" 
FROM sub_cte) FROM public.raw_events_first_13300001 raw_events_first WHERE ((hashint4(user_id) >= '-1073741824'::integer) AND (hashint4(user_id) <= '-1'::integer)) +DEBUG: predicate pruning for shardId 13300000 +DEBUG: predicate pruning for shardId 13300001 +DEBUG: predicate pruning for shardId 13300003 +DEBUG: distributed statement: INSERT INTO public.agg_events_13300010 AS citus_table_alias (user_id, value_1_agg) WITH sub_cte AS (SELECT 1) SELECT user_id, (SELECT sub_cte."?column?" FROM sub_cte) FROM public.raw_events_first_13300002 raw_events_first WHERE ((hashint4(user_id) >= 0) AND (hashint4(user_id) <= 1073741823)) +DEBUG: predicate pruning for shardId 13300000 +DEBUG: predicate pruning for shardId 13300001 +DEBUG: predicate pruning for shardId 13300002 +DEBUG: distributed statement: INSERT INTO public.agg_events_13300011 AS citus_table_alias (user_id, value_1_agg) WITH sub_cte AS (SELECT 1) SELECT user_id, (SELECT sub_cte."?column?" FROM sub_cte) FROM public.raw_events_first_13300003 raw_events_first WHERE ((hashint4(user_id) >= 1073741824) AND (hashint4(user_id) <= 2147483647)) +DEBUG: ProcessQuery +DEBUG: Plan is router executable +DEBUG: CommitTransactionCommand +DEBUG: CommitTransaction +DEBUG: name: unnamed; blockState: STARTED; state: INPROGR, xid/subid/cid: 0/1/0, nestlvl: 1, children: +DEBUG: sent COMMIT over connection 13300008 +DEBUG: sent COMMIT over connection 13300008 +DEBUG: sent COMMIT over connection 13300011 +DEBUG: sent COMMIT over connection 13300011 +DEBUG: sent COMMIT over connection 13300009 +DEBUG: sent COMMIT over connection 13300009 +DEBUG: sent COMMIT over connection 13300010 +DEBUG: sent COMMIT over connection 13300010 +-- We do not support any set operations +INSERT INTO + raw_events_first(user_id) +SELECT + user_id +FROM + ((SELECT user_id FROM raw_events_first) UNION + (SELECT user_id FROM raw_events_second)) as foo; +DEBUG: StartTransactionCommand +DEBUG: StartTransaction +DEBUG: name: unnamed; blockState: DEFAULT; state: 
INPROGR, xid/subid/cid: 0/1/0, nestlvl: 1, children: +ERROR: cannot perform distributed planning for the given modification +DETAIL: Set operations are not allowed in INSERT ... SELECT queries +-- We do not support any set operations +INSERT INTO + raw_events_first(user_id) + (SELECT user_id FROM raw_events_first) INTERSECT + (SELECT user_id FROM raw_events_first); +DEBUG: StartTransactionCommand +DEBUG: StartTransaction +DEBUG: name: unnamed; blockState: DEFAULT; state: INPROGR, xid/subid/cid: 0/1/0, nestlvl: 1, children: +ERROR: cannot perform distributed planning for the given modification +DETAIL: Set operations are not allowed in INSERT ... SELECT queries +-- We do not support any set operations +INSERT INTO + raw_events_first(user_id) +SELECT + user_id +FROM + ((SELECT user_id FROM raw_events_first WHERE user_id = 15) EXCEPT + (SELECT user_id FROM raw_events_second where user_id = 17)) as foo; +DEBUG: StartTransactionCommand +DEBUG: StartTransaction +DEBUG: name: unnamed; blockState: DEFAULT; state: INPROGR, xid/subid/cid: 0/1/0, nestlvl: 1, children: +ERROR: cannot perform distributed planning for the given modification +DETAIL: Set operations are not allowed in INSERT ... SELECT queries +-- unsupported JOIN +INSERT INTO agg_events + (value_4_agg, + value_1_agg, + user_id) +SELECT v4, + v1, + id +FROM (SELECT SUM(raw_events_second.value_4) AS v4, + SUM(raw_events_first.value_1) AS v1, + raw_events_second.user_id AS id + FROM raw_events_first, + raw_events_second + WHERE raw_events_first.user_id != raw_events_second.user_id + GROUP BY raw_events_second.user_id) AS foo; +DEBUG: StartTransactionCommand +DEBUG: StartTransaction +DEBUG: name: unnamed; blockState: DEFAULT; state: INPROGR, xid/subid/cid: 0/1/0, nestlvl: 1, children: +ERROR: cannot perform distributed planning for the given modification +DETAIL: Select query cannot be pushed down to the worker. 
+-- INSERT partition column does not match with SELECT partition column +INSERT INTO agg_events + (value_4_agg, + value_1_agg, + user_id) +SELECT v4, + v1, + id +FROM (SELECT SUM(raw_events_second.value_4) AS v4, + SUM(raw_events_first.value_1) AS v1, + raw_events_second.value_3 AS id + FROM raw_events_first, + raw_events_second + WHERE raw_events_first.user_id = raw_events_second.user_id + GROUP BY raw_events_second.value_3) AS foo; +DEBUG: StartTransactionCommand +DEBUG: StartTransaction +DEBUG: name: unnamed; blockState: DEFAULT; state: INPROGR, xid/subid/cid: 0/1/0, nestlvl: 1, children: +ERROR: SELECT query should return bare partition column on the same ordinal position as the INSERT's partition column +-- error cases +-- no part column at all +INSERT INTO raw_events_second + (value_1) +SELECT value_1 +FROM raw_events_first; +DEBUG: StartTransactionCommand +DEBUG: StartTransaction +DEBUG: name: unnamed; blockState: DEFAULT; state: INPROGR, xid/subid/cid: 0/1/0, nestlvl: 1, children: +ERROR: SELECT query should return bare partition column on the same ordinal position as the INSERT's partition column +INSERT INTO raw_events_second + (value_1) +SELECT user_id +FROM raw_events_first; +DEBUG: StartTransactionCommand +DEBUG: StartTransaction +DEBUG: name: unnamed; blockState: DEFAULT; state: INPROGR, xid/subid/cid: 0/1/0, nestlvl: 1, children: +ERROR: SELECT query should return bare partition column on the same ordinal position as the INSERT's partition column +INSERT INTO raw_events_second + (user_id) +SELECT value_1 +FROM raw_events_first; +DEBUG: StartTransactionCommand +DEBUG: StartTransaction +DEBUG: name: unnamed; blockState: DEFAULT; state: INPROGR, xid/subid/cid: 0/1/0, nestlvl: 1, children: +ERROR: SELECT query should return bare partition column on the same ordinal position as the INSERT's partition column +INSERT INTO raw_events_second + (user_id) +SELECT user_id * 2 +FROM raw_events_first; +DEBUG: StartTransactionCommand +DEBUG: StartTransaction 
+DEBUG: name: unnamed; blockState: DEFAULT; state: INPROGR, xid/subid/cid: 0/1/0, nestlvl: 1, children: +ERROR: SELECT query should return bare partition column on the same ordinal position as the INSERT's partition column +INSERT INTO raw_events_second + (user_id) +SELECT user_id :: bigint +FROM raw_events_first; +DEBUG: StartTransactionCommand +DEBUG: StartTransaction +DEBUG: name: unnamed; blockState: DEFAULT; state: INPROGR, xid/subid/cid: 0/1/0, nestlvl: 1, children: +ERROR: SELECT query should return bare partition column on the same ordinal position as the INSERT's partition column +INSERT INTO agg_events + (value_3_agg, + value_4_agg, + value_1_agg, + value_2_agg, + user_id) +SELECT SUM(value_3), + Count(value_4), + user_id, + SUM(value_1), + Avg(value_2) +FROM raw_events_first +GROUP BY user_id; +DEBUG: StartTransactionCommand +DEBUG: StartTransaction +DEBUG: name: unnamed; blockState: DEFAULT; state: INPROGR, xid/subid/cid: 0/1/0, nestlvl: 1, children: +ERROR: SELECT query should return bare partition column on the same ordinal position as the INSERT's partition column +INSERT INTO agg_events + (value_3_agg, + value_4_agg, + value_1_agg, + value_2_agg, + user_id) +SELECT SUM(value_3), + Count(value_4), + user_id, + SUM(value_1), + value_2 +FROM raw_events_first +GROUP BY user_id, + value_2; +DEBUG: StartTransactionCommand +DEBUG: StartTransaction +DEBUG: name: unnamed; blockState: DEFAULT; state: INPROGR, xid/subid/cid: 0/1/0, nestlvl: 1, children: +ERROR: SELECT query should return bare partition column on the same ordinal position as the INSERT's partition column +-- tables should be co-located +INSERT INTO agg_events (user_id) +SELECT + user_id +FROM + reference_table; +DEBUG: StartTransactionCommand +DEBUG: StartTransaction +DEBUG: name: unnamed; blockState: DEFAULT; state: INPROGR, xid/subid/cid: 0/1/0, nestlvl: 1, children: +ERROR: INSERT target table and the source relation of the SELECT partition column value must be colocated +-- unsupported 
joins between subqueries +-- we do not return bare partition column on the inner query +INSERT INTO agg_events + (user_id) +SELECT f2.id FROM +(SELECT + id +FROM (SELECT reference_table.user_id AS id + FROM raw_events_first, + reference_table + WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f +INNER JOIN +(SELECT v4, + v1, + id +FROM (SELECT SUM(raw_events_second.value_4) AS v4, + raw_events_second.value_1 AS v1, + SUM(raw_events_second.user_id) AS id + FROM raw_events_first, + raw_events_second + WHERE raw_events_first.user_id = raw_events_second.user_id + GROUP BY raw_events_second.value_1 + HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2 +ON (f.id = f2.id); +DEBUG: StartTransactionCommand +DEBUG: StartTransaction +DEBUG: name: unnamed; blockState: DEFAULT; state: INPROGR, xid/subid/cid: 0/1/0, nestlvl: 1, children: +ERROR: SELECT query should return bare partition column on the same ordinal position as the INSERT's partition column +-- the second part of the query is not routable since +-- no GROUP BY on the partition column +INSERT INTO agg_events + (user_id) +SELECT f.id FROM +(SELECT + id +FROM (SELECT raw_events_first.user_id AS id + FROM raw_events_first, + reference_table + WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f +INNER JOIN +(SELECT v4, + v1, + id +FROM (SELECT SUM(raw_events_second.value_4) AS v4, + raw_events_second.value_1 AS v1, + SUM(raw_events_second.user_id) AS id + FROM raw_events_first, + raw_events_second + WHERE raw_events_first.user_id = raw_events_second.user_id + GROUP BY raw_events_second.value_1 + HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2 +ON (f.id = f2.id); +DEBUG: StartTransactionCommand +DEBUG: StartTransaction +DEBUG: name: unnamed; blockState: DEFAULT; state: INPROGR, xid/subid/cid: 0/1/0, nestlvl: 1, children: +ERROR: cannot perform distributed planning for the given modification +DETAIL: Select query cannot be pushed down to the worker. 
+-- cannot pushdown the query since the JOIN is not equi JOIN +INSERT INTO agg_events + (user_id, value_4_agg) +SELECT +outer_most.id, max(outer_most.value) + FROM +( + SELECT f2.id as id, f2.v4 as value FROM + (SELECT + id + FROM (SELECT reference_table.user_id AS id + FROM raw_events_first, + reference_table + WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f + INNER JOIN + (SELECT v4, + v1, + id + FROM (SELECT SUM(raw_events_second.value_4) AS v4, + SUM(raw_events_first.value_1) AS v1, + raw_events_second.user_id AS id + FROM raw_events_first, + raw_events_second + WHERE raw_events_first.user_id = raw_events_second.user_id + GROUP BY raw_events_second.user_id + HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2 +ON (f.id != f2.id)) as outer_most +GROUP BY outer_most.id; +DEBUG: StartTransactionCommand +DEBUG: StartTransaction +DEBUG: name: unnamed; blockState: DEFAULT; state: INPROGR, xid/subid/cid: 0/1/0, nestlvl: 1, children: +DEBUG: predicate pruning for shardId 13300001 +DEBUG: predicate pruning for shardId 13300002 +DEBUG: predicate pruning for shardId 13300003 +DEBUG: predicate pruning for shardId 13300005 +DEBUG: predicate pruning for shardId 13300006 +DEBUG: predicate pruning for shardId 13300007 +ERROR: cannot perform distributed planning for the given modification +DETAIL: Select query cannot be pushed down to the worker. +-- we currently not support grouping sets +INSERT INTO agg_events + (user_id, + value_1_agg, + value_2_agg) +SELECT user_id, + Sum(value_1) AS sum_val1, + Sum(value_2) AS sum_val2 +FROM raw_events_second +GROUP BY grouping sets ( ( user_id ), ( value_1 ), ( user_id, value_1 ), ( ) ); +DEBUG: StartTransactionCommand +DEBUG: StartTransaction +DEBUG: name: unnamed; blockState: DEFAULT; state: INPROGR, xid/subid/cid: 0/1/0, nestlvl: 1, children: +ERROR: cannot perform distributed planning for the given modification +DETAIL: Grouping sets are not allowed in INSERT ... 
SELECT queries +-- set back to INFO +SET client_min_messages TO INFO; +DEBUG: StartTransactionCommand +DEBUG: StartTransaction +DEBUG: name: unnamed; blockState: DEFAULT; state: INPROGR, xid/subid/cid: 0/1/0, nestlvl: 1, children: +DEBUG: ProcessUtility +-- Views does not work +CREATE VIEW test_view AS SELECT * FROM raw_events_first; +INSERT INTO raw_events_second SELECT * FROM test_view; +ERROR: cannot plan queries that include both regular and partitioned relations diff --git a/src/test/regress/expected/multi_modifications.out b/src/test/regress/expected/multi_modifications.out index 1f2f6c6ba..821bbecbf 100644 --- a/src/test/regress/expected/multi_modifications.out +++ b/src/test/regress/expected/multi_modifications.out @@ -205,10 +205,9 @@ DELETE FROM limit_orders WHERE id = 246 AND placed_at = current_timestamp::times INSERT INTO limit_orders VALUES (DEFAULT), (DEFAULT); ERROR: cannot perform distributed planning for the given modification DETAIL: Multi-row INSERTs to distributed tables are not supported. +-- Who says that? :) -- INSERT ... SELECT ... FROM commands are unsupported -INSERT INTO limit_orders SELECT * FROM limit_orders; -ERROR: cannot perform distributed planning for the given modifications -DETAIL: Subqueries are not supported in distributed modifications. 
+-- INSERT INTO limit_orders SELECT * FROM limit_orders; -- commands containing a CTE are unsupported WITH deleted_orders AS (DELETE FROM limit_orders RETURNING *) INSERT INTO limit_orders DEFAULT VALUES; diff --git a/src/test/regress/multi_schedule b/src/test/regress/multi_schedule index 9f8862be9..1f236f22e 100644 --- a/src/test/regress/multi_schedule +++ b/src/test/regress/multi_schedule @@ -29,6 +29,8 @@ test: multi_create_table_constraints test: multi_master_protocol test: multi_load_data +test: multi_insert_select + # ---------- # Miscellaneous tests to check our query planning behavior # ---------- diff --git a/src/test/regress/sql/multi_insert_select.sql b/src/test/regress/sql/multi_insert_select.sql new file mode 100644 index 000000000..b049993cd --- /dev/null +++ b/src/test/regress/sql/multi_insert_select.sql @@ -0,0 +1,608 @@ +-- +-- MULTI_INSERT_SELECT +-- + +ALTER SEQUENCE pg_catalog.pg_dist_shardid_seq RESTART 13300000; +ALTER SEQUENCE pg_catalog.pg_dist_jobid_seq RESTART 13300000; + +-- create co-located tables +SET citus.shard_count = 4; +SET citus.shard_replication_factor = 2; + +CREATE TABLE raw_events_first (user_id int, time timestamp, value_1 int, value_2 int, value_3 float, value_4 bigint, UNIQUE(user_id, value_1)); +SELECT create_distributed_table('raw_events_first', 'user_id'); + +CREATE TABLE raw_events_second (user_id int, time timestamp, value_1 int, value_2 int, value_3 float, value_4 bigint, UNIQUE(user_id, value_1)); +SELECT create_distributed_table('raw_events_second', 'user_id'); + +CREATE TABLE agg_events (user_id int, value_1_agg int, value_2_agg int, value_3_agg float, value_4_agg bigint, agg_time timestamp, UNIQUE(user_id, value_1_agg)); +SELECT create_distributed_table('agg_events', 'user_id');; + +-- create the reference table as well +CREATE TABLE reference_table (user_id int); +SELECT create_reference_table('reference_table'); + +-- set back to the defaults +SET citus.shard_count = DEFAULT; +SET citus.shard_replication_factor 
= DEFAULT; + +INSERT INTO raw_events_first (user_id, time, value_1, value_2, value_3, value_4) VALUES + (1, now(), 10, 100, 1000.1, 10000); +INSERT INTO raw_events_first (user_id, time, value_1, value_2, value_3, value_4) VALUES + (2, now(), 20, 200, 2000.1, 20000); +INSERT INTO raw_events_first (user_id, time, value_1, value_2, value_3, value_4) VALUES + (3, now(), 30, 300, 3000.1, 30000); +INSERT INTO raw_events_first (user_id, time, value_1, value_2, value_3, value_4) VALUES + (4, now(), 40, 400, 4000.1, 40000); +INSERT INTO raw_events_first (user_id, time, value_1, value_2, value_3, value_4) VALUES + (5, now(), 50, 500, 5000.1, 50000); +INSERT INTO raw_events_first (user_id, time, value_1, value_2, value_3, value_4) VALUES + (6, now(), 60, 600, 6000.1, 60000); + +SET client_min_messages TO DEBUG4; + +-- raw table to raw table +INSERT INTO raw_events_second SELECT * FROM raw_events_first; + +-- see that our first multi shard INSERT...SELECT works expected +SET client_min_messages TO INFO; +SELECT + raw_events_first.user_id +FROM + raw_events_first, raw_events_second +WHERE + raw_events_first.user_id = raw_events_second.user_id; + +-- see that we get unique vialitons +INSERT INTO raw_events_second SELECT * FROM raw_events_first; + +-- add one more row +INSERT INTO raw_events_first (user_id, time) VALUES + (7, now()); + +-- try a single shard query +SET client_min_messages TO DEBUG4; +INSERT INTO raw_events_second (user_id, time) SELECT user_id, time FROM raw_events_first WHERE user_id = 7; + + +SET client_min_messages TO INFO; + +-- add one more row +INSERT INTO raw_events_first (user_id, time, value_1, value_2, value_3, value_4) VALUES + (8, now(), 80, 800, 8000, 80000); + + +-- reorder columns +SET client_min_messages TO DEBUG4; +INSERT INTO raw_events_second (value_2, value_1, value_3, value_4, user_id, time) +SELECT + value_2, value_1, value_3, value_4, user_id, time +FROM + raw_events_first +WHERE + user_id = 8; + +-- a zero shard select +INSERT INTO 
raw_events_second (value_2, value_1, value_3, value_4, user_id, time) +SELECT + value_2, value_1, value_3, value_4, user_id, time +FROM + raw_events_first +WHERE + false; + + +-- another zero shard select +INSERT INTO raw_events_second (value_2, value_1, value_3, value_4, user_id, time) +SELECT + value_2, value_1, value_3, value_4, user_id, time +FROM + raw_events_first +WHERE + 0 != 0; + +-- add one more row +SET client_min_messages TO INFO; +INSERT INTO raw_events_first (user_id, time, value_1, value_2, value_3, value_4) VALUES + (9, now(), 90, 900, 9000, 90000); + + +-- show that RETURNING also works +SET client_min_messages TO DEBUG4; +INSERT INTO raw_events_second (user_id, value_1, value_3) +SELECT + user_id, value_1, value_3 +FROM + raw_events_first +WHERE + value_3 = 9000 +RETURNING *; + +-- hits two shards +INSERT INTO raw_events_second (user_id, value_1, value_3) +SELECT + user_id, value_1, value_3 +FROM + raw_events_first +WHERE + user_id = 9 OR user_id = 16 +RETURNING *; + + +-- now do some aggregations +INSERT INTO agg_events +SELECT + user_id, sum(value_1), avg(value_2), sum(value_3), count(value_4) +FROM + raw_events_first +GROUP BY + user_id; + +-- group by column not exists on the SELECT target list +INSERT INTO agg_events (value_3_agg, value_4_agg, value_1_agg, user_id) +SELECT + sum(value_3), count(value_4), sum(value_1), user_id +FROM + raw_events_first +GROUP BY + value_2, user_id +RETURNING *; + + +-- some subquery tests +INSERT INTO agg_events + (value_1_agg, + user_id) +SELECT SUM(value_1), + id +FROM (SELECT raw_events_second.user_id AS id, + raw_events_second.value_1 + FROM raw_events_first, + raw_events_second + WHERE raw_events_first.user_id = raw_events_second.user_id) AS foo +GROUP BY id; + + +-- subquery one more level depth +INSERT INTO agg_events + (value_4_agg, + value_1_agg, + user_id) +SELECT v4, + v1, + id +FROM (SELECT SUM(raw_events_second.value_4) AS v4, + SUM(raw_events_first.value_1) AS v1, + raw_events_second.user_id AS id 
+ FROM raw_events_first, + raw_events_second + WHERE raw_events_first.user_id = raw_events_second.user_id + GROUP BY raw_events_second.user_id) AS foo; + +-- join between subqueries +INSERT INTO agg_events + (user_id) +SELECT f2.id FROM + +(SELECT + id +FROM (SELECT reference_table.user_id AS id + FROM raw_events_first, + reference_table + WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f +INNER JOIN +(SELECT v4, + v1, + id +FROM (SELECT SUM(raw_events_second.value_4) AS v4, + SUM(raw_events_first.value_1) AS v1, + raw_events_second.user_id AS id + FROM raw_events_first, + raw_events_second + WHERE raw_events_first.user_id = raw_events_second.user_id + GROUP BY raw_events_second.user_id + HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2 +ON (f.id = f2.id); + +-- add one more level subqueris on top of subquery JOINs +INSERT INTO agg_events + (user_id, value_4_agg) +SELECT + outer_most.id, max(outer_most.value) +FROM +( + SELECT f2.id as id, f2.v4 as value FROM + (SELECT + id + FROM (SELECT reference_table.user_id AS id + FROM raw_events_first, + reference_table + WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f + INNER JOIN + (SELECT v4, + v1, + id + FROM (SELECT SUM(raw_events_second.value_4) AS v4, + SUM(raw_events_first.value_1) AS v1, + raw_events_second.user_id AS id + FROM raw_events_first, + raw_events_second + WHERE raw_events_first.user_id = raw_events_second.user_id + GROUP BY raw_events_second.user_id + HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2 +ON (f.id = f2.id)) as outer_most +GROUP BY + outer_most.id; + +-- subqueries in WHERE clause +INSERT INTO raw_events_second + (user_id) +SELECT user_id +FROM raw_events_first +WHERE user_id IN (SELECT user_id + FROM raw_events_second + WHERE user_id = 2); + +-- some UPSERTS +INSERT INTO agg_events AS ae + ( + user_id, + value_1_agg, + agg_time + ) +SELECT user_id, + value_1, + time +FROM raw_events_first +ON conflict (user_id, value_1_agg) 
+DO UPDATE + SET agg_time = EXCLUDED.agg_time + WHERE ae.agg_time < EXCLUDED.agg_time; + +-- upserts with returning +INSERT INTO agg_events AS ae + ( + user_id, + value_1_agg, + agg_time + ) +SELECT user_id, + value_1, + time +FROM raw_events_first +ON conflict (user_id, value_1_agg) +DO UPDATE + SET agg_time = EXCLUDED.agg_time + WHERE ae.agg_time < EXCLUDED.agg_time +RETURNING user_id, value_1_agg; + + +INSERT INTO agg_events (user_id, value_1_agg) +SELECT + user_id, sum(value_1 + value_2) +FROM + raw_events_first GROUP BY user_id; + +-- FILTER CLAUSE +INSERT INTO agg_events (user_id, value_1_agg) +SELECT + user_id, sum(value_1 + value_2) FILTER (where value_3 = 15) +FROM + raw_events_first GROUP BY user_id; + +-- a test with reference table JOINs +INSERT INTO + agg_events (user_id, value_1_agg) +SELECT + raw_events_first.user_id, sum(value_1) +FROM + reference_table, raw_events_first +WHERE + raw_events_first.user_id = reference_table.user_id +GROUP BY + raw_events_first.user_id; + +-- a note on the outer joins is that +-- we filter out outer join results +-- where partition column returns +-- NULL. Thus, we could INSERT less rows +-- than we expect from subquery result. 
+-- see the following tests + +SET client_min_messages TO INFO; + +-- we don't want to see constraint vialotions, so truncate first +TRUNCATE agg_events; +-- add a row to first table to make table contents different +INSERT INTO raw_events_second (user_id, time, value_1, value_2, value_3, value_4) VALUES + (10, now(), 100, 10000, 10000, 100000); + +DELETE FROM raw_events_second WHERE user_id = 2; + +-- we select 11 rows +SELECT t1.user_id AS col1, + t2.user_id AS col2 + FROM raw_events_first t1 + FULL JOIN raw_events_second t2 + ON t1.user_id = t2.user_id + ORDER BY t1.user_id, + t2.user_id; + +SET client_min_messages TO DEBUG4; +-- we insert 10 rows since we filtered out +-- NULL partition column values +INSERT INTO agg_events (user_id, value_1_agg) +SELECT t1.user_id AS col1, + t2.user_id AS col2 +FROM raw_events_first t1 + FULL JOIN raw_events_second t2 + ON t1.user_id = t2.user_id; + +SET client_min_messages TO INFO; +-- see that the results are different from the SELECT query +SELECT + user_id, value_1_agg +FROM + agg_events +ORDER BY + user_id, value_1_agg; + +-- we don't want to see constraint vialotions, so truncate first +SET client_min_messages TO INFO; +TRUNCATE agg_events; +SET client_min_messages TO DEBUG4; + +-- DISTINCT clause +INSERT INTO agg_events (value_1_agg, user_id) + SELECT + DISTINCT value_1, user_id + FROM + raw_events_first; + +-- we don't want to see constraint vialotions, so truncate first +SET client_min_messages TO INFO; +truncate agg_events; +SET client_min_messages TO DEBUG4; + +-- we do not support DISTINCT ON clauses +INSERT INTO agg_events (value_1_agg, user_id) + SELECT + DISTINCT ON (value_1) value_1, user_id + FROM + raw_events_first; + +-- We do not support some CTEs +WITH fist_table_agg AS + (SELECT sum(value_1) as v1_agg, user_id FROM raw_events_first GROUP BY user_id) +INSERT INTO agg_events + (value_1_agg, user_id) + SELECT + v1_agg, user_id + FROM + fist_table_agg; + +-- We do support some CTEs +INSERT INTO agg_events + 
WITH sub_cte AS (SELECT 1) + SELECT + raw_events_first.user_id, (SELECT * FROM sub_cte) + FROM + raw_events_first; + +-- We do not support any set operations +INSERT INTO + raw_events_first(user_id) +SELECT + user_id +FROM + ((SELECT user_id FROM raw_events_first) UNION + (SELECT user_id FROM raw_events_second)) as foo; + +-- We do not support any set operations +INSERT INTO + raw_events_first(user_id) + (SELECT user_id FROM raw_events_first) INTERSECT + (SELECT user_id FROM raw_events_first); + +-- We do not support any set operations +INSERT INTO + raw_events_first(user_id) +SELECT + user_id +FROM + ((SELECT user_id FROM raw_events_first WHERE user_id = 15) EXCEPT + (SELECT user_id FROM raw_events_second where user_id = 17)) as foo; + +-- unsupported JOIN +INSERT INTO agg_events + (value_4_agg, + value_1_agg, + user_id) +SELECT v4, + v1, + id +FROM (SELECT SUM(raw_events_second.value_4) AS v4, + SUM(raw_events_first.value_1) AS v1, + raw_events_second.user_id AS id + FROM raw_events_first, + raw_events_second + WHERE raw_events_first.user_id != raw_events_second.user_id + GROUP BY raw_events_second.user_id) AS foo; + + +-- INSERT partition column does not match with SELECT partition column +INSERT INTO agg_events + (value_4_agg, + value_1_agg, + user_id) +SELECT v4, + v1, + id +FROM (SELECT SUM(raw_events_second.value_4) AS v4, + SUM(raw_events_first.value_1) AS v1, + raw_events_second.value_3 AS id + FROM raw_events_first, + raw_events_second + WHERE raw_events_first.user_id = raw_events_second.user_id + GROUP BY raw_events_second.value_3) AS foo; + +-- error cases +-- no part column at all +INSERT INTO raw_events_second + (value_1) +SELECT value_1 +FROM raw_events_first; + +INSERT INTO raw_events_second + (value_1) +SELECT user_id +FROM raw_events_first; + +INSERT INTO raw_events_second + (user_id) +SELECT value_1 +FROM raw_events_first; + +INSERT INTO raw_events_second + (user_id) +SELECT user_id * 2 +FROM raw_events_first; + +INSERT INTO raw_events_second + 
(user_id) +SELECT user_id :: bigint +FROM raw_events_first; + +INSERT INTO agg_events + (value_3_agg, + value_4_agg, + value_1_agg, + value_2_agg, + user_id) +SELECT SUM(value_3), + Count(value_4), + user_id, + SUM(value_1), + Avg(value_2) +FROM raw_events_first +GROUP BY user_id; + +INSERT INTO agg_events + (value_3_agg, + value_4_agg, + value_1_agg, + value_2_agg, + user_id) +SELECT SUM(value_3), + Count(value_4), + user_id, + SUM(value_1), + value_2 +FROM raw_events_first +GROUP BY user_id, + value_2; + +-- tables should be co-located +INSERT INTO agg_events (user_id) +SELECT + user_id +FROM + reference_table; + +-- unsupported joins between subqueries +-- we do not return bare partition column on the inner query +INSERT INTO agg_events + (user_id) +SELECT f2.id FROM +(SELECT + id +FROM (SELECT reference_table.user_id AS id + FROM raw_events_first, + reference_table + WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f +INNER JOIN +(SELECT v4, + v1, + id +FROM (SELECT SUM(raw_events_second.value_4) AS v4, + raw_events_second.value_1 AS v1, + SUM(raw_events_second.user_id) AS id + FROM raw_events_first, + raw_events_second + WHERE raw_events_first.user_id = raw_events_second.user_id + GROUP BY raw_events_second.value_1 + HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2 +ON (f.id = f2.id); + + +-- the second part of the query is not routable since +-- no GROUP BY on the partition column +INSERT INTO agg_events + (user_id) +SELECT f.id FROM +(SELECT + id +FROM (SELECT raw_events_first.user_id AS id + FROM raw_events_first, + reference_table + WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f +INNER JOIN +(SELECT v4, + v1, + id +FROM (SELECT SUM(raw_events_second.value_4) AS v4, + raw_events_second.value_1 AS v1, + SUM(raw_events_second.user_id) AS id + FROM raw_events_first, + raw_events_second + WHERE raw_events_first.user_id = raw_events_second.user_id + GROUP BY raw_events_second.value_1 + HAVING 
SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2 +ON (f.id = f2.id); + +-- cannot pushdown the query since the JOIN is not equi JOIN +INSERT INTO agg_events + (user_id, value_4_agg) +SELECT +outer_most.id, max(outer_most.value) + FROM +( + SELECT f2.id as id, f2.v4 as value FROM + (SELECT + id + FROM (SELECT reference_table.user_id AS id + FROM raw_events_first, + reference_table + WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f + INNER JOIN + (SELECT v4, + v1, + id + FROM (SELECT SUM(raw_events_second.value_4) AS v4, + SUM(raw_events_first.value_1) AS v1, + raw_events_second.user_id AS id + FROM raw_events_first, + raw_events_second + WHERE raw_events_first.user_id = raw_events_second.user_id + GROUP BY raw_events_second.user_id + HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2 +ON (f.id != f2.id)) as outer_most +GROUP BY outer_most.id; + +-- we currently not support grouping sets +INSERT INTO agg_events + (user_id, + value_1_agg, + value_2_agg) +SELECT user_id, + Sum(value_1) AS sum_val1, + Sum(value_2) AS sum_val2 +FROM raw_events_second +GROUP BY grouping sets ( ( user_id ), ( value_1 ), ( user_id, value_1 ), ( ) ); + +-- set back to INFO +SET client_min_messages TO INFO; + +-- Views does not work +CREATE VIEW test_view AS SELECT * FROM raw_events_first; +INSERT INTO raw_events_second SELECT * FROM test_view; diff --git a/src/test/regress/sql/multi_modifications.sql b/src/test/regress/sql/multi_modifications.sql index 88eff899f..e8e493038 100644 --- a/src/test/regress/sql/multi_modifications.sql +++ b/src/test/regress/sql/multi_modifications.sql @@ -150,8 +150,9 @@ DELETE FROM limit_orders WHERE id = 246 AND placed_at = current_timestamp::times -- commands with multiple rows are unsupported INSERT INTO limit_orders VALUES (DEFAULT), (DEFAULT); +-- Who says that? :) -- INSERT ... SELECT ... 
FROM commands are unsupported -INSERT INTO limit_orders SELECT * FROM limit_orders; +-- INSERT INTO limit_orders SELECT * FROM limit_orders; -- commands containing a CTE are unsupported WITH deleted_orders AS (DELETE FROM limit_orders RETURNING *)