Merge pull request #1261 from citusdata/fix_wrong_pushdown_properly

Fix pushing down wrong queries for INSERT ... SELECT queries
pull/1299/head
Önder Kalacı 2017-03-24 12:52:31 +02:00 committed by GitHub
commit 95e43eb256
3 changed files with 1340 additions and 146 deletions

View File

@ -48,6 +48,7 @@
#include "nodes/pg_list.h" #include "nodes/pg_list.h"
#include "nodes/primnodes.h" #include "nodes/primnodes.h"
#include "optimizer/clauses.h" #include "optimizer/clauses.h"
#include "optimizer/paths.h"
#include "optimizer/predtest.h" #include "optimizer/predtest.h"
#include "optimizer/restrictinfo.h" #include "optimizer/restrictinfo.h"
#include "optimizer/var.h" #include "optimizer/var.h"
@ -87,6 +88,8 @@ static Task * RouterModifyTaskForShardInterval(Query *originalQuery,
RelationRestrictionContext * RelationRestrictionContext *
restrictionContext, restrictionContext,
uint32 taskIdIndex); uint32 taskIdIndex);
static List * HashedShardIntervalOpExpressions(ShardInterval *shardInterval);
static Param * UninstantiatedParameterForColumn(Var *relationPartitionKey);
static bool MasterIrreducibleExpression(Node *expression, bool *varArgument, static bool MasterIrreducibleExpression(Node *expression, bool *varArgument,
bool *badCoalesce); bool *badCoalesce);
static bool MasterIrreducibleExpressionWalker(Node *expression, WalkerState *state); static bool MasterIrreducibleExpressionWalker(Node *expression, WalkerState *state);
@ -115,7 +118,6 @@ static bool MultiRouterPlannableQuery(Query *query,
RelationRestrictionContext *restrictionContext); RelationRestrictionContext *restrictionContext);
static RelationRestrictionContext * CopyRelationRestrictionContext( static RelationRestrictionContext * CopyRelationRestrictionContext(
RelationRestrictionContext *oldContext); RelationRestrictionContext *oldContext);
static Node * InstantiatePartitionQual(Node *node, void *context);
static DeferredErrorMessage * InsertSelectQuerySupported(Query *queryTree, static DeferredErrorMessage * InsertSelectQuerySupported(Query *queryTree,
RangeTblEntry *insertRte, RangeTblEntry *insertRte,
RangeTblEntry *subqueryRte, RangeTblEntry *subqueryRte,
@ -379,6 +381,7 @@ RouterModifyTaskForShardInterval(Query *originalQuery, ShardInterval *shardInter
bool upsertQuery = false; bool upsertQuery = false;
bool replacePrunedQueryWithDummy = false; bool replacePrunedQueryWithDummy = false;
bool allReferenceTables = restrictionContext->allReferenceTables; bool allReferenceTables = restrictionContext->allReferenceTables;
List *hashedOpExpressions = NIL;
/* grab shared metadata lock to stop concurrent placement additions */ /* grab shared metadata lock to stop concurrent placement additions */
LockShardDistributionMetadata(shardId, ShareLock); LockShardDistributionMetadata(shardId, ShareLock);
@ -391,19 +394,43 @@ RouterModifyTaskForShardInterval(Query *originalQuery, ShardInterval *shardInter
{ {
RelationRestriction *restriction = lfirst(restrictionCell); RelationRestriction *restriction = lfirst(restrictionCell);
List *originalBaserestrictInfo = restriction->relOptInfo->baserestrictinfo; List *originalBaserestrictInfo = restriction->relOptInfo->baserestrictinfo;
Var *relationPartitionKey = PartitionColumn(restriction->relationId,
restriction->index);
Param *uninstantiatedParameter = NULL;
/* /*
* We haven't added the quals if all participating tables are reference * We don't need to add restriction to reference tables given that they are
* tables. Thus, now skip instantiating them. * already single sharded and always prune to that single shard.
*/ */
if (allReferenceTables) if (PartitionMethod(restriction->relationId) == DISTRIBUTE_BY_NONE)
{ {
break; continue;
} }
originalBaserestrictInfo = hashedOpExpressions = HashedShardIntervalOpExpressions(shardInterval);
(List *) InstantiatePartitionQual((Node *) originalBaserestrictInfo, Assert(list_length(hashedOpExpressions) == 2);
shardInterval);
/*
* Here we check whether the planner knows an equality between the partition column
* and the uninstantiated parameter. If such an equality exists, we simply add the
* shard restrictions.
*/
uninstantiatedParameter = UninstantiatedParameterForColumn(relationPartitionKey);
if (exprs_known_equal(restriction->plannerInfo, (Node *) relationPartitionKey,
(Node *) uninstantiatedParameter))
{
RestrictInfo *geRestrictInfo = NULL;
RestrictInfo *leRestrictInfo = NULL;
OpExpr *hashedGEOpExpr = (OpExpr *) linitial(hashedOpExpressions);
OpExpr *hashedLEOpExpr = (OpExpr *) lsecond(hashedOpExpressions);
geRestrictInfo = make_simple_restrictinfo((Expr *) hashedGEOpExpr);
originalBaserestrictInfo = lappend(originalBaserestrictInfo, geRestrictInfo);
leRestrictInfo = make_simple_restrictinfo((Expr *) hashedLEOpExpr);
originalBaserestrictInfo = lappend(originalBaserestrictInfo, leRestrictInfo);
}
} }
/* /*
@ -502,6 +529,96 @@ RouterModifyTaskForShardInterval(Query *originalQuery, ShardInterval *shardInter
} }
/*
* HashedShardIntervalOpExpressions returns a list of OpExprs with exactly two
* items in it. The list consists of shard interval ranges with hashed columns
* such as (hashColumn >= shardMinValue) and (hashedColumn <= shardMaxValue).
*
* The function errors out if the given shard interval does not belong to a hash
* distributed table.
*/
static List *
HashedShardIntervalOpExpressions(ShardInterval *shardInterval)
{
List *operatorExpressions = NIL;
Var *hashedGEColumn = NULL;
Var *hashedLEColumn = NULL;
OpExpr *hashedGEOpExpr = NULL;
OpExpr *hashedLEOpExpr = NULL;
Oid integer4GEoperatorId = InvalidOid;
Oid integer4LEoperatorId = InvalidOid;
Datum shardMinValue = shardInterval->minValue;
Datum shardMaxValue = shardInterval->maxValue;
char partitionMethod = PartitionMethod(shardInterval->relationId);
if (partitionMethod != DISTRIBUTE_BY_HASH)
{
ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("cannot create shard interval operator expression for "
"distributed relations other than hash distributed "
"relations")));
}
/* get the integer >=, <= operators from the catalog */
integer4GEoperatorId = get_opfamily_member(INTEGER_BTREE_FAM_OID, INT4OID,
INT4OID,
BTGreaterEqualStrategyNumber);
integer4LEoperatorId = get_opfamily_member(INTEGER_BTREE_FAM_OID, INT4OID,
INT4OID,
BTLessEqualStrategyNumber);
/* generate hashed columns */
hashedGEColumn = MakeInt4Column();
hashedLEColumn = MakeInt4Column();
/* generate the necessary operators */
hashedGEOpExpr = (OpExpr *) make_opclause(integer4GEoperatorId, InvalidOid, false,
(Expr *) hashedGEColumn,
(Expr *) MakeInt4Constant(shardMinValue),
InvalidOid, InvalidOid);
hashedLEOpExpr = (OpExpr *) make_opclause(integer4LEoperatorId, InvalidOid, false,
(Expr *) hashedLEColumn,
(Expr *) MakeInt4Constant(shardMaxValue),
InvalidOid, InvalidOid);
/* update the operators with correct operator numbers and function ids */
hashedGEOpExpr->opfuncid = get_opcode(hashedGEOpExpr->opno);
hashedGEOpExpr->opresulttype = get_func_rettype(hashedGEOpExpr->opfuncid);
operatorExpressions = lappend(operatorExpressions, hashedGEOpExpr);
hashedLEOpExpr->opfuncid = get_opcode(hashedLEOpExpr->opno);
hashedLEOpExpr->opresulttype = get_func_rettype(hashedLEOpExpr->opfuncid);
operatorExpressions = lappend(operatorExpressions, hashedLEOpExpr);
return operatorExpressions;
}
/*
* UninstantiatedParameterForColumn returns a Param that can be used as an uninstantiated
* parameter for the given column in the sense that paramtype, paramtypmod and collid
* is set to the input Var's corresponding values.
*
* Note that we're using hard coded UNINSTANTIATED_PARAMETER_ID which is the required parameter
* for our purposes. See multi_planner.c@multi_planner for the details.
*/
static Param *
UninstantiatedParameterForColumn(Var *relationPartitionKey)
{
Param *uninstantiatedParameter = makeNode(Param);
uninstantiatedParameter->paramkind = PARAM_EXTERN;
uninstantiatedParameter->paramid = UNINSTANTIATED_PARAMETER_ID;
uninstantiatedParameter->paramtype = relationPartitionKey->vartype;
uninstantiatedParameter->paramtypmod = relationPartitionKey->vartypmod;
uninstantiatedParameter->paramcollid = relationPartitionKey->varcollid;
return uninstantiatedParameter;
}
/* /*
* AddShardIntervalRestrictionToSelect adds the following range boundaries * AddShardIntervalRestrictionToSelect adds the following range boundaries
* with the given subquery and shardInterval: * with the given subquery and shardInterval:
@ -1149,7 +1266,7 @@ AddUninstantiatedPartitionRestriction(Query *originalQuery)
static void static void
AddUninstantiatedEqualityQual(Query *query, Var *partitionColumn) AddUninstantiatedEqualityQual(Query *query, Var *partitionColumn)
{ {
Param *equalityParameter = makeNode(Param); Param *equalityParameter = UninstantiatedParameterForColumn(partitionColumn);
OpExpr *uninstantiatedEqualityQual = NULL; OpExpr *uninstantiatedEqualityQual = NULL;
Oid partitionColumnCollid = InvalidOid; Oid partitionColumnCollid = InvalidOid;
Oid lessThanOperator = InvalidOid; Oid lessThanOperator = InvalidOid;
@ -1167,13 +1284,6 @@ AddUninstantiatedEqualityQual(Query *query, Var *partitionColumn)
partitionColumnCollid = partitionColumn->varcollid; partitionColumnCollid = partitionColumn->varcollid;
equalityParameter->paramkind = PARAM_EXTERN;
equalityParameter->paramid = UNINSTANTIATED_PARAMETER_ID;
equalityParameter->paramtype = partitionColumn->vartype;
equalityParameter->paramtypmod = partitionColumn->vartypmod;
equalityParameter->paramcollid = partitionColumnCollid;
equalityParameter->location = -1;
/* create an equality on the on the target partition column */ /* create an equality on the on the target partition column */
uninstantiatedEqualityQual = (OpExpr *) make_opclause(equalsOperator, InvalidOid, uninstantiatedEqualityQual = (OpExpr *) make_opclause(equalsOperator, InvalidOid,
false, false,
@ -2964,129 +3074,6 @@ CopyRelationRestrictionContext(RelationRestrictionContext *oldContext)
} }
/*
* InstantiatePartitionQual replaces the "uninstantiated" partition
* restriction clause with the current shard's (passed in context)
* boundary value.
*
* Once we see ($1 = partition column), we replace it with
* (partCol >= shardMinValue && partCol <= shardMaxValue).
*/
static Node *
InstantiatePartitionQual(Node *node, void *context)
{
ShardInterval *shardInterval = (ShardInterval *) context;
Assert(shardInterval->minValueExists);
Assert(shardInterval->maxValueExists);
if (node == NULL)
{
return NULL;
}
/*
* Look for operator expressions with two arguments.
*
* Once Found the uninstantiate, replace with appropriate boundaries for the
* current shard interval.
*
* The boundaries are replaced in the following manner:
* (partCol >= shardMinValue && partCol <= shardMaxValue)
*/
if (IsA(node, OpExpr) && list_length(((OpExpr *) node)->args) == 2)
{
OpExpr *op = (OpExpr *) node;
Node *leftop = get_leftop((Expr *) op);
Node *rightop = get_rightop((Expr *) op);
Param *param = NULL;
Var *hashedGEColumn = NULL;
OpExpr *hashedGEOpExpr = NULL;
Datum shardMinValue = shardInterval->minValue;
Var *hashedLEColumn = NULL;
OpExpr *hashedLEOpExpr = NULL;
Datum shardMaxValue = shardInterval->maxValue;
List *hashedOperatorList = NIL;
Oid integer4GEoperatorId = InvalidOid;
Oid integer4LEoperatorId = InvalidOid;
/* look for the Params */
if (IsA(leftop, Param))
{
param = (Param *) leftop;
}
else if (IsA(rightop, Param))
{
param = (Param *) rightop;
}
/* not an interesting param for our purpose, so return */
if (!(param && param->paramid == UNINSTANTIATED_PARAMETER_ID))
{
return node;
}
/* get the integer >=, <= operators from the catalog */
integer4GEoperatorId = get_opfamily_member(INTEGER_BTREE_FAM_OID, INT4OID,
INT4OID,
BTGreaterEqualStrategyNumber);
integer4LEoperatorId = get_opfamily_member(INTEGER_BTREE_FAM_OID, INT4OID,
INT4OID,
BTLessEqualStrategyNumber);
/* generate hashed columns */
hashedGEColumn = MakeInt4Column();
hashedLEColumn = MakeInt4Column();
/* generate the necessary operators */
hashedGEOpExpr = (OpExpr *) make_opclause(integer4GEoperatorId,
InvalidOid, false,
(Expr *) hashedGEColumn,
(Expr *) MakeInt4Constant(
shardMinValue),
InvalidOid, InvalidOid);
hashedLEOpExpr = (OpExpr *) make_opclause(integer4LEoperatorId,
InvalidOid, false,
(Expr *) hashedLEColumn,
(Expr *) MakeInt4Constant(
shardMaxValue),
InvalidOid, InvalidOid);
/* update the operators with correct operator numbers and function ids */
hashedGEOpExpr->opfuncid = get_opcode(hashedGEOpExpr->opno);
hashedGEOpExpr->opresulttype = get_func_rettype(hashedGEOpExpr->opfuncid);
hashedLEOpExpr->opfuncid = get_opcode(hashedLEOpExpr->opno);
hashedLEOpExpr->opresulttype = get_func_rettype(hashedLEOpExpr->opfuncid);
/* finally add the hashed operators to a list and return it */
hashedOperatorList = lappend(hashedOperatorList, hashedGEOpExpr);
hashedOperatorList = lappend(hashedOperatorList, hashedLEOpExpr);
return (Node *) hashedOperatorList;
}
/* ensure that it is not a query */
Assert(!IsA(node, Query));
/* recurse into restrict info */
if (IsA(node, RestrictInfo))
{
RestrictInfo *restrictInfo = (RestrictInfo *) node;
restrictInfo->clause = (Expr *) InstantiatePartitionQual(
(Node *) restrictInfo->clause, context);
return (Node *) restrictInfo;
}
return expression_tree_mutator(node, InstantiatePartitionQual, context);
}
/* /*
* ErrorIfQueryHasModifyingCTE checks if the query contains modifying common table * ErrorIfQueryHasModifyingCTE checks if the query contains modifying common table
* expressions and errors out if it does. * expressions and errors out if it does.

View File

@ -613,24 +613,30 @@ WHERE user_id IN (SELECT user_id
DEBUG: predicate pruning for shardId 13300000 DEBUG: predicate pruning for shardId 13300000
DEBUG: predicate pruning for shardId 13300001 DEBUG: predicate pruning for shardId 13300001
DEBUG: predicate pruning for shardId 13300002 DEBUG: predicate pruning for shardId 13300002
DEBUG: predicate pruning for shardId 13300003
DEBUG: predicate pruning for shardId 13300004 DEBUG: predicate pruning for shardId 13300004
DEBUG: predicate pruning for shardId 13300005 DEBUG: predicate pruning for shardId 13300005
DEBUG: predicate pruning for shardId 13300006 DEBUG: predicate pruning for shardId 13300006
DEBUG: distributed statement: INSERT INTO public.raw_events_second_13300004 AS citus_table_alias (user_id) SELECT user_id FROM public.raw_events_first_13300003 raw_events_first WHERE ((user_id IN (SELECT raw_events_second.user_id FROM public.raw_events_second_13300007 raw_events_second WHERE (raw_events_second.user_id = 2))) AND ((hashint4(user_id) >= '-2147483648'::integer) AND (hashint4(user_id) <= '-1073741825'::integer))) DEBUG: predicate pruning for shardId 13300007
DEBUG: Skipping target shard interval 13300004 since SELECT query for it pruned away
DEBUG: predicate pruning for shardId 13300000 DEBUG: predicate pruning for shardId 13300000
DEBUG: predicate pruning for shardId 13300001 DEBUG: predicate pruning for shardId 13300001
DEBUG: predicate pruning for shardId 13300002 DEBUG: predicate pruning for shardId 13300002
DEBUG: predicate pruning for shardId 13300003
DEBUG: predicate pruning for shardId 13300004 DEBUG: predicate pruning for shardId 13300004
DEBUG: predicate pruning for shardId 13300005 DEBUG: predicate pruning for shardId 13300005
DEBUG: predicate pruning for shardId 13300006 DEBUG: predicate pruning for shardId 13300006
DEBUG: distributed statement: INSERT INTO public.raw_events_second_13300005 AS citus_table_alias (user_id) SELECT user_id FROM public.raw_events_first_13300003 raw_events_first WHERE ((user_id IN (SELECT raw_events_second.user_id FROM public.raw_events_second_13300007 raw_events_second WHERE (raw_events_second.user_id = 2))) AND ((hashint4(user_id) >= '-1073741824'::integer) AND (hashint4(user_id) <= '-1'::integer))) DEBUG: predicate pruning for shardId 13300007
DEBUG: Skipping target shard interval 13300005 since SELECT query for it pruned away
DEBUG: predicate pruning for shardId 13300000 DEBUG: predicate pruning for shardId 13300000
DEBUG: predicate pruning for shardId 13300001 DEBUG: predicate pruning for shardId 13300001
DEBUG: predicate pruning for shardId 13300002 DEBUG: predicate pruning for shardId 13300002
DEBUG: predicate pruning for shardId 13300003
DEBUG: predicate pruning for shardId 13300004 DEBUG: predicate pruning for shardId 13300004
DEBUG: predicate pruning for shardId 13300005 DEBUG: predicate pruning for shardId 13300005
DEBUG: predicate pruning for shardId 13300006 DEBUG: predicate pruning for shardId 13300006
DEBUG: distributed statement: INSERT INTO public.raw_events_second_13300006 AS citus_table_alias (user_id) SELECT user_id FROM public.raw_events_first_13300003 raw_events_first WHERE ((user_id IN (SELECT raw_events_second.user_id FROM public.raw_events_second_13300007 raw_events_second WHERE (raw_events_second.user_id = 2))) AND ((hashint4(user_id) >= 0) AND (hashint4(user_id) <= 1073741823))) DEBUG: predicate pruning for shardId 13300007
DEBUG: Skipping target shard interval 13300006 since SELECT query for it pruned away
DEBUG: predicate pruning for shardId 13300000 DEBUG: predicate pruning for shardId 13300000
DEBUG: predicate pruning for shardId 13300001 DEBUG: predicate pruning for shardId 13300001
DEBUG: predicate pruning for shardId 13300002 DEBUG: predicate pruning for shardId 13300002
@ -639,6 +645,168 @@ DEBUG: predicate pruning for shardId 13300005
DEBUG: predicate pruning for shardId 13300006 DEBUG: predicate pruning for shardId 13300006
DEBUG: distributed statement: INSERT INTO public.raw_events_second_13300007 AS citus_table_alias (user_id) SELECT user_id FROM public.raw_events_first_13300003 raw_events_first WHERE ((user_id IN (SELECT raw_events_second.user_id FROM public.raw_events_second_13300007 raw_events_second WHERE (raw_events_second.user_id = 2))) AND ((hashint4(user_id) >= 1073741824) AND (hashint4(user_id) <= 2147483647))) DEBUG: distributed statement: INSERT INTO public.raw_events_second_13300007 AS citus_table_alias (user_id) SELECT user_id FROM public.raw_events_first_13300003 raw_events_first WHERE ((user_id IN (SELECT raw_events_second.user_id FROM public.raw_events_second_13300007 raw_events_second WHERE (raw_events_second.user_id = 2))) AND ((hashint4(user_id) >= 1073741824) AND (hashint4(user_id) <= 2147483647)))
DEBUG: Plan is router executable DEBUG: Plan is router executable
INSERT INTO raw_events_second
(user_id)
SELECT user_id
FROM raw_events_first
WHERE user_id IN (SELECT user_id
FROM raw_events_second
WHERE user_id != 2 AND value_1 = 2000)
ON conflict (user_id, value_1) DO NOTHING;
DEBUG: predicate pruning for shardId 13300001
DEBUG: predicate pruning for shardId 13300002
DEBUG: predicate pruning for shardId 13300003
DEBUG: predicate pruning for shardId 13300005
DEBUG: predicate pruning for shardId 13300006
DEBUG: predicate pruning for shardId 13300007
DEBUG: distributed statement: INSERT INTO public.raw_events_second_13300004 AS citus_table_alias (user_id) SELECT user_id FROM public.raw_events_first_13300000 raw_events_first WHERE ((user_id IN (SELECT raw_events_second.user_id FROM public.raw_events_second_13300004 raw_events_second WHERE ((raw_events_second.user_id <> 2) AND (raw_events_second.value_1 = 2000)))) AND ((hashint4(user_id) >= '-2147483648'::integer) AND (hashint4(user_id) <= '-1073741825'::integer))) ON CONFLICT(user_id, value_1) DO NOTHING
DEBUG: predicate pruning for shardId 13300000
DEBUG: predicate pruning for shardId 13300002
DEBUG: predicate pruning for shardId 13300003
DEBUG: predicate pruning for shardId 13300004
DEBUG: predicate pruning for shardId 13300006
DEBUG: predicate pruning for shardId 13300007
DEBUG: distributed statement: INSERT INTO public.raw_events_second_13300005 AS citus_table_alias (user_id) SELECT user_id FROM public.raw_events_first_13300001 raw_events_first WHERE ((user_id IN (SELECT raw_events_second.user_id FROM public.raw_events_second_13300005 raw_events_second WHERE ((raw_events_second.user_id <> 2) AND (raw_events_second.value_1 = 2000)))) AND ((hashint4(user_id) >= '-1073741824'::integer) AND (hashint4(user_id) <= '-1'::integer))) ON CONFLICT(user_id, value_1) DO NOTHING
DEBUG: predicate pruning for shardId 13300000
DEBUG: predicate pruning for shardId 13300001
DEBUG: predicate pruning for shardId 13300003
DEBUG: predicate pruning for shardId 13300004
DEBUG: predicate pruning for shardId 13300005
DEBUG: predicate pruning for shardId 13300007
DEBUG: distributed statement: INSERT INTO public.raw_events_second_13300006 AS citus_table_alias (user_id) SELECT user_id FROM public.raw_events_first_13300002 raw_events_first WHERE ((user_id IN (SELECT raw_events_second.user_id FROM public.raw_events_second_13300006 raw_events_second WHERE ((raw_events_second.user_id <> 2) AND (raw_events_second.value_1 = 2000)))) AND ((hashint4(user_id) >= 0) AND (hashint4(user_id) <= 1073741823))) ON CONFLICT(user_id, value_1) DO NOTHING
DEBUG: predicate pruning for shardId 13300000
DEBUG: predicate pruning for shardId 13300001
DEBUG: predicate pruning for shardId 13300002
DEBUG: predicate pruning for shardId 13300004
DEBUG: predicate pruning for shardId 13300005
DEBUG: predicate pruning for shardId 13300006
DEBUG: distributed statement: INSERT INTO public.raw_events_second_13300007 AS citus_table_alias (user_id) SELECT user_id FROM public.raw_events_first_13300003 raw_events_first WHERE ((user_id IN (SELECT raw_events_second.user_id FROM public.raw_events_second_13300007 raw_events_second WHERE ((raw_events_second.user_id <> 2) AND (raw_events_second.value_1 = 2000)))) AND ((hashint4(user_id) >= 1073741824) AND (hashint4(user_id) <= 2147483647))) ON CONFLICT(user_id, value_1) DO NOTHING
DEBUG: Plan is router executable
INSERT INTO raw_events_second
(user_id)
SELECT user_id
FROM raw_events_first
WHERE user_id IN (SELECT user_id
FROM raw_events_second WHERE false);
DEBUG: Skipping target shard interval 13300004 since SELECT query for it pruned away
DEBUG: Skipping target shard interval 13300005 since SELECT query for it pruned away
DEBUG: Skipping target shard interval 13300006 since SELECT query for it pruned away
DEBUG: Skipping target shard interval 13300007 since SELECT query for it pruned away
DEBUG: Plan is router executable
INSERT INTO raw_events_second
(user_id)
SELECT user_id
FROM raw_events_first
WHERE user_id IN (SELECT user_id
FROM raw_events_second
WHERE value_1 = 1000 OR value_1 = 2000 OR value_1 = 3000);
DEBUG: predicate pruning for shardId 13300001
DEBUG: predicate pruning for shardId 13300002
DEBUG: predicate pruning for shardId 13300003
DEBUG: predicate pruning for shardId 13300005
DEBUG: predicate pruning for shardId 13300006
DEBUG: predicate pruning for shardId 13300007
DEBUG: distributed statement: INSERT INTO public.raw_events_second_13300004 AS citus_table_alias (user_id) SELECT user_id FROM public.raw_events_first_13300000 raw_events_first WHERE ((user_id IN (SELECT raw_events_second.user_id FROM public.raw_events_second_13300004 raw_events_second WHERE ((raw_events_second.value_1 = 1000) OR (raw_events_second.value_1 = 2000) OR (raw_events_second.value_1 = 3000)))) AND ((hashint4(user_id) >= '-2147483648'::integer) AND (hashint4(user_id) <= '-1073741825'::integer)))
DEBUG: predicate pruning for shardId 13300000
DEBUG: predicate pruning for shardId 13300002
DEBUG: predicate pruning for shardId 13300003
DEBUG: predicate pruning for shardId 13300004
DEBUG: predicate pruning for shardId 13300006
DEBUG: predicate pruning for shardId 13300007
DEBUG: distributed statement: INSERT INTO public.raw_events_second_13300005 AS citus_table_alias (user_id) SELECT user_id FROM public.raw_events_first_13300001 raw_events_first WHERE ((user_id IN (SELECT raw_events_second.user_id FROM public.raw_events_second_13300005 raw_events_second WHERE ((raw_events_second.value_1 = 1000) OR (raw_events_second.value_1 = 2000) OR (raw_events_second.value_1 = 3000)))) AND ((hashint4(user_id) >= '-1073741824'::integer) AND (hashint4(user_id) <= '-1'::integer)))
DEBUG: predicate pruning for shardId 13300000
DEBUG: predicate pruning for shardId 13300001
DEBUG: predicate pruning for shardId 13300003
DEBUG: predicate pruning for shardId 13300004
DEBUG: predicate pruning for shardId 13300005
DEBUG: predicate pruning for shardId 13300007
DEBUG: distributed statement: INSERT INTO public.raw_events_second_13300006 AS citus_table_alias (user_id) SELECT user_id FROM public.raw_events_first_13300002 raw_events_first WHERE ((user_id IN (SELECT raw_events_second.user_id FROM public.raw_events_second_13300006 raw_events_second WHERE ((raw_events_second.value_1 = 1000) OR (raw_events_second.value_1 = 2000) OR (raw_events_second.value_1 = 3000)))) AND ((hashint4(user_id) >= 0) AND (hashint4(user_id) <= 1073741823)))
DEBUG: predicate pruning for shardId 13300000
DEBUG: predicate pruning for shardId 13300001
DEBUG: predicate pruning for shardId 13300002
DEBUG: predicate pruning for shardId 13300004
DEBUG: predicate pruning for shardId 13300005
DEBUG: predicate pruning for shardId 13300006
DEBUG: distributed statement: INSERT INTO public.raw_events_second_13300007 AS citus_table_alias (user_id) SELECT user_id FROM public.raw_events_first_13300003 raw_events_first WHERE ((user_id IN (SELECT raw_events_second.user_id FROM public.raw_events_second_13300007 raw_events_second WHERE ((raw_events_second.value_1 = 1000) OR (raw_events_second.value_1 = 2000) OR (raw_events_second.value_1 = 3000)))) AND ((hashint4(user_id) >= 1073741824) AND (hashint4(user_id) <= 2147483647)))
DEBUG: Plan is router executable
-- lets mix subqueries in FROM clause and subqueries in WHERE
INSERT INTO agg_events
(user_id)
SELECT f2.id FROM
(SELECT
id
FROM (SELECT reference_table.user_id AS id
FROM raw_events_first,
reference_table
WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f
INNER JOIN
(SELECT v4,
v1,
id
FROM (SELECT SUM(raw_events_second.value_4) AS v4,
SUM(raw_events_first.value_1) AS v1,
raw_events_second.user_id AS id
FROM raw_events_first,
raw_events_second
WHERE raw_events_first.user_id = raw_events_second.user_id
GROUP BY raw_events_second.user_id
HAVING SUM(raw_events_second.value_4) > 1000) AS foo2 ) as f2
ON (f.id = f2.id)
WHERE f.id IN (SELECT user_id
FROM raw_events_second);
DEBUG: predicate pruning for shardId 13300001
DEBUG: predicate pruning for shardId 13300002
DEBUG: predicate pruning for shardId 13300003
DEBUG: predicate pruning for shardId 13300005
DEBUG: predicate pruning for shardId 13300006
DEBUG: predicate pruning for shardId 13300007
DEBUG: predicate pruning for shardId 13300001
DEBUG: predicate pruning for shardId 13300002
DEBUG: predicate pruning for shardId 13300003
DEBUG: predicate pruning for shardId 13300005
DEBUG: predicate pruning for shardId 13300006
DEBUG: predicate pruning for shardId 13300007
DEBUG: distributed statement: INSERT INTO public.agg_events_13300008 AS citus_table_alias (user_id) SELECT f2.id FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM public.raw_events_first_13300000 raw_events_first, public.reference_table_13300012 reference_table WHERE (raw_events_first.user_id = reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM public.raw_events_first_13300000 raw_events_first, public.raw_events_second_13300004 raw_events_second WHERE (raw_events_first.user_id = raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) > (1000)::numeric)) foo2) f2 ON ((f.id = f2.id))) WHERE ((f.id IN (SELECT raw_events_second.user_id FROM public.raw_events_second_13300004 raw_events_second)) AND ((hashint4(f2.id) >= '-2147483648'::integer) AND (hashint4(f2.id) <= '-1073741825'::integer)))
DEBUG: predicate pruning for shardId 13300000
DEBUG: predicate pruning for shardId 13300002
DEBUG: predicate pruning for shardId 13300003
DEBUG: predicate pruning for shardId 13300004
DEBUG: predicate pruning for shardId 13300006
DEBUG: predicate pruning for shardId 13300007
DEBUG: predicate pruning for shardId 13300000
DEBUG: predicate pruning for shardId 13300002
DEBUG: predicate pruning for shardId 13300003
DEBUG: predicate pruning for shardId 13300004
DEBUG: predicate pruning for shardId 13300006
DEBUG: predicate pruning for shardId 13300007
DEBUG: distributed statement: INSERT INTO public.agg_events_13300009 AS citus_table_alias (user_id) SELECT f2.id FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM public.raw_events_first_13300001 raw_events_first, public.reference_table_13300012 reference_table WHERE (raw_events_first.user_id = reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM public.raw_events_first_13300001 raw_events_first, public.raw_events_second_13300005 raw_events_second WHERE (raw_events_first.user_id = raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) > (1000)::numeric)) foo2) f2 ON ((f.id = f2.id))) WHERE ((f.id IN (SELECT raw_events_second.user_id FROM public.raw_events_second_13300005 raw_events_second)) AND ((hashint4(f2.id) >= '-1073741824'::integer) AND (hashint4(f2.id) <= '-1'::integer)))
DEBUG: predicate pruning for shardId 13300000
DEBUG: predicate pruning for shardId 13300001
DEBUG: predicate pruning for shardId 13300003
DEBUG: predicate pruning for shardId 13300004
DEBUG: predicate pruning for shardId 13300005
DEBUG: predicate pruning for shardId 13300007
DEBUG: predicate pruning for shardId 13300000
DEBUG: predicate pruning for shardId 13300001
DEBUG: predicate pruning for shardId 13300003
DEBUG: predicate pruning for shardId 13300004
DEBUG: predicate pruning for shardId 13300005
DEBUG: predicate pruning for shardId 13300007
DEBUG: distributed statement: INSERT INTO public.agg_events_13300010 AS citus_table_alias (user_id) SELECT f2.id FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM public.raw_events_first_13300002 raw_events_first, public.reference_table_13300012 reference_table WHERE (raw_events_first.user_id = reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM public.raw_events_first_13300002 raw_events_first, public.raw_events_second_13300006 raw_events_second WHERE (raw_events_first.user_id = raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) > (1000)::numeric)) foo2) f2 ON ((f.id = f2.id))) WHERE ((f.id IN (SELECT raw_events_second.user_id FROM public.raw_events_second_13300006 raw_events_second)) AND ((hashint4(f2.id) >= 0) AND (hashint4(f2.id) <= 1073741823)))
DEBUG: predicate pruning for shardId 13300000
DEBUG: predicate pruning for shardId 13300001
DEBUG: predicate pruning for shardId 13300002
DEBUG: predicate pruning for shardId 13300004
DEBUG: predicate pruning for shardId 13300005
DEBUG: predicate pruning for shardId 13300006
DEBUG: predicate pruning for shardId 13300000
DEBUG: predicate pruning for shardId 13300001
DEBUG: predicate pruning for shardId 13300002
DEBUG: predicate pruning for shardId 13300004
DEBUG: predicate pruning for shardId 13300005
DEBUG: predicate pruning for shardId 13300006
DEBUG: distributed statement: INSERT INTO public.agg_events_13300011 AS citus_table_alias (user_id) SELECT f2.id FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM public.raw_events_first_13300003 raw_events_first, public.reference_table_13300012 reference_table WHERE (raw_events_first.user_id = reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM public.raw_events_first_13300003 raw_events_first, public.raw_events_second_13300007 raw_events_second WHERE (raw_events_first.user_id = raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) > (1000)::numeric)) foo2) f2 ON ((f.id = f2.id))) WHERE ((f.id IN (SELECT raw_events_second.user_id FROM public.raw_events_second_13300007 raw_events_second)) AND ((hashint4(f2.id) >= 1073741824) AND (hashint4(f2.id) <= 2147483647)))
DEBUG: Plan is router executable
-- some UPSERTS -- some UPSERTS
INSERT INTO agg_events AS ae INSERT INTO agg_events AS ae
( (
@ -973,7 +1141,469 @@ FROM
((SELECT user_id FROM raw_events_first WHERE user_id = 15) EXCEPT ((SELECT user_id FROM raw_events_first WHERE user_id = 15) EXCEPT
(SELECT user_id FROM raw_events_second where user_id = 17)) as foo; (SELECT user_id FROM raw_events_second where user_id = 17)) as foo;
ERROR: set operations are not allowed in INSERT ... SELECT queries ERROR: set operations are not allowed in INSERT ... SELECT queries
-- unsupported JOIN -- some supported LEFT joins
INSERT INTO agg_events (user_id)
SELECT
raw_events_first.user_id
FROM
raw_events_first LEFT JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.user_id;
DEBUG: predicate pruning for shardId 13300001
DEBUG: predicate pruning for shardId 13300002
DEBUG: predicate pruning for shardId 13300003
DEBUG: predicate pruning for shardId 13300005
DEBUG: predicate pruning for shardId 13300006
DEBUG: predicate pruning for shardId 13300007
DEBUG: distributed statement: INSERT INTO public.agg_events_13300008 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (public.raw_events_first_13300000 raw_events_first LEFT JOIN public.raw_events_second_13300004 raw_events_second ON ((raw_events_first.user_id = raw_events_second.user_id))) WHERE ((hashint4(raw_events_first.user_id) >= '-2147483648'::integer) AND (hashint4(raw_events_first.user_id) <= '-1073741825'::integer))
DEBUG: predicate pruning for shardId 13300000
DEBUG: predicate pruning for shardId 13300002
DEBUG: predicate pruning for shardId 13300003
DEBUG: predicate pruning for shardId 13300004
DEBUG: predicate pruning for shardId 13300006
DEBUG: predicate pruning for shardId 13300007
DEBUG: distributed statement: INSERT INTO public.agg_events_13300009 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (public.raw_events_first_13300001 raw_events_first LEFT JOIN public.raw_events_second_13300005 raw_events_second ON ((raw_events_first.user_id = raw_events_second.user_id))) WHERE ((hashint4(raw_events_first.user_id) >= '-1073741824'::integer) AND (hashint4(raw_events_first.user_id) <= '-1'::integer))
DEBUG: predicate pruning for shardId 13300000
DEBUG: predicate pruning for shardId 13300001
DEBUG: predicate pruning for shardId 13300003
DEBUG: predicate pruning for shardId 13300004
DEBUG: predicate pruning for shardId 13300005
DEBUG: predicate pruning for shardId 13300007
DEBUG: distributed statement: INSERT INTO public.agg_events_13300010 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (public.raw_events_first_13300002 raw_events_first LEFT JOIN public.raw_events_second_13300006 raw_events_second ON ((raw_events_first.user_id = raw_events_second.user_id))) WHERE ((hashint4(raw_events_first.user_id) >= 0) AND (hashint4(raw_events_first.user_id) <= 1073741823))
DEBUG: predicate pruning for shardId 13300000
DEBUG: predicate pruning for shardId 13300001
DEBUG: predicate pruning for shardId 13300002
DEBUG: predicate pruning for shardId 13300004
DEBUG: predicate pruning for shardId 13300005
DEBUG: predicate pruning for shardId 13300006
DEBUG: distributed statement: INSERT INTO public.agg_events_13300011 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (public.raw_events_first_13300003 raw_events_first LEFT JOIN public.raw_events_second_13300007 raw_events_second ON ((raw_events_first.user_id = raw_events_second.user_id))) WHERE ((hashint4(raw_events_first.user_id) >= 1073741824) AND (hashint4(raw_events_first.user_id) <= 2147483647))
DEBUG: Plan is router executable
INSERT INTO agg_events (user_id)
SELECT
raw_events_second.user_id
FROM
reference_table LEFT JOIN raw_events_second ON reference_table.user_id = raw_events_second.user_id;
DEBUG: predicate pruning for shardId 13300005
DEBUG: predicate pruning for shardId 13300006
DEBUG: predicate pruning for shardId 13300007
DEBUG: distributed statement: INSERT INTO public.agg_events_13300008 AS citus_table_alias (user_id) SELECT raw_events_second.user_id FROM (public.reference_table_13300012 reference_table LEFT JOIN public.raw_events_second_13300004 raw_events_second ON ((reference_table.user_id = raw_events_second.user_id))) WHERE ((hashint4(raw_events_second.user_id) >= '-2147483648'::integer) AND (hashint4(raw_events_second.user_id) <= '-1073741825'::integer))
DEBUG: predicate pruning for shardId 13300004
DEBUG: predicate pruning for shardId 13300006
DEBUG: predicate pruning for shardId 13300007
DEBUG: distributed statement: INSERT INTO public.agg_events_13300009 AS citus_table_alias (user_id) SELECT raw_events_second.user_id FROM (public.reference_table_13300012 reference_table LEFT JOIN public.raw_events_second_13300005 raw_events_second ON ((reference_table.user_id = raw_events_second.user_id))) WHERE ((hashint4(raw_events_second.user_id) >= '-1073741824'::integer) AND (hashint4(raw_events_second.user_id) <= '-1'::integer))
DEBUG: predicate pruning for shardId 13300004
DEBUG: predicate pruning for shardId 13300005
DEBUG: predicate pruning for shardId 13300007
DEBUG: distributed statement: INSERT INTO public.agg_events_13300010 AS citus_table_alias (user_id) SELECT raw_events_second.user_id FROM (public.reference_table_13300012 reference_table LEFT JOIN public.raw_events_second_13300006 raw_events_second ON ((reference_table.user_id = raw_events_second.user_id))) WHERE ((hashint4(raw_events_second.user_id) >= 0) AND (hashint4(raw_events_second.user_id) <= 1073741823))
DEBUG: predicate pruning for shardId 13300004
DEBUG: predicate pruning for shardId 13300005
DEBUG: predicate pruning for shardId 13300006
DEBUG: distributed statement: INSERT INTO public.agg_events_13300011 AS citus_table_alias (user_id) SELECT raw_events_second.user_id FROM (public.reference_table_13300012 reference_table LEFT JOIN public.raw_events_second_13300007 raw_events_second ON ((reference_table.user_id = raw_events_second.user_id))) WHERE ((hashint4(raw_events_second.user_id) >= 1073741824) AND (hashint4(raw_events_second.user_id) <= 2147483647))
DEBUG: Plan is router executable
INSERT INTO agg_events (user_id)
SELECT
raw_events_first.user_id
FROM
raw_events_first LEFT JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.user_id
WHERE raw_events_first.user_id = 10;
DEBUG: predicate pruning for shardId 13300001
DEBUG: predicate pruning for shardId 13300002
DEBUG: predicate pruning for shardId 13300003
DEBUG: predicate pruning for shardId 13300005
DEBUG: predicate pruning for shardId 13300006
DEBUG: predicate pruning for shardId 13300007
DEBUG: distributed statement: INSERT INTO public.agg_events_13300008 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (public.raw_events_first_13300000 raw_events_first LEFT JOIN public.raw_events_second_13300004 raw_events_second ON ((raw_events_first.user_id = raw_events_second.user_id))) WHERE ((raw_events_first.user_id = 10) AND ((hashint4(raw_events_first.user_id) >= '-2147483648'::integer) AND (hashint4(raw_events_first.user_id) <= '-1073741825'::integer)))
DEBUG: predicate pruning for shardId 13300000
DEBUG: predicate pruning for shardId 13300001
DEBUG: predicate pruning for shardId 13300002
DEBUG: predicate pruning for shardId 13300003
DEBUG: predicate pruning for shardId 13300004
DEBUG: predicate pruning for shardId 13300005
DEBUG: predicate pruning for shardId 13300006
DEBUG: predicate pruning for shardId 13300007
DEBUG: Skipping target shard interval 13300009 since SELECT query for it pruned away
DEBUG: predicate pruning for shardId 13300000
DEBUG: predicate pruning for shardId 13300001
DEBUG: predicate pruning for shardId 13300002
DEBUG: predicate pruning for shardId 13300003
DEBUG: predicate pruning for shardId 13300004
DEBUG: predicate pruning for shardId 13300005
DEBUG: predicate pruning for shardId 13300006
DEBUG: predicate pruning for shardId 13300007
DEBUG: Skipping target shard interval 13300010 since SELECT query for it pruned away
DEBUG: predicate pruning for shardId 13300000
DEBUG: predicate pruning for shardId 13300001
DEBUG: predicate pruning for shardId 13300002
DEBUG: predicate pruning for shardId 13300003
DEBUG: predicate pruning for shardId 13300004
DEBUG: predicate pruning for shardId 13300005
DEBUG: predicate pruning for shardId 13300006
DEBUG: predicate pruning for shardId 13300007
DEBUG: Skipping target shard interval 13300011 since SELECT query for it pruned away
DEBUG: Plan is router executable
INSERT INTO agg_events (user_id)
SELECT
raw_events_first.user_id
FROM
raw_events_first LEFT JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.user_id
WHERE raw_events_second.user_id = 10 OR raw_events_second.user_id = 11;
DEBUG: predicate pruning for shardId 13300001
DEBUG: predicate pruning for shardId 13300002
DEBUG: predicate pruning for shardId 13300003
DEBUG: predicate pruning for shardId 13300005
DEBUG: predicate pruning for shardId 13300006
DEBUG: predicate pruning for shardId 13300007
DEBUG: distributed statement: INSERT INTO public.agg_events_13300008 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (public.raw_events_first_13300000 raw_events_first LEFT JOIN public.raw_events_second_13300004 raw_events_second ON ((raw_events_first.user_id = raw_events_second.user_id))) WHERE (((raw_events_second.user_id = 10) OR (raw_events_second.user_id = 11)) AND ((hashint4(raw_events_first.user_id) >= '-2147483648'::integer) AND (hashint4(raw_events_first.user_id) <= '-1073741825'::integer)))
DEBUG: predicate pruning for shardId 13300000
DEBUG: predicate pruning for shardId 13300002
DEBUG: predicate pruning for shardId 13300003
DEBUG: predicate pruning for shardId 13300004
DEBUG: predicate pruning for shardId 13300005
DEBUG: predicate pruning for shardId 13300006
DEBUG: predicate pruning for shardId 13300007
DEBUG: distributed statement: INSERT INTO public.agg_events_13300009 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (public.raw_events_first_13300001 raw_events_first LEFT JOIN (SELECT NULL::integer AS user_id, NULL::timestamp without time zone AS "time", NULL::integer AS value_1, NULL::integer AS value_2, NULL::double precision AS value_3, NULL::bigint AS value_4 WHERE false) raw_events_second(user_id, "time", value_1, value_2, value_3, value_4) ON ((raw_events_first.user_id = raw_events_second.user_id))) WHERE (((raw_events_second.user_id = 10) OR (raw_events_second.user_id = 11)) AND ((hashint4(raw_events_first.user_id) >= '-1073741824'::integer) AND (hashint4(raw_events_first.user_id) <= '-1'::integer)))
DEBUG: predicate pruning for shardId 13300000
DEBUG: predicate pruning for shardId 13300001
DEBUG: predicate pruning for shardId 13300003
DEBUG: predicate pruning for shardId 13300004
DEBUG: predicate pruning for shardId 13300005
DEBUG: predicate pruning for shardId 13300006
DEBUG: predicate pruning for shardId 13300007
DEBUG: distributed statement: INSERT INTO public.agg_events_13300010 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (public.raw_events_first_13300002 raw_events_first LEFT JOIN (SELECT NULL::integer AS user_id, NULL::timestamp without time zone AS "time", NULL::integer AS value_1, NULL::integer AS value_2, NULL::double precision AS value_3, NULL::bigint AS value_4 WHERE false) raw_events_second(user_id, "time", value_1, value_2, value_3, value_4) ON ((raw_events_first.user_id = raw_events_second.user_id))) WHERE (((raw_events_second.user_id = 10) OR (raw_events_second.user_id = 11)) AND ((hashint4(raw_events_first.user_id) >= 0) AND (hashint4(raw_events_first.user_id) <= 1073741823)))
DEBUG: predicate pruning for shardId 13300000
DEBUG: predicate pruning for shardId 13300001
DEBUG: predicate pruning for shardId 13300002
DEBUG: predicate pruning for shardId 13300004
DEBUG: predicate pruning for shardId 13300005
DEBUG: predicate pruning for shardId 13300006
DEBUG: distributed statement: INSERT INTO public.agg_events_13300011 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (public.raw_events_first_13300003 raw_events_first LEFT JOIN public.raw_events_second_13300007 raw_events_second ON ((raw_events_first.user_id = raw_events_second.user_id))) WHERE (((raw_events_second.user_id = 10) OR (raw_events_second.user_id = 11)) AND ((hashint4(raw_events_first.user_id) >= 1073741824) AND (hashint4(raw_events_first.user_id) <= 2147483647)))
DEBUG: Plan is router executable
INSERT INTO agg_events (user_id)
SELECT
raw_events_first.user_id
FROM
raw_events_first INNER JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.user_id
WHERE raw_events_first.user_id = 10 AND raw_events_first.user_id = 20;
DEBUG: Skipping target shard interval 13300008 since SELECT query for it pruned away
DEBUG: Skipping target shard interval 13300009 since SELECT query for it pruned away
DEBUG: Skipping target shard interval 13300010 since SELECT query for it pruned away
DEBUG: Skipping target shard interval 13300011 since SELECT query for it pruned away
DEBUG: Plan is router executable
INSERT INTO agg_events (user_id)
SELECT
raw_events_first.user_id
FROM
raw_events_first LEFT JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.user_id
WHERE raw_events_first.user_id = 10 AND raw_events_second.user_id = 20;
DEBUG: Skipping target shard interval 13300008 since SELECT query for it pruned away
DEBUG: Skipping target shard interval 13300009 since SELECT query for it pruned away
DEBUG: Skipping target shard interval 13300010 since SELECT query for it pruned away
DEBUG: Skipping target shard interval 13300011 since SELECT query for it pruned away
DEBUG: Plan is router executable
INSERT INTO agg_events (user_id)
SELECT
raw_events_first.user_id
FROM
raw_events_first LEFT JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.user_id
WHERE raw_events_first.user_id IN (19, 20, 21);
NOTICE: cannot use shard pruning with ANY/ALL (array expression)
HINT: Consider rewriting the expression with OR/AND clauses.
DEBUG: predicate pruning for shardId 13300001
DEBUG: predicate pruning for shardId 13300002
DEBUG: predicate pruning for shardId 13300003
DEBUG: predicate pruning for shardId 13300005
DEBUG: predicate pruning for shardId 13300006
DEBUG: predicate pruning for shardId 13300007
DEBUG: distributed statement: INSERT INTO public.agg_events_13300008 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (public.raw_events_first_13300000 raw_events_first LEFT JOIN public.raw_events_second_13300004 raw_events_second ON ((raw_events_first.user_id = raw_events_second.user_id))) WHERE ((raw_events_first.user_id = ANY (ARRAY[19, 20, 21])) AND ((hashint4(raw_events_first.user_id) >= '-2147483648'::integer) AND (hashint4(raw_events_first.user_id) <= '-1073741825'::integer)))
NOTICE: cannot use shard pruning with ANY/ALL (array expression)
HINT: Consider rewriting the expression with OR/AND clauses.
DEBUG: predicate pruning for shardId 13300000
DEBUG: predicate pruning for shardId 13300002
DEBUG: predicate pruning for shardId 13300003
DEBUG: predicate pruning for shardId 13300004
DEBUG: predicate pruning for shardId 13300006
DEBUG: predicate pruning for shardId 13300007
DEBUG: distributed statement: INSERT INTO public.agg_events_13300009 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (public.raw_events_first_13300001 raw_events_first LEFT JOIN public.raw_events_second_13300005 raw_events_second ON ((raw_events_first.user_id = raw_events_second.user_id))) WHERE ((raw_events_first.user_id = ANY (ARRAY[19, 20, 21])) AND ((hashint4(raw_events_first.user_id) >= '-1073741824'::integer) AND (hashint4(raw_events_first.user_id) <= '-1'::integer)))
NOTICE: cannot use shard pruning with ANY/ALL (array expression)
HINT: Consider rewriting the expression with OR/AND clauses.
DEBUG: predicate pruning for shardId 13300000
DEBUG: predicate pruning for shardId 13300001
DEBUG: predicate pruning for shardId 13300003
DEBUG: predicate pruning for shardId 13300004
DEBUG: predicate pruning for shardId 13300005
DEBUG: predicate pruning for shardId 13300007
DEBUG: distributed statement: INSERT INTO public.agg_events_13300010 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (public.raw_events_first_13300002 raw_events_first LEFT JOIN public.raw_events_second_13300006 raw_events_second ON ((raw_events_first.user_id = raw_events_second.user_id))) WHERE ((raw_events_first.user_id = ANY (ARRAY[19, 20, 21])) AND ((hashint4(raw_events_first.user_id) >= 0) AND (hashint4(raw_events_first.user_id) <= 1073741823)))
NOTICE: cannot use shard pruning with ANY/ALL (array expression)
HINT: Consider rewriting the expression with OR/AND clauses.
DEBUG: predicate pruning for shardId 13300000
DEBUG: predicate pruning for shardId 13300001
DEBUG: predicate pruning for shardId 13300002
DEBUG: predicate pruning for shardId 13300004
DEBUG: predicate pruning for shardId 13300005
DEBUG: predicate pruning for shardId 13300006
DEBUG: distributed statement: INSERT INTO public.agg_events_13300011 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (public.raw_events_first_13300003 raw_events_first LEFT JOIN public.raw_events_second_13300007 raw_events_second ON ((raw_events_first.user_id = raw_events_second.user_id))) WHERE ((raw_events_first.user_id = ANY (ARRAY[19, 20, 21])) AND ((hashint4(raw_events_first.user_id) >= 1073741824) AND (hashint4(raw_events_first.user_id) <= 2147483647)))
DEBUG: Plan is router executable
INSERT INTO agg_events (user_id)
SELECT
raw_events_first.user_id
FROM
raw_events_first INNER JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.user_id
WHERE raw_events_second.user_id IN (19, 20, 21);
DEBUG: predicate pruning for shardId 13300001
DEBUG: predicate pruning for shardId 13300002
DEBUG: predicate pruning for shardId 13300003
NOTICE: cannot use shard pruning with ANY/ALL (array expression)
HINT: Consider rewriting the expression with OR/AND clauses.
DEBUG: predicate pruning for shardId 13300005
DEBUG: predicate pruning for shardId 13300006
DEBUG: predicate pruning for shardId 13300007
DEBUG: distributed statement: INSERT INTO public.agg_events_13300008 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (public.raw_events_first_13300000 raw_events_first JOIN public.raw_events_second_13300004 raw_events_second ON ((raw_events_first.user_id = raw_events_second.user_id))) WHERE ((raw_events_second.user_id = ANY (ARRAY[19, 20, 21])) AND ((hashint4(raw_events_first.user_id) >= '-2147483648'::integer) AND (hashint4(raw_events_first.user_id) <= '-1073741825'::integer)))
DEBUG: predicate pruning for shardId 13300000
DEBUG: predicate pruning for shardId 13300002
DEBUG: predicate pruning for shardId 13300003
NOTICE: cannot use shard pruning with ANY/ALL (array expression)
HINT: Consider rewriting the expression with OR/AND clauses.
DEBUG: predicate pruning for shardId 13300004
DEBUG: predicate pruning for shardId 13300006
DEBUG: predicate pruning for shardId 13300007
DEBUG: distributed statement: INSERT INTO public.agg_events_13300009 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (public.raw_events_first_13300001 raw_events_first JOIN public.raw_events_second_13300005 raw_events_second ON ((raw_events_first.user_id = raw_events_second.user_id))) WHERE ((raw_events_second.user_id = ANY (ARRAY[19, 20, 21])) AND ((hashint4(raw_events_first.user_id) >= '-1073741824'::integer) AND (hashint4(raw_events_first.user_id) <= '-1'::integer)))
DEBUG: predicate pruning for shardId 13300000
DEBUG: predicate pruning for shardId 13300001
DEBUG: predicate pruning for shardId 13300003
NOTICE: cannot use shard pruning with ANY/ALL (array expression)
HINT: Consider rewriting the expression with OR/AND clauses.
DEBUG: predicate pruning for shardId 13300004
DEBUG: predicate pruning for shardId 13300005
DEBUG: predicate pruning for shardId 13300007
DEBUG: distributed statement: INSERT INTO public.agg_events_13300010 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (public.raw_events_first_13300002 raw_events_first JOIN public.raw_events_second_13300006 raw_events_second ON ((raw_events_first.user_id = raw_events_second.user_id))) WHERE ((raw_events_second.user_id = ANY (ARRAY[19, 20, 21])) AND ((hashint4(raw_events_first.user_id) >= 0) AND (hashint4(raw_events_first.user_id) <= 1073741823)))
DEBUG: predicate pruning for shardId 13300000
DEBUG: predicate pruning for shardId 13300001
DEBUG: predicate pruning for shardId 13300002
NOTICE: cannot use shard pruning with ANY/ALL (array expression)
HINT: Consider rewriting the expression with OR/AND clauses.
DEBUG: predicate pruning for shardId 13300004
DEBUG: predicate pruning for shardId 13300005
DEBUG: predicate pruning for shardId 13300006
DEBUG: distributed statement: INSERT INTO public.agg_events_13300011 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (public.raw_events_first_13300003 raw_events_first JOIN public.raw_events_second_13300007 raw_events_second ON ((raw_events_first.user_id = raw_events_second.user_id))) WHERE ((raw_events_second.user_id = ANY (ARRAY[19, 20, 21])) AND ((hashint4(raw_events_first.user_id) >= 1073741824) AND (hashint4(raw_events_first.user_id) <= 2147483647)))
DEBUG: Plan is router executable
-- the following is a very tricky query for Citus
-- although we do not support pushing down JOINs on non-partition
-- columns here it is safe to push it down given that we're looking for
-- a specific value (i.e., value_1 = 12) on the joining column.
-- Note that the query always hits the same shard on raw_events_second
-- and this query wouldn't have worked if we're to use different worker
-- count or shard replication factor
INSERT INTO agg_events
(user_id)
SELECT raw_events_first.user_id
FROM raw_events_first,
raw_events_second
WHERE raw_events_second.user_id = raw_events_first.value_1
AND raw_events_first.value_1 = 12;
DEBUG: predicate pruning for shardId 13300001
DEBUG: predicate pruning for shardId 13300002
DEBUG: predicate pruning for shardId 13300003
DEBUG: predicate pruning for shardId 13300004
DEBUG: predicate pruning for shardId 13300005
DEBUG: predicate pruning for shardId 13300006
DEBUG: distributed statement: INSERT INTO public.agg_events_13300008 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM public.raw_events_first_13300000 raw_events_first, public.raw_events_second_13300007 raw_events_second WHERE (((raw_events_second.user_id = raw_events_first.value_1) AND (raw_events_first.value_1 = 12)) AND ((hashint4(raw_events_first.user_id) >= '-2147483648'::integer) AND (hashint4(raw_events_first.user_id) <= '-1073741825'::integer)))
DEBUG: predicate pruning for shardId 13300000
DEBUG: predicate pruning for shardId 13300002
DEBUG: predicate pruning for shardId 13300003
DEBUG: predicate pruning for shardId 13300004
DEBUG: predicate pruning for shardId 13300005
DEBUG: predicate pruning for shardId 13300006
DEBUG: distributed statement: INSERT INTO public.agg_events_13300009 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM public.raw_events_first_13300001 raw_events_first, public.raw_events_second_13300007 raw_events_second WHERE (((raw_events_second.user_id = raw_events_first.value_1) AND (raw_events_first.value_1 = 12)) AND ((hashint4(raw_events_first.user_id) >= '-1073741824'::integer) AND (hashint4(raw_events_first.user_id) <= '-1'::integer)))
DEBUG: predicate pruning for shardId 13300000
DEBUG: predicate pruning for shardId 13300001
DEBUG: predicate pruning for shardId 13300003
DEBUG: predicate pruning for shardId 13300004
DEBUG: predicate pruning for shardId 13300005
DEBUG: predicate pruning for shardId 13300006
DEBUG: distributed statement: INSERT INTO public.agg_events_13300010 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM public.raw_events_first_13300002 raw_events_first, public.raw_events_second_13300007 raw_events_second WHERE (((raw_events_second.user_id = raw_events_first.value_1) AND (raw_events_first.value_1 = 12)) AND ((hashint4(raw_events_first.user_id) >= 0) AND (hashint4(raw_events_first.user_id) <= 1073741823)))
DEBUG: predicate pruning for shardId 13300000
DEBUG: predicate pruning for shardId 13300001
DEBUG: predicate pruning for shardId 13300002
DEBUG: predicate pruning for shardId 13300004
DEBUG: predicate pruning for shardId 13300005
DEBUG: predicate pruning for shardId 13300006
DEBUG: distributed statement: INSERT INTO public.agg_events_13300011 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM public.raw_events_first_13300003 raw_events_first, public.raw_events_second_13300007 raw_events_second WHERE (((raw_events_second.user_id = raw_events_first.value_1) AND (raw_events_first.value_1 = 12)) AND ((hashint4(raw_events_first.user_id) >= 1073741824) AND (hashint4(raw_events_first.user_id) <= 2147483647)))
DEBUG: Plan is router executable
-- some unsupported LEFT/INNER JOINs
-- JOIN on one table with partition column other is not
INSERT INTO agg_events (user_id)
SELECT
raw_events_first.user_id
FROM
raw_events_first LEFT JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.value_1;
DEBUG: predicate pruning for shardId 13300001
DEBUG: predicate pruning for shardId 13300002
DEBUG: predicate pruning for shardId 13300003
ERROR: cannot perform distributed planning for the given modification
DETAIL: Select query cannot be pushed down to the worker.
-- same as the above with INNER JOIN
INSERT INTO agg_events (user_id)
SELECT
raw_events_first.user_id
FROM
raw_events_first INNER JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.value_1;
DEBUG: predicate pruning for shardId 13300001
DEBUG: predicate pruning for shardId 13300002
DEBUG: predicate pruning for shardId 13300003
ERROR: cannot perform distributed planning for the given modification
DETAIL: Select query cannot be pushed down to the worker.
-- a not meaningful query
INSERT INTO agg_events
(user_id)
SELECT raw_events_second.user_id
FROM raw_events_first,
raw_events_second
WHERE raw_events_first.user_id = raw_events_first.value_1;
ERROR: cannot perform distributed planning for the given modification
DETAIL: Select query cannot be pushed down to the worker.
-- both tables joined on non-partition columns
INSERT INTO agg_events (user_id)
SELECT
raw_events_first.user_id
FROM
raw_events_first LEFT JOIN raw_events_second ON raw_events_first.value_1 = raw_events_second.value_1;
DEBUG: predicate pruning for shardId 13300001
DEBUG: predicate pruning for shardId 13300002
DEBUG: predicate pruning for shardId 13300003
ERROR: cannot perform distributed planning for the given modification
DETAIL: Select query cannot be pushed down to the worker.
-- same as the above with INNER JOIN
INSERT INTO agg_events (user_id)
SELECT
raw_events_first.user_id
FROM
raw_events_first INNER JOIN raw_events_second ON raw_events_first.value_1 = raw_events_second.value_1;
DEBUG: predicate pruning for shardId 13300001
DEBUG: predicate pruning for shardId 13300002
DEBUG: predicate pruning for shardId 13300003
ERROR: cannot perform distributed planning for the given modification
DETAIL: Select query cannot be pushed down to the worker.
-- even if there is a filter on the partition key, since the join is not on the partition key we reject
-- this query
INSERT INTO agg_events (user_id)
SELECT
raw_events_first.user_id
FROM
raw_events_first LEFT JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.value_1
WHERE
raw_events_first.user_id = 10;
DEBUG: predicate pruning for shardId 13300001
DEBUG: predicate pruning for shardId 13300002
DEBUG: predicate pruning for shardId 13300003
ERROR: cannot perform distributed planning for the given modification
DETAIL: Select query cannot be pushed down to the worker.
-- same as the above with INNER JOIN
INSERT INTO agg_events (user_id)
SELECT
raw_events_first.user_id
FROM
raw_events_first INNER JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.value_1
WHERE raw_events_first.user_id = 10;
DEBUG: predicate pruning for shardId 13300001
DEBUG: predicate pruning for shardId 13300002
DEBUG: predicate pruning for shardId 13300003
ERROR: cannot perform distributed planning for the given modification
DETAIL: Select query cannot be pushed down to the worker.
-- make things a bit more complicate with IN clauses
INSERT INTO agg_events (user_id)
SELECT
raw_events_first.user_id
FROM
raw_events_first INNER JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.value_1
WHERE raw_events_first.value_1 IN (10, 11,12) OR raw_events_second.user_id IN (1,2,3,4);
DEBUG: predicate pruning for shardId 13300001
DEBUG: predicate pruning for shardId 13300002
DEBUG: predicate pruning for shardId 13300003
ERROR: cannot perform distributed planning for the given modification
DETAIL: Select query cannot be pushed down to the worker.
-- implicit join on non partition column should also not be pushed down
INSERT INTO agg_events
(user_id)
SELECT raw_events_first.user_id
FROM raw_events_first,
raw_events_second
WHERE raw_events_second.user_id = raw_events_first.value_1;
DEBUG: predicate pruning for shardId 13300001
DEBUG: predicate pruning for shardId 13300002
DEBUG: predicate pruning for shardId 13300003
ERROR: cannot perform distributed planning for the given modification
DETAIL: Select query cannot be pushed down to the worker.
-- the following is again a tricky query for Citus
-- if the given filter was on value_1 as shown in the above, Citus could
-- push it down. But here the query is refused
INSERT INTO agg_events
(user_id)
SELECT raw_events_first.user_id
FROM raw_events_first,
raw_events_second
WHERE raw_events_second.user_id = raw_events_first.value_1
AND raw_events_first.value_2 = 12;
DEBUG: predicate pruning for shardId 13300001
DEBUG: predicate pruning for shardId 13300002
DEBUG: predicate pruning for shardId 13300003
ERROR: cannot perform distributed planning for the given modification
DETAIL: Select query cannot be pushed down to the worker.
-- lets do some unsupported query tests with subqueries
-- foo is not joined on the partition key so the query is not
-- pushed down
INSERT INTO agg_events
(user_id, value_4_agg)
SELECT
outer_most.id, max(outer_most.value)
FROM
(
SELECT f2.id as id, f2.v4 as value FROM
(SELECT
id
FROM (SELECT reference_table.user_id AS id
FROM raw_events_first LEFT JOIN
reference_table
ON (raw_events_first.value_1 = reference_table.user_id)) AS foo) as f
INNER JOIN
(SELECT v4,
v1,
id
FROM (SELECT SUM(raw_events_second.value_4) AS v4,
SUM(raw_events_first.value_1) AS v1,
raw_events_second.user_id AS id
FROM raw_events_first,
raw_events_second
WHERE raw_events_first.user_id = raw_events_second.user_id
GROUP BY raw_events_second.user_id
HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2
ON (f.id = f2.id)) as outer_most
GROUP BY
outer_most.id;
DEBUG: predicate pruning for shardId 13300001
DEBUG: predicate pruning for shardId 13300002
DEBUG: predicate pruning for shardId 13300003
DEBUG: predicate pruning for shardId 13300005
DEBUG: predicate pruning for shardId 13300006
DEBUG: predicate pruning for shardId 13300007
ERROR: cannot perform distributed planning for the given modification
DETAIL: Select query cannot be pushed down to the worker.
INSERT INTO agg_events INSERT INTO agg_events
(value_4_agg, (value_4_agg,
value_1_agg, value_1_agg,
@ -1109,7 +1739,8 @@ ERROR: INSERT INTO ... SELECT partition columns in the source table and subquer
DETAIL: Subquery contains an expression that is not a simple column reference in the same position as the target table's partition column. DETAIL: Subquery contains an expression that is not a simple column reference in the same position as the target table's partition column.
HINT: Ensure the target table's partition column has a corresponding simple column reference to a distributed table's partition column in the subquery. HINT: Ensure the target table's partition column has a corresponding simple column reference to a distributed table's partition column in the subquery.
-- the second part of the query is not routable since -- the second part of the query is not routable since
-- no GROUP BY on the partition column -- GROUP BY not on the partition column (i.e., value_1) and thus join
-- on f.id = f2.id is not on the partition key (instead on the sum of partition key)
INSERT INTO agg_events INSERT INTO agg_events
(user_id) (user_id)
SELECT f.id FROM SELECT f.id FROM
@ -1170,6 +1801,189 @@ DEBUG: predicate pruning for shardId 13300006
DEBUG: predicate pruning for shardId 13300007 DEBUG: predicate pruning for shardId 13300007
ERROR: cannot perform distributed planning for the given modification ERROR: cannot perform distributed planning for the given modification
DETAIL: Select query cannot be pushed down to the worker. DETAIL: Select query cannot be pushed down to the worker.
-- cannot pushdown since foo2 is not join on partition key
INSERT INTO agg_events
(user_id, value_4_agg)
SELECT
outer_most.id, max(outer_most.value)
FROM
(
SELECT f2.id as id, f2.v4 as value FROM
(SELECT
id
FROM (SELECT reference_table.user_id AS id
FROM raw_events_first,
reference_table
WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f
INNER JOIN
(SELECT v4,
v1,
id
FROM (SELECT SUM(raw_events_second.value_4) AS v4,
SUM(raw_events_first.value_1) AS v1,
raw_events_second.user_id AS id
FROM raw_events_first,
raw_events_second
WHERE raw_events_first.user_id = raw_events_second.value_1
GROUP BY raw_events_second.user_id
HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2
ON (f.id = f2.id)) as outer_most
GROUP BY
outer_most.id;
ERROR: cannot perform distributed planning for the given modification
DETAIL: Select query cannot be pushed down to the worker.
-- cannot push down since foo doesn't have en equi join
INSERT INTO agg_events
(user_id, value_4_agg)
SELECT
outer_most.id, max(outer_most.value)
FROM
(
SELECT f2.id as id, f2.v4 as value FROM
(SELECT
id
FROM (SELECT reference_table.user_id AS id
FROM raw_events_first,
reference_table
WHERE raw_events_first.user_id != reference_table.user_id ) AS foo) as f
INNER JOIN
(SELECT v4,
v1,
id
FROM (SELECT SUM(raw_events_second.value_4) AS v4,
SUM(raw_events_first.value_1) AS v1,
raw_events_second.user_id AS id
FROM raw_events_first,
raw_events_second
WHERE raw_events_first.user_id = raw_events_second.user_id
GROUP BY raw_events_second.user_id
HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2
ON (f.id = f2.id)) as outer_most
GROUP BY
outer_most.id;
DEBUG: predicate pruning for shardId 13300001
DEBUG: predicate pruning for shardId 13300002
DEBUG: predicate pruning for shardId 13300003
DEBUG: predicate pruning for shardId 13300005
DEBUG: predicate pruning for shardId 13300006
DEBUG: predicate pruning for shardId 13300007
ERROR: cannot perform distributed planning for the given modification
DETAIL: Select query cannot be pushed down to the worker.
-- some unsupported LATERAL JOINs
-- join on averages is not on the partition key
INSERT INTO agg_events (user_id, value_4_agg)
SELECT
averages.user_id, avg(averages.value_4)
FROM
(SELECT
raw_events_second.user_id
FROM
reference_table JOIN raw_events_second on (reference_table.user_id = raw_events_second.user_id)
) reference_ids
JOIN LATERAL
(SELECT
user_id, value_4
FROM
raw_events_first WHERE
value_4 = reference_ids.user_id) as averages ON true
GROUP BY averages.user_id;
ERROR: cannot perform distributed planning for the given modification
DETAIL: Select query cannot be pushed down to the worker.
-- join among reference_ids and averages is not on the partition key
INSERT INTO agg_events (user_id, value_4_agg)
SELECT
averages.user_id, avg(averages.value_4)
FROM
(SELECT
raw_events_second.user_id
FROM
reference_table JOIN raw_events_second on (reference_table.user_id = raw_events_second.user_id)
) reference_ids
JOIN LATERAL
(SELECT
user_id, value_4
FROM
raw_events_first) as averages ON averages.value_4 = reference_ids.user_id
GROUP BY averages.user_id;
ERROR: cannot perform distributed planning for the given modification
DETAIL: Select query cannot be pushed down to the worker.
-- join among the agg_ids and averages is not on the partition key
INSERT INTO agg_events (user_id, value_4_agg)
SELECT
averages.user_id, avg(averages.value_4)
FROM
(SELECT
raw_events_second.user_id
FROM
reference_table JOIN raw_events_second on (reference_table.user_id = raw_events_second.user_id)
) reference_ids
JOIN LATERAL
(SELECT
user_id, value_4
FROM
raw_events_first) as averages ON averages.user_id = reference_ids.user_id
JOIN LATERAL
(SELECT user_id, value_4 FROM agg_events) as agg_ids ON (agg_ids.value_4 = averages.user_id)
GROUP BY averages.user_id;
DEBUG: predicate pruning for shardId 13300005
DEBUG: predicate pruning for shardId 13300006
DEBUG: predicate pruning for shardId 13300007
DEBUG: predicate pruning for shardId 13300001
DEBUG: predicate pruning for shardId 13300002
DEBUG: predicate pruning for shardId 13300003
ERROR: cannot perform distributed planning for the given modification
DETAIL: Select query cannot be pushed down to the worker.
-- not supported subqueries in WHERE clause
-- since the selected value in the WHERE is not
-- partition key
INSERT INTO raw_events_second
(user_id)
SELECT user_id
FROM raw_events_first
WHERE user_id IN (SELECT value_1
FROM raw_events_second);
DEBUG: predicate pruning for shardId 13300001
DEBUG: predicate pruning for shardId 13300002
DEBUG: predicate pruning for shardId 13300003
ERROR: cannot perform distributed planning for the given modification
DETAIL: Select query cannot be pushed down to the worker.
-- same as above but slightly more complex
-- since it also includes subquery in FROM as well
INSERT INTO agg_events
(user_id)
SELECT f2.id FROM
(SELECT
id
FROM (SELECT reference_table.user_id AS id
FROM raw_events_first,
reference_table
WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f
INNER JOIN
(SELECT v4,
v1,
id
FROM (SELECT SUM(raw_events_second.value_4) AS v4,
SUM(raw_events_first.value_1) AS v1,
raw_events_second.user_id AS id
FROM raw_events_first,
raw_events_second
WHERE raw_events_first.user_id = raw_events_second.user_id
GROUP BY raw_events_second.user_id
HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2
ON (f.id = f2.id)
WHERE f.id IN (SELECT value_1
FROM raw_events_second);
DEBUG: predicate pruning for shardId 13300001
DEBUG: predicate pruning for shardId 13300002
DEBUG: predicate pruning for shardId 13300003
DEBUG: predicate pruning for shardId 13300005
DEBUG: predicate pruning for shardId 13300006
DEBUG: predicate pruning for shardId 13300007
DEBUG: predicate pruning for shardId 13300001
DEBUG: predicate pruning for shardId 13300002
DEBUG: predicate pruning for shardId 13300003
ERROR: cannot perform distributed planning for the given modification
DETAIL: Select query cannot be pushed down to the worker.
-- we currently not support grouping sets -- we currently not support grouping sets
INSERT INTO agg_events INSERT INTO agg_events
(user_id, (user_id,

View File

@ -326,6 +326,57 @@ WHERE user_id IN (SELECT user_id
FROM raw_events_second FROM raw_events_second
WHERE user_id = 2); WHERE user_id = 2);
INSERT INTO raw_events_second
(user_id)
SELECT user_id
FROM raw_events_first
WHERE user_id IN (SELECT user_id
FROM raw_events_second
WHERE user_id != 2 AND value_1 = 2000)
ON conflict (user_id, value_1) DO NOTHING;
INSERT INTO raw_events_second
(user_id)
SELECT user_id
FROM raw_events_first
WHERE user_id IN (SELECT user_id
FROM raw_events_second WHERE false);
INSERT INTO raw_events_second
(user_id)
SELECT user_id
FROM raw_events_first
WHERE user_id IN (SELECT user_id
FROM raw_events_second
WHERE value_1 = 1000 OR value_1 = 2000 OR value_1 = 3000);
-- lets mix subqueries in FROM clause and subqueries in WHERE
INSERT INTO agg_events
(user_id)
SELECT f2.id FROM
(SELECT
id
FROM (SELECT reference_table.user_id AS id
FROM raw_events_first,
reference_table
WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f
INNER JOIN
(SELECT v4,
v1,
id
FROM (SELECT SUM(raw_events_second.value_4) AS v4,
SUM(raw_events_first.value_1) AS v1,
raw_events_second.user_id AS id
FROM raw_events_first,
raw_events_second
WHERE raw_events_first.user_id = raw_events_second.user_id
GROUP BY raw_events_second.user_id
HAVING SUM(raw_events_second.value_4) > 1000) AS foo2 ) as f2
ON (f.id = f2.id)
WHERE f.id IN (SELECT user_id
FROM raw_events_second);
-- some UPSERTS -- some UPSERTS
INSERT INTO agg_events AS ae INSERT INTO agg_events AS ae
( (
@ -496,7 +547,191 @@ FROM
((SELECT user_id FROM raw_events_first WHERE user_id = 15) EXCEPT ((SELECT user_id FROM raw_events_first WHERE user_id = 15) EXCEPT
(SELECT user_id FROM raw_events_second where user_id = 17)) as foo; (SELECT user_id FROM raw_events_second where user_id = 17)) as foo;
-- unsupported JOIN -- some supported LEFT joins
INSERT INTO agg_events (user_id)
SELECT
raw_events_first.user_id
FROM
raw_events_first LEFT JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.user_id;
INSERT INTO agg_events (user_id)
SELECT
raw_events_second.user_id
FROM
reference_table LEFT JOIN raw_events_second ON reference_table.user_id = raw_events_second.user_id;
INSERT INTO agg_events (user_id)
SELECT
raw_events_first.user_id
FROM
raw_events_first LEFT JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.user_id
WHERE raw_events_first.user_id = 10;
INSERT INTO agg_events (user_id)
SELECT
raw_events_first.user_id
FROM
raw_events_first LEFT JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.user_id
WHERE raw_events_second.user_id = 10 OR raw_events_second.user_id = 11;
INSERT INTO agg_events (user_id)
SELECT
raw_events_first.user_id
FROM
raw_events_first INNER JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.user_id
WHERE raw_events_first.user_id = 10 AND raw_events_first.user_id = 20;
INSERT INTO agg_events (user_id)
SELECT
raw_events_first.user_id
FROM
raw_events_first LEFT JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.user_id
WHERE raw_events_first.user_id = 10 AND raw_events_second.user_id = 20;
INSERT INTO agg_events (user_id)
SELECT
raw_events_first.user_id
FROM
raw_events_first LEFT JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.user_id
WHERE raw_events_first.user_id IN (19, 20, 21);
INSERT INTO agg_events (user_id)
SELECT
raw_events_first.user_id
FROM
raw_events_first INNER JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.user_id
WHERE raw_events_second.user_id IN (19, 20, 21);
-- the following is a very tricky query for Citus
-- although we do not support pushing down JOINs on non-partition
-- columns here it is safe to push it down given that we're looking for
-- a specific value (i.e., value_1 = 12) on the joining column.
-- Note that the query always hits the same shard on raw_events_second
-- and this query wouldn't have worked if we're to use different worker
-- count or shard replication factor
INSERT INTO agg_events
(user_id)
SELECT raw_events_first.user_id
FROM raw_events_first,
raw_events_second
WHERE raw_events_second.user_id = raw_events_first.value_1
AND raw_events_first.value_1 = 12;
-- some unsupported LEFT/INNER JOINs
-- JOIN on one table with partition column other is not
INSERT INTO agg_events (user_id)
SELECT
raw_events_first.user_id
FROM
raw_events_first LEFT JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.value_1;
-- same as the above with INNER JOIN
INSERT INTO agg_events (user_id)
SELECT
raw_events_first.user_id
FROM
raw_events_first INNER JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.value_1;
-- a not meaningful query
INSERT INTO agg_events
(user_id)
SELECT raw_events_second.user_id
FROM raw_events_first,
raw_events_second
WHERE raw_events_first.user_id = raw_events_first.value_1;
-- both tables joined on non-partition columns
INSERT INTO agg_events (user_id)
SELECT
raw_events_first.user_id
FROM
raw_events_first LEFT JOIN raw_events_second ON raw_events_first.value_1 = raw_events_second.value_1;
-- same as the above with INNER JOIN
INSERT INTO agg_events (user_id)
SELECT
raw_events_first.user_id
FROM
raw_events_first INNER JOIN raw_events_second ON raw_events_first.value_1 = raw_events_second.value_1;
-- even if there is a filter on the partition key, since the join is not on the partition key we reject
-- this query
INSERT INTO agg_events (user_id)
SELECT
raw_events_first.user_id
FROM
raw_events_first LEFT JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.value_1
WHERE
raw_events_first.user_id = 10;
-- same as the above with INNER JOIN
INSERT INTO agg_events (user_id)
SELECT
raw_events_first.user_id
FROM
raw_events_first INNER JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.value_1
WHERE raw_events_first.user_id = 10;
-- make things a bit more complicate with IN clauses
INSERT INTO agg_events (user_id)
SELECT
raw_events_first.user_id
FROM
raw_events_first INNER JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.value_1
WHERE raw_events_first.value_1 IN (10, 11,12) OR raw_events_second.user_id IN (1,2,3,4);
-- implicit join on non partition column should also not be pushed down
INSERT INTO agg_events
(user_id)
SELECT raw_events_first.user_id
FROM raw_events_first,
raw_events_second
WHERE raw_events_second.user_id = raw_events_first.value_1;
-- the following is again a tricky query for Citus
-- if the given filter was on value_1 as shown in the above, Citus could
-- push it down. But here the query is refused
INSERT INTO agg_events
(user_id)
SELECT raw_events_first.user_id
FROM raw_events_first,
raw_events_second
WHERE raw_events_second.user_id = raw_events_first.value_1
AND raw_events_first.value_2 = 12;
-- lets do some unsupported query tests with subqueries
-- foo is not joined on the partition key so the query is not
-- pushed down
INSERT INTO agg_events
(user_id, value_4_agg)
SELECT
outer_most.id, max(outer_most.value)
FROM
(
SELECT f2.id as id, f2.v4 as value FROM
(SELECT
id
FROM (SELECT reference_table.user_id AS id
FROM raw_events_first LEFT JOIN
reference_table
ON (raw_events_first.value_1 = reference_table.user_id)) AS foo) as f
INNER JOIN
(SELECT v4,
v1,
id
FROM (SELECT SUM(raw_events_second.value_4) AS v4,
SUM(raw_events_first.value_1) AS v1,
raw_events_second.user_id AS id
FROM raw_events_first,
raw_events_second
WHERE raw_events_first.user_id = raw_events_second.user_id
GROUP BY raw_events_second.user_id
HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2
ON (f.id = f2.id)) as outer_most
GROUP BY
outer_most.id;
INSERT INTO agg_events INSERT INTO agg_events
(value_4_agg, (value_4_agg,
value_1_agg, value_1_agg,
@ -619,7 +854,8 @@ ON (f.id = f2.id);
-- the second part of the query is not routable since -- the second part of the query is not routable since
-- no GROUP BY on the partition column -- GROUP BY not on the partition column (i.e., value_1) and thus join
-- on f.id = f2.id is not on the partition key (instead on the sum of partition key)
INSERT INTO agg_events INSERT INTO agg_events
(user_id) (user_id)
SELECT f.id FROM SELECT f.id FROM
@ -672,6 +908,163 @@ outer_most.id, max(outer_most.value)
ON (f.id != f2.id)) as outer_most ON (f.id != f2.id)) as outer_most
GROUP BY outer_most.id; GROUP BY outer_most.id;
-- cannot pushdown since foo2 is not join on partition key
INSERT INTO agg_events
(user_id, value_4_agg)
SELECT
outer_most.id, max(outer_most.value)
FROM
(
SELECT f2.id as id, f2.v4 as value FROM
(SELECT
id
FROM (SELECT reference_table.user_id AS id
FROM raw_events_first,
reference_table
WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f
INNER JOIN
(SELECT v4,
v1,
id
FROM (SELECT SUM(raw_events_second.value_4) AS v4,
SUM(raw_events_first.value_1) AS v1,
raw_events_second.user_id AS id
FROM raw_events_first,
raw_events_second
WHERE raw_events_first.user_id = raw_events_second.value_1
GROUP BY raw_events_second.user_id
HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2
ON (f.id = f2.id)) as outer_most
GROUP BY
outer_most.id;
-- cannot push down since foo doesn't have en equi join
INSERT INTO agg_events
(user_id, value_4_agg)
SELECT
outer_most.id, max(outer_most.value)
FROM
(
SELECT f2.id as id, f2.v4 as value FROM
(SELECT
id
FROM (SELECT reference_table.user_id AS id
FROM raw_events_first,
reference_table
WHERE raw_events_first.user_id != reference_table.user_id ) AS foo) as f
INNER JOIN
(SELECT v4,
v1,
id
FROM (SELECT SUM(raw_events_second.value_4) AS v4,
SUM(raw_events_first.value_1) AS v1,
raw_events_second.user_id AS id
FROM raw_events_first,
raw_events_second
WHERE raw_events_first.user_id = raw_events_second.user_id
GROUP BY raw_events_second.user_id
HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2
ON (f.id = f2.id)) as outer_most
GROUP BY
outer_most.id;
-- some unsupported LATERAL JOINs
-- join on averages is not on the partition key
INSERT INTO agg_events (user_id, value_4_agg)
SELECT
averages.user_id, avg(averages.value_4)
FROM
(SELECT
raw_events_second.user_id
FROM
reference_table JOIN raw_events_second on (reference_table.user_id = raw_events_second.user_id)
) reference_ids
JOIN LATERAL
(SELECT
user_id, value_4
FROM
raw_events_first WHERE
value_4 = reference_ids.user_id) as averages ON true
GROUP BY averages.user_id;
-- join among reference_ids and averages is not on the partition key
INSERT INTO agg_events (user_id, value_4_agg)
SELECT
averages.user_id, avg(averages.value_4)
FROM
(SELECT
raw_events_second.user_id
FROM
reference_table JOIN raw_events_second on (reference_table.user_id = raw_events_second.user_id)
) reference_ids
JOIN LATERAL
(SELECT
user_id, value_4
FROM
raw_events_first) as averages ON averages.value_4 = reference_ids.user_id
GROUP BY averages.user_id;
-- join among the agg_ids and averages is not on the partition key
INSERT INTO agg_events (user_id, value_4_agg)
SELECT
averages.user_id, avg(averages.value_4)
FROM
(SELECT
raw_events_second.user_id
FROM
reference_table JOIN raw_events_second on (reference_table.user_id = raw_events_second.user_id)
) reference_ids
JOIN LATERAL
(SELECT
user_id, value_4
FROM
raw_events_first) as averages ON averages.user_id = reference_ids.user_id
JOIN LATERAL
(SELECT user_id, value_4 FROM agg_events) as agg_ids ON (agg_ids.value_4 = averages.user_id)
GROUP BY averages.user_id;
-- not supported subqueries in WHERE clause
-- since the selected value in the WHERE is not
-- partition key
INSERT INTO raw_events_second
(user_id)
SELECT user_id
FROM raw_events_first
WHERE user_id IN (SELECT value_1
FROM raw_events_second);
-- same as above but slightly more complex
-- since it also includes subquery in FROM as well
INSERT INTO agg_events
(user_id)
SELECT f2.id FROM
(SELECT
id
FROM (SELECT reference_table.user_id AS id
FROM raw_events_first,
reference_table
WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f
INNER JOIN
(SELECT v4,
v1,
id
FROM (SELECT SUM(raw_events_second.value_4) AS v4,
SUM(raw_events_first.value_1) AS v1,
raw_events_second.user_id AS id
FROM raw_events_first,
raw_events_second
WHERE raw_events_first.user_id = raw_events_second.user_id
GROUP BY raw_events_second.user_id
HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2
ON (f.id = f2.id)
WHERE f.id IN (SELECT value_1
FROM raw_events_second);
-- we currently not support grouping sets -- we currently not support grouping sets
INSERT INTO agg_events INSERT INTO agg_events
(user_id, (user_id,