mirror of https://github.com/citusdata/citus.git
Run update quals code only for queries with outer joins, update planners for error handling, update test cases.
parent
b8fd1e64ff
commit
13405e7871
|
|
@ -319,6 +319,35 @@ DefineQualsForShardInterval(RelationShard *relationShard, int attnum, int rtinde
|
|||
}
|
||||
|
||||
|
||||
/*
|
||||
* UpdateWhereClauseForOuterJoinWalker walks over the query tree and
|
||||
* updates the WHERE clause for outer joins satisfying feasibility conditions.
|
||||
*/
|
||||
bool
|
||||
UpdateWhereClauseForOuterJoinWalker(Node *node, List *relationShardList)
|
||||
{
|
||||
if (node == NULL)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
if (IsA(node, Query))
|
||||
{
|
||||
UpdateWhereClauseForOuterJoin((Query *) node, relationShardList);
|
||||
return query_tree_walker((Query *) node, UpdateWhereClauseForOuterJoinWalker,
|
||||
relationShardList, QTW_EXAMINE_RTES_BEFORE);
|
||||
}
|
||||
|
||||
if (!IsA(node, RangeTblEntry))
|
||||
{
|
||||
return expression_tree_walker(node, UpdateWhereClauseForOuterJoinWalker,
|
||||
relationShardList);
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* UpdateWhereClauseForOuterJoin
|
||||
*
|
||||
|
|
@ -442,7 +471,6 @@ UpdateRelationToShardNames(Node *node, List *relationShardList)
|
|||
/* want to look at all RTEs, even in subqueries, CTEs and such */
|
||||
if (IsA(node, Query))
|
||||
{
|
||||
UpdateWhereClauseForOuterJoin((Query *) node, relationShardList); /* TODO, check this again, we might want to skip this for fast path queries */
|
||||
return query_tree_walker((Query *) node, UpdateRelationToShardNames,
|
||||
relationShardList, QTW_EXAMINE_RTES_BEFORE);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -75,17 +75,6 @@
|
|||
#endif
|
||||
|
||||
|
||||
/* RouterPlanType is used to determine the router plan to invoke */
|
||||
typedef enum RouterPlanType
|
||||
{
|
||||
INSERT_SELECT_INTO_CITUS_TABLE,
|
||||
INSERT_SELECT_INTO_LOCAL_TABLE,
|
||||
DML_QUERY,
|
||||
SELECT_QUERY,
|
||||
MERGE_QUERY,
|
||||
REPLAN_WITH_BOUND_PARAMETERS
|
||||
} RouterPlanType;
|
||||
|
||||
static List *plannerRestrictionContextList = NIL;
|
||||
int MultiTaskQueryLogLevel = CITUS_LOG_LEVEL_OFF; /* multi-task query log level */
|
||||
static uint64 NextPlanId = 1;
|
||||
|
|
@ -1097,7 +1086,8 @@ CreateDistributedPlan(uint64 planId, bool allowRecursivePlanning, Query *origina
|
|||
* set_plan_references>add_rtes_to_flat_rtable>add_rte_to_flat_rtable.
|
||||
*/
|
||||
List *subPlanList = GenerateSubplansForSubqueriesAndCTEs(planId, originalQuery,
|
||||
plannerRestrictionContext);
|
||||
plannerRestrictionContext,
|
||||
routerPlan);
|
||||
|
||||
/*
|
||||
* If subqueries were recursively planned then we need to replan the query
|
||||
|
|
|
|||
|
|
@ -766,7 +766,8 @@ DistributedInsertSelectSupported(Query *queryTree, RangeTblEntry *insertRte,
|
|||
{
|
||||
/* first apply toplevel pushdown checks to SELECT query */
|
||||
error =
|
||||
DeferErrorIfUnsupportedSubqueryPushdown(subquery, plannerRestrictionContext);
|
||||
DeferErrorIfUnsupportedSubqueryPushdown(subquery, plannerRestrictionContext,
|
||||
true);
|
||||
if (error)
|
||||
{
|
||||
return error;
|
||||
|
|
|
|||
|
|
@ -1149,7 +1149,8 @@ DeferErrorIfRoutableMergeNotSupported(Query *query, List *rangeTableList,
|
|||
{
|
||||
deferredError =
|
||||
DeferErrorIfUnsupportedSubqueryPushdown(query,
|
||||
plannerRestrictionContext);
|
||||
plannerRestrictionContext,
|
||||
true);
|
||||
if (deferredError)
|
||||
{
|
||||
ereport(DEBUG1, (errmsg("Sub-query is not pushable, try repartitioning")));
|
||||
|
|
|
|||
|
|
@ -168,13 +168,15 @@ static uint32 HashPartitionCount(void);
|
|||
static Job * BuildJobTreeTaskList(Job *jobTree,
|
||||
PlannerRestrictionContext *plannerRestrictionContext);
|
||||
static bool IsInnerTableOfOuterJoin(RelationRestriction *relationRestriction,
|
||||
Bitmapset *distributedTables);
|
||||
Bitmapset *distributedTables,
|
||||
bool *outerPartHasDistributedTable);
|
||||
static void ErrorIfUnsupportedShardDistribution(Query *query);
|
||||
static Task * QueryPushdownTaskCreate(Query *originalQuery, int shardIndex,
|
||||
RelationRestrictionContext *restrictionContext,
|
||||
uint32 taskId,
|
||||
TaskType taskType,
|
||||
bool modifyRequiresCoordinatorEvaluation,
|
||||
bool updateQualsForOuterJoin,
|
||||
DeferredErrorMessage **planningError);
|
||||
static List * SqlTaskList(Job *job);
|
||||
static bool DependsOnHashPartitionJob(Job *job);
|
||||
|
|
@ -2248,6 +2250,8 @@ QueryPushdownSqlTaskList(Query *query, uint64 jobId,
|
|||
}
|
||||
|
||||
/* In the second loop, populate taskRequiredForShardIndex */
|
||||
bool updateQualsForOuterJoin = false;
|
||||
bool outerPartHasDistributedTable = false;
|
||||
forboth_ptr(prunedShardList, prunedRelationShardList,
|
||||
relationRestriction, relationRestrictionContext->relationRestrictionList)
|
||||
{
|
||||
|
|
@ -2270,9 +2274,21 @@ QueryPushdownSqlTaskList(Query *query, uint64 jobId,
|
|||
* the table is part of the non-outer side of the join and the outer side has a
|
||||
* distributed table.
|
||||
*/
|
||||
if (IsInnerTableOfOuterJoin(relationRestriction, distributedTableIndex))
|
||||
if (IsInnerTableOfOuterJoin(relationRestriction, distributedTableIndex,
|
||||
&outerPartHasDistributedTable))
|
||||
{
|
||||
continue;
|
||||
if (outerPartHasDistributedTable)
|
||||
{
|
||||
/* we can skip the shards from this relation restriction */
|
||||
continue;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* The outer part does not include distributed tables, we can not skip shards.
|
||||
* Also, we will possibly update the quals of the outer relation for recurring join push down, mark here.
|
||||
*/
|
||||
updateQualsForOuterJoin = true;
|
||||
}
|
||||
}
|
||||
|
||||
ShardInterval *shardInterval = NULL;
|
||||
|
|
@ -2305,6 +2321,7 @@ QueryPushdownSqlTaskList(Query *query, uint64 jobId,
|
|||
taskIdIndex,
|
||||
taskType,
|
||||
modifyRequiresCoordinatorEvaluation,
|
||||
updateQualsForOuterJoin,
|
||||
planningError);
|
||||
if (*planningError != NULL)
|
||||
{
|
||||
|
|
@ -2337,13 +2354,16 @@ QueryPushdownSqlTaskList(Query *query, uint64 jobId,
|
|||
* RelationRestriction if the table accessed for this relation is
|
||||
* a) in an outer join
|
||||
* b) on the inner part of said join
|
||||
* c) the outer part of the join has a distributed table
|
||||
*
|
||||
* The function also sets outerPartHasDistributedTable if the outer part
|
||||
* of the corresponding join has a distributed table.
|
||||
*
|
||||
* The function returns true only if all three conditions above hold true.
|
||||
*/
|
||||
static bool
|
||||
IsInnerTableOfOuterJoin(RelationRestriction *relationRestriction,
|
||||
Bitmapset *distributedTables)
|
||||
Bitmapset *distributedTables,
|
||||
bool *outerPartHasDistributedTable)
|
||||
{
|
||||
RestrictInfo *joinInfo = NULL;
|
||||
foreach_declared_ptr(joinInfo, relationRestriction->relOptInfo->joininfo)
|
||||
|
|
@ -2364,14 +2384,12 @@ IsInnerTableOfOuterJoin(RelationRestriction *relationRestriction,
|
|||
if (!isInOuter)
|
||||
{
|
||||
/* this table is joined in the inner part of an outer join */
|
||||
/* check if the outer part has a distributed relation */
|
||||
bool outerPartHasDistributedTable = bms_overlap(joinInfo->outer_relids,
|
||||
distributedTables);
|
||||
if (outerPartHasDistributedTable)
|
||||
{
|
||||
/* this is an inner table of an outer join with a distributed table */
|
||||
return true;
|
||||
}
|
||||
/* set if the outer part has a distributed relation */
|
||||
*outerPartHasDistributedTable = bms_overlap(joinInfo->outer_relids,
|
||||
distributedTables);
|
||||
|
||||
/* this is an inner table of an outer join */
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -2487,6 +2505,7 @@ static Task *
|
|||
QueryPushdownTaskCreate(Query *originalQuery, int shardIndex,
|
||||
RelationRestrictionContext *restrictionContext, uint32 taskId,
|
||||
TaskType taskType, bool modifyRequiresCoordinatorEvaluation,
|
||||
bool updateQualsForOuterJoin,
|
||||
DeferredErrorMessage **planningError)
|
||||
{
|
||||
Query *taskQuery = copyObject(originalQuery);
|
||||
|
|
@ -2579,6 +2598,10 @@ QueryPushdownTaskCreate(Query *originalQuery, int shardIndex,
|
|||
*/
|
||||
UpdateRelationToShardNames((Node *) taskQuery, relationShardList);
|
||||
|
||||
if (updateQualsForOuterJoin)
|
||||
{
|
||||
UpdateWhereClauseForOuterJoinWalker((Node *) taskQuery, relationShardList);
|
||||
}
|
||||
|
||||
/*
|
||||
* Ands are made implicit during shard pruning, as predicate comparison and
|
||||
|
|
|
|||
|
|
@ -1321,7 +1321,8 @@ MultiShardUpdateDeleteSupported(Query *originalQuery,
|
|||
{
|
||||
errorMessage = DeferErrorIfUnsupportedSubqueryPushdown(
|
||||
originalQuery,
|
||||
plannerRestrictionContext);
|
||||
plannerRestrictionContext,
|
||||
true);
|
||||
}
|
||||
|
||||
return errorMessage;
|
||||
|
|
|
|||
|
|
@ -88,7 +88,7 @@ static bool WindowPartitionOnDistributionColumn(Query *query);
|
|||
static DeferredErrorMessage * DeferErrorIfFromClauseRecurs(Query *queryTree);
|
||||
static RecurringTuplesType FromClauseRecurringTupleType(Query *queryTree);
|
||||
static DeferredErrorMessage * DeferredErrorIfUnsupportedRecurringTuplesJoin(
|
||||
PlannerRestrictionContext *plannerRestrictionContext);
|
||||
PlannerRestrictionContext *plannerRestrictionContext, bool plannerPhase);
|
||||
static DeferredErrorMessage * DeferErrorIfUnsupportedTableCombination(Query *queryTree);
|
||||
static DeferredErrorMessage * DeferErrorIfSubqueryRequiresMerge(Query *subqueryTree, bool
|
||||
lateral,
|
||||
|
|
@ -536,9 +536,16 @@ SubqueryMultiNodeTree(Query *originalQuery, Query *queryTree,
|
|||
RaiseDeferredError(unsupportedQueryError, ERROR);
|
||||
}
|
||||
|
||||
/*
|
||||
* We reach here at the third step of the planning, thus we already checked for pushed down
|
||||
* feasibility of recurring outer joins, at this step the unsupported outer join check should
|
||||
* only generate an error when there is a lateral subquery.
|
||||
*/
|
||||
DeferredErrorMessage *subqueryPushdownError = DeferErrorIfUnsupportedSubqueryPushdown(
|
||||
originalQuery,
|
||||
plannerRestrictionContext);
|
||||
plannerRestrictionContext,
|
||||
false);
|
||||
|
||||
if (subqueryPushdownError != NULL)
|
||||
{
|
||||
RaiseDeferredError(subqueryPushdownError, ERROR);
|
||||
|
|
@ -561,7 +568,8 @@ SubqueryMultiNodeTree(Query *originalQuery, Query *queryTree,
|
|||
DeferredErrorMessage *
|
||||
DeferErrorIfUnsupportedSubqueryPushdown(Query *originalQuery,
|
||||
PlannerRestrictionContext *
|
||||
plannerRestrictionContext)
|
||||
plannerRestrictionContext,
|
||||
bool plannerPhase)
|
||||
{
|
||||
bool outerMostQueryHasLimit = false;
|
||||
ListCell *subqueryCell = NULL;
|
||||
|
|
@ -613,7 +621,8 @@ DeferErrorIfUnsupportedSubqueryPushdown(Query *originalQuery,
|
|||
return error;
|
||||
}
|
||||
|
||||
error = DeferredErrorIfUnsupportedRecurringTuplesJoin(plannerRestrictionContext);
|
||||
error = DeferredErrorIfUnsupportedRecurringTuplesJoin(plannerRestrictionContext,
|
||||
plannerPhase);
|
||||
if (error)
|
||||
{
|
||||
return error;
|
||||
|
|
@ -771,12 +780,17 @@ FromClauseRecurringTupleType(Query *queryTree)
|
|||
* DeferredErrorIfUnsupportedRecurringTuplesJoin returns a DeferredError if
|
||||
* there exists a join between a recurring rel (such as reference tables
|
||||
* and intermediate_results) and a non-recurring rel (such as distributed tables
|
||||
* and subqueries that we can push-down to worker nodes) that can return an
|
||||
* incorrect result set due to recurring tuples coming from the recurring rel.
|
||||
* and subqueries that we can push-down to worker nodes) when plannerPhase is
|
||||
* true, so that we try to recursively plan these joins.
|
||||
* During recursive planning phase, we either replace those with recursive plans
|
||||
* or leave them if it is safe to push-down.
|
||||
* During the logical planning phase (plannerPhase is false), we only check if
|
||||
* such queries have lateral subqueries.
|
||||
*/
|
||||
static DeferredErrorMessage *
|
||||
DeferredErrorIfUnsupportedRecurringTuplesJoin(
|
||||
PlannerRestrictionContext *plannerRestrictionContext)
|
||||
PlannerRestrictionContext *plannerRestrictionContext,
|
||||
bool plannerPhase)
|
||||
{
|
||||
List *joinRestrictionList =
|
||||
plannerRestrictionContext->joinRestrictionContext->joinRestrictionList;
|
||||
|
|
@ -829,6 +843,16 @@ DeferredErrorIfUnsupportedRecurringTuplesJoin(
|
|||
|
||||
if (RelationInfoContainsOnlyRecurringTuples(plannerInfo, outerrelRelids))
|
||||
{
|
||||
if (plannerPhase)
|
||||
{
|
||||
/*
|
||||
* We have not yet tried to recursively plan this join, we should
|
||||
* defer an error.
|
||||
*/
|
||||
recurType = FetchFirstRecurType(plannerInfo, outerrelRelids);
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
* Inner side contains distributed rels but the outer side only
|
||||
* contains recurring rels, might be an unsupported lateral outer
|
||||
|
|
|
|||
|
|
@ -110,6 +110,7 @@ struct RecursivePlanningContextInternal
|
|||
List *subPlanList;
|
||||
PlannerRestrictionContext *plannerRestrictionContext;
|
||||
bool restrictionEquivalenceCheck;
|
||||
bool forceRecursivePlanning;
|
||||
};
|
||||
|
||||
/* track depth of current recursive planner query */
|
||||
|
|
@ -214,7 +215,8 @@ static bool hasPseudoconstantQuals(
|
|||
*/
|
||||
List *
|
||||
GenerateSubplansForSubqueriesAndCTEs(uint64 planId, Query *originalQuery,
|
||||
PlannerRestrictionContext *plannerRestrictionContext)
|
||||
PlannerRestrictionContext *plannerRestrictionContext,
|
||||
RouterPlanType routerPlan)
|
||||
{
|
||||
RecursivePlanningContext context;
|
||||
|
||||
|
|
@ -228,6 +230,17 @@ GenerateSubplansForSubqueriesAndCTEs(uint64 planId, Query *originalQuery,
|
|||
context.planId = planId;
|
||||
context.subPlanList = NIL;
|
||||
context.plannerRestrictionContext = plannerRestrictionContext;
|
||||
context.forceRecursivePlanning = false;
|
||||
|
||||
/*
|
||||
* Force recursive planning of recurring outer joins for these queries
|
||||
* since the planning error from the previous step is generated prior to
|
||||
* the actual planning attempt.
|
||||
*/
|
||||
if (routerPlan == DML_QUERY)
|
||||
{
|
||||
context.forceRecursivePlanning = true;
|
||||
}
|
||||
|
||||
/*
|
||||
* Calculating the distribution key equality upfront is a trade-off for us.
|
||||
|
|
@ -756,7 +769,8 @@ RecursivelyPlanRecurringTupleOuterJoinWalker(Node *node, Query *query,
|
|||
/* <recurring> left join <distributed> */
|
||||
if (leftNodeRecurs && !rightNodeRecurs)
|
||||
{
|
||||
if (chainedJoin || !CheckPushDownFeasibilityOuterJoin(joinExpr,
|
||||
if (recursivePlanningContext->forceRecursivePlanning ||
|
||||
chainedJoin || !CheckPushDownFeasibilityOuterJoin(joinExpr,
|
||||
query))
|
||||
{
|
||||
ereport(DEBUG1, (errmsg("recursively planning right side of "
|
||||
|
|
@ -787,7 +801,8 @@ RecursivelyPlanRecurringTupleOuterJoinWalker(Node *node, Query *query,
|
|||
/* <distributed> right join <recurring> */
|
||||
if (!leftNodeRecurs && rightNodeRecurs)
|
||||
{
|
||||
if (chainedJoin || !CheckPushDownFeasibilityOuterJoin(joinExpr,
|
||||
if (recursivePlanningContext->forceRecursivePlanning ||
|
||||
chainedJoin || !CheckPushDownFeasibilityOuterJoin(joinExpr,
|
||||
query))
|
||||
{
|
||||
ereport(DEBUG1, (errmsg("recursively planning left side of "
|
||||
|
|
|
|||
|
|
@ -26,6 +26,7 @@
|
|||
extern void RebuildQueryStrings(Job *workerJob);
|
||||
extern bool UpdateRelationToShardNames(Node *node, List *relationShardList);
|
||||
extern void UpdateWhereClauseForOuterJoin(Query *query, List *relationShardList);
|
||||
extern bool UpdateWhereClauseForOuterJoinWalker(Node *node, List *relationShardList);
|
||||
Node * DefineQualsForShardInterval(RelationShard *relationShard, int attnum, int
|
||||
outerRtIndex);
|
||||
extern void SetTaskQueryIfShouldLazyDeparse(Task *task, Query *query);
|
||||
|
|
|
|||
|
|
@ -33,6 +33,18 @@
|
|||
extern int PlannerLevel;
|
||||
|
||||
|
||||
/* RouterPlanType is used to determine the router plan to invoke */
|
||||
typedef enum RouterPlanType
|
||||
{
|
||||
INSERT_SELECT_INTO_CITUS_TABLE,
|
||||
INSERT_SELECT_INTO_LOCAL_TABLE,
|
||||
DML_QUERY,
|
||||
SELECT_QUERY,
|
||||
MERGE_QUERY,
|
||||
REPLAN_WITH_BOUND_PARAMETERS
|
||||
} RouterPlanType;
|
||||
|
||||
|
||||
typedef struct RelationRestrictionContext
|
||||
{
|
||||
bool allReferenceTables;
|
||||
|
|
|
|||
|
|
@ -37,11 +37,12 @@ extern MultiNode * SubqueryMultiNodeTree(Query *originalQuery,
|
|||
Query *queryTree,
|
||||
PlannerRestrictionContext *
|
||||
plannerRestrictionContext);
|
||||
extern DeferredErrorMessage * DeferErrorIfUnsupportedSubqueryPushdown(Query *
|
||||
originalQuery,
|
||||
PlannerRestrictionContext
|
||||
*
|
||||
plannerRestrictionContext);
|
||||
extern DeferredErrorMessage * DeferErrorIfUnsupportedSubqueryPushdown(
|
||||
Query *originalQuery,
|
||||
PlannerRestrictionContext
|
||||
*
|
||||
plannerRestrictionContext,
|
||||
bool plannerPhase);
|
||||
extern DeferredErrorMessage * DeferErrorIfCannotPushdownSubquery(Query *subqueryTree,
|
||||
bool
|
||||
outerMostQueryHasLimit);
|
||||
|
|
|
|||
|
|
@ -16,6 +16,7 @@
|
|||
|
||||
#include "pg_version_constants.h"
|
||||
|
||||
#include "distributed/distributed_planner.h"
|
||||
#include "distributed/errormessage.h"
|
||||
#include "distributed/log_utils.h"
|
||||
#include "distributed/relation_restriction_equivalence.h"
|
||||
|
|
@ -33,7 +34,8 @@ extern PlannerRestrictionContext * GetPlannerRestrictionContext(
|
|||
RecursivePlanningContext *recursivePlanningContext);
|
||||
extern List * GenerateSubplansForSubqueriesAndCTEs(uint64 planId, Query *originalQuery,
|
||||
PlannerRestrictionContext *
|
||||
plannerRestrictionContext);
|
||||
plannerRestrictionContext,
|
||||
RouterPlanType routerPlan);
|
||||
extern char * GenerateResultId(uint64 planId, uint32 subPlanId);
|
||||
extern Query * BuildSubPlanResultQuery(List *targetEntryList, List *columnAliasList,
|
||||
char *resultId);
|
||||
|
|
|
|||
|
|
@ -760,15 +760,28 @@ DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_133000
|
|||
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (multi_insert_select.raw_events_first_13300001 raw_events_first LEFT JOIN multi_insert_select.raw_events_second_13300005 raw_events_second ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE (raw_events_first.user_id IS NOT NULL)
|
||||
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (multi_insert_select.raw_events_first_13300002 raw_events_first LEFT JOIN multi_insert_select.raw_events_second_13300006 raw_events_second ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE (raw_events_first.user_id IS NOT NULL)
|
||||
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (multi_insert_select.raw_events_first_13300003 raw_events_first LEFT JOIN multi_insert_select.raw_events_second_13300007 raw_events_second ON ((raw_events_first.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE (raw_events_first.user_id IS NOT NULL)
|
||||
SET client_min_messages to debug3;
|
||||
INSERT INTO agg_events (user_id)
|
||||
SELECT
|
||||
raw_events_second.user_id
|
||||
FROM
|
||||
reference_table LEFT JOIN raw_events_second ON reference_table.user_id = raw_events_second.user_id;
|
||||
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300008 AS citus_table_alias (user_id) SELECT raw_events_second.user_id FROM (multi_insert_select.reference_table_13300012 reference_table LEFT JOIN multi_insert_select.raw_events_second_13300004 raw_events_second ON ((reference_table.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE ((raw_events_second.user_id IS NOT NULL) AND ((reference_table.user_id IS NULL) OR ((btint4cmp('-2147483648'::integer, hashint4(reference_table.user_id)) OPERATOR(pg_catalog.<) 0) AND (btint4cmp(hashint4(reference_table.user_id), '-1073741825'::integer) OPERATOR(pg_catalog.<=) 0))))
|
||||
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300009 AS citus_table_alias (user_id) SELECT raw_events_second.user_id FROM (multi_insert_select.reference_table_13300012 reference_table LEFT JOIN multi_insert_select.raw_events_second_13300005 raw_events_second ON ((reference_table.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE ((raw_events_second.user_id IS NOT NULL) AND ((btint4cmp('-1073741824'::integer, hashint4(reference_table.user_id)) OPERATOR(pg_catalog.<) 0) AND (btint4cmp(hashint4(reference_table.user_id), '-1'::integer) OPERATOR(pg_catalog.<=) 0)))
|
||||
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300010 AS citus_table_alias (user_id) SELECT raw_events_second.user_id FROM (multi_insert_select.reference_table_13300012 reference_table LEFT JOIN multi_insert_select.raw_events_second_13300006 raw_events_second ON ((reference_table.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE ((raw_events_second.user_id IS NOT NULL) AND ((btint4cmp(0, hashint4(reference_table.user_id)) OPERATOR(pg_catalog.<) 0) AND (btint4cmp(hashint4(reference_table.user_id), 1073741823) OPERATOR(pg_catalog.<=) 0)))
|
||||
DEBUG: distributed statement: INSERT INTO multi_insert_select.agg_events_13300011 AS citus_table_alias (user_id) SELECT raw_events_second.user_id FROM (multi_insert_select.reference_table_13300012 reference_table LEFT JOIN multi_insert_select.raw_events_second_13300007 raw_events_second ON ((reference_table.user_id OPERATOR(pg_catalog.=) raw_events_second.user_id))) WHERE ((raw_events_second.user_id IS NOT NULL) AND ((btint4cmp(1073741824, hashint4(reference_table.user_id)) OPERATOR(pg_catalog.<) 0) AND (btint4cmp(hashint4(reference_table.user_id), 2147483647) OPERATOR(pg_catalog.<=) 0)))
|
||||
DEBUG: no shard pruning constraints on raw_events_second found
|
||||
DEBUG: shard count after pruning for raw_events_second: 4
|
||||
DEBUG: cannot perform a lateral outer join when a distributed subquery references a reference table
|
||||
DEBUG: no shard pruning constraints on raw_events_second found
|
||||
DEBUG: shard count after pruning for raw_events_second: 4
|
||||
DEBUG: Router planner cannot handle multi-shard select queries
|
||||
DEBUG: a push down safe left join with recurring left side
|
||||
DEBUG: no shard pruning constraints on raw_events_second found
|
||||
DEBUG: shard count after pruning for raw_events_second: 4
|
||||
DEBUG: assigned task to node localhost:xxxxx
|
||||
DEBUG: assigned task to node localhost:xxxxx
|
||||
DEBUG: assigned task to node localhost:xxxxx
|
||||
DEBUG: assigned task to node localhost:xxxxx
|
||||
DEBUG: performing repartitioned INSERT ... SELECT
|
||||
DEBUG: partitioning SELECT query by column index 0 with name 'user_id'
|
||||
SET client_min_messages to debug2;
|
||||
INSERT INTO agg_events (user_id)
|
||||
SELECT
|
||||
raw_events_first.user_id
|
||||
|
|
@ -3372,17 +3385,21 @@ $$);
|
|||
Task Count: 1
|
||||
(4 rows)
|
||||
|
||||
-- verify that insert select can be pushed down when we have reference table in outside of outer join.
|
||||
SELECT coordinator_plan($$
|
||||
-- verify that insert select cannot be pushed down when we have reference table in outside of outer join in a chained-join.
|
||||
SELECT coordinator_plan($$
|
||||
EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 SELECT a.id FROM dist_table_5 a LEFT JOIN ref_table_1 b ON (true) RIGHT JOIN ref_table_1 c ON (true);
|
||||
$$);
|
||||
coordinator_plan
|
||||
coordinator_plan
|
||||
---------------------------------------------------------------------
|
||||
Custom Scan (Citus Adaptive)
|
||||
Task Count: 4
|
||||
(2 rows)
|
||||
Custom Scan (Citus INSERT ... SELECT)
|
||||
INSERT/SELECT method: pull to coordinator
|
||||
-> Custom Scan (Citus Adaptive)
|
||||
-> Distributed Subplan XXX_1
|
||||
-> Custom Scan (Citus Adaptive)
|
||||
Task Count: 4
|
||||
(6 rows)
|
||||
|
||||
-- verify that insert select can be pushed down when we have reference table in outside of left join.
|
||||
-- verify that insert select can be pushed down when we have reference table in outside of outer join.
|
||||
SELECT coordinator_plan($$
|
||||
EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 SELECT id FROM ref_table_1 LEFT JOIN dist_table_5 USING(id);
|
||||
$$);
|
||||
|
|
@ -3394,7 +3411,7 @@ $$);
|
|||
Task Count: 4
|
||||
(4 rows)
|
||||
|
||||
-- verify that insert select cannot be pushed down when we have reference table in outside of left join and joined on non-distribution column.
|
||||
-- verify that insert select cannot be pushed down when we have reference table in outside of left join and joined on non-partition column.
|
||||
SELECT coordinator_plan($$
|
||||
EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 SELECT ref_table_1.id FROM ref_table_1 LEFT JOIN dist_table_5 ON ref_table_1.id = dist_table_5.id2;
|
||||
$$);
|
||||
|
|
|
|||
|
|
@ -152,9 +152,9 @@ SELECT
|
|||
FROM
|
||||
users_table RIGHT JOIN users_reference_table USING (user_id)
|
||||
WHERE
|
||||
users_reference_table.value_2 IN
|
||||
(users_reference_table.value_2, random()*0) IN
|
||||
(SELECT
|
||||
value_2
|
||||
value_2, 0
|
||||
FROM
|
||||
events_table
|
||||
WHERE
|
||||
|
|
|
|||
|
|
@ -1996,8 +1996,7 @@ WITH cte AS (
|
|||
)
|
||||
SELECT (a+5)*2, b FROM cte;
|
||||
DEBUG: CTE cte is going to be inlined via distributed planning
|
||||
DEBUG: cannot push down this subquery
|
||||
DETAIL: Distinct on columns without partition column is currently unsupported
|
||||
DEBUG: cannot perform a lateral outer join when a distributed subquery references a reference table
|
||||
DEBUG: generating subplan XXX_1 for subquery SELECT DISTINCT reference_table.a, 1 AS b FROM (query_single_shard_table.distributed_table RIGHT JOIN query_single_shard_table.reference_table USING (a))
|
||||
DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT ((a OPERATOR(pg_catalog.+) 5) OPERATOR(pg_catalog.*) 2) AS user_id, b AS value_1 FROM (SELECT intermediate_result.a, intermediate_result.b FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer)) cte
|
||||
DEBUG: Collecting INSERT ... SELECT results on coordinator
|
||||
|
|
|
|||
|
|
@ -1820,8 +1820,10 @@ BEGIN;
|
|||
) q
|
||||
WHERE dist_5.a = q.a
|
||||
RETURNING *;
|
||||
DEBUG: generating subplan XXX_1 for subquery SELECT t1.a, t1.b FROM (recurring_outer_join.ref_1 t1 LEFT JOIN (SELECT DISTINCT ON (t2.a) t2.a, t2.b FROM recurring_outer_join.dist_1 t2 WHERE (EXISTS (SELECT t4.a, t4.b FROM recurring_outer_join.dist_1 t4 WHERE (t4.a OPERATOR(pg_catalog.=) t2.a))) ORDER BY t2.a, t2.b) t3 USING (a))
|
||||
DEBUG: Plan XXX query after replacing subqueries and CTEs: DELETE FROM recurring_outer_join.dist_5 USING (SELECT intermediate_result.a, intermediate_result.b FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer)) q WHERE (dist_5.a OPERATOR(pg_catalog.=) q.a) RETURNING dist_5.a, dist_5.b, q.a, q.b
|
||||
DEBUG: recursively planning right side of the left join since the outer side is a recurring rel
|
||||
DEBUG: recursively planning the distributed subquery since it is part of a distributed join node that is outer joined with a recurring rel
|
||||
DEBUG: generating subplan XXX_1 for subquery SELECT DISTINCT ON (a) a, b FROM recurring_outer_join.dist_1 t2 WHERE (EXISTS (SELECT t4.a, t4.b FROM recurring_outer_join.dist_1 t4 WHERE (t4.a OPERATOR(pg_catalog.=) t2.a))) ORDER BY a, b
|
||||
DEBUG: Plan XXX query after replacing subqueries and CTEs: DELETE FROM recurring_outer_join.dist_5 USING (SELECT t1.a, t1.b FROM (recurring_outer_join.ref_1 t1 LEFT JOIN (SELECT intermediate_result.a, intermediate_result.b FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer)) t3 USING (a))) q WHERE (dist_5.a OPERATOR(pg_catalog.=) q.a) RETURNING dist_5.a, dist_5.b, q.a, q.b
|
||||
a | b | a | b
|
||||
---------------------------------------------------------------------
|
||||
1 | 10 | 1 | 11
|
||||
|
|
@ -1849,8 +1851,10 @@ BEGIN;
|
|||
USING (a)
|
||||
)
|
||||
RETURNING *;
|
||||
DEBUG: generating subplan XXX_1 for subquery SELECT t1.a FROM (recurring_outer_join.ref_1 t1 LEFT JOIN (SELECT t2.a, t2.b FROM recurring_outer_join.dist_1 t2 WHERE (EXISTS (SELECT t4.a, t4.b FROM recurring_outer_join.dist_1 t4 WHERE (t4.a OPERATOR(pg_catalog.=) t2.a)))) t3 USING (a))
|
||||
DEBUG: Plan XXX query after replacing subqueries and CTEs: UPDATE recurring_outer_join.dist_5 SET b = 10 WHERE (a OPERATOR(pg_catalog.=) ANY (SELECT intermediate_result.a FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer))) RETURNING a, b
|
||||
DEBUG: recursively planning right side of the left join since the outer side is a recurring rel
|
||||
DEBUG: recursively planning the distributed subquery since it is part of a distributed join node that is outer joined with a recurring rel
|
||||
DEBUG: generating subplan XXX_1 for subquery SELECT a, b FROM recurring_outer_join.dist_1 t2 WHERE (EXISTS (SELECT t4.a, t4.b FROM recurring_outer_join.dist_1 t4 WHERE (t4.a OPERATOR(pg_catalog.=) t2.a)))
|
||||
DEBUG: Plan XXX query after replacing subqueries and CTEs: UPDATE recurring_outer_join.dist_5 SET b = 10 WHERE (a OPERATOR(pg_catalog.=) ANY (SELECT t1.a FROM (recurring_outer_join.ref_1 t1 LEFT JOIN (SELECT intermediate_result.a, intermediate_result.b FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer)) t3 USING (a)))) RETURNING a, b
|
||||
a | b
|
||||
---------------------------------------------------------------------
|
||||
1 | 10
|
||||
|
|
@ -1871,8 +1875,7 @@ BEGIN;
|
|||
FROM ref_1 t1
|
||||
LEFT JOIN dist_1 t2
|
||||
ON (t1.a = t2.a);
|
||||
DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match
|
||||
DETAIL: The target table's partition column should correspond to a partition column in the subquery.
|
||||
DEBUG: cannot perform a lateral outer join when a distributed subquery references a reference table
|
||||
DEBUG: performing repartitioned INSERT ... SELECT
|
||||
ROLLBACK;
|
||||
-- INSERT .. SELECT: pull to coordinator
|
||||
|
|
@ -1883,8 +1886,7 @@ BEGIN;
|
|||
FROM ref_1 t1
|
||||
LEFT JOIN dist_1 t2
|
||||
ON (t1.b = t2.b);
|
||||
DEBUG: cannot perform distributed INSERT INTO ... SELECT because the partition columns in the source table and subquery do not match
|
||||
DETAIL: The target table's partition column should correspond to a partition column in the subquery.
|
||||
DEBUG: cannot perform a lateral outer join when a distributed subquery references a reference table
|
||||
DEBUG: recursively planning right side of the left join since the outer side is a recurring rel
|
||||
DEBUG: recursively planning distributed relation "dist_1" "t2" since it is part of a distributed join node that is outer joined with a recurring rel
|
||||
DEBUG: Wrapping relation "dist_1" "t2" to a subquery
|
||||
|
|
|
|||
|
|
@ -577,12 +577,14 @@ FROM
|
|||
FROM
|
||||
raw_events_first LEFT JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.user_id;
|
||||
|
||||
SET client_min_messages to debug3;
|
||||
INSERT INTO agg_events (user_id)
|
||||
SELECT
|
||||
raw_events_second.user_id
|
||||
FROM
|
||||
reference_table LEFT JOIN raw_events_second ON reference_table.user_id = raw_events_second.user_id;
|
||||
|
||||
SET client_min_messages to debug2;
|
||||
INSERT INTO agg_events (user_id)
|
||||
SELECT
|
||||
raw_events_first.user_id
|
||||
|
|
@ -2397,17 +2399,17 @@ SELECT coordinator_plan($$
|
|||
EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 SELECT id, (SELECT id FROM ref_table_1 WHERE id = 1) FROM ref_table_1;
|
||||
$$);
|
||||
|
||||
-- verify that insert select can be pushed down when we have reference table in outside of outer join.
|
||||
-- verify that insert select cannot be pushed down when we have reference table in outside of outer join in a chained-join.
|
||||
SELECT coordinator_plan($$
|
||||
EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 SELECT a.id FROM dist_table_5 a LEFT JOIN ref_table_1 b ON (true) RIGHT JOIN ref_table_1 c ON (true);
|
||||
$$);
|
||||
|
||||
-- verify that insert select can be pushed down when we have reference table in outside of left join.
|
||||
-- verify that insert select can be pushed down when we have reference table in outside of outer join.
|
||||
SELECT coordinator_plan($$
|
||||
EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 SELECT id FROM ref_table_1 LEFT JOIN dist_table_5 USING(id);
|
||||
$$);
|
||||
|
||||
-- verify that insert select cannot be pushed down when we have reference table in outside of left join and joined on non-distribution column.
|
||||
-- verify that insert select cannot be pushed down when we have reference table in outside of left join and joined on non-partition column.
|
||||
SELECT coordinator_plan($$
|
||||
EXPLAIN (COSTS FALSE) INSERT INTO dist_table_5 SELECT ref_table_1.id FROM ref_table_1 LEFT JOIN dist_table_5 ON ref_table_1.id = dist_table_5.id2;
|
||||
$$);
|
||||
|
|
|
|||
|
|
@ -132,9 +132,9 @@ SELECT
|
|||
FROM
|
||||
users_table RIGHT JOIN users_reference_table USING (user_id)
|
||||
WHERE
|
||||
users_reference_table.value_2 IN
|
||||
(users_reference_table.value_2, random()*0) IN
|
||||
(SELECT
|
||||
value_2
|
||||
value_2, 0
|
||||
FROM
|
||||
events_table
|
||||
WHERE
|
||||
|
|
|
|||
Loading…
Reference in New Issue