Include all relevant relations in the ExtractRangeTableRelationWalker (#3135)

In #3109 we changed the logic for pulling RTE_RELATIONs, which broke
non-colocated subquery joins and partitioned tables.
@onurctirtir found the failing steps, which I traced back to these issues.

While looking into it in more detail, we decided to expand the list so
that callers get all the relevant RTE_RELATIONs: RELKIND_RELATION,
RELKIND_PARTITIONED_TABLE, RELKIND_FOREIGN_TABLE and RELKIND_MATVIEW.
These are all the relation kinds that the Citus planner is aware of.
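With the expanded walker, callers that only care about distributed tables are expected to filter the result themselves. A minimal sketch of that pattern (a hypothetical call site, mirroring what the new DistributedRelationIdList helper in this commit does internally):

List *rangeTableList = NIL;
ListCell *rangeTableCell = NULL;

/* the walker now also returns local tables, matviews, foreign and partitioned tables */
ExtractRangeTableRelationWalker((Node *) queryTree, &rangeTableList);

foreach(rangeTableCell, rangeTableList)
{
	RangeTblEntry *rangeTableEntry = (RangeTblEntry *) lfirst(rangeTableCell);

	/* callers interested only in distributed tables must filter themselves */
	if (!IsDistributedTable(rangeTableEntry->relid))
	{
		continue;
	}

	/* ... distributed-table specific logic ... */
}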
Önder Kalacı 2019-11-01 16:06:58 +01:00 committed by GitHub
parent d3f68bf44f
commit ffd89e4e01
10 changed files with 127 additions and 18 deletions


@@ -995,8 +995,8 @@ HasUnsupportedJoinWalker(Node *node, void *context)
static bool
ErrorHintRequired(const char *errorHint, Query *queryTree)
{
-	List *rangeTableList = NIL;
-	ListCell *rangeTableCell = NULL;
+	List *distributedRelationIdList = DistributedRelationIdList(queryTree);
+	ListCell *relationIdCell = NULL;
	List *colocationIdList = NIL;

	if (errorHint == NULL)
@@ -1004,11 +1004,9 @@ ErrorHintRequired(const char *errorHint, Query *queryTree)
		return false;
	}

-	ExtractRangeTableRelationWalker((Node *) queryTree, &rangeTableList);
-	foreach(rangeTableCell, rangeTableList)
+	foreach(relationIdCell, distributedRelationIdList)
	{
-		RangeTblEntry *rte = (RangeTblEntry *) lfirst(rangeTableCell);
-		Oid relationId = rte->relid;
+		Oid relationId = lfirst_oid(relationIdCell);
		char partitionMethod = PartitionMethod(relationId);

		if (partitionMethod == DISTRIBUTE_BY_NONE)
		{


@@ -2216,7 +2216,7 @@ static void
ErrorIfUnsupportedShardDistribution(Query *query)
{
	Oid firstTableRelationId = InvalidOid;
-	List *relationIdList = RelationIdList(query);
+	List *relationIdList = DistributedRelationIdList(query);
	List *nonReferenceRelations = NIL;
	ListCell *relationIdCell = NULL;
	uint32 relationIndex = 0;


@@ -1686,7 +1686,7 @@ RelationRestrictionPartitionKeyIndex(RelationRestriction *relationRestriction)
 * RelationIdList returns list of unique relation ids in query tree.
 */
List *
-RelationIdList(Query *query)
+DistributedRelationIdList(Query *query)
{
	List *rangeTableList = NIL;
	List *tableEntryList = NIL;
@@ -1701,6 +1701,11 @@ RelationIdList(Query *query)
		TableEntry *tableEntry = (TableEntry *) lfirst(tableEntryCell);
		Oid relationId = tableEntry->relationId;

+		if (!IsDistributedTable(relationId))
+		{
+			continue;
+		}
+
		relationIdList = list_append_unique_oid(relationIdList, relationId);
	}


@@ -17,6 +17,10 @@
#include "distributed/version_compat.h"
#include "nodes/nodeFuncs.h"

+static bool CitusQueryableRangeTableRelation(RangeTblEntry *rangeTableEntry);
+
+
/*
 * ExtractRangeTableList walks over a tree to gather entries.
 * Execution is parameterized by passing walkerMode flag via ExtractRangeTableWalkerContext
@@ -40,12 +44,9 @@ ExtractRangeTableList(Node *node, ExtractRangeTableWalkerContext *context)
	{
		RangeTblEntry *rangeTable = (RangeTblEntry *) node;

-		/* make sure that we are extracting only relation entries if walkerMode is set to EXTRACT_RELATION_ENTRIES */
-		if (walkerMode == EXTRACT_ALL_ENTRIES || (walkerMode ==
-												  EXTRACT_RELATION_ENTRIES &&
-												  rangeTable->rtekind == RTE_RELATION &&
-												  rangeTable->relkind ==
-												  RELKIND_RELATION))
+		if (walkerMode == EXTRACT_ALL_ENTRIES ||
+			(walkerMode == EXTRACT_RELATION_ENTRIES &&
+			 CitusQueryableRangeTableRelation(rangeTable)))
		{
			(*rangeTableRelationList) = lappend(*rangeTableRelationList, rangeTable);
		}
@@ -81,9 +82,43 @@ ExtractRangeTableList(Node *node, ExtractRangeTableWalkerContext *context)
}

+
+/*
+ * CitusQueryableRangeTableRelation returns true if the input range table
+ * entry is a relation and it can be used in a distributed query, including
+ * local tables and materialized views as well.
+ */
+static bool
+CitusQueryableRangeTableRelation(RangeTblEntry *rangeTableEntry)
+{
+	char relationKind = '\0';
+
+	if (rangeTableEntry->rtekind != RTE_RELATION)
+	{
+		/* we're only interested in relations */
+		return false;
+	}
+
+	relationKind = rangeTableEntry->relkind;
+	if (relationKind == RELKIND_RELATION || relationKind == RELKIND_PARTITIONED_TABLE ||
+		relationKind == RELKIND_FOREIGN_TABLE || relationKind == RELKIND_MATVIEW)
+	{
+		/*
+		 * RELKIND_VIEW entries are automatically replaced with subqueries in
+		 * the query tree, so we ignore them here.
+		 *
+		 * RELKIND_MATVIEW is the equivalent of a local table in postgres.
+		 */
+		return true;
+	}
+
+	return false;
+}
+
+
/*
 * ExtractRangeTableRelationWalker gathers all range table relation entries
- * in a query.
+ * in a query. The caller is responsible for checking whether the returned
+ * entries are distributed or not.
 */
bool
ExtractRangeTableRelationWalker(Node *node, List **rangeTableRelationList)
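For reference, the walker entry point is a thin wrapper around ExtractRangeTableList. A minimal sketch, assuming ExtractRangeTableWalkerContext carries the output list and the walker mode as fields (the field names are an assumption, not part of this diff):

bool
ExtractRangeTableRelationWalker(Node *node, List **rangeTableRelationList)
{
	ExtractRangeTableWalkerContext context;

	/* collect only the RTEs that CitusQueryableRangeTableRelation accepts */
	context.rangeTableList = rangeTableRelationList;
	context.walkerMode = EXTRACT_RELATION_ENTRIES;

	return ExtractRangeTableList(node, &context);
}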


@@ -17,7 +17,7 @@
/* Enum to define execution flow of ExtractRangeTableList */
typedef enum ExtractRangeTableMode
{
-	EXTRACT_RELATION_ENTRIES,
+	EXTRACT_RELATION_ENTRIES, /* including local, foreign and partitioned tables */
	EXTRACT_ALL_ENTRIES
} ExtractRangeTableMode;


@@ -31,7 +31,7 @@ extern List * GenerateAllAttributeEquivalences(PlannerRestrictionContext *
	plannerRestrictionContext);
extern uint32 ReferenceRelationCount(RelationRestrictionContext *restrictionContext);
-extern List * RelationIdList(Query *query);
+extern List * DistributedRelationIdList(Query *query);
extern PlannerRestrictionContext * FilterPlannerRestrictionForQuery(
	PlannerRestrictionContext *plannerRestrictionContext,
	Query *query);


@@ -319,6 +319,19 @@ INSERT INTO small VALUES(14, 14);
-- delete statement with CTE
WITH all_small_view_ids AS (SELECT id FROM small_view)
DELETE FROM large_partitioned WHERE id in (SELECT * FROM all_small_view_ids);
+-- make sure that materialized view in a CTE/subquery can be joined with a distributed table
+WITH cte AS (SELECT *, random() FROM small_view) SELECT count(*) FROM cte JOIN small USING(id);
+ count
+-------
+     4
+(1 row)
+
+SELECT count(*) FROM (SELECT *, random() FROM small_view) as subquery JOIN small USING(id);
+ count
+-------
+     4
+(1 row)
+
DROP TABLE large_partitioned;
DROP TABLE small CASCADE;
NOTICE:  drop cascades to materialized view small_view


@@ -1054,8 +1054,44 @@ DEBUG: Plan is router executable
DEBUG:  generating subplan 109_6 for subquery SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.event_type, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('109_4'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, event_type integer, value_2 integer, value_3 double precision, value_4 bigint) INTERSECT SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.event_type, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('109_5'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, event_type integer, value_2 integer, value_3 double precision, value_4 bigint)
DEBUG:  Router planner cannot handle multi-shard select queries
ERROR:  cannot pushdown the subquery
+-- make sure that non-colocated subquery joins work fine in
+-- modifications
+CREATE TABLE table1 (id int, tenant_id int);
+CREATE VIEW table1_view AS SELECT * from table1 where id < 100;
+CREATE TABLE table2 (id int, tenant_id int) partition by range(tenant_id);
+CREATE TABLE table2_p1 PARTITION OF table2 FOR VALUES FROM (1) TO (10);
+-- modifications on the partitions are only allowed with rep=1
+SET citus.shard_replication_factor TO 1;
+SELECT create_distributed_table('table2','tenant_id');
+ create_distributed_table
+--------------------------
+
+(1 row)
+
+SELECT create_distributed_table('table1','tenant_id');
+ create_distributed_table
+--------------------------
+
+(1 row)
+
+-- the queries below are non-colocated subquery joins
+-- because the views are replaced with subqueries
+UPDATE table2 SET id=20 FROM table1_view WHERE table1_view.id=table2.id;
+DEBUG:  complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator
+DEBUG:  Router planner cannot handle multi-shard select queries
+DEBUG:  generating subplan 117_1 for subquery SELECT table1.id, table1.tenant_id FROM non_colocated_subquery.table1 WHERE (table1.id OPERATOR(pg_catalog.<) 100)
+DEBUG:  Plan 117 query after replacing subqueries and CTEs: UPDATE non_colocated_subquery.table2 SET id = 20 FROM (SELECT intermediate_result.id, intermediate_result.tenant_id FROM read_intermediate_result('117_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer, tenant_id integer)) table1_view WHERE (table1_view.id OPERATOR(pg_catalog.=) table2.id)
+DEBUG:  Creating router plan
+DEBUG:  Plan is router executable
+UPDATE table2_p1 SET id=20 FROM table1_view WHERE table1_view.id=table2_p1.id;
+DEBUG:  complex joins are only supported when all distributed tables are joined on their distribution columns with equal operator
+DEBUG:  Router planner cannot handle multi-shard select queries
+DEBUG:  generating subplan 119_1 for subquery SELECT table1.id, table1.tenant_id FROM non_colocated_subquery.table1 WHERE (table1.id OPERATOR(pg_catalog.<) 100)
+DEBUG:  Plan 119 query after replacing subqueries and CTEs: UPDATE non_colocated_subquery.table2_p1 SET id = 20 FROM (SELECT intermediate_result.id, intermediate_result.tenant_id FROM read_intermediate_result('119_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer, tenant_id integer)) table1_view WHERE (table1_view.id OPERATOR(pg_catalog.=) table2_p1.id)
+DEBUG:  Creating router plan
+DEBUG:  Plan is router executable
RESET client_min_messages;
DROP FUNCTION explain_json_2(text);
SET search_path TO 'public';
DROP SCHEMA non_colocated_subquery CASCADE;
-NOTICE:  drop cascades to 2 other objects
+NOTICE:  drop cascades to 5 other objects


@@ -246,5 +246,9 @@ INSERT INTO small VALUES(14, 14);
WITH all_small_view_ids AS (SELECT id FROM small_view)
DELETE FROM large_partitioned WHERE id in (SELECT * FROM all_small_view_ids);
+
+-- make sure that materialized view in a CTE/subquery can be joined with a distributed table
+WITH cte AS (SELECT *, random() FROM small_view) SELECT count(*) FROM cte JOIN small USING(id);
+SELECT count(*) FROM (SELECT *, random() FROM small_view) as subquery JOIN small USING(id);
+
DROP TABLE large_partitioned;
DROP TABLE small CASCADE;


@@ -782,6 +782,24 @@ SELECT count(*) FROM events_table WHERE user_id NOT IN
);
+
+-- make sure that non-colocated subquery joins work fine in
+-- modifications
+CREATE TABLE table1 (id int, tenant_id int);
+CREATE VIEW table1_view AS SELECT * from table1 where id < 100;
+CREATE TABLE table2 (id int, tenant_id int) partition by range(tenant_id);
+CREATE TABLE table2_p1 PARTITION OF table2 FOR VALUES FROM (1) TO (10);
+
+-- modifications on the partitions are only allowed with rep=1
+SET citus.shard_replication_factor TO 1;
+SELECT create_distributed_table('table2','tenant_id');
+SELECT create_distributed_table('table1','tenant_id');
+
+-- the queries below are non-colocated subquery joins
+-- because the views are replaced with subqueries
+UPDATE table2 SET id=20 FROM table1_view WHERE table1_view.id=table2.id;
+UPDATE table2_p1 SET id=20 FROM table1_view WHERE table1_view.id=table2_p1.id;
+
RESET client_min_messages;
DROP FUNCTION explain_json_2(text);