Merge pull request #1860 from citusdata/needs_distributed_planning

Allow queries with local tables in NeedsDistributedPlanning
pull/1859/merge
Marco Slot 2017-12-08 10:11:07 +01:00 committed by GitHub
commit 7544e91c87
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 161 additions and 84 deletions

View File

@ -12,8 +12,8 @@
#include <float.h>
#include <limits.h>
#include "catalog/pg_class.h"
#include "catalog/pg_type.h"
#include "distributed/citus_nodefuncs.h"
#include "distributed/citus_nodes.h"
#include "distributed/insert_select_planner.h"
@ -41,6 +41,7 @@ int MultiTaskQueryLogLevel = MULTI_TASK_QUERY_INFO_OFF; /* multi-task query log
/* local function forward declarations */
static bool NeedsDistributedPlanningWalker(Node *node, void *context);
static PlannedStmt * CreateDistributedPlan(PlannedStmt *localPlan, Query *originalQuery,
Query *query, ParamListInfo boundParams,
PlannerRestrictionContext *
@ -75,20 +76,34 @@ distributed_planner(Query *parse, int cursorOptions, ParamListInfo boundParams)
PlannerRestrictionContext *plannerRestrictionContext = NULL;
bool setPartitionedTablesInherited = false;
/*
* standard_planner scribbles on it's input, but for deparsing we need the
* unmodified form. Note that we keep RTE_RELATIONs with their identities
* set, which doesn't break our goals, but, prevents us keeping an extra copy
* of the query tree. Note that we copy the query tree once we're sure it's a
* distributed query.
*/
if (needsDistributedPlanning)
{
setPartitionedTablesInherited = false;
/*
* Inserting into a local table needs to go through the regular postgres
* planner/executor, but the SELECT needs to go through Citus. We currently
* don't have a way of doing both things and therefore error out, but do
* have a handy tip for users.
*/
if (InsertSelectIntoLocalTable(parse))
{
ereport(ERROR, (errmsg("cannot INSERT rows from a distributed query into a "
"local table"),
errhint("Consider using CREATE TEMPORARY TABLE tmp AS "
"SELECT ... and inserting from the temporary "
"table.")));
}
/*
* standard_planner scribbles on it's input, but for deparsing we need the
* unmodified form. Note that we keep RTE_RELATIONs with their identities
* set, which doesn't break our goals, but, prevents us keeping an extra copy
* of the query tree. Note that we copy the query tree once we're sure it's a
* distributed query.
*/
AssignRTEIdentities(parse);
originalQuery = copyObject(parse);
setPartitionedTablesInherited = false;
AdjustPartitioningForDistributedPlanning(parse, setPartitionedTablesInherited);
}
@ -147,6 +162,75 @@ distributed_planner(Query *parse, int cursorOptions, ParamListInfo boundParams)
}
/*
* NeedsDistributedPlanning returns true if the Citus extension is loaded and
* the query contains a distributed table.
*
* This function allows queries containing local tables to pass through the
* distributed planner. How to handle local tables is a decision that should
* be made within the planner
*/
bool
NeedsDistributedPlanning(Query *query)
{
CmdType commandType = query->commandType;
if (commandType != CMD_SELECT && commandType != CMD_INSERT &&
commandType != CMD_UPDATE && commandType != CMD_DELETE)
{
return false;
}
if (!CitusHasBeenLoaded())
{
return false;
}
if (!NeedsDistributedPlanningWalker((Node *) query, NULL))
{
return false;
}
return true;
}
/*
* NeedsDistributedPlanningWalker checks if the query contains any distributed
* tables.
*/
static bool
NeedsDistributedPlanningWalker(Node *node, void *context)
{
if (node == NULL)
{
return false;
}
if (IsA(node, Query))
{
Query *query = (Query *) node;
ListCell *rangeTableCell = NULL;
foreach(rangeTableCell, query->rtable)
{
RangeTblEntry *rangeTableEntry = (RangeTblEntry *) lfirst(rangeTableCell);
Oid relationId = rangeTableEntry->relid;
if (IsDistributedTable(relationId))
{
return true;
}
}
return query_tree_walker(query, NeedsDistributedPlanningWalker, NULL, 0);
}
else
{
return expression_tree_walker(node, NeedsDistributedPlanningWalker, NULL);
}
}
/*
* AssignRTEIdentities function modifies query tree by adding RTE identities to the
* RTE_RELATIONs.

View File

@ -3478,71 +3478,6 @@ FindNodesOfType(MultiNode *node, int type)
}
/*
* NeedsDistributedPlanning checks if the passed in query is a query running
* on a distributed table. If it is, we start distributed planning.
*
* For distributed relations it also assigns identifiers to the relevant RTEs.
*/
bool
NeedsDistributedPlanning(Query *queryTree)
{
CmdType commandType = queryTree->commandType;
List *rangeTableList = NIL;
ListCell *rangeTableCell = NULL;
bool hasLocalRelation = false;
bool hasDistributedRelation = false;
if (commandType != CMD_SELECT && commandType != CMD_INSERT &&
commandType != CMD_UPDATE && commandType != CMD_DELETE)
{
return false;
}
/*
* We can handle INSERT INTO distributed_table SELECT ... even if the SELECT
* part references local tables, so skip the remaining checks.
*/
if (InsertSelectIntoDistributedTable(queryTree))
{
return true;
}
/* extract range table entries for simple relations only */
ExtractRangeTableRelationWalker((Node *) queryTree, &rangeTableList);
foreach(rangeTableCell, rangeTableList)
{
RangeTblEntry *rangeTableEntry = (RangeTblEntry *) lfirst(rangeTableCell);
/* check if relation is local or distributed */
Oid relationId = rangeTableEntry->relid;
if (IsDistributedTable(relationId))
{
hasDistributedRelation = true;
}
else
{
hasLocalRelation = true;
}
}
if (hasLocalRelation && hasDistributedRelation)
{
if (InsertSelectIntoLocalTable(queryTree))
{
ereport(ERROR, (errmsg("cannot INSERT rows from a distributed query into a "
"local table")));
}
ereport(ERROR, (errmsg("cannot plan queries which include both local and "
"distributed relations")));
}
return hasDistributedRelation;
}
/*
* ExtractRangeTableRelationWalker gathers all range table relation entries
* in a query.

View File

@ -2485,8 +2485,15 @@ MultiRouterPlannableQuery(Query *query, RelationRestrictionContext *restrictionC
{
/* only hash partitioned tables are supported */
Oid distributedTableId = rte->relid;
char partitionMethod = PartitionMethod(distributedTableId);
char partitionMethod = 0;
if (!IsDistributedTable(distributedTableId))
{
/* local tables cannot be read from workers */
return false;
}
partitionMethod = PartitionMethod(distributedTableId);
if (!(partitionMethod == DISTRIBUTE_BY_HASH || partitionMethod ==
DISTRIBUTE_BY_NONE || partitionMethod == DISTRIBUTE_BY_RANGE))
{

View File

@ -73,6 +73,7 @@ typedef struct RelationShard
extern PlannedStmt * distributed_planner(Query *parse, int cursorOptions,
ParamListInfo boundParams);
extern bool NeedsDistributedPlanning(Query *query);
extern struct DistributedPlan * GetDistributedPlan(CustomScan *node);
extern void multi_relation_restriction_hook(PlannerInfo *root, RelOptInfo *relOptInfo,
Index index, RangeTblEntry *rte);

View File

@ -192,7 +192,6 @@ extern PlannerRestrictionContext * FilterPlannerRestrictionForQuery(
Query *query);
extern bool SafeToPushdownWindowFunction(Query *query, StringInfo *errorDetail);
extern bool TargetListOnPartitionColumn(Query *query, List *targetEntryList);
extern bool NeedsDistributedPlanning(Query *queryTree);
extern MultiNode * ParentNode(MultiNode *multiNode);
extern MultiNode * ChildNode(MultiUnaryNode *multiNode);
extern MultiNode * GrandChildNode(MultiUnaryNode *multiNode);

View File

@ -7,6 +7,7 @@ CREATE TABLE test_table_1(id int);
INSERT INTO test_table_1
SELECT user_id FROM users_table;
ERROR: cannot INSERT rows from a distributed query into a local table
HINT: Consider using CREATE TEMPORARY TABLE tmp AS SELECT ... and inserting from the temporary table.
DROP TABLE test_table_1;
------------------------------------
------------------------------------

View File

@ -295,7 +295,7 @@ CREATE TABLE bidders ( name text, id bigint );
DELETE FROM limit_orders USING bidders WHERE limit_orders.id = 246 AND
limit_orders.bidder_id = bidders.id AND
bidders.name = 'Bernie Madoff';
ERROR: cannot plan queries which include both local and distributed relations
ERROR: relation bidders is not distributed
-- commands containing a CTE are unsupported
WITH deleted_orders AS (INSERT INTO limit_orders DEFAULT VALUES RETURNING *)
DELETE FROM limit_orders;
@ -429,7 +429,7 @@ UPDATE limit_orders SET limit_price = 0.00 FROM bidders
WHERE limit_orders.id = 246 AND
limit_orders.bidder_id = bidders.id AND
bidders.name = 'Bernie Madoff';
ERROR: cannot plan queries which include both local and distributed relations
ERROR: relation bidders is not distributed
-- commands containing a CTE are unsupported
WITH deleted_orders AS (INSERT INTO limit_orders DEFAULT VALUES RETURNING *)
UPDATE limit_orders SET symbol = 'GM';

View File

@ -165,7 +165,7 @@ CREATE TABLE bidders ( name text, id bigint );
DELETE FROM limit_orders_mx USING bidders WHERE limit_orders_mx.id = 246 AND
limit_orders_mx.bidder_id = bidders.id AND
bidders.name = 'Bernie Madoff';
ERROR: cannot plan queries which include both local and distributed relations
ERROR: relation bidders is not distributed
-- commands containing a CTE are unsupported
WITH deleted_orders AS (INSERT INTO limit_orders_mx DEFAULT VALUES RETURNING *)
DELETE FROM limit_orders_mx;
@ -235,7 +235,7 @@ UPDATE limit_orders_mx SET limit_price = 0.00 FROM bidders
WHERE limit_orders_mx.id = 246 AND
limit_orders_mx.bidder_id = bidders.id AND
bidders.name = 'Bernie Madoff';
ERROR: cannot plan queries which include both local and distributed relations
ERROR: relation bidders is not distributed
-- commands containing a CTE are unsupported
WITH deleted_orders AS (INSERT INTO limit_orders_mx DEFAULT VALUES RETURNING *)
UPDATE limit_orders_mx SET symbol = 'GM';

View File

@ -175,14 +175,14 @@ HINT: Consider using an equality filter on the distributed table's partition co
-- queries using CTEs are unsupported
WITH long_names AS ( SELECT id FROM authors WHERE char_length(name) > 15 )
SELECT title FROM articles;
ERROR: cannot plan queries which include both local and distributed relations
ERROR: relation authors is not distributed
-- queries which involve functions in FROM clause are unsupported.
SELECT * FROM articles, position('om' in 'Thomas');
ERROR: could not run distributed query with complex table expressions
HINT: Consider using an equality filter on the distributed table's partition column.
-- subqueries are not supported in WHERE clause in Citus
SELECT * FROM articles WHERE author_id IN (SELECT id FROM authors WHERE name LIKE '%a');
ERROR: cannot plan queries which include both local and distributed relations
ERROR: relation authors is not distributed
-- subqueries are supported in FROM clause
SELECT articles.id,test.word_count
FROM articles, (SELECT id, word_count FROM articles) AS test WHERE test.id = articles.id
@ -248,10 +248,10 @@ ERROR: could not run distributed query with subquery outside the FROM and WHERE
HINT: Consider using an equality filter on the distributed table's partition column.
-- joins are not supported between local and distributed tables
SELECT title, authors.name FROM authors, articles WHERE authors.id = articles.author_id;
ERROR: cannot plan queries which include both local and distributed relations
ERROR: relation authors is not distributed
-- inner joins are not supported (I think)
SELECT * FROM (articles INNER JOIN authors ON articles.id = authors.id);
ERROR: cannot plan queries which include both local and distributed relations
ERROR: relation authors is not distributed
-- test use of EXECUTE statements within plpgsql
DO $sharded_execute$
BEGIN

View File

@ -183,6 +183,7 @@ SELECT count(*) FROM priority_orders JOIN air_shipped_lineitems ON (o_custkey =
(1 row)
SET citus.task_executor_type to DEFAULT;
-- materialized views work
-- insert into... select works with views
CREATE TABLE temp_lineitem(LIKE lineitem_hash_part);
SELECT create_distributed_table('temp_lineitem', 'l_orderkey', 'hash', 'lineitem_hash_part');
@ -206,6 +207,35 @@ SELECT count(*) FROM temp_lineitem;
1706
(1 row)
-- can create and query materialized views
CREATE MATERIALIZED VIEW mode_counts
AS SELECT l_shipmode, count(*) FROM temp_lineitem GROUP BY l_shipmode;
SELECT * FROM mode_counts WHERE l_shipmode = 'AIR' ORDER BY 2 DESC, 1 LIMIT 10;
l_shipmode | count
------------+-------
AIR | 1706
(1 row)
-- materialized views are local, cannot join with distributed tables
SELECT count(*) FROM mode_counts JOIN temp_lineitem USING (l_shipmode);
ERROR: relation mode_counts is not distributed
-- new data is not immediately reflected in the view
INSERT INTO temp_lineitem SELECT * FROM air_shipped_lineitems;
SELECT * FROM mode_counts WHERE l_shipmode = 'AIR' ORDER BY 2 DESC, 1 LIMIT 10;
l_shipmode | count
------------+-------
AIR | 1706
(1 row)
-- refresh updates the materialised view with new data
REFRESH MATERIALIZED VIEW mode_counts;
SELECT * FROM mode_counts WHERE l_shipmode = 'AIR' ORDER BY 2 DESC, 1 LIMIT 10;
l_shipmode | count
------------+-------
AIR | 3412
(1 row)
DROP MATERIALIZED VIEW mode_counts;
SET citus.task_executor_type to "task-tracker";
-- single view repartition subqueries are not supported
SELECT l_suppkey, count(*) FROM

View File

@ -86,6 +86,7 @@ SET citus.task_executor_type to "task-tracker";
SELECT count(*) FROM priority_orders JOIN air_shipped_lineitems ON (o_custkey = l_suppkey);
SET citus.task_executor_type to DEFAULT;
-- materialized views work
-- insert into... select works with views
CREATE TABLE temp_lineitem(LIKE lineitem_hash_part);
SELECT create_distributed_table('temp_lineitem', 'l_orderkey', 'hash', 'lineitem_hash_part');
@ -95,6 +96,25 @@ SELECT count(*) FROM temp_lineitem;
INSERT INTO temp_lineitem SELECT * FROM air_shipped_lineitems WHERE l_shipmode = 'MAIL';
SELECT count(*) FROM temp_lineitem;
-- can create and query materialized views
CREATE MATERIALIZED VIEW mode_counts
AS SELECT l_shipmode, count(*) FROM temp_lineitem GROUP BY l_shipmode;
SELECT * FROM mode_counts WHERE l_shipmode = 'AIR' ORDER BY 2 DESC, 1 LIMIT 10;
-- materialized views are local, cannot join with distributed tables
SELECT count(*) FROM mode_counts JOIN temp_lineitem USING (l_shipmode);
-- new data is not immediately reflected in the view
INSERT INTO temp_lineitem SELECT * FROM air_shipped_lineitems;
SELECT * FROM mode_counts WHERE l_shipmode = 'AIR' ORDER BY 2 DESC, 1 LIMIT 10;
-- refresh updates the materialised view with new data
REFRESH MATERIALIZED VIEW mode_counts;
SELECT * FROM mode_counts WHERE l_shipmode = 'AIR' ORDER BY 2 DESC, 1 LIMIT 10;
DROP MATERIALIZED VIEW mode_counts;
SET citus.task_executor_type to "task-tracker";
-- single view repartition subqueries are not supported