Prototype support for IN (const, list) and = ANY(const, b, c) pruning.

pull/1378/head
Andres Freund 2017-05-02 11:08:02 -07:00
parent ef6d3587b6
commit bc150fd524
5 changed files with 120 additions and 58 deletions

View File

@ -61,6 +61,7 @@
#include "distributed/pg_dist_partition.h" #include "distributed/pg_dist_partition.h"
#include "distributed/worker_protocol.h" #include "distributed/worker_protocol.h"
#include "nodes/nodeFuncs.h" #include "nodes/nodeFuncs.h"
#include "nodes/makefuncs.h"
#include "optimizer/clauses.h" #include "optimizer/clauses.h"
#include "utils/catcache.h" #include "utils/catcache.h"
#include "utils/lsyscache.h" #include "utils/lsyscache.h"
@ -547,24 +548,116 @@ PrunableExpressionsWalker(Node *node, ClauseWalkerContext *context)
Node *strippedLeftOpExpression = strip_implicit_coercions(leftOpExpression); Node *strippedLeftOpExpression = strip_implicit_coercions(leftOpExpression);
bool usingEqualityOperator = OperatorImplementsEquality( bool usingEqualityOperator = OperatorImplementsEquality(
arrayOperatorExpression->opno); arrayOperatorExpression->opno);
Expr *arrayArgument = (Expr *) lsecond(arrayOperatorExpression->args);
/* /*
* Citus cannot prune hash-distributed shards with ANY/ALL. We show a NOTICE * Found partcol = ANY(const, value, s); or partcol IN (const,b,c);
* if the expression is ANY/ALL performed on the partition column with equality.
*
* TODO: this'd now be easy to implement, similar to the OR_EXPR case
* above, except that one would push an appropriately constructed
* OpExpr(LHS = $array_element) as continueAt.
*/ */
if (usingEqualityOperator && strippedLeftOpExpression != NULL && if (usingEqualityOperator && strippedLeftOpExpression != NULL &&
equal(strippedLeftOpExpression, context->partitionColumn)) equal(strippedLeftOpExpression, context->partitionColumn) &&
IsA(arrayArgument, Const))
{ {
ereport(NOTICE, (errmsg("cannot use shard pruning with " ArrayType *array;
"ANY/ALL (array expression)"), int16 typlen;
errhint("Consider rewriting the expression with " bool typbyval;
"OR/AND clauses."))); char typalign;
Oid element_type;
char *s;
bits8 *bitmap;
int bitmask;
int i;
int nitems;
/*
* FIXME: use array_iter_setup() / array_iter_next(), instead of
* open-coding array iteration.
*/
array = DatumGetArrayTypeP(((Const *) arrayArgument)->constvalue);
element_type = ARR_ELEMTYPE(array);
get_typlenbyvalalign(element_type,
&typlen,
&typbyval,
&typalign);
s = (char *) ARR_DATA_PTR(array);
bitmap = ARR_NULLBITMAP(array);
bitmask = 1;
nitems = ArrayGetNItems(ARR_NDIM(array), ARR_DIMS(array));
/*
* Treat ScalarArrayOp as a long list of ORs and treat it the same
* way as BOOL_OR above.
*/
for (i = 0; i < nitems; i++)
{
OpExpr *op;
PendingPruningInstance *instance =
palloc0(sizeof(PendingPruningInstance));
Datum arg;
bool argnull;
Const *c;
/* Get array element, checking for NULL */
if (bitmap && (*bitmap & bitmask) == 0)
{
arg = (Datum) 0;
argnull = true;
}
else
{
arg = fetch_att(s, typbyval, typlen);
argnull = false;
s = att_addlength_pointer(s, typlen, s);
s = (char *) att_align_nominal(s, typalign);
} }
/* advance bitmap pointer if any */
if (bitmap)
{
bitmask <<= 1;
if (bitmask == 0x100)
{
bitmap++;
bitmask = 1;
}
}
/* build partcol = arrayelem operator */
op = makeNode(OpExpr);
op->opno = arrayOperatorExpression->opno;
op->opfuncid = arrayOperatorExpression->opfuncid;
op->inputcollid = arrayOperatorExpression->inputcollid;
op->opresulttype = BOOLOID; /* FIXME: */
op->opcollid = DEFAULT_COLLATION_OID;
op->location = -1;
c = makeConst(element_type, -1,
DEFAULT_COLLATION_OID,
typlen,
arg,
argnull,
typbyval);
op->args = list_make2(strippedLeftOpExpression, c);
/* and continue later */
instance->instance = context->currentPruningInstance;
instance->continueAt = (Node *) op;
/*
* Signal that this instance is not to be used for pruning on
* its own. Once the pending instance is processed, it'll be
* used.
*/
instance->instance->isPartial = true;
context->pendingInstances = lappend(context->pendingInstances, instance);
}
}
else
{
/* /*
* Mark expression as added, so we'll fail pruning if there's no ANDed * Mark expression as added, so we'll fail pruning if there's no ANDed
* restrictions that we can deal with. * restrictions that we can deal with.
@ -575,6 +668,7 @@ PrunableExpressionsWalker(Node *node, ClauseWalkerContext *context)
prune); prune);
prune->addedToPruningInstances = true; prune->addedToPruningInstances = true;
} }
}
return false; return false;
} }

View File

@ -192,10 +192,6 @@ DEBUG: Plan is router executable
-- a notice message when used with the partition column -- a notice message when used with the partition column
SELECT count(*) FROM orders_hash_partitioned SELECT count(*) FROM orders_hash_partitioned
WHERE o_orderkey = ANY ('{1,2,3}'); WHERE o_orderkey = ANY ('{1,2,3}');
NOTICE: cannot use shard pruning with ANY/ALL (array expression)
HINT: Consider rewriting the expression with OR/AND clauses.
NOTICE: cannot use shard pruning with ANY/ALL (array expression)
HINT: Consider rewriting the expression with OR/AND clauses.
count count
------- -------
0 0

View File

@ -764,18 +764,10 @@ DEBUG: Plan is router executable
FROM FROM
raw_events_first LEFT JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.user_id raw_events_first LEFT JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.user_id
WHERE raw_events_first.user_id IN (19, 20, 21); WHERE raw_events_first.user_id IN (19, 20, 21);
NOTICE: cannot use shard pruning with ANY/ALL (array expression)
HINT: Consider rewriting the expression with OR/AND clauses.
DEBUG: distributed statement: INSERT INTO public.agg_events_13300008 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (public.raw_events_first_13300000 raw_events_first LEFT JOIN public.raw_events_second_13300004 raw_events_second ON ((raw_events_first.user_id = raw_events_second.user_id))) WHERE ((raw_events_first.user_id = ANY (ARRAY[19, 20, 21])) AND ((worker_hash(raw_events_first.user_id) >= '-2147483648'::integer) AND (worker_hash(raw_events_first.user_id) <= '-1073741825'::integer))) DEBUG: distributed statement: INSERT INTO public.agg_events_13300008 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (public.raw_events_first_13300000 raw_events_first LEFT JOIN public.raw_events_second_13300004 raw_events_second ON ((raw_events_first.user_id = raw_events_second.user_id))) WHERE ((raw_events_first.user_id = ANY (ARRAY[19, 20, 21])) AND ((worker_hash(raw_events_first.user_id) >= '-2147483648'::integer) AND (worker_hash(raw_events_first.user_id) <= '-1073741825'::integer)))
NOTICE: cannot use shard pruning with ANY/ALL (array expression)
HINT: Consider rewriting the expression with OR/AND clauses.
DEBUG: distributed statement: INSERT INTO public.agg_events_13300009 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (public.raw_events_first_13300001 raw_events_first LEFT JOIN public.raw_events_second_13300005 raw_events_second ON ((raw_events_first.user_id = raw_events_second.user_id))) WHERE ((raw_events_first.user_id = ANY (ARRAY[19, 20, 21])) AND ((worker_hash(raw_events_first.user_id) >= '-1073741824'::integer) AND (worker_hash(raw_events_first.user_id) <= '-1'::integer))) DEBUG: distributed statement: INSERT INTO public.agg_events_13300009 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (public.raw_events_first_13300001 raw_events_first LEFT JOIN public.raw_events_second_13300005 raw_events_second ON ((raw_events_first.user_id = raw_events_second.user_id))) WHERE ((raw_events_first.user_id = ANY (ARRAY[19, 20, 21])) AND ((worker_hash(raw_events_first.user_id) >= '-1073741824'::integer) AND (worker_hash(raw_events_first.user_id) <= '-1'::integer)))
NOTICE: cannot use shard pruning with ANY/ALL (array expression)
HINT: Consider rewriting the expression with OR/AND clauses.
DEBUG: distributed statement: INSERT INTO public.agg_events_13300010 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (public.raw_events_first_13300002 raw_events_first LEFT JOIN public.raw_events_second_13300006 raw_events_second ON ((raw_events_first.user_id = raw_events_second.user_id))) WHERE ((raw_events_first.user_id = ANY (ARRAY[19, 20, 21])) AND ((worker_hash(raw_events_first.user_id) >= 0) AND (worker_hash(raw_events_first.user_id) <= 1073741823))) DEBUG: distributed statement: INSERT INTO public.agg_events_13300010 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (public.raw_events_first_13300002 raw_events_first LEFT JOIN public.raw_events_second_13300006 raw_events_second ON ((raw_events_first.user_id = raw_events_second.user_id))) WHERE ((raw_events_first.user_id = ANY (ARRAY[19, 20, 21])) AND ((worker_hash(raw_events_first.user_id) >= 0) AND (worker_hash(raw_events_first.user_id) <= 1073741823)))
NOTICE: cannot use shard pruning with ANY/ALL (array expression) DEBUG: distributed statement: INSERT INTO public.agg_events_13300011 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM ((SELECT NULL::integer AS user_id, NULL::timestamp without time zone AS "time", NULL::integer AS value_1, NULL::integer AS value_2, NULL::double precision AS value_3, NULL::bigint AS value_4 WHERE false) raw_events_first(user_id, "time", value_1, value_2, value_3, value_4) LEFT JOIN public.raw_events_second_13300007 raw_events_second ON ((raw_events_first.user_id = raw_events_second.user_id))) WHERE ((raw_events_first.user_id = ANY (ARRAY[19, 20, 21])) AND ((worker_hash(raw_events_first.user_id) >= 1073741824) AND (worker_hash(raw_events_first.user_id) <= 2147483647)))
HINT: Consider rewriting the expression with OR/AND clauses.
DEBUG: distributed statement: INSERT INTO public.agg_events_13300011 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (public.raw_events_first_13300003 raw_events_first LEFT JOIN public.raw_events_second_13300007 raw_events_second ON ((raw_events_first.user_id = raw_events_second.user_id))) WHERE ((raw_events_first.user_id = ANY (ARRAY[19, 20, 21])) AND ((worker_hash(raw_events_first.user_id) >= 1073741824) AND (worker_hash(raw_events_first.user_id) <= 2147483647)))
DEBUG: Plan is router executable DEBUG: Plan is router executable
INSERT INTO agg_events (user_id) INSERT INTO agg_events (user_id)
@ -784,18 +776,10 @@ DEBUG: Plan is router executable
FROM FROM
raw_events_first INNER JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.user_id raw_events_first INNER JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.user_id
WHERE raw_events_second.user_id IN (19, 20, 21); WHERE raw_events_second.user_id IN (19, 20, 21);
NOTICE: cannot use shard pruning with ANY/ALL (array expression)
HINT: Consider rewriting the expression with OR/AND clauses.
DEBUG: distributed statement: INSERT INTO public.agg_events_13300008 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (public.raw_events_first_13300000 raw_events_first JOIN public.raw_events_second_13300004 raw_events_second ON ((raw_events_first.user_id = raw_events_second.user_id))) WHERE ((raw_events_second.user_id = ANY (ARRAY[19, 20, 21])) AND ((worker_hash(raw_events_first.user_id) >= '-2147483648'::integer) AND (worker_hash(raw_events_first.user_id) <= '-1073741825'::integer))) DEBUG: distributed statement: INSERT INTO public.agg_events_13300008 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (public.raw_events_first_13300000 raw_events_first JOIN public.raw_events_second_13300004 raw_events_second ON ((raw_events_first.user_id = raw_events_second.user_id))) WHERE ((raw_events_second.user_id = ANY (ARRAY[19, 20, 21])) AND ((worker_hash(raw_events_first.user_id) >= '-2147483648'::integer) AND (worker_hash(raw_events_first.user_id) <= '-1073741825'::integer)))
NOTICE: cannot use shard pruning with ANY/ALL (array expression)
HINT: Consider rewriting the expression with OR/AND clauses.
DEBUG: distributed statement: INSERT INTO public.agg_events_13300009 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (public.raw_events_first_13300001 raw_events_first JOIN public.raw_events_second_13300005 raw_events_second ON ((raw_events_first.user_id = raw_events_second.user_id))) WHERE ((raw_events_second.user_id = ANY (ARRAY[19, 20, 21])) AND ((worker_hash(raw_events_first.user_id) >= '-1073741824'::integer) AND (worker_hash(raw_events_first.user_id) <= '-1'::integer))) DEBUG: distributed statement: INSERT INTO public.agg_events_13300009 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (public.raw_events_first_13300001 raw_events_first JOIN public.raw_events_second_13300005 raw_events_second ON ((raw_events_first.user_id = raw_events_second.user_id))) WHERE ((raw_events_second.user_id = ANY (ARRAY[19, 20, 21])) AND ((worker_hash(raw_events_first.user_id) >= '-1073741824'::integer) AND (worker_hash(raw_events_first.user_id) <= '-1'::integer)))
NOTICE: cannot use shard pruning with ANY/ALL (array expression)
HINT: Consider rewriting the expression with OR/AND clauses.
DEBUG: distributed statement: INSERT INTO public.agg_events_13300010 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (public.raw_events_first_13300002 raw_events_first JOIN public.raw_events_second_13300006 raw_events_second ON ((raw_events_first.user_id = raw_events_second.user_id))) WHERE ((raw_events_second.user_id = ANY (ARRAY[19, 20, 21])) AND ((worker_hash(raw_events_first.user_id) >= 0) AND (worker_hash(raw_events_first.user_id) <= 1073741823))) DEBUG: distributed statement: INSERT INTO public.agg_events_13300010 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (public.raw_events_first_13300002 raw_events_first JOIN public.raw_events_second_13300006 raw_events_second ON ((raw_events_first.user_id = raw_events_second.user_id))) WHERE ((raw_events_second.user_id = ANY (ARRAY[19, 20, 21])) AND ((worker_hash(raw_events_first.user_id) >= 0) AND (worker_hash(raw_events_first.user_id) <= 1073741823)))
NOTICE: cannot use shard pruning with ANY/ALL (array expression) DEBUG: distributed statement: INSERT INTO public.agg_events_13300011 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (public.raw_events_first_13300003 raw_events_first JOIN (SELECT NULL::integer AS user_id, NULL::timestamp without time zone AS "time", NULL::integer AS value_1, NULL::integer AS value_2, NULL::double precision AS value_3, NULL::bigint AS value_4 WHERE false) raw_events_second(user_id, "time", value_1, value_2, value_3, value_4) ON ((raw_events_first.user_id = raw_events_second.user_id))) WHERE ((raw_events_second.user_id = ANY (ARRAY[19, 20, 21])) AND ((worker_hash(raw_events_first.user_id) >= 1073741824) AND (worker_hash(raw_events_first.user_id) <= 2147483647)))
HINT: Consider rewriting the expression with OR/AND clauses.
DEBUG: distributed statement: INSERT INTO public.agg_events_13300011 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM (public.raw_events_first_13300003 raw_events_first JOIN public.raw_events_second_13300007 raw_events_second ON ((raw_events_first.user_id = raw_events_second.user_id))) WHERE ((raw_events_second.user_id = ANY (ARRAY[19, 20, 21])) AND ((worker_hash(raw_events_first.user_id) >= 1073741824) AND (worker_hash(raw_events_first.user_id) <= 2147483647)))
DEBUG: Plan is router executable DEBUG: Plan is router executable
-- the following is a very tricky query for Citus -- the following is a very tricky query for Citus

View File

@ -198,10 +198,8 @@ SELECT * FROM articles_hash_mx WHERE author_id <= 1;
(5 rows) (5 rows)
SELECT * FROM articles_hash_mx WHERE author_id IN (1, 3); SELECT * FROM articles_hash_mx WHERE author_id IN (1, 3);
NOTICE: cannot use shard pruning with ANY/ALL (array expression) DEBUG: Creating router plan
HINT: Consider rewriting the expression with OR/AND clauses. DEBUG: Plan is router executable
NOTICE: cannot use shard pruning with ANY/ALL (array expression)
HINT: Consider rewriting the expression with OR/AND clauses.
id | author_id | title | word_count id | author_id | title | word_count
----+-----------+--------------+------------ ----+-----------+--------------+------------
1 | 1 | arsenous | 9572 1 | 1 | arsenous | 9572
@ -1381,10 +1379,6 @@ DROP MATERIALIZED VIEW mv_articles_hash_mx;
SET client_min_messages to 'DEBUG2'; SET client_min_messages to 'DEBUG2';
CREATE MATERIALIZED VIEW mv_articles_hash_mx_error AS CREATE MATERIALIZED VIEW mv_articles_hash_mx_error AS
SELECT * FROM articles_hash_mx WHERE author_id in (1,2); SELECT * FROM articles_hash_mx WHERE author_id in (1,2);
NOTICE: cannot use shard pruning with ANY/ALL (array expression)
HINT: Consider rewriting the expression with OR/AND clauses.
NOTICE: cannot use shard pruning with ANY/ALL (array expression)
HINT: Consider rewriting the expression with OR/AND clauses.
-- router planner/executor is disabled for task-tracker executor -- router planner/executor is disabled for task-tracker executor
-- following query is router plannable, but router planner is disabled -- following query is router plannable, but router planner is disabled

View File

@ -257,10 +257,8 @@ SELECT * FROM articles_hash WHERE author_id <= 1;
(5 rows) (5 rows)
SELECT * FROM articles_hash WHERE author_id IN (1, 3); SELECT * FROM articles_hash WHERE author_id IN (1, 3);
NOTICE: cannot use shard pruning with ANY/ALL (array expression) DEBUG: Creating router plan
HINT: Consider rewriting the expression with OR/AND clauses. DEBUG: Plan is router executable
NOTICE: cannot use shard pruning with ANY/ALL (array expression)
HINT: Consider rewriting the expression with OR/AND clauses.
id | author_id | title | word_count id | author_id | title | word_count
----+-----------+--------------+------------ ----+-----------+--------------+------------
1 | 1 | arsenous | 9572 1 | 1 | arsenous | 9572
@ -2076,10 +2074,6 @@ SELECT * FROM mv_articles_hash_empty;
CREATE MATERIALIZED VIEW mv_articles_hash_data AS CREATE MATERIALIZED VIEW mv_articles_hash_data AS
SELECT * FROM articles_hash WHERE author_id in (1,2); SELECT * FROM articles_hash WHERE author_id in (1,2);
NOTICE: cannot use shard pruning with ANY/ALL (array expression)
HINT: Consider rewriting the expression with OR/AND clauses.
NOTICE: cannot use shard pruning with ANY/ALL (array expression)
HINT: Consider rewriting the expression with OR/AND clauses.
SELECT * FROM mv_articles_hash_data; SELECT * FROM mv_articles_hash_data;
id | author_id | title | word_count id | author_id | title | word_count
----+-----------+--------------+------------ ----+-----------+--------------+------------