From 41f1466cf8d51ea85ca567e44274e02583d47f83 Mon Sep 17 00:00:00 2001 From: Murat Tuncer Date: Fri, 2 Feb 2018 12:41:47 +0300 Subject: [PATCH] Fix count distinct using field select on top level query We were allowing count distict queries even if they were not directly on columns if the query is grouped on distribution column. When performing these checks we were skipping subqueries because they also perform this check in a more concise manner. We relied on oid SUBQUERY_RELATION_ID (10000) to decide if a given RTE relation id denotes a subquery, however, we also use SUBQUERY_PUSHDOWN_RELATION_ID (10001) for some subqueries. We skip both type of subqueries with this change. --- .../planner/multi_logical_optimizer.c | 5 +- .../input/multi_complex_count_distinct.source | 60 ++++++++++++ .../multi_complex_count_distinct.source | 96 +++++++++++++++++++ 3 files changed, 160 insertions(+), 1 deletion(-) diff --git a/src/backend/distributed/planner/multi_logical_optimizer.c b/src/backend/distributed/planner/multi_logical_optimizer.c index 958c0b262..1d224d7cb 100644 --- a/src/backend/distributed/planner/multi_logical_optimizer.c +++ b/src/backend/distributed/planner/multi_logical_optimizer.c @@ -2888,6 +2888,8 @@ ErrorIfUnsupportedAggregateDistinct(Aggref *aggregateExpression, * AggregateDistinctColumn checks if the given aggregate expression's distinct * clause is on a single column. If it is, the function finds and returns that * column. Otherwise, the function returns null. + * The function expects to find a single column here, no FieldSelect or other + * expressions are accepted as a column. */ static Var * AggregateDistinctColumn(Aggref *aggregateExpression) @@ -2941,7 +2943,8 @@ TablePartitioningSupportsDistinct(List *tableNodeList, MultiExtendedOp *opNode, char partitionMethod = 0; List *shardList = NIL; - if (relationId == SUBQUERY_RELATION_ID) + if (relationId == SUBQUERY_RELATION_ID || + relationId == SUBQUERY_PUSHDOWN_RELATION_ID) { return true; } diff --git a/src/test/regress/input/multi_complex_count_distinct.source b/src/test/regress/input/multi_complex_count_distinct.source index e98dc7191..955f7993f 100644 --- a/src/test/regress/input/multi_complex_count_distinct.source +++ b/src/test/regress/input/multi_complex_count_distinct.source @@ -433,6 +433,66 @@ SELECT * 2 DESC,1 DESC LIMIT 10; +-- distinct on non-var (type cast/field select) columns are also +-- supported if grouped on distribution column +-- random is added to prevent flattening by postgresql +SELECT + l_orderkey, count(a::int), count(distinct a::int) + FROM ( + SELECT l_orderkey, l_orderkey * 1.5 a, random() b + FROM lineitem_hash) sub + GROUP BY 1 + ORDER BY 1 DESC + LIMIT 5; + +SELECT user_id, + count(sub.a::int), + count(DISTINCT sub.a::int), + count(DISTINCT (sub).a) +FROM + (SELECT user_id, + unnest(ARRAY[user_id * 1.5])a, + random() b + FROM users_table + ) sub +GROUP BY 1 +ORDER BY 1 DESC +LIMIT 5; + +CREATE TYPE test_item AS +( + id INTEGER, + duration INTEGER +); +SELECT * FROM run_command_on_workers($$CREATE TYPE test_item AS +( + id INTEGER, + duration INTEGER +)$$) ORDER BY nodeport; + +CREATE TABLE test_count_distinct_array (key int, value int , value_arr test_item[]); +SELECT create_distributed_table('test_count_distinct_array', 'key'); + +INSERT INTO test_count_distinct_array SELECT i, i, ARRAY[(i,i)::test_item] FROM generate_Series(0, 1000) i; + +SELECT + key, + count(DISTINCT value), + count(DISTINCT (item)."id"), + count(DISTINCT (item)."id" * 3) +FROM + ( + SELECT key, unnest(value_arr) as item, value FROM test_count_distinct_array + ) as sub +GROUP BY 1 +ORDER BY 1 DESC +LIMIT 5; + +DROP TABLE test_count_distinct_array; + +DROP TYPE test_item; +SELECT * FROM run_command_on_workers($$DROP TYPE test_item$$) ORDER BY nodeport; + -- other distinct aggregate are not supported SELECT * FROM ( diff --git a/src/test/regress/output/multi_complex_count_distinct.source b/src/test/regress/output/multi_complex_count_distinct.source index b9925842b..77d3143fd 100644 --- a/src/test/regress/output/multi_complex_count_distinct.source +++ b/src/test/regress/output/multi_complex_count_distinct.source @@ -899,6 +899,102 @@ SELECT * REG AIR | 1607 (7 rows) +-- distinct on non-var (type cast/field select) columns are also +-- supported if grouped on distribution column +-- random is added to prevent flattening by postgresql +SELECT + l_orderkey, count(a::int), count(distinct a::int) + FROM ( + SELECT l_orderkey, l_orderkey * 1.5 a, random() b + FROM lineitem_hash) sub + GROUP BY 1 + ORDER BY 1 DESC + LIMIT 5; + l_orderkey | count | count +------------+-------+------- + 14947 | 2 | 1 + 14946 | 2 | 1 + 14945 | 6 | 1 + 14944 | 2 | 1 + 14919 | 1 | 1 +(5 rows) + +SELECT user_id, + count(sub.a::int), + count(DISTINCT sub.a::int), + count(DISTINCT (sub).a) +FROM + (SELECT user_id, + unnest(ARRAY[user_id * 1.5])a, + random() b + FROM users_table + ) sub +GROUP BY 1 +ORDER BY 1 DESC +LIMIT 5; + user_id | count | count | count +---------+-------+-------+------- + 6 | 10 | 1 | 1 + 5 | 26 | 1 | 1 + 4 | 23 | 1 | 1 + 3 | 17 | 1 | 1 + 2 | 18 | 1 | 1 +(5 rows) + +CREATE TYPE test_item AS +( + id INTEGER, + duration INTEGER +); +SELECT * FROM run_command_on_workers($$CREATE TYPE test_item AS +( + id INTEGER, + duration INTEGER +)$$) ORDER BY nodeport; + nodename | nodeport | success | result +-----------+----------+---------+------------- + localhost | 57637 | t | CREATE TYPE + localhost | 57638 | t | CREATE TYPE +(2 rows) + +CREATE TABLE test_count_distinct_array (key int, value int , value_arr test_item[]); +SELECT create_distributed_table('test_count_distinct_array', 'key'); + create_distributed_table +-------------------------- + +(1 row) + +INSERT INTO test_count_distinct_array SELECT i, i, ARRAY[(i,i)::test_item] FROM generate_Series(0, 1000) i; +SELECT + key, + count(DISTINCT value), + count(DISTINCT (item)."id"), + count(DISTINCT (item)."id" * 3) +FROM + ( + SELECT key, unnest(value_arr) as item, value FROM test_count_distinct_array + ) as sub +GROUP BY 1 +ORDER BY 1 DESC +LIMIT 5; + key | count | count | count +------+-------+-------+------- + 1000 | 1 | 1 | 1 + 999 | 1 | 1 | 1 + 998 | 1 | 1 | 1 + 997 | 1 | 1 | 1 + 996 | 1 | 1 | 1 +(5 rows) + +DROP TABLE test_count_distinct_array; +DROP TYPE test_item; +SELECT * FROM run_command_on_workers($$DROP TYPE test_item$$) ORDER BY nodeport; + nodename | nodeport | success | result +-----------+----------+---------+----------- + localhost | 57637 | t | DROP TYPE + localhost | 57638 | t | DROP TYPE +(2 rows) + -- other distinct aggregate are not supported SELECT * FROM (