diff --git a/src/backend/distributed/planner/multi_logical_optimizer.c b/src/backend/distributed/planner/multi_logical_optimizer.c index 958c0b262..1d224d7cb 100644 --- a/src/backend/distributed/planner/multi_logical_optimizer.c +++ b/src/backend/distributed/planner/multi_logical_optimizer.c @@ -2888,6 +2888,8 @@ ErrorIfUnsupportedAggregateDistinct(Aggref *aggregateExpression, * AggregateDistinctColumn checks if the given aggregate expression's distinct * clause is on a single column. If it is, the function finds and returns that * column. Otherwise, the function returns null. + * The function expects to find a single column here, no FieldSelect or other + * expressions are accepted as a column. */ static Var * AggregateDistinctColumn(Aggref *aggregateExpression) @@ -2941,7 +2943,8 @@ TablePartitioningSupportsDistinct(List *tableNodeList, MultiExtendedOp *opNode, char partitionMethod = 0; List *shardList = NIL; - if (relationId == SUBQUERY_RELATION_ID) + if (relationId == SUBQUERY_RELATION_ID || + relationId == SUBQUERY_PUSHDOWN_RELATION_ID) { return true; } diff --git a/src/test/regress/input/multi_complex_count_distinct.source b/src/test/regress/input/multi_complex_count_distinct.source index e98dc7191..955f7993f 100644 --- a/src/test/regress/input/multi_complex_count_distinct.source +++ b/src/test/regress/input/multi_complex_count_distinct.source @@ -433,6 +433,66 @@ SELECT * 2 DESC,1 DESC LIMIT 10; +-- distinct on non-var (type cast/field select) columns are also +-- supported if grouped on distribution column +-- random is added to prevent flattening by postgresql +SELECT + l_orderkey, count(a::int), count(distinct a::int) + FROM ( + SELECT l_orderkey, l_orderkey * 1.5 a, random() b + FROM lineitem_hash) sub + GROUP BY 1 + ORDER BY 1 DESC + LIMIT 5; + +SELECT user_id, + count(sub.a::int), + count(DISTINCT sub.a::int), + count(DISTINCT (sub).a) +FROM + (SELECT user_id, + unnest(ARRAY[user_id * 1.5])a, + random() b + FROM users_table + ) sub +GROUP BY 1 +ORDER BY 1 DESC +LIMIT 5; + +CREATE TYPE test_item AS +( + id INTEGER, + duration INTEGER +); +SELECT * FROM run_command_on_workers($$CREATE TYPE test_item AS +( + id INTEGER, + duration INTEGER +)$$) ORDER BY nodeport; + +CREATE TABLE test_count_distinct_array (key int, value int , value_arr test_item[]); +SELECT create_distributed_table('test_count_distinct_array', 'key'); + +INSERT INTO test_count_distinct_array SELECT i, i, ARRAY[(i,i)::test_item] FROM generate_Series(0, 1000) i; + +SELECT + key, + count(DISTINCT value), + count(DISTINCT (item)."id"), + count(DISTINCT (item)."id" * 3) +FROM + ( + SELECT key, unnest(value_arr) as item, value FROM test_count_distinct_array + ) as sub +GROUP BY 1 +ORDER BY 1 DESC +LIMIT 5; + +DROP TABLE test_count_distinct_array; + +DROP TYPE test_item; +SELECT * FROM run_command_on_workers($$DROP TYPE test_item$$) ORDER BY nodeport; + -- other distinct aggregate are not supported SELECT * FROM ( diff --git a/src/test/regress/output/multi_complex_count_distinct.source b/src/test/regress/output/multi_complex_count_distinct.source index b9925842b..77d3143fd 100644 --- a/src/test/regress/output/multi_complex_count_distinct.source +++ b/src/test/regress/output/multi_complex_count_distinct.source @@ -899,6 +899,102 @@ SELECT * REG AIR | 1607 (7 rows) +-- distinct on non-var (type cast/field select) columns are also +-- supported if grouped on distribution column +-- random is added to prevent flattening by postgresql +SELECT + l_orderkey, count(a::int), count(distinct a::int) + FROM ( + SELECT l_orderkey, l_orderkey * 1.5 a, random() b + FROM lineitem_hash) sub + GROUP BY 1 + ORDER BY 1 DESC + LIMIT 5; + l_orderkey | count | count +------------+-------+------- + 14947 | 2 | 1 + 14946 | 2 | 1 + 14945 | 6 | 1 + 14944 | 2 | 1 + 14919 | 1 | 1 +(5 rows) + +SELECT user_id, + count(sub.a::int), + count(DISTINCT sub.a::int), + count(DISTINCT (sub).a) +FROM + (SELECT user_id, + unnest(ARRAY[user_id * 1.5])a, + random() b + FROM users_table + ) sub +GROUP BY 1 +ORDER BY 1 DESC +LIMIT 5; + user_id | count | count | count +---------+-------+-------+------- + 6 | 10 | 1 | 1 + 5 | 26 | 1 | 1 + 4 | 23 | 1 | 1 + 3 | 17 | 1 | 1 + 2 | 18 | 1 | 1 +(5 rows) + +CREATE TYPE test_item AS +( + id INTEGER, + duration INTEGER +); +SELECT * FROM run_command_on_workers($$CREATE TYPE test_item AS +( + id INTEGER, + duration INTEGER +)$$) ORDER BY nodeport; + nodename | nodeport | success | result +-----------+----------+---------+------------- + localhost | 57637 | t | CREATE TYPE + localhost | 57638 | t | CREATE TYPE +(2 rows) + +CREATE TABLE test_count_distinct_array (key int, value int , value_arr test_item[]); +SELECT create_distributed_table('test_count_distinct_array', 'key'); + create_distributed_table +-------------------------- + +(1 row) + +INSERT INTO test_count_distinct_array SELECT i, i, ARRAY[(i,i)::test_item] FROM generate_Series(0, 1000) i; +SELECT + key, + count(DISTINCT value), + count(DISTINCT (item)."id"), + count(DISTINCT (item)."id" * 3) +FROM + ( + SELECT key, unnest(value_arr) as item, value FROM test_count_distinct_array + ) as sub +GROUP BY 1 +ORDER BY 1 DESC +LIMIT 5; + key | count | count | count +------+-------+-------+------- + 1000 | 1 | 1 | 1 + 999 | 1 | 1 | 1 + 998 | 1 | 1 | 1 + 997 | 1 | 1 | 1 + 996 | 1 | 1 | 1 +(5 rows) + +DROP TABLE test_count_distinct_array; +DROP TYPE test_item; +SELECT * FROM run_command_on_workers($$DROP TYPE test_item$$) ORDER BY nodeport; + nodename | nodeport | success | result +-----------+----------+---------+----------- + localhost | 57637 | t | DROP TYPE + localhost | 57638 | t | DROP TYPE +(2 rows) + -- other distinct aggregate are not supported SELECT * FROM (