Merge pull request #1993 from citusdata/subquery_pushdown_count_distinct

Fix count distinct using field select on top level query
pull/1998/head
Burak Yücesoy 2018-02-06 15:06:54 +03:00 committed by GitHub
commit cf5d258043
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 166 additions and 7 deletions

View File

@ -2888,6 +2888,8 @@ ErrorIfUnsupportedAggregateDistinct(Aggref *aggregateExpression,
* AggregateDistinctColumn checks if the given aggregate expression's distinct * AggregateDistinctColumn checks if the given aggregate expression's distinct
* clause is on a single column. If it is, the function finds and returns that * clause is on a single column. If it is, the function finds and returns that
* column. Otherwise, the function returns null. * column. Otherwise, the function returns null.
* The function expects to find a single column here; no FieldSelect or other
* expression is accepted as a column.
*/ */
static Var * static Var *
AggregateDistinctColumn(Aggref *aggregateExpression) AggregateDistinctColumn(Aggref *aggregateExpression)
@ -2941,7 +2943,8 @@ TablePartitioningSupportsDistinct(List *tableNodeList, MultiExtendedOp *opNode,
char partitionMethod = 0; char partitionMethod = 0;
List *shardList = NIL; List *shardList = NIL;
if (relationId == SUBQUERY_RELATION_ID) if (relationId == SUBQUERY_RELATION_ID ||
relationId == SUBQUERY_PUSHDOWN_RELATION_ID)
{ {
return true; return true;
} }

View File

@ -329,16 +329,16 @@ SELECT drop_and_recreate_partitioned_table('multi_column_partitioned');
-- partitions and their ranges -- partitions and their ranges
ALTER TABLE multi_column_partitioned ATTACH PARTITION multi_column_partition_1 FOR VALUES FROM (1, 10, '250') TO (1, 20, '250'); ALTER TABLE multi_column_partitioned ATTACH PARTITION multi_column_partition_1 FOR VALUES FROM (1, 10, '250') TO (1, 20, '250');
SELECT generate_alter_table_attach_partition_command('multi_column_partition_1'); SELECT generate_alter_table_attach_partition_command('multi_column_partition_1');
generate_alter_table_attach_partition_command generate_alter_table_attach_partition_command
-------------------------------------------------------------------------------------------------------------------------------------------- ------------------------------------------------------------------------------------------------------------------------------------------------
ALTER TABLE public.multi_column_partitioned ATTACH PARTITION public.multi_column_partition_1 FOR VALUES FROM (1, 10, 250) TO (1, 20, 250); ALTER TABLE public.multi_column_partitioned ATTACH PARTITION public.multi_column_partition_1 FOR VALUES FROM (1, 10, '250') TO (1, 20, '250');
(1 row) (1 row)
ALTER TABLE multi_column_partitioned ATTACH PARTITION multi_column_partition_2 FOR VALUES FROM (10, 1000, '2500') TO (MAXVALUE, MAXVALUE, MAXVALUE); ALTER TABLE multi_column_partitioned ATTACH PARTITION multi_column_partition_2 FOR VALUES FROM (10, 1000, '2500') TO (MAXVALUE, MAXVALUE, MAXVALUE);
SELECT generate_alter_table_attach_partition_command('multi_column_partition_2'); SELECT generate_alter_table_attach_partition_command('multi_column_partition_2');
generate_alter_table_attach_partition_command generate_alter_table_attach_partition_command
------------------------------------------------------------------------------------------------------------------------------------------------------------------ --------------------------------------------------------------------------------------------------------------------------------------------------------------------
ALTER TABLE public.multi_column_partitioned ATTACH PARTITION public.multi_column_partition_2 FOR VALUES FROM (10, 1000, 2500) TO (MAXVALUE, MAXVALUE, MAXVALUE); ALTER TABLE public.multi_column_partitioned ATTACH PARTITION public.multi_column_partition_2 FOR VALUES FROM (10, 1000, '2500') TO (MAXVALUE, MAXVALUE, MAXVALUE);
(1 row) (1 row)
SELECT generate_alter_table_detach_partition_command('multi_column_partition_2'); SELECT generate_alter_table_detach_partition_command('multi_column_partition_2');

View File

@ -433,6 +433,66 @@ SELECT *
2 DESC,1 DESC 2 DESC,1 DESC
LIMIT 10; LIMIT 10;
-- count(DISTINCT ...) on non-Var expressions (type casts / field selects) is
-- also supported when the query is grouped on the distribution column.
-- random() is added to the subquery select list to prevent flattening by
-- PostgreSQL.
--
-- Type-cast case: a is l_orderkey * 1.5 and is cast to int inside both the
-- plain count and the DISTINCT count.
-- NOTE(review): presumably l_orderkey is the distribution column of
-- lineitem_hash -- confirm against the table setup.
SELECT
l_orderkey, count(a::int), count(distinct a::int)
FROM (
SELECT l_orderkey, l_orderkey * 1.5 a, random() b
FROM lineitem_hash) sub
GROUP BY 1
ORDER BY 1 DESC
LIMIT 5;
-- Same shape of query over users_table: a is produced by unnest() of a
-- single-element array, and is counted through a cast (sub.a::int) as well as
-- through the parenthesized (sub).a reference. The result is grouped and
-- ordered by user_id (output column 1).
-- NOTE(review): presumably user_id is the distribution column of
-- users_table -- confirm against the table setup.
SELECT user_id,
count(sub.a::int),
count(DISTINCT sub.a::int),
count(DISTINCT (sub).a)
FROM
(SELECT user_id,
unnest(ARRAY[user_id * 1.5])a,
random() b
FROM users_table
) sub
GROUP BY 1
ORDER BY 1 DESC
LIMIT 5;
-- Composite type used as the array element type of test_count_distinct_array.
CREATE TYPE test_item AS
(
id INTEGER,
duration INTEGER
);
-- Create the same type on the workers; the statement is passed as a
-- dollar-quoted string to run_command_on_workers, and the per-node results
-- are ordered by nodeport.
SELECT * FROM run_command_on_workers($$CREATE TYPE test_item AS
(
id INTEGER,
duration INTEGER
)$$) ORDER BY nodeport;
-- Table distributed on key, with an array-of-test_item column.
CREATE TABLE test_count_distinct_array (key int, value int , value_arr test_item[]);
SELECT create_distributed_table('test_count_distinct_array', 'key');
-- Load one row per i in 0..1000; each row holds a one-element array whose
-- item is (i, i).
INSERT INTO test_count_distinct_array SELECT i, i, ARRAY[(i,i)::test_item] FROM generate_Series(0, 1000) i;
-- count(DISTINCT ...) on a field of the unnested composite value, both
-- directly ((item)."id") and inside an arithmetic expression
-- ((item)."id" * 3), next to a DISTINCT count on the plain value column.
-- The result is grouped and ordered by key (output column 1).
SELECT
key,
count(DISTINCT value),
count(DISTINCT (item)."id"),
count(DISTINCT (item)."id" * 3)
FROM
(
SELECT key, unnest(value_arr) as item, value FROM test_count_distinct_array
) as sub
GROUP BY 1
ORDER BY 1 DESC
LIMIT 5;
-- Clean up: drop the table first, then the test_item type, and finally drop
-- the type on the workers as well.
DROP TABLE test_count_distinct_array;
DROP TYPE test_item;
SELECT * FROM run_command_on_workers($$DROP TYPE test_item$$) ORDER BY nodeport;
-- other distinct aggregates are not supported -- other distinct aggregates are not supported
SELECT * SELECT *
FROM ( FROM (

View File

@ -899,6 +899,102 @@ SELECT *
REG AIR | 1607 REG AIR | 1607
(7 rows) (7 rows)
-- distinct on non-var (type cast/field select) columns are also
-- supported if grouped on distribution column
-- random is added to prevent flattening by postgresql
SELECT
l_orderkey, count(a::int), count(distinct a::int)
FROM (
SELECT l_orderkey, l_orderkey * 1.5 a, random() b
FROM lineitem_hash) sub
GROUP BY 1
ORDER BY 1 DESC
LIMIT 5;
l_orderkey | count | count
------------+-------+-------
14947 | 2 | 1
14946 | 2 | 1
14945 | 6 | 1
14944 | 2 | 1
14919 | 1 | 1
(5 rows)
SELECT user_id,
count(sub.a::int),
count(DISTINCT sub.a::int),
count(DISTINCT (sub).a)
FROM
(SELECT user_id,
unnest(ARRAY[user_id * 1.5])a,
random() b
FROM users_table
) sub
GROUP BY 1
ORDER BY 1 DESC
LIMIT 5;
user_id | count | count | count
---------+-------+-------+-------
6 | 10 | 1 | 1
5 | 26 | 1 | 1
4 | 23 | 1 | 1
3 | 17 | 1 | 1
2 | 18 | 1 | 1
(5 rows)
CREATE TYPE test_item AS
(
id INTEGER,
duration INTEGER
);
SELECT * FROM run_command_on_workers($$CREATE TYPE test_item AS
(
id INTEGER,
duration INTEGER
)$$) ORDER BY nodeport;
nodename | nodeport | success | result
-----------+----------+---------+-------------
localhost | 57637 | t | CREATE TYPE
localhost | 57638 | t | CREATE TYPE
(2 rows)
CREATE TABLE test_count_distinct_array (key int, value int , value_arr test_item[]);
SELECT create_distributed_table('test_count_distinct_array', 'key');
create_distributed_table
--------------------------
(1 row)
INSERT INTO test_count_distinct_array SELECT i, i, ARRAY[(i,i)::test_item] FROM generate_Series(0, 1000) i;
SELECT
key,
count(DISTINCT value),
count(DISTINCT (item)."id"),
count(DISTINCT (item)."id" * 3)
FROM
(
SELECT key, unnest(value_arr) as item, value FROM test_count_distinct_array
) as sub
GROUP BY 1
ORDER BY 1 DESC
LIMIT 5;
key | count | count | count
------+-------+-------+-------
1000 | 1 | 1 | 1
999 | 1 | 1 | 1
998 | 1 | 1 | 1
997 | 1 | 1 | 1
996 | 1 | 1 | 1
(5 rows)
DROP TABLE test_count_distinct_array;
DROP TYPE test_item;
SELECT * FROM run_command_on_workers($$DROP TYPE test_item$$) ORDER BY nodeport;
nodename | nodeport | success | result
-----------+----------+---------+-----------
localhost | 57637 | t | DROP TYPE
localhost | 57638 | t | DROP TYPE
(2 rows)
-- other distinct aggregates are not supported -- other distinct aggregates are not supported
SELECT * SELECT *
FROM ( FROM (