mirror of https://github.com/citusdata/citus.git
Merge pull request #1993 from citusdata/subquery_pushdown_count_distinct
Fix count distinct using field select on top level querypull/1998/head
commit
cf5d258043
|
@ -2888,6 +2888,8 @@ ErrorIfUnsupportedAggregateDistinct(Aggref *aggregateExpression,
|
|||
* AggregateDistinctColumn checks if the given aggregate expression's distinct
|
||||
* clause is on a single column. If it is, the function finds and returns that
|
||||
* column. Otherwise, the function returns null.
|
||||
* The function expects to find a single column here, no FieldSelect or other
|
||||
* expressions are accepted as a column.
|
||||
*/
|
||||
static Var *
|
||||
AggregateDistinctColumn(Aggref *aggregateExpression)
|
||||
|
@ -2941,7 +2943,8 @@ TablePartitioningSupportsDistinct(List *tableNodeList, MultiExtendedOp *opNode,
|
|||
char partitionMethod = 0;
|
||||
List *shardList = NIL;
|
||||
|
||||
if (relationId == SUBQUERY_RELATION_ID)
|
||||
if (relationId == SUBQUERY_RELATION_ID ||
|
||||
relationId == SUBQUERY_PUSHDOWN_RELATION_ID)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -329,16 +329,16 @@ SELECT drop_and_recreate_partitioned_table('multi_column_partitioned');
|
|||
-- partitions and their ranges
|
||||
ALTER TABLE multi_column_partitioned ATTACH PARTITION multi_column_partition_1 FOR VALUES FROM (1, 10, '250') TO (1, 20, '250');
|
||||
SELECT generate_alter_table_attach_partition_command('multi_column_partition_1');
|
||||
generate_alter_table_attach_partition_command
|
||||
--------------------------------------------------------------------------------------------------------------------------------------------
|
||||
ALTER TABLE public.multi_column_partitioned ATTACH PARTITION public.multi_column_partition_1 FOR VALUES FROM (1, 10, 250) TO (1, 20, 250);
|
||||
generate_alter_table_attach_partition_command
|
||||
------------------------------------------------------------------------------------------------------------------------------------------------
|
||||
ALTER TABLE public.multi_column_partitioned ATTACH PARTITION public.multi_column_partition_1 FOR VALUES FROM (1, 10, '250') TO (1, 20, '250');
|
||||
(1 row)
|
||||
|
||||
ALTER TABLE multi_column_partitioned ATTACH PARTITION multi_column_partition_2 FOR VALUES FROM (10, 1000, '2500') TO (MAXVALUE, MAXVALUE, MAXVALUE);
|
||||
SELECT generate_alter_table_attach_partition_command('multi_column_partition_2');
|
||||
generate_alter_table_attach_partition_command
|
||||
------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
||||
ALTER TABLE public.multi_column_partitioned ATTACH PARTITION public.multi_column_partition_2 FOR VALUES FROM (10, 1000, 2500) TO (MAXVALUE, MAXVALUE, MAXVALUE);
|
||||
generate_alter_table_attach_partition_command
|
||||
--------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
||||
ALTER TABLE public.multi_column_partitioned ATTACH PARTITION public.multi_column_partition_2 FOR VALUES FROM (10, 1000, '2500') TO (MAXVALUE, MAXVALUE, MAXVALUE);
|
||||
(1 row)
|
||||
|
||||
SELECT generate_alter_table_detach_partition_command('multi_column_partition_2');
|
||||
|
|
|
@ -433,6 +433,66 @@ SELECT *
|
|||
2 DESC,1 DESC
|
||||
LIMIT 10;
|
||||
|
||||
-- distinct on non-var (type cast/field select) columns are also
|
||||
-- supported if grouped on distribution column
|
||||
-- random is added to prevent flattening by postgresql
|
||||
SELECT
|
||||
l_orderkey, count(a::int), count(distinct a::int)
|
||||
FROM (
|
||||
SELECT l_orderkey, l_orderkey * 1.5 a, random() b
|
||||
FROM lineitem_hash) sub
|
||||
GROUP BY 1
|
||||
ORDER BY 1 DESC
|
||||
LIMIT 5;
|
||||
|
||||
SELECT user_id,
|
||||
count(sub.a::int),
|
||||
count(DISTINCT sub.a::int),
|
||||
count(DISTINCT (sub).a)
|
||||
FROM
|
||||
(SELECT user_id,
|
||||
unnest(ARRAY[user_id * 1.5])a,
|
||||
random() b
|
||||
FROM users_table
|
||||
) sub
|
||||
GROUP BY 1
|
||||
ORDER BY 1 DESC
|
||||
LIMIT 5;
|
||||
|
||||
CREATE TYPE test_item AS
|
||||
(
|
||||
id INTEGER,
|
||||
duration INTEGER
|
||||
);
|
||||
SELECT * FROM run_command_on_workers($$CREATE TYPE test_item AS
|
||||
(
|
||||
id INTEGER,
|
||||
duration INTEGER
|
||||
)$$) ORDER BY nodeport;
|
||||
|
||||
CREATE TABLE test_count_distinct_array (key int, value int , value_arr test_item[]);
|
||||
SELECT create_distributed_table('test_count_distinct_array', 'key');
|
||||
|
||||
INSERT INTO test_count_distinct_array SELECT i, i, ARRAY[(i,i)::test_item] FROM generate_Series(0, 1000) i;
|
||||
|
||||
SELECT
|
||||
key,
|
||||
count(DISTINCT value),
|
||||
count(DISTINCT (item)."id"),
|
||||
count(DISTINCT (item)."id" * 3)
|
||||
FROM
|
||||
(
|
||||
SELECT key, unnest(value_arr) as item, value FROM test_count_distinct_array
|
||||
) as sub
|
||||
GROUP BY 1
|
||||
ORDER BY 1 DESC
|
||||
LIMIT 5;
|
||||
|
||||
DROP TABLE test_count_distinct_array;
|
||||
|
||||
DROP TYPE test_item;
|
||||
SELECT * FROM run_command_on_workers($$DROP TYPE test_item$$) ORDER BY nodeport;
|
||||
|
||||
-- other distinct aggregate are not supported
|
||||
SELECT *
|
||||
FROM (
|
||||
|
|
|
@ -899,6 +899,102 @@ SELECT *
|
|||
REG AIR | 1607
|
||||
(7 rows)
|
||||
|
||||
-- distinct on non-var (type cast/field select) columns are also
|
||||
-- supported if grouped on distribution column
|
||||
-- random is added to prevent flattening by postgresql
|
||||
SELECT
|
||||
l_orderkey, count(a::int), count(distinct a::int)
|
||||
FROM (
|
||||
SELECT l_orderkey, l_orderkey * 1.5 a, random() b
|
||||
FROM lineitem_hash) sub
|
||||
GROUP BY 1
|
||||
ORDER BY 1 DESC
|
||||
LIMIT 5;
|
||||
l_orderkey | count | count
|
||||
------------+-------+-------
|
||||
14947 | 2 | 1
|
||||
14946 | 2 | 1
|
||||
14945 | 6 | 1
|
||||
14944 | 2 | 1
|
||||
14919 | 1 | 1
|
||||
(5 rows)
|
||||
|
||||
SELECT user_id,
|
||||
count(sub.a::int),
|
||||
count(DISTINCT sub.a::int),
|
||||
count(DISTINCT (sub).a)
|
||||
FROM
|
||||
(SELECT user_id,
|
||||
unnest(ARRAY[user_id * 1.5])a,
|
||||
random() b
|
||||
FROM users_table
|
||||
) sub
|
||||
GROUP BY 1
|
||||
ORDER BY 1 DESC
|
||||
LIMIT 5;
|
||||
user_id | count | count | count
|
||||
---------+-------+-------+-------
|
||||
6 | 10 | 1 | 1
|
||||
5 | 26 | 1 | 1
|
||||
4 | 23 | 1 | 1
|
||||
3 | 17 | 1 | 1
|
||||
2 | 18 | 1 | 1
|
||||
(5 rows)
|
||||
|
||||
CREATE TYPE test_item AS
|
||||
(
|
||||
id INTEGER,
|
||||
duration INTEGER
|
||||
);
|
||||
SELECT * FROM run_command_on_workers($$CREATE TYPE test_item AS
|
||||
(
|
||||
id INTEGER,
|
||||
duration INTEGER
|
||||
)$$) ORDER BY nodeport;
|
||||
nodename | nodeport | success | result
|
||||
-----------+----------+---------+-------------
|
||||
localhost | 57637 | t | CREATE TYPE
|
||||
localhost | 57638 | t | CREATE TYPE
|
||||
(2 rows)
|
||||
|
||||
CREATE TABLE test_count_distinct_array (key int, value int , value_arr test_item[]);
|
||||
SELECT create_distributed_table('test_count_distinct_array', 'key');
|
||||
create_distributed_table
|
||||
--------------------------
|
||||
|
||||
(1 row)
|
||||
|
||||
INSERT INTO test_count_distinct_array SELECT i, i, ARRAY[(i,i)::test_item] FROM generate_Series(0, 1000) i;
|
||||
SELECT
|
||||
key,
|
||||
count(DISTINCT value),
|
||||
count(DISTINCT (item)."id"),
|
||||
count(DISTINCT (item)."id" * 3)
|
||||
FROM
|
||||
(
|
||||
SELECT key, unnest(value_arr) as item, value FROM test_count_distinct_array
|
||||
) as sub
|
||||
GROUP BY 1
|
||||
ORDER BY 1 DESC
|
||||
LIMIT 5;
|
||||
key | count | count | count
|
||||
------+-------+-------+-------
|
||||
1000 | 1 | 1 | 1
|
||||
999 | 1 | 1 | 1
|
||||
998 | 1 | 1 | 1
|
||||
997 | 1 | 1 | 1
|
||||
996 | 1 | 1 | 1
|
||||
(5 rows)
|
||||
|
||||
DROP TABLE test_count_distinct_array;
|
||||
DROP TYPE test_item;
|
||||
SELECT * FROM run_command_on_workers($$DROP TYPE test_item$$) ORDER BY nodeport;
|
||||
nodename | nodeport | success | result
|
||||
-----------+----------+---------+-----------
|
||||
localhost | 57637 | t | DROP TYPE
|
||||
localhost | 57638 | t | DROP TYPE
|
||||
(2 rows)
|
||||
|
||||
-- other distinct aggregate are not supported
|
||||
SELECT *
|
||||
FROM (
|
||||
|
|
Loading…
Reference in New Issue