mirror of https://github.com/citusdata/citus.git
Merge pull request #1993 from citusdata/subquery_pushdown_count_distinct
Fix count distinct using field select on top level querypull/1998/head
commit
cf5d258043
|
@ -2888,6 +2888,8 @@ ErrorIfUnsupportedAggregateDistinct(Aggref *aggregateExpression,
|
||||||
* AggregateDistinctColumn checks if the given aggregate expression's distinct
|
* AggregateDistinctColumn checks if the given aggregate expression's distinct
|
||||||
* clause is on a single column. If it is, the function finds and returns that
|
* clause is on a single column. If it is, the function finds and returns that
|
||||||
* column. Otherwise, the function returns null.
|
* column. Otherwise, the function returns null.
|
||||||
|
* The function expects to find a single column here, no FieldSelect or other
|
||||||
|
* expressions are accepted as a column.
|
||||||
*/
|
*/
|
||||||
static Var *
|
static Var *
|
||||||
AggregateDistinctColumn(Aggref *aggregateExpression)
|
AggregateDistinctColumn(Aggref *aggregateExpression)
|
||||||
|
@ -2941,7 +2943,8 @@ TablePartitioningSupportsDistinct(List *tableNodeList, MultiExtendedOp *opNode,
|
||||||
char partitionMethod = 0;
|
char partitionMethod = 0;
|
||||||
List *shardList = NIL;
|
List *shardList = NIL;
|
||||||
|
|
||||||
if (relationId == SUBQUERY_RELATION_ID)
|
if (relationId == SUBQUERY_RELATION_ID ||
|
||||||
|
relationId == SUBQUERY_PUSHDOWN_RELATION_ID)
|
||||||
{
|
{
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
|
@ -329,16 +329,16 @@ SELECT drop_and_recreate_partitioned_table('multi_column_partitioned');
|
||||||
-- partitions and their ranges
|
-- partitions and their ranges
|
||||||
ALTER TABLE multi_column_partitioned ATTACH PARTITION multi_column_partition_1 FOR VALUES FROM (1, 10, '250') TO (1, 20, '250');
|
ALTER TABLE multi_column_partitioned ATTACH PARTITION multi_column_partition_1 FOR VALUES FROM (1, 10, '250') TO (1, 20, '250');
|
||||||
SELECT generate_alter_table_attach_partition_command('multi_column_partition_1');
|
SELECT generate_alter_table_attach_partition_command('multi_column_partition_1');
|
||||||
generate_alter_table_attach_partition_command
|
generate_alter_table_attach_partition_command
|
||||||
--------------------------------------------------------------------------------------------------------------------------------------------
|
------------------------------------------------------------------------------------------------------------------------------------------------
|
||||||
ALTER TABLE public.multi_column_partitioned ATTACH PARTITION public.multi_column_partition_1 FOR VALUES FROM (1, 10, 250) TO (1, 20, 250);
|
ALTER TABLE public.multi_column_partitioned ATTACH PARTITION public.multi_column_partition_1 FOR VALUES FROM (1, 10, '250') TO (1, 20, '250');
|
||||||
(1 row)
|
(1 row)
|
||||||
|
|
||||||
ALTER TABLE multi_column_partitioned ATTACH PARTITION multi_column_partition_2 FOR VALUES FROM (10, 1000, '2500') TO (MAXVALUE, MAXVALUE, MAXVALUE);
|
ALTER TABLE multi_column_partitioned ATTACH PARTITION multi_column_partition_2 FOR VALUES FROM (10, 1000, '2500') TO (MAXVALUE, MAXVALUE, MAXVALUE);
|
||||||
SELECT generate_alter_table_attach_partition_command('multi_column_partition_2');
|
SELECT generate_alter_table_attach_partition_command('multi_column_partition_2');
|
||||||
generate_alter_table_attach_partition_command
|
generate_alter_table_attach_partition_command
|
||||||
------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
--------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
||||||
ALTER TABLE public.multi_column_partitioned ATTACH PARTITION public.multi_column_partition_2 FOR VALUES FROM (10, 1000, 2500) TO (MAXVALUE, MAXVALUE, MAXVALUE);
|
ALTER TABLE public.multi_column_partitioned ATTACH PARTITION public.multi_column_partition_2 FOR VALUES FROM (10, 1000, '2500') TO (MAXVALUE, MAXVALUE, MAXVALUE);
|
||||||
(1 row)
|
(1 row)
|
||||||
|
|
||||||
SELECT generate_alter_table_detach_partition_command('multi_column_partition_2');
|
SELECT generate_alter_table_detach_partition_command('multi_column_partition_2');
|
||||||
|
|
|
@ -433,6 +433,66 @@ SELECT *
|
||||||
2 DESC,1 DESC
|
2 DESC,1 DESC
|
||||||
LIMIT 10;
|
LIMIT 10;
|
||||||
|
|
||||||
|
-- distinct on non-var (type cast/field select) columns are also
|
||||||
|
-- supported if grouped on distribution column
|
||||||
|
-- random is added to prevent flattening by postgresql
|
||||||
|
SELECT
|
||||||
|
l_orderkey, count(a::int), count(distinct a::int)
|
||||||
|
FROM (
|
||||||
|
SELECT l_orderkey, l_orderkey * 1.5 a, random() b
|
||||||
|
FROM lineitem_hash) sub
|
||||||
|
GROUP BY 1
|
||||||
|
ORDER BY 1 DESC
|
||||||
|
LIMIT 5;
|
||||||
|
|
||||||
|
SELECT user_id,
|
||||||
|
count(sub.a::int),
|
||||||
|
count(DISTINCT sub.a::int),
|
||||||
|
count(DISTINCT (sub).a)
|
||||||
|
FROM
|
||||||
|
(SELECT user_id,
|
||||||
|
unnest(ARRAY[user_id * 1.5])a,
|
||||||
|
random() b
|
||||||
|
FROM users_table
|
||||||
|
) sub
|
||||||
|
GROUP BY 1
|
||||||
|
ORDER BY 1 DESC
|
||||||
|
LIMIT 5;
|
||||||
|
|
||||||
|
CREATE TYPE test_item AS
|
||||||
|
(
|
||||||
|
id INTEGER,
|
||||||
|
duration INTEGER
|
||||||
|
);
|
||||||
|
SELECT * FROM run_command_on_workers($$CREATE TYPE test_item AS
|
||||||
|
(
|
||||||
|
id INTEGER,
|
||||||
|
duration INTEGER
|
||||||
|
)$$) ORDER BY nodeport;
|
||||||
|
|
||||||
|
CREATE TABLE test_count_distinct_array (key int, value int , value_arr test_item[]);
|
||||||
|
SELECT create_distributed_table('test_count_distinct_array', 'key');
|
||||||
|
|
||||||
|
INSERT INTO test_count_distinct_array SELECT i, i, ARRAY[(i,i)::test_item] FROM generate_Series(0, 1000) i;
|
||||||
|
|
||||||
|
SELECT
|
||||||
|
key,
|
||||||
|
count(DISTINCT value),
|
||||||
|
count(DISTINCT (item)."id"),
|
||||||
|
count(DISTINCT (item)."id" * 3)
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
SELECT key, unnest(value_arr) as item, value FROM test_count_distinct_array
|
||||||
|
) as sub
|
||||||
|
GROUP BY 1
|
||||||
|
ORDER BY 1 DESC
|
||||||
|
LIMIT 5;
|
||||||
|
|
||||||
|
DROP TABLE test_count_distinct_array;
|
||||||
|
|
||||||
|
DROP TYPE test_item;
|
||||||
|
SELECT * FROM run_command_on_workers($$DROP TYPE test_item$$) ORDER BY nodeport;
|
||||||
|
|
||||||
-- other distinct aggregate are not supported
|
-- other distinct aggregate are not supported
|
||||||
SELECT *
|
SELECT *
|
||||||
FROM (
|
FROM (
|
||||||
|
|
|
@ -899,6 +899,102 @@ SELECT *
|
||||||
REG AIR | 1607
|
REG AIR | 1607
|
||||||
(7 rows)
|
(7 rows)
|
||||||
|
|
||||||
|
-- distinct on non-var (type cast/field select) columns are also
|
||||||
|
-- supported if grouped on distribution column
|
||||||
|
-- random is added to prevent flattening by postgresql
|
||||||
|
SELECT
|
||||||
|
l_orderkey, count(a::int), count(distinct a::int)
|
||||||
|
FROM (
|
||||||
|
SELECT l_orderkey, l_orderkey * 1.5 a, random() b
|
||||||
|
FROM lineitem_hash) sub
|
||||||
|
GROUP BY 1
|
||||||
|
ORDER BY 1 DESC
|
||||||
|
LIMIT 5;
|
||||||
|
l_orderkey | count | count
|
||||||
|
------------+-------+-------
|
||||||
|
14947 | 2 | 1
|
||||||
|
14946 | 2 | 1
|
||||||
|
14945 | 6 | 1
|
||||||
|
14944 | 2 | 1
|
||||||
|
14919 | 1 | 1
|
||||||
|
(5 rows)
|
||||||
|
|
||||||
|
SELECT user_id,
|
||||||
|
count(sub.a::int),
|
||||||
|
count(DISTINCT sub.a::int),
|
||||||
|
count(DISTINCT (sub).a)
|
||||||
|
FROM
|
||||||
|
(SELECT user_id,
|
||||||
|
unnest(ARRAY[user_id * 1.5])a,
|
||||||
|
random() b
|
||||||
|
FROM users_table
|
||||||
|
) sub
|
||||||
|
GROUP BY 1
|
||||||
|
ORDER BY 1 DESC
|
||||||
|
LIMIT 5;
|
||||||
|
user_id | count | count | count
|
||||||
|
---------+-------+-------+-------
|
||||||
|
6 | 10 | 1 | 1
|
||||||
|
5 | 26 | 1 | 1
|
||||||
|
4 | 23 | 1 | 1
|
||||||
|
3 | 17 | 1 | 1
|
||||||
|
2 | 18 | 1 | 1
|
||||||
|
(5 rows)
|
||||||
|
|
||||||
|
CREATE TYPE test_item AS
|
||||||
|
(
|
||||||
|
id INTEGER,
|
||||||
|
duration INTEGER
|
||||||
|
);
|
||||||
|
SELECT * FROM run_command_on_workers($$CREATE TYPE test_item AS
|
||||||
|
(
|
||||||
|
id INTEGER,
|
||||||
|
duration INTEGER
|
||||||
|
)$$) ORDER BY nodeport;
|
||||||
|
nodename | nodeport | success | result
|
||||||
|
-----------+----------+---------+-------------
|
||||||
|
localhost | 57637 | t | CREATE TYPE
|
||||||
|
localhost | 57638 | t | CREATE TYPE
|
||||||
|
(2 rows)
|
||||||
|
|
||||||
|
CREATE TABLE test_count_distinct_array (key int, value int , value_arr test_item[]);
|
||||||
|
SELECT create_distributed_table('test_count_distinct_array', 'key');
|
||||||
|
create_distributed_table
|
||||||
|
--------------------------
|
||||||
|
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
INSERT INTO test_count_distinct_array SELECT i, i, ARRAY[(i,i)::test_item] FROM generate_Series(0, 1000) i;
|
||||||
|
SELECT
|
||||||
|
key,
|
||||||
|
count(DISTINCT value),
|
||||||
|
count(DISTINCT (item)."id"),
|
||||||
|
count(DISTINCT (item)."id" * 3)
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
SELECT key, unnest(value_arr) as item, value FROM test_count_distinct_array
|
||||||
|
) as sub
|
||||||
|
GROUP BY 1
|
||||||
|
ORDER BY 1 DESC
|
||||||
|
LIMIT 5;
|
||||||
|
key | count | count | count
|
||||||
|
------+-------+-------+-------
|
||||||
|
1000 | 1 | 1 | 1
|
||||||
|
999 | 1 | 1 | 1
|
||||||
|
998 | 1 | 1 | 1
|
||||||
|
997 | 1 | 1 | 1
|
||||||
|
996 | 1 | 1 | 1
|
||||||
|
(5 rows)
|
||||||
|
|
||||||
|
DROP TABLE test_count_distinct_array;
|
||||||
|
DROP TYPE test_item;
|
||||||
|
SELECT * FROM run_command_on_workers($$DROP TYPE test_item$$) ORDER BY nodeport;
|
||||||
|
nodename | nodeport | success | result
|
||||||
|
-----------+----------+---------+-----------
|
||||||
|
localhost | 57637 | t | DROP TYPE
|
||||||
|
localhost | 57638 | t | DROP TYPE
|
||||||
|
(2 rows)
|
||||||
|
|
||||||
-- other distinct aggregate are not supported
|
-- other distinct aggregate are not supported
|
||||||
SELECT *
|
SELECT *
|
||||||
FROM (
|
FROM (
|
||||||
|
|
Loading…
Reference in New Issue