Merge pull request #1993 from citusdata/subquery_pushdown_count_distinct

Fix count distinct using field select on top level query
2018-02-06 15:06:54 +03:00 · 2018-02-06 15:06:54 +03:00 · cf5d258043
parent aba2f47cdf 678223224b
commit cf5d258043
4 changed files with 166 additions and 7 deletions
--- a/src/backend/distributed/planner/multi_logical_optimizer.c
+++ b/src/backend/distributed/planner/multi_logical_optimizer.c
@ -2888,6 +2888,8 @@ ErrorIfUnsupportedAggregateDistinct(Aggref *aggregateExpression,
 * AggregateDistinctColumn checks if the given aggregate expression's distinct
 * clause is on a single column. If it is, the function finds and returns that
 * column. Otherwise, the function returns null.
+ * The function expects to find a single column here, no FieldSelect or other
+ * expressions are accepted as a column.
 */
 static Var *
 AggregateDistinctColumn(Aggref *aggregateExpression)
@ -2941,7 +2943,8 @@ TablePartitioningSupportsDistinct(List *tableNodeList, MultiExtendedOp *opNode,
 		char partitionMethod = 0;
 		List *shardList = NIL;

-		if (relationId == SUBQUERY_RELATION_ID)
+		if (relationId == SUBQUERY_RELATION_ID ||
+			relationId == SUBQUERY_PUSHDOWN_RELATION_ID)
 		{
 			return true;
 		}
--- a/src/test/regress/expected/multi_partitioning_utils.out
+++ b/src/test/regress/expected/multi_partitioning_utils.out
@ -329,16 +329,16 @@ SELECT drop_and_recreate_partitioned_table('multi_column_partitioned');
 -- partitions and their ranges
 ALTER TABLE multi_column_partitioned ATTACH PARTITION multi_column_partition_1 FOR VALUES FROM (1, 10, '250') TO (1, 20, '250');
 SELECT generate_alter_table_attach_partition_command('multi_column_partition_1');
-                                               generate_alter_table_attach_partition_command                                                
--------------------------------------------------------------------------------------------------------------------------------------------
- ALTER TABLE public.multi_column_partitioned ATTACH PARTITION public.multi_column_partition_1 FOR VALUES FROM (1, 10, 250) TO (1, 20, 250);
+                                                 generate_alter_table_attach_partition_command                                                  
+------------------------------------------------------------------------------------------------------------------------------------------------
+ ALTER TABLE public.multi_column_partitioned ATTACH PARTITION public.multi_column_partition_1 FOR VALUES FROM (1, 10, '250') TO (1, 20, '250');
 (1 row)

 ALTER TABLE multi_column_partitioned ATTACH PARTITION multi_column_partition_2 FOR VALUES FROM (10, 1000, '2500') TO (MAXVALUE, MAXVALUE, MAXVALUE);
 SELECT generate_alter_table_attach_partition_command('multi_column_partition_2');
-                                                          generate_alter_table_attach_partition_command                                                           
------------------------------------------------------------------------------------------------------------------------------------------------------------------
- ALTER TABLE public.multi_column_partitioned ATTACH PARTITION public.multi_column_partition_2 FOR VALUES FROM (10, 1000, 2500) TO (MAXVALUE, MAXVALUE, MAXVALUE);
+                                                           generate_alter_table_attach_partition_command                                                            
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ ALTER TABLE public.multi_column_partitioned ATTACH PARTITION public.multi_column_partition_2 FOR VALUES FROM (10, 1000, '2500') TO (MAXVALUE, MAXVALUE, MAXVALUE);
 (1 row)

 SELECT generate_alter_table_detach_partition_command('multi_column_partition_2');
--- a/src/test/regress/input/multi_complex_count_distinct.source
+++ b/src/test/regress/input/multi_complex_count_distinct.source
@ -433,6 +433,66 @@ SELECT *
 		2 DESC,1 DESC
 	LIMIT 10;

+-- distinct on non-var (type cast/field select) columns are also
+-- supported if grouped on distribution column
+-- random is added to prevent flattening by postgresql
+SELECT 
+	l_orderkey, count(a::int), count(distinct a::int)
+	FROM (
+		SELECT l_orderkey, l_orderkey * 1.5 a, random() b
+			FROM lineitem_hash) sub
+	GROUP BY 1
+	ORDER BY 1 DESC
+	LIMIT 5;
+
+SELECT user_id,
+       count(sub.a::int),
+       count(DISTINCT sub.a::int),
+       count(DISTINCT (sub).a)
+FROM
+  (SELECT user_id,
+          unnest(ARRAY[user_id * 1.5])a,
+          random() b
+   FROM users_table
+   ) sub
+GROUP BY 1
+ORDER BY 1 DESC
+LIMIT 5;
+
+CREATE TYPE test_item AS
+(
+  id       INTEGER,
+  duration INTEGER
+);
+SELECT * FROM run_command_on_workers($$CREATE TYPE test_item AS
+(
+  id       INTEGER,
+  duration INTEGER
+)$$) ORDER BY nodeport;
+
+CREATE TABLE test_count_distinct_array (key int, value int ,  value_arr test_item[]);
+SELECT create_distributed_table('test_count_distinct_array', 'key');
+
+INSERT INTO test_count_distinct_array SELECT i, i, ARRAY[(i,i)::test_item] FROM generate_Series(0, 1000) i;
+
+SELECT
+	key,
+	count(DISTINCT value),
+	count(DISTINCT (item)."id"),
+	count(DISTINCT (item)."id" * 3)
+FROM
+	(
+		SELECT key, unnest(value_arr) as item, value FROM test_count_distinct_array
+	) as sub
+GROUP BY 1
+ORDER BY 1 DESC
+LIMIT 5;
+
+DROP TABLE test_count_distinct_array;
+
+DROP TYPE test_item;
+SELECT * FROM run_command_on_workers($$DROP TYPE test_item$$) ORDER BY nodeport;
+
 -- other distinct aggregate are not supported
 SELECT *
 	FROM (
--- a/src/test/regress/output/multi_complex_count_distinct.source
+++ b/src/test/regress/output/multi_complex_count_distinct.source
@ -899,6 +899,102 @@ SELECT *
 REG AIR    |  1607
 (7 rows)

+-- distinct on non-var (type cast/field select) columns are also
+-- supported if grouped on distribution column
+-- random is added to prevent flattening by postgresql
+SELECT 
+	l_orderkey, count(a::int), count(distinct a::int)
+	FROM (
+		SELECT l_orderkey, l_orderkey * 1.5 a, random() b
+			FROM lineitem_hash) sub
+	GROUP BY 1
+	ORDER BY 1 DESC
+	LIMIT 5;
+ l_orderkey | count | count 
+------------+-------+-------
+      14947 |     2 |     1
+      14946 |     2 |     1
+      14945 |     6 |     1
+      14944 |     2 |     1
+      14919 |     1 |     1
+(5 rows)
+
+SELECT user_id,
+       count(sub.a::int),
+       count(DISTINCT sub.a::int),
+       count(DISTINCT (sub).a)
+FROM
+  (SELECT user_id,
+          unnest(ARRAY[user_id * 1.5])a,
+          random() b
+   FROM users_table
+   ) sub
+GROUP BY 1
+ORDER BY 1 DESC
+LIMIT 5;
+ user_id | count | count | count 
+---------+-------+-------+-------
+       6 |    10 |     1 |     1
+       5 |    26 |     1 |     1
+       4 |    23 |     1 |     1
+       3 |    17 |     1 |     1
+       2 |    18 |     1 |     1
+(5 rows)
+
+CREATE TYPE test_item AS
+(
+  id       INTEGER,
+  duration INTEGER
+);
+SELECT * FROM run_command_on_workers($$CREATE TYPE test_item AS
+(
+  id       INTEGER,
+  duration INTEGER
+)$$) ORDER BY nodeport;
+ nodename  | nodeport | success |   result    
+-----------+----------+---------+-------------
+ localhost |    57637 | t       | CREATE TYPE
+ localhost |    57638 | t       | CREATE TYPE
+(2 rows)
+
+CREATE TABLE test_count_distinct_array (key int, value int ,  value_arr test_item[]);
+SELECT create_distributed_table('test_count_distinct_array', 'key');
+ create_distributed_table 
+--------------------------
+ 
+(1 row)
+
+INSERT INTO test_count_distinct_array SELECT i, i, ARRAY[(i,i)::test_item] FROM generate_Series(0, 1000) i;
+SELECT
+	key,
+	count(DISTINCT value),
+	count(DISTINCT (item)."id"),
+	count(DISTINCT (item)."id" * 3)
+FROM
+	(
+		SELECT key, unnest(value_arr) as item, value FROM test_count_distinct_array
+	) as sub
+GROUP BY 1
+ORDER BY 1 DESC
+LIMIT 5;
+ key  | count | count | count 
+------+-------+-------+-------
+ 1000 |     1 |     1 |     1
+  999 |     1 |     1 |     1
+  998 |     1 |     1 |     1
+  997 |     1 |     1 |     1
+  996 |     1 |     1 |     1
+(5 rows)
+
+DROP TABLE test_count_distinct_array;
+DROP TYPE test_item;
+SELECT * FROM run_command_on_workers($$DROP TYPE test_item$$) ORDER BY nodeport;
+ nodename  | nodeport | success |  result   
+-----------+----------+---------+-----------
+ localhost |    57637 | t       | DROP TYPE
+ localhost |    57638 | t       | DROP TYPE
+(2 rows)
+
 -- other distinct aggregate are not supported
 SELECT *
 	FROM (