Merge pull request #1993 from citusdata/subquery_pushdown_count_distinct

Fix count distinct using field select on top level query
2018-02-06 15:06:54 +03:00 · 2018-02-06 15:06:54 +03:00 · cf5d258043
parent aba2f47cdf 678223224b
commit cf5d258043
4 changed files with 166 additions and 7 deletions
--- a/src/backend/distributed/planner/multi_logical_optimizer.c
+++ b/src/backend/distributed/planner/multi_logical_optimizer.c
@ -2888,6 +2888,8 @@ ErrorIfUnsupportedAggregateDistinct(Aggref *aggregateExpression,
 * AggregateDistinctColumn checks if the given aggregate expression's distinct
 * clause is on a single column. If it is, the function finds and returns that
 * column. Otherwise, the function returns null.
 * The function expects to find a single column here, no FieldSelect or other
 * expressions are accepted as a column.
 */
 static Var *
 AggregateDistinctColumn(Aggref *aggregateExpression)
@ -2941,7 +2943,8 @@ TablePartitioningSupportsDistinct(List *tableNodeList, MultiExtendedOp *opNode,
 		char partitionMethod = 0;
 		List *shardList = NIL;
-		if (relationId == SUBQUERY_RELATION_ID)
+		if (relationId == SUBQUERY_RELATION_ID ||
 			relationId == SUBQUERY_PUSHDOWN_RELATION_ID)
 		{
 			return true;
 		}
--- a/src/test/regress/expected/multi_partitioning_utils.out
+++ b/src/test/regress/expected/multi_partitioning_utils.out
@ -329,16 +329,16 @@ SELECT drop_and_recreate_partitioned_table('multi_column_partitioned');
 -- partitions and their ranges
 ALTER TABLE multi_column_partitioned ATTACH PARTITION multi_column_partition_1 FOR VALUES FROM (1, 10, '250') TO (1, 20, '250');
 SELECT generate_alter_table_attach_partition_command('multi_column_partition_1');
-                                               generate_alter_table_attach_partition_command                                                
+                                                 generate_alter_table_attach_partition_command                                                  
--------------------------------------------------------------------------------------------------------------------------------------------
+------------------------------------------------------------------------------------------------------------------------------------------------
- ALTER TABLE public.multi_column_partitioned ATTACH PARTITION public.multi_column_partition_1 FOR VALUES FROM (1, 10, 250) TO (1, 20, 250);
+ ALTER TABLE public.multi_column_partitioned ATTACH PARTITION public.multi_column_partition_1 FOR VALUES FROM (1, 10, '250') TO (1, 20, '250');
 (1 row)
 ALTER TABLE multi_column_partitioned ATTACH PARTITION multi_column_partition_2 FOR VALUES FROM (10, 1000, '2500') TO (MAXVALUE, MAXVALUE, MAXVALUE);
 SELECT generate_alter_table_attach_partition_command('multi_column_partition_2');
-                                                          generate_alter_table_attach_partition_command                                                           
+                                                           generate_alter_table_attach_partition_command                                                            
------------------------------------------------------------------------------------------------------------------------------------------------------------------
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------
- ALTER TABLE public.multi_column_partitioned ATTACH PARTITION public.multi_column_partition_2 FOR VALUES FROM (10, 1000, 2500) TO (MAXVALUE, MAXVALUE, MAXVALUE);
+ ALTER TABLE public.multi_column_partitioned ATTACH PARTITION public.multi_column_partition_2 FOR VALUES FROM (10, 1000, '2500') TO (MAXVALUE, MAXVALUE, MAXVALUE);
 (1 row)
 SELECT generate_alter_table_detach_partition_command('multi_column_partition_2');
--- a/src/test/regress/input/multi_complex_count_distinct.source
+++ b/src/test/regress/input/multi_complex_count_distinct.source
@ -433,6 +433,66 @@ SELECT *
 		2 DESC,1 DESC
 	LIMIT 10;
 -- distinct on non-var (type cast/field select) columns are also
 -- supported if grouped on distribution column
 -- random is added to prevent flattening by postgresql
 SELECT 
 	l_orderkey, count(a::int), count(distinct a::int)
 	FROM (
 		SELECT l_orderkey, l_orderkey * 1.5 a, random() b
 			FROM lineitem_hash) sub
 	GROUP BY 1
 	ORDER BY 1 DESC
 	LIMIT 5;
 SELECT user_id,
       count(sub.a::int),
       count(DISTINCT sub.a::int),
       count(DISTINCT (sub).a)
 FROM
  (SELECT user_id,
          unnest(ARRAY[user_id * 1.5])a,
          random() b
   FROM users_table
   ) sub
 GROUP BY 1
 ORDER BY 1 DESC
 LIMIT 5;
 CREATE TYPE test_item AS
 (
  id       INTEGER,
  duration INTEGER
 );
 SELECT * FROM run_command_on_workers($$CREATE TYPE test_item AS
 (
  id       INTEGER,
  duration INTEGER
 )$$) ORDER BY nodeport;
 CREATE TABLE test_count_distinct_array (key int, value int ,  value_arr test_item[]);
 SELECT create_distributed_table('test_count_distinct_array', 'key');
 INSERT INTO test_count_distinct_array SELECT i, i, ARRAY[(i,i)::test_item] FROM generate_Series(0, 1000) i;
 SELECT
 	key,
 	count(DISTINCT value),
 	count(DISTINCT (item)."id"),
 	count(DISTINCT (item)."id" * 3)
 FROM
 	(
 		SELECT key, unnest(value_arr) as item, value FROM test_count_distinct_array
 	) as sub
 GROUP BY 1
 ORDER BY 1 DESC
 LIMIT 5;
 DROP TABLE test_count_distinct_array;
 DROP TYPE test_item;
 SELECT * FROM run_command_on_workers($$DROP TYPE test_item$$) ORDER BY nodeport;
 -- other distinct aggregate are not supported
 SELECT *
 	FROM (
--- a/src/test/regress/output/multi_complex_count_distinct.source
+++ b/src/test/regress/output/multi_complex_count_distinct.source
@ -899,6 +899,102 @@ SELECT *
 REG AIR    |  1607
 (7 rows)
 -- distinct on non-var (type cast/field select) columns are also
 -- supported if grouped on distribution column
 -- random is added to prevent flattening by postgresql
 SELECT 
 	l_orderkey, count(a::int), count(distinct a::int)
 	FROM (
 		SELECT l_orderkey, l_orderkey * 1.5 a, random() b
 			FROM lineitem_hash) sub
 	GROUP BY 1
 	ORDER BY 1 DESC
 	LIMIT 5;
 l_orderkey | count | count 
 ------------+-------+-------
      14947 |     2 |     1
      14946 |     2 |     1
      14945 |     6 |     1
      14944 |     2 |     1
      14919 |     1 |     1
 (5 rows)
 SELECT user_id,
       count(sub.a::int),
       count(DISTINCT sub.a::int),
       count(DISTINCT (sub).a)
 FROM
  (SELECT user_id,
          unnest(ARRAY[user_id * 1.5])a,
          random() b
   FROM users_table
   ) sub
 GROUP BY 1
 ORDER BY 1 DESC
 LIMIT 5;
 user_id | count | count | count 
 ---------+-------+-------+-------
       6 |    10 |     1 |     1
       5 |    26 |     1 |     1
       4 |    23 |     1 |     1
       3 |    17 |     1 |     1
       2 |    18 |     1 |     1
 (5 rows)
 CREATE TYPE test_item AS
 (
  id       INTEGER,
  duration INTEGER
 );
 SELECT * FROM run_command_on_workers($$CREATE TYPE test_item AS
 (
  id       INTEGER,
  duration INTEGER
 )$$) ORDER BY nodeport;
 nodename  | nodeport | success |   result    
 -----------+----------+---------+-------------
 localhost |    57637 | t       | CREATE TYPE
 localhost |    57638 | t       | CREATE TYPE
 (2 rows)
 CREATE TABLE test_count_distinct_array (key int, value int ,  value_arr test_item[]);
 SELECT create_distributed_table('test_count_distinct_array', 'key');
 create_distributed_table 
 --------------------------
 (1 row)
 INSERT INTO test_count_distinct_array SELECT i, i, ARRAY[(i,i)::test_item] FROM generate_Series(0, 1000) i;
 SELECT
 	key,
 	count(DISTINCT value),
 	count(DISTINCT (item)."id"),
 	count(DISTINCT (item)."id" * 3)
 FROM
 	(
 		SELECT key, unnest(value_arr) as item, value FROM test_count_distinct_array
 	) as sub
 GROUP BY 1
 ORDER BY 1 DESC
 LIMIT 5;
 key  | count | count | count 
 ------+-------+-------+-------
 1000 |     1 |     1 |     1
  999 |     1 |     1 |     1
  998 |     1 |     1 |     1
  997 |     1 |     1 |     1
  996 |     1 |     1 |     1
 (5 rows)
 DROP TABLE test_count_distinct_array;
 DROP TYPE test_item;
 SELECT * FROM run_command_on_workers($$DROP TYPE test_item$$) ORDER BY nodeport;
 nodename  | nodeport | success |  result   
 -----------+----------+---------+-----------
 localhost |    57637 | t       | DROP TYPE
 localhost |    57638 | t       | DROP TYPE
 (2 rows)
 -- other distinct aggregate are not supported
 SELECT *
 	FROM (