Fix count distinct using field select on top level query

We were allowing count distict queries even if they were not directly on columns if the query is grouped on distribution column. When performing these checks we were skipping subqueries because they also perform this check in a more concise manner. We relied on oid SUBQUERY_RELATION_ID (10000) to decide if a given RTE relation id denotes a subquery, however, we also use SUBQUERY_PUSHDOWN_RELATION_ID (10001) for some subqueries. We skip both type of subqueries with this change.
2018-02-02 12:41:47 +03:00 · 2018-02-02 12:41:47 +03:00 · 41f1466cf8
parent 4e42600cb5
commit 41f1466cf8
3 changed files with 160 additions and 1 deletions
--- a/src/backend/distributed/planner/multi_logical_optimizer.c
+++ b/src/backend/distributed/planner/multi_logical_optimizer.c
@ -2888,6 +2888,8 @@ ErrorIfUnsupportedAggregateDistinct(Aggref *aggregateExpression,
 * AggregateDistinctColumn checks if the given aggregate expression's distinct
 * clause is on a single column. If it is, the function finds and returns that
 * column. Otherwise, the function returns null.
+ * The function expects to find a single column here, no FieldSelect or other
+ * expressions are accepted as a column.
 */
 static Var *
 AggregateDistinctColumn(Aggref *aggregateExpression)
@ -2941,7 +2943,8 @@ TablePartitioningSupportsDistinct(List *tableNodeList, MultiExtendedOp *opNode,
 		char partitionMethod = 0;
 		List *shardList = NIL;

-		if (relationId == SUBQUERY_RELATION_ID)
+		if (relationId == SUBQUERY_RELATION_ID ||
+			relationId == SUBQUERY_PUSHDOWN_RELATION_ID)
 		{
 			return true;
 		}
--- a/src/test/regress/input/multi_complex_count_distinct.source
+++ b/src/test/regress/input/multi_complex_count_distinct.source
@ -433,6 +433,66 @@ SELECT *
 		2 DESC,1 DESC
 	LIMIT 10;

+-- distinct on non-var (type cast/field select) columns are also
+-- supported if grouped on distribution column
+-- random is added to prevent flattening by postgresql
+SELECT 
+	l_orderkey, count(a::int), count(distinct a::int)
+	FROM (
+		SELECT l_orderkey, l_orderkey * 1.5 a, random() b
+			FROM lineitem_hash) sub
+	GROUP BY 1
+	ORDER BY 1 DESC
+	LIMIT 5;
+
+SELECT user_id,
+       count(sub.a::int),
+       count(DISTINCT sub.a::int),
+       count(DISTINCT (sub).a)
+FROM
+  (SELECT user_id,
+          unnest(ARRAY[user_id * 1.5])a,
+          random() b
+   FROM users_table
+   ) sub
+GROUP BY 1
+ORDER BY 1 DESC
+LIMIT 5;
+
+CREATE TYPE test_item AS
+(
+  id       INTEGER,
+  duration INTEGER
+);
+SELECT * FROM run_command_on_workers($$CREATE TYPE test_item AS
+(
+  id       INTEGER,
+  duration INTEGER
+)$$) ORDER BY nodeport;
+
+CREATE TABLE test_count_distinct_array (key int, value int ,  value_arr test_item[]);
+SELECT create_distributed_table('test_count_distinct_array', 'key');
+
+INSERT INTO test_count_distinct_array SELECT i, i, ARRAY[(i,i)::test_item] FROM generate_Series(0, 1000) i;
+
+SELECT
+	key,
+	count(DISTINCT value),
+	count(DISTINCT (item)."id"),
+	count(DISTINCT (item)."id" * 3)
+FROM
+	(
+		SELECT key, unnest(value_arr) as item, value FROM test_count_distinct_array
+	) as sub
+GROUP BY 1
+ORDER BY 1 DESC
+LIMIT 5;
+
+DROP TABLE test_count_distinct_array;
+
+DROP TYPE test_item;
+SELECT * FROM run_command_on_workers($$DROP TYPE test_item$$) ORDER BY nodeport;
+
 -- other distinct aggregate are not supported
 SELECT *
 	FROM (
--- a/src/test/regress/output/multi_complex_count_distinct.source
+++ b/src/test/regress/output/multi_complex_count_distinct.source
@ -899,6 +899,102 @@ SELECT *
 REG AIR    |  1607
 (7 rows)

+-- distinct on non-var (type cast/field select) columns are also
+-- supported if grouped on distribution column
+-- random is added to prevent flattening by postgresql
+SELECT 
+	l_orderkey, count(a::int), count(distinct a::int)
+	FROM (
+		SELECT l_orderkey, l_orderkey * 1.5 a, random() b
+			FROM lineitem_hash) sub
+	GROUP BY 1
+	ORDER BY 1 DESC
+	LIMIT 5;
+ l_orderkey | count | count 
+------------+-------+-------
+      14947 |     2 |     1
+      14946 |     2 |     1
+      14945 |     6 |     1
+      14944 |     2 |     1
+      14919 |     1 |     1
+(5 rows)
+
+SELECT user_id,
+       count(sub.a::int),
+       count(DISTINCT sub.a::int),
+       count(DISTINCT (sub).a)
+FROM
+  (SELECT user_id,
+          unnest(ARRAY[user_id * 1.5])a,
+          random() b
+   FROM users_table
+   ) sub
+GROUP BY 1
+ORDER BY 1 DESC
+LIMIT 5;
+ user_id | count | count | count 
+---------+-------+-------+-------
+       6 |    10 |     1 |     1
+       5 |    26 |     1 |     1
+       4 |    23 |     1 |     1
+       3 |    17 |     1 |     1
+       2 |    18 |     1 |     1
+(5 rows)
+
+CREATE TYPE test_item AS
+(
+  id       INTEGER,
+  duration INTEGER
+);
+SELECT * FROM run_command_on_workers($$CREATE TYPE test_item AS
+(
+  id       INTEGER,
+  duration INTEGER
+)$$) ORDER BY nodeport;
+ nodename  | nodeport | success |   result    
+-----------+----------+---------+-------------
+ localhost |    57637 | t       | CREATE TYPE
+ localhost |    57638 | t       | CREATE TYPE
+(2 rows)
+
+CREATE TABLE test_count_distinct_array (key int, value int ,  value_arr test_item[]);
+SELECT create_distributed_table('test_count_distinct_array', 'key');
+ create_distributed_table 
+--------------------------
+ 
+(1 row)
+
+INSERT INTO test_count_distinct_array SELECT i, i, ARRAY[(i,i)::test_item] FROM generate_Series(0, 1000) i;
+SELECT
+	key,
+	count(DISTINCT value),
+	count(DISTINCT (item)."id"),
+	count(DISTINCT (item)."id" * 3)
+FROM
+	(
+		SELECT key, unnest(value_arr) as item, value FROM test_count_distinct_array
+	) as sub
+GROUP BY 1
+ORDER BY 1 DESC
+LIMIT 5;
+ key  | count | count | count 
+------+-------+-------+-------
+ 1000 |     1 |     1 |     1
+  999 |     1 |     1 |     1
+  998 |     1 |     1 |     1
+  997 |     1 |     1 |     1
+  996 |     1 |     1 |     1
+(5 rows)
+
+DROP TABLE test_count_distinct_array;
+DROP TYPE test_item;
+SELECT * FROM run_command_on_workers($$DROP TYPE test_item$$) ORDER BY nodeport;
+ nodename  | nodeport | success |  result   
+-----------+----------+---------+-----------
+ localhost |    57637 | t       | DROP TYPE
+ localhost |    57638 | t       | DROP TYPE
+(2 rows)
+
 -- other distinct aggregate are not supported
 SELECT *
 	FROM (