Fix count distinct using field select on top level query

We were allowing count distict queries even if they were
not directly on columns if the query is grouped on
distribution column.

When performing these checks we were skipping subqueries
because they also perform this check in a more concise manner.
We relied on oid SUBQUERY_RELATION_ID (10000) to decide if
a given RTE relation id denotes a subquery, however, we also
use SUBQUERY_PUSHDOWN_RELATION_ID (10001) for some subqueries.

We skip both type of subqueries with this change.
pull/2079/head
Murat Tuncer 2018-02-02 12:41:47 +03:00 committed by velioglu
parent 4e42600cb5
commit 41f1466cf8
3 changed files with 160 additions and 1 deletions

View File

@ -2888,6 +2888,8 @@ ErrorIfUnsupportedAggregateDistinct(Aggref *aggregateExpression,
* AggregateDistinctColumn checks if the given aggregate expression's distinct
* clause is on a single column. If it is, the function finds and returns that
* column. Otherwise, the function returns null.
* The function expects to find a single column here, no FieldSelect or other
* expressions are accepted as a column.
*/
static Var *
AggregateDistinctColumn(Aggref *aggregateExpression)
@ -2941,7 +2943,8 @@ TablePartitioningSupportsDistinct(List *tableNodeList, MultiExtendedOp *opNode,
char partitionMethod = 0;
List *shardList = NIL;
if (relationId == SUBQUERY_RELATION_ID)
if (relationId == SUBQUERY_RELATION_ID ||
relationId == SUBQUERY_PUSHDOWN_RELATION_ID)
{
return true;
}

View File

@ -433,6 +433,66 @@ SELECT *
2 DESC,1 DESC
LIMIT 10;
-- distinct on non-var (type cast/field select) columns are also
-- supported if grouped on distribution column
-- random is added to prevent flattening by postgresql
SELECT
l_orderkey, count(a::int), count(distinct a::int)
FROM (
SELECT l_orderkey, l_orderkey * 1.5 a, random() b
FROM lineitem_hash) sub
GROUP BY 1
ORDER BY 1 DESC
LIMIT 5;
SELECT user_id,
count(sub.a::int),
count(DISTINCT sub.a::int),
count(DISTINCT (sub).a)
FROM
(SELECT user_id,
unnest(ARRAY[user_id * 1.5])a,
random() b
FROM users_table
) sub
GROUP BY 1
ORDER BY 1 DESC
LIMIT 5;
CREATE TYPE test_item AS
(
id INTEGER,
duration INTEGER
);
SELECT * FROM run_command_on_workers($$CREATE TYPE test_item AS
(
id INTEGER,
duration INTEGER
)$$) ORDER BY nodeport;
CREATE TABLE test_count_distinct_array (key int, value int , value_arr test_item[]);
SELECT create_distributed_table('test_count_distinct_array', 'key');
INSERT INTO test_count_distinct_array SELECT i, i, ARRAY[(i,i)::test_item] FROM generate_Series(0, 1000) i;
SELECT
key,
count(DISTINCT value),
count(DISTINCT (item)."id"),
count(DISTINCT (item)."id" * 3)
FROM
(
SELECT key, unnest(value_arr) as item, value FROM test_count_distinct_array
) as sub
GROUP BY 1
ORDER BY 1 DESC
LIMIT 5;
DROP TABLE test_count_distinct_array;
DROP TYPE test_item;
SELECT * FROM run_command_on_workers($$DROP TYPE test_item$$) ORDER BY nodeport;
-- other distinct aggregate are not supported
SELECT *
FROM (

View File

@ -899,6 +899,102 @@ SELECT *
REG AIR | 1607
(7 rows)
-- distinct on non-var (type cast/field select) columns are also
-- supported if grouped on distribution column
-- random is added to prevent flattening by postgresql
SELECT
l_orderkey, count(a::int), count(distinct a::int)
FROM (
SELECT l_orderkey, l_orderkey * 1.5 a, random() b
FROM lineitem_hash) sub
GROUP BY 1
ORDER BY 1 DESC
LIMIT 5;
l_orderkey | count | count
------------+-------+-------
14947 | 2 | 1
14946 | 2 | 1
14945 | 6 | 1
14944 | 2 | 1
14919 | 1 | 1
(5 rows)
SELECT user_id,
count(sub.a::int),
count(DISTINCT sub.a::int),
count(DISTINCT (sub).a)
FROM
(SELECT user_id,
unnest(ARRAY[user_id * 1.5])a,
random() b
FROM users_table
) sub
GROUP BY 1
ORDER BY 1 DESC
LIMIT 5;
user_id | count | count | count
---------+-------+-------+-------
6 | 10 | 1 | 1
5 | 26 | 1 | 1
4 | 23 | 1 | 1
3 | 17 | 1 | 1
2 | 18 | 1 | 1
(5 rows)
CREATE TYPE test_item AS
(
id INTEGER,
duration INTEGER
);
SELECT * FROM run_command_on_workers($$CREATE TYPE test_item AS
(
id INTEGER,
duration INTEGER
)$$) ORDER BY nodeport;
nodename | nodeport | success | result
-----------+----------+---------+-------------
localhost | 57637 | t | CREATE TYPE
localhost | 57638 | t | CREATE TYPE
(2 rows)
CREATE TABLE test_count_distinct_array (key int, value int , value_arr test_item[]);
SELECT create_distributed_table('test_count_distinct_array', 'key');
create_distributed_table
--------------------------
(1 row)
INSERT INTO test_count_distinct_array SELECT i, i, ARRAY[(i,i)::test_item] FROM generate_Series(0, 1000) i;
SELECT
key,
count(DISTINCT value),
count(DISTINCT (item)."id"),
count(DISTINCT (item)."id" * 3)
FROM
(
SELECT key, unnest(value_arr) as item, value FROM test_count_distinct_array
) as sub
GROUP BY 1
ORDER BY 1 DESC
LIMIT 5;
key | count | count | count
------+-------+-------+-------
1000 | 1 | 1 | 1
999 | 1 | 1 | 1
998 | 1 | 1 | 1
997 | 1 | 1 | 1
996 | 1 | 1 | 1
(5 rows)
DROP TABLE test_count_distinct_array;
DROP TYPE test_item;
SELECT * FROM run_command_on_workers($$DROP TYPE test_item$$) ORDER BY nodeport;
nodename | nodeport | success | result
-----------+----------+---------+-----------
localhost | 57637 | t | DROP TYPE
localhost | 57638 | t | DROP TYPE
(2 rows)
-- other distinct aggregate are not supported
SELECT *
FROM (