--
-- MULTI_HAVING_PUSHDOWN
--
-- Checks, via EXPLAIN, in which cases the HAVING filter (and the
-- accompanying Sort/Limit) shows up inside the per-shard Task plan as
-- opposed to only above the Custom Scan node.
--
-- NOTE(review): this file interleaves SQL statements with their recorded
-- psql output (result headers, query plans, row counts), so it reads as a
-- regression-test transcript rather than a runnable script -- confirm
-- before editing any statement, since the plan text below it would have
-- to be regenerated to match.
--
-- Presumably pins the shard ids so that the shard names appearing in the
-- plans below (lineitem_hash_590000, orders_hash_590004) are stable -- verify.
SET citus.next_shard_id TO 590000;
-- Hash-distributed copy of lineitem, distributed on l_orderkey.
CREATE TABLE lineitem_hash (LIKE lineitem);
SELECT create_distributed_table('lineitem_hash', 'l_orderkey', 'hash');
 create_distributed_table
--------------------------

(1 row)

-- Hash-distributed copy of orders, distributed on o_orderkey.
CREATE TABLE orders_hash (LIKE orders);
SELECT create_distributed_table('orders_hash', 'o_orderkey', 'hash');
 create_distributed_table
--------------------------

(1 row)

-- push down when table is distributed by hash and grouped by partition column
-- (the Task subplan below contains its own Limit, Sort and HAVING Filter)
EXPLAIN (COSTS FALSE)
    SELECT l_orderkey, sum(l_extendedprice * l_discount) as revenue
    FROM lineitem_hash
    GROUP BY l_orderkey HAVING sum(l_quantity) > 24
    ORDER BY 2 DESC, 1 ASC LIMIT 3;
                                               QUERY PLAN
--------------------------------------------------------------------------------------------------------
 Limit
   ->  Sort
         Sort Key: sum((sum(remote_scan.revenue))) DESC, remote_scan.l_orderkey
         ->  HashAggregate
               Group Key: remote_scan.l_orderkey
               Filter: (sum(remote_scan.worker_column_3) > '24'::numeric)
               ->  Custom Scan (Citus Real-Time)
                     Task Count: 4
                     Tasks Shown: One of 4
                     ->  Task
                           Node: host=localhost port=57637 dbname=regression
                           ->  Limit
                                 ->  Sort
                                       Sort Key: (sum((l_extendedprice * l_discount))) DESC, l_orderkey
                                       ->  HashAggregate
                                             Group Key: l_orderkey
                                             Filter: (sum(l_quantity) > '24'::numeric)
                                             ->  Seq Scan on lineitem_hash_590000 lineitem_hash
(18 rows)

-- but don't push down when table is distributed by append
-- (the Task subplan below has no Filter, Sort or Limit; the Filter appears
-- only above the Custom Scan)
EXPLAIN (COSTS FALSE)
    SELECT l_orderkey, sum(l_extendedprice * l_discount) as revenue
    FROM lineitem
    GROUP BY l_orderkey HAVING sum(l_quantity) > 24
    ORDER BY 2 DESC, 1 ASC LIMIT 3;
                                   QUERY PLAN
--------------------------------------------------------------------------------
 Limit
   ->  Sort
         Sort Key: sum((sum(remote_scan.revenue))) DESC, remote_scan.l_orderkey
         ->  HashAggregate
               Group Key: remote_scan.l_orderkey
               Filter: (sum(remote_scan.worker_column_3) > '24'::numeric)
               ->  Custom Scan (Citus Real-Time)
                     Task Count: 2
                     Tasks Shown: One of 2
                     ->  Task
                           Node: host=localhost port=57637 dbname=regression
                           ->  HashAggregate
                                 Group Key: l_orderkey
                                 ->  Seq Scan on lineitem_290001 lineitem
(14 rows)

-- and don't push down when not grouped by partition column
-- (grouping is on l_shipmode only; the Task subplan below has no Filter)
EXPLAIN (COSTS FALSE)
    SELECT l_shipmode, sum(l_extendedprice * l_discount) as revenue
    FROM lineitem_hash
    GROUP BY l_shipmode HAVING sum(l_quantity) > 24
    ORDER BY 2 DESC, 1 ASC LIMIT 3;
                                     QUERY PLAN
------------------------------------------------------------------------------------
 Limit
   ->  Sort
         Sort Key: sum((sum(remote_scan.revenue))) DESC, remote_scan.l_shipmode
         ->  HashAggregate
               Group Key: remote_scan.l_shipmode
               Filter: (sum(remote_scan.worker_column_3) > '24'::numeric)
               ->  Custom Scan (Citus Real-Time)
                     Task Count: 4
                     Tasks Shown: One of 4
                     ->  Task
                           Node: host=localhost port=57637 dbname=regression
                           ->  HashAggregate
                                 Group Key: l_shipmode
                                 ->  Seq Scan on lineitem_hash_590000 lineitem_hash
(14 rows)

-- push down if grouped by multiple columns one of which is partition column
EXPLAIN (COSTS FALSE)
    SELECT l_shipmode, l_orderkey, sum(l_extendedprice * l_discount) as revenue
    FROM lineitem_hash
    GROUP BY l_shipmode, l_orderkey HAVING sum(l_quantity) > 24
    ORDER BY 3 DESC, 1, 2 LIMIT 3;
                                                     QUERY PLAN
--------------------------------------------------------------------------------------------------------------------
 Limit
   ->  Sort
         Sort Key: sum((sum(remote_scan.revenue))) DESC, remote_scan.l_shipmode, remote_scan.l_orderkey
         ->  HashAggregate
               Group Key: remote_scan.l_shipmode, remote_scan.l_orderkey
               Filter: (sum(remote_scan.worker_column_4) > '24'::numeric)
               ->  Custom Scan (Citus Real-Time)
                     Task Count: 4
                     Tasks Shown: One of 4
                     ->  Task
                           Node: host=localhost port=57637 dbname=regression
                           ->  Limit
                                 ->  Sort
                                       Sort Key: (sum((l_extendedprice * l_discount))) DESC, l_shipmode, l_orderkey
                                       ->  HashAggregate
                                             Group Key: l_shipmode, l_orderkey
                                             Filter: (sum(l_quantity) > '24'::numeric)
                                             ->  Seq Scan on lineitem_hash_590000 lineitem_hash
(18 rows)

-- couple more checks with joins
-- (join on the two distribution columns, grouped by a key set that includes
-- them: the Task subplan below carries the Limit, Sort and HAVING Filter)
EXPLAIN (COSTS FALSE)
    SELECT sum(l_extendedprice * l_discount) as revenue
    FROM lineitem_hash, orders_hash
    WHERE o_orderkey = l_orderkey
    GROUP BY l_orderkey, o_orderkey, l_shipmode HAVING sum(l_quantity) > 24
    ORDER BY 1 DESC LIMIT 3;
                                                            QUERY PLAN
-----------------------------------------------------------------------------------------------------------------------------------
 Limit
   ->  Sort
         Sort Key: sum((sum(remote_scan.revenue))) DESC
         ->  HashAggregate
               Group Key: remote_scan.worker_column_2, remote_scan.worker_column_3, remote_scan.worker_column_4
               Filter: (sum(remote_scan.worker_column_5) > '24'::numeric)
               ->  Custom Scan (Citus Real-Time)
                     Task Count: 4
                     Tasks Shown: One of 4
                     ->  Task
                           Node: host=localhost port=57637 dbname=regression
                           ->  Limit
                                 ->  Sort
                                       Sort Key: (sum((lineitem_hash.l_extendedprice * lineitem_hash.l_discount))) DESC
                                       ->  HashAggregate
                                             Group Key: lineitem_hash.l_orderkey, orders_hash.o_orderkey, lineitem_hash.l_shipmode
                                             Filter: (sum(lineitem_hash.l_quantity) > '24'::numeric)
                                             ->  Hash Join
                                                   Hash Cond: (orders_hash.o_orderkey = lineitem_hash.l_orderkey)
                                                   ->  Seq Scan on orders_hash_590004 orders_hash
                                                   ->  Hash
                                                         ->  Seq Scan on lineitem_hash_590000 lineitem_hash
(22 rows)

-- same join, but grouped only by l_shipmode and o_clerk (neither is a
-- distribution column): the Task subplan below has no Filter, Sort or Limit
EXPLAIN (COSTS FALSE)
    SELECT sum(l_extendedprice * l_discount) as revenue
    FROM lineitem_hash, orders_hash
    WHERE o_orderkey = l_orderkey
    GROUP BY l_shipmode, o_clerk HAVING sum(l_quantity) > 24
    ORDER BY 1 DESC LIMIT 3;
                                              QUERY PLAN
------------------------------------------------------------------------------------------------------
 Limit
   ->  Sort
         Sort Key: sum((sum(remote_scan.revenue))) DESC
         ->  HashAggregate
               Group Key: remote_scan.worker_column_2, remote_scan.worker_column_3
               Filter: (sum(remote_scan.worker_column_4) > '24'::numeric)
               ->  Custom Scan (Citus Real-Time)
                     Task Count: 4
                     Tasks Shown: One of 4
                     ->  Task
                           Node: host=localhost port=57637 dbname=regression
                           ->  HashAggregate
                                 Group Key: lineitem_hash.l_shipmode, orders_hash.o_clerk
                                 ->  Hash Join
                                       Hash Cond: (orders_hash.o_orderkey = lineitem_hash.l_orderkey)
                                       ->  Seq Scan on orders_hash_590004 orders_hash
                                       ->  Hash
                                             ->  Seq Scan on lineitem_hash_590000 lineitem_hash
(18 rows)

-- clean up the two tables created at the top of this file
DROP TABLE lineitem_hash;
DROP TABLE orders_hash;