-- -- MULTI_LIMIT_CLAUSE -- CREATE TABLE lineitem_hash (LIKE lineitem); SELECT create_distributed_table('lineitem_hash', 'l_orderkey', 'hash'); INSERT INTO lineitem_hash SELECT * FROM lineitem; -- Display debug messages on limit clause push down. SET client_min_messages TO DEBUG1; -- Check that we can correctly handle the Limit clause in distributed queries. -- Note that we don't have the limit optimization enabled for these queries, and -- will end up fetching all rows to the master database. SELECT count(*) count_quantity, l_quantity FROM lineitem WHERE l_quantity < 32.0 GROUP BY l_quantity ORDER BY count_quantity ASC, l_quantity ASC; SELECT count(*) count_quantity, l_quantity FROM lineitem WHERE l_quantity < 32.0 GROUP BY l_quantity ORDER BY count_quantity DESC, l_quantity DESC; SELECT count(*) count_quantity, l_quantity FROM lineitem WHERE l_quantity < 32.0 GROUP BY l_quantity ORDER BY count_quantity ASC, l_quantity ASC LIMIT 5; SELECT count(*) count_quantity, l_quantity FROM lineitem WHERE l_quantity < 32.0 GROUP BY l_quantity ORDER BY count_quantity ASC, l_quantity ASC LIMIT 10; SELECT count(*) count_quantity, l_quantity FROM lineitem WHERE l_quantity < 32.0 GROUP BY l_quantity ORDER BY count_quantity DESC, l_quantity DESC LIMIT 10; -- Check that we can handle limits for simple sort clauses. We order by columns -- in the first two tests, and then by a simple expression in the last test. SELECT min(l_orderkey) FROM lineitem; SELECT l_orderkey FROM lineitem ORDER BY l_orderkey ASC LIMIT 1; SELECT max(l_orderkey) FROM lineitem; SELECT l_orderkey FROM lineitem ORDER BY l_orderkey DESC LIMIT 1; SELECT * FROM lineitem ORDER BY l_orderkey DESC, l_linenumber DESC LIMIT 3; SELECT max(extract(epoch from l_shipdate)) FROM lineitem; SELECT * FROM lineitem ORDER BY extract(epoch from l_shipdate) DESC, l_orderkey DESC LIMIT 3; -- Exercise the scenario where order by clauses don't have any aggregates, and -- that we can push down the limit as a result. Check that when this happens, we -- also sort on all group by clauses behind the covers. SELECT l_quantity, l_discount, avg(l_partkey) FROM lineitem GROUP BY l_quantity, l_discount ORDER BY l_quantity LIMIT 1; -- Results from the previous query should match this query's results. SELECT l_quantity, l_discount, avg(l_partkey) FROM lineitem GROUP BY l_quantity, l_discount ORDER BY l_quantity, l_discount LIMIT 1; -- We can push down LIMIT clause when we group by partition column of a hash -- partitioned table. SELECT l_orderkey, count(DISTINCT l_partkey) FROM lineitem_hash GROUP BY l_orderkey ORDER BY 2 DESC, 1 DESC LIMIT 5; SELECT l_orderkey FROM lineitem_hash GROUP BY l_orderkey ORDER BY l_orderkey LIMIT 5; -- Don't push down if not grouped by partition column. SELECT max(l_orderkey) FROM lineitem_hash GROUP BY l_linestatus ORDER BY 1 DESC LIMIT 2; -- Don't push down if table is distributed by append SELECT l_orderkey, max(l_shipdate) FROM lineitem GROUP BY l_orderkey ORDER BY 2 DESC, 1 LIMIT 5; -- Push down if grouped by multiple columns one of which is partition column. SELECT l_linestatus, l_orderkey, max(l_shipdate) FROM lineitem_hash GROUP BY l_linestatus, l_orderkey ORDER BY 3 DESC, 1, 2 LIMIT 5; -- Don't push down if grouped by multiple columns none of which is partition column. SELECT l_linestatus, l_shipmode, max(l_shipdate) FROM lineitem_hash GROUP BY l_linestatus, l_shipmode ORDER BY 3 DESC, 1, 2 LIMIT 5; -- Push down limit even if there is distinct on SELECT DISTINCT ON (l_orderkey, l_linenumber) l_orderkey, l_linenumber FROM lineitem_hash GROUP BY l_orderkey, l_linenumber ORDER BY l_orderkey, l_linenumber LIMIT 5; -- Don't push down limit when group by clause not included in distinct on SELECT DISTINCT ON (l_linenumber) l_orderkey, l_linenumber FROM lineitem_hash GROUP BY l_orderkey, l_linenumber ORDER BY l_linenumber, l_orderkey LIMIT 5; -- Push down limit when there is const in distinct on -- referring to a column such that group by clause -- list is contained in distinct on SELECT DISTINCT ON (l_linenumber, 1) l_orderkey, l_linenumber FROM lineitem_hash GROUP BY l_orderkey, l_linenumber ORDER BY l_linenumber, l_orderkey LIMIT 5; -- Don't push down limit when there is const expression in distinct on -- even if there is a group by on the expression -- This is due to fact that postgres removes (1+1) from distinct on -- clause but keeps it in group by list. SELECT DISTINCT ON (l_linenumber, 1+1, l_linenumber) l_orderkey, l_linenumber FROM lineitem_hash GROUP BY l_orderkey, (1+1), l_linenumber ORDER BY l_linenumber, (1+1), l_orderkey LIMIT 5; -- Don't push down limit when there is const reference -- does not point to a column to make distinct clause superset -- of group by SELECT DISTINCT ON (l_linenumber, 2) l_orderkey, l_linenumber FROM lineitem_hash GROUP BY l_orderkey, l_linenumber ORDER BY l_linenumber, l_orderkey LIMIT 5; -- Push down limit even when there is a column expression -- in distinct clause provided that distinct clause covers -- group by expression, and there is no aggregates in the query. SELECT DISTINCT ON (l_orderkey + 1) l_orderkey + 1 FROM lineitem_hash GROUP BY l_orderkey + 1 ORDER BY l_orderkey + 1 LIMIT 5; -- Limit is not pushed down when there are aggregates in the query -- This is because group by is not on distribution column itself -- but on an expression on distribution column SELECT DISTINCT ON (l_orderkey + 1, count(*)) l_orderkey + 1, count(*) FROM lineitem_hash GROUP BY l_orderkey + 1 ORDER BY l_orderkey + 1 , 2 LIMIT 5; -- same query with column instead of column expression, limit is pushed down -- because group by is on distribution column SELECT DISTINCT ON (l_orderkey, count(*)) l_orderkey, count(*) FROM lineitem_hash GROUP BY l_orderkey ORDER BY l_orderkey , 2 LIMIT 5; -- limit is not pushed down because distinct clause -- does not cover group by clause SELECT DISTINCT ON (count(*)) l_orderkey, count(*) FROM lineitem_hash GROUP BY l_orderkey ORDER BY 2 DESC, 1 LIMIT 2; -- push down limit if there is a window function in distinct on SELECT DISTINCT ON (l_orderkey, RANK() OVER (partition by l_orderkey)) l_orderkey, RANK() OVER (partition by l_orderkey) FROM lineitem_hash GROUP BY l_orderkey ORDER BY l_orderkey , 2 LIMIT 5; -- do not push down limit if there is an aggragete in distinct on -- we should be able to push this down, but query goes to subquery -- planner and we can't safely determine it is grouped by partition -- column. SELECT DISTINCT ON (l_orderkey, RANK() OVER (partition by l_orderkey)) l_orderkey, count(*), RANK() OVER (partition by l_orderkey) FROM lineitem_hash GROUP BY l_orderkey ORDER BY l_orderkey , 3, 2 LIMIT 5; -- limit is not pushed down due to same reason SELECT DISTINCT ON (l_orderkey, count(*) OVER (partition by l_orderkey)) l_orderkey, l_linenumber, count(*), count(*) OVER (partition by l_orderkey) FROM lineitem_hash GROUP BY l_orderkey, l_linenumber ORDER BY l_orderkey , count(*) OVER (partition by l_orderkey), count(*), l_linenumber LIMIT 5; -- limit is not pushed down since distinct clause is not superset of group clause SELECT DISTINCT ON (RANK() OVER (partition by l_orderkey)) l_orderkey, RANK() OVER (partition by l_orderkey) FROM lineitem_hash GROUP BY l_orderkey ORDER BY 2 DESC, 1 LIMIT 5; -- check if we can correctly push the limit when it is null SELECT l_orderkey FROM lineitem WHERE l_orderkey < 3 ORDER BY l_orderkey LIMIT null; SELECT l_orderkey FROM lineitem WHERE l_orderkey < 3 ORDER BY l_orderkey OFFSET 1 LIMIT null; SELECT count(*) FROM lineitem LIMIT null; SELECT count(*) FROM lineitem OFFSET 0 LIMIT null; -- check if we push the right limit when both offset and limit are given SELECT l_orderkey FROM lineitem WHERE l_orderkey < 3 ORDER BY l_orderkey OFFSET 1 LIMIT 3; SELECT l_orderkey FROM lineitem WHERE l_orderkey < 3 ORDER BY l_orderkey OFFSET null LIMIT 1; -- check if we can correctly push the limit when it is all SELECT l_orderkey FROM lineitem WHERE l_orderkey < 2 LIMIT all; SELECT l_orderkey FROM lineitem WHERE l_orderkey < 2 OFFSET 2 LIMIT all; SET client_min_messages TO NOTICE; -- non constants should not push down CREATE OR REPLACE FUNCTION my_limit() RETURNS INT AS $$ BEGIN RETURN 5; END; $$ language plpgsql VOLATILE; SELECT l_orderkey FROM lineitem_hash ORDER BY l_orderkey LIMIT my_limit(); SELECT l_orderkey FROM lineitem_hash ORDER BY l_orderkey LIMIT 10 OFFSET my_limit(); DROP FUNCTION my_limit(); -- subqueries should error out SELECT l_orderkey FROM lineitem_hash ORDER BY l_orderkey LIMIT (SELECT 10); SELECT l_orderkey FROM lineitem_hash ORDER BY l_orderkey LIMIT 10 OFFSET (SELECT 10); DROP TABLE lineitem_hash;