From 9cfadd79657f592d6dc1adcdd0058883b5eeae06 Mon Sep 17 00:00:00 2001 From: Naisila Puka <37271756+naisila@users.noreply.github.com> Date: Fri, 19 Aug 2022 18:21:13 +0300 Subject: [PATCH] Deletes unnecessary test outputs pt2 (#6214) --- .../multi_colocated_shard_rebalance_0.out | 788 -------- .../expected/multi_function_in_join_0.out | 265 --- .../expected/multi_poolinfo_usage_0.out | 60 - .../expected/multi_select_distinct.out | 16 +- .../expected/multi_select_distinct_1.out | 1569 ---------------- .../regress/expected/window_functions.out | 69 +- .../regress/expected/window_functions_0.out | 5 + .../regress/expected/window_functions_1.out | 1648 ----------------- src/test/regress/sql/window_functions.sql | 5 + 9 files changed, 47 insertions(+), 4378 deletions(-) delete mode 100644 src/test/regress/expected/multi_colocated_shard_rebalance_0.out delete mode 100644 src/test/regress/expected/multi_function_in_join_0.out delete mode 100644 src/test/regress/expected/multi_poolinfo_usage_0.out delete mode 100644 src/test/regress/expected/multi_select_distinct_1.out delete mode 100644 src/test/regress/expected/window_functions_1.out diff --git a/src/test/regress/expected/multi_colocated_shard_rebalance_0.out b/src/test/regress/expected/multi_colocated_shard_rebalance_0.out deleted file mode 100644 index 95871bc31..000000000 --- a/src/test/regress/expected/multi_colocated_shard_rebalance_0.out +++ /dev/null @@ -1,788 +0,0 @@ --- --- MULTI_COLOCATED_SHARD_REBALANCE --- -ALTER SEQUENCE pg_catalog.pg_dist_shardid_seq RESTART 13000000; -SET citus.shard_count TO 6; -SET citus.shard_replication_factor TO 1; --- create distributed tables -CREATE TABLE table1_group1 ( id int PRIMARY KEY); -SELECT create_distributed_table('table1_group1', 'id', 'hash'); - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - -CREATE TABLE table2_group1 ( id int ); -SELECT create_distributed_table('table2_group1', 'id', 'hash'); - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - -SET citus.shard_count TO 8; -CREATE TABLE table5_groupX ( id int ); -SELECT create_distributed_table('table5_groupX', 'id', 'hash'); - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - -CREATE TABLE table6_append ( id int ); -SELECT master_create_distributed_table('table6_append', 'id', 'append'); - master_create_distributed_table ---------------------------------------------------------------------- - -(1 row) - -SELECT master_create_empty_shard('table6_append'); - master_create_empty_shard ---------------------------------------------------------------------- - 13000020 -(1 row) - -SELECT master_create_empty_shard('table6_append'); - master_create_empty_shard ---------------------------------------------------------------------- - 13000021 -(1 row) - --- Mark tables as non-mx tables, in order to be able to test master_copy_shard_placement -UPDATE pg_dist_partition SET repmodel='c' WHERE logicalrelid IN - ('table1_group1'::regclass, 'table2_group1'::regclass, 'table5_groupX'::regclass); --- test copy --- test copying colocated shards --- status before shard copy -SELECT s.shardid, s.logicalrelid::regclass, sp.nodeport -FROM - pg_dist_partition p, pg_dist_shard s, pg_dist_shard_placement sp -WHERE - p.logicalrelid = s.logicalrelid AND - s.shardid = sp.shardid AND - colocationid = (SELECT colocationid FROM pg_dist_partition WHERE logicalrelid = 'table1_group1'::regclass) -ORDER BY s.shardid, sp.nodeport; - shardid | logicalrelid | nodeport ---------------------------------------------------------------------- - 13000000 | table1_group1 | 57637 - 13000001 | table1_group1 | 57638 - 13000002 | table1_group1 | 57637 - 13000003 | table1_group1 | 57638 - 13000004 | table1_group1 | 57637 - 13000005 | table1_group1 | 57638 - 13000006 | table2_group1 | 57637 - 13000007 | table2_group1 | 57638 - 13000008 | table2_group1 | 57637 - 13000009 | table2_group1 | 57638 - 13000010 | table2_group1 | 57637 - 13000011 | table2_group1 | 57638 -(12 rows) - --- try to copy colocated shards without a replica identity -SELECT master_copy_shard_placement(13000000, 'localhost', :worker_1_port, 'localhost', :worker_2_port, false); - master_copy_shard_placement ---------------------------------------------------------------------- - -(1 row) - --- copy colocated shards -SELECT master_copy_shard_placement(13000000, 'localhost', :worker_1_port, 'localhost', :worker_2_port, false, 'force_logical'); -ERROR: shard xxxxx already exist in target placement --- status after shard copy -SELECT s.shardid, s.logicalrelid::regclass, sp.nodeport -FROM - pg_dist_partition p, pg_dist_shard s, pg_dist_shard_placement sp -WHERE - p.logicalrelid = s.logicalrelid AND - s.shardid = sp.shardid AND - colocationid = (SELECT colocationid FROM pg_dist_partition WHERE logicalrelid = 'table1_group1'::regclass) -ORDER BY s.shardid, sp.nodeport; - shardid | logicalrelid | nodeport ---------------------------------------------------------------------- - 13000000 | table1_group1 | 57637 - 13000000 | table1_group1 | 57638 - 13000001 | table1_group1 | 57638 - 13000002 | table1_group1 | 57637 - 13000003 | table1_group1 | 57638 - 13000004 | table1_group1 | 57637 - 13000005 | table1_group1 | 57638 - 13000006 | table2_group1 | 57637 - 13000006 | table2_group1 | 57638 - 13000007 | table2_group1 | 57638 - 13000008 | table2_group1 | 57637 - 13000009 | table2_group1 | 57638 - 13000010 | table2_group1 | 57637 - 13000011 | table2_group1 | 57638 -(14 rows) - --- also connect worker to verify we successfully copied given shard (and other colocated shards) -\c - - - :worker_2_port -SELECT "Column", "Type", "Modifiers" FROM table_desc WHERE relid='public.table1_group1_13000000'::regclass; - Column | Type | Modifiers ---------------------------------------------------------------------- - id | integer | not null -(1 row) - -SELECT "Column", "Type", "Modifiers" FROM table_desc WHERE relid='public.table2_group1_13000006'::regclass; - Column | Type | Modifiers ---------------------------------------------------------------------- - id | integer | -(1 row) - -\c - - - :master_port --- copy colocated shards again to see error message -SELECT master_copy_shard_placement(13000000, 'localhost', :worker_1_port, 'localhost', :worker_2_port, false, 'force_logical'); -ERROR: shard xxxxx already exist in target placement --- test copying NOT colocated shard --- status before shard copy -SELECT s.shardid, s.logicalrelid::regclass, sp.nodeport -FROM - pg_dist_partition p, pg_dist_shard s, pg_dist_shard_placement sp -WHERE - p.logicalrelid = s.logicalrelid AND - s.shardid = sp.shardid AND - p.logicalrelid = 'table5_groupX'::regclass -ORDER BY s.shardid, sp.nodeport; - shardid | logicalrelid | nodeport ---------------------------------------------------------------------- - 13000012 | table5_groupx | 57637 - 13000013 | table5_groupx | 57638 - 13000014 | table5_groupx | 57637 - 13000015 | table5_groupx | 57638 - 13000016 | table5_groupx | 57637 - 13000017 | table5_groupx | 57638 - 13000018 | table5_groupx | 57637 - 13000019 | table5_groupx | 57638 -(8 rows) - --- copy NOT colocated shard -SELECT master_copy_shard_placement(13000012, 'localhost', :worker_1_port, 'localhost', :worker_2_port, false, 'force_logical'); - master_copy_shard_placement ---------------------------------------------------------------------- - -(1 row) - --- status after shard copy -SELECT s.shardid, s.logicalrelid::regclass, sp.nodeport -FROM - pg_dist_partition p, pg_dist_shard s, pg_dist_shard_placement sp -WHERE - p.logicalrelid = s.logicalrelid AND - s.shardid = sp.shardid AND - p.logicalrelid = 'table5_groupX'::regclass -ORDER BY s.shardid, sp.nodeport; - shardid | logicalrelid | nodeport ---------------------------------------------------------------------- - 13000012 | table5_groupx | 57637 - 13000012 | table5_groupx | 57638 - 13000013 | table5_groupx | 57638 - 13000014 | table5_groupx | 57637 - 13000015 | table5_groupx | 57638 - 13000016 | table5_groupx | 57637 - 13000017 | table5_groupx | 57638 - 13000018 | table5_groupx | 57637 - 13000019 | table5_groupx | 57638 -(9 rows) - --- test copying shard in append distributed table --- status before shard copy -SELECT s.shardid, s.logicalrelid::regclass, sp.nodeport -FROM - pg_dist_partition p, pg_dist_shard s, pg_dist_shard_placement sp -WHERE - p.logicalrelid = s.logicalrelid AND - s.shardid = sp.shardid AND - p.logicalrelid = 'table6_append'::regclass -ORDER BY s.shardid, sp.nodeport; - shardid | logicalrelid | nodeport ---------------------------------------------------------------------- - 13000020 | table6_append | 57638 - 13000021 | table6_append | 57637 -(2 rows) - --- copy shard in append distributed table -SELECT master_copy_shard_placement(13000020, 'localhost', :worker_2_port, 'localhost', :worker_1_port, false, 'force_logical'); - master_copy_shard_placement ---------------------------------------------------------------------- - -(1 row) - --- status after shard copy -SELECT s.shardid, s.logicalrelid::regclass, sp.nodeport -FROM - pg_dist_partition p, pg_dist_shard s, pg_dist_shard_placement sp -WHERE - p.logicalrelid = s.logicalrelid AND - s.shardid = sp.shardid AND - p.logicalrelid = 'table6_append'::regclass -ORDER BY s.shardid, sp.nodeport; - shardid | logicalrelid | nodeport ---------------------------------------------------------------------- - 13000020 | table6_append | 57637 - 13000020 | table6_append | 57638 - 13000021 | table6_append | 57637 -(3 rows) - --- test move --- test moving colocated shards --- status before shard move -SELECT s.shardid, s.logicalrelid::regclass, sp.nodeport -FROM - pg_dist_partition p, pg_dist_shard s, pg_dist_shard_placement sp -WHERE - p.logicalrelid = s.logicalrelid AND - s.shardid = sp.shardid AND - colocationid = (SELECT colocationid FROM pg_dist_partition WHERE logicalrelid = 'table1_group1'::regclass) -ORDER BY s.shardid, sp.nodeport; - shardid | logicalrelid | nodeport ---------------------------------------------------------------------- - 13000000 | table1_group1 | 57637 - 13000000 | table1_group1 | 57638 - 13000001 | table1_group1 | 57638 - 13000002 | table1_group1 | 57637 - 13000003 | table1_group1 | 57638 - 13000004 | table1_group1 | 57637 - 13000005 | table1_group1 | 57638 - 13000006 | table2_group1 | 57637 - 13000006 | table2_group1 | 57638 - 13000007 | table2_group1 | 57638 - 13000008 | table2_group1 | 57637 - 13000009 | table2_group1 | 57638 - 13000010 | table2_group1 | 57637 - 13000011 | table2_group1 | 57638 -(14 rows) - --- move colocated shards -SELECT master_move_shard_placement(13000001, 'localhost', :worker_2_port, 'localhost', :worker_1_port, 'force_logical'); - master_move_shard_placement ---------------------------------------------------------------------- - -(1 row) - --- status after shard move -SELECT s.shardid, s.logicalrelid::regclass, sp.nodeport -FROM - pg_dist_partition p, pg_dist_shard s, pg_dist_shard_placement sp -WHERE - p.logicalrelid = s.logicalrelid AND - s.shardid = sp.shardid AND - colocationid = (SELECT colocationid FROM pg_dist_partition WHERE logicalrelid = 'table1_group1'::regclass) -ORDER BY s.shardid, sp.nodeport; - shardid | logicalrelid | nodeport ---------------------------------------------------------------------- - 13000000 | table1_group1 | 57637 - 13000000 | table1_group1 | 57638 - 13000001 | table1_group1 | 57637 - 13000002 | table1_group1 | 57637 - 13000003 | table1_group1 | 57638 - 13000004 | table1_group1 | 57637 - 13000005 | table1_group1 | 57638 - 13000006 | table2_group1 | 57637 - 13000006 | table2_group1 | 57638 - 13000007 | table2_group1 | 57637 - 13000008 | table2_group1 | 57637 - 13000009 | table2_group1 | 57638 - 13000010 | table2_group1 | 57637 - 13000011 | table2_group1 | 57638 -(14 rows) - --- also connect worker to verify we successfully moved given shard (and other colocated shards) -\c - - - :worker_1_port -SELECT "Column", "Type", "Modifiers" FROM table_desc WHERE relid='public.table1_group1_13000001'::regclass; - Column | Type | Modifiers ---------------------------------------------------------------------- - id | integer | not null -(1 row) - -SELECT "Column", "Type", "Modifiers" FROM table_desc WHERE relid='public.table2_group1_13000007'::regclass; - Column | Type | Modifiers ---------------------------------------------------------------------- - id | integer | -(1 row) - -\c - - - :master_port --- test moving NOT colocated shard --- status before shard move -SELECT s.shardid, s.logicalrelid::regclass, sp.nodeport -FROM - pg_dist_partition p, pg_dist_shard s, pg_dist_shard_placement sp -WHERE - p.logicalrelid = s.logicalrelid AND - s.shardid = sp.shardid AND - p.logicalrelid = 'table5_groupX'::regclass -ORDER BY s.shardid, sp.nodeport; - shardid | logicalrelid | nodeport ---------------------------------------------------------------------- - 13000012 | table5_groupx | 57637 - 13000012 | table5_groupx | 57638 - 13000013 | table5_groupx | 57638 - 13000014 | table5_groupx | 57637 - 13000015 | table5_groupx | 57638 - 13000016 | table5_groupx | 57637 - 13000017 | table5_groupx | 57638 - 13000018 | table5_groupx | 57637 - 13000019 | table5_groupx | 57638 -(9 rows) - --- move NOT colocated shard -SELECT master_move_shard_placement(13000013, 'localhost', :worker_2_port, 'localhost', :worker_1_port, 'force_logical'); - master_move_shard_placement ---------------------------------------------------------------------- - -(1 row) - --- status after shard move -SELECT s.shardid, s.logicalrelid::regclass, sp.nodeport -FROM - pg_dist_partition p, pg_dist_shard s, pg_dist_shard_placement sp -WHERE - p.logicalrelid = s.logicalrelid AND - s.shardid = sp.shardid AND - p.logicalrelid = 'table5_groupX'::regclass -ORDER BY s.shardid, sp.nodeport; - shardid | logicalrelid | nodeport ---------------------------------------------------------------------- - 13000012 | table5_groupx | 57637 - 13000012 | table5_groupx | 57638 - 13000013 | table5_groupx | 57637 - 13000014 | table5_groupx | 57637 - 13000015 | table5_groupx | 57638 - 13000016 | table5_groupx | 57637 - 13000017 | table5_groupx | 57638 - 13000018 | table5_groupx | 57637 - 13000019 | table5_groupx | 57638 -(9 rows) - --- test moving shard in append distributed table --- status before shard move -SELECT s.shardid, s.logicalrelid::regclass, sp.nodeport -FROM - pg_dist_partition p, pg_dist_shard s, pg_dist_shard_placement sp -WHERE - p.logicalrelid = s.logicalrelid AND - s.shardid = sp.shardid AND - p.logicalrelid = 'table6_append'::regclass -ORDER BY s.shardid, sp.nodeport; - shardid | logicalrelid | nodeport ---------------------------------------------------------------------- - 13000020 | table6_append | 57637 - 13000020 | table6_append | 57638 - 13000021 | table6_append | 57637 -(3 rows) - --- move shard in append distributed table -SELECT master_move_shard_placement(13000021, 'localhost', :worker_1_port, 'localhost', :worker_2_port, 'force_logical'); - master_move_shard_placement ---------------------------------------------------------------------- - -(1 row) - --- status after shard move -SELECT s.shardid, s.logicalrelid::regclass, sp.nodeport -FROM - pg_dist_partition p, pg_dist_shard s, pg_dist_shard_placement sp -WHERE - p.logicalrelid = s.logicalrelid AND - s.shardid = sp.shardid AND - p.logicalrelid = 'table6_append'::regclass -ORDER BY s.shardid, sp.nodeport; - shardid | logicalrelid | nodeport ---------------------------------------------------------------------- - 13000020 | table6_append | 57637 - 13000020 | table6_append | 57638 - 13000021 | table6_append | 57638 -(3 rows) - --- try to move shard from wrong node -SELECT master_move_shard_placement(13000021, 'localhost', :worker_1_port, 'localhost', :worker_2_port, 'force_logical'); -ERROR: could not find placement matching "localhost:xxxxx" -HINT: Confirm the placement still exists and try again. --- test shard move with foreign constraints -DROP TABLE IF EXISTS table1_group1, table2_group1; -SET citus.shard_count TO 6; -SET citus.shard_replication_factor TO 1; --- create distributed tables -CREATE TABLE table1_group1 ( id int PRIMARY KEY); -SELECT create_distributed_table('table1_group1', 'id', 'hash'); - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - -CREATE TABLE table2_group1 ( id int, table1_id int, FOREIGN KEY(table1_id) REFERENCES table1_group1(id)); -SELECT create_distributed_table('table2_group1', 'table1_id', 'hash'); - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - --- Mark the tables as non-mx tables -UPDATE pg_dist_partition SET repmodel='c' WHERE logicalrelid IN - ('table1_group1'::regclass, 'table2_group1'::regclass); --- status before shard rebalance -SELECT s.shardid, s.logicalrelid::regclass, sp.nodeport -FROM - pg_dist_partition p, pg_dist_shard s, pg_dist_shard_placement sp -WHERE - p.logicalrelid = s.logicalrelid AND - s.shardid = sp.shardid AND - colocationid = (SELECT colocationid FROM pg_dist_partition WHERE logicalrelid = 'table1_group1'::regclass) -ORDER BY s.shardid, sp.nodeport; - shardid | logicalrelid | nodeport ---------------------------------------------------------------------- - 13000022 | table1_group1 | 57637 - 13000023 | table1_group1 | 57638 - 13000024 | table1_group1 | 57637 - 13000025 | table1_group1 | 57638 - 13000026 | table1_group1 | 57637 - 13000027 | table1_group1 | 57638 - 13000028 | table2_group1 | 57637 - 13000029 | table2_group1 | 57638 - 13000030 | table2_group1 | 57637 - 13000031 | table2_group1 | 57638 - 13000032 | table2_group1 | 57637 - 13000033 | table2_group1 | 57638 -(12 rows) - -SELECT master_move_shard_placement(13000022, 'localhost', :worker_1_port, 'localhost', :worker_2_port, 'block_writes'); - master_move_shard_placement ---------------------------------------------------------------------- - -(1 row) - --- status after shard rebalance -SELECT s.shardid, s.logicalrelid::regclass, sp.nodeport -FROM - pg_dist_partition p, pg_dist_shard s, pg_dist_shard_placement sp -WHERE - p.logicalrelid = s.logicalrelid AND - s.shardid = sp.shardid AND - colocationid = (SELECT colocationid FROM pg_dist_partition WHERE logicalrelid = 'table1_group1'::regclass) -ORDER BY s.shardid, sp.nodeport; - shardid | logicalrelid | nodeport ---------------------------------------------------------------------- - 13000022 | table1_group1 | 57638 - 13000023 | table1_group1 | 57638 - 13000024 | table1_group1 | 57637 - 13000025 | table1_group1 | 57638 - 13000026 | table1_group1 | 57637 - 13000027 | table1_group1 | 57638 - 13000028 | table2_group1 | 57638 - 13000029 | table2_group1 | 57638 - 13000030 | table2_group1 | 57637 - 13000031 | table2_group1 | 57638 - 13000032 | table2_group1 | 57637 - 13000033 | table2_group1 | 57638 -(12 rows) - --- also connect worker to verify we successfully moved given shard (and other colocated shards) -\c - - - :worker_2_port -SELECT "Column", "Type", "Modifiers" FROM table_desc WHERE relid='public.table1_group1_13000022'::regclass; - Column | Type | Modifiers ---------------------------------------------------------------------- - id | integer | not null -(1 row) - -SELECT "Column", "Type", "Modifiers" FROM table_desc WHERE relid='public.table2_group1_13000028'::regclass; - Column | Type | Modifiers ---------------------------------------------------------------------- - id | integer | - table1_id | integer | -(2 rows) - --- make sure that we've created the foreign keys -SELECT "Constraint", "Definition" FROM table_fkeys; - Constraint | Definition ---------------------------------------------------------------------- - pg_dist_poolinfo_nodeid_fkey | FOREIGN KEY (nodeid) REFERENCES pg_dist_node(nodeid) - table2_group1_table1_id_fkey_13000028 | FOREIGN KEY (table1_id) REFERENCES table1_group1_13000022(id) - table2_group1_table1_id_fkey_13000029 | FOREIGN KEY (table1_id) REFERENCES table1_group1_13000023(id) - table2_group1_table1_id_fkey_13000031 | FOREIGN KEY (table1_id) REFERENCES table1_group1_13000025(id) - table2_group1_table1_id_fkey_13000033 | FOREIGN KEY (table1_id) REFERENCES table1_group1_13000027(id) - test_constraint_1230019 | FOREIGN KEY (l_orderkey) REFERENCES tenant_isolation.orders_streaming_1230016(o_orderkey) - test_constraint_1230020 | FOREIGN KEY (l_orderkey) REFERENCES tenant_isolation.orders_streaming_1230017(o_orderkey) - test_constraint_1230021 | FOREIGN KEY (l_orderkey) REFERENCES tenant_isolation.orders_streaming_1230018(o_orderkey) - test_constraint_1230025 | FOREIGN KEY (l_orderkey) REFERENCES tenant_isolation.orders_streaming_1230022(o_orderkey) - test_constraint_1230026 | FOREIGN KEY (l_orderkey) REFERENCES tenant_isolation.orders_streaming_1230023(o_orderkey) - test_constraint_1230027 | FOREIGN KEY (l_orderkey) REFERENCES tenant_isolation.orders_streaming_1230024(o_orderkey) -(11 rows) - -\c - - - :master_port --- test shard copy with foreign constraints --- we expect it to error out because we do not support foreign constraints with replication factor > 1 -SELECT master_copy_shard_placement(13000022, 'localhost', :worker_2_port, 'localhost', :worker_1_port, false); -ERROR: cannot replicate shards with foreign keys --- lets also test that master_move_shard_placement doesn't break serials -CREATE TABLE serial_move_test (key int, other_val serial); -SET citus.shard_replication_factor TO 1; -SELECT create_distributed_table('serial_move_test', 'key'); - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - --- key 15 goes to shard xxxxx -INSERT INTO serial_move_test (key) VALUES (15) RETURNING *; - key | other_val ---------------------------------------------------------------------- - 15 | 1 -(1 row) - -INSERT INTO serial_move_test (key) VALUES (15) RETURNING *; - key | other_val ---------------------------------------------------------------------- - 15 | 2 -(1 row) - --- confirm the shard id -SELECT * FROM run_command_on_placements('serial_move_test', 'SELECT DISTINCT key FROM %s WHERE key = 15') WHERE result = '15' AND shardid = 13000034; - nodename | nodeport | shardid | success | result ---------------------------------------------------------------------- - localhost | 57637 | 13000034 | t | 15 -(1 row) - -SELECT master_move_shard_placement(13000034, 'localhost', :worker_1_port, 'localhost', :worker_2_port, 'force_logical'); - master_move_shard_placement ---------------------------------------------------------------------- - -(1 row) - --- confirm the successfull move -SELECT * FROM run_command_on_placements('serial_move_test', 'SELECT DISTINCT key FROM %s WHERE key = 15') WHERE result = '15' AND shardid = 13000034; - nodename | nodeport | shardid | success | result ---------------------------------------------------------------------- - localhost | 57638 | 13000034 | t | 15 -(1 row) - --- finally show that serials work fine afterwards -INSERT INTO serial_move_test (key) VALUES (15) RETURNING *; - key | other_val ---------------------------------------------------------------------- - 15 | 3 -(1 row) - -INSERT INTO serial_move_test (key) VALUES (15) RETURNING *; - key | other_val ---------------------------------------------------------------------- - 15 | 4 -(1 row) - --- lets do some failure testing -CREATE TABLE logical_failure_test (key int); -SET citus.shard_replication_factor TO 1; -SET citus.shard_count TO 4; -SELECT create_distributed_table('logical_failure_test', 'key'); - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - --- ensure that the shard is created for this user -\c - - - :worker_2_port -\dt logical_failure_test_13000038 - List of relations - Schema | Name | Type | Owner ---------------------------------------------------------------------- - public | logical_failure_test_13000038 | table | postgres -(1 row) - -DROP TABLE logical_failure_test_13000038; --- should fail since the command wouldn't be able to connect to the worker_1 -\c - - - :master_port -SELECT master_move_shard_placement(13000038, 'localhost', :worker_2_port, 'localhost', :worker_1_port, 'force_logical'); -ERROR: could not copy table "logical_failure_test_13000038" from "localhost:xxxxx" -CONTEXT: while executing command on localhost:xxxxx -DROP TABLE logical_failure_test; --- lets test the logical replication modes -CREATE TABLE test_with_pkey (key int PRIMARY KEY, value int NOT NULL); -SET citus.shard_replication_factor TO 1; -SET citus.shard_count TO 4; -SELECT create_distributed_table('test_with_pkey', 'key', colocate_with => 'none'); - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - --- should succeed since there is a replica identity defined -SELECT master_move_shard_placement(13000042, 'localhost', :worker_1_port, 'localhost', :worker_2_port); - master_move_shard_placement ---------------------------------------------------------------------- - -(1 row) - --- should succeed since we still have a replica identity -ALTER TABLE test_with_pkey REPLICA IDENTITY FULL; -SELECT master_move_shard_placement(13000042, 'localhost', :worker_2_port, 'localhost', :worker_1_port, 'auto'); - master_move_shard_placement ---------------------------------------------------------------------- - -(1 row) - --- this time should fail since we don't have replica identity any more -ALTER TABLE test_with_pkey REPLICA IDENTITY NOTHING; -SELECT master_move_shard_placement(13000042, 'localhost', :worker_1_port, 'localhost', :worker_2_port, 'auto'); - master_move_shard_placement ---------------------------------------------------------------------- - -(1 row) - --- should succeed since we still have a replica identity -ALTER TABLE test_with_pkey REPLICA IDENTITY USING INDEX test_with_pkey_pkey; -SELECT master_move_shard_placement(13000042, 'localhost', :worker_1_port, 'localhost', :worker_2_port); -ERROR: could not find placement matching "localhost:xxxxx" -HINT: Confirm the placement still exists and try again. --- one final test with shard_transfer_mode auto -CREATE UNIQUE INDEX req_rep_idx ON test_with_pkey(key, value); -ALTER TABLE test_with_pkey REPLICA IDENTITY USING INDEX req_rep_idx; -SELECT master_move_shard_placement(13000042, 'localhost', :worker_2_port, 'localhost', :worker_1_port, 'auto'); - master_move_shard_placement ---------------------------------------------------------------------- - -(1 row) - -ALTER TABLE test_with_pkey REPLICA IDENTITY NOTHING; -SELECT master_move_shard_placement(13000042, 'localhost', :worker_1_port, 'localhost', :worker_2_port, 'force_logical'); - master_move_shard_placement ---------------------------------------------------------------------- - -(1 row) - --- should succeed but not use logical replication -ALTER TABLE test_with_pkey REPLICA IDENTITY NOTHING; -SET client_min_messages TO DEBUG1; -SELECT master_move_shard_placement(13000042, 'localhost', :worker_2_port, 'localhost', :worker_1_port, 'block_writes'); -DEBUG: table "test_with_pkey_13000042" does not exist, skipping -DETAIL: NOTICE from localhost:xxxxx - master_move_shard_placement ---------------------------------------------------------------------- - -(1 row) - -SET client_min_messages TO DEFAULT; --- we don't support multiple shard moves in a single transaction -SELECT - master_move_shard_placement(shardid, 'localhost', :worker_1_port, 'localhost', :worker_2_port, shard_transfer_mode:='force_logical') -FROM - pg_dist_shard_placement where nodeport = :worker_1_port AND - shardid IN (SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_with_pkey'::regclass); - master_move_shard_placement ---------------------------------------------------------------------- - - -(2 rows) - --- similar test with explicit transaction block -BEGIN; - - SELECT master_move_shard_placement(13000042, 'localhost', :worker_1_port, 'localhost', :worker_2_port, shard_transfer_mode:='force_logical'); -ERROR: could not find placement matching "localhost:xxxxx" -HINT: Confirm the placement still exists and try again. - SELECT master_move_shard_placement(13000044, 'localhost', :worker_1_port, 'localhost', :worker_2_port, shard_transfer_mode:='force_logical'); -ERROR: current transaction is aborted, commands ignored until end of transaction block -COMMIT; --- we do support the same with block writes -SELECT - master_move_shard_placement(shardid, 'localhost', :worker_1_port, 'localhost', :worker_2_port, shard_transfer_mode:='block_writes') -FROM - pg_dist_shard_placement where nodeport = :worker_1_port AND - shardid IN (SELECT shardid FROM pg_dist_shard WHERE logicalrelid = 'test_with_pkey'::regclass); - master_move_shard_placement ---------------------------------------------------------------------- -(0 rows) - --- we should be able to move shard placements after COMMIT/ABORT -BEGIN; - - SELECT master_move_shard_placement(13000043, 'localhost', :worker_2_port, 'localhost', :worker_1_port, shard_transfer_mode:='force_logical'); - master_move_shard_placement ---------------------------------------------------------------------- - -(1 row) - -COMMIT; -SELECT master_move_shard_placement(13000045, 'localhost', :worker_2_port, 'localhost', :worker_1_port, shard_transfer_mode:='force_logical'); - master_move_shard_placement ---------------------------------------------------------------------- - -(1 row) - -BEGIN; - - SELECT master_move_shard_placement(13000043, 'localhost', :worker_1_port, 'localhost', :worker_2_port, shard_transfer_mode:='force_logical'); - master_move_shard_placement ---------------------------------------------------------------------- - -(1 row) - -ABORT; -SELECT master_move_shard_placement(13000045, 'localhost', :worker_1_port, 'localhost', :worker_2_port, shard_transfer_mode:='force_logical'); - master_move_shard_placement ---------------------------------------------------------------------- - -(1 row) - --- we should be able to move shard placements of partitioend tables -CREATE SCHEMA move_partitions; -CREATE TABLE move_partitions.events ( - id serial, - t timestamptz default now(), - payload text -) -PARTITION BY RANGE(t); -ERROR: syntax error at or near "PARTITION" -SET citus.shard_count TO 6; -SELECT create_distributed_table('move_partitions.events', 'id', colocate_with := 'none'); -ERROR: relation "move_partitions.events" does not exist -CREATE TABLE move_partitions.events_1 PARTITION OF move_partitions.events -FOR VALUES FROM ('2015-01-01') TO ('2016-01-01'); -ERROR: syntax error at or near "PARTITION" -INSERT INTO move_partitions.events (t, payload) -SELECT '2015-01-01'::date + (interval '1 day' * s), s FROM generate_series(1, 100) s; -ERROR: relation "move_partitions.events" does not exist -SELECT count(*) FROM move_partitions.events; -ERROR: relation "move_partitions.events" does not exist --- try to move automatically -SELECT master_move_shard_placement(shardid, 'localhost', :worker_2_port, 'localhost', :worker_1_port) -FROM pg_dist_shard JOIN pg_dist_shard_placement USING (shardid) -WHERE logicalrelid = 'move_partitions.events'::regclass AND nodeport = :worker_2_port -ORDER BY shardid LIMIT 1; -ERROR: relation "move_partitions.events" does not exist --- force logical replication -SELECT master_move_shard_placement(shardid, 'localhost', :worker_2_port, 'localhost', :worker_1_port, 'force_logical') -FROM pg_dist_shard JOIN pg_dist_shard_placement USING (shardid) -WHERE logicalrelid = 'move_partitions.events'::regclass AND nodeport = :worker_2_port -ORDER BY shardid LIMIT 1; -ERROR: relation "move_partitions.events" does not exist -SELECT count(*) FROM move_partitions.events; -ERROR: relation "move_partitions.events" does not exist --- add a primary key to the partition -ALTER TABLE move_partitions.events_1 ADD CONSTRAINT e_1_pk PRIMARY KEY (id); -ERROR: relation "move_partitions.events_1" does not exist --- should be able to move automatically now -SELECT master_move_shard_placement(shardid, 'localhost', :worker_2_port, 'localhost', :worker_1_port) -FROM pg_dist_shard JOIN pg_dist_shard_placement USING (shardid) -WHERE logicalrelid = 'move_partitions.events'::regclass AND nodeport = :worker_2_port -ORDER BY shardid LIMIT 1; -ERROR: relation "move_partitions.events" does not exist -SELECT count(*) FROM move_partitions.events; -ERROR: relation "move_partitions.events" does not exist --- should also be able to move with block writes -SELECT master_move_shard_placement(shardid, 'localhost', :worker_2_port, 'localhost', :worker_1_port, 'block_writes') -FROM pg_dist_shard JOIN pg_dist_shard_placement USING (shardid) -WHERE logicalrelid = 'move_partitions.events'::regclass AND nodeport = :worker_2_port -ORDER BY shardid LIMIT 1; -ERROR: relation "move_partitions.events" does not exist -SELECT count(*) FROM move_partitions.events; -ERROR: relation "move_partitions.events" does not exist --- should have moved all shards to node 1 (2*6 = 12) -SELECT count(*) -FROM pg_dist_shard JOIN pg_dist_shard_placement USING (shardid) -WHERE logicalrelid::text LIKE 'move_partitions.events%' AND nodeport = :worker_1_port; - count ---------------------------------------------------------------------- - 0 -(1 row) - -DROP TABLE move_partitions.events; -ERROR: table "events" does not exist --- set back to the defaults and drop the table -SET client_min_messages TO DEFAULT; -DROP TABLE test_with_pkey; diff --git a/src/test/regress/expected/multi_function_in_join_0.out b/src/test/regress/expected/multi_function_in_join_0.out deleted file mode 100644 index 5f2bd70c7..000000000 --- a/src/test/regress/expected/multi_function_in_join_0.out +++ /dev/null @@ -1,265 +0,0 @@ --- --- multi function in join queries aims to test the function calls that are --- used in joins. --- --- These functions are supposed to be executed on the worker and to ensure --- that we wrap those functions inside (SELECT * FROM fnc()) sub queries. --- --- We do not yet support those functions that: --- - have lateral joins --- - have WITH ORDINALITY clause --- - are user-defined and immutable -CREATE SCHEMA functions_in_joins; -SET search_path TO 'functions_in_joins'; -SET citus.next_shard_id TO 2500000; -SET citus.shard_replication_factor to 1; -CREATE TABLE table1 (id int, data int); -SELECT create_distributed_table('table1','id'); - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - -INSERT INTO table1 -SELECT x, x*x -from generate_series(1, 100) as f (x); --- Verbose messages for observing the subqueries that wrapped function calls -SET client_min_messages TO DEBUG1; --- Check joins on a sequence -CREATE SEQUENCE numbers; -SELECT * FROM table1 JOIN nextval('numbers') n ON (id = n) ORDER BY id ASC; -DEBUG: generating subplan XXX_1 for subquery SELECT n FROM nextval('functions_in_joins.numbers'::regclass) n(n) -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT table1.id, table1.data, n.n FROM (functions_in_joins.table1 JOIN (SELECT intermediate_result.n FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(n bigint)) n ON ((table1.id OPERATOR(pg_catalog.=) n.n))) ORDER BY table1.id - id | data | n ---------------------------------------------------------------------- - 1 | 1 | 1 -(1 row) - --- Check joins of a function that returns a single integer -CREATE FUNCTION add(integer, integer) RETURNS integer -AS 'SELECT $1 + $2;' -LANGUAGE SQL; -DEBUG: switching to sequential query execution mode -DETAIL: A command for a distributed function is run. To make sure subsequent commands see the function correctly we need to make sure to use only one connection for all future commands -SELECT * FROM table1 JOIN add(3,5) sum ON (id = sum) ORDER BY id ASC; -DEBUG: generating subplan XXX_1 for subquery SELECT sum FROM functions_in_joins.add(3, 5) sum(sum) -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT table1.id, table1.data, sum.sum FROM (functions_in_joins.table1 JOIN (SELECT intermediate_result.sum FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(sum integer)) sum ON ((table1.id OPERATOR(pg_catalog.=) sum.sum))) ORDER BY table1.id - id | data | sum ---------------------------------------------------------------------- - 8 | 64 | 8 -(1 row) - --- Check join of plpgsql functions --- a function returning a single integer -CREATE OR REPLACE FUNCTION increment(i integer) RETURNS integer AS $$ -BEGIN - RETURN i + 1; -END; -$$ LANGUAGE plpgsql; -DEBUG: switching to sequential query execution mode -DETAIL: A command for a distributed function is run. To make sure subsequent commands see the function correctly we need to make sure to use only one connection for all future commands -SELECT * FROM table1 JOIN increment(2) val ON (id = val) ORDER BY id ASC; -DEBUG: function does not have co-located tables -DEBUG: generating subplan XXX_1 for subquery SELECT val FROM functions_in_joins.increment(2) val(val) -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT table1.id, table1.data, val.val FROM (functions_in_joins.table1 JOIN (SELECT intermediate_result.val FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(val integer)) val ON ((table1.id OPERATOR(pg_catalog.=) val.val))) ORDER BY table1.id - id | data | val ---------------------------------------------------------------------- - 3 | 9 | 3 -(1 row) - --- a function that returns a set of integers --- Block distributing function as we have tests below to test it locally -SET citus.enable_metadata_sync TO OFF; -CREATE OR REPLACE FUNCTION next_k_integers(IN first_value INTEGER, - IN k INTEGER DEFAULT 3, - OUT result INTEGER) - RETURNS SETOF INTEGER AS $$ -BEGIN - RETURN QUERY SELECT x FROM generate_series(first_value, first_value+k-1) f(x); -END; -$$ LANGUAGE plpgsql; -RESET citus.enable_metadata_sync; -SELECT * -FROM table1 JOIN next_k_integers(3,2) next_integers ON (id = next_integers.result) -ORDER BY id ASC; -DEBUG: generating subplan XXX_1 for subquery SELECT result FROM functions_in_joins.next_k_integers(3, 2) next_integers(result) -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT table1.id, table1.data, next_integers.result FROM (functions_in_joins.table1 JOIN (SELECT intermediate_result.result FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(result integer)) next_integers ON ((table1.id OPERATOR(pg_catalog.=) next_integers.result))) ORDER BY table1.id - id | data | result ---------------------------------------------------------------------- - 3 | 9 | 3 - 4 | 16 | 4 -(2 rows) - --- a function returning set of records -CREATE FUNCTION get_set_of_records() RETURNS SETOF RECORD AS $cmd$ -SELECT x, x+1 FROM generate_series(0,4) f(x) -$cmd$ -LANGUAGE SQL; -DEBUG: switching to sequential query execution mode -DETAIL: A command for a distributed function is run. To make sure subsequent commands see the function correctly we need to make sure to use only one connection for all future commands -SELECT * FROM table1 JOIN get_set_of_records() AS t2(x int, y int) ON (id = x) ORDER BY id ASC; -DEBUG: function does not have co-located tables -DEBUG: generating subplan XXX_1 for subquery SELECT x, y FROM functions_in_joins.get_set_of_records() t2(x integer, y integer) -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT table1.id, table1.data, t2.x, t2.y FROM (functions_in_joins.table1 JOIN (SELECT intermediate_result.x, intermediate_result.y FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(x integer, y integer)) t2 ON ((table1.id OPERATOR(pg_catalog.=) t2.x))) ORDER BY table1.id - id | data | x | y ---------------------------------------------------------------------- - 1 | 1 | 1 | 2 - 2 | 4 | 2 | 3 - 3 | 9 | 3 | 4 - 4 | 16 | 4 | 5 -(4 rows) - --- a function returning table -CREATE FUNCTION dup(int) RETURNS TABLE(f1 int, f2 text) -AS $$ SELECT $1, CAST($1 AS text) || ' is text' $$ -LANGUAGE SQL; -DEBUG: switching to sequential query execution mode -DETAIL: A command for a distributed function is run. To make sure subsequent commands see the function correctly we need to make sure to use only one connection for all future commands -SELECT f.* FROM table1 t JOIN dup(32) f ON (f1 = id); -DEBUG: function does not have co-located tables -DEBUG: generating subplan XXX_1 for subquery SELECT f1, f2 FROM functions_in_joins.dup(32) f(f1, f2) -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT f.f1, f.f2 FROM (functions_in_joins.table1 t JOIN (SELECT intermediate_result.f1, intermediate_result.f2 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(f1 integer, f2 text)) f ON ((f.f1 OPERATOR(pg_catalog.=) t.id))) - f1 | f2 ---------------------------------------------------------------------- - 32 | 32 is text -(1 row) - --- a stable function -CREATE OR REPLACE FUNCTION the_minimum_id() - RETURNS INTEGER STABLE AS 'SELECT min(id) FROM table1' LANGUAGE SQL; -DEBUG: switching to sequential query execution mode -DETAIL: A command for a distributed function is run. To make sure subsequent commands see the function correctly we need to make sure to use only one connection for all future commands -SELECT * FROM table1 JOIN the_minimum_id() min_id ON (id = min_id); -DEBUG: function does not have co-located tables -DEBUG: generating subplan XXX_1 for subquery SELECT min_id FROM functions_in_joins.the_minimum_id() min_id(min_id) -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT table1.id, table1.data, min_id.min_id FROM (functions_in_joins.table1 JOIN (SELECT intermediate_result.min_id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(min_id integer)) min_id ON ((table1.id OPERATOR(pg_catalog.=) min_id.min_id))) - id | data | min_id ---------------------------------------------------------------------- - 1 | 1 | 1 -(1 row) - --- a built-in immutable function -SELECT * FROM table1 JOIN abs(100) as hundred ON (id = hundred) ORDER BY id ASC; - id | data | hundred ---------------------------------------------------------------------- - 100 | 10000 | 100 -(1 row) - --- function joins inside a CTE -WITH next_row_to_process AS ( - SELECT * FROM table1 JOIN nextval('numbers') n ON (id = n) - ) -SELECT * -FROM table1, next_row_to_process -WHERE table1.data <= next_row_to_process.data -ORDER BY 1,2 ASC; -DEBUG: generating subplan XXX_1 for CTE next_row_to_process: SELECT table1.id, table1.data, n.n FROM (functions_in_joins.table1 JOIN nextval('functions_in_joins.numbers'::regclass) n(n) ON ((table1.id OPERATOR(pg_catalog.=) n.n))) -DEBUG: generating subplan XXX_1 for subquery SELECT n FROM nextval('functions_in_joins.numbers'::regclass) n(n) -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT table1.id, table1.data, n.n FROM (functions_in_joins.table1 JOIN (SELECT intermediate_result.n FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(n bigint)) n ON ((table1.id OPERATOR(pg_catalog.=) n.n))) -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT table1.id, table1.data, next_row_to_process.id, next_row_to_process.data, next_row_to_process.n FROM functions_in_joins.table1, (SELECT intermediate_result.id, intermediate_result.data, intermediate_result.n FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer, data integer, n bigint)) next_row_to_process WHERE (table1.data OPERATOR(pg_catalog.<=) next_row_to_process.data) ORDER BY table1.id, table1.data - id | data | id | data | n ---------------------------------------------------------------------- - 1 | 1 | 2 | 4 | 2 - 2 | 4 | 2 | 4 | 2 -(2 rows) - --- Multiple functions in an RTE -SELECT * FROM ROWS FROM (next_k_integers(5), next_k_integers(10)) AS f(a, b), - table1 WHERE id = a ORDER BY id ASC; -DEBUG: generating subplan XXX_1 for subquery SELECT a, b FROM ROWS FROM(functions_in_joins.next_k_integers(5), functions_in_joins.next_k_integers(10)) f(a, b) -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT f.a, f.b, table1.id, table1.data FROM (SELECT intermediate_result.a, intermediate_result.b FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer)) f(a, b), functions_in_joins.table1 WHERE (table1.id OPERATOR(pg_catalog.=) f.a) ORDER BY table1.id - a | b | id | data ---------------------------------------------------------------------- - 5 | 10 | 5 | 25 - 6 | 11 | 6 | 36 - 7 | 12 | 7 | 49 -(3 rows) - --- Custom Type returning function used in a join -RESET client_min_messages; -CREATE TYPE min_and_max AS ( - minimum INT, - maximum INT -); -SET client_min_messages TO DEBUG1; -CREATE OR REPLACE FUNCTION max_and_min () RETURNS - min_and_max AS $$ -DECLARE - result min_and_max%rowtype; -begin - select into result min(data) as minimum, max(data) as maximum from table1; - return result; -end; -$$ language plpgsql; -DEBUG: switching to sequential query execution mode -DETAIL: A command for a distributed function is run. To make sure subsequent commands see the function correctly we need to make sure to use only one connection for all future commands -SELECT * FROM table1 JOIN max_and_min() m ON (m.maximum = data OR m.minimum = data) ORDER BY 1,2,3,4; -DEBUG: function does not have co-located tables -DEBUG: generating subplan XXX_1 for subquery SELECT minimum, maximum FROM functions_in_joins.max_and_min() m(minimum, maximum) -DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT table1.id, table1.data, m.minimum, m.maximum FROM (functions_in_joins.table1 JOIN (SELECT intermediate_result.minimum, intermediate_result.maximum FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(minimum integer, maximum integer)) m ON (((m.maximum OPERATOR(pg_catalog.=) table1.data) OR (m.minimum OPERATOR(pg_catalog.=) table1.data)))) ORDER BY table1.id, table1.data, m.minimum, m.maximum - id | data | minimum | maximum ---------------------------------------------------------------------- - 1 | 1 | 1 | 10000 - 100 | 10000 | 1 | 10000 -(2 rows) - --- The following tests will fail as we do not support all joins on --- all kinds of functions --- In other words, we cannot recursively plan the functions and hence --- the query fails on the workers -SET client_min_messages TO ERROR; -\set VERBOSITY terse --- function joins in CTE results can create lateral joins that are not supported --- we execute the query within a function to consolidate the error messages --- between different executors -SET citus.enable_metadata_sync TO OFF; -CREATE FUNCTION raise_failed_execution_func_join(query text) RETURNS void AS $$ -BEGIN - EXECUTE query; - EXCEPTION WHEN OTHERS THEN - IF SQLERRM LIKE 'failed to execute task%' THEN - RAISE 'Task failed to execute'; - ELSIF SQLERRM LIKE '%does not exist%' THEN - RAISE 'Task failed to execute'; - END IF; -END; -$$LANGUAGE plpgsql; -RESET citus.enable_metadata_sync; -SELECT raise_failed_execution_func_join($$ - WITH one_row AS ( - SELECT * FROM table1 WHERE id=52 - ) - SELECT table1.id, table1.data - FROM one_row, table1, next_k_integers(one_row.id, 5) next_five_ids - WHERE table1.id = next_five_ids; -$$); -ERROR: Task failed to execute --- a user-defined immutable function -SET citus.enable_metadata_sync TO OFF; -CREATE OR REPLACE FUNCTION the_answer_to_life() - RETURNS INTEGER IMMUTABLE AS 'SELECT 42' LANGUAGE SQL; -RESET citus.enable_metadata_sync; -SELECT raise_failed_execution_func_join($$ - SELECT * FROM table1 JOIN the_answer_to_life() the_answer ON (id = the_answer); -$$); -ERROR: Task failed to execute -SELECT raise_failed_execution_func_join($$ - SELECT * - FROM table1 - JOIN next_k_integers(10,5) WITH ORDINALITY next_integers - ON (id = next_integers.result); -$$); -ERROR: Task failed to execute --- WITH ORDINALITY clause -SELECT raise_failed_execution_func_join($$ - SELECT * - FROM table1 - JOIN next_k_integers(10,5) WITH ORDINALITY next_integers - ON (id = next_integers.result) - ORDER BY id ASC; -$$); -ERROR: Task failed to execute -RESET client_min_messages; -DROP SCHEMA functions_in_joins CASCADE; -NOTICE: drop cascades to 12 other objects -SET search_path TO DEFAULT; diff --git a/src/test/regress/expected/multi_poolinfo_usage_0.out b/src/test/regress/expected/multi_poolinfo_usage_0.out deleted file mode 100644 index 2ccf077e4..000000000 --- a/src/test/regress/expected/multi_poolinfo_usage_0.out +++ /dev/null @@ -1,60 +0,0 @@ --- --- MULTI_POOLINFO_USAGE --- --- Test pooler info logic --- --- Test of ability to override host/port for a node -SET citus.shard_replication_factor TO 1; -SET citus.next_shard_id TO 20000000; -SELECT nodeid AS worker_1_id FROM pg_dist_node WHERE nodename = 'localhost' AND nodeport = :worker_1_port; - worker_1_id ---------------------------------------------------------------------- - 16 -(1 row) - -\gset -SELECT nodeid AS worker_2_id FROM pg_dist_node WHERE nodename = 'localhost' AND nodeport = :worker_2_port; - worker_2_id ---------------------------------------------------------------------- - 18 -(1 row) - -\gset -CREATE TABLE lotsa_connections (id integer, name text); -SELECT create_distributed_table('lotsa_connections', 'id'); - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - -INSERT INTO lotsa_connections VALUES (1, 'user'), (2, 'user'), (3, 'user'), (4, 'user'); -SELECT COUNT(*) FROM lotsa_connections; - count ---------------------------------------------------------------------- - 4 -(1 row) - --- put outright bad values -\set VERBOSITY terse -INSERT INTO pg_dist_poolinfo VALUES (:worker_1_id, 'host=failhost'), - (:worker_2_id, 'port=9999'); -\c -SELECT COUNT(*) FROM lotsa_connections; -ERROR: epoll_ctl() failed: No such file or directory --- "re-route" worker one to node two and vice-versa -DELETE FROM pg_dist_poolinfo; -INSERT INTO pg_dist_poolinfo VALUES (:worker_1_id, 'port=' || :worker_2_port), - (:worker_2_id, 'port=' || :worker_1_port); -\c --- this fails because the shards of one worker won't exist on the other and shards --- are still looked up using the node name, not the effective connection host -INSERT INTO lotsa_connections VALUES (1, 'user'), (2, 'user'), (3, 'user'), (4, 'user'); -ERROR: relation "public.lotsa_connections_20000000" does not exist --- tweak poolinfo to use 127.0.0.1 instead of localhost; should work! -DELETE FROM pg_dist_poolinfo; -INSERT INTO pg_dist_poolinfo VALUES (:worker_1_id, 'host=127.0.0.1 port=' || :worker_1_port), - (:worker_2_id, 'host=127.0.0.1 port=' || :worker_2_port); -\c -DELETE FROM lotsa_connections; -DROP TABLE lotsa_connections; -DELETE FROM pg_dist_poolinfo; diff --git a/src/test/regress/expected/multi_select_distinct.out b/src/test/regress/expected/multi_select_distinct.out index bfb189095..8f180e266 100644 --- a/src/test/regress/expected/multi_select_distinct.out +++ b/src/test/regress/expected/multi_select_distinct.out @@ -958,10 +958,12 @@ EXPLAIN (COSTS FALSE) Tasks Shown: One of 4 -> Task Node: host=localhost port=xxxxx dbname=regression - -> HashAggregate + -> GroupAggregate Group Key: l_orderkey - -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part -(13 rows) + -> Sort + Sort Key: l_orderkey + -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part +(15 rows) -- check the plan if the hash aggreate is disabled. SET enable_hashagg TO off; @@ -982,10 +984,12 @@ EXPLAIN (COSTS FALSE) Tasks Shown: One of 4 -> Task Node: host=localhost port=xxxxx dbname=regression - -> HashAggregate + -> GroupAggregate Group Key: l_orderkey - -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part -(12 rows) + -> Sort + Sort Key: l_orderkey + -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part +(14 rows) SET enable_hashagg TO on; -- distinct on non-partition column with aggregate diff --git a/src/test/regress/expected/multi_select_distinct_1.out b/src/test/regress/expected/multi_select_distinct_1.out deleted file mode 100644 index 506ce215f..000000000 --- a/src/test/regress/expected/multi_select_distinct_1.out +++ /dev/null @@ -1,1569 +0,0 @@ --- --- MULTI_SELECT_DISTINCT --- --- Tests select distinct, and select distinct on features. --- -ANALYZE lineitem_hash_part; --- function calls are supported -SELECT DISTINCT l_orderkey, now() FROM lineitem_hash_part LIMIT 0; - l_orderkey | now ---------------------------------------------------------------------- -(0 rows) - -SELECT DISTINCT l_orderkey, avg(l_linenumber) -FROM lineitem_hash_part -GROUP BY l_orderkey -HAVING avg(l_linenumber) = (select avg(distinct l_linenumber)) -LIMIT 10; -ERROR: Subqueries in HAVING cannot refer to outer query -SELECT DISTINCT l_orderkey -FROM lineitem_hash_part -GROUP BY l_orderkey -HAVING (select avg(distinct l_linenumber) = l_orderkey) -LIMIT 10; -ERROR: Subqueries in HAVING cannot refer to outer query -SELECT DISTINCT l_partkey, 1 + (random() * 0)::int FROM lineitem_hash_part ORDER BY 1 DESC LIMIT 3; - l_partkey | ?column? ---------------------------------------------------------------------- - 199973 | 1 - 199946 | 1 - 199943 | 1 -(3 rows) - --- const expressions are supported -SELECT DISTINCT l_orderkey, 1+1 FROM lineitem_hash_part ORDER BY 1 LIMIT 5; - l_orderkey | ?column? ---------------------------------------------------------------------- - 1 | 2 - 2 | 2 - 3 | 2 - 4 | 2 - 5 | 2 -(5 rows) - --- non const expressions are also supported -SELECT DISTINCT l_orderkey, l_partkey + 1 FROM lineitem_hash_part ORDER BY 1, 2 LIMIT 5; - l_orderkey | ?column? ---------------------------------------------------------------------- - 1 | 2133 - 1 | 15636 - 1 | 24028 - 1 | 63701 - 1 | 67311 -(5 rows) - --- column expressions are supported -SELECT DISTINCT l_orderkey, l_shipinstruct || l_shipmode FROM lineitem_hash_part ORDER BY 2 , 1 LIMIT 5; - l_orderkey | ?column? ---------------------------------------------------------------------- - 32 | COLLECT CODAIR - 39 | COLLECT CODAIR - 66 | COLLECT CODAIR - 70 | COLLECT CODAIR - 98 | COLLECT CODAIR -(5 rows) - --- function calls with const input are supported -SELECT DISTINCT l_orderkey, strpos('AIR', 'A') FROM lineitem_hash_part ORDER BY 1,2 LIMIT 5; - l_orderkey | strpos ---------------------------------------------------------------------- - 1 | 1 - 2 | 1 - 3 | 1 - 4 | 1 - 5 | 1 -(5 rows) - --- function calls with non-const input are supported -SELECT DISTINCT l_orderkey, strpos(l_shipmode, 'I') - FROM lineitem_hash_part - WHERE strpos(l_shipmode, 'I') > 1 - ORDER BY 2, 1 - LIMIT 5; - l_orderkey | strpos ---------------------------------------------------------------------- - 1 | 2 - 3 | 2 - 5 | 2 - 32 | 2 - 33 | 2 -(5 rows) - --- row types are supported -SELECT DISTINCT (l_orderkey, l_partkey) AS pair FROM lineitem_hash_part ORDER BY 1 LIMIT 5; - pair ---------------------------------------------------------------------- - (1,2132) - (1,15635) - (1,24027) - (1,63700) - (1,67310) -(5 rows) - --- distinct on partition column --- verify counts match with respect to count(distinct) -CREATE TEMP TABLE temp_orderkeys AS SELECT DISTINCT l_orderkey FROM lineitem_hash_part; -SELECT COUNT(*) FROM temp_orderkeys; - count ---------------------------------------------------------------------- - 2985 -(1 row) - -SELECT COUNT(DISTINCT l_orderkey) FROM lineitem_hash_part; - count ---------------------------------------------------------------------- - 2985 -(1 row) - -SELECT DISTINCT l_orderkey FROM lineitem_hash_part WHERE l_orderkey < 500 and l_partkey < 5000 order by 1; - l_orderkey ---------------------------------------------------------------------- - 1 - 3 - 32 - 35 - 39 - 65 - 129 - 130 - 134 - 164 - 194 - 228 - 261 - 290 - 320 - 321 - 354 - 418 -(18 rows) - --- distinct on non-partition column -SELECT DISTINCT l_partkey FROM lineitem_hash_part WHERE l_orderkey > 5 and l_orderkey < 20 order by 1; - l_partkey ---------------------------------------------------------------------- - 79251 - 94780 - 139636 - 145243 - 151894 - 157238 - 163073 - 182052 -(8 rows) - -SELECT DISTINCT l_shipmode FROM lineitem_hash_part ORDER BY 1 DESC; - l_shipmode ---------------------------------------------------------------------- - TRUCK - SHIP - REG AIR - RAIL - MAIL - FOB - AIR -(7 rows) - --- distinct with multiple columns -SELECT DISTINCT l_orderkey, o_orderdate - FROM lineitem_hash_part JOIN orders_hash_part ON (l_orderkey = o_orderkey) - WHERE l_orderkey < 10 - ORDER BY l_orderkey; - l_orderkey | o_orderdate ---------------------------------------------------------------------- - 1 | 01-02-1996 - 2 | 12-01-1996 - 3 | 10-14-1993 - 4 | 10-11-1995 - 5 | 07-30-1994 - 6 | 02-21-1992 - 7 | 01-10-1996 -(7 rows) - --- distinct on partition column with aggregate --- this is the same as the one without distinct due to group by -SELECT DISTINCT l_orderkey, count(*) - FROM lineitem_hash_part - WHERE l_orderkey < 200 - GROUP BY 1 - HAVING count(*) > 5 - ORDER BY 2 DESC, 1; - l_orderkey | count ---------------------------------------------------------------------- - 7 | 7 - 68 | 7 - 129 | 7 - 164 | 7 - 194 | 7 - 1 | 6 - 3 | 6 - 32 | 6 - 35 | 6 - 39 | 6 - 67 | 6 - 69 | 6 - 70 | 6 - 71 | 6 - 134 | 6 - 135 | 6 - 163 | 6 - 192 | 6 - 197 | 6 -(19 rows) - --- explain the query to see actual plan -EXPLAIN (COSTS FALSE) - SELECT DISTINCT l_orderkey, count(*) - FROM lineitem_hash_part - WHERE l_orderkey < 200 - GROUP BY 1 - HAVING count(*) > 5 - ORDER BY 2 DESC, 1; - QUERY PLAN ---------------------------------------------------------------------- - Sort - Sort Key: remote_scan.count DESC, remote_scan.l_orderkey - -> HashAggregate - Group Key: remote_scan.count, remote_scan.l_orderkey - -> Custom Scan (Citus Adaptive) - Task Count: 4 - Tasks Shown: One of 4 - -> Task - Node: host=localhost port=xxxxx dbname=regression - -> HashAggregate - Group Key: l_orderkey - Filter: (count(*) > 5) - -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part - Filter: (l_orderkey < 200) -(14 rows) - --- check the plan if the hash aggreate is disabled -SET enable_hashagg TO off; -EXPLAIN (COSTS FALSE) - SELECT DISTINCT l_orderkey, count(*) - FROM lineitem_hash_part - WHERE l_orderkey < 200 - GROUP BY 1 - HAVING count(*) > 5 - ORDER BY 2 DESC, 1; - QUERY PLAN ---------------------------------------------------------------------- - Unique - -> Sort - Sort Key: remote_scan.count DESC, remote_scan.l_orderkey - -> Custom Scan (Citus Adaptive) - Task Count: 4 - Tasks Shown: One of 4 - -> Task - Node: host=localhost port=xxxxx dbname=regression - -> HashAggregate - Group Key: l_orderkey - Filter: (count(*) > 5) - -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part - Filter: (l_orderkey < 200) -(13 rows) - -SET enable_hashagg TO on; --- distinct on aggregate of group by columns, we try to check whether we handle --- queries which does not have any group by column in distinct columns properly. -SELECT DISTINCT count(*) - FROM lineitem_hash_part - GROUP BY l_suppkey, l_linenumber - ORDER BY 1; - count ---------------------------------------------------------------------- - 1 - 2 - 3 - 4 -(4 rows) - --- explain the query to see actual plan. We expect to see Aggregate node having --- group by key on count(*) column, since columns in the Group By doesn't guarantee --- the uniqueness of the result. -EXPLAIN (COSTS FALSE) - SELECT DISTINCT count(*) - FROM lineitem_hash_part - GROUP BY l_suppkey, l_linenumber - ORDER BY 1; - QUERY PLAN ---------------------------------------------------------------------- - Unique - -> Sort - Sort Key: (COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint)) - -> HashAggregate - Group Key: remote_scan.worker_column_2, remote_scan.worker_column_3 - -> Custom Scan (Citus Adaptive) - Task Count: 4 - Tasks Shown: One of 4 - -> Task - Node: host=localhost port=xxxxx dbname=regression - -> HashAggregate - Group Key: l_suppkey, l_linenumber - -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part -(13 rows) - --- check the plan if the hash aggreate is disabled. We expect to see sort+unique --- instead of aggregate plan node to handle distinct. -SET enable_hashagg TO off; -EXPLAIN (COSTS FALSE) - SELECT DISTINCT count(*) - FROM lineitem_hash_part - GROUP BY l_suppkey, l_linenumber - ORDER BY 1; - QUERY PLAN ---------------------------------------------------------------------- - Unique - -> Sort - Sort Key: (COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint)) - -> GroupAggregate - Group Key: remote_scan.worker_column_2, remote_scan.worker_column_3 - -> Sort - Sort Key: remote_scan.worker_column_2, remote_scan.worker_column_3 - -> Custom Scan (Citus Adaptive) - Task Count: 4 - Tasks Shown: One of 4 - -> Task - Node: host=localhost port=xxxxx dbname=regression - -> HashAggregate - Group Key: l_suppkey, l_linenumber - -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part -(15 rows) - -SET enable_hashagg TO on; --- Now we have only part of group clause columns in distinct, yet it is still not --- enough to use Group By columns to guarantee uniqueness of result list. -SELECT DISTINCT l_suppkey, count(*) - FROM lineitem_hash_part - GROUP BY l_suppkey, l_linenumber - ORDER BY 1 - LIMIT 10; - l_suppkey | count ---------------------------------------------------------------------- - 1 | 1 - 2 | 1 - 3 | 1 - 4 | 1 - 5 | 1 - 7 | 1 - 10 | 1 - 12 | 1 - 13 | 1 - 14 | 1 -(10 rows) - --- explain the query to see actual plan. Similar to the explain of the query above. -EXPLAIN (COSTS FALSE) - SELECT DISTINCT l_suppkey, count(*) - FROM lineitem_hash_part - GROUP BY l_suppkey, l_linenumber - ORDER BY 1 - LIMIT 10; - QUERY PLAN ---------------------------------------------------------------------- - Limit - -> Unique - -> Sort - Sort Key: remote_scan.l_suppkey, (COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint)) - -> HashAggregate - Group Key: remote_scan.l_suppkey, remote_scan.worker_column_3 - -> Custom Scan (Citus Adaptive) - Task Count: 4 - Tasks Shown: One of 4 - -> Task - Node: host=localhost port=xxxxx dbname=regression - -> HashAggregate - Group Key: l_suppkey, l_linenumber - -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part -(14 rows) - --- check the plan if the hash aggreate is disabled. Similar to the explain of --- the query above. -SET enable_hashagg TO off; -EXPLAIN (COSTS FALSE) - SELECT DISTINCT l_suppkey, count(*) - FROM lineitem_hash_part - GROUP BY l_suppkey, l_linenumber - ORDER BY 1 - LIMIT 10; - QUERY PLAN ---------------------------------------------------------------------- - Limit - -> Unique - -> Sort - Sort Key: remote_scan.l_suppkey, (COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint)) - -> GroupAggregate - Group Key: remote_scan.l_suppkey, remote_scan.worker_column_3 - -> Sort - Sort Key: remote_scan.l_suppkey, remote_scan.worker_column_3 - -> Custom Scan (Citus Adaptive) - Task Count: 4 - Tasks Shown: One of 4 - -> Task - Node: host=localhost port=xxxxx dbname=regression - -> HashAggregate - Group Key: l_suppkey, l_linenumber - -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part -(16 rows) - -SET enable_hashagg TO on; --- Similar to the above query, not with count but avg. Only difference with the --- above query is that, we create run two aggregate functions in workers. -SELECT DISTINCT l_suppkey, avg(l_partkey) - FROM lineitem_hash_part - GROUP BY l_suppkey, l_linenumber - ORDER BY 1,2 - LIMIT 10; - l_suppkey | avg ---------------------------------------------------------------------- - 1 | 190000.000000000000 - 2 | 172450.000000000000 - 3 | 112469.000000000000 - 3 | 134976.000000000000 - 4 | 112470.000000000000 - 4 | 142461.000000000000 - 5 | 182450.000000000000 - 7 | 137493.000000000000 - 10 | 150009.000000000000 - 12 | 17510.0000000000000000 -(10 rows) - --- explain the query to see actual plan. Similar to the explain of the query above. --- Only aggregate functions will be changed. -EXPLAIN (COSTS FALSE) - SELECT DISTINCT l_suppkey, avg(l_partkey) - FROM lineitem_hash_part - GROUP BY l_suppkey, l_linenumber - ORDER BY 1,2 - LIMIT 10; - QUERY PLAN ---------------------------------------------------------------------- - Limit - -> Unique - -> Sort - Sort Key: remote_scan.l_suppkey, ((pg_catalog.sum(remote_scan.avg) / pg_catalog.sum(remote_scan.avg_1))) - -> HashAggregate - Group Key: remote_scan.l_suppkey, remote_scan.worker_column_4 - -> Custom Scan (Citus Adaptive) - Task Count: 4 - Tasks Shown: One of 4 - -> Task - Node: host=localhost port=xxxxx dbname=regression - -> HashAggregate - Group Key: l_suppkey, l_linenumber - -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part -(14 rows) - --- check the plan if the hash aggreate is disabled. This explain errors out due --- to a bug right now, expectation must be corrected after fixing it. -SET enable_hashagg TO off; -EXPLAIN (COSTS FALSE) - SELECT DISTINCT l_suppkey, avg(l_partkey) - FROM lineitem_hash_part - GROUP BY l_suppkey, l_linenumber - ORDER BY 1,2 - LIMIT 10; - QUERY PLAN ---------------------------------------------------------------------- - Limit - -> Unique - -> Sort - Sort Key: remote_scan.l_suppkey, ((pg_catalog.sum(remote_scan.avg) / pg_catalog.sum(remote_scan.avg_1))) - -> GroupAggregate - Group Key: remote_scan.l_suppkey, remote_scan.worker_column_4 - -> Sort - Sort Key: remote_scan.l_suppkey, remote_scan.worker_column_4 - -> Custom Scan (Citus Adaptive) - Task Count: 4 - Tasks Shown: One of 4 - -> Task - Node: host=localhost port=xxxxx dbname=regression - -> HashAggregate - Group Key: l_suppkey, l_linenumber - -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part -(16 rows) - -SET enable_hashagg TO on; --- Similar to the above query but with distinct on -SELECT DISTINCT ON (l_suppkey) avg(l_partkey) - FROM lineitem_hash_part - GROUP BY l_suppkey, l_linenumber - ORDER BY l_suppkey,1 - LIMIT 10; - avg ---------------------------------------------------------------------- - 190000.000000000000 - 172450.000000000000 - 112469.000000000000 - 112470.000000000000 - 182450.000000000000 - 137493.000000000000 - 150009.000000000000 - 17510.0000000000000000 - 87504.000000000000 - 77506.000000000000 -(10 rows) - --- explain the query to see actual plan. We expect to see sort+unique to handle --- distinct on. -EXPLAIN (COSTS FALSE) - SELECT DISTINCT ON (l_suppkey) avg(l_partkey) - FROM lineitem_hash_part - GROUP BY l_suppkey, l_linenumber - ORDER BY l_suppkey,1 - LIMIT 10; - QUERY PLAN ---------------------------------------------------------------------- - Limit - -> Unique - -> Sort - Sort Key: remote_scan.worker_column_3, ((pg_catalog.sum(remote_scan.avg) / pg_catalog.sum(remote_scan.avg_1))) - -> HashAggregate - Group Key: remote_scan.worker_column_3, remote_scan.worker_column_4 - -> Custom Scan (Citus Adaptive) - Task Count: 4 - Tasks Shown: One of 4 - -> Task - Node: host=localhost port=xxxxx dbname=regression - -> HashAggregate - Group Key: l_suppkey, l_linenumber - -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part -(14 rows) - --- check the plan if the hash aggreate is disabled. We expect to see sort+unique to --- handle distinct on. -SET enable_hashagg TO off; -EXPLAIN (COSTS FALSE) - SELECT DISTINCT ON (l_suppkey) avg(l_partkey) - FROM lineitem_hash_part - GROUP BY l_suppkey, l_linenumber - ORDER BY l_suppkey,1 - LIMIT 10; - QUERY PLAN ---------------------------------------------------------------------- - Limit - -> Unique - -> Sort - Sort Key: remote_scan.worker_column_3, ((pg_catalog.sum(remote_scan.avg) / pg_catalog.sum(remote_scan.avg_1))) - -> GroupAggregate - Group Key: remote_scan.worker_column_3, remote_scan.worker_column_4 - -> Sort - Sort Key: remote_scan.worker_column_3, remote_scan.worker_column_4 - -> Custom Scan (Citus Adaptive) - Task Count: 4 - Tasks Shown: One of 4 - -> Task - Node: host=localhost port=xxxxx dbname=regression - -> HashAggregate - Group Key: l_suppkey, l_linenumber - -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part -(16 rows) - -SET enable_hashagg TO on; --- distinct with expression and aggregation -SELECT DISTINCT avg(ceil(l_partkey / 2)) - FROM lineitem_hash_part - GROUP BY l_suppkey, l_linenumber - ORDER BY 1 - LIMIT 10; - avg ---------------------------------------------------------------------- - 9 - 39 - 74 - 87 - 89 - 91 - 97 - 102 - 111 - 122 -(10 rows) - --- explain the query to see actual plan -EXPLAIN (COSTS FALSE) - SELECT DISTINCT avg(ceil(l_partkey / 2)) - FROM lineitem_hash_part - GROUP BY l_suppkey, l_linenumber - ORDER BY 1 - LIMIT 10; - QUERY PLAN ---------------------------------------------------------------------- - Limit - -> Unique - -> Sort - Sort Key: ((sum(remote_scan.avg) / (pg_catalog.sum(remote_scan.avg_1))::double precision)) - -> HashAggregate - Group Key: remote_scan.worker_column_3, remote_scan.worker_column_4 - -> Custom Scan (Citus Adaptive) - Task Count: 4 - Tasks Shown: One of 4 - -> Task - Node: host=localhost port=xxxxx dbname=regression - -> HashAggregate - Group Key: l_suppkey, l_linenumber - -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part -(14 rows) - --- check the plan if the hash aggreate is disabled. This explain errors out due --- to a bug right now, expectation must be corrected after fixing it. -SET enable_hashagg TO off; -EXPLAIN (COSTS FALSE) - SELECT DISTINCT avg(ceil(l_partkey / 2)) - FROM lineitem_hash_part - GROUP BY l_suppkey, l_linenumber - ORDER BY 1 - LIMIT 10; - QUERY PLAN ---------------------------------------------------------------------- - Limit - -> Unique - -> Sort - Sort Key: ((sum(remote_scan.avg) / (pg_catalog.sum(remote_scan.avg_1))::double precision)) - -> GroupAggregate - Group Key: remote_scan.worker_column_3, remote_scan.worker_column_4 - -> Sort - Sort Key: remote_scan.worker_column_3, remote_scan.worker_column_4 - -> Custom Scan (Citus Adaptive) - Task Count: 4 - Tasks Shown: One of 4 - -> Task - Node: host=localhost port=xxxxx dbname=regression - -> HashAggregate - Group Key: l_suppkey, l_linenumber - -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part -(16 rows) - -SET enable_hashagg TO on; --- expression among aggregations. -SELECT DISTINCT sum(l_suppkey) + count(l_partkey) AS dis - FROM lineitem_hash_part - GROUP BY l_suppkey, l_linenumber - ORDER BY 1 - LIMIT 10; - dis ---------------------------------------------------------------------- - 2 - 3 - 4 - 5 - 6 - 8 - 11 - 13 - 14 - 15 -(10 rows) - --- explain the query to see actual plan -EXPLAIN (COSTS FALSE) - SELECT DISTINCT sum(l_suppkey) + count(l_partkey) AS dis - FROM lineitem_hash_part - GROUP BY l_suppkey, l_linenumber - ORDER BY 1 - LIMIT 10; - QUERY PLAN ---------------------------------------------------------------------- - Limit - -> Unique - -> Sort - Sort Key: (((pg_catalog.sum(remote_scan.dis))::bigint + COALESCE((pg_catalog.sum(remote_scan.dis_1))::bigint, '0'::bigint))) - -> HashAggregate - Group Key: remote_scan.worker_column_3, remote_scan.worker_column_4 - -> Custom Scan (Citus Adaptive) - Task Count: 4 - Tasks Shown: One of 4 - -> Task - Node: host=localhost port=xxxxx dbname=regression - -> HashAggregate - Group Key: l_suppkey, l_linenumber - -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part -(14 rows) - --- check the plan if the hash aggreate is disabled. This explain errors out due --- to a bug right now, expectation must be corrected after fixing it. -SET enable_hashagg TO off; -EXPLAIN (COSTS FALSE) - SELECT DISTINCT sum(l_suppkey) + count(l_partkey) AS dis - FROM lineitem_hash_part - GROUP BY l_suppkey, l_linenumber - ORDER BY 1 - LIMIT 10; - QUERY PLAN ---------------------------------------------------------------------- - Limit - -> Unique - -> Sort - Sort Key: (((pg_catalog.sum(remote_scan.dis))::bigint + COALESCE((pg_catalog.sum(remote_scan.dis_1))::bigint, '0'::bigint))) - -> GroupAggregate - Group Key: remote_scan.worker_column_3, remote_scan.worker_column_4 - -> Sort - Sort Key: remote_scan.worker_column_3, remote_scan.worker_column_4 - -> Custom Scan (Citus Adaptive) - Task Count: 4 - Tasks Shown: One of 4 - -> Task - Node: host=localhost port=xxxxx dbname=regression - -> HashAggregate - Group Key: l_suppkey, l_linenumber - -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part -(16 rows) - -SET enable_hashagg TO on; --- distinct on all columns, note Group By columns guarantees uniqueness of the --- result list. -SELECT DISTINCT * - FROM lineitem_hash_part - GROUP BY 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16 - ORDER BY 1,2 - LIMIT 10; - l_orderkey | l_partkey | l_suppkey | l_linenumber | l_quantity | l_extendedprice | l_discount | l_tax | l_returnflag | l_linestatus | l_shipdate | l_commitdate | l_receiptdate | l_shipinstruct | l_shipmode | l_comment ---------------------------------------------------------------------- - 1 | 2132 | 4633 | 4 | 28.00 | 28955.64 | 0.09 | 0.06 | N | O | 04-21-1996 | 03-30-1996 | 05-16-1996 | NONE | AIR | lites. fluffily even de - 1 | 15635 | 638 | 6 | 32.00 | 49620.16 | 0.07 | 0.02 | N | O | 01-30-1996 | 02-07-1996 | 02-03-1996 | DELIVER IN PERSON | MAIL | arefully slyly ex - 1 | 24027 | 1534 | 5 | 24.00 | 22824.48 | 0.10 | 0.04 | N | O | 03-30-1996 | 03-14-1996 | 04-01-1996 | NONE | FOB | pending foxes. slyly re - 1 | 63700 | 3701 | 3 | 8.00 | 13309.60 | 0.10 | 0.02 | N | O | 01-29-1996 | 03-05-1996 | 01-31-1996 | TAKE BACK RETURN | REG AIR | riously. regular, express dep - 1 | 67310 | 7311 | 2 | 36.00 | 45983.16 | 0.09 | 0.06 | N | O | 04-12-1996 | 02-28-1996 | 04-20-1996 | TAKE BACK RETURN | MAIL | ly final dependencies: slyly bold - 1 | 155190 | 7706 | 1 | 17.00 | 21168.23 | 0.04 | 0.02 | N | O | 03-13-1996 | 02-12-1996 | 03-22-1996 | DELIVER IN PERSON | TRUCK | egular courts above the - 2 | 106170 | 1191 | 1 | 38.00 | 44694.46 | 0.00 | 0.05 | N | O | 01-28-1997 | 01-14-1997 | 02-02-1997 | TAKE BACK RETURN | RAIL | ven requests. deposits breach a - 3 | 4297 | 1798 | 1 | 45.00 | 54058.05 | 0.06 | 0.00 | R | F | 02-02-1994 | 01-04-1994 | 02-23-1994 | NONE | AIR | ongside of the furiously brave acco - 3 | 19036 | 6540 | 2 | 49.00 | 46796.47 | 0.10 | 0.00 | R | F | 11-09-1993 | 12-20-1993 | 11-24-1993 | TAKE BACK RETURN | RAIL | unusual accounts. eve - 3 | 29380 | 1883 | 4 | 2.00 | 2618.76 | 0.01 | 0.06 | A | F | 12-04-1993 | 01-07-1994 | 01-01-1994 | NONE | TRUCK | y. fluffily pending d -(10 rows) - --- explain the query to see actual plan. We expect to see only one aggregation --- node since group by columns guarantees the uniqueness. -EXPLAIN (COSTS FALSE) - SELECT DISTINCT * - FROM lineitem_hash_part - GROUP BY 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16 - ORDER BY 1,2 - LIMIT 10; - QUERY PLAN ---------------------------------------------------------------------- - Limit - -> Sort - Sort Key: remote_scan.l_orderkey, remote_scan.l_partkey - -> HashAggregate - Group Key: remote_scan.l_orderkey, remote_scan.l_partkey, remote_scan.l_suppkey, remote_scan.l_linenumber, remote_scan.l_quantity, remote_scan.l_extendedprice, remote_scan.l_discount, remote_scan.l_tax, remote_scan.l_returnflag, remote_scan.l_linestatus, remote_scan.l_shipdate, remote_scan.l_commitdate, remote_scan.l_receiptdate, remote_scan.l_shipinstruct, remote_scan.l_shipmode, remote_scan.l_comment - -> Custom Scan (Citus Adaptive) - Task Count: 4 - Tasks Shown: One of 4 - -> Task - Node: host=localhost port=xxxxx dbname=regression - -> Limit - -> Unique - -> Group - Group Key: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment - -> Sort - Sort Key: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment - -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part -(17 rows) - --- check the plan if the hash aggreate is disabled. We expect to see only one --- aggregation node since group by columns guarantees the uniqueness. -SET enable_hashagg TO off; -EXPLAIN (COSTS FALSE) - SELECT DISTINCT * - FROM lineitem_hash_part - GROUP BY 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16 - ORDER BY 1,2 - LIMIT 10; - QUERY PLAN ---------------------------------------------------------------------- - Limit - -> Unique - -> Sort - Sort Key: remote_scan.l_orderkey, remote_scan.l_partkey, remote_scan.l_suppkey, remote_scan.l_linenumber, remote_scan.l_quantity, remote_scan.l_extendedprice, remote_scan.l_discount, remote_scan.l_tax, remote_scan.l_returnflag, remote_scan.l_linestatus, remote_scan.l_shipdate, remote_scan.l_commitdate, remote_scan.l_receiptdate, remote_scan.l_shipinstruct, remote_scan.l_shipmode, remote_scan.l_comment - -> Custom Scan (Citus Adaptive) - Task Count: 4 - Tasks Shown: One of 4 - -> Task - Node: host=localhost port=xxxxx dbname=regression - -> Limit - -> Unique - -> Group - Group Key: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment - -> Sort - Sort Key: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment - -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part -(16 rows) - -SET enable_hashagg TO on; --- distinct on count distinct -SELECT DISTINCT count(DISTINCT l_partkey), count(DISTINCT l_shipmode) - FROM lineitem_hash_part - GROUP BY l_orderkey - ORDER BY 1,2; - count | count ---------------------------------------------------------------------- - 1 | 1 - 2 | 1 - 2 | 2 - 3 | 1 - 3 | 2 - 3 | 3 - 4 | 1 - 4 | 2 - 4 | 3 - 4 | 4 - 5 | 2 - 5 | 3 - 5 | 4 - 5 | 5 - 6 | 2 - 6 | 3 - 6 | 4 - 6 | 5 - 6 | 6 - 7 | 2 - 7 | 3 - 7 | 4 - 7 | 5 - 7 | 6 - 7 | 7 -(25 rows) - --- explain the query to see actual plan. We expect to see aggregation plan for --- the outer distinct. -EXPLAIN (COSTS FALSE) - SELECT DISTINCT count(DISTINCT l_partkey), count(DISTINCT l_shipmode) - FROM lineitem_hash_part - GROUP BY l_orderkey - ORDER BY 1,2; - QUERY PLAN ---------------------------------------------------------------------- - Sort - Sort Key: remote_scan.count, remote_scan.count_1 - -> HashAggregate - Group Key: remote_scan.count, remote_scan.count_1 - -> Custom Scan (Citus Adaptive) - Task Count: 4 - Tasks Shown: One of 4 - -> Task - Node: host=localhost port=xxxxx dbname=regression - -> GroupAggregate - Group Key: l_orderkey - -> Sort - Sort Key: l_orderkey - -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part -(14 rows) - --- check the plan if the hash aggreate is disabled. We expect to see sort + unique --- plans for the outer distinct. -SET enable_hashagg TO off; -EXPLAIN (COSTS FALSE) - SELECT DISTINCT count(DISTINCT l_partkey), count(DISTINCT l_shipmode) - FROM lineitem_hash_part - GROUP BY l_orderkey - ORDER BY 1,2; - QUERY PLAN ---------------------------------------------------------------------- - Unique - -> Sort - Sort Key: remote_scan.count, remote_scan.count_1 - -> Custom Scan (Citus Adaptive) - Task Count: 4 - Tasks Shown: One of 4 - -> Task - Node: host=localhost port=xxxxx dbname=regression - -> GroupAggregate - Group Key: l_orderkey - -> Sort - Sort Key: l_orderkey - -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part -(13 rows) - -SET enable_hashagg TO on; --- distinct on aggregation with filter and expression -SELECT DISTINCT ceil(count(case when l_partkey > 100000 THEN 1 ELSE 0 END) / 2) AS count - FROM lineitem_hash_part - GROUP BY l_suppkey - ORDER BY 1; - count ---------------------------------------------------------------------- - 0 - 1 - 2 - 3 - 4 -(5 rows) - --- explain the query to see actual plan -EXPLAIN (COSTS FALSE) - SELECT DISTINCT ceil(count(case when l_partkey > 100000 THEN 1 ELSE 0 END) / 2) AS count - FROM lineitem_hash_part - GROUP BY l_suppkey - ORDER BY 1; - QUERY PLAN ---------------------------------------------------------------------- - Unique - -> Sort - Sort Key: (ceil(((COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint) / 2))::double precision)) - -> HashAggregate - Group Key: remote_scan.worker_column_2 - -> Custom Scan (Citus Adaptive) - Task Count: 4 - Tasks Shown: One of 4 - -> Task - Node: host=localhost port=xxxxx dbname=regression - -> HashAggregate - Group Key: l_suppkey - -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part -(13 rows) - --- check the plan if the hash aggreate is disabled -SET enable_hashagg TO off; -EXPLAIN (COSTS FALSE) - SELECT DISTINCT ceil(count(case when l_partkey > 100000 THEN 1 ELSE 0 END) / 2) AS count - FROM lineitem_hash_part - GROUP BY l_suppkey - ORDER BY 1; - QUERY PLAN ---------------------------------------------------------------------- - Unique - -> Sort - Sort Key: (ceil(((COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint) / 2))::double precision)) - -> GroupAggregate - Group Key: remote_scan.worker_column_2 - -> Sort - Sort Key: remote_scan.worker_column_2 - -> Custom Scan (Citus Adaptive) - Task Count: 4 - Tasks Shown: One of 4 - -> Task - Node: host=localhost port=xxxxx dbname=regression - -> HashAggregate - Group Key: l_suppkey - -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part -(15 rows) - -SET enable_hashagg TO on; --- explain the query to see actual plan with array_agg aggregation. -EXPLAIN (COSTS FALSE) - SELECT DISTINCT array_agg(l_linenumber), array_length(array_agg(l_linenumber), 1) - FROM lineitem_hash_part - GROUP BY l_orderkey - ORDER BY 2 - LIMIT 15; - QUERY PLAN ---------------------------------------------------------------------- - Limit - -> Sort - Sort Key: remote_scan.array_length - -> HashAggregate - Group Key: remote_scan.array_length, remote_scan.array_agg - -> Custom Scan (Citus Adaptive) - Task Count: 4 - Tasks Shown: One of 4 - -> Task - Node: host=localhost port=xxxxx dbname=regression - -> GroupAggregate - Group Key: l_orderkey - -> Sort - Sort Key: l_orderkey - -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part -(15 rows) - --- check the plan if the hash aggreate is disabled. -SET enable_hashagg TO off; -EXPLAIN (COSTS FALSE) - SELECT DISTINCT array_agg(l_linenumber), array_length(array_agg(l_linenumber), 1) - FROM lineitem_hash_part - GROUP BY l_orderkey - ORDER BY 2 - LIMIT 15; - QUERY PLAN ---------------------------------------------------------------------- - Limit - -> Unique - -> Sort - Sort Key: remote_scan.array_length, remote_scan.array_agg - -> Custom Scan (Citus Adaptive) - Task Count: 4 - Tasks Shown: One of 4 - -> Task - Node: host=localhost port=xxxxx dbname=regression - -> GroupAggregate - Group Key: l_orderkey - -> Sort - Sort Key: l_orderkey - -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part -(14 rows) - -SET enable_hashagg TO on; --- distinct on non-partition column with aggregate --- this is the same as non-distinct version due to group by -SELECT DISTINCT l_partkey, count(*) - FROM lineitem_hash_part - GROUP BY 1 - HAVING count(*) > 2 - ORDER BY 1; - l_partkey | count ---------------------------------------------------------------------- - 1051 | 3 - 1927 | 3 - 6983 | 3 - 15283 | 3 - 87761 | 3 - 136884 | 3 - 149926 | 3 - 160895 | 3 - 177771 | 3 - 188804 | 3 - 199146 | 3 -(11 rows) - --- explain the query to see actual plan -EXPLAIN (COSTS FALSE) - SELECT DISTINCT l_partkey, count(*) - FROM lineitem_hash_part - GROUP BY 1 - HAVING count(*) > 2 - ORDER BY 1; - QUERY PLAN ---------------------------------------------------------------------- - Unique - -> Sort - Sort Key: remote_scan.l_partkey, (COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint)) - -> HashAggregate - Group Key: remote_scan.l_partkey - Filter: (COALESCE((pg_catalog.sum(remote_scan.worker_column_3))::bigint, '0'::bigint) > 2) - -> Custom Scan (Citus Adaptive) - Task Count: 4 - Tasks Shown: One of 4 - -> Task - Node: host=localhost port=xxxxx dbname=regression - -> HashAggregate - Group Key: l_partkey - -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part -(14 rows) - --- distinct on non-partition column and avg -SELECT DISTINCT l_partkey, avg(l_linenumber) - FROM lineitem_hash_part - WHERE l_partkey < 500 - GROUP BY 1 - HAVING avg(l_linenumber) > 2 - ORDER BY 1; - l_partkey | avg ---------------------------------------------------------------------- - 18 | 7.0000000000000000 - 79 | 6.0000000000000000 - 149 | 4.5000000000000000 - 175 | 5.0000000000000000 - 179 | 6.0000000000000000 - 182 | 3.0000000000000000 - 222 | 4.0000000000000000 - 278 | 3.0000000000000000 - 299 | 7.0000000000000000 - 308 | 7.0000000000000000 - 309 | 5.0000000000000000 - 321 | 3.0000000000000000 - 337 | 6.0000000000000000 - 364 | 3.0000000000000000 - 403 | 4.0000000000000000 -(15 rows) - --- distinct on multiple non-partition columns -SELECT DISTINCT l_partkey, l_suppkey - FROM lineitem_hash_part - WHERE l_shipmode = 'AIR' AND l_orderkey < 100 - ORDER BY 1, 2; - l_partkey | l_suppkey ---------------------------------------------------------------------- - 2132 | 4633 - 4297 | 1798 - 37531 | 35 - 44161 | 6666 - 44706 | 4707 - 67831 | 5350 - 85811 | 8320 - 94368 | 6878 - 108338 | 849 - 108570 | 8571 - 137267 | 4807 - 137469 | 9983 - 173489 | 3490 - 196156 | 1195 - 197921 | 441 -(15 rows) - -EXPLAIN (COSTS FALSE) - SELECT DISTINCT l_partkey, l_suppkey - FROM lineitem_hash_part - WHERE l_shipmode = 'AIR' AND l_orderkey < 100 - ORDER BY 1, 2; - QUERY PLAN ---------------------------------------------------------------------- - Sort - Sort Key: remote_scan.l_partkey, remote_scan.l_suppkey - -> HashAggregate - Group Key: remote_scan.l_partkey, remote_scan.l_suppkey - -> Custom Scan (Citus Adaptive) - Task Count: 4 - Tasks Shown: One of 4 - -> Task - Node: host=localhost port=xxxxx dbname=regression - -> Unique - -> Sort - Sort Key: l_partkey, l_suppkey - -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part - Filter: ((l_orderkey < 100) AND (l_shipmode = 'AIR'::bpchar)) -(14 rows) - --- distinct on partition column -SELECT DISTINCT ON (l_orderkey) l_orderkey, l_partkey, l_suppkey - FROM lineitem_hash_part - WHERE l_orderkey < 35 - ORDER BY 1, 2, 3; - l_orderkey | l_partkey | l_suppkey ---------------------------------------------------------------------- - 1 | 2132 | 4633 - 2 | 106170 | 1191 - 3 | 4297 | 1798 - 4 | 88035 | 5560 - 5 | 37531 | 35 - 6 | 139636 | 2150 - 7 | 79251 | 1759 - 32 | 2743 | 7744 - 33 | 33918 | 3919 - 34 | 88362 | 871 -(10 rows) - -EXPLAIN (COSTS FALSE) - SELECT DISTINCT ON (l_orderkey) l_orderkey, l_partkey, l_suppkey - FROM lineitem_hash_part - WHERE l_orderkey < 35 - ORDER BY 1, 2, 3; - QUERY PLAN ---------------------------------------------------------------------- - Unique - -> Sort - Sort Key: remote_scan.l_orderkey, remote_scan.l_partkey, remote_scan.l_suppkey - -> Custom Scan (Citus Adaptive) - Task Count: 4 - Tasks Shown: One of 4 - -> Task - Node: host=localhost port=xxxxx dbname=regression - -> Unique - -> Sort - Sort Key: l_orderkey, l_partkey, l_suppkey - -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part - Filter: (l_orderkey < 35) -(13 rows) - --- distinct on non-partition column --- note order by is required here --- otherwise query results will be different since --- distinct on clause is on non-partition column -SELECT DISTINCT ON (l_partkey) l_partkey, l_orderkey - FROM lineitem_hash_part - ORDER BY 1,2 - LIMIT 20; - l_partkey | l_orderkey ---------------------------------------------------------------------- - 18 | 12005 - 79 | 5121 - 91 | 2883 - 149 | 807 - 175 | 4102 - 179 | 2117 - 182 | 548 - 195 | 2528 - 204 | 10048 - 222 | 9413 - 245 | 9446 - 278 | 1287 - 299 | 1122 - 308 | 11137 - 309 | 2374 - 318 | 321 - 321 | 5984 - 337 | 10403 - 350 | 13698 - 358 | 4323 -(20 rows) - -EXPLAIN (COSTS FALSE) - SELECT DISTINCT ON (l_partkey) l_partkey, l_orderkey - FROM lineitem_hash_part - ORDER BY 1,2 - LIMIT 20; - QUERY PLAN ---------------------------------------------------------------------- - Limit - -> Unique - -> Sort - Sort Key: remote_scan.l_partkey, remote_scan.l_orderkey - -> Custom Scan (Citus Adaptive) - Task Count: 4 - Tasks Shown: One of 4 - -> Task - Node: host=localhost port=xxxxx dbname=regression - -> Limit - -> Unique - -> Sort - Sort Key: l_partkey, l_orderkey - -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part -(14 rows) - --- distinct on with joins --- each customer's first order key -SELECT DISTINCT ON (o_custkey) o_custkey, l_orderkey - FROM lineitem_hash_part JOIN orders_hash_part ON (l_orderkey = o_orderkey) - WHERE o_custkey < 15 - ORDER BY 1,2; - o_custkey | l_orderkey ---------------------------------------------------------------------- - 1 | 9154 - 2 | 10563 - 4 | 320 - 5 | 11682 - 7 | 10402 - 8 | 102 - 10 | 1602 - 11 | 12800 - 13 | 994 - 14 | 11011 -(10 rows) - -SELECT coordinator_plan($Q$ -EXPLAIN (COSTS FALSE) - SELECT DISTINCT ON (o_custkey) o_custkey, l_orderkey - FROM lineitem_hash_part JOIN orders_hash_part ON (l_orderkey = o_orderkey) - WHERE o_custkey < 15 - ORDER BY 1,2; -$Q$); - coordinator_plan ---------------------------------------------------------------------- - Unique - -> Sort - Sort Key: remote_scan.o_custkey, remote_scan.l_orderkey - -> Custom Scan (Citus Adaptive) - Task Count: 4 -(5 rows) - --- explain without order by --- notice master plan has order by on distinct on column -SELECT coordinator_plan($Q$ -EXPLAIN (COSTS FALSE) - SELECT DISTINCT ON (o_custkey) o_custkey, l_orderkey - FROM lineitem_hash_part JOIN orders_hash_part ON (l_orderkey = o_orderkey) - WHERE o_custkey < 15; -$Q$); - coordinator_plan ---------------------------------------------------------------------- - Unique - -> Sort - Sort Key: remote_scan.o_custkey - -> Custom Scan (Citus Adaptive) - Task Count: 4 -(5 rows) - --- each customer's each order's first l_partkey -SELECT DISTINCT ON (o_custkey, l_orderkey) o_custkey, l_orderkey, l_linenumber, l_partkey - FROM lineitem_hash_part JOIN orders_hash_part ON (l_orderkey = o_orderkey) - WHERE o_custkey < 20 - ORDER BY 1,2,3; - o_custkey | l_orderkey | l_linenumber | l_partkey ---------------------------------------------------------------------- - 1 | 9154 | 1 | 86513 - 1 | 14656 | 1 | 59539 - 2 | 10563 | 1 | 147459 - 4 | 320 | 1 | 4415 - 4 | 739 | 1 | 84489 - 4 | 10688 | 1 | 45037 - 4 | 10788 | 1 | 50814 - 4 | 13728 | 1 | 86216 - 5 | 11682 | 1 | 31634 - 5 | 11746 | 1 | 180724 - 5 | 14308 | 1 | 157430 - 7 | 10402 | 1 | 53661 - 7 | 13031 | 1 | 112161 - 7 | 14145 | 1 | 138729 - 7 | 14404 | 1 | 143034 - 8 | 102 | 1 | 88914 - 8 | 164 | 1 | 91309 - 8 | 13601 | 1 | 40504 - 10 | 1602 | 1 | 182806 - 10 | 9862 | 1 | 86241 - 10 | 11431 | 1 | 62112 - 10 | 13124 | 1 | 29414 - 11 | 12800 | 1 | 152806 - 13 | 994 | 1 | 64486 - 13 | 1603 | 1 | 38191 - 13 | 4704 | 1 | 77934 - 13 | 9927 | 1 | 875 - 14 | 11011 | 1 | 172485 - 17 | 896 | 1 | 38675 - 17 | 5507 | 1 | 9600 - 19 | 353 | 1 | 119305 - 19 | 1504 | 1 | 81389 - 19 | 1669 | 1 | 78373 - 19 | 5893 | 1 | 133707 - 19 | 9954 | 1 | 92138 - 19 | 14885 | 1 | 36154 -(36 rows) - --- explain without order by -SELECT coordinator_plan($Q$ -EXPLAIN (COSTS FALSE) - SELECT DISTINCT ON (o_custkey, l_orderkey) o_custkey, l_orderkey, l_linenumber, l_partkey - FROM lineitem_hash_part JOIN orders_hash_part ON (l_orderkey = o_orderkey) - WHERE o_custkey < 20; -$Q$); - coordinator_plan ---------------------------------------------------------------------- - Unique - -> Sort - Sort Key: remote_scan.o_custkey, remote_scan.l_orderkey - -> Custom Scan (Citus Adaptive) - Task Count: 4 -(5 rows) - --- each customer's each order's last l_partkey -SELECT DISTINCT ON (o_custkey, l_orderkey) o_custkey, l_orderkey, l_linenumber, l_partkey - FROM lineitem_hash_part JOIN orders_hash_part ON (l_orderkey = o_orderkey) - WHERE o_custkey < 15 - ORDER BY 1,2,3 DESC; - o_custkey | l_orderkey | l_linenumber | l_partkey ---------------------------------------------------------------------- - 1 | 9154 | 7 | 173448 - 1 | 14656 | 1 | 59539 - 2 | 10563 | 4 | 110741 - 4 | 320 | 2 | 192158 - 4 | 739 | 5 | 187523 - 4 | 10688 | 2 | 132574 - 4 | 10788 | 4 | 196473 - 4 | 13728 | 3 | 12450 - 5 | 11682 | 3 | 177152 - 5 | 11746 | 7 | 193807 - 5 | 14308 | 3 | 140916 - 7 | 10402 | 2 | 64514 - 7 | 13031 | 6 | 7761 - 7 | 14145 | 6 | 130723 - 7 | 14404 | 7 | 35349 - 8 | 102 | 4 | 61158 - 8 | 164 | 7 | 3037 - 8 | 13601 | 5 | 12470 - 10 | 1602 | 1 | 182806 - 10 | 9862 | 5 | 135675 - 10 | 11431 | 7 | 8563 - 10 | 13124 | 3 | 67055 - 11 | 12800 | 5 | 179110 - 13 | 994 | 4 | 130471 - 13 | 1603 | 2 | 65209 - 13 | 4704 | 3 | 63081 - 13 | 9927 | 6 | 119356 - 14 | 11011 | 7 | 95939 -(28 rows) - --- subqueries -SELECT DISTINCT l_orderkey, l_partkey - FROM ( - SELECT l_orderkey, l_partkey - FROM lineitem_hash_part - ) q - ORDER BY 1,2 - LIMIT 10; - l_orderkey | l_partkey ---------------------------------------------------------------------- - 1 | 2132 - 1 | 15635 - 1 | 24027 - 1 | 63700 - 1 | 67310 - 1 | 155190 - 2 | 106170 - 3 | 4297 - 3 | 19036 - 3 | 29380 -(10 rows) - -EXPLAIN (COSTS FALSE) - SELECT DISTINCT l_orderkey, l_partkey - FROM ( - SELECT l_orderkey, l_partkey - FROM lineitem_hash_part - ) q - ORDER BY 1,2 - LIMIT 10; - QUERY PLAN ---------------------------------------------------------------------- - Limit - -> Sort - Sort Key: remote_scan.l_orderkey, remote_scan.l_partkey - -> HashAggregate - Group Key: remote_scan.l_orderkey, remote_scan.l_partkey - -> Custom Scan (Citus Adaptive) - Task Count: 4 - Tasks Shown: One of 4 - -> Task - Node: host=localhost port=xxxxx dbname=regression - -> Limit - -> Sort - Sort Key: l_orderkey, l_partkey - -> HashAggregate - Group Key: l_orderkey, l_partkey - -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part -(16 rows) - -SELECT DISTINCT l_orderkey, cnt - FROM ( - SELECT l_orderkey, count(*) as cnt - FROM lineitem_hash_part - GROUP BY 1 - ) q - ORDER BY 1,2 - LIMIT 10; - l_orderkey | cnt ---------------------------------------------------------------------- - 1 | 6 - 2 | 1 - 3 | 6 - 4 | 1 - 5 | 3 - 6 | 1 - 7 | 7 - 32 | 6 - 33 | 4 - 34 | 3 -(10 rows) - -EXPLAIN (COSTS FALSE) - SELECT DISTINCT l_orderkey, cnt - FROM ( - SELECT l_orderkey, count(*) as cnt - FROM lineitem_hash_part - GROUP BY 1 - ) q - ORDER BY 1,2 - LIMIT 10; - QUERY PLAN ---------------------------------------------------------------------- - Limit - -> Sort - Sort Key: remote_scan.l_orderkey, remote_scan.cnt - -> HashAggregate - Group Key: remote_scan.l_orderkey, remote_scan.cnt - -> Custom Scan (Citus Adaptive) - Task Count: 4 - Tasks Shown: One of 4 - -> Task - Node: host=localhost port=xxxxx dbname=regression - -> Limit - -> Sort - Sort Key: lineitem_hash_part.l_orderkey, (count(*)) - -> HashAggregate - Group Key: lineitem_hash_part.l_orderkey, count(*) - -> HashAggregate - Group Key: lineitem_hash_part.l_orderkey - -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part -(18 rows) - --- distinct on partition column --- random() is added to inner query to prevent flattening -SELECT DISTINCT ON (l_orderkey) l_orderkey, l_partkey - FROM ( - SELECT l_orderkey, l_partkey, (random()*10)::int + 2 as r - FROM lineitem_hash_part - ) q - WHERE r > 1 - ORDER BY 1,2 - LIMIT 10; - l_orderkey | l_partkey ---------------------------------------------------------------------- - 1 | 2132 - 2 | 106170 - 3 | 4297 - 4 | 88035 - 5 | 37531 - 6 | 139636 - 7 | 79251 - 32 | 2743 - 33 | 33918 - 34 | 88362 -(10 rows) - -EXPLAIN (COSTS FALSE) - SELECT DISTINCT ON (l_orderkey) l_orderkey, l_partkey - FROM ( - SELECT l_orderkey, l_partkey, (random()*10)::int + 2 as r - FROM lineitem_hash_part - ) q - WHERE r > 1 - ORDER BY 1,2 - LIMIT 10; - QUERY PLAN ---------------------------------------------------------------------- - Limit - -> Unique - -> Sort - Sort Key: remote_scan.l_orderkey, remote_scan.l_partkey - -> Custom Scan (Citus Adaptive) - Task Count: 4 - Tasks Shown: One of 4 - -> Task - Node: host=localhost port=xxxxx dbname=regression - -> Limit - -> Unique - -> Sort - Sort Key: q.l_orderkey, q.l_partkey - -> Subquery Scan on q - Filter: (q.r > 1) - -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part -(16 rows) - --- distinct on non-partition column -SELECT DISTINCT ON (l_partkey) l_orderkey, l_partkey - FROM ( - SELECT l_orderkey, l_partkey, (random()*10)::int + 2 as r - FROM lineitem_hash_part - ) q - WHERE r > 1 - ORDER BY 2,1 - LIMIT 10; - l_orderkey | l_partkey ---------------------------------------------------------------------- - 12005 | 18 - 5121 | 79 - 2883 | 91 - 807 | 149 - 4102 | 175 - 2117 | 179 - 548 | 182 - 2528 | 195 - 10048 | 204 - 9413 | 222 -(10 rows) - -EXPLAIN (COSTS FALSE) - SELECT DISTINCT ON (l_partkey) l_orderkey, l_partkey - FROM ( - SELECT l_orderkey, l_partkey, (random()*10)::int + 2 as r - FROM lineitem_hash_part - ) q - WHERE r > 1 - ORDER BY 2,1 - LIMIT 10; - QUERY PLAN ---------------------------------------------------------------------- - Limit - -> Unique - -> Sort - Sort Key: remote_scan.l_partkey, remote_scan.l_orderkey - -> Custom Scan (Citus Adaptive) - Task Count: 4 - Tasks Shown: One of 4 - -> Task - Node: host=localhost port=xxxxx dbname=regression - -> Limit - -> Unique - -> Sort - Sort Key: q.l_partkey, q.l_orderkey - -> Subquery Scan on q - Filter: (q.r > 1) - -> Seq Scan on lineitem_hash_part_360041 lineitem_hash_part -(16 rows) - diff --git a/src/test/regress/expected/window_functions.out b/src/test/regress/expected/window_functions.out index 6657c3670..6f30a49e3 100644 --- a/src/test/regress/expected/window_functions.out +++ b/src/test/regress/expected/window_functions.out @@ -1,6 +1,11 @@ +-- +-- WINDOW_FUNCTIONS -- =================================================================== -- test top level window functions that are pushdownable -- =================================================================== +-- This test file has an alternative output because of use of +-- incremental sort in some explain outputs in PG13 +-- -- a very simple window function with an aggregate and a window function -- distribution column is on the partition by clause SELECT @@ -1382,20 +1387,15 @@ LIMIT 5; -> Task Node: host=localhost port=xxxxx dbname=regression -> Limit - -> Incremental Sort + -> Sort Sort Key: users_table.user_id, (avg(users_table.value_1)) DESC - Presorted Key: users_table.user_id -> WindowAgg - -> Incremental Sort + -> Sort Sort Key: users_table.user_id, (('1'::numeric / ('1'::numeric + avg(users_table.value_1)))) - Presorted Key: users_table.user_id - -> GroupAggregate + -> HashAggregate Group Key: users_table.user_id, users_table.value_2 - -> Incremental Sort - Sort Key: users_table.user_id, users_table.value_2 - Presorted Key: users_table.user_id - -> Index Scan using is_index1_1400256 on users_table_1400256 users_table -(22 rows) + -> Seq Scan on users_table_1400256 users_table +(17 rows) EXPLAIN (COSTS FALSE) SELECT @@ -1418,20 +1418,15 @@ LIMIT 5; -> Task Node: host=localhost port=xxxxx dbname=regression -> Limit - -> Incremental Sort + -> Sort Sort Key: users_table.user_id, (avg(users_table.value_1)) DESC - Presorted Key: users_table.user_id -> WindowAgg - -> Incremental Sort + -> Sort Sort Key: users_table.user_id, (('1'::numeric / ('1'::numeric + avg(users_table.value_1)))) - Presorted Key: users_table.user_id - -> GroupAggregate + -> HashAggregate Group Key: users_table.user_id, users_table.value_2 - -> Incremental Sort - Sort Key: users_table.user_id, users_table.value_2 - Presorted Key: users_table.user_id - -> Index Scan using is_index1_1400256 on users_table_1400256 users_table -(22 rows) + -> Seq Scan on users_table_1400256 users_table +(17 rows) EXPLAIN (COSTS FALSE) SELECT @@ -1443,7 +1438,7 @@ FROM GROUP BY user_id, value_2 ORDER BY user_id, avg(value_1) DESC LIMIT 5; - QUERY PLAN + QUERY PLAN --------------------------------------------------------------------- Limit -> Sort @@ -1454,20 +1449,15 @@ LIMIT 5; -> Task Node: host=localhost port=xxxxx dbname=regression -> Limit - -> Incremental Sort + -> Sort Sort Key: users_table.user_id, (avg(users_table.value_1)) DESC - Presorted Key: users_table.user_id -> WindowAgg - -> Incremental Sort + -> Sort Sort Key: users_table.user_id, ((1 / (1 + sum(users_table.value_2)))) - Presorted Key: users_table.user_id - -> GroupAggregate + -> HashAggregate Group Key: users_table.user_id, users_table.value_2 - -> Incremental Sort - Sort Key: users_table.user_id, users_table.value_2 - Presorted Key: users_table.user_id - -> Index Scan using is_index1_1400256 on users_table_1400256 users_table -(22 rows) + -> Seq Scan on users_table_1400256 users_table +(17 rows) EXPLAIN (COSTS FALSE) SELECT @@ -1479,7 +1469,7 @@ FROM GROUP BY user_id, value_2 ORDER BY user_id, avg(value_1) DESC LIMIT 5; - QUERY PLAN + QUERY PLAN --------------------------------------------------------------------- Limit -> Sort @@ -1490,20 +1480,15 @@ LIMIT 5; -> Task Node: host=localhost port=xxxxx dbname=regression -> Limit - -> Incremental Sort + -> Sort Sort Key: users_table.user_id, (avg(users_table.value_1)) DESC - Presorted Key: users_table.user_id -> WindowAgg - -> Incremental Sort + -> Sort Sort Key: users_table.user_id, (sum(users_table.value_2)) - Presorted Key: users_table.user_id - -> GroupAggregate + -> HashAggregate Group Key: users_table.user_id, users_table.value_2 - -> Incremental Sort - Sort Key: users_table.user_id, users_table.value_2 - Presorted Key: users_table.user_id - -> Index Scan using is_index1_1400256 on users_table_1400256 users_table -(22 rows) + -> Seq Scan on users_table_1400256 users_table +(17 rows) -- Grouping can be pushed down with aggregates even when window function can't EXPLAIN (COSTS FALSE) diff --git a/src/test/regress/expected/window_functions_0.out b/src/test/regress/expected/window_functions_0.out index 0a41bc0cc..c5a132301 100644 --- a/src/test/regress/expected/window_functions_0.out +++ b/src/test/regress/expected/window_functions_0.out @@ -1,6 +1,11 @@ +-- +-- WINDOW_FUNCTIONS -- =================================================================== -- test top level window functions that are pushdownable -- =================================================================== +-- This test file has an alternative output because of use of +-- incremental sort in some explain outputs in PG13 +-- -- a very simple window function with an aggregate and a window function -- distribution column is on the partition by clause SELECT diff --git a/src/test/regress/expected/window_functions_1.out b/src/test/regress/expected/window_functions_1.out deleted file mode 100644 index aea319c0b..000000000 --- a/src/test/regress/expected/window_functions_1.out +++ /dev/null @@ -1,1648 +0,0 @@ --- =================================================================== --- test top level window functions that are pushdownable --- =================================================================== --- a very simple window function with an aggregate and a window function --- distribution column is on the partition by clause -SELECT - user_id, COUNT(*) OVER (PARTITION BY user_id), - rank() OVER (PARTITION BY user_id) -FROM - users_table -ORDER BY - 1 DESC, 2 DESC, 3 DESC -LIMIT 5; - user_id | count | rank ---------------------------------------------------------------------- - 6 | 10 | 1 - 6 | 10 | 1 - 6 | 10 | 1 - 6 | 10 | 1 - 6 | 10 | 1 -(5 rows) - --- a more complicated window clause, including an aggregate --- in both the window clause and the target entry -SELECT - user_id, avg(avg(value_3)) OVER (PARTITION BY user_id, MIN(value_2)) -FROM - users_table -GROUP BY - 1 -ORDER BY - 2 DESC NULLS LAST, 1 DESC; - user_id | avg ---------------------------------------------------------------------- - 2 | 3 - 4 | 2.82608695652174 - 3 | 2.70588235294118 - 6 | 2.6 - 1 | 2.57142857142857 - 5 | 2.46153846153846 -(6 rows) - --- window clause operates on the results of a subquery -SELECT - user_id, max(value_1) OVER (PARTITION BY user_id, MIN(value_2)) -FROM ( - SELECT - DISTINCT us.user_id, us.value_2, value_1, random() as r1 - FROM - users_table as us, events_table - WHERE - us.user_id = events_table.user_id AND event_type IN (1,2) - ORDER BY - user_id, value_2 - ) s -GROUP BY - 1, value_1 -ORDER BY - 2 DESC, 1; - user_id | max ---------------------------------------------------------------------- - 1 | 5 - 3 | 5 - 3 | 5 - 4 | 5 - 5 | 5 - 5 | 5 - 6 | 5 - 6 | 5 - 1 | 4 - 2 | 4 - 3 | 4 - 3 | 4 - 3 | 4 - 4 | 4 - 4 | 4 - 5 | 4 - 5 | 4 - 1 | 3 - 2 | 3 - 2 | 3 - 2 | 3 - 6 | 3 - 2 | 2 - 4 | 2 - 4 | 2 - 4 | 2 - 6 | 2 - 1 | 1 - 3 | 1 - 5 | 1 - 6 | 1 - 5 | 0 -(32 rows) - --- window function operates on the results of --- a join --- we also want to verify that this doesn't crash --- when the logging level is DEBUG4 -SET log_min_messages TO DEBUG4; -SELECT - us.user_id, - SUM(us.value_1) OVER (PARTITION BY us.user_id) -FROM - users_table us - JOIN - events_table ev - ON (us.user_id = ev.user_id) -GROUP BY - 1, - value_1 -ORDER BY - 1, - 2 -LIMIT 5; - user_id | sum ---------------------------------------------------------------------- - 1 | 13 - 1 | 13 - 1 | 13 - 1 | 13 - 2 | 10 -(5 rows) - --- the same query, but this time join with an alias -SELECT - user_id, value_1, SUM(j.value_1) OVER (PARTITION BY j.user_id) -FROM - (users_table us - JOIN - events_table ev - USING (user_id ) - ) j -GROUP BY - user_id, - value_1 -ORDER BY - 3 DESC, 2 DESC, 1 DESC -LIMIT 5; - user_id | value_1 | sum ---------------------------------------------------------------------- - 5 | 5 | 15 - 4 | 5 | 15 - 3 | 5 | 15 - 5 | 4 | 15 - 4 | 4 | 15 -(5 rows) - --- querying views that have window functions should be ok -CREATE VIEW window_view AS -SELECT - DISTINCT user_id, rank() OVER (PARTITION BY user_id ORDER BY value_1) -FROM - users_table -GROUP BY - user_id, value_1 -HAVING count(*) > 1; --- Window function in View works -SELECT * -FROM - window_view -ORDER BY - 2 DESC, 1 -LIMIT 10; - user_id | rank ---------------------------------------------------------------------- - 5 | 6 - 2 | 5 - 4 | 5 - 5 | 5 - 2 | 4 - 3 | 4 - 4 | 4 - 5 | 4 - 6 | 4 - 2 | 3 -(10 rows) - --- the other way around also should work fine --- query a view using window functions -CREATE VIEW users_view AS SELECT * FROM users_table; -SELECT - DISTINCT user_id, rank() OVER (PARTITION BY user_id ORDER BY value_1) -FROM - users_view -GROUP BY - user_id, value_1 -HAVING count(*) > 4 -ORDER BY - 2 DESC, 1; - user_id | rank ---------------------------------------------------------------------- - 4 | 2 - 5 | 2 - 2 | 1 - 3 | 1 - 4 | 1 - 5 | 1 -(6 rows) - -DROP VIEW users_view, window_view; --- window functions along with subquery in HAVING -SELECT - user_id, count (user_id) OVER (PARTITION BY user_id) -FROM - users_table -GROUP BY - user_id HAVING avg(value_1) < (SELECT min(k_no) FROM users_ref_test_table) -ORDER BY 1 DESC,2 DESC -LIMIT 1; - user_id | count ---------------------------------------------------------------------- - 6 | 1 -(1 row) - --- window function uses columns from two different tables -SELECT - DISTINCT ON (events_table.user_id, rnk) events_table.user_id, rank() OVER my_win AS rnk -FROM - events_table, users_table -WHERE - users_table.user_id = events_table.user_id -WINDOW - my_win AS (PARTITION BY events_table.user_id, users_table.value_1 ORDER BY events_table.time DESC) -ORDER BY - rnk DESC, 1 DESC -LIMIT 10; - user_id | rnk ---------------------------------------------------------------------- - 3 | 121 - 5 | 118 - 2 | 116 - 3 | 115 - 4 | 113 - 2 | 111 - 5 | 109 - 3 | 109 - 4 | 106 - 2 | 106 -(10 rows) - --- the same query with reference table column is also on the partition by clause -SELECT - DISTINCT ON (events_table.user_id, rnk) events_table.user_id, rank() OVER my_win AS rnk -FROM - events_table, users_ref_test_table uref -WHERE - uref.id = events_table.user_id -WINDOW - my_win AS (PARTITION BY events_table.user_id, uref.k_no ORDER BY events_table.time DESC) -ORDER BY - rnk DESC, 1 DESC -LIMIT 10; - user_id | rnk ---------------------------------------------------------------------- - 2 | 24 - 2 | 23 - 2 | 22 - 3 | 21 - 2 | 21 - 3 | 20 - 2 | 20 - 3 | 19 - 2 | 19 - 3 | 18 -(10 rows) - --- similar query with no distribution column on the partition by clause -SELECT - DISTINCT ON (events_table.user_id, rnk) events_table.user_id, rank() OVER my_win AS rnk -FROM - events_table, users_ref_test_table uref -WHERE - uref.id = events_table.user_id -WINDOW - my_win AS (PARTITION BY events_table.value_2, uref.k_no ORDER BY events_table.time DESC) -ORDER BY - rnk DESC, 1 DESC -LIMIT 10; - user_id | rnk ---------------------------------------------------------------------- - 3 | 7 - 2 | 7 - 3 | 6 - 2 | 6 - 4 | 5 - 3 | 5 - 2 | 5 - 1 | 5 - 6 | 4 - 5 | 4 -(10 rows) - --- ORDER BY in the window function is an aggregate -SELECT - user_id, rank() OVER my_win as rnk, avg(value_2) as avg_val_2 -FROM - events_table -GROUP BY - user_id, date_trunc('day', time) -WINDOW - my_win AS (PARTITION BY user_id ORDER BY avg(event_type) DESC) -ORDER BY - 3 DESC, 2 DESC, 1 DESC; - user_id | rnk | avg_val_2 ---------------------------------------------------------------------- - 1 | 1 | 3.3750000000000000 - 3 | 2 | 3.1666666666666667 - 5 | 1 | 2.6666666666666667 - 6 | 1 | 2.5000000000000000 - 4 | 1 | 2.5000000000000000 - 2 | 1 | 2.4736842105263158 - 4 | 2 | 2.4000000000000000 - 1 | 2 | 2.1428571428571429 - 5 | 2 | 2.0909090909090909 - 6 | 2 | 2.0000000000000000 - 2 | 2 | 2.0000000000000000 - 3 | 1 | 1.8000000000000000 -(12 rows) - --- lets push the limits of writing complex expressions aling with the window functions -SELECT - COUNT(*) OVER (PARTITION BY user_id, user_id + 1), - rank() OVER (PARTITION BY user_id) as cnt1, - COUNT(*) OVER (PARTITION BY user_id, abs(value_1 - value_2)) as cnt2, - date_trunc('min', lag(time) OVER (PARTITION BY user_id ORDER BY time)) as datee, - rank() OVER my_win as rnnk, - avg(CASE - WHEN user_id > 4 - THEN value_1 - ELSE value_2 - END) FILTER (WHERE user_id > 2) OVER my_win_2 as filtered_count, - sum(user_id * (5.0 / (value_1 + value_2 + 0.1)) * value_3) FILTER (WHERE value_1::text LIKE '%1%') OVER my_win_4 as cnt_with_filter_2 -FROM - users_table -WINDOW - my_win AS (PARTITION BY user_id, (value_1%3)::int ORDER BY time DESC), - my_win_2 AS (PARTITION BY user_id, (value_1)::int ORDER BY time DESC), - my_win_3 AS (PARTITION BY user_id, date_trunc('min', time)), - my_win_4 AS (my_win_3 ORDER BY value_2, value_3) -ORDER BY - cnt_with_filter_2 DESC NULLS LAST, filtered_count DESC NULLS LAST, datee DESC NULLS LAST, rnnk DESC, cnt2 DESC, cnt1 DESC, user_id DESC -LIMIT 5; - count | cnt1 | cnt2 | datee | rnnk | filtered_count | cnt_with_filter_2 ---------------------------------------------------------------------- - 23 | 1 | 7 | Thu Nov 23 02:14:00 2017 | 6 | 0.00000000000000000000 | 72.7272727272727 - 10 | 1 | 3 | Wed Nov 22 23:01:00 2017 | 1 | 1.00000000000000000000 | 57.1428571428571 - 17 | 1 | 5 | Wed Nov 22 23:24:00 2017 | 8 | 3.0000000000000000 | 28.5714285714286 - 17 | 1 | 5 | | 10 | 2.6666666666666667 | 28.5714285714286 - 17 | 1 | 5 | Thu Nov 23 00:15:00 2017 | 7 | 3.6666666666666667 | 24.1935483870968 -(5 rows) - --- some tests with GROUP BY along with PARTITION BY -SELECT - user_id, - rank() OVER my_win as my_rank, - avg(avg(event_type)) OVER my_win_2 as avg, - max(time) as mx_time -FROM - events_table -GROUP BY - user_id, - value_2 -WINDOW - my_win AS (PARTITION BY user_id, max(event_type) ORDER BY count(*) DESC), - my_win_2 AS (PARTITION BY user_id, avg(user_id) ORDER BY count(*) DESC) -ORDER BY - avg DESC, - mx_time DESC, - my_rank DESC, - user_id DESC; - user_id | my_rank | avg | mx_time ---------------------------------------------------------------------- - 6 | 1 | 3.0000000000000000 | Thu Nov 23 14:00:13.20013 2017 - 6 | 2 | 3.0000000000000000 | Thu Nov 23 11:16:13.106691 2017 - 6 | 1 | 3.0000000000000000 | Thu Nov 23 07:27:32.822068 2017 - 3 | 1 | 2.9857142857142857 | Thu Nov 23 16:31:56.219594 2017 - 4 | 2 | 2.9555555555555556 | Thu Nov 23 14:19:25.765876 2017 - 4 | 1 | 2.9555555555555556 | Thu Nov 23 08:36:53.871919 2017 - 1 | 4 | 2.8633333333333333 | Wed Nov 22 21:06:57.457147 2017 - 1 | 1 | 2.8250000000000000 | Thu Nov 23 21:54:46.924477 2017 - 2 | 2 | 2.7738095238095238 | Thu Nov 23 13:27:37.441959 2017 - 1 | 2 | 2.7722222222222222 | Thu Nov 23 09:23:30.994345 2017 - 3 | 1 | 2.7682539682539682 | Thu Nov 23 01:17:49.040685 2017 - 2 | 1 | 2.7142857142857143 | Thu Nov 23 15:58:49.273421 2017 - 1 | 3 | 2.5791666666666667 | Thu Nov 23 11:09:38.074595 2017 - 3 | 1 | 2.5714285714285714 | Thu Nov 23 16:44:41.903713 2017 - 2 | 1 | 2.5158730158730159 | Thu Nov 23 14:02:47.738901 2017 - 4 | 1 | 2.47777777777777778333 | Thu Nov 23 16:20:33.264457 2017 - 4 | 3 | 2.47777777777777778333 | Thu Nov 23 08:14:18.231273 2017 - 4 | 3 | 2.47777777777777778333 | Thu Nov 23 07:32:45.521278 2017 - 1 | 1 | 2.4000000000000000 | Thu Nov 23 10:23:27.617726 2017 - 2 | 1 | 2.3869047619047619 | Thu Nov 23 17:26:14.563216 2017 - 3 | 1 | 2.3841269841269841 | Thu Nov 23 18:08:26.550729 2017 - 3 | 1 | 2.3841269841269841 | Thu Nov 23 09:38:45.338008 2017 - 3 | 2 | 2.3841269841269841 | Thu Nov 23 06:44:50.887182 2017 - 2 | 2 | 2.3095238095238095 | Thu Nov 23 04:05:16.217731 2017 - 5 | 2 | 2.3000000000000000 | Thu Nov 23 14:28:51.833214 2017 - 5 | 2 | 2.3000000000000000 | Thu Nov 23 14:23:09.889786 2017 - 4 | 1 | 2.2000000000000000 | Thu Nov 23 18:10:21.338399 2017 - 2 | 1 | 2.09126984126984126667 | Thu Nov 23 03:35:04.321504 2017 - 5 | 1 | 2.0000000000000000 | Thu Nov 23 16:11:02.929469 2017 - 5 | 1 | 2.0000000000000000 | Thu Nov 23 14:40:40.467511 2017 - 5 | 1 | 2.0000000000000000 | Thu Nov 23 13:26:45.571108 2017 -(31 rows) - --- test for range and rows mode and different window functions --- mostly to make sure that deparsing works fine -SELECT - user_id, - rank() OVER (PARTITION BY user_id ROWS BETWEEN - UNBOUNDED PRECEDING AND CURRENT ROW), - dense_rank() OVER (PARTITION BY user_id RANGE BETWEEN - UNBOUNDED PRECEDING AND CURRENT ROW), - CUME_DIST() OVER (PARTITION BY user_id RANGE BETWEEN - UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING), - PERCENT_RANK() OVER (PARTITION BY user_id ORDER BY avg(value_1) RANGE BETWEEN - UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) -FROM - users_table -GROUP BY - 1 -ORDER BY - 4 DESC,3 DESC,2 DESC ,1 DESC; - user_id | rank | dense_rank | cume_dist | percent_rank ---------------------------------------------------------------------- - 6 | 1 | 1 | 1 | 0 - 5 | 1 | 1 | 1 | 0 - 4 | 1 | 1 | 1 | 0 - 3 | 1 | 1 | 1 | 0 - 2 | 1 | 1 | 1 | 0 - 1 | 1 | 1 | 1 | 0 -(6 rows) - --- test exclude supported -SELECT - user_id, - value_1, - array_agg(value_1) OVER (PARTITION BY user_id ORDER BY value_1 RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW), - array_agg(value_1) OVER (PARTITION BY user_id ORDER BY value_1 RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW EXCLUDE CURRENT ROW) -FROM - users_table -WHERE - user_id > 2 AND user_id < 6 -ORDER BY - user_id, value_1, 3, 4; - user_id | value_1 | array_agg | array_agg ---------------------------------------------------------------------- - 3 | 0 | {0} | - 3 | 1 | {0,1,1,1,1,1,1} | {0,1,1,1,1,1} - 3 | 1 | {0,1,1,1,1,1,1} | {0,1,1,1,1,1} - 3 | 1 | {0,1,1,1,1,1,1} | {0,1,1,1,1,1} - 3 | 1 | {0,1,1,1,1,1,1} | {0,1,1,1,1,1} - 3 | 1 | {0,1,1,1,1,1,1} | {0,1,1,1,1,1} - 3 | 1 | {0,1,1,1,1,1,1} | {0,1,1,1,1,1} - 3 | 2 | {0,1,1,1,1,1,1,2,2} | {0,1,1,1,1,1,1,2} - 3 | 2 | {0,1,1,1,1,1,1,2,2} | {0,1,1,1,1,1,1,2} - 3 | 3 | {0,1,1,1,1,1,1,2,2,3,3,3} | {0,1,1,1,1,1,1,2,2,3,3} - 3 | 3 | {0,1,1,1,1,1,1,2,2,3,3,3} | {0,1,1,1,1,1,1,2,2,3,3} - 3 | 3 | {0,1,1,1,1,1,1,2,2,3,3,3} | {0,1,1,1,1,1,1,2,2,3,3} - 3 | 4 | {0,1,1,1,1,1,1,2,2,3,3,3,4,4,4,4} | {0,1,1,1,1,1,1,2,2,3,3,3,4,4,4} - 3 | 4 | {0,1,1,1,1,1,1,2,2,3,3,3,4,4,4,4} | {0,1,1,1,1,1,1,2,2,3,3,3,4,4,4} - 3 | 4 | {0,1,1,1,1,1,1,2,2,3,3,3,4,4,4,4} | {0,1,1,1,1,1,1,2,2,3,3,3,4,4,4} - 3 | 4 | {0,1,1,1,1,1,1,2,2,3,3,3,4,4,4,4} | {0,1,1,1,1,1,1,2,2,3,3,3,4,4,4} - 3 | 5 | {0,1,1,1,1,1,1,2,2,3,3,3,4,4,4,4,5} | {0,1,1,1,1,1,1,2,2,3,3,3,4,4,4,4} - 4 | 0 | {0,0,0,0} | {0,0,0} - 4 | 0 | {0,0,0,0} | {0,0,0} - 4 | 0 | {0,0,0,0} | {0,0,0} - 4 | 0 | {0,0,0,0} | {0,0,0} - 4 | 1 | {0,0,0,0,1} | {0,0,0,0} - 4 | 2 | {0,0,0,0,1,2,2,2} | {0,0,0,0,1,2,2} - 4 | 2 | {0,0,0,0,1,2,2,2} | {0,0,0,0,1,2,2} - 4 | 2 | {0,0,0,0,1,2,2,2} | {0,0,0,0,1,2,2} - 4 | 3 | {0,0,0,0,1,2,2,2,3,3,3,3,3,3} | {0,0,0,0,1,2,2,2,3,3,3,3,3} - 4 | 3 | {0,0,0,0,1,2,2,2,3,3,3,3,3,3} | {0,0,0,0,1,2,2,2,3,3,3,3,3} - 4 | 3 | {0,0,0,0,1,2,2,2,3,3,3,3,3,3} | {0,0,0,0,1,2,2,2,3,3,3,3,3} - 4 | 3 | {0,0,0,0,1,2,2,2,3,3,3,3,3,3} | {0,0,0,0,1,2,2,2,3,3,3,3,3} - 4 | 3 | {0,0,0,0,1,2,2,2,3,3,3,3,3,3} | {0,0,0,0,1,2,2,2,3,3,3,3,3} - 4 | 3 | {0,0,0,0,1,2,2,2,3,3,3,3,3,3} | {0,0,0,0,1,2,2,2,3,3,3,3,3} - 4 | 4 | {0,0,0,0,1,2,2,2,3,3,3,3,3,3,4,4,4,4,4,4,4} | {0,0,0,0,1,2,2,2,3,3,3,3,3,3,4,4,4,4,4,4} - 4 | 4 | {0,0,0,0,1,2,2,2,3,3,3,3,3,3,4,4,4,4,4,4,4} | {0,0,0,0,1,2,2,2,3,3,3,3,3,3,4,4,4,4,4,4} - 4 | 4 | {0,0,0,0,1,2,2,2,3,3,3,3,3,3,4,4,4,4,4,4,4} | {0,0,0,0,1,2,2,2,3,3,3,3,3,3,4,4,4,4,4,4} - 4 | 4 | {0,0,0,0,1,2,2,2,3,3,3,3,3,3,4,4,4,4,4,4,4} | {0,0,0,0,1,2,2,2,3,3,3,3,3,3,4,4,4,4,4,4} - 4 | 4 | {0,0,0,0,1,2,2,2,3,3,3,3,3,3,4,4,4,4,4,4,4} | {0,0,0,0,1,2,2,2,3,3,3,3,3,3,4,4,4,4,4,4} - 4 | 4 | {0,0,0,0,1,2,2,2,3,3,3,3,3,3,4,4,4,4,4,4,4} | {0,0,0,0,1,2,2,2,3,3,3,3,3,3,4,4,4,4,4,4} - 4 | 4 | {0,0,0,0,1,2,2,2,3,3,3,3,3,3,4,4,4,4,4,4,4} | {0,0,0,0,1,2,2,2,3,3,3,3,3,3,4,4,4,4,4,4} - 4 | 5 | {0,0,0,0,1,2,2,2,3,3,3,3,3,3,4,4,4,4,4,4,4,5,5} | {0,0,0,0,1,2,2,2,3,3,3,3,3,3,4,4,4,4,4,4,4,5} - 4 | 5 | {0,0,0,0,1,2,2,2,3,3,3,3,3,3,4,4,4,4,4,4,4,5,5} | {0,0,0,0,1,2,2,2,3,3,3,3,3,3,4,4,4,4,4,4,4,5} - 5 | 0 | {0,0} | {0} - 5 | 0 | {0,0} | {0} - 5 | 1 | {0,0,1,1,1} | {0,0,1,1} - 5 | 1 | {0,0,1,1,1} | {0,0,1,1} - 5 | 1 | {0,0,1,1,1} | {0,0,1,1} - 5 | 2 | {0,0,1,1,1,2,2,2,2,2,2} | {0,0,1,1,1,2,2,2,2,2} - 5 | 2 | {0,0,1,1,1,2,2,2,2,2,2} | {0,0,1,1,1,2,2,2,2,2} - 5 | 2 | {0,0,1,1,1,2,2,2,2,2,2} | {0,0,1,1,1,2,2,2,2,2} - 5 | 2 | {0,0,1,1,1,2,2,2,2,2,2} | {0,0,1,1,1,2,2,2,2,2} - 5 | 2 | {0,0,1,1,1,2,2,2,2,2,2} | {0,0,1,1,1,2,2,2,2,2} - 5 | 2 | {0,0,1,1,1,2,2,2,2,2,2} | {0,0,1,1,1,2,2,2,2,2} - 5 | 3 | {0,0,1,1,1,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3} | {0,0,1,1,1,2,2,2,2,2,2,3,3,3,3,3,3,3,3} - 5 | 3 | {0,0,1,1,1,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3} | {0,0,1,1,1,2,2,2,2,2,2,3,3,3,3,3,3,3,3} - 5 | 3 | {0,0,1,1,1,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3} | {0,0,1,1,1,2,2,2,2,2,2,3,3,3,3,3,3,3,3} - 5 | 3 | {0,0,1,1,1,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3} | {0,0,1,1,1,2,2,2,2,2,2,3,3,3,3,3,3,3,3} - 5 | 3 | {0,0,1,1,1,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3} | {0,0,1,1,1,2,2,2,2,2,2,3,3,3,3,3,3,3,3} - 5 | 3 | {0,0,1,1,1,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3} | {0,0,1,1,1,2,2,2,2,2,2,3,3,3,3,3,3,3,3} - 5 | 3 | {0,0,1,1,1,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3} | {0,0,1,1,1,2,2,2,2,2,2,3,3,3,3,3,3,3,3} - 5 | 3 | {0,0,1,1,1,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3} | {0,0,1,1,1,2,2,2,2,2,2,3,3,3,3,3,3,3,3} - 5 | 3 | {0,0,1,1,1,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3} | {0,0,1,1,1,2,2,2,2,2,2,3,3,3,3,3,3,3,3} - 5 | 4 | {0,0,1,1,1,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3,4,4,4} | {0,0,1,1,1,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3,4,4} - 5 | 4 | {0,0,1,1,1,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3,4,4,4} | {0,0,1,1,1,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3,4,4} - 5 | 4 | {0,0,1,1,1,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3,4,4,4} | {0,0,1,1,1,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3,4,4} - 5 | 5 | {0,0,1,1,1,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3,4,4,4,5,5,5} | {0,0,1,1,1,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3,4,4,4,5,5} - 5 | 5 | {0,0,1,1,1,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3,4,4,4,5,5,5} | {0,0,1,1,1,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3,4,4,4,5,5} - 5 | 5 | {0,0,1,1,1,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3,4,4,4,5,5,5} | {0,0,1,1,1,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3,4,4,4,5,5} -(66 rows) - --- test preceding and following on RANGE window -SELECT - user_id, - value_1, - array_agg(value_1) OVER range_window, - array_agg(value_1) OVER range_window_exclude -FROM - users_table -WHERE - user_id > 2 AND user_id < 6 -WINDOW - range_window as (PARTITION BY user_id ORDER BY value_1 RANGE BETWEEN 1 PRECEDING AND 1 FOLLOWING), - range_window_exclude as (PARTITION BY user_id ORDER BY value_1 RANGE BETWEEN 1 PRECEDING AND 1 FOLLOWING EXCLUDE CURRENT ROW) -ORDER BY - user_id, value_1, 3, 4; - user_id | value_1 | array_agg | array_agg ---------------------------------------------------------------------- - 3 | 0 | {0,1,1,1,1,1,1} | {1,1,1,1,1,1} - 3 | 1 | {0,1,1,1,1,1,1,2,2} | {0,1,1,1,1,1,2,2} - 3 | 1 | {0,1,1,1,1,1,1,2,2} | {0,1,1,1,1,1,2,2} - 3 | 1 | {0,1,1,1,1,1,1,2,2} | {0,1,1,1,1,1,2,2} - 3 | 1 | {0,1,1,1,1,1,1,2,2} | {0,1,1,1,1,1,2,2} - 3 | 1 | {0,1,1,1,1,1,1,2,2} | {0,1,1,1,1,1,2,2} - 3 | 1 | {0,1,1,1,1,1,1,2,2} | {0,1,1,1,1,1,2,2} - 3 | 2 | {1,1,1,1,1,1,2,2,3,3,3} | {1,1,1,1,1,1,2,3,3,3} - 3 | 2 | {1,1,1,1,1,1,2,2,3,3,3} | {1,1,1,1,1,1,2,3,3,3} - 3 | 3 | {2,2,3,3,3,4,4,4,4} | {2,2,3,3,4,4,4,4} - 3 | 3 | {2,2,3,3,3,4,4,4,4} | {2,2,3,3,4,4,4,4} - 3 | 3 | {2,2,3,3,3,4,4,4,4} | {2,2,3,3,4,4,4,4} - 3 | 4 | {3,3,3,4,4,4,4,5} | {3,3,3,4,4,4,5} - 3 | 4 | {3,3,3,4,4,4,4,5} | {3,3,3,4,4,4,5} - 3 | 4 | {3,3,3,4,4,4,4,5} | {3,3,3,4,4,4,5} - 3 | 4 | {3,3,3,4,4,4,4,5} | {3,3,3,4,4,4,5} - 3 | 5 | {4,4,4,4,5} | {4,4,4,4} - 4 | 0 | {0,0,0,0,1} | {0,0,0,1} - 4 | 0 | {0,0,0,0,1} | {0,0,0,1} - 4 | 0 | {0,0,0,0,1} | {0,0,0,1} - 4 | 0 | {0,0,0,0,1} | {0,0,0,1} - 4 | 1 | {0,0,0,0,1,2,2,2} | {0,0,0,0,2,2,2} - 4 | 2 | {1,2,2,2,3,3,3,3,3,3} | {1,2,2,3,3,3,3,3,3} - 4 | 2 | {1,2,2,2,3,3,3,3,3,3} | {1,2,2,3,3,3,3,3,3} - 4 | 2 | {1,2,2,2,3,3,3,3,3,3} | {1,2,2,3,3,3,3,3,3} - 4 | 3 | {2,2,2,3,3,3,3,3,3,4,4,4,4,4,4,4} | {2,2,2,3,3,3,3,3,4,4,4,4,4,4,4} - 4 | 3 | {2,2,2,3,3,3,3,3,3,4,4,4,4,4,4,4} | {2,2,2,3,3,3,3,3,4,4,4,4,4,4,4} - 4 | 3 | {2,2,2,3,3,3,3,3,3,4,4,4,4,4,4,4} | {2,2,2,3,3,3,3,3,4,4,4,4,4,4,4} - 4 | 3 | {2,2,2,3,3,3,3,3,3,4,4,4,4,4,4,4} | {2,2,2,3,3,3,3,3,4,4,4,4,4,4,4} - 4 | 3 | {2,2,2,3,3,3,3,3,3,4,4,4,4,4,4,4} | {2,2,2,3,3,3,3,3,4,4,4,4,4,4,4} - 4 | 3 | {2,2,2,3,3,3,3,3,3,4,4,4,4,4,4,4} | {2,2,2,3,3,3,3,3,4,4,4,4,4,4,4} - 4 | 4 | {3,3,3,3,3,3,4,4,4,4,4,4,4,5,5} | {3,3,3,3,3,3,4,4,4,4,4,4,5,5} - 4 | 4 | {3,3,3,3,3,3,4,4,4,4,4,4,4,5,5} | {3,3,3,3,3,3,4,4,4,4,4,4,5,5} - 4 | 4 | {3,3,3,3,3,3,4,4,4,4,4,4,4,5,5} | {3,3,3,3,3,3,4,4,4,4,4,4,5,5} - 4 | 4 | {3,3,3,3,3,3,4,4,4,4,4,4,4,5,5} | {3,3,3,3,3,3,4,4,4,4,4,4,5,5} - 4 | 4 | {3,3,3,3,3,3,4,4,4,4,4,4,4,5,5} | {3,3,3,3,3,3,4,4,4,4,4,4,5,5} - 4 | 4 | {3,3,3,3,3,3,4,4,4,4,4,4,4,5,5} | {3,3,3,3,3,3,4,4,4,4,4,4,5,5} - 4 | 4 | {3,3,3,3,3,3,4,4,4,4,4,4,4,5,5} | {3,3,3,3,3,3,4,4,4,4,4,4,5,5} - 4 | 5 | {4,4,4,4,4,4,4,5,5} | {4,4,4,4,4,4,4,5} - 4 | 5 | {4,4,4,4,4,4,4,5,5} | {4,4,4,4,4,4,4,5} - 5 | 0 | {0,0,1,1,1} | {0,1,1,1} - 5 | 0 | {0,0,1,1,1} | {0,1,1,1} - 5 | 1 | {0,0,1,1,1,2,2,2,2,2,2} | {0,0,1,1,2,2,2,2,2,2} - 5 | 1 | {0,0,1,1,1,2,2,2,2,2,2} | {0,0,1,1,2,2,2,2,2,2} - 5 | 1 | {0,0,1,1,1,2,2,2,2,2,2} | {0,0,1,1,2,2,2,2,2,2} - 5 | 2 | {1,1,1,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3} | {1,1,1,2,2,2,2,2,3,3,3,3,3,3,3,3,3} - 5 | 2 | {1,1,1,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3} | {1,1,1,2,2,2,2,2,3,3,3,3,3,3,3,3,3} - 5 | 2 | {1,1,1,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3} | {1,1,1,2,2,2,2,2,3,3,3,3,3,3,3,3,3} - 5 | 2 | {1,1,1,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3} | {1,1,1,2,2,2,2,2,3,3,3,3,3,3,3,3,3} - 5 | 2 | {1,1,1,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3} | {1,1,1,2,2,2,2,2,3,3,3,3,3,3,3,3,3} - 5 | 2 | {1,1,1,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3} | {1,1,1,2,2,2,2,2,3,3,3,3,3,3,3,3,3} - 5 | 3 | {2,2,2,2,2,2,3,3,3,3,3,3,3,3,3,4,4,4} | {2,2,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4} - 5 | 3 | {2,2,2,2,2,2,3,3,3,3,3,3,3,3,3,4,4,4} | {2,2,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4} - 5 | 3 | {2,2,2,2,2,2,3,3,3,3,3,3,3,3,3,4,4,4} | {2,2,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4} - 5 | 3 | {2,2,2,2,2,2,3,3,3,3,3,3,3,3,3,4,4,4} | {2,2,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4} - 5 | 3 | {2,2,2,2,2,2,3,3,3,3,3,3,3,3,3,4,4,4} | {2,2,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4} - 5 | 3 | {2,2,2,2,2,2,3,3,3,3,3,3,3,3,3,4,4,4} | {2,2,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4} - 5 | 3 | {2,2,2,2,2,2,3,3,3,3,3,3,3,3,3,4,4,4} | {2,2,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4} - 5 | 3 | {2,2,2,2,2,2,3,3,3,3,3,3,3,3,3,4,4,4} | {2,2,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4} - 5 | 3 | {2,2,2,2,2,2,3,3,3,3,3,3,3,3,3,4,4,4} | {2,2,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4} - 5 | 4 | {3,3,3,3,3,3,3,3,3,4,4,4,5,5,5} | {3,3,3,3,3,3,3,3,3,4,4,5,5,5} - 5 | 4 | {3,3,3,3,3,3,3,3,3,4,4,4,5,5,5} | {3,3,3,3,3,3,3,3,3,4,4,5,5,5} - 5 | 4 | {3,3,3,3,3,3,3,3,3,4,4,4,5,5,5} | {3,3,3,3,3,3,3,3,3,4,4,5,5,5} - 5 | 5 | {4,4,4,5,5,5} | {4,4,4,5,5} - 5 | 5 | {4,4,4,5,5,5} | {4,4,4,5,5} - 5 | 5 | {4,4,4,5,5,5} | {4,4,4,5,5} -(66 rows) - --- test preceding and following on ROW window -SELECT - user_id, - value_1, - array_agg(value_1) OVER row_window, - array_agg(value_1) OVER row_window_exclude -FROM - users_table -WHERE - user_id > 2 and user_id < 6 -WINDOW - row_window as (PARTITION BY user_id ORDER BY value_1 ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING), - row_window_exclude as (PARTITION BY user_id ORDER BY value_1 ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING EXCLUDE CURRENT ROW) -ORDER BY - user_id, value_1, 3, 4; - user_id | value_1 | array_agg | array_agg ---------------------------------------------------------------------- - 3 | 0 | {0,1} | {1} - 3 | 1 | {0,1,1} | {0,1} - 3 | 1 | {1,1,1} | {1,1} - 3 | 1 | {1,1,1} | {1,1} - 3 | 1 | {1,1,1} | {1,1} - 3 | 1 | {1,1,1} | {1,1} - 3 | 1 | {1,1,2} | {1,2} - 3 | 2 | {1,2,2} | {1,2} - 3 | 2 | {2,2,3} | {2,3} - 3 | 3 | {2,3,3} | {2,3} - 3 | 3 | {3,3,3} | {3,3} - 3 | 3 | {3,3,4} | {3,4} - 3 | 4 | {3,4,4} | {3,4} - 3 | 4 | {4,4,4} | {4,4} - 3 | 4 | {4,4,4} | {4,4} - 3 | 4 | {4,4,5} | {4,5} - 3 | 5 | {4,5} | {4} - 4 | 0 | {0,0} | {0} - 4 | 0 | {0,0,0} | {0,0} - 4 | 0 | {0,0,0} | {0,0} - 4 | 0 | {0,0,1} | {0,1} - 4 | 1 | {0,1,2} | {0,2} - 4 | 2 | {1,2,2} | {1,2} - 4 | 2 | {2,2,2} | {2,2} - 4 | 2 | {2,2,3} | {2,3} - 4 | 3 | {2,3,3} | {2,3} - 4 | 3 | {3,3,3} | {3,3} - 4 | 3 | {3,3,3} | {3,3} - 4 | 3 | {3,3,3} | {3,3} - 4 | 3 | {3,3,3} | {3,3} - 4 | 3 | {3,3,4} | {3,4} - 4 | 4 | {3,4,4} | {3,4} - 4 | 4 | {4,4,4} | {4,4} - 4 | 4 | {4,4,4} | {4,4} - 4 | 4 | {4,4,4} | {4,4} - 4 | 4 | {4,4,4} | {4,4} - 4 | 4 | {4,4,4} | {4,4} - 4 | 4 | {4,4,5} | {4,5} - 4 | 5 | {4,5,5} | {4,5} - 4 | 5 | {5,5} | {5} - 5 | 0 | {0,0} | {0} - 5 | 0 | {0,0,1} | {0,1} - 5 | 1 | {0,1,1} | {0,1} - 5 | 1 | {1,1,1} | {1,1} - 5 | 1 | {1,1,2} | {1,2} - 5 | 2 | {1,2,2} | {1,2} - 5 | 2 | {2,2,2} | {2,2} - 5 | 2 | {2,2,2} | {2,2} - 5 | 2 | {2,2,2} | {2,2} - 5 | 2 | {2,2,2} | {2,2} - 5 | 2 | {2,2,3} | {2,3} - 5 | 3 | {2,3,3} | {2,3} - 5 | 3 | {3,3,3} | {3,3} - 5 | 3 | {3,3,3} | {3,3} - 5 | 3 | {3,3,3} | {3,3} - 5 | 3 | {3,3,3} | {3,3} - 5 | 3 | {3,3,3} | {3,3} - 5 | 3 | {3,3,3} | {3,3} - 5 | 3 | {3,3,3} | {3,3} - 5 | 3 | {3,3,4} | {3,4} - 5 | 4 | {3,4,4} | {3,4} - 5 | 4 | {4,4,4} | {4,4} - 5 | 4 | {4,4,5} | {4,5} - 5 | 5 | {4,5,5} | {4,5} - 5 | 5 | {5,5} | {5} - 5 | 5 | {5,5,5} | {5,5} -(66 rows) - --- repeat above 3 tests without grouping by distribution column -SELECT - value_2, - rank() OVER (PARTITION BY value_2 ROWS BETWEEN - UNBOUNDED PRECEDING AND CURRENT ROW), - dense_rank() OVER (PARTITION BY value_2 RANGE BETWEEN - UNBOUNDED PRECEDING AND CURRENT ROW), - CUME_DIST() OVER (PARTITION BY value_2 RANGE BETWEEN - UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING), - PERCENT_RANK() OVER (PARTITION BY value_2 ORDER BY avg(value_1) RANGE BETWEEN - UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) -FROM - users_table -GROUP BY - 1 -ORDER BY - 4 DESC,3 DESC,2 DESC ,1 DESC; - value_2 | rank | dense_rank | cume_dist | percent_rank ---------------------------------------------------------------------- - 5 | 1 | 1 | 1 | 0 - 4 | 1 | 1 | 1 | 0 - 3 | 1 | 1 | 1 | 0 - 2 | 1 | 1 | 1 | 0 - 1 | 1 | 1 | 1 | 0 - 0 | 1 | 1 | 1 | 0 -(6 rows) - --- test exclude supported -SELECT - value_2, - value_1, - array_agg(value_1) OVER (PARTITION BY value_2 ORDER BY value_1 RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW), - array_agg(value_1) OVER (PARTITION BY value_2 ORDER BY value_1 RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW EXCLUDE CURRENT ROW) -FROM - users_table -WHERE - value_2 > 2 AND value_2 < 6 -ORDER BY - value_2, value_1, 3, 4; - value_2 | value_1 | array_agg | array_agg ---------------------------------------------------------------------- - 3 | 0 | {0,0,0} | {0,0} - 3 | 0 | {0,0,0} | {0,0} - 3 | 0 | {0,0,0} | {0,0} - 3 | 1 | {0,0,0,1,1,1,1} | {0,0,0,1,1,1} - 3 | 1 | {0,0,0,1,1,1,1} | {0,0,0,1,1,1} - 3 | 1 | {0,0,0,1,1,1,1} | {0,0,0,1,1,1} - 3 | 1 | {0,0,0,1,1,1,1} | {0,0,0,1,1,1} - 3 | 2 | {0,0,0,1,1,1,1,2,2} | {0,0,0,1,1,1,1,2} - 3 | 2 | {0,0,0,1,1,1,1,2,2} | {0,0,0,1,1,1,1,2} - 3 | 3 | {0,0,0,1,1,1,1,2,2,3,3} | {0,0,0,1,1,1,1,2,2,3} - 3 | 3 | {0,0,0,1,1,1,1,2,2,3,3} | {0,0,0,1,1,1,1,2,2,3} - 3 | 4 | {0,0,0,1,1,1,1,2,2,3,3,4,4,4,4,4} | {0,0,0,1,1,1,1,2,2,3,3,4,4,4,4} - 3 | 4 | {0,0,0,1,1,1,1,2,2,3,3,4,4,4,4,4} | {0,0,0,1,1,1,1,2,2,3,3,4,4,4,4} - 3 | 4 | {0,0,0,1,1,1,1,2,2,3,3,4,4,4,4,4} | {0,0,0,1,1,1,1,2,2,3,3,4,4,4,4} - 3 | 4 | {0,0,0,1,1,1,1,2,2,3,3,4,4,4,4,4} | {0,0,0,1,1,1,1,2,2,3,3,4,4,4,4} - 3 | 4 | {0,0,0,1,1,1,1,2,2,3,3,4,4,4,4,4} | {0,0,0,1,1,1,1,2,2,3,3,4,4,4,4} - 3 | 5 | {0,0,0,1,1,1,1,2,2,3,3,4,4,4,4,4,5} | {0,0,0,1,1,1,1,2,2,3,3,4,4,4,4,4} - 4 | 0 | {0,0} | {0} - 4 | 0 | {0,0} | {0} - 4 | 1 | {0,0,1,1} | {0,0,1} - 4 | 1 | {0,0,1,1} | {0,0,1} - 4 | 2 | {0,0,1,1,2,2,2} | {0,0,1,1,2,2} - 4 | 2 | {0,0,1,1,2,2,2} | {0,0,1,1,2,2} - 4 | 2 | {0,0,1,1,2,2,2} | {0,0,1,1,2,2} - 4 | 3 | {0,0,1,1,2,2,2,3,3,3,3,3,3,3} | {0,0,1,1,2,2,2,3,3,3,3,3,3} - 4 | 3 | {0,0,1,1,2,2,2,3,3,3,3,3,3,3} | {0,0,1,1,2,2,2,3,3,3,3,3,3} - 4 | 3 | {0,0,1,1,2,2,2,3,3,3,3,3,3,3} | {0,0,1,1,2,2,2,3,3,3,3,3,3} - 4 | 3 | {0,0,1,1,2,2,2,3,3,3,3,3,3,3} | {0,0,1,1,2,2,2,3,3,3,3,3,3} - 4 | 3 | {0,0,1,1,2,2,2,3,3,3,3,3,3,3} | {0,0,1,1,2,2,2,3,3,3,3,3,3} - 4 | 3 | {0,0,1,1,2,2,2,3,3,3,3,3,3,3} | {0,0,1,1,2,2,2,3,3,3,3,3,3} - 4 | 3 | {0,0,1,1,2,2,2,3,3,3,3,3,3,3} | {0,0,1,1,2,2,2,3,3,3,3,3,3} - 4 | 4 | {0,0,1,1,2,2,2,3,3,3,3,3,3,3,4,4,4,4} | {0,0,1,1,2,2,2,3,3,3,3,3,3,3,4,4,4} - 4 | 4 | {0,0,1,1,2,2,2,3,3,3,3,3,3,3,4,4,4,4} | {0,0,1,1,2,2,2,3,3,3,3,3,3,3,4,4,4} - 4 | 4 | {0,0,1,1,2,2,2,3,3,3,3,3,3,3,4,4,4,4} | {0,0,1,1,2,2,2,3,3,3,3,3,3,3,4,4,4} - 4 | 4 | {0,0,1,1,2,2,2,3,3,3,3,3,3,3,4,4,4,4} | {0,0,1,1,2,2,2,3,3,3,3,3,3,3,4,4,4} - 4 | 5 | {0,0,1,1,2,2,2,3,3,3,3,3,3,3,4,4,4,4,5,5} | {0,0,1,1,2,2,2,3,3,3,3,3,3,3,4,4,4,4,5} - 4 | 5 | {0,0,1,1,2,2,2,3,3,3,3,3,3,3,4,4,4,4,5,5} | {0,0,1,1,2,2,2,3,3,3,3,3,3,3,4,4,4,4,5} - 5 | 0 | {0,0} | {0} - 5 | 0 | {0,0} | {0} - 5 | 1 | {0,0,1} | {0,0} - 5 | 2 | {0,0,1,2,2} | {0,0,1,2} - 5 | 2 | {0,0,1,2,2} | {0,0,1,2} - 5 | 3 | {0,0,1,2,2,3,3,3,3} | {0,0,1,2,2,3,3,3} - 5 | 3 | {0,0,1,2,2,3,3,3,3} | {0,0,1,2,2,3,3,3} - 5 | 3 | {0,0,1,2,2,3,3,3,3} | {0,0,1,2,2,3,3,3} - 5 | 3 | {0,0,1,2,2,3,3,3,3} | {0,0,1,2,2,3,3,3} - 5 | 4 | {0,0,1,2,2,3,3,3,3,4,4} | {0,0,1,2,2,3,3,3,3,4} - 5 | 4 | {0,0,1,2,2,3,3,3,3,4,4} | {0,0,1,2,2,3,3,3,3,4} - 5 | 5 | {0,0,1,2,2,3,3,3,3,4,4,5,5} | {0,0,1,2,2,3,3,3,3,4,4,5} - 5 | 5 | {0,0,1,2,2,3,3,3,3,4,4,5,5} | {0,0,1,2,2,3,3,3,3,4,4,5} -(50 rows) - --- test preceding and following on RANGE window -SELECT - value_2, - value_1, - array_agg(value_1) OVER range_window, - array_agg(value_1) OVER range_window_exclude -FROM - users_table -WHERE - value_2 > 2 AND value_2 < 6 -WINDOW - range_window as (PARTITION BY value_2 ORDER BY value_1 RANGE BETWEEN 1 PRECEDING AND 1 FOLLOWING), - range_window_exclude as (PARTITION BY value_2 ORDER BY value_1 RANGE BETWEEN 1 PRECEDING AND 1 FOLLOWING EXCLUDE CURRENT ROW) -ORDER BY - value_2, value_1, 3, 4; - value_2 | value_1 | array_agg | array_agg ---------------------------------------------------------------------- - 3 | 0 | {0,0,0,1,1,1,1} | {0,0,1,1,1,1} - 3 | 0 | {0,0,0,1,1,1,1} | {0,0,1,1,1,1} - 3 | 0 | {0,0,0,1,1,1,1} | {0,0,1,1,1,1} - 3 | 1 | {0,0,0,1,1,1,1,2,2} | {0,0,0,1,1,1,2,2} - 3 | 1 | {0,0,0,1,1,1,1,2,2} | {0,0,0,1,1,1,2,2} - 3 | 1 | {0,0,0,1,1,1,1,2,2} | {0,0,0,1,1,1,2,2} - 3 | 1 | {0,0,0,1,1,1,1,2,2} | {0,0,0,1,1,1,2,2} - 3 | 2 | {1,1,1,1,2,2,3,3} | {1,1,1,1,2,3,3} - 3 | 2 | {1,1,1,1,2,2,3,3} | {1,1,1,1,2,3,3} - 3 | 3 | {2,2,3,3,4,4,4,4,4} | {2,2,3,4,4,4,4,4} - 3 | 3 | {2,2,3,3,4,4,4,4,4} | {2,2,3,4,4,4,4,4} - 3 | 4 | {3,3,4,4,4,4,4,5} | {3,3,4,4,4,4,5} - 3 | 4 | {3,3,4,4,4,4,4,5} | {3,3,4,4,4,4,5} - 3 | 4 | {3,3,4,4,4,4,4,5} | {3,3,4,4,4,4,5} - 3 | 4 | {3,3,4,4,4,4,4,5} | {3,3,4,4,4,4,5} - 3 | 4 | {3,3,4,4,4,4,4,5} | {3,3,4,4,4,4,5} - 3 | 5 | {4,4,4,4,4,5} | {4,4,4,4,4} - 4 | 0 | {0,0,1,1} | {0,1,1} - 4 | 0 | {0,0,1,1} | {0,1,1} - 4 | 1 | {0,0,1,1,2,2,2} | {0,0,1,2,2,2} - 4 | 1 | {0,0,1,1,2,2,2} | {0,0,1,2,2,2} - 4 | 2 | {1,1,2,2,2,3,3,3,3,3,3,3} | {1,1,2,2,3,3,3,3,3,3,3} - 4 | 2 | {1,1,2,2,2,3,3,3,3,3,3,3} | {1,1,2,2,3,3,3,3,3,3,3} - 4 | 2 | {1,1,2,2,2,3,3,3,3,3,3,3} | {1,1,2,2,3,3,3,3,3,3,3} - 4 | 3 | {2,2,2,3,3,3,3,3,3,3,4,4,4,4} | {2,2,2,3,3,3,3,3,3,4,4,4,4} - 4 | 3 | {2,2,2,3,3,3,3,3,3,3,4,4,4,4} | {2,2,2,3,3,3,3,3,3,4,4,4,4} - 4 | 3 | {2,2,2,3,3,3,3,3,3,3,4,4,4,4} | {2,2,2,3,3,3,3,3,3,4,4,4,4} - 4 | 3 | {2,2,2,3,3,3,3,3,3,3,4,4,4,4} | {2,2,2,3,3,3,3,3,3,4,4,4,4} - 4 | 3 | {2,2,2,3,3,3,3,3,3,3,4,4,4,4} | {2,2,2,3,3,3,3,3,3,4,4,4,4} - 4 | 3 | {2,2,2,3,3,3,3,3,3,3,4,4,4,4} | {2,2,2,3,3,3,3,3,3,4,4,4,4} - 4 | 3 | {2,2,2,3,3,3,3,3,3,3,4,4,4,4} | {2,2,2,3,3,3,3,3,3,4,4,4,4} - 4 | 4 | {3,3,3,3,3,3,3,4,4,4,4,5,5} | {3,3,3,3,3,3,3,4,4,4,5,5} - 4 | 4 | {3,3,3,3,3,3,3,4,4,4,4,5,5} | {3,3,3,3,3,3,3,4,4,4,5,5} - 4 | 4 | {3,3,3,3,3,3,3,4,4,4,4,5,5} | {3,3,3,3,3,3,3,4,4,4,5,5} - 4 | 4 | {3,3,3,3,3,3,3,4,4,4,4,5,5} | {3,3,3,3,3,3,3,4,4,4,5,5} - 4 | 5 | {4,4,4,4,5,5} | {4,4,4,4,5} - 4 | 5 | {4,4,4,4,5,5} | {4,4,4,4,5} - 5 | 0 | {0,0,1} | {0,1} - 5 | 0 | {0,0,1} | {0,1} - 5 | 1 | {0,0,1,2,2} | {0,0,2,2} - 5 | 2 | {1,2,2,3,3,3,3} | {1,2,3,3,3,3} - 5 | 2 | {1,2,2,3,3,3,3} | {1,2,3,3,3,3} - 5 | 3 | {2,2,3,3,3,3,4,4} | {2,2,3,3,3,4,4} - 5 | 3 | {2,2,3,3,3,3,4,4} | {2,2,3,3,3,4,4} - 5 | 3 | {2,2,3,3,3,3,4,4} | {2,2,3,3,3,4,4} - 5 | 3 | {2,2,3,3,3,3,4,4} | {2,2,3,3,3,4,4} - 5 | 4 | {3,3,3,3,4,4,5,5} | {3,3,3,3,4,5,5} - 5 | 4 | {3,3,3,3,4,4,5,5} | {3,3,3,3,4,5,5} - 5 | 5 | {4,4,5,5} | {4,4,5} - 5 | 5 | {4,4,5,5} | {4,4,5} -(50 rows) - --- test preceding and following on ROW window -SELECT - value_2, - value_1, - array_agg(value_1) OVER row_window, - array_agg(value_1) OVER row_window_exclude -FROM - users_table -WHERE - value_2 > 2 and value_2 < 6 -WINDOW - row_window as (PARTITION BY value_2 ORDER BY value_1 ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING), - row_window_exclude as (PARTITION BY value_2 ORDER BY value_1 ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING EXCLUDE CURRENT ROW) -ORDER BY - value_2, value_1, 3, 4; - value_2 | value_1 | array_agg | array_agg ---------------------------------------------------------------------- - 3 | 0 | {0,0} | {0} - 3 | 0 | {0,0,0} | {0,0} - 3 | 0 | {0,0,1} | {0,1} - 3 | 1 | {0,1,1} | {0,1} - 3 | 1 | {1,1,1} | {1,1} - 3 | 1 | {1,1,1} | {1,1} - 3 | 1 | {1,1,2} | {1,2} - 3 | 2 | {1,2,2} | {1,2} - 3 | 2 | {2,2,3} | {2,3} - 3 | 3 | {2,3,3} | {2,3} - 3 | 3 | {3,3,4} | {3,4} - 3 | 4 | {3,4,4} | {3,4} - 3 | 4 | {4,4,4} | {4,4} - 3 | 4 | {4,4,4} | {4,4} - 3 | 4 | {4,4,4} | {4,4} - 3 | 4 | {4,4,5} | {4,5} - 3 | 5 | {4,5} | {4} - 4 | 0 | {0,0} | {0} - 4 | 0 | {0,0,1} | {0,1} - 4 | 1 | {0,1,1} | {0,1} - 4 | 1 | {1,1,2} | {1,2} - 4 | 2 | {1,2,2} | {1,2} - 4 | 2 | {2,2,2} | {2,2} - 4 | 2 | {2,2,3} | {2,3} - 4 | 3 | {2,3,3} | {2,3} - 4 | 3 | {3,3,3} | {3,3} - 4 | 3 | {3,3,3} | {3,3} - 4 | 3 | {3,3,3} | {3,3} - 4 | 3 | {3,3,3} | {3,3} - 4 | 3 | {3,3,3} | {3,3} - 4 | 3 | {3,3,4} | {3,4} - 4 | 4 | {3,4,4} | {3,4} - 4 | 4 | {4,4,4} | {4,4} - 4 | 4 | {4,4,4} | {4,4} - 4 | 4 | {4,4,5} | {4,5} - 4 | 5 | {4,5,5} | {4,5} - 4 | 5 | {5,5} | {5} - 5 | 0 | {0,0} | {0} - 5 | 0 | {0,0,1} | {0,1} - 5 | 1 | {0,1,2} | {0,2} - 5 | 2 | {1,2,2} | {1,2} - 5 | 2 | {2,2,3} | {2,3} - 5 | 3 | {2,3,3} | {2,3} - 5 | 3 | {3,3,3} | {3,3} - 5 | 3 | {3,3,3} | {3,3} - 5 | 3 | {3,3,4} | {3,4} - 5 | 4 | {3,4,4} | {3,4} - 5 | 4 | {4,4,5} | {4,5} - 5 | 5 | {4,5,5} | {4,5} - 5 | 5 | {5,5} | {5} -(50 rows) - --- some tests with GROUP BY, HAVING and LIMIT -SELECT - user_id, sum(event_type) OVER my_win , event_type -FROM - events_table -GROUP BY - user_id, event_type -HAVING count(*) > 2 - WINDOW my_win AS (PARTITION BY user_id, max(event_type) ORDER BY count(*) DESC) -ORDER BY - 2 DESC, 3 DESC, 1 DESC -LIMIT - 5; - user_id | sum | event_type ---------------------------------------------------------------------- - 4 | 4 | 4 - 3 | 4 | 4 - 2 | 4 | 4 - 1 | 4 | 4 - 5 | 3 | 3 -(5 rows) - --- test PARTITION BY avg(...) ORDER BY avg(...) -SELECT - value_1, - avg(value_3), - dense_rank() OVER (PARTITION BY avg(value_3) ORDER BY avg(value_2)) -FROM - users_table -GROUP BY - 1 -ORDER BY - 1; - value_1 | avg | dense_rank ---------------------------------------------------------------------- - 0 | 3.08333333333333 | 1 - 1 | 2.93333333333333 | 1 - 2 | 2.22222222222222 | 1 - 3 | 2.73076923076923 | 1 - 4 | 2.9047619047619 | 1 - 5 | 2.22222222222222 | 2 -(6 rows) - --- Group by has more columns than partition by -SELECT - DISTINCT user_id, SUM(value_2) OVER (PARTITION BY user_id) -FROM - users_table -GROUP BY - user_id, value_1, value_2 -HAVING count(*) > 2 -ORDER BY - 2 DESC, 1 -LIMIT - 10; - user_id | sum ---------------------------------------------------------------------- - 5 | 3 - 4 | 2 -(2 rows) - -SELECT - DISTINCT ON (user_id) user_id, SUM(value_2) OVER (PARTITION BY user_id) -FROM - users_table -GROUP BY - user_id, value_1, value_2 -HAVING count(*) > 2 -ORDER BY - 1, 2 DESC -LIMIT - 10; - user_id | sum ---------------------------------------------------------------------- - 4 | 2 - 5 | 3 -(2 rows) - -SELECT - DISTINCT ON (SUM(value_1) OVER (PARTITION BY user_id)) user_id, SUM(value_2) OVER (PARTITION BY user_id) -FROM - users_table -GROUP BY - user_id, value_1, value_2 -HAVING count(*) > 2 -ORDER BY - (SUM(value_1) OVER (PARTITION BY user_id)) , 2 DESC, 1 -LIMIT - 10; - user_id | sum ---------------------------------------------------------------------- - 5 | 3 - 4 | 2 -(2 rows) - --- not a meaningful query, with interesting syntax -SELECT - user_id, - AVG(avg(value_1)) OVER (PARTITION BY user_id, max(user_id), MIN(value_2)), - AVG(avg(user_id)) OVER (PARTITION BY user_id, min(user_id), AVG(value_1)) -FROM - users_table -GROUP BY - 1 -ORDER BY - 3 DESC, 2 DESC, 1 DESC; - user_id | avg | avg ---------------------------------------------------------------------- - 6 | 2.1000000000000000 | 6.0000000000000000 - 5 | 2.6538461538461538 | 5.0000000000000000 - 4 | 2.7391304347826087 | 4.0000000000000000 - 3 | 2.3529411764705882 | 3.0000000000000000 - 2 | 2.3333333333333333 | 2.0000000000000000 - 1 | 3.2857142857142857 | 1.00000000000000000000 -(6 rows) - -SELECT coordinator_plan($Q$ -EXPLAIN (COSTS FALSE) -SELECT - user_id, - AVG(avg(value_1)) OVER (PARTITION BY user_id, max(user_id), MIN(value_2)), - AVG(avg(user_id)) OVER (PARTITION BY user_id, min(user_id), AVG(value_1)) -FROM - users_table -GROUP BY - 1 -ORDER BY - 3 DESC, 2 DESC, 1 DESC; -$Q$); - coordinator_plan ---------------------------------------------------------------------- - Sort - Sort Key: remote_scan.avg_1 DESC, remote_scan.avg DESC, remote_scan.user_id DESC - -> Custom Scan (Citus Adaptive) - Task Count: 4 -(4 rows) - -SELECT - value_2, - AVG(avg(value_1)) OVER (PARTITION BY value_2, max(value_2), MIN(value_2)), - AVG(avg(value_2)) OVER (PARTITION BY value_2, min(value_2), AVG(value_1)) -FROM - users_table -GROUP BY - 1 -ORDER BY - 3 DESC, 2 DESC, 1 DESC; - value_2 | avg | avg ---------------------------------------------------------------------- - 5 | 2.6923076923076923 | 5.0000000000000000 - 4 | 2.7500000000000000 | 4.0000000000000000 - 3 | 2.2941176470588235 | 3.0000000000000000 - 2 | 2.7619047619047619 | 2.0000000000000000 - 1 | 2.4285714285714286 | 1.00000000000000000000 - 0 | 2.2222222222222222 | 0.00000000000000000000 -(6 rows) - -SELECT - value_2, user_id, - AVG(avg(value_1)) OVER (PARTITION BY value_2, max(value_2), MIN(value_2)), - AVG(avg(value_2)) OVER (PARTITION BY user_id, min(value_2), AVG(value_1)) -FROM - users_table -GROUP BY - 1, 2 -ORDER BY - 3 DESC, 2 DESC, 1 DESC; - value_2 | user_id | avg | avg ---------------------------------------------------------------------- - 5 | 5 | 2.6666666666666667 | 5.0000000000000000 - 5 | 4 | 2.6666666666666667 | 5.0000000000000000 - 5 | 3 | 2.6666666666666667 | 5.0000000000000000 - 5 | 2 | 2.6666666666666667 | 5.0000000000000000 - 2 | 6 | 2.54583333333333333333 | 2.0000000000000000 - 2 | 5 | 2.54583333333333333333 | 2.0000000000000000 - 2 | 4 | 2.54583333333333333333 | 2.0000000000000000 - 2 | 3 | 2.54583333333333333333 | 2.0000000000000000 - 2 | 2 | 2.54583333333333333333 | 2.0000000000000000 - 2 | 1 | 2.54583333333333333333 | 2.0000000000000000 - 0 | 6 | 2.50000000000000000000 | 0.00000000000000000000 - 0 | 5 | 2.50000000000000000000 | 0.00000000000000000000 - 0 | 4 | 2.50000000000000000000 | 0.00000000000000000000 - 0 | 2 | 2.50000000000000000000 | 0.00000000000000000000 - 0 | 1 | 2.50000000000000000000 | 0.00000000000000000000 - 4 | 6 | 2.45555555555555555000 | 4.0000000000000000 - 4 | 5 | 2.45555555555555555000 | 4.0000000000000000 - 4 | 4 | 2.45555555555555555000 | 4.0000000000000000 - 4 | 3 | 2.45555555555555555000 | 4.0000000000000000 - 4 | 2 | 2.45555555555555555000 | 4.0000000000000000 - 4 | 1 | 2.45555555555555555000 | 4.0000000000000000 - 3 | 6 | 2.3500000000000000 | 3.0000000000000000 - 3 | 5 | 2.3500000000000000 | 3.0000000000000000 - 3 | 4 | 2.3500000000000000 | 3.0000000000000000 - 3 | 3 | 2.3500000000000000 | 3.0000000000000000 - 3 | 2 | 2.3500000000000000 | 3.0000000000000000 - 3 | 1 | 2.3500000000000000 | 3.0000000000000000 - 1 | 6 | 1.90666666666666666000 | 1.00000000000000000000 - 1 | 5 | 1.90666666666666666000 | 1.00000000000000000000 - 1 | 4 | 1.90666666666666666000 | 1.00000000000000000000 - 1 | 3 | 1.90666666666666666000 | 1.00000000000000000000 - 1 | 2 | 1.90666666666666666000 | 1.00000000000000000000 -(32 rows) - -SELECT user_id, sum(avg(user_id)) OVER () -FROM users_table -GROUP BY user_id -ORDER BY 1 -LIMIT 10; - user_id | sum ---------------------------------------------------------------------- - 1 | 21.00000000000000000000 - 2 | 21.00000000000000000000 - 3 | 21.00000000000000000000 - 4 | 21.00000000000000000000 - 5 | 21.00000000000000000000 - 6 | 21.00000000000000000000 -(6 rows) - -SELECT - user_id, - 1 + sum(value_1), - 1 + AVG(value_2) OVER (partition by user_id) -FROM - users_table -GROUP BY - user_id, value_2 -ORDER BY - user_id, value_2; - user_id | ?column? | ?column? ---------------------------------------------------------------------- - 1 | 5 | 3.2500000000000000 - 1 | 4 | 3.2500000000000000 - 1 | 6 | 3.2500000000000000 - 1 | 12 | 3.2500000000000000 - 2 | 3 | 3.5000000000000000 - 2 | 5 | 3.5000000000000000 - 2 | 13 | 3.5000000000000000 - 2 | 6 | 3.5000000000000000 - 2 | 17 | 3.5000000000000000 - 2 | 4 | 3.5000000000000000 - 3 | 3 | 4.0000000000000000 - 3 | 13 | 4.0000000000000000 - 3 | 10 | 4.0000000000000000 - 3 | 2 | 4.0000000000000000 - 3 | 17 | 4.0000000000000000 - 4 | 4 | 3.5000000000000000 - 4 | 28 | 3.5000000000000000 - 4 | 1 | 3.5000000000000000 - 4 | 11 | 3.5000000000000000 - 4 | 17 | 3.5000000000000000 - 4 | 8 | 3.5000000000000000 - 5 | 7 | 3.5000000000000000 - 5 | 17 | 3.5000000000000000 - 5 | 24 | 3.5000000000000000 - 5 | 9 | 3.5000000000000000 - 5 | 8 | 3.5000000000000000 - 5 | 10 | 3.5000000000000000 - 6 | 6 | 3.0000000000000000 - 6 | 3 | 3.0000000000000000 - 6 | 9 | 3.0000000000000000 - 6 | 3 | 3.0000000000000000 - 6 | 5 | 3.0000000000000000 -(32 rows) - -SELECT - user_id, - 1 + sum(value_1), - 1 + AVG(value_2) OVER (partition by user_id) -FROM - users_table -GROUP BY - user_id, value_2 -ORDER BY - 2 DESC, 1 -LIMIT 5; - user_id | ?column? | ?column? ---------------------------------------------------------------------- - 4 | 28 | 3.5000000000000000 - 5 | 24 | 3.5000000000000000 - 2 | 17 | 3.5000000000000000 - 3 | 17 | 4.0000000000000000 - 4 | 17 | 3.5000000000000000 -(5 rows) - --- rank and ordering in the reverse order -SELECT - user_id, - avg(value_1), - RANK() OVER (partition by user_id order by value_2) -FROM - users_table -GROUP BY user_id, value_2 -ORDER BY user_id, value_2 DESC; - user_id | avg | rank ---------------------------------------------------------------------- - 1 | 3.6666666666666667 | 4 - 1 | 2.5000000000000000 | 3 - 1 | 3.0000000000000000 | 2 - 1 | 4.0000000000000000 | 1 - 2 | 1.5000000000000000 | 6 - 2 | 3.2000000000000000 | 5 - 2 | 1.6666666666666667 | 4 - 2 | 3.0000000000000000 | 3 - 2 | 1.3333333333333333 | 2 - 2 | 2.0000000000000000 | 1 - 3 | 2.6666666666666667 | 5 - 3 | 1.00000000000000000000 | 4 - 3 | 3.0000000000000000 | 3 - 3 | 2.4000000000000000 | 2 - 3 | 1.00000000000000000000 | 1 - 4 | 3.5000000000000000 | 6 - 4 | 3.2000000000000000 | 5 - 4 | 3.3333333333333333 | 4 - 4 | 0.00000000000000000000 | 3 - 4 | 3.0000000000000000 | 2 - 4 | 1.00000000000000000000 | 1 - 5 | 3.0000000000000000 | 6 - 5 | 2.3333333333333333 | 5 - 5 | 1.6000000000000000 | 4 - 5 | 2.8750000000000000 | 3 - 5 | 3.2000000000000000 | 2 - 5 | 3.0000000000000000 | 1 - 6 | 1.3333333333333333 | 5 - 6 | 2.0000000000000000 | 4 - 6 | 4.0000000000000000 | 3 - 6 | 1.00000000000000000000 | 2 - 6 | 2.5000000000000000 | 1 -(32 rows) - --- order by in the window function is same as avg(value_1) DESC -SELECT - user_id, - avg(value_1), - RANK() OVER (partition by user_id order by 1 / (1 + avg(value_1))) -FROM - users_table -GROUP BY user_id, value_2 -ORDER BY user_id, avg(value_1) DESC; - user_id | avg | rank ---------------------------------------------------------------------- - 1 | 4.0000000000000000 | 1 - 1 | 3.6666666666666667 | 2 - 1 | 3.0000000000000000 | 3 - 1 | 2.5000000000000000 | 4 - 2 | 3.2000000000000000 | 1 - 2 | 3.0000000000000000 | 2 - 2 | 2.0000000000000000 | 3 - 2 | 1.6666666666666667 | 4 - 2 | 1.5000000000000000 | 5 - 2 | 1.3333333333333333 | 6 - 3 | 3.0000000000000000 | 1 - 3 | 2.6666666666666667 | 2 - 3 | 2.4000000000000000 | 3 - 3 | 1.00000000000000000000 | 4 - 3 | 1.00000000000000000000 | 4 - 4 | 3.5000000000000000 | 1 - 4 | 3.3333333333333333 | 2 - 4 | 3.2000000000000000 | 3 - 4 | 3.0000000000000000 | 4 - 4 | 1.00000000000000000000 | 5 - 4 | 0.00000000000000000000 | 6 - 5 | 3.2000000000000000 | 1 - 5 | 3.0000000000000000 | 2 - 5 | 3.0000000000000000 | 2 - 5 | 2.8750000000000000 | 4 - 5 | 2.3333333333333333 | 5 - 5 | 1.6000000000000000 | 6 - 6 | 4.0000000000000000 | 1 - 6 | 2.5000000000000000 | 2 - 6 | 2.0000000000000000 | 3 - 6 | 1.3333333333333333 | 4 - 6 | 1.00000000000000000000 | 5 -(32 rows) - -EXPLAIN (COSTS FALSE) -SELECT - user_id, - avg(value_1), - RANK() OVER (partition by user_id order by 1 / (1 + avg(value_1))) -FROM - users_table -GROUP BY user_id, value_2 -ORDER BY user_id, avg(value_1) DESC; - QUERY PLAN ---------------------------------------------------------------------- - Sort - Sort Key: remote_scan.user_id, remote_scan.avg DESC - -> Custom Scan (Citus Adaptive) - Task Count: 4 - Tasks Shown: One of 4 - -> Task - Node: host=localhost port=xxxxx dbname=regression - -> WindowAgg - -> Sort - Sort Key: users_table.user_id, (('1'::numeric / ('1'::numeric + avg(users_table.value_1)))) - -> HashAggregate - Group Key: users_table.user_id, users_table.value_2 - -> Seq Scan on users_table_1400256 users_table -(13 rows) - --- order by in the window function is same as avg(value_1) DESC -SELECT - user_id, - avg(value_1), - RANK() OVER (partition by user_id order by 1 / (1 + avg(value_1))) -FROM - users_table -GROUP BY user_id, value_2 -ORDER BY user_id, avg(value_1) DESC; - user_id | avg | rank ---------------------------------------------------------------------- - 1 | 4.0000000000000000 | 1 - 1 | 3.6666666666666667 | 2 - 1 | 3.0000000000000000 | 3 - 1 | 2.5000000000000000 | 4 - 2 | 3.2000000000000000 | 1 - 2 | 3.0000000000000000 | 2 - 2 | 2.0000000000000000 | 3 - 2 | 1.6666666666666667 | 4 - 2 | 1.5000000000000000 | 5 - 2 | 1.3333333333333333 | 6 - 3 | 3.0000000000000000 | 1 - 3 | 2.6666666666666667 | 2 - 3 | 2.4000000000000000 | 3 - 3 | 1.00000000000000000000 | 4 - 3 | 1.00000000000000000000 | 4 - 4 | 3.5000000000000000 | 1 - 4 | 3.3333333333333333 | 2 - 4 | 3.2000000000000000 | 3 - 4 | 3.0000000000000000 | 4 - 4 | 1.00000000000000000000 | 5 - 4 | 0.00000000000000000000 | 6 - 5 | 3.2000000000000000 | 1 - 5 | 3.0000000000000000 | 2 - 5 | 3.0000000000000000 | 2 - 5 | 2.8750000000000000 | 4 - 5 | 2.3333333333333333 | 5 - 5 | 1.6000000000000000 | 6 - 6 | 4.0000000000000000 | 1 - 6 | 2.5000000000000000 | 2 - 6 | 2.0000000000000000 | 3 - 6 | 1.3333333333333333 | 4 - 6 | 1.00000000000000000000 | 5 -(32 rows) - --- limit is not pushed down to worker !! -EXPLAIN (COSTS FALSE) -SELECT - user_id, - avg(value_1), - RANK() OVER (partition by user_id order by 1 / (1 + avg(value_1))) -FROM - users_table -GROUP BY user_id, value_2 -ORDER BY user_id, avg(value_1) DESC -LIMIT 5; - QUERY PLAN ---------------------------------------------------------------------- - Limit - -> Sort - Sort Key: remote_scan.user_id, remote_scan.avg DESC - -> Custom Scan (Citus Adaptive) - Task Count: 4 - Tasks Shown: One of 4 - -> Task - Node: host=localhost port=xxxxx dbname=regression - -> Limit - -> Sort - Sort Key: users_table.user_id, (avg(users_table.value_1)) DESC - -> WindowAgg - -> Sort - Sort Key: users_table.user_id, (('1'::numeric / ('1'::numeric + avg(users_table.value_1)))) - -> HashAggregate - Group Key: users_table.user_id, users_table.value_2 - -> Seq Scan on users_table_1400256 users_table -(17 rows) - -EXPLAIN (COSTS FALSE) -SELECT - user_id, - avg(value_1), - RANK() OVER (partition by user_id order by 1 / (1 + avg(value_1))) -FROM - users_table -GROUP BY user_id, value_2 -ORDER BY user_id, avg(value_1) DESC -LIMIT 5; - QUERY PLAN ---------------------------------------------------------------------- - Limit - -> Sort - Sort Key: remote_scan.user_id, remote_scan.avg DESC - -> Custom Scan (Citus Adaptive) - Task Count: 4 - Tasks Shown: One of 4 - -> Task - Node: host=localhost port=xxxxx dbname=regression - -> Limit - -> Sort - Sort Key: users_table.user_id, (avg(users_table.value_1)) DESC - -> WindowAgg - -> Sort - Sort Key: users_table.user_id, (('1'::numeric / ('1'::numeric + avg(users_table.value_1)))) - -> HashAggregate - Group Key: users_table.user_id, users_table.value_2 - -> Seq Scan on users_table_1400256 users_table -(17 rows) - -EXPLAIN (COSTS FALSE) -SELECT - user_id, - avg(value_1), - RANK() OVER (partition by user_id order by 1 / (1 + sum(value_2))) -FROM - users_table -GROUP BY user_id, value_2 -ORDER BY user_id, avg(value_1) DESC -LIMIT 5; - QUERY PLAN ---------------------------------------------------------------------- - Limit - -> Sort - Sort Key: remote_scan.user_id, remote_scan.avg DESC - -> Custom Scan (Citus Adaptive) - Task Count: 4 - Tasks Shown: One of 4 - -> Task - Node: host=localhost port=xxxxx dbname=regression - -> Limit - -> Sort - Sort Key: users_table.user_id, (avg(users_table.value_1)) DESC - -> WindowAgg - -> Sort - Sort Key: users_table.user_id, ((1 / (1 + sum(users_table.value_2)))) - -> HashAggregate - Group Key: users_table.user_id, users_table.value_2 - -> Seq Scan on users_table_1400256 users_table -(17 rows) - -EXPLAIN (COSTS FALSE) -SELECT - user_id, - avg(value_1), - RANK() OVER (partition by user_id order by sum(value_2)) -FROM - users_table -GROUP BY user_id, value_2 -ORDER BY user_id, avg(value_1) DESC -LIMIT 5; - QUERY PLAN ---------------------------------------------------------------------- - Limit - -> Sort - Sort Key: remote_scan.user_id, remote_scan.avg DESC - -> Custom Scan (Citus Adaptive) - Task Count: 4 - Tasks Shown: One of 4 - -> Task - Node: host=localhost port=xxxxx dbname=regression - -> Limit - -> Sort - Sort Key: users_table.user_id, (avg(users_table.value_1)) DESC - -> WindowAgg - -> Sort - Sort Key: users_table.user_id, (sum(users_table.value_2)) - -> HashAggregate - Group Key: users_table.user_id, users_table.value_2 - -> Seq Scan on users_table_1400256 users_table -(17 rows) - --- Grouping can be pushed down with aggregates even when window function can't -EXPLAIN (COSTS FALSE) -SELECT user_id, count(value_1), stddev(value_1), count(user_id) OVER (PARTITION BY random()) -FROM users_table GROUP BY user_id HAVING avg(value_1) > 2 LIMIT 1; - QUERY PLAN ---------------------------------------------------------------------- - Limit - -> WindowAgg - -> Sort - Sort Key: remote_scan.worker_column_5 - -> Custom Scan (Citus Adaptive) - Task Count: 4 - Tasks Shown: One of 4 - -> Task - Node: host=localhost port=xxxxx dbname=regression - -> HashAggregate - Group Key: user_id - Filter: (avg(value_1) > '2'::numeric) - -> Seq Scan on users_table_1400256 users_table -(13 rows) - --- Window function with inlined CTE -WITH cte as ( - SELECT uref.id user_id, events_table.value_2, count(*) c - FROM events_table - JOIN users_ref_test_table uref ON uref.id = events_table.user_id - GROUP BY 1, 2 -) -SELECT DISTINCT cte.value_2, cte.c, sum(cte.value_2) OVER (PARTITION BY cte.c) -FROM cte JOIN events_table et ON et.value_2 = cte.value_2 and et.value_2 = cte.c -ORDER BY 1; - value_2 | c | sum ---------------------------------------------------------------------- - 3 | 3 | 108 - 4 | 4 | 56 -(2 rows) - --- There was a strange bug where this wouldn't have window functions being pushed down --- Bug dependent on column ordering -CREATE TABLE daily_uniques (value_2 float, user_id bigint); -SELECT create_distributed_table('daily_uniques', 'user_id'); - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - -EXPLAIN (COSTS FALSE) SELECT - user_id, - sum(value_2) AS commits, - RANK () OVER ( - PARTITION BY user_id - ORDER BY - sum(value_2) DESC - ) -FROM daily_uniques -GROUP BY user_id -HAVING - sum(value_2) > 0 -ORDER BY commits DESC -LIMIT 10; - QUERY PLAN ---------------------------------------------------------------------- - Limit - -> Sort - Sort Key: remote_scan.commits DESC - -> Custom Scan (Citus Adaptive) - Task Count: 4 - Tasks Shown: One of 4 - -> Task - Node: host=localhost port=xxxxx dbname=regression - -> Limit - -> Sort - Sort Key: (sum(daily_uniques.value_2)) DESC - -> WindowAgg - -> Sort - Sort Key: daily_uniques.user_id, (sum(daily_uniques.value_2)) DESC - -> HashAggregate - Group Key: daily_uniques.user_id - Filter: (sum(daily_uniques.value_2) > '0'::double precision) - -> Seq Scan on daily_uniques_xxxxxxx daily_uniques -(18 rows) - -DROP TABLE daily_uniques; --- Partition by reference table column joined to distribution column -SELECT DISTINCT value_2, array_agg(rnk ORDER BY rnk) FROM ( -SELECT events_table.value_2, sum(uref.k_no) OVER (PARTITION BY uref.id) AS rnk -FROM events_table -JOIN users_ref_test_table uref ON uref.id = events_table.user_id) sq -GROUP BY 1 ORDER BY 1; - value_2 | array_agg ---------------------------------------------------------------------- - 0 | {686,686,816,816,987,987,1104} - 1 | {500,500,675,675,675,686,686,816,816,816,987,987,987,987,987,1104,1104,1104,1104,1104,1104,1104} - 2 | {500,500,500,500,675,675,675,675,675,686,686,686,686,816,816,816,816,816,987,987,987,987,987,987,987,1104,1104,1104,1104,1104,1104} - 3 | {500,500,500,500,675,686,686,686,816,816,987,987,987,1104,1104,1104,1104,1104} - 4 | {675,675,675,675,686,686,686,816,816,816,987,987,1104,1104} - 5 | {675,675,816,816,987,987,1104,1104,1104} -(6 rows) - --- https://github.com/citusdata/citus/issues/3754 -select null = sum(null::int2) over () -from public.users_table as ut limit 1; - ?column? ---------------------------------------------------------------------- - -(1 row) - --- verify that this doesn't crash with DEBUG4 -SET log_min_messages TO DEBUG4; -SELECT - user_id, max(value_1) OVER (PARTITION BY user_id, MIN(value_2)) -FROM ( - SELECT - DISTINCT us.user_id, us.value_2, value_1, random() as r1 - FROM - users_table as us, events_table - WHERE - us.user_id = events_table.user_id AND event_type IN (1,2) - ORDER BY - user_id, value_2 - ) s -GROUP BY - 1, value_1 -ORDER BY - 2 DESC, 1; - user_id | max ---------------------------------------------------------------------- - 1 | 5 - 3 | 5 - 3 | 5 - 4 | 5 - 5 | 5 - 5 | 5 - 6 | 5 - 6 | 5 - 1 | 4 - 2 | 4 - 3 | 4 - 3 | 4 - 3 | 4 - 4 | 4 - 4 | 4 - 5 | 4 - 5 | 4 - 1 | 3 - 2 | 3 - 2 | 3 - 2 | 3 - 6 | 3 - 2 | 2 - 4 | 2 - 4 | 2 - 4 | 2 - 6 | 2 - 1 | 1 - 3 | 1 - 5 | 1 - 6 | 1 - 5 | 0 -(32 rows) - diff --git a/src/test/regress/sql/window_functions.sql b/src/test/regress/sql/window_functions.sql index 5c94515a9..de936c95c 100644 --- a/src/test/regress/sql/window_functions.sql +++ b/src/test/regress/sql/window_functions.sql @@ -1,6 +1,11 @@ +-- +-- WINDOW_FUNCTIONS -- =================================================================== -- test top level window functions that are pushdownable -- =================================================================== +-- This test file has an alternative output because of use of +-- incremental sort in some explain outputs in PG13 +-- -- a very simple window function with an aggregate and a window function -- distribution column is on the partition by clause