mirror of https://github.com/citusdata/citus.git
388 lines
12 KiB
Plaintext
388 lines
12 KiB
Plaintext
CREATE SCHEMA am_columnar_join;
|
|
SET search_path TO am_columnar_join;
|
|
CREATE TABLE users (id int, name text) USING columnar;
|
|
INSERT INTO users SELECT a, 'name' || a FROM generate_series(0,30-1) AS a;
|
|
CREATE TABLE things (id int, user_id int, name text) USING columnar;
|
|
INSERT INTO things SELECT a, a % 30, 'thing' || a FROM generate_series(1,300) AS a;
|
|
-- force the nested loop to rescan the table
|
|
SET enable_material TO off;
|
|
SET enable_hashjoin TO off;
|
|
SET enable_mergejoin TO off;
|
|
SELECT count(*)
|
|
FROM users
|
|
JOIN things ON (users.id = things.user_id)
|
|
WHERE things.id > 290;
|
|
count
|
|
---------------------------------------------------------------------
|
|
10
|
|
(1 row)
|
|
|
|
-- verify the join uses a nested loop to trigger the rescan behaviour
|
|
EXPLAIN (COSTS OFF)
|
|
SELECT count(*)
|
|
FROM users
|
|
JOIN things ON (users.id = things.user_id)
|
|
WHERE things.id > 299990;
|
|
QUERY PLAN
|
|
---------------------------------------------------------------------
|
|
Aggregate
|
|
-> Nested Loop
|
|
Join Filter: (users.id = things.user_id)
|
|
-> Custom Scan (ColumnarScan) on things
|
|
Filter: (id > 299990)
|
|
Columnar Projected Columns: id, user_id
|
|
Columnar Chunk Group Filters: (id > 299990)
|
|
-> Custom Scan (ColumnarScan) on users
|
|
Columnar Projected Columns: id
|
|
(9 rows)
|
|
|
|
EXPLAIN (COSTS OFF)
|
|
SELECT u1.id, u2.id, COUNT(u2.*)
|
|
FROM users u1
|
|
JOIN users u2 ON (u1.id::text = u2.name)
|
|
WHERE u2.id > 299990
|
|
GROUP BY u1.id, u2.id;
|
|
QUERY PLAN
|
|
---------------------------------------------------------------------
|
|
HashAggregate
|
|
Group Key: u1.id, u2.id
|
|
-> Nested Loop
|
|
Join Filter: ((u1.id)::text = u2.name)
|
|
-> Custom Scan (ColumnarScan) on users u2
|
|
Filter: (id > 299990)
|
|
Columnar Projected Columns: id, name
|
|
Columnar Chunk Group Filters: (id > 299990)
|
|
-> Custom Scan (ColumnarScan) on users u1
|
|
Columnar Projected Columns: id
|
|
(10 rows)
|
|
|
|
-- ================================
|
|
-- join COLUMNAR with HEAP
|
|
-- ================================
|
|
-- Left Join with Mixed Table Types
|
|
CREATE TABLE tbl_left_heap1 (id integer);
|
|
CREATE TABLE tbl_left_heap2 (id integer);
|
|
CREATE TABLE tbl_left_columnar (id integer) USING columnar;
|
|
INSERT INTO tbl_left_heap1 VALUES (1), (2), (3), (4);
|
|
INSERT INTO tbl_left_heap2 VALUES (2), (3), (5), (6);
|
|
INSERT INTO tbl_left_columnar VALUES (3), (5), (7);
|
|
SELECT *
|
|
FROM tbl_left_heap1 h1
|
|
LEFT JOIN tbl_left_heap2 h2 ON h1.id = h2.id
|
|
LEFT JOIN tbl_left_columnar c ON h2.id = c.id
|
|
ORDER BY 1;
|
|
id | id | id
|
|
---------------------------------------------------------------------
|
|
1 | |
|
|
2 | 2 |
|
|
3 | 3 | 3
|
|
4 | |
|
|
(4 rows)
|
|
|
|
-- Left Join with Filter
|
|
CREATE TABLE tbl_left_filter_heap1 (id integer);
|
|
CREATE TABLE tbl_left_filter_heap2 (id integer);
|
|
CREATE TABLE tbl_left_filter_columnar (id integer) USING columnar;
|
|
INSERT INTO tbl_left_filter_heap1 VALUES (1), (2), (3), (4);
|
|
INSERT INTO tbl_left_filter_heap2 VALUES (2), (3), (5), (6);
|
|
INSERT INTO tbl_left_filter_columnar VALUES (3), (5), (7);
|
|
SELECT *
|
|
FROM tbl_left_filter_heap1 h1
|
|
LEFT JOIN tbl_left_filter_heap2 h2 ON h1.id = h2.id
|
|
LEFT JOIN tbl_left_filter_columnar c ON h2.id = c.id
|
|
WHERE h1.id > 2
|
|
ORDER BY 1;
|
|
id | id | id
|
|
---------------------------------------------------------------------
|
|
3 | 3 | 3
|
|
4 | |
|
|
(2 rows)
|
|
|
|
-- Right Join with Mixed Table Types
|
|
CREATE TABLE tbl_right_heap1 (id integer);
|
|
CREATE TABLE tbl_right_heap2 (id integer);
|
|
CREATE TABLE tbl_right_columnar (id integer) USING columnar;
|
|
INSERT INTO tbl_right_heap1 VALUES (1), (2), (3), (4);
|
|
INSERT INTO tbl_right_heap2 VALUES (2), (3), (5), (6);
|
|
INSERT INTO tbl_right_columnar VALUES (3), (5), (7);
|
|
SELECT *
|
|
FROM tbl_right_heap1 h1
|
|
RIGHT JOIN tbl_right_heap2 h2 ON h1.id = h2.id
|
|
RIGHT JOIN tbl_right_columnar c ON h2.id = c.id
|
|
ORDER BY 3;
|
|
id | id | id
|
|
---------------------------------------------------------------------
|
|
3 | 3 | 3
|
|
| 5 | 5
|
|
| | 7
|
|
(3 rows)
|
|
|
|
-- Right Join with Filters
|
|
CREATE TABLE tbl_right_filter_heap1 (id integer);
|
|
CREATE TABLE tbl_right_filter_heap2 (id integer);
|
|
CREATE TABLE tbl_right_filter_columnar (id integer) USING columnar;
|
|
INSERT INTO tbl_right_filter_heap1 VALUES (1), (2), (3), (4);
|
|
INSERT INTO tbl_right_filter_heap2 VALUES (2), (3), (5), (6);
|
|
INSERT INTO tbl_right_filter_columnar VALUES (3), (5), (7);
|
|
SELECT *
|
|
FROM tbl_right_filter_heap1 h1
|
|
RIGHT JOIN tbl_right_filter_heap2 h2 ON h1.id = h2.id
|
|
RIGHT JOIN tbl_right_filter_columnar c ON h2.id = c.id
|
|
WHERE c.id < 6
|
|
ORDER BY 3;
|
|
id | id | id
|
|
---------------------------------------------------------------------
|
|
3 | 3 | 3
|
|
| 5 | 5
|
|
(2 rows)
|
|
|
|
-- Inner Join with Mixed Table Types
|
|
CREATE TABLE tbl_heap1 (id serial primary key, val integer);
|
|
CREATE TABLE tbl_heap2 (id serial primary key, val integer);
|
|
CREATE TABLE tbl_columnar (id integer, val integer) USING columnar;
|
|
INSERT INTO tbl_heap1 (val) SELECT generate_series(1, 100);
|
|
INSERT INTO tbl_heap2 (val) SELECT generate_series(50, 150);
|
|
INSERT INTO tbl_columnar SELECT generate_series(75, 125), generate_series(200, 250);
|
|
SELECT h1.id, h1.val, h2.val, c.val
|
|
FROM tbl_heap1 h1
|
|
JOIN tbl_heap2 h2 ON h1.val = h2.val
|
|
JOIN tbl_columnar c ON h1.val = c.id
|
|
ORDER BY 1;
|
|
id | val | val | val
|
|
---------------------------------------------------------------------
|
|
75 | 75 | 75 | 200
|
|
76 | 76 | 76 | 201
|
|
77 | 77 | 77 | 202
|
|
78 | 78 | 78 | 203
|
|
79 | 79 | 79 | 204
|
|
80 | 80 | 80 | 205
|
|
81 | 81 | 81 | 206
|
|
82 | 82 | 82 | 207
|
|
83 | 83 | 83 | 208
|
|
84 | 84 | 84 | 209
|
|
85 | 85 | 85 | 210
|
|
86 | 86 | 86 | 211
|
|
87 | 87 | 87 | 212
|
|
88 | 88 | 88 | 213
|
|
89 | 89 | 89 | 214
|
|
90 | 90 | 90 | 215
|
|
91 | 91 | 91 | 216
|
|
92 | 92 | 92 | 217
|
|
93 | 93 | 93 | 218
|
|
94 | 94 | 94 | 219
|
|
95 | 95 | 95 | 220
|
|
96 | 96 | 96 | 221
|
|
97 | 97 | 97 | 222
|
|
98 | 98 | 98 | 223
|
|
99 | 99 | 99 | 224
|
|
100 | 100 | 100 | 225
|
|
(26 rows)
|
|
|
|
-- Outer Join with NULLs
|
|
CREATE TABLE tbl_null_heap (id integer, val integer);
|
|
CREATE TABLE tbl_null_columnar (id integer, val integer) USING columnar;
|
|
INSERT INTO tbl_null_heap VALUES (1, NULL), (2, 20), (3, 30);
|
|
INSERT INTO tbl_null_columnar VALUES (1, 100), (NULL, 200), (3, 300);
|
|
SELECT nh.id, nh.val, nc.val
|
|
FROM tbl_null_heap nh
|
|
FULL OUTER JOIN tbl_null_columnar nc ON nh.id = nc.id
|
|
ORDER BY 1;
|
|
id | val | val
|
|
---------------------------------------------------------------------
|
|
1 | | 100
|
|
2 | 20 |
|
|
3 | 30 | 300
|
|
| | 200
|
|
(4 rows)
|
|
|
|
-- Join with Aggregates
|
|
CREATE TABLE tbl_agg_heap (id serial primary key, val integer);
|
|
CREATE TABLE tbl_agg_columnar (id integer, val integer) USING columnar;
|
|
INSERT INTO tbl_agg_heap (val) SELECT generate_series(1, 100);
|
|
INSERT INTO tbl_agg_columnar SELECT generate_series(50, 150), generate_series(200, 300);
|
|
SELECT ah.val AS heap_val, COUNT(ac.val) AS columnar_count
|
|
FROM tbl_agg_heap ah
|
|
LEFT JOIN tbl_agg_columnar ac ON ah.val = ac.id
|
|
GROUP BY ah.val
|
|
ORDER BY ah.val;
|
|
heap_val | columnar_count
|
|
---------------------------------------------------------------------
|
|
1 | 0
|
|
2 | 0
|
|
3 | 0
|
|
4 | 0
|
|
5 | 0
|
|
6 | 0
|
|
7 | 0
|
|
8 | 0
|
|
9 | 0
|
|
10 | 0
|
|
11 | 0
|
|
12 | 0
|
|
13 | 0
|
|
14 | 0
|
|
15 | 0
|
|
16 | 0
|
|
17 | 0
|
|
18 | 0
|
|
19 | 0
|
|
20 | 0
|
|
21 | 0
|
|
22 | 0
|
|
23 | 0
|
|
24 | 0
|
|
25 | 0
|
|
26 | 0
|
|
27 | 0
|
|
28 | 0
|
|
29 | 0
|
|
30 | 0
|
|
31 | 0
|
|
32 | 0
|
|
33 | 0
|
|
34 | 0
|
|
35 | 0
|
|
36 | 0
|
|
37 | 0
|
|
38 | 0
|
|
39 | 0
|
|
40 | 0
|
|
41 | 0
|
|
42 | 0
|
|
43 | 0
|
|
44 | 0
|
|
45 | 0
|
|
46 | 0
|
|
47 | 0
|
|
48 | 0
|
|
49 | 0
|
|
50 | 1
|
|
51 | 1
|
|
52 | 1
|
|
53 | 1
|
|
54 | 1
|
|
55 | 1
|
|
56 | 1
|
|
57 | 1
|
|
58 | 1
|
|
59 | 1
|
|
60 | 1
|
|
61 | 1
|
|
62 | 1
|
|
63 | 1
|
|
64 | 1
|
|
65 | 1
|
|
66 | 1
|
|
67 | 1
|
|
68 | 1
|
|
69 | 1
|
|
70 | 1
|
|
71 | 1
|
|
72 | 1
|
|
73 | 1
|
|
74 | 1
|
|
75 | 1
|
|
76 | 1
|
|
77 | 1
|
|
78 | 1
|
|
79 | 1
|
|
80 | 1
|
|
81 | 1
|
|
82 | 1
|
|
83 | 1
|
|
84 | 1
|
|
85 | 1
|
|
86 | 1
|
|
87 | 1
|
|
88 | 1
|
|
89 | 1
|
|
90 | 1
|
|
91 | 1
|
|
92 | 1
|
|
93 | 1
|
|
94 | 1
|
|
95 | 1
|
|
96 | 1
|
|
97 | 1
|
|
98 | 1
|
|
99 | 1
|
|
100 | 1
|
|
(100 rows)
|
|
|
|
-- Join with Filters
|
|
CREATE TABLE tbl_filter_heap (id integer, val integer);
|
|
CREATE TABLE tbl_filter_columnar (id integer, val integer) USING columnar;
|
|
INSERT INTO tbl_filter_heap SELECT generate_series(1, 100), generate_series(1001, 1100);
|
|
INSERT INTO tbl_filter_columnar SELECT generate_series(90, 120), generate_series(2001, 2031);
|
|
SELECT fh.id, fh.val, fc.val
|
|
FROM tbl_filter_heap fh
|
|
INNER JOIN tbl_filter_columnar fc ON fh.id = fc.id
|
|
WHERE fh.val > 1050 AND fc.val < 2025
|
|
ORDER BY 1;
|
|
id | val | val
|
|
---------------------------------------------------------------------
|
|
90 | 1090 | 2001
|
|
91 | 1091 | 2002
|
|
92 | 1092 | 2003
|
|
93 | 1093 | 2004
|
|
94 | 1094 | 2005
|
|
95 | 1095 | 2006
|
|
96 | 1096 | 2007
|
|
97 | 1097 | 2008
|
|
98 | 1098 | 2009
|
|
99 | 1099 | 2010
|
|
100 | 1100 | 2011
|
|
(11 rows)
|
|
|
|
-- Cross Join
|
|
CREATE TABLE tbl_cross_heap (id integer, val integer);
|
|
CREATE TABLE tbl_cross_columnar (id integer, val integer) USING columnar;
|
|
INSERT INTO tbl_cross_heap VALUES (1, 10), (2, 20), (3, 30);
|
|
INSERT INTO tbl_cross_columnar VALUES (4, 40), (5, 50), (6, 60);
|
|
SELECT h.id AS heap_id, h.val AS heap_val, c.id AS columnar_id, c.val AS columnar_val
|
|
FROM tbl_cross_heap h
|
|
CROSS JOIN tbl_cross_columnar c
|
|
ORDER BY 3,4,1,2;
|
|
heap_id | heap_val | columnar_id | columnar_val
|
|
---------------------------------------------------------------------
|
|
1 | 10 | 4 | 40
|
|
2 | 20 | 4 | 40
|
|
3 | 30 | 4 | 40
|
|
1 | 10 | 5 | 50
|
|
2 | 20 | 5 | 50
|
|
3 | 30 | 5 | 50
|
|
1 | 10 | 6 | 60
|
|
2 | 20 | 6 | 60
|
|
3 | 30 | 6 | 60
|
|
(9 rows)
|
|
|
|
-- Left Join with Mixed Table Types and columnar in the middle
|
|
CREATE TABLE tbl_middle_left_heap1 (id integer);
|
|
CREATE TABLE tbl_middle_left_heap2 (id integer);
|
|
CREATE TABLE tbl_middle_left_columnar (id integer) USING columnar;
|
|
INSERT INTO tbl_middle_left_heap1 VALUES (1), (2), (3), (4);
|
|
INSERT INTO tbl_middle_left_heap2 VALUES (2), (3), (5), (6);
|
|
INSERT INTO tbl_middle_left_columnar VALUES (3), (5), (7);
|
|
EXPLAIN (COSTS OFF)
|
|
SELECT h1.*, h2.*, c.*
|
|
FROM tbl_middle_left_heap1 h1
|
|
LEFT JOIN tbl_middle_left_columnar c ON h1.id = c.id
|
|
LEFT JOIN tbl_middle_left_heap2 h2 ON c.id = h2.id
|
|
ORDER BY 1;
|
|
QUERY PLAN
|
|
---------------------------------------------------------------------
|
|
Sort
|
|
Sort Key: h1.id
|
|
-> Nested Loop Left Join
|
|
Join Filter: (c.id = h2.id)
|
|
-> Nested Loop Left Join
|
|
Join Filter: (h1.id = c.id)
|
|
-> Seq Scan on tbl_middle_left_heap1 h1
|
|
-> Custom Scan (ColumnarScan) on tbl_middle_left_columnar c
|
|
Columnar Projected Columns: id
|
|
-> Seq Scan on tbl_middle_left_heap2 h2
|
|
(10 rows)
|
|
|
|
-- End test case
|
|
SET client_min_messages TO warning;
|
|
DROP SCHEMA am_columnar_join CASCADE; |