We previously had a comprehensive prohibition on functions. With this

commit, restrictions on functions are relaxed, allowing the use of
functions in the source-query of MERGE.
pull/7065/head
Teja Mupparti 2023-07-10 09:43:31 -07:00
parent 87dc88f837
commit 90b674d29e
8 changed files with 365 additions and 60 deletions

View File

@ -21,6 +21,7 @@
#include "distributed/citus_clauses.h"
#include "distributed/citus_custom_scan.h"
#include "distributed/citus_ruleutils.h"
#include "distributed/insert_select_planner.h"
#include "distributed/listutils.h"
#include "distributed/local_distributed_join_planner.h"
@ -701,13 +702,6 @@ ErrorIfRepartitionMergeNotSupported(Oid targetRelationId, Query *mergeQuery,
"Postgres table is not yet supported")));
}
queryRteListProperties = GetRTEListPropertiesForQuery(sourceQuery);
if (!queryRteListProperties->hasCitusTable)
{
ereport(ERROR, (errmsg("To MERGE into a distributed table, source must "
"be Citus table(s)")));
}
/*
* Sub-queries and CTEs are not allowed in actions and ON clause
*/
@ -722,6 +716,14 @@ ErrorIfRepartitionMergeNotSupported(Oid targetRelationId, Query *mergeQuery,
"routable query")));
}
/*
* Sequences are not supported
*/
if (FindNodeMatchingCheckFunction((Node *) mergeQuery, IsNextValExpr))
{
ereport(ERROR, (errmsg("Distributed MERGE doesn't support sequences yet")));
}
MergeAction *action = NULL;
foreach_ptr(action, mergeQuery->mergeActionList)
{
@ -1076,6 +1078,17 @@ DeferErrorIfRoutableMergeNotSupported(Query *query, List *rangeTableList,
"repartitioning")));
return deferredError;
}
/*
* Check for any volatile or stable functions
*/
if (contain_mutable_functions((Node *) query))
{
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
"non-IMMUTABLE functions are not routable, "
"try repartitioning", NULL, NULL);
}
return NULL;
}
@ -1089,20 +1102,6 @@ DeferErrorIfRoutableMergeNotSupported(Query *query, List *rangeTableList,
static void
ErrorIfMergeQueryQualAndTargetListNotSupported(Oid targetRelationId, Query *originalQuery)
{
/*
* TODO: For now, we are adding an exception where any volatile or stable
* functions are not allowed in the MERGE query, but this will become too
* restrictive as this will prevent many useful and simple cases, such as,
* INSERT VALUES(ts::timestamp), bigserial column inserts etc. But without
* this restriction, we have a potential danger of some of the function(s)
* getting executed at the worker which will result in incorrect behavior.
*/
if (contain_mutable_functions((Node *) originalQuery))
{
ereport(ERROR, (errmsg("non-IMMUTABLE functions are not yet "
"supported in MERGE sql with distributed tables")));
}
DeferredErrorMessage *deferredError =
MergeQualAndTargetListFunctionsSupported(targetRelationId,
originalQuery,

View File

@ -25,6 +25,7 @@
#include "optimizer/optimizer.h"
#include "optimizer/planmain.h"
#include "utils/datum.h"
#include "utils/fmgroids.h"
#include "utils/lsyscache.h"
#include "utils/syscache.h"
@ -537,3 +538,30 @@ FixFunctionArgumentsWalker(Node *expr, void *context)
return expression_tree_walker(expr, FixFunctionArgumentsWalker, NULL);
}
/*
* IsNextValExpr checks if the given node either contains a sequence
* column or contains nextval() function.
*/
bool
IsNextValExpr(Node *node)
{
/* check if the node contains an identity column */
if (IsA(node, NextValueExpr))
{
return true;
}
/* check if the node contains call to 'nextval' */
if (IsA(node, FuncExpr))
{
FuncExpr *funcExpr = (FuncExpr *) node;
if (funcExpr->funcid == F_NEXTVAL)
{
return true;
}
}
return false;
}

View File

@ -50,5 +50,6 @@ extern Node * PartiallyEvaluateExpression(Node *expression,
coordinatorEvaluationContext);
extern bool CitusIsVolatileFunction(Node *node);
extern bool CitusIsMutableFunction(Node *node);
extern bool IsNextValExpr(Node *node);
#endif /* CITUS_CLAUSES_H */

View File

@ -2355,7 +2355,7 @@ USING source_serial sdn
ON sda.id = sdn.id
WHEN NOT matched THEN
INSERT (id, z) VALUES (id, z);
ERROR: non-IMMUTABLE functions are not yet supported in MERGE sql with distributed tables
ERROR: functions used in MERGE actions on distributed tables must not be VOLATILE
SELECT count(*) from source_serial;
count
---------------------------------------------------------------------
@ -3131,9 +3131,115 @@ SELECT * FROM target_6785 ORDER BY 1;
1 | 5 |
(1 row)
-- Get the result from Postgres tables
CREATE TABLE dest_tab(tid int, val int, ser bigserial, tm timestamp);
CREATE TABLE src_tab(sid int, val int);
SELECT setval('dest_tab_ser_seq', 1, true);
setval
---------------------------------------------------------------------
1
(1 row)
INSERT INTO src_tab SELECT i,i FROM generate_series(0,1000)i;
CREATE OR REPLACE FUNCTION src_tab_result()
RETURNS TABLE (id INT, val INT, bs bigint, tm timestamp) AS
$$
BEGIN
RETURN QUERY SELECT src_tab.*, nextval('dest_tab_ser_seq'::regclass), now()::timestamp FROM src_tab;
END;
$$
LANGUAGE plpgsql;
MERGE INTO dest_tab t
USING (SELECT * FROM src_tab_result()) s
ON (s.id = t.tid)
WHEN MATCHED THEN
UPDATE SET val = t.val + 1
WHEN NOT MATCHED THEN
INSERT VALUES(s.id, s.val, s.bs, s.tm);
-- Save the postgres result
CREATE TABLE postgres_serial AS SELECT MIN(ser), MAX(ser), AVG(ser) FROM dest_tab ;
-- Cleanup the data and reset the sequence
TRUNCATE dest_tab;
TRUNCATE src_tab;
SELECT setval('dest_tab_ser_seq', 1, true);
setval
---------------------------------------------------------------------
1
(1 row)
INSERT INTO src_tab SELECT i,i FROM generate_series(0,1000)i;
-- Now, make them Citus tables
SELECT create_distributed_table('dest_tab', 'tid');
create_distributed_table
---------------------------------------------------------------------
(1 row)
SELECT create_distributed_table('src_tab', 'sid', colocate_with => 'dest_tab');
NOTICE: Copying data from local table...
NOTICE: copying the data has completed
DETAIL: The local data in the table is no longer visible, but is still on disk.
HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$merge_schema.src_tab$$)
create_distributed_table
---------------------------------------------------------------------
(1 row)
MERGE INTO dest_tab t
USING (SELECT * FROM src_tab_result()) s
ON (s.id = t.tid)
WHEN MATCHED THEN
UPDATE SET val = t.val + 1
WHEN NOT MATCHED THEN
INSERT VALUES(s.id, s.val, s.bs, s.tm);
-- Compare the sequence value
SELECT
((SELECT MAX(max) FROM postgres_serial) = (SELECT MAX(ser) FROM dest_tab)
AND
(SELECT MIN(min) FROM postgres_serial) = (SELECT MIN(ser) FROM dest_tab)
AND
(SELECT AVG(avg) FROM postgres_serial) = (SELECT AVG(ser) FROM dest_tab)) AS all_equal;
all_equal
---------------------------------------------------------------------
t
(1 row)
--
-- Source without a table
--
MERGE INTO dest_tab t
USING (VALUES (1, -1), (2, -1), (3, -1)) as s (sid, val)
ON t.tid = s.sid
WHEN MATCHED THEN
UPDATE SET val = s.val
WHEN NOT MATCHED THEN
DO NOTHING;
-- Three rows with 'val = -1'
SELECT COUNT(*) FROM dest_tab where val = -1;
count
---------------------------------------------------------------------
3
(1 row)
--
-- Error and Unsupported scenarios
--
-- Test STABLE function with column reference
MERGE INTO dest_tab t
USING (SELECT *, now() as nw FROM src_tab) s
ON (s.sid = t.tid)
WHEN MATCHED THEN
UPDATE SET val = t.val + 1
WHEN NOT MATCHED THEN
INSERT VALUES(s.sid, s.val, 1, nw);
ERROR: STABLE functions used in MERGE queries cannot be called with column references
-- Test sequences
MERGE INTO target_serial sda
USING (select id, z, nextval('target_serial_d_seq'::regclass) as ser from source_serial) sdn
ON sda.id = sdn.id
WHEN NOT matched THEN
INSERT (id, z, d) VALUES (id, z, ser);
ERROR: Distributed MERGE doesn't support sequences yet
-- Test explain analyze with repartition
EXPLAIN ANALYZE
MERGE INTO demo_distributed t
@ -3144,15 +3250,6 @@ WHEN MATCHED THEN
WHEN NOT MATCHED THEN
INSERT VALUES(id2, s3);
ERROR: EXPLAIN ANALYZE is currently not supported for MERGE INTO ... commands with repartitioning
-- Source without a table
MERGE INTO target_cj t
USING (VALUES (1, 1), (2, 1), (3, 3)) as s (sid, val)
ON t.tid = s.sid AND t.tid = 2
WHEN MATCHED THEN
UPDATE SET val = s.val
WHEN NOT MATCHED THEN
DO NOTHING;
ERROR: To MERGE into a distributed table, source must be Citus table(s)
-- Incomplete source
MERGE INTO target_cj t
USING (source_cj1 s1 INNER JOIN source_cj2 s2 ON sid1 = val2) s
@ -3362,7 +3459,7 @@ MERGE INTO t1 USING (SELECT * FROM s1 WHERE true) s1 ON
t1.id = s1.id AND s1.id = 2
WHEN matched THEN
UPDATE SET id = s1.id, val = random();
ERROR: non-IMMUTABLE functions are not yet supported in MERGE sql with distributed tables
ERROR: functions used in MERGE actions on distributed tables must not be VOLATILE
-- Test STABLE function
CREATE FUNCTION add_s(integer, integer) RETURNS integer
AS 'select $1 + $2;'
@ -3372,14 +3469,14 @@ MERGE INTO t1
USING s1 ON t1.id = s1.id
WHEN NOT MATCHED THEN
INSERT VALUES(s1.id, add_s(s1.val, 2));
ERROR: non-IMMUTABLE functions are not yet supported in MERGE sql with distributed tables
ERROR: STABLE functions used in MERGE queries cannot be called with column references
-- Test preventing "ON" join condition from writing to the database
BEGIN;
MERGE INTO t1
USING s1 ON t1.id = s1.id AND t1.id = 2 AND (merge_when_and_write())
WHEN MATCHED THEN
UPDATE SET val = t1.val + s1.val;
ERROR: non-IMMUTABLE functions are not yet supported in MERGE sql with distributed tables
ERROR: functions used in the ON clause of MERGE queries on distributed tables must not be VOLATILE
ROLLBACK;
-- Test preventing WHEN clause(s) from writing to the database
BEGIN;
@ -3387,7 +3484,7 @@ MERGE INTO t1
USING s1 ON t1.id = s1.id AND t1.id = 2
WHEN MATCHED AND (merge_when_and_write()) THEN
UPDATE SET val = t1.val + s1.val;
ERROR: non-IMMUTABLE functions are not yet supported in MERGE sql with distributed tables
ERROR: functions used in the WHEN clause of MERGE queries on distributed tables must not be VOLATILE
ROLLBACK;
-- Joining on non-partition columns with CTE source, but INSERT incorrect column
WITH s1_res AS (
@ -3633,13 +3730,13 @@ SELECT create_distributed_table('dist_source', 'id', colocate_with => 'none');
(1 row)
MERGE INTO dist_target
USING (SELECT 100 id) AS source
ON dist_target.id = source.id AND dist_target.val = 'const'
USING (SELECT 100 id, 101 val) AS source
ON dist_target.id = source.val AND dist_target.val = 'const'
WHEN MATCHED THEN
UPDATE SET val = 'source'
WHEN NOT MATCHED THEN
INSERT VALUES(source.id, 'source');
ERROR: To MERGE into a distributed table, source must be Citus table(s)
ERROR: MERGE INSERT must use the source's joining column for target's distribution column
-- Non-hash distributed tables (append/range).
CREATE VIEW show_tables AS
SELECT logicalrelid, partmethod
@ -3958,7 +4055,7 @@ CONTEXT: SQL statement "SELECT citus_drop_all_shards(v_obj.objid, v_obj.schema_
PL/pgSQL function citus_drop_trigger() line XX at PERFORM
DROP FUNCTION merge_when_and_write();
DROP SCHEMA merge_schema CASCADE;
NOTICE: drop cascades to 103 other objects
NOTICE: drop cascades to 107 other objects
DETAIL: drop cascades to function insert_data()
drop cascades to table local_local
drop cascades to table target
@ -4050,13 +4147,13 @@ drop cascades to table target_filter
drop cascades to function load_filter()
drop cascades to table source_6785
drop cascades to table target_6785
drop cascades to table dest_tab
drop cascades to table src_tab
drop cascades to function src_tab_result()
drop cascades to table postgres_serial
drop cascades to function add_s(integer,integer)
drop cascades to table pg
drop cascades to table t1_4000174
drop cascades to table s1_4000175
drop cascades to table t1_4000182
drop cascades to table s1_4000183
drop cascades to table t1
drop cascades to table s1
drop cascades to table dist_target
drop cascades to table dist_source
drop cascades to view show_tables
and 3 other objects (see server log for list)
and 7 other objects (see server log for list)

View File

@ -88,6 +88,14 @@ CREATE OR REPLACE FUNCTION compare_data() RETURNS VOID SET search_path TO merge_
SELECT check_data('pg_target', 'val', 'citus_target', 'val');
$$
LANGUAGE SQL;
CREATE OR REPLACE FUNCTION source_result(table_name text)
RETURNS TABLE (id int, val int, const int) AS
$$
BEGIN
RETURN QUERY EXECUTE format('SELECT * FROM %I', table_name);
END;
$$
LANGUAGE plpgsql;
--
-- Target and source are distributed, and non-colocated
--
@ -1336,13 +1344,71 @@ SQL function "compare_data" statement 2
(1 row)
--
-- Target is distributed, and source is a function returning rows
--
SET client_min_messages TO WARNING;
SELECT cleanup_data();
cleanup_data
---------------------------------------------------------------------
(1 row)
RESET client_min_messages;
SELECT setup_data();
setup_data
---------------------------------------------------------------------
(1 row)
SELECT create_distributed_table('citus_target', 'id');
NOTICE: Copying data from local table...
NOTICE: copying the data has completed
DETAIL: The local data in the table is no longer visible, but is still on disk.
HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$merge_repartition1_schema.citus_target$$)
create_distributed_table
---------------------------------------------------------------------
(1 row)
MERGE INTO pg_target t
USING (SELECT * FROM source_result('pg_source')) s
ON t.id = s.id
WHEN MATCHED AND t.id <= 7500 THEN
UPDATE SET val = s.val + 1
WHEN MATCHED THEN
DELETE
WHEN NOT MATCHED THEN
INSERT VALUES(s.id, s.val);
MERGE INTO citus_target t
USING (SELECT * FROM source_result('citus_source')) s
ON t.id = s.id
WHEN MATCHED AND t.id <= 7500 THEN
UPDATE SET val = s.val + 1
WHEN MATCHED THEN
DELETE
WHEN NOT MATCHED THEN
INSERT VALUES(s.id, s.val);
SELECT compare_data();
NOTICE: The average of pg_target.id is equal to citus_target.id
CONTEXT: PL/pgSQL function check_data(text,text,text,text) line XX at RAISE
SQL function "compare_data" statement 1
NOTICE: The average of pg_target.val is equal to citus_target.val
CONTEXT: PL/pgSQL function check_data(text,text,text,text) line XX at RAISE
SQL function "compare_data" statement 2
compare_data
---------------------------------------------------------------------
(1 row)
DROP SCHEMA merge_repartition1_schema CASCADE;
NOTICE: drop cascades to 8 other objects
NOTICE: drop cascades to 9 other objects
DETAIL: drop cascades to table pg_target
drop cascades to table pg_source
drop cascades to function cleanup_data()
drop cascades to function setup_data()
drop cascades to function check_data(text,text,text,text)
drop cascades to function compare_data()
drop cascades to function source_result(text)
drop cascades to table citus_target
drop cascades to table citus_source

View File

@ -911,7 +911,7 @@ MERGE INTO wq_target t
USING wq_source s ON t.tid = s.sid
WHEN MATCHED AND (merge_when_and_write()) THEN
UPDATE SET balance = t.balance + s.balance;
ERROR: non-IMMUTABLE functions are not yet supported in MERGE sql with distributed tables
ERROR: functions used in the WHEN clause of MERGE queries on distributed tables must not be VOLATILE
ROLLBACK;
-- Test preventing ON condition from writing to the database
BEGIN;
@ -919,7 +919,7 @@ MERGE INTO wq_target t
USING wq_source s ON t.tid = s.sid AND (merge_when_and_write())
WHEN MATCHED THEN
UPDATE SET balance = t.balance + s.balance;
ERROR: non-IMMUTABLE functions are not yet supported in MERGE sql with distributed tables
ERROR: functions used in the ON clause of MERGE queries on distributed tables must not be VOLATILE
ROLLBACK;
drop function merge_when_and_write();
DROP TABLE wq_target, wq_source;

View File

@ -1951,10 +1951,93 @@ RESET client_min_messages;
SELECT * FROM target_6785 ORDER BY 1;
-- Get the result from Postgres tables
CREATE TABLE dest_tab(tid int, val int, ser bigserial, tm timestamp);
CREATE TABLE src_tab(sid int, val int);
SELECT setval('dest_tab_ser_seq', 1, true);
INSERT INTO src_tab SELECT i,i FROM generate_series(0,1000)i;
CREATE OR REPLACE FUNCTION src_tab_result()
RETURNS TABLE (id INT, val INT, bs bigint, tm timestamp) AS
$$
BEGIN
RETURN QUERY SELECT src_tab.*, nextval('dest_tab_ser_seq'::regclass), now()::timestamp FROM src_tab;
END;
$$
LANGUAGE plpgsql;
MERGE INTO dest_tab t
USING (SELECT * FROM src_tab_result()) s
ON (s.id = t.tid)
WHEN MATCHED THEN
UPDATE SET val = t.val + 1
WHEN NOT MATCHED THEN
INSERT VALUES(s.id, s.val, s.bs, s.tm);
-- Save the postgres result
CREATE TABLE postgres_serial AS SELECT MIN(ser), MAX(ser), AVG(ser) FROM dest_tab ;
-- Cleanup the data and reset the sequence
TRUNCATE dest_tab;
TRUNCATE src_tab;
SELECT setval('dest_tab_ser_seq', 1, true);
INSERT INTO src_tab SELECT i,i FROM generate_series(0,1000)i;
-- Now, make them Citus tables
SELECT create_distributed_table('dest_tab', 'tid');
SELECT create_distributed_table('src_tab', 'sid', colocate_with => 'dest_tab');
MERGE INTO dest_tab t
USING (SELECT * FROM src_tab_result()) s
ON (s.id = t.tid)
WHEN MATCHED THEN
UPDATE SET val = t.val + 1
WHEN NOT MATCHED THEN
INSERT VALUES(s.id, s.val, s.bs, s.tm);
-- Compare the sequence value
SELECT
((SELECT MAX(max) FROM postgres_serial) = (SELECT MAX(ser) FROM dest_tab)
AND
(SELECT MIN(min) FROM postgres_serial) = (SELECT MIN(ser) FROM dest_tab)
AND
(SELECT AVG(avg) FROM postgres_serial) = (SELECT AVG(ser) FROM dest_tab)) AS all_equal;
--
-- Source without a table
--
MERGE INTO dest_tab t
USING (VALUES (1, -1), (2, -1), (3, -1)) as s (sid, val)
ON t.tid = s.sid
WHEN MATCHED THEN
UPDATE SET val = s.val
WHEN NOT MATCHED THEN
DO NOTHING;
-- Three rows with 'val = -1'
SELECT COUNT(*) FROM dest_tab where val = -1;
--
-- Error and Unsupported scenarios
--
-- Test STABLE function with column reference
MERGE INTO dest_tab t
USING (SELECT *, now() as nw FROM src_tab) s
ON (s.sid = t.tid)
WHEN MATCHED THEN
UPDATE SET val = t.val + 1
WHEN NOT MATCHED THEN
INSERT VALUES(s.sid, s.val, 1, nw);
-- Test sequences
MERGE INTO target_serial sda
USING (select id, z, nextval('target_serial_d_seq'::regclass) as ser from source_serial) sdn
ON sda.id = sdn.id
WHEN NOT matched THEN
INSERT (id, z, d) VALUES (id, z, ser);
-- Test explain analyze with repartition
EXPLAIN ANALYZE
@ -1966,15 +2049,6 @@ WHEN MATCHED THEN
WHEN NOT MATCHED THEN
INSERT VALUES(id2, s3);
-- Source without a table
MERGE INTO target_cj t
USING (VALUES (1, 1), (2, 1), (3, 3)) as s (sid, val)
ON t.tid = s.sid AND t.tid = 2
WHEN MATCHED THEN
UPDATE SET val = s.val
WHEN NOT MATCHED THEN
DO NOTHING;
-- Incomplete source
MERGE INTO target_cj t
USING (source_cj1 s1 INNER JOIN source_cj2 s2 ON sid1 = val2) s
@ -2298,8 +2372,8 @@ CREATE TABLE dist_source(id int, val varchar);
SELECT create_distributed_table('dist_source', 'id', colocate_with => 'none');
MERGE INTO dist_target
USING (SELECT 100 id) AS source
ON dist_target.id = source.id AND dist_target.val = 'const'
USING (SELECT 100 id, 101 val) AS source
ON dist_target.id = source.val AND dist_target.val = 'const'
WHEN MATCHED THEN
UPDATE SET val = 'source'
WHEN NOT MATCHED THEN

View File

@ -80,6 +80,15 @@ CREATE OR REPLACE FUNCTION compare_data() RETURNS VOID SET search_path TO merge_
$$
LANGUAGE SQL;
CREATE OR REPLACE FUNCTION source_result(table_name text)
RETURNS TABLE (id int, val int, const int) AS
$$
BEGIN
RETURN QUERY EXECUTE format('SELECT * FROM %I', table_name);
END;
$$
LANGUAGE plpgsql;
--
-- Target and source are distributed, and non-colocated
--
@ -577,4 +586,35 @@ WHEN NOT MATCHED THEN
SELECT compare_data();
--
-- Target is distributed, and source is a function returning rows
--
SET client_min_messages TO WARNING;
SELECT cleanup_data();
RESET client_min_messages;
SELECT setup_data();
SELECT create_distributed_table('citus_target', 'id');
MERGE INTO pg_target t
USING (SELECT * FROM source_result('pg_source')) s
ON t.id = s.id
WHEN MATCHED AND t.id <= 7500 THEN
UPDATE SET val = s.val + 1
WHEN MATCHED THEN
DELETE
WHEN NOT MATCHED THEN
INSERT VALUES(s.id, s.val);
MERGE INTO citus_target t
USING (SELECT * FROM source_result('citus_source')) s
ON t.id = s.id
WHEN MATCHED AND t.id <= 7500 THEN
UPDATE SET val = s.val + 1
WHEN MATCHED THEN
DELETE
WHEN NOT MATCHED THEN
INSERT VALUES(s.id, s.val);
SELECT compare_data();
DROP SCHEMA merge_repartition1_schema CASCADE;