diff --git a/src/backend/distributed/planner/multi_router_planner.c b/src/backend/distributed/planner/multi_router_planner.c index 6fad77974..809300f7c 100644 --- a/src/backend/distributed/planner/multi_router_planner.c +++ b/src/backend/distributed/planner/multi_router_planner.c @@ -2676,7 +2676,6 @@ BuildRoutesForInsert(Query *query, DeferredErrorMessage **planningError) Oid distributedTableId = ExtractFirstCitusTableId(query); CitusTableCacheEntry *cacheEntry = GetCitusTableCacheEntry(distributedTableId); char partitionMethod = cacheEntry->partitionMethod; - uint32 rangeTableId = 1; List *modifyRouteList = NIL; ListCell *insertValuesCell = NULL; @@ -2714,7 +2713,7 @@ BuildRoutesForInsert(Query *query, DeferredErrorMessage **planningError) return modifyRouteList; } - Var *partitionColumn = PartitionColumn(distributedTableId, rangeTableId); + Var *partitionColumn = cacheEntry->partitionColumn; /* get full list of insert values and iterate over them to prune */ List *insertValuesList = ExtractInsertValuesList(query, partitionColumn); @@ -2723,8 +2722,38 @@ BuildRoutesForInsert(Query *query, DeferredErrorMessage **planningError) { InsertValues *insertValues = (InsertValues *) lfirst(insertValuesCell); List *prunedShardIntervalList = NIL; - Expr *partitionValueExpr = (Expr *) strip_implicit_coercions( - (Node *) insertValues->partitionValueExpr); + Node *partitionValueExpr = (Node *) insertValues->partitionValueExpr; + + /* + * We only support constant partition values at this point. Sometimes + * they are wrappend in an implicit coercion though. Most notably + * FuncExpr coercions for casts created with CREATE CAST ... WITH + * FUNCTION .. AS IMPLICIT. To support this first we strip them here. + * Then we do the coercion manually below using + * TransformPartitionRestrictionValue, if the types are not the same. + * + * NOTE: eval_const_expressions below would do some of these removals + * too, but it's unclear if it would do all of them. It is possible + * that there are no cases where this strip_implicit_coercions call is + * really necessary at all, but currently that's hard to rule out. + * So to be on the safe side we call strip_implicit_coercions too, to + * be sure we support as much as possible. + */ + partitionValueExpr = strip_implicit_coercions(partitionValueExpr); + + /* + * By evaluating constant expressions an expression such as 2 + 4 + * will become const 6. That way we can use them as a partition column + * value. Normally the planner evaluates constant expressions, but we + * may be working on the original query tree here. So we do it here + * explicitely before checking that the partition value is a const. + * + * NOTE: We do not use expression_planner here, since all it does + * apart from calling eval_const_expressions is call fix_opfuncids. + * This is not needed here, since it's a no-op for T_Const nodes and we + * error out below in all other cases. + */ + partitionValueExpr = eval_const_expressions(NULL, partitionValueExpr); if (!IsA(partitionValueExpr, Const)) { @@ -2741,21 +2770,20 @@ BuildRoutesForInsert(Query *query, DeferredErrorMessage **planningError) "column"))); } + /* actually do the coercions that we skipped before, if fails throw an + * error */ + if (partitionValueConst->consttype != partitionColumn->vartype) + { + bool missingOk = false; + partitionValueConst = + TransformPartitionRestrictionValue(partitionColumn, + partitionValueConst, + missingOk); + } + if (partitionMethod == DISTRIBUTE_BY_HASH || partitionMethod == DISTRIBUTE_BY_RANGE) { - Var *distributionKey = cacheEntry->partitionColumn; - - /* handle coercions, if fails throw an error */ - if (partitionValueConst->consttype != distributionKey->vartype) - { - bool missingOk = false; - partitionValueConst = - TransformPartitionRestrictionValue(distributionKey, - partitionValueConst, - missingOk); - } - Datum partitionValue = partitionValueConst->constvalue; ShardInterval *shardInterval = FindShardInterval(partitionValue, cacheEntry); diff --git a/src/backend/distributed/utils/citus_clauses.c b/src/backend/distributed/utils/citus_clauses.c index 442412651..7a9e6e72a 100644 --- a/src/backend/distributed/utils/citus_clauses.c +++ b/src/backend/distributed/utils/citus_clauses.c @@ -118,6 +118,40 @@ PartiallyEvaluateExpression(Node *expression, else if (ShouldEvaluateExpression((Expr *) expression) && ShouldEvaluateFunctions(coordinatorEvaluationContext)) { + /* + * The planner normally evaluates constant expressions, but we may be + * working on the original query tree. We could rely on + * citus_evaluate_expr to evaluate constant expressions, but there are + * certain node types that citus_evaluate_expr does not expect because + * the planner normally replaces them (in particular, CollateExpr). + * Hence, we first evaluate constant expressions using + * eval_const_expressions before continuing. + * + * NOTE: We do not use expression_planner here, since all it does + * apart from calling eval_const_expressions is call fix_opfuncids. + * We do not need this, since that is already called in + * citus_evaluate_expr. So we won't needlessly traverse the expression + * tree by calling it another time. + */ + expression = eval_const_expressions(NULL, expression); + + /* + * It's possible that after evaluating const expressions we + * actually don't need to evaluate this expression anymore e.g: + * + * 1 = 0 AND now() > timestamp '10-10-2000 00:00' + * + * This statement would simply resolve to false, because 1 = 0 is + * false. That's why we now check again if we should evaluate the + * expression and only continue if we still do. + */ + if (!ShouldEvaluateExpression((Expr *) expression)) + { + return (Node *) expression_tree_mutator(expression, + PartiallyEvaluateExpression, + coordinatorEvaluationContext); + } + if (FindNodeCheck(expression, IsVariableExpression)) { /* diff --git a/src/test/regress/expected/multi_data_types.out b/src/test/regress/expected/multi_data_types.out index 124b04c74..db1cfedcc 100644 --- a/src/test/regress/expected/multi_data_types.out +++ b/src/test/regress/expected/multi_data_types.out @@ -142,6 +142,85 @@ SELECT * FROM composite_type_partitioned_table WHERE col = '(7, 8)'::test_compo 6 | (7,8) (1 row) +CREATE TYPE other_composite_type AS ( + i integer, + i2 integer +); +-- Check that casts are correctly done on partition columns +SELECT run_command_on_coordinator_and_workers($cf$ + CREATE CAST (other_composite_type AS test_composite_type) WITH INOUT AS IMPLICIT; +$cf$); + run_command_on_coordinator_and_workers +--------------------------------------------------------------------- + +(1 row) + +INSERT INTO composite_type_partitioned_table VALUES (123, '(123, 456)'::other_composite_type); +SELECT * FROM composite_type_partitioned_table WHERE id = 123; + id | col +--------------------------------------------------------------------- + 123 | (123,456) +(1 row) + +EXPLAIN (ANALYZE TRUE, COSTS FALSE, VERBOSE TRUE, TIMING FALSE, SUMMARY FALSE) +INSERT INTO composite_type_partitioned_table VALUES (123, '(123, 456)'::other_composite_type); + QUERY PLAN +--------------------------------------------------------------------- + Custom Scan (Citus Adaptive) (actual rows=0 loops=1) + Task Count: 1 + Tasks Shown: All + -> Task + Query: INSERT INTO public.composite_type_partitioned_table_530003 (id, col) VALUES (123, '(123,456)'::public.test_composite_type) + Node: host=localhost port=xxxxx dbname=regression + -> Insert on public.composite_type_partitioned_table_530003 (actual rows=0 loops=1) + -> Result (actual rows=1 loops=1) + Output: 123, '(123,456)'::test_composite_type +(9 rows) + +SELECT run_command_on_coordinator_and_workers($cf$ + DROP CAST (other_composite_type as test_composite_type); +$cf$); + run_command_on_coordinator_and_workers +--------------------------------------------------------------------- + +(1 row) + +SELECT run_command_on_coordinator_and_workers($cf$ + CREATE FUNCTION to_test_composite_type(arg other_composite_type) RETURNS test_composite_type + AS 'select arg::text::test_composite_type;' + LANGUAGE SQL + IMMUTABLE + RETURNS NULL ON NULL INPUT; +$cf$); + run_command_on_coordinator_and_workers +--------------------------------------------------------------------- + +(1 row) + +SELECT run_command_on_coordinator_and_workers($cf$ + CREATE CAST (other_composite_type AS test_composite_type) WITH FUNCTION to_test_composite_type(other_composite_type) AS IMPLICIT; +$cf$); + run_command_on_coordinator_and_workers +--------------------------------------------------------------------- + +(1 row) + +INSERT INTO composite_type_partitioned_table VALUES (456, '(456, 678)'::other_composite_type); +EXPLAIN (ANALYZE TRUE, COSTS FALSE, VERBOSE TRUE, TIMING FALSE, SUMMARY FALSE) +INSERT INTO composite_type_partitioned_table VALUES (123, '(456, 678)'::other_composite_type); + QUERY PLAN +--------------------------------------------------------------------- + Custom Scan (Citus Adaptive) (actual rows=0 loops=1) + Task Count: 1 + Tasks Shown: All + -> Task + Query: INSERT INTO public.composite_type_partitioned_table_530000 (id, col) VALUES (123, '(456,678)'::public.other_composite_type) + Node: host=localhost port=xxxxx dbname=regression + -> Insert on public.composite_type_partitioned_table_530000 (actual rows=0 loops=1) + -> Result (actual rows=1 loops=1) + Output: 123, '(456,678)'::test_composite_type +(9 rows) + -- create and distribute a table on enum type column CREATE TYPE bug_status AS ENUM ('new', 'open', 'closed'); CREATE TABLE bugs ( diff --git a/src/test/regress/expected/sqlancer_failures.out b/src/test/regress/expected/sqlancer_failures.out new file mode 100644 index 000000000..84d9ecf10 --- /dev/null +++ b/src/test/regress/expected/sqlancer_failures.out @@ -0,0 +1,112 @@ +CREATE SCHEMA sqlancer_failures; +SET search_path TO sqlancer_failures; +SET citus.shard_count TO 4; +SET citus.shard_replication_factor TO 1; +SET citus.next_shard_id TO 92862400; +CREATE TABLE t0 (c0 int, c1 MONEY); +SELECT create_distributed_table('t0', 'c0'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +UPDATE t0 SET c1 = ((0.43107963)::MONEY) WHERE ((upper('-14295774') COLLATE "de_CH +.utf8") SIMILAR TO ''); +ERROR: collation "de_CH +.utf8" for encoding "UTF8" does not exist +UPDATE t0 SET c1 = 1 WHERE '' COLLATE "C" = ''; +CREATE TABLE t1 (c0 text); +SELECT create_distributed_table('t1', 'c0'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +INSERT INTO t1 VALUES ('' COLLATE "C"); +CREATE TABLE t2 (c0 text, c1 bool, c2 timestamptz default now()); +SELECT create_distributed_table('t2', 'c0'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +INSERT INTO t2 VALUES ('key', '' COLLATE "C" = ''); +CREATE TABLE t3 (c0 text, c1 text, c2 timestamptz default now()); +SELECT create_distributed_table('t3', 'c0'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +INSERT INTO t3 VALUES ('key', '' COLLATE "C"); +CREATE TABLE t4(c0 real, c1 boolean); +SELECT create_distributed_table('t4', 'c1'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +INSERT INTO t4 VALUES (1.0, 2 BETWEEN 1 AND 3); +-- NOTE: For some reason shard pruning doesn't happen correctly here. It does +-- work for non boolean const expressions. See explain plans for t5 below that +-- show that. The query still works though. So doesn't seem important enough to +-- fix, since boolean partition columns should not happen much/at all for +-- actual users. +EXPLAIN (COSTS FALSE) SELECT FROM t4 WHERE c1 = 2 BETWEEN 1 AND 3; + QUERY PLAN +--------------------------------------------------------------------- + Custom Scan (Citus Adaptive) + Task Count: 4 + Tasks Shown: One of 4 + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> Seq Scan on t4_92862416 t4 + Filter: c1 +(7 rows) + +EXPLAIN (COSTS FALSE) SELECT FROM t4 WHERE c1 = true; + QUERY PLAN +--------------------------------------------------------------------- + Custom Scan (Citus Adaptive) + Task Count: 1 + Tasks Shown: All + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> Seq Scan on t4_92862416 t4 + Filter: c1 +(7 rows) + +CREATE TABLE t5(c0 int); +SELECT create_distributed_table('t5', 'c0'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +INSERT INTO t5 VALUES (CASE WHEN 2 BETWEEN 1 AND 3 THEN 2 ELSE 1 END); +EXPLAIN (COSTS FALSE) SELECT FROM t5 WHERE c0 = 2; + QUERY PLAN +--------------------------------------------------------------------- + Custom Scan (Citus Adaptive) + Task Count: 1 + Tasks Shown: All + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> Seq Scan on t5_92862423 t5 + Filter: (c0 = 2) +(7 rows) + +EXPLAIN (COSTS FALSE) SELECT FROM t5 WHERE c0 = CASE WHEN 2 BETWEEN 1 AND 3 THEN 2 ELSE 1 END; + QUERY PLAN +--------------------------------------------------------------------- + Custom Scan (Citus Adaptive) + Task Count: 1 + Tasks Shown: All + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> Seq Scan on t5_92862423 t5 + Filter: (c0 = 2) +(7 rows) + +SET client_min_messages TO WARNING; +DROP SCHEMA sqlancer_failures CASCADE; diff --git a/src/test/regress/multi_schedule b/src/test/regress/multi_schedule index cdd89f239..f89b0de7b 100644 --- a/src/test/regress/multi_schedule +++ b/src/test/regress/multi_schedule @@ -352,6 +352,11 @@ test: multi_deparse_function multi_deparse_procedure # -------- test: shared_connection_stats +# --------- +# run queries generated by sql smith and sqlancer that caused issues in the past +# -------- +test: sqlsmith_failures sqlancer_failures + # --------- # test that no tests leaked intermediate results. This should always be last # --------- @@ -363,7 +368,3 @@ test: ensure_no_intermediate_data_leak # -------- test: ensure_no_shared_connection_leak -# --------- -# run queries generated by sql smith that caused issues in the past -# -------- -test: sqlsmith_failures diff --git a/src/test/regress/sql/multi_data_types.sql b/src/test/regress/sql/multi_data_types.sql index 9ae4ce1d8..847876e83 100644 --- a/src/test/regress/sql/multi_data_types.sql +++ b/src/test/regress/sql/multi_data_types.sql @@ -103,6 +103,42 @@ UPDATE composite_type_partitioned_table SET id = 6 WHERE col = '(7, 8)'::test_c SELECT * FROM composite_type_partitioned_table WHERE col = '(7, 8)'::test_composite_type; +CREATE TYPE other_composite_type AS ( + i integer, + i2 integer +); + +-- Check that casts are correctly done on partition columns +SELECT run_command_on_coordinator_and_workers($cf$ + CREATE CAST (other_composite_type AS test_composite_type) WITH INOUT AS IMPLICIT; +$cf$); + +INSERT INTO composite_type_partitioned_table VALUES (123, '(123, 456)'::other_composite_type); +SELECT * FROM composite_type_partitioned_table WHERE id = 123; + +EXPLAIN (ANALYZE TRUE, COSTS FALSE, VERBOSE TRUE, TIMING FALSE, SUMMARY FALSE) +INSERT INTO composite_type_partitioned_table VALUES (123, '(123, 456)'::other_composite_type); + +SELECT run_command_on_coordinator_and_workers($cf$ + DROP CAST (other_composite_type as test_composite_type); +$cf$); +SELECT run_command_on_coordinator_and_workers($cf$ + CREATE FUNCTION to_test_composite_type(arg other_composite_type) RETURNS test_composite_type + AS 'select arg::text::test_composite_type;' + LANGUAGE SQL + IMMUTABLE + RETURNS NULL ON NULL INPUT; +$cf$); + +SELECT run_command_on_coordinator_and_workers($cf$ + CREATE CAST (other_composite_type AS test_composite_type) WITH FUNCTION to_test_composite_type(other_composite_type) AS IMPLICIT; +$cf$); +INSERT INTO composite_type_partitioned_table VALUES (456, '(456, 678)'::other_composite_type); + +EXPLAIN (ANALYZE TRUE, COSTS FALSE, VERBOSE TRUE, TIMING FALSE, SUMMARY FALSE) +INSERT INTO composite_type_partitioned_table VALUES (123, '(456, 678)'::other_composite_type); + + -- create and distribute a table on enum type column CREATE TYPE bug_status AS ENUM ('new', 'open', 'closed'); diff --git a/src/test/regress/sql/sqlancer_failures.sql b/src/test/regress/sql/sqlancer_failures.sql new file mode 100644 index 000000000..0555af383 --- /dev/null +++ b/src/test/regress/sql/sqlancer_failures.sql @@ -0,0 +1,43 @@ +CREATE SCHEMA sqlancer_failures; +SET search_path TO sqlancer_failures; +SET citus.shard_count TO 4; +SET citus.shard_replication_factor TO 1; +SET citus.next_shard_id TO 92862400; + +CREATE TABLE t0 (c0 int, c1 MONEY); +SELECT create_distributed_table('t0', 'c0'); +UPDATE t0 SET c1 = ((0.43107963)::MONEY) WHERE ((upper('-14295774') COLLATE "de_CH +.utf8") SIMILAR TO ''); +UPDATE t0 SET c1 = 1 WHERE '' COLLATE "C" = ''; + +CREATE TABLE t1 (c0 text); +SELECT create_distributed_table('t1', 'c0'); +INSERT INTO t1 VALUES ('' COLLATE "C"); + +CREATE TABLE t2 (c0 text, c1 bool, c2 timestamptz default now()); +SELECT create_distributed_table('t2', 'c0'); +INSERT INTO t2 VALUES ('key', '' COLLATE "C" = ''); + +CREATE TABLE t3 (c0 text, c1 text, c2 timestamptz default now()); +SELECT create_distributed_table('t3', 'c0'); +INSERT INTO t3 VALUES ('key', '' COLLATE "C"); + +CREATE TABLE t4(c0 real, c1 boolean); +SELECT create_distributed_table('t4', 'c1'); +INSERT INTO t4 VALUES (1.0, 2 BETWEEN 1 AND 3); +-- NOTE: For some reason shard pruning doesn't happen correctly here. It does +-- work for non boolean const expressions. See explain plans for t5 below that +-- show that. The query still works though. So doesn't seem important enough to +-- fix, since boolean partition columns should not happen much/at all for +-- actual users. +EXPLAIN (COSTS FALSE) SELECT FROM t4 WHERE c1 = 2 BETWEEN 1 AND 3; +EXPLAIN (COSTS FALSE) SELECT FROM t4 WHERE c1 = true; + +CREATE TABLE t5(c0 int); +SELECT create_distributed_table('t5', 'c0'); +INSERT INTO t5 VALUES (CASE WHEN 2 BETWEEN 1 AND 3 THEN 2 ELSE 1 END); +EXPLAIN (COSTS FALSE) SELECT FROM t5 WHERE c0 = 2; +EXPLAIN (COSTS FALSE) SELECT FROM t5 WHERE c0 = CASE WHEN 2 BETWEEN 1 AND 3 THEN 2 ELSE 1 END; + +SET client_min_messages TO WARNING; +DROP SCHEMA sqlancer_failures CASCADE;