/*-------------------------------------------------------------------------
 *
 * deparse_shard_query.c
 *
 * This file contains functions for deparsing shard queries.
 *
 * It covers three related concerns:
 *   - rebuilding the per-task query (string or Query object) of a Job,
 *   - rewriting range table entries so that they point at shards, and
 *   - accessors that lazily turn a task's Query object into query text.
 *
 * Copyright (c) Citus Data, Inc.
 *
 *-------------------------------------------------------------------------
 */

#include "postgres.h"
#include "c.h"

#include "access/heapam.h"
#include "access/htup_details.h"
#include "catalog/pg_constraint.h"
#include "catalog/pg_type.h"
#include "distributed/citus_nodefuncs.h"
#include "distributed/citus_ruleutils.h"
#include "distributed/deparse_shard_query.h"
#include "distributed/insert_select_planner.h"
#include "distributed/intermediate_results.h"
#include "distributed/listutils.h"
#include "distributed/local_executor.h"
#include "distributed/metadata_cache.h"
#include "distributed/multi_physical_planner.h"
#include "distributed/multi_router_planner.h"
#include "distributed/shard_utils.h"
#include "distributed/version_compat.h"
#include "lib/stringinfo.h"
#include "nodes/makefuncs.h"
#include "nodes/nodeFuncs.h"
#include "nodes/nodes.h"
#include "nodes/parsenodes.h"
#include "nodes/pg_list.h"
#include "parser/parsetree.h"
#include "storage/lock.h"
#include "utils/lsyscache.h"
#include "utils/rel.h"
#include "utils/syscache.h"


/* forward declarations of file-local helpers */
static void UpdateTaskQueryString(Query *query, Oid distributedTableId,
								  RangeTblEntry *valuesRTE, Task *task);
static bool ReplaceRelationConstraintByShardConstraint(List *relationShardList,
													   OnConflictExpr *onConflict);
static RelationShard * FindRelationShard(Oid inputRelationId, List *relationShardList);
static void ConvertReadDistributedResultForShard(RangeTblEntry *rte,
												 List *relationShardList);
static RelationShard * FindDistributedResultRelationShard(List *relationShardList,
														  char *resultId);
static void ConvertRteToSubqueryWithEmptyResult(RangeTblEntry *rte);
static bool ShouldLazyDeparseQuery(Task *task);
static char * DeparseTaskQuery(Task *task, Query *query);


/*
 * RebuildQueryStrings deparses the job query for each task to
 * include execution-time changes such as function evaluation.
 *
 * For every task in workerJob->taskList the function picks the Query to
 * attach to the task (the job query itself or a per-task copy), applies the
 * per-command adjustments below, and hands the result to
 * UpdateTaskQueryString. It also copies the job's
 * parametersInJobQueryResolved flag onto each task.
 *
 * The relation id passed along is the relid of the first range table entry
 * of the job query.
 */
void
RebuildQueryStrings(Job *workerJob)
{
	Query *originalQuery = workerJob->jobQuery;
	List *taskList = workerJob->taskList;
	Oid relationId = ((RangeTblEntry *) linitial(originalQuery->rtable))->relid;
	RangeTblEntry *valuesRTE = ExtractDistributedInsertValuesRTE(originalQuery);

	Task *task = NULL;
	foreach_ptr(task, taskList)
	{
		Query *query = originalQuery;

		if (UpdateOrDeleteQuery(query) && list_length(taskList) > 1)
		{
			/*
			 * UpdateTaskQueryString rewrites the range table of non-INSERT
			 * queries in place, so with more than one task each task gets
			 * its own copy (presumably to keep the rewrite for one shard
			 * from leaking into the next task).
			 */
			query = copyObject(originalQuery);
		}
		else if (query->commandType == CMD_INSERT && task->modifyWithSubquery)
		{
			/* for INSERT..SELECT, adjust shard names in SELECT part */
			List *relationShardList = task->relationShardList;
			ShardInterval *shardInterval = LoadShardInterval(task->anchorShardId);

			/* all changes below are made on a per-task copy */
			query = copyObject(originalQuery);

			RangeTblEntry *copiedInsertRte = ExtractResultRelationRTEOrError(query);
			RangeTblEntry *copiedSubqueryRte = ExtractSelectRangeTableEntry(query);
			Query *copiedSubquery = copiedSubqueryRte->subquery;

			/* there are no restrictions to add for reference and citus local tables */
			if (IsCitusTableType(shardInterval->relationId, DISTRIBUTED_TABLE))
			{
				AddPartitionKeyNotNullFilterToSelect(copiedSubquery);
			}

			ReorderInsertSelectTargetLists(query, copiedInsertRte, copiedSubqueryRte);

			/* setting an alias simplifies deparsing of RETURNING */
			if (copiedInsertRte->alias == NULL)
			{
				Alias *alias = makeAlias(CITUS_TABLE_ALIAS, NIL);
				copiedInsertRte->alias = alias;
			}

			/* only the SELECT part is rewritten; the INSERT target is left as is */
			UpdateRelationToShardNames((Node *) copiedSubquery, relationShardList);
		}
		else if (query->commandType == CMD_INSERT &&
				 (query->onConflict != NULL || valuesRTE != NULL))
		{
			/*
			 * Always add an alias in UPSERTs and multi-row INSERTs to avoid
			 * deparsing issues (e.g. RETURNING might reference the original
			 * table name, which has been replaced by a shard name).
			 *
			 * Note that query still points at originalQuery in this branch,
			 * so the alias is set on the job query itself.
			 */
			RangeTblEntry *rangeTableEntry = linitial(query->rtable);
			if (rangeTableEntry->alias == NULL)
			{
				Alias *alias = makeAlias(CITUS_TABLE_ALIAS, NIL);
				rangeTableEntry->alias = alias;
			}
		}

		/*
		 * TaskQueryString is only asked for the old query when the task
		 * currently holds a single query text or a query object; for any
		 * other task query type the literal "(null)" is logged instead.
		 */
		bool isQueryObjectOrText = GetTaskQueryType(task) == TASK_QUERY_TEXT ||
								   GetTaskQueryType(task) == TASK_QUERY_OBJECT;
		ereport(DEBUG4,
				(errmsg("query before rebuilding: %s",
						!isQueryObjectOrText
						? "(null)"
						: ApplyLogRedaction(TaskQueryString(task)))));

		UpdateTaskQueryString(query, relationId, valuesRTE, task);

		/*
		 * If parameters were resolved in the job query, then they are now also
		 * resolved in the query string.
		 */
		task->parametersInQueryStringResolved =
			workerJob->parametersInJobQueryResolved;

		ereport(DEBUG4,
				(errmsg("query after rebuilding: %s",
						ApplyLogRedaction(TaskQueryString(task)))));
	}
}


/*
 * UpdateTaskQueryString updates the query string stored within the provided
 * Task. If the Task has row values from a multi-row INSERT, those are injected
 * into the provided query (using the provided valuesRTE, which must belong to
 * the query) before deparse occurs (the query's full VALUES list will be
 * restored before this function returns).
 *
 * query               the query to attach to / deparse for the task
 * distributedTableId  stored in task->anchorDistributedTableId for INSERTs
 * valuesRTE           VALUES range table entry of a multi-row INSERT, or NULL
 * task                the task whose query is (re)set
 */
static void
UpdateTaskQueryString(Query *query, Oid distributedTableId, RangeTblEntry *valuesRTE,
					  Task *task)
{
	List *oldValuesLists = NIL;

	if (valuesRTE != NULL)
	{
		Assert(valuesRTE->rtekind == RTE_VALUES);
		Assert(task->rowValuesLists != NULL);

		/* temporarily swap in this task's rows; undone at the end */
		oldValuesLists = valuesRTE->values_lists;
		valuesRTE->values_lists = task->rowValuesLists;
	}

	if (query->commandType != CMD_INSERT)
	{
		/*
		 * For UPDATE and DELETE queries, we may have subqueries and joins, so
		 * we use relation shard list to update shard names and call
		 * pg_get_query_def() directly.
		 */
		List *relationShardList = task->relationShardList;
		UpdateRelationToShardNames((Node *) query, relationShardList);
	}
	else if (ShouldLazyDeparseQuery(task))
	{
		/*
		 * not all insert queries are copied before calling this
		 * function, so we do it here
		 *
		 * The copy is taken while the task's row values are swapped in
		 * above, so the copied query carries this task's VALUES lists.
		 */
		query = copyObject(query);
	}

	if (query->commandType == CMD_INSERT)
	{
		/*
		 * We store this in the task so we can lazily call
		 * deparse_shard_query when the string is needed
		 */
		task->anchorDistributedTableId = distributedTableId;
	}

	SetTaskQueryIfShouldLazyDeparse(task, query);

	if (valuesRTE != NULL)
	{
		/* restore the full VALUES list of the caller's query */
		valuesRTE->values_lists = oldValuesLists;
	}
}


/*
 * UpdateRelationToShardNames walks over the query tree and rewrites the
 * range table entries it finds so that they refer to shards.
 *
 * For each range table entry:
 *   - a distributed intermediate result RTE is converted via
 *     ConvertReadDistributedResultForShard,
 *   - non-relation RTEs and relations that are not Citus tables are left
 *     untouched,
 *   - a Citus table with no entry in relationShardList, or whose entry has
 *     INVALID_SHARD_ID, is replaced by a subquery that returns no rows and
 *     has a NULL constant per column, so that the query still works on the
 *     worker,
 *   - otherwise the shard id is appended to the relation name and the
 *     schema and shard name are recorded on the RTE through
 *     ModifyRangeTblExtraData with CITUS_RTE_SHARD.
 *
 * The RTE is matched to a shard by relation id (see FindRelationShard).
 *
 * The function always returns false for the nodes it handles itself, so on
 * its own it never asks the tree walker to stop early.
 */
bool
UpdateRelationToShardNames(Node *node, List *relationShardList)
{
	uint64 shardId = INVALID_SHARD_ID;

	if (node == NULL)
	{
		return false;
	}

	/* want to look at all RTEs, even in subqueries, CTEs and such */
	if (IsA(node, Query))
	{
		return query_tree_walker((Query *) node, UpdateRelationToShardNames,
								 relationShardList, QTW_EXAMINE_RTES_BEFORE);
	}

	if (!IsA(node, RangeTblEntry))
	{
		return expression_tree_walker(node, UpdateRelationToShardNames,
									  relationShardList);
	}

	RangeTblEntry *newRte = (RangeTblEntry *) node;

	/* checked before the rtekind test below */
	if (IsDistributedIntermediateResultRTE(newRte))
	{
		ConvertReadDistributedResultForShard(newRte, relationShardList);
		return false;
	}

	if (newRte->rtekind != RTE_RELATION)
	{
		return false;
	}

	if (!IsCitusTable(newRte->relid))
	{
		/* leave local tables as is */
		return false;
	}

	RelationShard *relationShard = FindRelationShard(newRte->relid,
													 relationShardList);

	bool replaceRteWithNullValues = relationShard == NULL ||
									relationShard->shardId == INVALID_SHARD_ID;
	if (replaceRteWithNullValues)
	{
		ConvertRteToSubqueryWithEmptyResult(newRte);
		return false;
	}

	shardId = relationShard->shardId;
	Oid relationId = relationShard->relationId;

	/* build the shard name from the relation name and the shard id */
	char *relationName = get_rel_name(relationId);
	AppendShardIdToName(&relationName, shardId);

	Oid schemaId = get_rel_namespace(relationId);
	char *schemaName = get_namespace_name(schemaId);

	ModifyRangeTblExtraData(newRte, CITUS_RTE_SHARD, schemaName, relationName, NIL);

	return false;
}


/*
 * UpdateRelationsToLocalShardTables walks over the query tree and replaces
 * the relation oid of relation RTEs with the oid of the corresponding local
 * shard table. ON CONFLICT ... ON CONSTRAINT references are redirected to
 * the shard's constraint as well. The caller is responsible for ensuring
 * that the resulting Query can be executed locally.
 *
 * Returns true (which is propagated up through the tree walkers) when a
 * relation RTE has no entry in relationShardList, or when
 * ReplaceRelationConstraintByShardConstraint reports the same for a
 * constraint's relation; returns false otherwise.
 */
bool
UpdateRelationsToLocalShardTables(Node *node, List *relationShardList)
{
	if (node == NULL)
	{
		return false;
	}

	/* want to look at all RTEs, even in subqueries, CTEs and such */
	if (IsA(node, Query))
	{
		return query_tree_walker((Query *) node, UpdateRelationsToLocalShardTables,
								 relationShardList, QTW_EXAMINE_RTES_BEFORE);
	}

	if (IsA(node, OnConflictExpr))
	{
		/* handled here; the walker does not descend further into this node */
		OnConflictExpr *onConflict = (OnConflictExpr *) node;

		return ReplaceRelationConstraintByShardConstraint(relationShardList,
														  onConflict);
	}

	if (!IsA(node, RangeTblEntry))
	{
		return expression_tree_walker(node, UpdateRelationsToLocalShardTables,
									  relationShardList);
	}

	RangeTblEntry *newRte = (RangeTblEntry *) node;

	if (newRte->rtekind != RTE_RELATION)
	{
		return false;
	}

	RelationShard *relationShard = FindRelationShard(newRte->relid,
													 relationShardList);

	/* the function should only be called with local shards */
	if (relationShard == NULL)
	{
		return true;
	}

	Oid shardOid = GetTableLocalShardOid(relationShard->relationId,
										 relationShard->shardId);

	newRte->relid = shardOid;

	return false;
}


/*
 * ReplaceRelationConstraintByShardConstraint replaces given OnConflictExpr's
 * constraint id with constraint id of the corresponding shard.
 *
 * The shard constraint is looked up by name: the shard id is appended to
 * the name of the original constraint and that name is resolved on the
 * local shard table.
 *
 * The function returns without calling the walker function on the
 * children of the OnConflictExpr, since we know there will be no possible
 * tables or constraints after this point, by the syntax.
 *
 * Returns false when the OnConflictExpr has no constraint or when the
 * constraint was replaced; returns true when the relation that owns the
 * constraint has no entry in relationShardList. Raises an ERROR when the
 * constraint's relation cannot be determined from the syscache.
 */
static bool
ReplaceRelationConstraintByShardConstraint(List *relationShardList,
										   OnConflictExpr *onConflict)
{
	Oid constraintId = onConflict->constraint;

	/* nothing to replace when ON CONFLICT does not name a constraint */
	if (!OidIsValid(constraintId))
	{
		return false;
	}

	/* find the relation the constraint belongs to */
	Oid constraintRelationId = InvalidOid;

	HeapTuple heapTuple = SearchSysCache1(CONSTROID, ObjectIdGetDatum(constraintId));
	if (HeapTupleIsValid(heapTuple))
	{
		Form_pg_constraint contup = (Form_pg_constraint) GETSTRUCT(heapTuple);

		constraintRelationId = contup->conrelid;
		ReleaseSysCache(heapTuple);
	}

	/* either the syscache lookup failed or the constraint has no relation */
	if (!OidIsValid(constraintRelationId))
	{
		ereport(ERROR,
				(errmsg("Invalid relation id (%u) for constraint: %s",
						constraintRelationId, get_constraint_name(constraintId))));
	}

	RelationShard *relationShard = FindRelationShard(constraintRelationId,
													 relationShardList);

	if (relationShard != NULL)
	{
		char *constraintName = get_constraint_name(constraintId);

		AppendShardIdToName(&constraintName, relationShard->shardId);

		Oid shardOid = GetTableLocalShardOid(relationShard->relationId,
											 relationShard->shardId);

		/* third argument is false, i.e. missing_ok is not set */
		Oid shardConstraintId =
			get_relation_constraint_oid(shardOid, constraintName, false);

		onConflict->constraint = shardConstraintId;

		return false;
	}

	return true;
}


/*
 * ConvertReadDistributedResultForShard converts a
 * read_distributed_intermediate_result('<resultId>') call to a
 * read_intermediate_result(ARRAY[..fragments...]) call for the fragments belonging to
 * a particular shard.
 *
 * The shard is obtained from the relationShardList. When the list has no
 * entry for the distributed result, the array of fragments is built from an
 * empty list.
 *
 * The first RangeTblFunction of the RTE is modified in place: its funcexpr
 * is replaced by the newly built function call.
 */
static void
ConvertReadDistributedResultForShard(RangeTblEntry *rte,
									 List *relationShardList)
{
	char *resultId = FindDistributedResultId(rte);

	DistributedResult *distributedResult = GetNamedDistributedResult(resultId);

	List *sortedResultIds = NIL;

	RelationShard *relationShard = FindDistributedResultRelationShard(relationShardList,
																	  resultId);
	if (relationShard != NULL)
	{
		/* pick the fragments that belong to this shard */
		int shardIndex = relationShard->shardIndex;

		DistributedResultShard *resultShard =
			&(distributedResult->resultShards[shardIndex]);

		List *resultIdList = resultShard->fragmentList;

		/* sort result ids for consistent test output */
		sortedResultIds = SortList(resultIdList, pg_qsort_strcmp);
	}
	else
	{
		/* no matching relation shard, use empty array */
	}

	bool useBinaryFormat = distributedResult->binaryFormat;

	/* generate the query on the intermediate result */
	RangeTblFunction *rangeTableFunction = (RangeTblFunction *) linitial(
		rte->functions);

	/* build read_intermediate_result call */

	/* first argument: text[] constant holding the (sorted) fragment ids */
	Const *resultIdConst = makeNode(Const);
	resultIdConst->consttype = TEXTARRAYOID;
	resultIdConst->consttypmod = -1;
	resultIdConst->constlen = -1;
	resultIdConst->constvalue = PointerGetDatum(strlist_to_textarray(sortedResultIds));
	resultIdConst->constbyval = false;
	resultIdConst->constisnull = false;
	resultIdConst->location = -1;

	/* binary copy format unless the distributed result is not in binary format */
	Oid copyFormatId = BinaryCopyFormatId();
	if (!useBinaryFormat)
	{
		copyFormatId = TextCopyFormatId();
	}

	/* second argument: copy format constant, passed by value as an Oid */
	Const *resultFormatConst = makeNode(Const);
	resultFormatConst->consttype = CitusCopyFormatTypeId();
	resultFormatConst->consttypmod = -1;
	resultFormatConst->constlen = 4;
	resultFormatConst->constvalue = ObjectIdGetDatum(copyFormatId);
	resultFormatConst->constbyval = true;
	resultFormatConst->constisnull = false;
	resultFormatConst->location = -1;

	/* build the call to read_intermediate_result */
	FuncExpr *funcExpr = makeNode(FuncExpr);
	funcExpr->funcid = CitusReadIntermediateResultArrayFuncId();
	funcExpr->funcretset = true;
	funcExpr->funcvariadic = false;
	funcExpr->funcformat = 0;
	funcExpr->funccollid = 0;
	funcExpr->inputcollid = 0;
	funcExpr->location = -1;
	funcExpr->args = list_make2(resultIdConst, resultFormatConst);

	/* replace function expression in RTE */
	rangeTableFunction->funcexpr = (Node *) funcExpr;
}


/*
 * FindRelationShard finds the RelationShard for shard relation with
 * given Oid if exists in given relationShardList. Otherwise, returns NULL.
 *
 * The first list entry whose relationId equals inputRelationId wins.
 */
static RelationShard *
FindRelationShard(Oid inputRelationId, List *relationShardList)
{
	RelationShard *relationShard = NULL;

	/*
	 * Search for the restrictions associated with the RTE. There better be
	 * some, otherwise this query wouldn't be eligible as a router query.
	 * FIXME: We should probably use a hashtable here, to do efficient lookup.
	 */
	foreach_ptr(relationShard, relationShardList)
	{
		if (inputRelationId == relationShard->relationId)
		{
			return relationShard;
		}
	}

	return NULL;
}


/*
 * FindDistributedResultRelationShard finds a relation shard for a distributed
 * result with the name given by resultId, or NULL if it's not in the list.
 *
 * Only entries whose shardedRelationType is SHARDED_RESULT are considered;
 * names are compared over at most NAMEDATALEN bytes.
 */
static RelationShard *
FindDistributedResultRelationShard(List *relationShardList, char *resultId)
{
	RelationShard *relationShard = NULL;

	foreach_ptr(relationShard, relationShardList)
	{
		if (relationShard->shardedRelationType == SHARDED_RESULT &&
			strncmp(relationShard->resultId, resultId, NAMEDATALEN) == 0)
		{
			return relationShard;
		}
	}

	return NULL;
}


/*
 * ConvertRteToSubqueryWithEmptyResult converts given relation RTE into
 * subquery RTE that returns no results.
 *
 * The subquery has one NULL constant per non-dropped column of the
 * relation, typed and named after that column, and a constant-false qual
 * in its join tree. The RTE is modified in place: rtekind becomes
 * RTE_SUBQUERY and the alias is set to a copy of the RTE's eref.
 */
static void
ConvertRteToSubqueryWithEmptyResult(RangeTblEntry *rte)
{
	/* the relation is opened without taking a lock (NoLock) */
	Relation relation = table_open(rte->relid, NoLock);
	TupleDesc tupleDescriptor = RelationGetDescr(relation);
	int columnCount = tupleDescriptor->natts;
	List *targetList = NIL;

	for (int columnIndex = 0; columnIndex < columnCount; columnIndex++)
	{
		FormData_pg_attribute *attributeForm = TupleDescAttr(tupleDescriptor,
															 columnIndex);

		/* dropped columns get no target entry */
		if (attributeForm->attisdropped)
		{
			continue;
		}

		StringInfo resname = makeStringInfo();
		Const *constValue = makeNullConst(attributeForm->atttypid,
										  attributeForm->atttypmod,
										  attributeForm->attcollation);

		/* copy the column name so it outlives the relation descriptor */
		appendStringInfo(resname, "%s", attributeForm->attname.data);

		TargetEntry *targetEntry = makeNode(TargetEntry);
		targetEntry->expr = (Expr *) constValue;

		/*
		 * NOTE(review): resno is set to the zero-based column index (with
		 * gaps where columns are dropped). PostgreSQL target lists
		 * conventionally number resno from 1 - confirm this is intentional
		 * and harmless for the deparser.
		 */
		targetEntry->resno = columnIndex;
		targetEntry->resname = resname->data;

		targetList = lappend(targetList, targetEntry);
	}

	table_close(relation, NoLock);

	/* WHERE false: the subquery never returns a row */
	FromExpr *joinTree = makeNode(FromExpr);
	joinTree->quals = makeBoolConst(false, false);

	Query *subquery = makeNode(Query);
	subquery->commandType = CMD_SELECT;
	subquery->querySource = QSRC_ORIGINAL;
	subquery->canSetTag = true;
	subquery->targetList = targetList;
	subquery->jointree = joinTree;

	rte->rtekind = RTE_SUBQUERY;
	rte->subquery = subquery;
	rte->alias = copyObject(rte->eref);
}


/*
 * ShouldLazyDeparseQuery returns true if we should lazily deparse the query
 * when adding it to the task. Right now it simply checks if any shards on the
 * local node can be used for the task.
 */
static bool
ShouldLazyDeparseQuery(Task *task)
{
	return TaskAccessesLocalNode(task);
}


/*
 * SetTaskQueryIfShouldLazyDeparse attaches the query to the task so that it can be used during
 * execution. If local execution can possibly take place it sets task->jobQueryReferenceForLazyDeparsing.
 * If not it deparses the query and sets queryStringLazy, to avoid blowing the
 * size of the task unnecessarily.
 *
 * In the lazy case the Query pointer itself is stored (no copy is made
 * here) and the task query type becomes TASK_QUERY_OBJECT.
 */
void
SetTaskQueryIfShouldLazyDeparse(Task *task, Query *query)
{
	if (ShouldLazyDeparseQuery(task))
	{
		task->taskQuery.queryType = TASK_QUERY_OBJECT;
		task->taskQuery.data.jobQueryReferenceForLazyDeparsing = query;
		task->queryCount = 1;
		return;
	}

	SetTaskQueryString(task, DeparseTaskQuery(task, query));
}


/*
 * SetTaskQueryString attaches the query string to the task so that it can be
 * used during execution. It also unsets jobQueryReferenceForLazyDeparsing to be sure
 * these are kept in sync.
 *
 * A NULL queryString marks the task as TASK_QUERY_NULL with a query count
 * of 0; otherwise the task becomes TASK_QUERY_TEXT with a query count of 1.
 *
 * NOTE(review): no field is cleared explicitly here; presumably
 * taskQuery.data is a union so that storing the string replaces the query
 * reference - confirm against the Task definition.
 */
void
SetTaskQueryString(Task *task, char *queryString)
{
	if (queryString == NULL)
	{
		task->taskQuery.queryType = TASK_QUERY_NULL;
		task->queryCount = 0;
	}
	else
	{
		task->taskQuery.queryType = TASK_QUERY_TEXT;
		task->taskQuery.data.queryStringLazy = queryString;
		task->queryCount = 1;
	}
}


/*
 * SetTaskQueryStringList sets the queryStringList of the given task.
 *
 * The list must not be empty. The task query type becomes
 * TASK_QUERY_TEXT_LIST and queryCount is set to the length of the list.
 */
void
SetTaskQueryStringList(Task *task, List *queryStringList)
{
	Assert(queryStringList != NIL);
	task->taskQuery.queryType = TASK_QUERY_TEXT_LIST;
	task->taskQuery.data.queryStringList = queryStringList;
	task->queryCount = list_length(queryStringList);
}


/*
 * DeparseTaskQuery is a general way of deparsing a query based on a task.
 *
 * INSERT queries are deparsed with deparse_shard_query using the task's
 * anchorDistributedTableId and anchorShardId; every other command type goes
 * through pg_get_query_def. Returns the data buffer of a newly made
 * StringInfo.
 */
static char *
DeparseTaskQuery(Task *task, Query *query)
{
	StringInfo queryString = makeStringInfo();

	if (query->commandType == CMD_INSERT)
	{
		/*
		 * For INSERT queries we cannot use pg_get_query_def. Mainly because we
		 * cannot run UpdateRelationToShardNames on an INSERT query. This is
		 * because the PG deparsing logic fails when trying to insert into a
		 * RTE_FUNCTION (which is what will happen if you call
		 * UpdateRelationToShardNames).
		 */
		deparse_shard_query(query, task->anchorDistributedTableId, task->anchorShardId,
							queryString);
	}
	else
	{
		pg_get_query_def(query, queryString);
	}

	return queryString->data;
}


/*
 * GetTaskQueryType returns the type of the task query.
 */
int
GetTaskQueryType(Task *task)
{
	return task->taskQuery.queryType;
}


/*
 * TaskQueryStringAtIndex returns query at given index among the possibly
 * multiple queries that a task can have.
 *
 * The index is only used when the task holds a list of query strings; for
 * every other task query type the result of TaskQueryString is returned
 * regardless of index.
 */
char *
TaskQueryStringAtIndex(Task *task, int index)
{
	Assert(index < task->queryCount);

	int taskQueryType = GetTaskQueryType(task);
	if (taskQueryType == TASK_QUERY_TEXT_LIST)
	{
		return list_nth(task->taskQuery.data.queryStringList, index);
	}

	return TaskQueryString(task);
}


/*
 * TaskQueryString generates task query string text if missing.
 *
 * For performance reasons, the queryString is generated lazily. For example
 * for local queries it is usually not needed to generate it, so this way we
 * can skip the expensive deparsing+parsing.
 *
 * Behavior per task query type:
 *   TASK_QUERY_NULL       raises an ERROR
 *   TASK_QUERY_TEXT_LIST  returns the list joined with ';' (joined on every
 *                         call, the result is not stored on the task)
 *   TASK_QUERY_TEXT       returns the stored string
 *   TASK_QUERY_OBJECT     deparses the stored query, stores the resulting
 *                         string on the task via SetTaskQueryString and
 *                         returns it
 */
char *
TaskQueryString(Task *task)
{
	int taskQueryType = GetTaskQueryType(task);
	if (taskQueryType == TASK_QUERY_NULL)
	{
		/*
		 * if task query type is TASK_QUERY_NULL then the data will be NULL,
		 * this is unexpected state
		 */
		ereport(ERROR,
				(errcode(ERRCODE_INTERNAL_ERROR),
				 errmsg("unexpected task query state: task query type is null"),
				 errdetail("Please report this to the Citus core team.")));
	}
	else if (taskQueryType == TASK_QUERY_TEXT_LIST)
	{
		return StringJoin(task->taskQuery.data.queryStringList, ';');
	}
	else if (taskQueryType == TASK_QUERY_TEXT)
	{
		return task->taskQuery.data.queryStringLazy;
	}

	Query *jobQueryReferenceForLazyDeparsing =
		task->taskQuery.data.jobQueryReferenceForLazyDeparsing;

	/*
	 * At this point task query type should be TASK_QUERY_OBJECT.
	 */
	Assert(task->taskQuery.queryType == TASK_QUERY_OBJECT &&
		   jobQueryReferenceForLazyDeparsing != NULL);

	/*
	 * Switch to the memory context of task->jobQueryReferenceForLazyDeparsing before generating the query
	 * string. This way the query string is not freed in between multiple
	 * executions of a prepared statement. Except when UpdateTaskQueryString is
	 * used to set task->jobQueryReferenceForLazyDeparsing, in that case it is freed but it will be set to
	 * NULL on the next execution of the query because UpdateTaskQueryString
	 * does that.
	 */
	MemoryContext previousContext = MemoryContextSwitchTo(GetMemoryChunkContext(
															  jobQueryReferenceForLazyDeparsing));
	char *queryString = DeparseTaskQuery(task, jobQueryReferenceForLazyDeparsing);
	MemoryContextSwitchTo(previousContext);

	/* from here on the task holds text (TASK_QUERY_TEXT), not the query object */
	SetTaskQueryString(task, queryString);
	return task->taskQuery.data.queryStringLazy;
}