mirror of https://github.com/citusdata/citus.git
Remove uninstantiated qual logic, use attribute equivalences
In this PR, we aim to deduce whether each RTE_RELATION is joined with at least one other RTE_RELATION on their partition keys. If every RTE_RELATION follows this rule, we can conclude that all RTE_RELATIONs are joined on their partition keys. In order to do that, we introduced a new equivalence class, namely AttributeEquivalenceClass. In very simple words, an AttributeEquivalenceClass is identified by a unique id and consists of a list of AttributeEquivalenceMembers. Each AttributeEquivalenceMember is designed to identify an attribute uniquely within the whole query. The need for this arises because varno attributes are defined only within a single level of a query. Instead, here we want to identify each RTE_RELATION uniquely and try to find equalities among the RTE_RELATIONs' partition keys. Whenever we find an equality clause A = B, where both A and B originate from relation attributes (i.e., not random expressions), we create an AttributeEquivalenceClass to record this knowledge. If we later find another equivalence B = C, we create another AttributeEquivalenceClass. Finally, we can apply transitivity rules and generate a new AttributeEquivalenceClass which includes A, B and C. Note that equality among the members is identified by the varattno and rteIdentity. Each equality among RTE_RELATIONs is saved using an AttributeEquivalenceClass, where each member attribute is identified by an AttributeEquivalenceMember. In the final step, we try to generate a common attribute equivalence class that holds as many AttributeEquivalenceMembers as possible whose attributes are partition keys. pull/1268/head
parent
12860b1316
commit
1cb6a34ba8
|
@ -26,11 +26,13 @@
|
||||||
#include "executor/executor.h"
|
#include "executor/executor.h"
|
||||||
#include "nodes/makefuncs.h"
|
#include "nodes/makefuncs.h"
|
||||||
#include "nodes/nodeFuncs.h"
|
#include "nodes/nodeFuncs.h"
|
||||||
|
#include "parser/parsetree.h"
|
||||||
|
#include "optimizer/pathnode.h"
|
||||||
#include "optimizer/planner.h"
|
#include "optimizer/planner.h"
|
||||||
#include "utils/memutils.h"
|
#include "utils/memutils.h"
|
||||||
|
|
||||||
|
|
||||||
static List *relationRestrictionContextList = NIL;
|
static List *plannerRestrictionContextList = NIL;
|
||||||
|
|
||||||
/* create custom scan methods for separate executors */
|
/* create custom scan methods for separate executors */
|
||||||
static CustomScanMethods RealTimeCustomScanMethods = {
|
static CustomScanMethods RealTimeCustomScanMethods = {
|
||||||
|
@ -57,7 +59,10 @@ static CustomScanMethods DelayedErrorCustomScanMethods = {
|
||||||
/* local function forward declarations */
|
/* local function forward declarations */
|
||||||
static PlannedStmt * CreateDistributedPlan(PlannedStmt *localPlan, Query *originalQuery,
|
static PlannedStmt * CreateDistributedPlan(PlannedStmt *localPlan, Query *originalQuery,
|
||||||
Query *query, ParamListInfo boundParams,
|
Query *query, ParamListInfo boundParams,
|
||||||
RelationRestrictionContext *restrictionContext);
|
PlannerRestrictionContext *
|
||||||
|
plannerRestrictionContext);
|
||||||
|
static void AssignRTEIdentities(Query *queryTree);
|
||||||
|
static void AssignRTEIdentity(RangeTblEntry *rangeTableEntry, int rteIdentifier);
|
||||||
static Node * SerializeMultiPlan(struct MultiPlan *multiPlan);
|
static Node * SerializeMultiPlan(struct MultiPlan *multiPlan);
|
||||||
static MultiPlan * DeserializeMultiPlan(Node *node);
|
static MultiPlan * DeserializeMultiPlan(Node *node);
|
||||||
static PlannedStmt * FinalizePlan(PlannedStmt *localPlan, MultiPlan *multiPlan);
|
static PlannedStmt * FinalizePlan(PlannedStmt *localPlan, MultiPlan *multiPlan);
|
||||||
|
@ -65,9 +70,11 @@ static PlannedStmt * FinalizeNonRouterPlan(PlannedStmt *localPlan, MultiPlan *mu
|
||||||
CustomScan *customScan);
|
CustomScan *customScan);
|
||||||
static PlannedStmt * FinalizeRouterPlan(PlannedStmt *localPlan, CustomScan *customScan);
|
static PlannedStmt * FinalizeRouterPlan(PlannedStmt *localPlan, CustomScan *customScan);
|
||||||
static void CheckNodeIsDumpable(Node *node);
|
static void CheckNodeIsDumpable(Node *node);
|
||||||
static RelationRestrictionContext * CreateAndPushRestrictionContext(void);
|
static List * CopyPlanParamList(List *originalPlanParamList);
|
||||||
static RelationRestrictionContext * CurrentRestrictionContext(void);
|
static PlannerRestrictionContext * CreateAndPushPlannerRestrictionContext(void);
|
||||||
static void PopRestrictionContext(void);
|
static RelationRestrictionContext * CurrentRelationRestrictionContext(void);
|
||||||
|
static JoinRestrictionContext * CurrentJoinRestrictionContext(void);
|
||||||
|
static void PopPlannerRestrictionContext(void);
|
||||||
static bool HasUnresolvedExternParamsWalker(Node *expression, ParamListInfo boundParams);
|
static bool HasUnresolvedExternParamsWalker(Node *expression, ParamListInfo boundParams);
|
||||||
|
|
||||||
|
|
||||||
|
@ -78,7 +85,7 @@ multi_planner(Query *parse, int cursorOptions, ParamListInfo boundParams)
|
||||||
PlannedStmt *result = NULL;
|
PlannedStmt *result = NULL;
|
||||||
bool needsDistributedPlanning = NeedsDistributedPlanning(parse);
|
bool needsDistributedPlanning = NeedsDistributedPlanning(parse);
|
||||||
Query *originalQuery = NULL;
|
Query *originalQuery = NULL;
|
||||||
RelationRestrictionContext *restrictionContext = NULL;
|
PlannerRestrictionContext *plannerRestrictionContext = NULL;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* standard_planner scribbles on its input, but for deparsing we need the
|
* standard_planner scribbles on its input, but for deparsing we need the
|
||||||
|
@ -88,30 +95,11 @@ multi_planner(Query *parse, int cursorOptions, ParamListInfo boundParams)
|
||||||
{
|
{
|
||||||
originalQuery = copyObject(parse);
|
originalQuery = copyObject(parse);
|
||||||
|
|
||||||
/*
|
AssignRTEIdentities(parse);
|
||||||
* We implement INSERT INTO .. SELECT by pushing down the SELECT to
|
|
||||||
* each shard. To compute that we use the router planner, by adding
|
|
||||||
* an "uninstantiated" constraint that the partition column be equal to a
|
|
||||||
* certain value. standard_planner() distributes that constraint to
|
|
||||||
* the baserestrictinfos to all the tables where it knows how to push
|
|
||||||
* the restriction safely. An example is that the tables that are
|
|
||||||
* connected via equi joins.
|
|
||||||
*
|
|
||||||
* The router planner then iterates over the target table's shards,
|
|
||||||
* for each we replace the "uninstantiated" restriction, with one that
|
|
||||||
* PruneShardList() handles, and then generate a query for that
|
|
||||||
* individual shard. If any of the involved tables don't prune down
|
|
||||||
* to a single shard, or if the pruned shards aren't colocated,
|
|
||||||
* we error out.
|
|
||||||
*/
|
|
||||||
if (InsertSelectQuery(parse))
|
|
||||||
{
|
|
||||||
AddUninstantiatedPartitionRestriction(parse);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* create a restriction context and put it at the beginning of the context list */
|
/* create a restriction context and put it at the beginning of the context list */
|
||||||
restrictionContext = CreateAndPushRestrictionContext();
|
plannerRestrictionContext = CreateAndPushPlannerRestrictionContext();
|
||||||
|
|
||||||
PG_TRY();
|
PG_TRY();
|
||||||
{
|
{
|
||||||
|
@ -125,23 +113,92 @@ multi_planner(Query *parse, int cursorOptions, ParamListInfo boundParams)
|
||||||
if (needsDistributedPlanning)
|
if (needsDistributedPlanning)
|
||||||
{
|
{
|
||||||
result = CreateDistributedPlan(result, originalQuery, parse,
|
result = CreateDistributedPlan(result, originalQuery, parse,
|
||||||
boundParams, restrictionContext);
|
boundParams, plannerRestrictionContext);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
PG_CATCH();
|
PG_CATCH();
|
||||||
{
|
{
|
||||||
PopRestrictionContext();
|
PopPlannerRestrictionContext();
|
||||||
PG_RE_THROW();
|
PG_RE_THROW();
|
||||||
}
|
}
|
||||||
PG_END_TRY();
|
PG_END_TRY();
|
||||||
|
|
||||||
/* remove the context from the context list */
|
/* remove the context from the context list */
|
||||||
PopRestrictionContext();
|
PopPlannerRestrictionContext();
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* AssignRTEIdentities assigns unique identities to the
|
||||||
|
* RTE_RELATIONs in the given query.
|
||||||
|
*
|
||||||
|
* To be able to track individual RTEs through postgres' query
|
||||||
|
* planning, we need to be able to figure out whether an RTE is
|
||||||
|
* actually a copy of another, rather than a different one. We
|
||||||
|
* simply number the RTEs starting from 1.
|
||||||
|
*
|
||||||
|
* Note that we're only interested in RTE_RELATIONs and thus assigning
|
||||||
|
* identifiers to those RTEs only.
|
||||||
|
*/
|
||||||
|
static void
|
||||||
|
AssignRTEIdentities(Query *queryTree)
|
||||||
|
{
|
||||||
|
List *rangeTableList = NIL;
|
||||||
|
ListCell *rangeTableCell = NULL;
|
||||||
|
int rteIdentifier = 1;
|
||||||
|
|
||||||
|
/* extract range table entries for simple relations only */
|
||||||
|
ExtractRangeTableEntryWalker((Node *) queryTree, &rangeTableList);
|
||||||
|
|
||||||
|
foreach(rangeTableCell, rangeTableList)
|
||||||
|
{
|
||||||
|
RangeTblEntry *rangeTableEntry = (RangeTblEntry *) lfirst(rangeTableCell);
|
||||||
|
|
||||||
|
if (rangeTableEntry->rtekind != RTE_RELATION)
|
||||||
|
{
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
AssignRTEIdentity(rangeTableEntry, rteIdentifier++);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* AssignRTEIdentity assigns the given rteIdentifier to the given range table
|
||||||
|
* entry.
|
||||||
|
*
|
||||||
|
* To be able to track RTEs through postgres' query planning, which copies,
|
||||||
|
* duplicates, and modifies them, we sometimes need to figure out whether two
|
||||||
|
* RTEs are copies of the same original RTE. For that we, hackishly, use a
|
||||||
|
* field normally unused in RTE_RELATION RTEs.
|
||||||
|
*
|
||||||
|
* The assigned identifier better be unique within a plantree.
|
||||||
|
*/
|
||||||
|
static void
|
||||||
|
AssignRTEIdentity(RangeTblEntry *rangeTableEntry, int rteIdentifier)
|
||||||
|
{
|
||||||
|
Assert(rangeTableEntry->rtekind == RTE_RELATION);
|
||||||
|
Assert(rangeTableEntry->values_lists == NIL);
|
||||||
|
|
||||||
|
rangeTableEntry->values_lists = list_make1_int(rteIdentifier);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* GetRTEIdentity returns the identity assigned with AssignRTEIdentity. */
|
||||||
|
int
|
||||||
|
GetRTEIdentity(RangeTblEntry *rte)
|
||||||
|
{
|
||||||
|
Assert(rte->rtekind == RTE_RELATION);
|
||||||
|
Assert(IsA(rte->values_lists, IntList));
|
||||||
|
Assert(list_length(rte->values_lists) == 1);
|
||||||
|
|
||||||
|
return linitial_int(rte->values_lists);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* IsModifyCommand returns true if the query performs modifications, false
|
* IsModifyCommand returns true if the query performs modifications, false
|
||||||
* otherwise.
|
* otherwise.
|
||||||
|
@ -187,7 +244,7 @@ IsModifyMultiPlan(MultiPlan *multiPlan)
|
||||||
static PlannedStmt *
|
static PlannedStmt *
|
||||||
CreateDistributedPlan(PlannedStmt *localPlan, Query *originalQuery, Query *query,
|
CreateDistributedPlan(PlannedStmt *localPlan, Query *originalQuery, Query *query,
|
||||||
ParamListInfo boundParams,
|
ParamListInfo boundParams,
|
||||||
RelationRestrictionContext *restrictionContext)
|
PlannerRestrictionContext *plannerRestrictionContext)
|
||||||
{
|
{
|
||||||
MultiPlan *distributedPlan = NULL;
|
MultiPlan *distributedPlan = NULL;
|
||||||
PlannedStmt *resultPlan = NULL;
|
PlannedStmt *resultPlan = NULL;
|
||||||
|
@ -201,7 +258,9 @@ CreateDistributedPlan(PlannedStmt *localPlan, Query *originalQuery, Query *query
|
||||||
if (IsModifyCommand(query))
|
if (IsModifyCommand(query))
|
||||||
{
|
{
|
||||||
/* modifications are always routed through the same planner/executor */
|
/* modifications are always routed through the same planner/executor */
|
||||||
distributedPlan = CreateModifyPlan(originalQuery, query, restrictionContext);
|
distributedPlan =
|
||||||
|
CreateModifyPlan(originalQuery, query, plannerRestrictionContext);
|
||||||
|
|
||||||
Assert(distributedPlan);
|
Assert(distributedPlan);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
@ -214,7 +273,11 @@ CreateDistributedPlan(PlannedStmt *localPlan, Query *originalQuery, Query *query
|
||||||
*/
|
*/
|
||||||
if (EnableRouterExecution)
|
if (EnableRouterExecution)
|
||||||
{
|
{
|
||||||
distributedPlan = CreateRouterPlan(originalQuery, query, restrictionContext);
|
RelationRestrictionContext *relationRestrictionContext =
|
||||||
|
plannerRestrictionContext->relationRestrictionContext;
|
||||||
|
|
||||||
|
distributedPlan = CreateRouterPlan(originalQuery, query,
|
||||||
|
relationRestrictionContext);
|
||||||
|
|
||||||
/* for debugging it's useful to display why query was not router plannable */
|
/* for debugging it's useful to display why query was not router plannable */
|
||||||
if (distributedPlan && distributedPlan->planningError)
|
if (distributedPlan && distributedPlan->planningError)
|
||||||
|
@ -566,6 +629,36 @@ CheckNodeIsDumpable(Node *node)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* multi_join_restriction_hook is a hook called by postgresql standard planner
|
||||||
|
* to notify us about various planning information regarding joins. We use
|
||||||
|
* it to learn about the joining column.
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
multi_join_restriction_hook(PlannerInfo *root,
|
||||||
|
RelOptInfo *joinrel,
|
||||||
|
RelOptInfo *outerrel,
|
||||||
|
RelOptInfo *innerrel,
|
||||||
|
JoinType jointype,
|
||||||
|
JoinPathExtraData *extra)
|
||||||
|
{
|
||||||
|
JoinRestrictionContext *joinContext = NULL;
|
||||||
|
JoinRestriction *joinRestriction = palloc0(sizeof(JoinRestriction));
|
||||||
|
List *restrictInfoList = NIL;
|
||||||
|
|
||||||
|
restrictInfoList = extra->restrictlist;
|
||||||
|
joinContext = CurrentJoinRestrictionContext();
|
||||||
|
Assert(joinContext != NULL);
|
||||||
|
|
||||||
|
joinRestriction->joinType = jointype;
|
||||||
|
joinRestriction->joinRestrictInfoList = restrictInfoList;
|
||||||
|
joinRestriction->plannerInfo = root;
|
||||||
|
|
||||||
|
joinContext->joinRestrictionList =
|
||||||
|
lappend(joinContext->joinRestrictionList, joinRestriction);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* multi_relation_restriction_hook is a hook called by postgresql standard planner
|
* multi_relation_restriction_hook is a hook called by postgresql standard planner
|
||||||
* to notify us about various planning information regarding a relation. We use
|
* to notify us about various planning information regarding a relation. We use
|
||||||
|
@ -589,7 +682,7 @@ multi_relation_restriction_hook(PlannerInfo *root, RelOptInfo *relOptInfo, Index
|
||||||
distributedTable = IsDistributedTable(rte->relid);
|
distributedTable = IsDistributedTable(rte->relid);
|
||||||
localTable = !distributedTable;
|
localTable = !distributedTable;
|
||||||
|
|
||||||
restrictionContext = CurrentRestrictionContext();
|
restrictionContext = CurrentRelationRestrictionContext();
|
||||||
Assert(restrictionContext != NULL);
|
Assert(restrictionContext != NULL);
|
||||||
|
|
||||||
relationRestriction = palloc0(sizeof(RelationRestriction));
|
relationRestriction = palloc0(sizeof(RelationRestriction));
|
||||||
|
@ -599,8 +692,16 @@ multi_relation_restriction_hook(PlannerInfo *root, RelOptInfo *relOptInfo, Index
|
||||||
relationRestriction->relOptInfo = relOptInfo;
|
relationRestriction->relOptInfo = relOptInfo;
|
||||||
relationRestriction->distributedRelation = distributedTable;
|
relationRestriction->distributedRelation = distributedTable;
|
||||||
relationRestriction->plannerInfo = root;
|
relationRestriction->plannerInfo = root;
|
||||||
|
relationRestriction->parentPlannerInfo = root->parent_root;
|
||||||
relationRestriction->prunedShardIntervalList = NIL;
|
relationRestriction->prunedShardIntervalList = NIL;
|
||||||
|
|
||||||
|
/* see comments on GetVarFromAssignedParam() */
|
||||||
|
if (relationRestriction->parentPlannerInfo)
|
||||||
|
{
|
||||||
|
relationRestriction->parentPlannerParamList =
|
||||||
|
CopyPlanParamList(root->parent_root->plan_params);
|
||||||
|
}
|
||||||
|
|
||||||
restrictionContext->hasDistributedRelation |= distributedTable;
|
restrictionContext->hasDistributedRelation |= distributedTable;
|
||||||
restrictionContext->hasLocalRelation |= localTable;
|
restrictionContext->hasLocalRelation |= localTable;
|
||||||
|
|
||||||
|
@ -622,51 +723,111 @@ multi_relation_restriction_hook(PlannerInfo *root, RelOptInfo *relOptInfo, Index
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* CreateAndPushRestrictionContext creates a new restriction context, inserts it to the
|
* CopyPlanParamList deep copies the input PlannerParamItem list and returns the newly
|
||||||
* beginning of the context list, and returns the newly created context.
|
* allocated list.
|
||||||
|
* Note that we cannot use copyObject() function directly since there is no support for
|
||||||
|
* copying PlannerParamItem structs.
|
||||||
*/
|
*/
|
||||||
static RelationRestrictionContext *
|
static List *
|
||||||
CreateAndPushRestrictionContext(void)
|
CopyPlanParamList(List *originalPlanParamList)
|
||||||
{
|
{
|
||||||
RelationRestrictionContext *restrictionContext =
|
ListCell *planParamCell = NULL;
|
||||||
|
List *copiedPlanParamList = NIL;
|
||||||
|
|
||||||
|
foreach(planParamCell, originalPlanParamList)
|
||||||
|
{
|
||||||
|
PlannerParamItem *originalParamItem = lfirst(planParamCell);
|
||||||
|
PlannerParamItem *copiedParamItem = makeNode(PlannerParamItem);
|
||||||
|
|
||||||
|
copiedParamItem->paramId = originalParamItem->paramId;
|
||||||
|
copiedParamItem->item = copyObject(originalParamItem->item);
|
||||||
|
|
||||||
|
copiedPlanParamList = lappend(copiedPlanParamList, copiedParamItem);
|
||||||
|
}
|
||||||
|
|
||||||
|
return copiedPlanParamList;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* CreateAndPushPlannerRestrictionContext creates a new planner restriction context.
|
||||||
|
* Later, it creates a relation restriction context and a join restriction
|
||||||
|
* context, and sets those contexts in the planner restriction context. Finally,
|
||||||
|
* the planner restriction context is inserted to the beginning of the
|
||||||
|
* plannerRestrictionContextList and it is returned.
|
||||||
|
*/
|
||||||
|
static PlannerRestrictionContext *
|
||||||
|
CreateAndPushPlannerRestrictionContext(void)
|
||||||
|
{
|
||||||
|
PlannerRestrictionContext *plannerRestrictionContext =
|
||||||
|
palloc0(sizeof(PlannerRestrictionContext));
|
||||||
|
|
||||||
|
plannerRestrictionContext->relationRestrictionContext =
|
||||||
palloc0(sizeof(RelationRestrictionContext));
|
palloc0(sizeof(RelationRestrictionContext));
|
||||||
|
|
||||||
|
plannerRestrictionContext->joinRestrictionContext =
|
||||||
|
palloc0(sizeof(JoinRestrictionContext));
|
||||||
|
|
||||||
/* we'll apply logical AND as we add tables */
|
/* we'll apply logical AND as we add tables */
|
||||||
restrictionContext->allReferenceTables = true;
|
plannerRestrictionContext->relationRestrictionContext->allReferenceTables = true;
|
||||||
|
|
||||||
relationRestrictionContextList = lcons(restrictionContext,
|
plannerRestrictionContextList = lcons(plannerRestrictionContext,
|
||||||
relationRestrictionContextList);
|
plannerRestrictionContextList);
|
||||||
|
|
||||||
return restrictionContext;
|
return plannerRestrictionContext;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* CurrentRestrictionContext returns the last restriction context from the
|
* CurrentRelationRestrictionContext returns the last restriction context from the
|
||||||
* list.
|
* relationRestrictionContext list.
|
||||||
*/
|
*/
|
||||||
static RelationRestrictionContext *
|
static RelationRestrictionContext *
|
||||||
CurrentRestrictionContext(void)
|
CurrentRelationRestrictionContext(void)
|
||||||
{
|
{
|
||||||
RelationRestrictionContext *restrictionContext = NULL;
|
PlannerRestrictionContext *plannerRestrictionContext = NULL;
|
||||||
|
RelationRestrictionContext *relationRestrictionContext = NULL;
|
||||||
|
|
||||||
Assert(relationRestrictionContextList != NIL);
|
Assert(plannerRestrictionContextList != NIL);
|
||||||
|
|
||||||
restrictionContext =
|
plannerRestrictionContext =
|
||||||
(RelationRestrictionContext *) linitial(relationRestrictionContextList);
|
(PlannerRestrictionContext *) linitial(plannerRestrictionContextList);
|
||||||
|
|
||||||
return restrictionContext;
|
relationRestrictionContext = plannerRestrictionContext->relationRestrictionContext;
|
||||||
|
|
||||||
|
return relationRestrictionContext;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* PopRestrictionContext removes the most recently added restriction context from
|
* CurrentJoinRestrictionContext returns the last restriction context from the
|
||||||
* context list. The function assumes the list is not empty.
|
* list.
|
||||||
|
*/
|
||||||
|
static JoinRestrictionContext *
|
||||||
|
CurrentJoinRestrictionContext(void)
|
||||||
|
{
|
||||||
|
PlannerRestrictionContext *plannerRestrictionContext = NULL;
|
||||||
|
JoinRestrictionContext *joinRestrictionContext = NULL;
|
||||||
|
|
||||||
|
Assert(plannerRestrictionContextList != NIL);
|
||||||
|
|
||||||
|
plannerRestrictionContext =
|
||||||
|
(PlannerRestrictionContext *) linitial(plannerRestrictionContextList);
|
||||||
|
|
||||||
|
joinRestrictionContext = plannerRestrictionContext->joinRestrictionContext;
|
||||||
|
|
||||||
|
return joinRestrictionContext;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* PopPlannerRestrictionContext removes the most recently added restriction contexts from
|
||||||
|
* the planner restriction context list. The function assumes the list is not empty.
|
||||||
*/
|
*/
|
||||||
static void
|
static void
|
||||||
PopRestrictionContext(void)
|
PopPlannerRestrictionContext(void)
|
||||||
{
|
{
|
||||||
relationRestrictionContextList = list_delete_first(relationRestrictionContextList);
|
plannerRestrictionContextList = list_delete_first(plannerRestrictionContextList);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -694,12 +855,6 @@ HasUnresolvedExternParamsWalker(Node *expression, ParamListInfo boundParams)
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* don't care about our special parameter, it'll be removed during planning */
|
|
||||||
if (paramId == UNINSTANTIATED_PARAMETER_ID)
|
|
||||||
{
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* check whether parameter is available (and valid) */
|
/* check whether parameter is available (and valid) */
|
||||||
if (boundParams && paramId > 0 && paramId <= boundParams->numParams)
|
if (boundParams && paramId > 0 && paramId <= boundParams->numParams)
|
||||||
{
|
{
|
||||||
|
|
|
@ -36,6 +36,7 @@
|
||||||
#include "distributed/multi_router_planner.h"
|
#include "distributed/multi_router_planner.h"
|
||||||
#include "distributed/listutils.h"
|
#include "distributed/listutils.h"
|
||||||
#include "distributed/citus_ruleutils.h"
|
#include "distributed/citus_ruleutils.h"
|
||||||
|
#include "distributed/relation_restriction_equivalence.h"
|
||||||
#include "distributed/relay_utility.h"
|
#include "distributed/relay_utility.h"
|
||||||
#include "distributed/resource_lock.h"
|
#include "distributed/resource_lock.h"
|
||||||
#include "distributed/shardinterval_utils.h"
|
#include "distributed/shardinterval_utils.h"
|
||||||
|
@ -48,6 +49,8 @@
|
||||||
#include "nodes/pg_list.h"
|
#include "nodes/pg_list.h"
|
||||||
#include "nodes/primnodes.h"
|
#include "nodes/primnodes.h"
|
||||||
#include "optimizer/clauses.h"
|
#include "optimizer/clauses.h"
|
||||||
|
#include "optimizer/joininfo.h"
|
||||||
|
#include "optimizer/pathnode.h"
|
||||||
#include "optimizer/paths.h"
|
#include "optimizer/paths.h"
|
||||||
#include "optimizer/predtest.h"
|
#include "optimizer/predtest.h"
|
||||||
#include "optimizer/restrictinfo.h"
|
#include "optimizer/restrictinfo.h"
|
||||||
|
@ -75,21 +78,22 @@ typedef struct WalkerState
|
||||||
|
|
||||||
bool EnableRouterExecution = true;
|
bool EnableRouterExecution = true;
|
||||||
|
|
||||||
|
|
||||||
/* planner functions forward declarations */
|
/* planner functions forward declarations */
|
||||||
static MultiPlan * CreateSingleTaskRouterPlan(Query *originalQuery,
|
static MultiPlan * CreateSingleTaskRouterPlan(Query *originalQuery,
|
||||||
Query *query,
|
Query *query,
|
||||||
RelationRestrictionContext *
|
RelationRestrictionContext *
|
||||||
restrictionContext);
|
restrictionContext);
|
||||||
static MultiPlan * CreateInsertSelectRouterPlan(Query *originalQuery,
|
static MultiPlan * CreateInsertSelectRouterPlan(Query *originalQuery,
|
||||||
RelationRestrictionContext *
|
PlannerRestrictionContext *
|
||||||
restrictionContext);
|
plannerRestrictionContext);
|
||||||
static Task * RouterModifyTaskForShardInterval(Query *originalQuery,
|
static Task * RouterModifyTaskForShardInterval(Query *originalQuery,
|
||||||
ShardInterval *shardInterval,
|
ShardInterval *shardInterval,
|
||||||
RelationRestrictionContext *
|
RelationRestrictionContext *
|
||||||
restrictionContext,
|
restrictionContext,
|
||||||
uint32 taskIdIndex);
|
uint32 taskIdIndex,
|
||||||
static List * HashedShardIntervalOpExpressions(ShardInterval *shardInterval);
|
bool allRelationsJoinedOnPartitionKey);
|
||||||
static Param * UninstantiatedParameterForColumn(Var *relationPartitionKey);
|
static List * ShardIntervalOpExpressions(ShardInterval *shardInterval, Index rteIndex);
|
||||||
static bool MasterIrreducibleExpression(Node *expression, bool *varArgument,
|
static bool MasterIrreducibleExpression(Node *expression, bool *varArgument,
|
||||||
bool *badCoalesce);
|
bool *badCoalesce);
|
||||||
static bool MasterIrreducibleExpressionWalker(Node *expression, WalkerState *state);
|
static bool MasterIrreducibleExpressionWalker(Node *expression, WalkerState *state);
|
||||||
|
@ -129,7 +133,6 @@ static DeferredErrorMessage * InsertPartitionColumnMatchesSelect(Query *query,
|
||||||
subqueryRte,
|
subqueryRte,
|
||||||
Oid *
|
Oid *
|
||||||
selectPartitionColumnTableId);
|
selectPartitionColumnTableId);
|
||||||
static void AddUninstantiatedEqualityQual(Query *query, Var *targetPartitionColumnVar);
|
|
||||||
static DeferredErrorMessage * ErrorIfQueryHasModifyingCTE(Query *queryTree);
|
static DeferredErrorMessage * ErrorIfQueryHasModifyingCTE(Query *queryTree);
|
||||||
|
|
||||||
|
|
||||||
|
@ -165,16 +168,19 @@ CreateRouterPlan(Query *originalQuery, Query *query,
|
||||||
*/
|
*/
|
||||||
MultiPlan *
|
MultiPlan *
|
||||||
CreateModifyPlan(Query *originalQuery, Query *query,
|
CreateModifyPlan(Query *originalQuery, Query *query,
|
||||||
RelationRestrictionContext *restrictionContext)
|
PlannerRestrictionContext *plannerRestrictionContext)
|
||||||
{
|
{
|
||||||
if (InsertSelectQuery(originalQuery))
|
if (InsertSelectQuery(originalQuery))
|
||||||
{
|
{
|
||||||
return CreateInsertSelectRouterPlan(originalQuery, restrictionContext);
|
return CreateInsertSelectRouterPlan(originalQuery, plannerRestrictionContext);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
RelationRestrictionContext *relationRestrictionContext =
|
||||||
|
plannerRestrictionContext->relationRestrictionContext;
|
||||||
|
|
||||||
return CreateSingleTaskRouterPlan(originalQuery, query,
|
return CreateSingleTaskRouterPlan(originalQuery, query,
|
||||||
restrictionContext);
|
relationRestrictionContext);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -258,7 +264,7 @@ CreateSingleTaskRouterPlan(Query *originalQuery, Query *query,
|
||||||
*/
|
*/
|
||||||
static MultiPlan *
|
static MultiPlan *
|
||||||
CreateInsertSelectRouterPlan(Query *originalQuery,
|
CreateInsertSelectRouterPlan(Query *originalQuery,
|
||||||
RelationRestrictionContext *restrictionContext)
|
PlannerRestrictionContext *plannerRestrictionContext)
|
||||||
{
|
{
|
||||||
int shardOffset = 0;
|
int shardOffset = 0;
|
||||||
List *sqlTaskList = NIL;
|
List *sqlTaskList = NIL;
|
||||||
|
@ -271,7 +277,10 @@ CreateInsertSelectRouterPlan(Query *originalQuery,
|
||||||
Oid targetRelationId = insertRte->relid;
|
Oid targetRelationId = insertRte->relid;
|
||||||
DistTableCacheEntry *targetCacheEntry = DistributedTableCacheEntry(targetRelationId);
|
DistTableCacheEntry *targetCacheEntry = DistributedTableCacheEntry(targetRelationId);
|
||||||
int shardCount = targetCacheEntry->shardIntervalArrayLength;
|
int shardCount = targetCacheEntry->shardIntervalArrayLength;
|
||||||
bool allReferenceTables = restrictionContext->allReferenceTables;
|
RelationRestrictionContext *relationRestrictionContext =
|
||||||
|
plannerRestrictionContext->relationRestrictionContext;
|
||||||
|
bool allReferenceTables = relationRestrictionContext->allReferenceTables;
|
||||||
|
bool restrictionEquivalenceForPartitionKeys = false;
|
||||||
|
|
||||||
multiPlan->operation = originalQuery->commandType;
|
multiPlan->operation = originalQuery->commandType;
|
||||||
|
|
||||||
|
@ -287,6 +296,9 @@ CreateInsertSelectRouterPlan(Query *originalQuery,
|
||||||
return multiPlan;
|
return multiPlan;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
restrictionEquivalenceForPartitionKeys =
|
||||||
|
RestrictionEquivalenceForPartitionKeys(plannerRestrictionContext);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Plan select query for each shard in the target table. Do so by replacing the
|
* Plan select query for each shard in the target table. Do so by replacing the
|
||||||
* partitioning qual parameter added in multi_planner() using the current shard's
|
* partitioning qual parameter added in multi_planner() using the current shard's
|
||||||
|
@ -303,7 +315,9 @@ CreateInsertSelectRouterPlan(Query *originalQuery,
|
||||||
Task *modifyTask = NULL;
|
Task *modifyTask = NULL;
|
||||||
|
|
||||||
modifyTask = RouterModifyTaskForShardInterval(originalQuery, targetShardInterval,
|
modifyTask = RouterModifyTaskForShardInterval(originalQuery, targetShardInterval,
|
||||||
restrictionContext, taskIdIndex);
|
relationRestrictionContext,
|
||||||
|
taskIdIndex,
|
||||||
|
restrictionEquivalenceForPartitionKeys);
|
||||||
|
|
||||||
/* add the task if it could be created */
|
/* add the task if it could be created */
|
||||||
if (modifyTask != NULL)
|
if (modifyTask != NULL)
|
||||||
|
@ -354,7 +368,8 @@ CreateInsertSelectRouterPlan(Query *originalQuery,
|
||||||
static Task *
|
static Task *
|
||||||
RouterModifyTaskForShardInterval(Query *originalQuery, ShardInterval *shardInterval,
|
RouterModifyTaskForShardInterval(Query *originalQuery, ShardInterval *shardInterval,
|
||||||
RelationRestrictionContext *restrictionContext,
|
RelationRestrictionContext *restrictionContext,
|
||||||
uint32 taskIdIndex)
|
uint32 taskIdIndex,
|
||||||
|
bool allRelationsJoinedOnPartitionKey)
|
||||||
{
|
{
|
||||||
Query *copiedQuery = copyObject(originalQuery);
|
Query *copiedQuery = copyObject(originalQuery);
|
||||||
RangeTblEntry *copiedInsertRte = ExtractInsertRangeTableEntry(copiedQuery);
|
RangeTblEntry *copiedInsertRte = ExtractInsertRangeTableEntry(copiedQuery);
|
||||||
|
@ -382,6 +397,7 @@ RouterModifyTaskForShardInterval(Query *originalQuery, ShardInterval *shardInter
|
||||||
bool replacePrunedQueryWithDummy = false;
|
bool replacePrunedQueryWithDummy = false;
|
||||||
bool allReferenceTables = restrictionContext->allReferenceTables;
|
bool allReferenceTables = restrictionContext->allReferenceTables;
|
||||||
List *hashedOpExpressions = NIL;
|
List *hashedOpExpressions = NIL;
|
||||||
|
RestrictInfo *hashedRestrictInfo = NULL;
|
||||||
|
|
||||||
/* grab shared metadata lock to stop concurrent placement additions */
|
/* grab shared metadata lock to stop concurrent placement additions */
|
||||||
LockShardDistributionMetadata(shardId, ShareLock);
|
LockShardDistributionMetadata(shardId, ShareLock);
|
||||||
|
@ -394,43 +410,19 @@ RouterModifyTaskForShardInterval(Query *originalQuery, ShardInterval *shardInter
|
||||||
{
|
{
|
||||||
RelationRestriction *restriction = lfirst(restrictionCell);
|
RelationRestriction *restriction = lfirst(restrictionCell);
|
||||||
List *originalBaserestrictInfo = restriction->relOptInfo->baserestrictinfo;
|
List *originalBaserestrictInfo = restriction->relOptInfo->baserestrictinfo;
|
||||||
Var *relationPartitionKey = PartitionColumn(restriction->relationId,
|
Index rteIndex = restriction->index;
|
||||||
restriction->index);
|
|
||||||
Param *uninstantiatedParameter = NULL;
|
|
||||||
|
|
||||||
/*
|
if (!allRelationsJoinedOnPartitionKey || allReferenceTables)
|
||||||
* We don't need to add restriction to reference tables given that they are
|
|
||||||
* already single sharded and always prune to that single shard.
|
|
||||||
*/
|
|
||||||
if (PartitionMethod(restriction->relationId) == DISTRIBUTE_BY_NONE)
|
|
||||||
{
|
{
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
hashedOpExpressions = HashedShardIntervalOpExpressions(shardInterval);
|
hashedOpExpressions = ShardIntervalOpExpressions(shardInterval, rteIndex);
|
||||||
Assert(list_length(hashedOpExpressions) == 2);
|
|
||||||
|
|
||||||
/*
|
hashedRestrictInfo = make_simple_restrictinfo((Expr *) hashedOpExpressions);
|
||||||
* Here we check whether the planner knows an equality between the partition column
|
originalBaserestrictInfo = lappend(originalBaserestrictInfo, hashedRestrictInfo);
|
||||||
* and the uninstantiated parameter. If such an equality exists, we simply add the
|
|
||||||
* shard restrictions.
|
|
||||||
*/
|
|
||||||
uninstantiatedParameter = UninstantiatedParameterForColumn(relationPartitionKey);
|
|
||||||
if (exprs_known_equal(restriction->plannerInfo, (Node *) relationPartitionKey,
|
|
||||||
(Node *) uninstantiatedParameter))
|
|
||||||
{
|
|
||||||
RestrictInfo *geRestrictInfo = NULL;
|
|
||||||
RestrictInfo *leRestrictInfo = NULL;
|
|
||||||
|
|
||||||
OpExpr *hashedGEOpExpr = (OpExpr *) linitial(hashedOpExpressions);
|
restriction->relOptInfo->baserestrictinfo = originalBaserestrictInfo;
|
||||||
OpExpr *hashedLEOpExpr = (OpExpr *) lsecond(hashedOpExpressions);
|
|
||||||
|
|
||||||
geRestrictInfo = make_simple_restrictinfo((Expr *) hashedGEOpExpr);
|
|
||||||
originalBaserestrictInfo = lappend(originalBaserestrictInfo, geRestrictInfo);
|
|
||||||
|
|
||||||
leRestrictInfo = make_simple_restrictinfo((Expr *) hashedLEOpExpr);
|
|
||||||
originalBaserestrictInfo = lappend(originalBaserestrictInfo, leRestrictInfo);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -530,92 +522,53 @@ RouterModifyTaskForShardInterval(Query *originalQuery, ShardInterval *shardInter
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* HashedShardIntervalOpExpressions returns a list of OpExprs with exactly two
|
* ShardIntervalOpExpressions returns a list of OpExprs with exactly two
|
||||||
* items in it. The list consists of shard interval ranges with hashed columns
|
* items in it. The list consists of shard interval ranges with partition columns
|
||||||
* such as (hashColumn >= shardMinValue) and (hashedColumn <= shardMaxValue).
|
* such as (partitionColumn >= shardMinValue) and (partitionColumn <= shardMaxValue).
|
||||||
*
|
*
|
||||||
* The function errors out if the given shard interval does not belong to a hash
|
* The function returns hashed columns generated by MakeInt4Column() for the hash
|
||||||
* distributed table.
|
* partitioned tables in place of partition columns.
|
||||||
|
*
|
||||||
|
* The function errors out if the given shard interval does not belong to a hash,
|
||||||
|
* range or append distributed table.
|
||||||
*/
|
*/
|
||||||
static List *
|
static List *
|
||||||
HashedShardIntervalOpExpressions(ShardInterval *shardInterval)
|
ShardIntervalOpExpressions(ShardInterval *shardInterval, Index rteIndex)
|
||||||
{
|
{
|
||||||
List *operatorExpressions = NIL;
|
Oid relationId = shardInterval->relationId;
|
||||||
Var *hashedGEColumn = NULL;
|
|
||||||
Var *hashedLEColumn = NULL;
|
|
||||||
OpExpr *hashedGEOpExpr = NULL;
|
|
||||||
OpExpr *hashedLEOpExpr = NULL;
|
|
||||||
Oid integer4GEoperatorId = InvalidOid;
|
|
||||||
Oid integer4LEoperatorId = InvalidOid;
|
|
||||||
|
|
||||||
Datum shardMinValue = shardInterval->minValue;
|
|
||||||
Datum shardMaxValue = shardInterval->maxValue;
|
|
||||||
char partitionMethod = PartitionMethod(shardInterval->relationId);
|
char partitionMethod = PartitionMethod(shardInterval->relationId);
|
||||||
|
Var *partitionColumn = NULL;
|
||||||
|
Node *baseConstraint = NULL;
|
||||||
|
|
||||||
if (partitionMethod != DISTRIBUTE_BY_HASH)
|
if (partitionMethod == DISTRIBUTE_BY_HASH)
|
||||||
|
{
|
||||||
|
partitionColumn = MakeInt4Column();
|
||||||
|
}
|
||||||
|
else if (partitionMethod == DISTRIBUTE_BY_RANGE || partitionMethod ==
|
||||||
|
DISTRIBUTE_BY_APPEND)
|
||||||
|
{
|
||||||
|
Assert(rteIndex > 0);
|
||||||
|
|
||||||
|
partitionColumn = PartitionColumn(relationId, rteIndex);
|
||||||
|
}
|
||||||
|
else
|
||||||
{
|
{
|
||||||
ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||||
errmsg("cannot create shard interval operator expression for "
|
errmsg("cannot create shard interval operator expression for "
|
||||||
"distributed relations other than hash distributed "
|
"distributed relations other than hash, range and append distributed "
|
||||||
"relations")));
|
"relations")));
|
||||||
}
|
}
|
||||||
|
|
||||||
/* get the integer >=, <= operators from the catalog */
|
/* build the base expression for constraint */
|
||||||
integer4GEoperatorId = get_opfamily_member(INTEGER_BTREE_FAM_OID, INT4OID,
|
baseConstraint = BuildBaseConstraint(partitionColumn);
|
||||||
INT4OID,
|
|
||||||
BTGreaterEqualStrategyNumber);
|
|
||||||
integer4LEoperatorId = get_opfamily_member(INTEGER_BTREE_FAM_OID, INT4OID,
|
|
||||||
INT4OID,
|
|
||||||
BTLessEqualStrategyNumber);
|
|
||||||
|
|
||||||
/* generate hashed columns */
|
/* update the base constraint with the shard interval if both min and max values exist */
|
||||||
hashedGEColumn = MakeInt4Column();
|
if (shardInterval->minValueExists && shardInterval->maxValueExists)
|
||||||
hashedLEColumn = MakeInt4Column();
|
{
|
||||||
|
UpdateConstraint(baseConstraint, shardInterval);
|
||||||
/* generate the necessary operators */
|
|
||||||
hashedGEOpExpr = (OpExpr *) make_opclause(integer4GEoperatorId, InvalidOid, false,
|
|
||||||
(Expr *) hashedGEColumn,
|
|
||||||
(Expr *) MakeInt4Constant(shardMinValue),
|
|
||||||
InvalidOid, InvalidOid);
|
|
||||||
|
|
||||||
hashedLEOpExpr = (OpExpr *) make_opclause(integer4LEoperatorId, InvalidOid, false,
|
|
||||||
(Expr *) hashedLEColumn,
|
|
||||||
(Expr *) MakeInt4Constant(shardMaxValue),
|
|
||||||
InvalidOid, InvalidOid);
|
|
||||||
|
|
||||||
/* update the operators with correct operator numbers and function ids */
|
|
||||||
hashedGEOpExpr->opfuncid = get_opcode(hashedGEOpExpr->opno);
|
|
||||||
hashedGEOpExpr->opresulttype = get_func_rettype(hashedGEOpExpr->opfuncid);
|
|
||||||
operatorExpressions = lappend(operatorExpressions, hashedGEOpExpr);
|
|
||||||
|
|
||||||
hashedLEOpExpr->opfuncid = get_opcode(hashedLEOpExpr->opno);
|
|
||||||
hashedLEOpExpr->opresulttype = get_func_rettype(hashedLEOpExpr->opfuncid);
|
|
||||||
operatorExpressions = lappend(operatorExpressions, hashedLEOpExpr);
|
|
||||||
|
|
||||||
return operatorExpressions;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return list_make1(baseConstraint);
|
||||||
/*
|
|
||||||
* UninstantiatedParameterForColumn returns a Param that can be used as an uninstantiated
|
|
||||||
* parameter for the given column in the sense that paramtype, paramtypmod and collid
|
|
||||||
* is set to the input Var's corresponding values.
|
|
||||||
*
|
|
||||||
* Note that we're using hard coded UNINSTANTIATED_PARAMETER_ID which is the required parameter
|
|
||||||
* for our purposes. See multi_planner.c@multi_planner for the details.
|
|
||||||
*/
|
|
||||||
static Param *
|
|
||||||
UninstantiatedParameterForColumn(Var *relationPartitionKey)
|
|
||||||
{
|
|
||||||
Param *uninstantiatedParameter = makeNode(Param);
|
|
||||||
|
|
||||||
uninstantiatedParameter->paramkind = PARAM_EXTERN;
|
|
||||||
uninstantiatedParameter->paramid = UNINSTANTIATED_PARAMETER_ID;
|
|
||||||
uninstantiatedParameter->paramtype = relationPartitionKey->vartype;
|
|
||||||
uninstantiatedParameter->paramtypmod = relationPartitionKey->vartypmod;
|
|
||||||
uninstantiatedParameter->paramcollid = relationPartitionKey->varcollid;
|
|
||||||
|
|
||||||
return uninstantiatedParameter;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -924,11 +877,10 @@ MultiTaskRouterSelectQuerySupported(Query *query)
|
||||||
NULL, NULL);
|
NULL, NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* see comment on AddUninstantiatedPartitionRestriction() */
|
|
||||||
if (subquery->setOperations != NULL)
|
if (subquery->setOperations != NULL)
|
||||||
{
|
{
|
||||||
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
|
return DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED,
|
||||||
"set operations are not allowed in INSERT ... SELECT "
|
"Set operations are not allowed in INSERT ... SELECT "
|
||||||
"queries",
|
"queries",
|
||||||
NULL, NULL);
|
NULL, NULL);
|
||||||
}
|
}
|
||||||
|
@ -1190,126 +1142,6 @@ InsertPartitionColumnMatchesSelect(Query *query, RangeTblEntry *insertRte,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
|
||||||
* AddUninstantiatedPartitionRestriction() can only be used with
|
|
||||||
* INSERT ... SELECT queries.
|
|
||||||
*
|
|
||||||
* AddUninstantiatedPartitionRestriction adds an equality qual
|
|
||||||
* to the SELECT query of the given originalQuery. The function currently
|
|
||||||
* does NOT add the quals if
|
|
||||||
* (i) Set operations are present on the top level query
|
|
||||||
* (ii) Target list does not include a bare partition column.
|
|
||||||
*
|
|
||||||
* Note that if the input query is not an INSERT ... SELECT the assertion fails. Lastly,
|
|
||||||
* if all the participating tables in the query are reference tables, we implicitly
|
|
||||||
* skip adding the quals to the query since IsPartitionColumnRecursive() always returns
|
|
||||||
* false for reference tables.
|
|
||||||
*/
|
|
||||||
void
|
|
||||||
AddUninstantiatedPartitionRestriction(Query *originalQuery)
|
|
||||||
{
|
|
||||||
Query *subquery = NULL;
|
|
||||||
RangeTblEntry *subqueryEntry = NULL;
|
|
||||||
ListCell *targetEntryCell = NULL;
|
|
||||||
Var *targetPartitionColumnVar = NULL;
|
|
||||||
List *targetList = NULL;
|
|
||||||
|
|
||||||
Assert(InsertSelectQuery(originalQuery));
|
|
||||||
|
|
||||||
subqueryEntry = ExtractSelectRangeTableEntry(originalQuery);
|
|
||||||
subquery = subqueryEntry->subquery;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* We currently not support the subquery with set operations. The main reason is that
|
|
||||||
* there is an "Assert(parse->jointree->quals == NULL);" on standard planner's execution
|
|
||||||
* path (i.e., plan_set_operations).
|
|
||||||
* If we are to add uninstantiated equality qual to the query, we may end up hitting that
|
|
||||||
* assertion, so it's better not to support for now.
|
|
||||||
*/
|
|
||||||
if (subquery->setOperations != NULL)
|
|
||||||
{
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* iterate through the target list and find the partition column on the target list */
|
|
||||||
targetList = subquery->targetList;
|
|
||||||
foreach(targetEntryCell, targetList)
|
|
||||||
{
|
|
||||||
TargetEntry *targetEntry = lfirst(targetEntryCell);
|
|
||||||
|
|
||||||
if (IsPartitionColumn(targetEntry->expr, subquery) &&
|
|
||||||
IsA(targetEntry->expr, Var))
|
|
||||||
{
|
|
||||||
targetPartitionColumnVar = (Var *) targetEntry->expr;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* If we cannot find the bare partition column, no need to add the qual since
|
|
||||||
* we're already going to error out on the multi planner.
|
|
||||||
*/
|
|
||||||
if (!targetPartitionColumnVar)
|
|
||||||
{
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* finally add the equality qual of target column to subquery */
|
|
||||||
AddUninstantiatedEqualityQual(subquery, targetPartitionColumnVar);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
|
||||||
* AddUninstantiatedEqualityQual adds a qual in the following form
|
|
||||||
* ($1 = partitionColumn) on the input query and partitionColumn.
|
|
||||||
*/
|
|
||||||
static void
|
|
||||||
AddUninstantiatedEqualityQual(Query *query, Var *partitionColumn)
|
|
||||||
{
|
|
||||||
Param *equalityParameter = UninstantiatedParameterForColumn(partitionColumn);
|
|
||||||
OpExpr *uninstantiatedEqualityQual = NULL;
|
|
||||||
Oid partitionColumnCollid = InvalidOid;
|
|
||||||
Oid lessThanOperator = InvalidOid;
|
|
||||||
Oid equalsOperator = InvalidOid;
|
|
||||||
Oid greaterOperator = InvalidOid;
|
|
||||||
bool hashable = false;
|
|
||||||
|
|
||||||
AssertArg(query->commandType == CMD_SELECT);
|
|
||||||
|
|
||||||
/* get the necessary equality operator */
|
|
||||||
get_sort_group_operators(partitionColumn->vartype, false, true, false,
|
|
||||||
&lessThanOperator, &equalsOperator, &greaterOperator,
|
|
||||||
&hashable);
|
|
||||||
|
|
||||||
|
|
||||||
partitionColumnCollid = partitionColumn->varcollid;
|
|
||||||
|
|
||||||
/* create an equality on the on the target partition column */
|
|
||||||
uninstantiatedEqualityQual = (OpExpr *) make_opclause(equalsOperator, InvalidOid,
|
|
||||||
false,
|
|
||||||
(Expr *) partitionColumn,
|
|
||||||
(Expr *) equalityParameter,
|
|
||||||
partitionColumnCollid,
|
|
||||||
partitionColumnCollid);
|
|
||||||
|
|
||||||
/* update the operators with correct operator numbers and function ids */
|
|
||||||
uninstantiatedEqualityQual->opfuncid = get_opcode(uninstantiatedEqualityQual->opno);
|
|
||||||
uninstantiatedEqualityQual->opresulttype =
|
|
||||||
get_func_rettype(uninstantiatedEqualityQual->opfuncid);
|
|
||||||
|
|
||||||
/* add restriction on partition column */
|
|
||||||
if (query->jointree->quals == NULL)
|
|
||||||
{
|
|
||||||
query->jointree->quals = (Node *) uninstantiatedEqualityQual;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
query->jointree->quals = make_and_qual(query->jointree->quals,
|
|
||||||
(Node *) uninstantiatedEqualityQual);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* ModifyQuerySupported returns NULL if the query only contains supported
|
* ModifyQuerySupported returns NULL if the query only contains supported
|
||||||
* features, otherwise it returns an error description.
|
* features, otherwise it returns an error description.
|
||||||
|
|
|
@ -0,0 +1,864 @@
|
||||||
|
/*
|
||||||
|
* relation_restriction_equivalence.c
|
||||||
|
*
|
||||||
|
* This file contains helper functions for planning
|
||||||
|
* queries with colocated tables and subqueries.
|
||||||
|
*
|
||||||
|
* Copyright (c) 2017-2017, Citus Data, Inc.
|
||||||
|
*
|
||||||
|
*-------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
#include "postgres.h"
|
||||||
|
|
||||||
|
#include "distributed/multi_planner.h"
|
||||||
|
#include "distributed/multi_logical_planner.h"
|
||||||
|
#include "distributed/pg_dist_partition.h"
|
||||||
|
#include "distributed/relation_restriction_equivalence.h"
|
||||||
|
#include "nodes/nodeFuncs.h"
|
||||||
|
#include "nodes/pg_list.h"
|
||||||
|
#include "nodes/primnodes.h"
|
||||||
|
#include "nodes/relation.h"
|
||||||
|
#include "parser/parsetree.h"
|
||||||
|
#include "optimizer/pathnode.h"
|
||||||
|
|
||||||
|
static uint32 attributeEquivalenceId = 1;
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* AttributeEquivalenceClass
|
||||||
|
*
|
||||||
|
* Whenever we find an equality clause A = B, where both A and B originate from
|
||||||
|
* relation attributes (i.e., not random expressions), we create an
|
||||||
|
* AttributeEquivalenceClass to record this knowledge. If we later find another
|
||||||
|
* equivalence B = C, we create another AttributeEquivalenceClass. Finally, we can
|
||||||
|
* apply transitivity rules and generate a new AttributeEquivalenceClass which includes
|
||||||
|
* A, B and C.
|
||||||
|
*
|
||||||
|
* Note that equality among the members is identified by the varattno and rteIdentity.
|
||||||
|
*/
|
||||||
|
typedef struct AttributeEquivalenceClass
|
||||||
|
{
|
||||||
|
uint32 equivalenceId;
|
||||||
|
List *equivalentAttributes;
|
||||||
|
} AttributeEquivalenceClass;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* AttributeEquivalenceClassMember - one member expression of an
|
||||||
|
* AttributeEquivalenceClass. The important thing to consider is that
|
||||||
|
* the class member contains the "rteIdentity" field. Note that each RTE_RELATION
|
||||||
|
* is assigned a unique rteIdentity in AssignRTEIdentities() function.
|
||||||
|
*
|
||||||
|
* "varno" and "varattno" are directly used from a Var clause that is being added
|
||||||
|
* to the attribute equivalence. Since we only use this class for relations, the member
|
||||||
|
* also includes the relation id field.
|
||||||
|
*/
|
||||||
|
typedef struct AttributeEquivalenceClassMember
|
||||||
|
{
|
||||||
|
Oid relationId;
|
||||||
|
int rteIdentity;
|
||||||
|
Index varno;
|
||||||
|
AttrNumber varattno;
|
||||||
|
} AttributeEquivalenceClassMember;
|
||||||
|
|
||||||
|
|
||||||
|
static uint32 ReferenceRelationCount(RelationRestrictionContext *restrictionContext);
|
||||||
|
static List * GenerateAttributeEquivalencesForRelationRestrictions(
|
||||||
|
RelationRestrictionContext *restrictionContext);
|
||||||
|
static AttributeEquivalenceClass * AttributeEquivalenceClassForEquivalenceClass(
|
||||||
|
EquivalenceClass *plannerEqClass, RelationRestriction *relationRestriction);
|
||||||
|
static void AddToAttributeEquivalenceClass(AttributeEquivalenceClass **
|
||||||
|
attributeEquivalanceClass,
|
||||||
|
PlannerInfo *root, Var *varToBeAdded);
|
||||||
|
static Var * GetVarFromAssignedParam(List *parentPlannerParamList,
|
||||||
|
Param *plannerParam);
|
||||||
|
static List * GenerateAttributeEquivalencesForJoinRestrictions(JoinRestrictionContext
|
||||||
|
*joinRestrictionContext);
|
||||||
|
static bool AttributeClassContainsAttributeClassMember(AttributeEquivalenceClassMember *
|
||||||
|
inputMember,
|
||||||
|
AttributeEquivalenceClass *
|
||||||
|
attributeEquivalenceClass);
|
||||||
|
static List * AddAttributeClassToAttributeClassList(List *attributeEquivalenceList,
|
||||||
|
AttributeEquivalenceClass *
|
||||||
|
attributeEquivalance);
|
||||||
|
static bool AttributeEquivalancesAreEqual(AttributeEquivalenceClass *
|
||||||
|
firstAttributeEquivalance,
|
||||||
|
AttributeEquivalenceClass *
|
||||||
|
secondAttributeEquivalance);
|
||||||
|
static AttributeEquivalenceClass * GenerateCommonEquivalence(List *
|
||||||
|
attributeEquivalenceList);
|
||||||
|
static void ListConcatUniqueAttributeClassMemberLists(AttributeEquivalenceClass **
|
||||||
|
firstClass,
|
||||||
|
AttributeEquivalenceClass *
|
||||||
|
secondClass);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* RestrictionEquivalenceForPartitionKeys aims to deduce whether each of the RTE_RELATION
|
||||||
|
* is joined with at least one other RTE_RELATION on their partition keys. If each
|
||||||
|
* RTE_RELATION follows the above rule, we can conclude that all RTE_RELATIONs are
|
||||||
|
* joined on their partition keys.
|
||||||
|
*
|
||||||
|
* The function returns true if all relations are joined on their partition keys.
|
||||||
|
* Otherwise, the function returns false. Since reference tables do not have partition
|
||||||
|
* keys, we skip processing them. Also, if the query includes only a single non-reference
|
||||||
|
* distributed relation, the function returns true since it doesn't make sense to check
|
||||||
|
* for partition key equality in that case.
|
||||||
|
*
|
||||||
|
* In order to do that, we invented a new equivalence class namely:
|
||||||
|
* AttributeEquivalenceClass. In very simple words, an AttributeEquivalenceClass is
|
||||||
|
* identified by a unique id and consists of a list of AttributeEquivalenceMembers.
|
||||||
|
*
|
||||||
|
* Each AttributeEquivalenceMember is designed to identify attributes uniquely within the
|
||||||
|
* whole query. The necessity of this arises since varno attributes are defined within
|
||||||
|
* a single level of a query. Instead, here we want to identify each RTE_RELATION uniquely
|
||||||
|
* and try to find equality among each RTE_RELATION's partition key.
|
||||||
|
*
|
||||||
|
* Each equality among RTE_RELATION is saved using an AttributeEquivalenceClass where
|
||||||
|
* each member attribute is identified by a AttributeEquivalenceMember. In the final
|
||||||
|
* step, we try to generate a common attribute equivalence class that holds as many
|
||||||
|
* AttributeEquivalenceMembers as possible whose attributes are partition keys.
|
||||||
|
*
|
||||||
|
* RestrictionEquivalenceForPartitionKeys uses both relation restrictions and join restrictions
|
||||||
|
* to find as much information as the Postgres planner provides to extensions. For the
|
||||||
|
* details of the usage, please see GenerateAttributeEquivalencesForRelationRestrictions()
|
||||||
|
* and GenerateAttributeEquivalencesForJoinRestrictions()
|
||||||
|
*/
|
||||||
|
bool
|
||||||
|
RestrictionEquivalenceForPartitionKeys(
|
||||||
|
PlannerRestrictionContext *plannerRestrictionContext)
|
||||||
|
{
|
||||||
|
RelationRestrictionContext *restrictionContext =
|
||||||
|
plannerRestrictionContext->relationRestrictionContext;
|
||||||
|
JoinRestrictionContext *joinRestrictionContext =
|
||||||
|
plannerRestrictionContext->joinRestrictionContext;
|
||||||
|
|
||||||
|
List *relationRestrictionAttributeEquivalenceList = NIL;
|
||||||
|
List *joinRestrictionAttributeEquivalenceList = NIL;
|
||||||
|
List *allAttributeEquivalenceList = NIL;
|
||||||
|
AttributeEquivalenceClass *commonEquivalenceClass = NULL;
|
||||||
|
uint32 referenceRelationCount = ReferenceRelationCount(restrictionContext);
|
||||||
|
uint32 totalRelationCount = list_length(restrictionContext->relationRestrictionList);
|
||||||
|
uint32 nonReferenceRelationCount = totalRelationCount - referenceRelationCount;
|
||||||
|
ListCell *commonEqClassCell = NULL;
|
||||||
|
ListCell *relationRestrictionCell = NULL;
|
||||||
|
Relids commonRteIdentities = NULL;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If the query includes a single relation which is not a reference table,
|
||||||
|
* we should not check the partition column equality.
|
||||||
|
* Consider two example cases:
|
||||||
|
* (i) The query includes only a single colocated relation
|
||||||
|
* (ii) A colocated relation is joined with a (or multiple) reference
|
||||||
|
* table(s) where colocated relation is not joined on the partition key
|
||||||
|
*
|
||||||
|
* For the above two cases, we don't need to execute the partition column equality
|
||||||
|
* algorithm. The reason is that the essence of this function is to ensure that the
|
||||||
|
* tasks that are going to be created should not need data from other tasks. In both
|
||||||
|
* cases mentioned above, the necessary data per task would be available.
|
||||||
|
*/
|
||||||
|
if (nonReferenceRelationCount <= 1)
|
||||||
|
{
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* reset the equivalence id counter per call to prevent overflows */
|
||||||
|
attributeEquivalenceId = 1;
|
||||||
|
|
||||||
|
relationRestrictionAttributeEquivalenceList =
|
||||||
|
GenerateAttributeEquivalencesForRelationRestrictions(restrictionContext);
|
||||||
|
joinRestrictionAttributeEquivalenceList =
|
||||||
|
GenerateAttributeEquivalencesForJoinRestrictions(joinRestrictionContext);
|
||||||
|
|
||||||
|
allAttributeEquivalenceList =
|
||||||
|
list_concat(relationRestrictionAttributeEquivalenceList,
|
||||||
|
joinRestrictionAttributeEquivalenceList);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* In general we're trying to expand the existing equivalence classes to find a
|
||||||
|
* common equivalence class. The main goal is to test whether this main class
|
||||||
|
* contains all partition keys of the existing relations.
|
||||||
|
*/
|
||||||
|
commonEquivalenceClass = GenerateCommonEquivalence(allAttributeEquivalenceList);
|
||||||
|
|
||||||
|
/* add the rte identities of relations to a bitmap */
|
||||||
|
foreach(commonEqClassCell, commonEquivalenceClass->equivalentAttributes)
|
||||||
|
{
|
||||||
|
AttributeEquivalenceClassMember *classMember =
|
||||||
|
(AttributeEquivalenceClassMember *) lfirst(commonEqClassCell);
|
||||||
|
int rteIdentity = classMember->rteIdentity;
|
||||||
|
|
||||||
|
commonRteIdentities = bms_add_member(commonRteIdentities, rteIdentity);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* check whether every relation with a partition key exists in the common equivalence class */
|
||||||
|
foreach(relationRestrictionCell, restrictionContext->relationRestrictionList)
|
||||||
|
{
|
||||||
|
RelationRestriction *relationRestriction =
|
||||||
|
(RelationRestriction *) lfirst(relationRestrictionCell);
|
||||||
|
int rteIdentity = GetRTEIdentity(relationRestriction->rte);
|
||||||
|
|
||||||
|
if (PartitionKey(relationRestriction->relationId) &&
|
||||||
|
!bms_is_member(rteIdentity, commonRteIdentities))
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* ReferenceRelationCount iterates over the relations and returns the reference table
|
||||||
|
* relation count.
|
||||||
|
*/
|
||||||
|
static uint32
|
||||||
|
ReferenceRelationCount(RelationRestrictionContext *restrictionContext)
|
||||||
|
{
|
||||||
|
ListCell *relationRestrictionCell = NULL;
|
||||||
|
uint32 referenceRelationCount = 0;
|
||||||
|
|
||||||
|
foreach(relationRestrictionCell, restrictionContext->relationRestrictionList)
|
||||||
|
{
|
||||||
|
RelationRestriction *relationRestriction =
|
||||||
|
(RelationRestriction *) lfirst(relationRestrictionCell);
|
||||||
|
|
||||||
|
if (PartitionMethod(relationRestriction->relationId) == DISTRIBUTE_BY_NONE)
|
||||||
|
{
|
||||||
|
referenceRelationCount++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return referenceRelationCount;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* GenerateAttributeEquivalencesForRelationRestrictions gets a relation restriction
|
||||||
|
* context and returns a list of AttributeEquivalenceClass.
|
||||||
|
*
|
||||||
|
* The algorithm followed can be summarized as below:
|
||||||
|
*
|
||||||
|
* - Per relation restriction
|
||||||
|
* - Per plannerInfo's eq_class
|
||||||
|
* - Create an AttributeEquivalenceClass
|
||||||
|
* - Add all Vars that appear in the plannerInfo's
|
||||||
|
* eq_class to the AttributeEquivalenceClass
|
||||||
|
* - While doing that, consider LATERAL vars as well.
|
||||||
|
* See GetVarFromAssignedParam() for the details. Note
|
||||||
|
* that we're using parentPlannerInfo while adding the
|
||||||
|
* LATERAL vars given that we rely on that plannerInfo.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
static List *
|
||||||
|
GenerateAttributeEquivalencesForRelationRestrictions(RelationRestrictionContext
|
||||||
|
*restrictionContext)
|
||||||
|
{
|
||||||
|
List *attributeEquivalenceList = NIL;
|
||||||
|
ListCell *relationRestrictionCell = NULL;
|
||||||
|
|
||||||
|
foreach(relationRestrictionCell, restrictionContext->relationRestrictionList)
|
||||||
|
{
|
||||||
|
RelationRestriction *relationRestriction =
|
||||||
|
(RelationRestriction *) lfirst(relationRestrictionCell);
|
||||||
|
List *equivalenceClasses = relationRestriction->plannerInfo->eq_classes;
|
||||||
|
ListCell *equivalenceClassCell = NULL;
|
||||||
|
|
||||||
|
foreach(equivalenceClassCell, equivalenceClasses)
|
||||||
|
{
|
||||||
|
EquivalenceClass *plannerEqClass =
|
||||||
|
(EquivalenceClass *) lfirst(equivalenceClassCell);
|
||||||
|
|
||||||
|
AttributeEquivalenceClass *attributeEquivalance =
|
||||||
|
AttributeEquivalenceClassForEquivalenceClass(plannerEqClass,
|
||||||
|
relationRestriction);
|
||||||
|
|
||||||
|
attributeEquivalenceList =
|
||||||
|
AddAttributeClassToAttributeClassList(attributeEquivalenceList,
|
||||||
|
attributeEquivalance);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return attributeEquivalenceList;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* AttributeEquivalenceClassForEquivalenceClass is a helper function for
|
||||||
|
* GenerateAttributeEquivalencesForRelationRestrictions. The function takes an
|
||||||
|
* EquivalenceClass and the relation restriction that the equivalence class
|
||||||
|
* belongs to. The function returns an AttributeEquivalenceClass that is composed
|
||||||
|
* of ec_members that are simple Var references.
|
||||||
|
*
|
||||||
|
* The function also takes care of LATERAL joins by simply replacing the PARAM_EXEC
|
||||||
|
* with the corresponding expression.
|
||||||
|
*/
|
||||||
|
static AttributeEquivalenceClass *
|
||||||
|
AttributeEquivalenceClassForEquivalenceClass(EquivalenceClass *plannerEqClass,
|
||||||
|
RelationRestriction *relationRestriction)
|
||||||
|
{
|
||||||
|
AttributeEquivalenceClass *attributeEquivalance =
|
||||||
|
palloc0(sizeof(AttributeEquivalenceClass));
|
||||||
|
ListCell *equivilanceMemberCell = NULL;
|
||||||
|
PlannerInfo *plannerInfo = relationRestriction->plannerInfo;
|
||||||
|
|
||||||
|
attributeEquivalance->equivalenceId = attributeEquivalenceId++;
|
||||||
|
|
||||||
|
foreach(equivilanceMemberCell, plannerEqClass->ec_members)
|
||||||
|
{
|
||||||
|
EquivalenceMember *equivalenceMember =
|
||||||
|
(EquivalenceMember *) lfirst(equivilanceMemberCell);
|
||||||
|
Node *equivalenceNode = strip_implicit_coercions(
|
||||||
|
(Node *) equivalenceMember->em_expr);
|
||||||
|
Expr *strippedEquivalenceExpr = (Expr *) equivalenceNode;
|
||||||
|
|
||||||
|
Var *expressionVar = NULL;
|
||||||
|
|
||||||
|
if (IsA(strippedEquivalenceExpr, Param))
|
||||||
|
{
|
||||||
|
List *parentParamList = relationRestriction->parentPlannerParamList;
|
||||||
|
Param *equivalenceParam = (Param *) strippedEquivalenceExpr;
|
||||||
|
|
||||||
|
expressionVar = GetVarFromAssignedParam(parentParamList,
|
||||||
|
equivalenceParam);
|
||||||
|
if (expressionVar)
|
||||||
|
{
|
||||||
|
AddToAttributeEquivalenceClass(&attributeEquivalance,
|
||||||
|
relationRestriction->parentPlannerInfo,
|
||||||
|
expressionVar);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (IsA(strippedEquivalenceExpr, Var))
|
||||||
|
{
|
||||||
|
expressionVar = (Var *) strippedEquivalenceExpr;
|
||||||
|
AddToAttributeEquivalenceClass(&attributeEquivalance, plannerInfo,
|
||||||
|
expressionVar);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return attributeEquivalance;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* GetVarFromAssignedParam returns the Var that is assigned to the given
|
||||||
|
* plannerParam if its kind is PARAM_EXEC.
|
||||||
|
*
|
||||||
|
* If the paramkind is not equal to PARAM_EXEC the function returns NULL. Similarly,
|
||||||
|
* if there is no var that the given param is assigned to, the function returns NULL.
|
||||||
|
*
|
||||||
|
* Rationale behind this function:
|
||||||
|
*
|
||||||
|
* While iterating through the equivalence classes of RTE_RELATIONs, we
|
||||||
|
* observe that there are PARAM type of equivalence member expressions for
|
||||||
|
* the RTE_RELATIONs which actually belong to lateral vars from the other query
|
||||||
|
* levels.
|
||||||
|
*
|
||||||
|
* We're also keeping track of the RTE_RELATION's parent_root's
|
||||||
|
* plan_param list which is expected to hold the parameters that are required
|
||||||
|
* for its lower level queries as it is documented:
|
||||||
|
*
|
||||||
|
* plan_params contains the expressions that this query level needs to
|
||||||
|
* make available to a lower query level that is currently being planned.
|
||||||
|
*
|
||||||
|
* This function is a helper function to iterate through the parent query's
|
||||||
|
* plan_params and looks for the param that the equivalence member has. The
|
||||||
|
* comparison is done via the "paramid" field. Finally, if the found parameter's
|
||||||
|
* item is a Var, we conclude that Postgres standard_planner replaced the Var
|
||||||
|
* with the Param on assign_param_for_var() function
|
||||||
|
* @src/backend/optimizer/plan/subselect.c.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
static Var *
|
||||||
|
GetVarFromAssignedParam(List *parentPlannerParamList, Param *plannerParam)
|
||||||
|
{
|
||||||
|
Var *assignedVar = NULL;
|
||||||
|
ListCell *plannerParameterCell = NULL;
|
||||||
|
|
||||||
|
Assert(plannerParam != NULL);
|
||||||
|
|
||||||
|
/* we're only interested in parameters that Postgres added for execution */
|
||||||
|
if (plannerParam->paramkind != PARAM_EXEC)
|
||||||
|
{
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
foreach(plannerParameterCell, parentPlannerParamList)
|
||||||
|
{
|
||||||
|
PlannerParamItem *plannerParamItem =
|
||||||
|
(PlannerParamItem *) lfirst(plannerParameterCell);
|
||||||
|
|
||||||
|
if (plannerParamItem->paramId != plannerParam->paramid)
|
||||||
|
{
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* TODO: Should we consider PlaceHolderVar?? */
|
||||||
|
if (!IsA(plannerParamItem->item, Var))
|
||||||
|
{
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
assignedVar = (Var *) plannerParamItem->item;
|
||||||
|
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
return assignedVar;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* GenerateCommonEquivalence gets a list of unrelated AttributeEquiavalanceClass
|
||||||
|
* whose all members are partition keys.
|
||||||
|
*
|
||||||
|
* With the equivalence classes, the function follows the algorithm
|
||||||
|
* outlined below:
|
||||||
|
*
|
||||||
|
* - Add the first equivalence class to the common equivalence class
|
||||||
|
* - Then, iterate on the remaining equivalence classes
|
||||||
|
* - If any of the members equal to the common equivalence class
|
||||||
|
* add all the members of the equivalence class to the common
|
||||||
|
* class
|
||||||
|
* - Start the iteration from the beginning. The reason is that
|
||||||
|
* in case any of the classes we've passed is equivalent to the
|
||||||
|
* newly added one. To optimize the algorithm, we utilze the
|
||||||
|
* equivalence class ids and skip the ones that are already added.
|
||||||
|
* - Finally, return the common equivalence class.
|
||||||
|
*/
|
||||||
|
static AttributeEquivalenceClass *
|
||||||
|
GenerateCommonEquivalence(List *attributeEquivalenceList)
|
||||||
|
{
|
||||||
|
AttributeEquivalenceClass *commonEquivalenceClass = NULL;
|
||||||
|
AttributeEquivalenceClass *firstEquivalenceClass = NULL;
|
||||||
|
Bitmapset *addedEquivalenceIds = NULL;
|
||||||
|
uint32 equivalenceListSize = list_length(attributeEquivalenceList);
|
||||||
|
uint32 equivalenceClassIndex = 0;
|
||||||
|
|
||||||
|
commonEquivalenceClass = palloc0(sizeof(AttributeEquivalenceClass));
|
||||||
|
commonEquivalenceClass->equivalenceId = 0;
|
||||||
|
|
||||||
|
/* think more on this. */
|
||||||
|
if (equivalenceListSize < 1)
|
||||||
|
{
|
||||||
|
return commonEquivalenceClass;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* setup the initial state of the main equivalence class */
|
||||||
|
firstEquivalenceClass = linitial(attributeEquivalenceList);
|
||||||
|
commonEquivalenceClass->equivalentAttributes =
|
||||||
|
firstEquivalenceClass->equivalentAttributes;
|
||||||
|
addedEquivalenceIds = bms_add_member(addedEquivalenceIds,
|
||||||
|
firstEquivalenceClass->equivalenceId);
|
||||||
|
|
||||||
|
for (; equivalenceClassIndex < equivalenceListSize; ++equivalenceClassIndex)
|
||||||
|
{
|
||||||
|
AttributeEquivalenceClass *currentEquivalenceClass =
|
||||||
|
list_nth(attributeEquivalenceList, equivalenceClassIndex);
|
||||||
|
ListCell *equivalenceMemberCell = NULL;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This is an optimization. If we already added the same equivalence class,
|
||||||
|
* we could skip it since we've already added all the relevant equivalence
|
||||||
|
* members.
|
||||||
|
*/
|
||||||
|
if (bms_is_member(currentEquivalenceClass->equivalenceId, addedEquivalenceIds))
|
||||||
|
{
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
foreach(equivalenceMemberCell, currentEquivalenceClass->equivalentAttributes)
|
||||||
|
{
|
||||||
|
AttributeEquivalenceClassMember *attributeEquialanceMember =
|
||||||
|
(AttributeEquivalenceClassMember *) lfirst(equivalenceMemberCell);
|
||||||
|
|
||||||
|
if (AttributeClassContainsAttributeClassMember(attributeEquialanceMember,
|
||||||
|
commonEquivalenceClass))
|
||||||
|
{
|
||||||
|
ListConcatUniqueAttributeClassMemberLists(&commonEquivalenceClass,
|
||||||
|
currentEquivalenceClass);
|
||||||
|
|
||||||
|
addedEquivalenceIds = bms_add_member(addedEquivalenceIds,
|
||||||
|
currentEquivalenceClass->
|
||||||
|
equivalenceId);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* It seems inefficient to start from the beginning.
|
||||||
|
* But, we should somehow restart from the beginning to test that
|
||||||
|
* whether the already skipped ones are equal or not.
|
||||||
|
*/
|
||||||
|
equivalenceClassIndex = 0;
|
||||||
|
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return commonEquivalenceClass;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* ListConcatUniqueAttributeClassMemberLists gets two attribute equivalence classes. It
|
||||||
|
* basically concatenates attribute equivalence member lists uniquely and updates the
|
||||||
|
* firstClass' member list with the list.
|
||||||
|
*
|
||||||
|
* Basically, the function iterates over the secondClass' member list and checks whether
|
||||||
|
* it already exists in the firstClass' member list. If not, the member is added to the
|
||||||
|
* firstClass.
|
||||||
|
*/
|
||||||
|
static void
|
||||||
|
ListConcatUniqueAttributeClassMemberLists(AttributeEquivalenceClass **firstClass,
|
||||||
|
AttributeEquivalenceClass *secondClass)
|
||||||
|
{
|
||||||
|
ListCell *equivalenceClassMemberCell = NULL;
|
||||||
|
List *equivalenceMemberList = secondClass->equivalentAttributes;
|
||||||
|
|
||||||
|
foreach(equivalenceClassMemberCell, equivalenceMemberList)
|
||||||
|
{
|
||||||
|
AttributeEquivalenceClassMember *newEqMember =
|
||||||
|
(AttributeEquivalenceClassMember *) lfirst(equivalenceClassMemberCell);
|
||||||
|
|
||||||
|
if (AttributeClassContainsAttributeClassMember(newEqMember, *firstClass))
|
||||||
|
{
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
(*firstClass)->equivalentAttributes = lappend((*firstClass)->equivalentAttributes,
|
||||||
|
newEqMember);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* GenerateAttributeEquivalencesForJoinRestrictions gets a join restriction
|
||||||
|
* context and returns a list of AttrributeEquivalenceClass.
|
||||||
|
*
|
||||||
|
* The algorithm followed can be summarized as below:
|
||||||
|
*
|
||||||
|
* - Per join restriction
|
||||||
|
* - Per RestrictInfo of the join restriction
|
||||||
|
* - Check whether the join restriction is in the form of (Var1 = Var2)
|
||||||
|
* - Create an AttributeEquivalenceClass
|
||||||
|
* - Add both Var1 and Var2 to the AttributeEquivalenceClass
|
||||||
|
*/
|
||||||
|
static List *
|
||||||
|
GenerateAttributeEquivalencesForJoinRestrictions(JoinRestrictionContext *
|
||||||
|
joinRestrictionContext)
|
||||||
|
{
|
||||||
|
List *attributeEquivalenceList = NIL;
|
||||||
|
ListCell *joinRestrictionCell = NULL;
|
||||||
|
|
||||||
|
foreach(joinRestrictionCell, joinRestrictionContext->joinRestrictionList)
|
||||||
|
{
|
||||||
|
JoinRestriction *joinRestriction =
|
||||||
|
(JoinRestriction *) lfirst(joinRestrictionCell);
|
||||||
|
ListCell *restrictionInfoList = NULL;
|
||||||
|
|
||||||
|
foreach(restrictionInfoList, joinRestriction->joinRestrictInfoList)
|
||||||
|
{
|
||||||
|
RestrictInfo *rinfo = (RestrictInfo *) lfirst(restrictionInfoList);
|
||||||
|
OpExpr *restrictionOpExpr = NULL;
|
||||||
|
Node *leftNode = NULL;
|
||||||
|
Node *rightNode = NULL;
|
||||||
|
Expr *strippedLeftExpr = NULL;
|
||||||
|
Expr *strippedRightExpr = NULL;
|
||||||
|
Var *leftVar = NULL;
|
||||||
|
Var *rightVar = NULL;
|
||||||
|
Expr *restrictionClause = rinfo->clause;
|
||||||
|
AttributeEquivalenceClass *attributeEquivalance = NULL;
|
||||||
|
|
||||||
|
if (!IsA(restrictionClause, OpExpr))
|
||||||
|
{
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
restrictionOpExpr = (OpExpr *) restrictionClause;
|
||||||
|
if (list_length(restrictionOpExpr->args) != 2)
|
||||||
|
{
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (!OperatorImplementsEquality(restrictionOpExpr->opno))
|
||||||
|
{
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
leftNode = linitial(restrictionOpExpr->args);
|
||||||
|
rightNode = lsecond(restrictionOpExpr->args);
|
||||||
|
|
||||||
|
/* we also don't want implicit coercions */
|
||||||
|
strippedLeftExpr = (Expr *) strip_implicit_coercions((Node *) leftNode);
|
||||||
|
strippedRightExpr = (Expr *) strip_implicit_coercions((Node *) rightNode);
|
||||||
|
|
||||||
|
if (!(IsA(strippedLeftExpr, Var) && IsA(strippedRightExpr, Var)))
|
||||||
|
{
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
leftVar = (Var *) strippedLeftExpr;
|
||||||
|
rightVar = (Var *) strippedRightExpr;
|
||||||
|
|
||||||
|
attributeEquivalance = palloc0(sizeof(AttributeEquivalenceClass));
|
||||||
|
attributeEquivalance->equivalenceId = attributeEquivalenceId++;
|
||||||
|
|
||||||
|
AddToAttributeEquivalenceClass(&attributeEquivalance,
|
||||||
|
joinRestriction->plannerInfo, leftVar);
|
||||||
|
|
||||||
|
AddToAttributeEquivalenceClass(&attributeEquivalance,
|
||||||
|
joinRestriction->plannerInfo, rightVar);
|
||||||
|
|
||||||
|
attributeEquivalenceList =
|
||||||
|
AddAttributeClassToAttributeClassList(attributeEquivalenceList,
|
||||||
|
attributeEquivalance);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return attributeEquivalenceList;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* AddToAttributeEquivalenceClass is a key function for building the attribute
|
||||||
|
* equivalences. The function gets a plannerInfo, var and attribute equivalence
|
||||||
|
* class. It searches for the RTE_RELATION(s) that the input var belongs to and
|
||||||
|
* adds the found Var(s) to the input attribute equivalence class.
|
||||||
|
*
|
||||||
|
* Note that the input var could come from a subquery (i.e., not directly from an
|
||||||
|
* RTE_RELATION). That's the reason we recursively call the function until the
|
||||||
|
* RTE_RELATION found.
|
||||||
|
*
|
||||||
|
* The algorithm could be summarized as follows:
|
||||||
|
*
|
||||||
|
* - If the RTE that corresponds to a relation
|
||||||
|
* - Generate an AttributeEquivalenceMember and add to the input
|
||||||
|
* AttributeEquivalenceClass
|
||||||
|
* - If the RTE that corresponds to a subquery
|
||||||
|
* - Find the corresponding target entry via varno
|
||||||
|
* - if subquery entry is a set operation (i.e., only UNION/UNION ALL allowed)
|
||||||
|
* - recursively add both left and right sides of the set operation's
|
||||||
|
* corresponding target entries
|
||||||
|
* - if subquery is not a set operation
|
||||||
|
* - recursively try to add the corresponding target entry to the
|
||||||
|
* equivalence class
|
||||||
|
*
|
||||||
|
* Note that this function only adds partition keys to the attributeEquivalanceClass.
|
||||||
|
* This implies that there wouldn't be any columns for reference tables.
|
||||||
|
*/
|
||||||
|
static void
|
||||||
|
AddToAttributeEquivalenceClass(AttributeEquivalenceClass **attributeEquivalanceClass,
|
||||||
|
PlannerInfo *root, Var *varToBeAdded)
|
||||||
|
{
|
||||||
|
RangeTblEntry *rangeTableEntry = root->simple_rte_array[varToBeAdded->varno];
|
||||||
|
|
||||||
|
if (rangeTableEntry->rtekind == RTE_RELATION)
|
||||||
|
{
|
||||||
|
AttributeEquivalenceClassMember *attributeEqMember = NULL;
|
||||||
|
Oid relationId = rangeTableEntry->relid;
|
||||||
|
Var *relationPartitionKey = NULL;
|
||||||
|
|
||||||
|
if (PartitionMethod(relationId) == DISTRIBUTE_BY_NONE)
|
||||||
|
{
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
relationPartitionKey = PartitionKey(relationId);
|
||||||
|
if (relationPartitionKey->varattno != varToBeAdded->varattno)
|
||||||
|
{
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
attributeEqMember = palloc0(sizeof(AttributeEquivalenceClassMember));
|
||||||
|
|
||||||
|
attributeEqMember->varattno = varToBeAdded->varattno;
|
||||||
|
attributeEqMember->varno = varToBeAdded->varno;
|
||||||
|
attributeEqMember->rteIdentity = GetRTEIdentity(rangeTableEntry);
|
||||||
|
attributeEqMember->relationId = rangeTableEntry->relid;
|
||||||
|
|
||||||
|
(*attributeEquivalanceClass)->equivalentAttributes =
|
||||||
|
lappend((*attributeEquivalanceClass)->equivalentAttributes,
|
||||||
|
attributeEqMember);
|
||||||
|
}
|
||||||
|
else if (rangeTableEntry->rtekind == RTE_SUBQUERY && !rangeTableEntry->inh)
|
||||||
|
{
|
||||||
|
Query *subquery = rangeTableEntry->subquery;
|
||||||
|
RelOptInfo *baseRelOptInfo = NULL;
|
||||||
|
TargetEntry *subqueryTargetEntry = NULL;
|
||||||
|
|
||||||
|
/* punt if it's a whole-row var rather than a plain column reference */
|
||||||
|
if (varToBeAdded->varattno == InvalidAttrNumber)
|
||||||
|
{
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* we also don't want to process ctid, tableoid etc */
|
||||||
|
if (varToBeAdded->varattno < InvalidAttrNumber)
|
||||||
|
{
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
baseRelOptInfo = find_base_rel(root, varToBeAdded->varno);
|
||||||
|
|
||||||
|
/* If the subquery hasn't been planned yet, we have to punt */
|
||||||
|
if (baseRelOptInfo->subroot == NULL)
|
||||||
|
{
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
Assert(IsA(baseRelOptInfo->subroot, PlannerInfo));
|
||||||
|
|
||||||
|
subquery = baseRelOptInfo->subroot->parse;
|
||||||
|
Assert(IsA(subquery, Query));
|
||||||
|
|
||||||
|
/* Get the subquery output expression referenced by the upper Var */
|
||||||
|
subqueryTargetEntry = get_tle_by_resno(subquery->targetList,
|
||||||
|
varToBeAdded->varattno);
|
||||||
|
if (subqueryTargetEntry == NULL || subqueryTargetEntry->resjunk)
|
||||||
|
{
|
||||||
|
ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
|
||||||
|
errmsg("subquery %s does not have attribute %d",
|
||||||
|
rangeTableEntry->eref->aliasname,
|
||||||
|
varToBeAdded->varattno)));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!IsA(subqueryTargetEntry->expr, Var))
|
||||||
|
{
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
varToBeAdded = (Var *) subqueryTargetEntry->expr;
|
||||||
|
|
||||||
|
if (varToBeAdded && IsA(varToBeAdded, Var) && varToBeAdded->varlevelsup == 0)
|
||||||
|
{
|
||||||
|
AddToAttributeEquivalenceClass(attributeEquivalanceClass,
|
||||||
|
baseRelOptInfo->subroot, varToBeAdded);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* AttributeClassContainsAttributeClassMember returns true if it the input class member
|
||||||
|
* is already exists in the attributeEquivalenceClass. An equality is identified by the
|
||||||
|
* varattno and rteIdentity.
|
||||||
|
*/
|
||||||
|
static bool
|
||||||
|
AttributeClassContainsAttributeClassMember(AttributeEquivalenceClassMember *inputMember,
|
||||||
|
AttributeEquivalenceClass *
|
||||||
|
attributeEquivalenceClass)
|
||||||
|
{
|
||||||
|
ListCell *classCell = NULL;
|
||||||
|
foreach(classCell, attributeEquivalenceClass->equivalentAttributes)
|
||||||
|
{
|
||||||
|
AttributeEquivalenceClassMember *memberOfClass =
|
||||||
|
(AttributeEquivalenceClassMember *) lfirst(classCell);
|
||||||
|
if (memberOfClass->rteIdentity == inputMember->rteIdentity &&
|
||||||
|
memberOfClass->varattno == inputMember->varattno)
|
||||||
|
{
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* AddAttributeClassToAttributeClassList checks for certain properties of the
|
||||||
|
* input attributeEquivalance before adding it to the attributeEquivalenceList.
|
||||||
|
*
|
||||||
|
* Firstly, the function skips adding NULL attributeEquivalance to the list.
|
||||||
|
* Secondly, since an attribute equivalence class with a single member does
|
||||||
|
* not contribute to our purposes, we skip such classed adding to the list.
|
||||||
|
* Finally, we don't want to add an equivalence class whose exact equivalent
|
||||||
|
* already exists in the list.
|
||||||
|
*/
|
||||||
|
static List *
|
||||||
|
AddAttributeClassToAttributeClassList(List *attributeEquivalenceList,
|
||||||
|
AttributeEquivalenceClass *attributeEquivalance)
|
||||||
|
{
|
||||||
|
List *equivalentAttributes = NULL;
|
||||||
|
ListCell *attributeEquivalanceCell = NULL;
|
||||||
|
|
||||||
|
if (attributeEquivalance == NULL)
|
||||||
|
{
|
||||||
|
return attributeEquivalenceList;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Note that in some cases we allow having equivalentAttributes with zero or
|
||||||
|
* one elements. For the details, see AddToAttributeEquivalenceClass().
|
||||||
|
*/
|
||||||
|
equivalentAttributes = attributeEquivalance->equivalentAttributes;
|
||||||
|
if (list_length(equivalentAttributes) < 2)
|
||||||
|
{
|
||||||
|
return attributeEquivalenceList;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* we don't want to add an attributeEquivalance which already exists */
|
||||||
|
foreach(attributeEquivalanceCell, attributeEquivalenceList)
|
||||||
|
{
|
||||||
|
AttributeEquivalenceClass *currentAttributeEquivalance =
|
||||||
|
(AttributeEquivalenceClass *) lfirst(attributeEquivalanceCell);
|
||||||
|
|
||||||
|
if (AttributeEquivalancesAreEqual(currentAttributeEquivalance,
|
||||||
|
attributeEquivalance))
|
||||||
|
{
|
||||||
|
return attributeEquivalenceList;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
attributeEquivalenceList = lappend(attributeEquivalenceList,
|
||||||
|
attributeEquivalance);
|
||||||
|
|
||||||
|
return attributeEquivalenceList;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* AttributeEquivalancesAreEqual returns true if both input attribute equivalence
|
||||||
|
* classes contains exactly the same members.
|
||||||
|
*/
|
||||||
|
static bool
|
||||||
|
AttributeEquivalancesAreEqual(AttributeEquivalenceClass *firstAttributeEquivalance,
|
||||||
|
AttributeEquivalenceClass *secondAttributeEquivalance)
|
||||||
|
{
|
||||||
|
List *firstEquivalenceMemberList = firstAttributeEquivalance->equivalentAttributes;
|
||||||
|
List *secondEquivalenceMemberList = secondAttributeEquivalance->equivalentAttributes;
|
||||||
|
ListCell *firstAttributeEquivalanceCell = NULL;
|
||||||
|
ListCell *secondAttributeEquivalanceCell = NULL;
|
||||||
|
|
||||||
|
if (list_length(firstEquivalenceMemberList) != list_length(
|
||||||
|
secondEquivalenceMemberList))
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
foreach(firstAttributeEquivalanceCell, firstEquivalenceMemberList)
|
||||||
|
{
|
||||||
|
AttributeEquivalenceClassMember *firstEqMember =
|
||||||
|
(AttributeEquivalenceClassMember *) lfirst(firstAttributeEquivalanceCell);
|
||||||
|
bool foundAnEquivalentMember = false;
|
||||||
|
|
||||||
|
foreach(secondAttributeEquivalanceCell, secondEquivalenceMemberList)
|
||||||
|
{
|
||||||
|
AttributeEquivalenceClassMember *secondEqMember =
|
||||||
|
(AttributeEquivalenceClassMember *) lfirst(
|
||||||
|
secondAttributeEquivalanceCell);
|
||||||
|
|
||||||
|
if (firstEqMember->rteIdentity == secondEqMember->rteIdentity &&
|
||||||
|
firstEqMember->varattno == secondEqMember->varattno)
|
||||||
|
{
|
||||||
|
foundAnEquivalentMember = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* we couldn't find an equivalent member */
|
||||||
|
if (!foundAnEquivalentMember)
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
|
@ -148,6 +148,7 @@ _PG_init(void)
|
||||||
|
|
||||||
/* register for planner hook */
|
/* register for planner hook */
|
||||||
set_rel_pathlist_hook = multi_relation_restriction_hook;
|
set_rel_pathlist_hook = multi_relation_restriction_hook;
|
||||||
|
set_join_pathlist_hook = multi_join_restriction_hook;
|
||||||
|
|
||||||
/* organize that task tracker is started once server is up */
|
/* organize that task tracker is started once server is up */
|
||||||
TaskTrackerRegister();
|
TaskTrackerRegister();
|
||||||
|
|
|
@ -37,9 +37,29 @@ typedef struct RelationRestriction
|
||||||
RangeTblEntry *rte;
|
RangeTblEntry *rte;
|
||||||
RelOptInfo *relOptInfo;
|
RelOptInfo *relOptInfo;
|
||||||
PlannerInfo *plannerInfo;
|
PlannerInfo *plannerInfo;
|
||||||
|
PlannerInfo *parentPlannerInfo;
|
||||||
|
List *parentPlannerParamList;
|
||||||
List *prunedShardIntervalList;
|
List *prunedShardIntervalList;
|
||||||
} RelationRestriction;
|
} RelationRestriction;
|
||||||
|
|
||||||
|
typedef struct JoinRestrictionContext
|
||||||
|
{
|
||||||
|
List *joinRestrictionList;
|
||||||
|
} JoinRestrictionContext;
|
||||||
|
|
||||||
|
typedef struct JoinRestriction
|
||||||
|
{
|
||||||
|
JoinType joinType;
|
||||||
|
List *joinRestrictInfoList;
|
||||||
|
PlannerInfo *plannerInfo;
|
||||||
|
} JoinRestriction;
|
||||||
|
|
||||||
|
typedef struct PlannerRestrictionContext
|
||||||
|
{
|
||||||
|
RelationRestrictionContext *relationRestrictionContext;
|
||||||
|
JoinRestrictionContext *joinRestrictionContext;
|
||||||
|
} PlannerRestrictionContext;
|
||||||
|
|
||||||
typedef struct RelationShard
|
typedef struct RelationShard
|
||||||
{
|
{
|
||||||
CitusNode type;
|
CitusNode type;
|
||||||
|
@ -55,9 +75,17 @@ struct MultiPlan;
|
||||||
extern struct MultiPlan * GetMultiPlan(CustomScan *node);
|
extern struct MultiPlan * GetMultiPlan(CustomScan *node);
|
||||||
extern void multi_relation_restriction_hook(PlannerInfo *root, RelOptInfo *relOptInfo,
|
extern void multi_relation_restriction_hook(PlannerInfo *root, RelOptInfo *relOptInfo,
|
||||||
Index index, RangeTblEntry *rte);
|
Index index, RangeTblEntry *rte);
|
||||||
|
extern void multi_join_restriction_hook(PlannerInfo *root,
|
||||||
|
RelOptInfo *joinrel,
|
||||||
|
RelOptInfo *outerrel,
|
||||||
|
RelOptInfo *innerrel,
|
||||||
|
JoinType jointype,
|
||||||
|
JoinPathExtraData *extra);
|
||||||
extern bool IsModifyCommand(Query *query);
|
extern bool IsModifyCommand(Query *query);
|
||||||
extern bool IsModifyMultiPlan(struct MultiPlan *multiPlan);
|
extern bool IsModifyMultiPlan(struct MultiPlan *multiPlan);
|
||||||
extern RangeTblEntry * RemoteScanRangeTableEntry(List *columnNameList);
|
extern RangeTblEntry * RemoteScanRangeTableEntry(List *columnNameList);
|
||||||
|
|
||||||
|
|
||||||
|
extern int GetRTEIdentity(RangeTblEntry *rte);
|
||||||
|
|
||||||
#endif /* MULTI_PLANNER_H */
|
#endif /* MULTI_PLANNER_H */
|
||||||
|
|
|
@ -21,9 +21,6 @@
|
||||||
#include "nodes/parsenodes.h"
|
#include "nodes/parsenodes.h"
|
||||||
|
|
||||||
|
|
||||||
/* reserved parameted id, we chose a negative number since it is not assigned by postgres */
|
|
||||||
#define UNINSTANTIATED_PARAMETER_ID INT_MIN
|
|
||||||
|
|
||||||
/* reserved alias name for UPSERTs */
|
/* reserved alias name for UPSERTs */
|
||||||
#define CITUS_TABLE_ALIAS "citus_table_alias"
|
#define CITUS_TABLE_ALIAS "citus_table_alias"
|
||||||
|
|
||||||
|
@ -32,9 +29,9 @@ extern bool EnableRouterExecution;
|
||||||
extern MultiPlan * CreateRouterPlan(Query *originalQuery, Query *query,
|
extern MultiPlan * CreateRouterPlan(Query *originalQuery, Query *query,
|
||||||
RelationRestrictionContext *restrictionContext);
|
RelationRestrictionContext *restrictionContext);
|
||||||
extern MultiPlan * CreateModifyPlan(Query *originalQuery, Query *query,
|
extern MultiPlan * CreateModifyPlan(Query *originalQuery, Query *query,
|
||||||
RelationRestrictionContext *restrictionContext);
|
PlannerRestrictionContext *
|
||||||
|
plannerRestrictionContext);
|
||||||
|
|
||||||
extern void AddUninstantiatedPartitionRestriction(Query *originalQuery);
|
|
||||||
extern DeferredErrorMessage * ModifyQuerySupported(Query *queryTree);
|
extern DeferredErrorMessage * ModifyQuerySupported(Query *queryTree);
|
||||||
extern Query * ReorderInsertSelectTargetLists(Query *originalQuery,
|
extern Query * ReorderInsertSelectTargetLists(Query *originalQuery,
|
||||||
RangeTblEntry *insertRte,
|
RangeTblEntry *insertRte,
|
||||||
|
|
|
@ -0,0 +1,22 @@
|
||||||
|
/*
|
||||||
|
* relation_restriction_equivalence.h
|
||||||
|
*
|
||||||
|
* This file contains helper functions for planning
|
||||||
|
* queries with colocated tables and subqueries.
|
||||||
|
*
|
||||||
|
* Copyright (c) 2017-2017, Citus Data, Inc.
|
||||||
|
*
|
||||||
|
*-------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef RELATION_RESTRICTION_EQUIVALENCE_H
|
||||||
|
#define RELATION_RESTRICTION_EQUIVALENCE_H
|
||||||
|
|
||||||
|
#include "distributed/multi_planner.h"
|
||||||
|
|
||||||
|
|
||||||
|
extern bool RestrictionEquivalenceForPartitionKeys(PlannerRestrictionContext *
|
||||||
|
plannerRestrictionContext);
|
||||||
|
|
||||||
|
|
||||||
|
#endif /* RELATION_RESTRICTION_EQUIVALENCE_H */
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -35,6 +35,13 @@ SELECT create_reference_table('reference_table');
|
||||||
|
|
||||||
(1 row)
|
(1 row)
|
||||||
|
|
||||||
|
CREATE TABLE insert_select_varchar_test (key varchar, value int);
|
||||||
|
SELECT create_distributed_table('insert_select_varchar_test', 'key', 'hash');
|
||||||
|
create_distributed_table
|
||||||
|
--------------------------
|
||||||
|
|
||||||
|
(1 row)
|
||||||
|
|
||||||
-- set back to the defaults
|
-- set back to the defaults
|
||||||
SET citus.shard_count = DEFAULT;
|
SET citus.shard_count = DEFAULT;
|
||||||
SET citus.shard_replication_factor = DEFAULT;
|
SET citus.shard_replication_factor = DEFAULT;
|
||||||
|
@ -1125,13 +1132,13 @@ SELECT
|
||||||
FROM
|
FROM
|
||||||
((SELECT user_id FROM raw_events_first) UNION
|
((SELECT user_id FROM raw_events_first) UNION
|
||||||
(SELECT user_id FROM raw_events_second)) as foo;
|
(SELECT user_id FROM raw_events_second)) as foo;
|
||||||
ERROR: set operations are not allowed in INSERT ... SELECT queries
|
ERROR: Set operations are not allowed in INSERT ... SELECT queries
|
||||||
-- We do not support any set operations
|
-- We do not support any set operations
|
||||||
INSERT INTO
|
INSERT INTO
|
||||||
raw_events_first(user_id)
|
raw_events_first(user_id)
|
||||||
(SELECT user_id FROM raw_events_first) INTERSECT
|
(SELECT user_id FROM raw_events_first) INTERSECT
|
||||||
(SELECT user_id FROM raw_events_first);
|
(SELECT user_id FROM raw_events_first);
|
||||||
ERROR: set operations are not allowed in INSERT ... SELECT queries
|
ERROR: Set operations are not allowed in INSERT ... SELECT queries
|
||||||
-- We do not support any set operations
|
-- We do not support any set operations
|
||||||
INSERT INTO
|
INSERT INTO
|
||||||
raw_events_first(user_id)
|
raw_events_first(user_id)
|
||||||
|
@ -1140,7 +1147,7 @@ SELECT
|
||||||
FROM
|
FROM
|
||||||
((SELECT user_id FROM raw_events_first WHERE user_id = 15) EXCEPT
|
((SELECT user_id FROM raw_events_first WHERE user_id = 15) EXCEPT
|
||||||
(SELECT user_id FROM raw_events_second where user_id = 17)) as foo;
|
(SELECT user_id FROM raw_events_second where user_id = 17)) as foo;
|
||||||
ERROR: set operations are not allowed in INSERT ... SELECT queries
|
ERROR: Set operations are not allowed in INSERT ... SELECT queries
|
||||||
-- some supported LEFT joins
|
-- some supported LEFT joins
|
||||||
INSERT INTO agg_events (user_id)
|
INSERT INTO agg_events (user_id)
|
||||||
SELECT
|
SELECT
|
||||||
|
@ -1406,35 +1413,8 @@ DEBUG: Plan is router executable
|
||||||
raw_events_second
|
raw_events_second
|
||||||
WHERE raw_events_second.user_id = raw_events_first.value_1
|
WHERE raw_events_second.user_id = raw_events_first.value_1
|
||||||
AND raw_events_first.value_1 = 12;
|
AND raw_events_first.value_1 = 12;
|
||||||
DEBUG: predicate pruning for shardId 13300001
|
ERROR: cannot perform distributed planning for the given modification
|
||||||
DEBUG: predicate pruning for shardId 13300002
|
DETAIL: Select query cannot be pushed down to the worker.
|
||||||
DEBUG: predicate pruning for shardId 13300003
|
|
||||||
DEBUG: predicate pruning for shardId 13300004
|
|
||||||
DEBUG: predicate pruning for shardId 13300005
|
|
||||||
DEBUG: predicate pruning for shardId 13300006
|
|
||||||
DEBUG: distributed statement: INSERT INTO public.agg_events_13300008 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM public.raw_events_first_13300000 raw_events_first, public.raw_events_second_13300007 raw_events_second WHERE (((raw_events_second.user_id = raw_events_first.value_1) AND (raw_events_first.value_1 = 12)) AND ((hashint4(raw_events_first.user_id) >= '-2147483648'::integer) AND (hashint4(raw_events_first.user_id) <= '-1073741825'::integer)))
|
|
||||||
DEBUG: predicate pruning for shardId 13300000
|
|
||||||
DEBUG: predicate pruning for shardId 13300002
|
|
||||||
DEBUG: predicate pruning for shardId 13300003
|
|
||||||
DEBUG: predicate pruning for shardId 13300004
|
|
||||||
DEBUG: predicate pruning for shardId 13300005
|
|
||||||
DEBUG: predicate pruning for shardId 13300006
|
|
||||||
DEBUG: distributed statement: INSERT INTO public.agg_events_13300009 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM public.raw_events_first_13300001 raw_events_first, public.raw_events_second_13300007 raw_events_second WHERE (((raw_events_second.user_id = raw_events_first.value_1) AND (raw_events_first.value_1 = 12)) AND ((hashint4(raw_events_first.user_id) >= '-1073741824'::integer) AND (hashint4(raw_events_first.user_id) <= '-1'::integer)))
|
|
||||||
DEBUG: predicate pruning for shardId 13300000
|
|
||||||
DEBUG: predicate pruning for shardId 13300001
|
|
||||||
DEBUG: predicate pruning for shardId 13300003
|
|
||||||
DEBUG: predicate pruning for shardId 13300004
|
|
||||||
DEBUG: predicate pruning for shardId 13300005
|
|
||||||
DEBUG: predicate pruning for shardId 13300006
|
|
||||||
DEBUG: distributed statement: INSERT INTO public.agg_events_13300010 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM public.raw_events_first_13300002 raw_events_first, public.raw_events_second_13300007 raw_events_second WHERE (((raw_events_second.user_id = raw_events_first.value_1) AND (raw_events_first.value_1 = 12)) AND ((hashint4(raw_events_first.user_id) >= 0) AND (hashint4(raw_events_first.user_id) <= 1073741823)))
|
|
||||||
DEBUG: predicate pruning for shardId 13300000
|
|
||||||
DEBUG: predicate pruning for shardId 13300001
|
|
||||||
DEBUG: predicate pruning for shardId 13300002
|
|
||||||
DEBUG: predicate pruning for shardId 13300004
|
|
||||||
DEBUG: predicate pruning for shardId 13300005
|
|
||||||
DEBUG: predicate pruning for shardId 13300006
|
|
||||||
DEBUG: distributed statement: INSERT INTO public.agg_events_13300011 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM public.raw_events_first_13300003 raw_events_first, public.raw_events_second_13300007 raw_events_second WHERE (((raw_events_second.user_id = raw_events_first.value_1) AND (raw_events_first.value_1 = 12)) AND ((hashint4(raw_events_first.user_id) >= 1073741824) AND (hashint4(raw_events_first.user_id) <= 2147483647)))
|
|
||||||
DEBUG: Plan is router executable
|
|
||||||
|
|
||||||
-- some unsupported LEFT/INNER JOINs
|
-- some unsupported LEFT/INNER JOINs
|
||||||
-- JOIN on one table with partition column other is not
|
-- JOIN on one table with partition column other is not
|
||||||
|
@ -1443,9 +1423,6 @@ DEBUG: Plan is router executable
|
||||||
raw_events_first.user_id
|
raw_events_first.user_id
|
||||||
FROM
|
FROM
|
||||||
raw_events_first LEFT JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.value_1;
|
raw_events_first LEFT JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.value_1;
|
||||||
DEBUG: predicate pruning for shardId 13300001
|
|
||||||
DEBUG: predicate pruning for shardId 13300002
|
|
||||||
DEBUG: predicate pruning for shardId 13300003
|
|
||||||
ERROR: cannot perform distributed planning for the given modification
|
ERROR: cannot perform distributed planning for the given modification
|
||||||
DETAIL: Select query cannot be pushed down to the worker.
|
DETAIL: Select query cannot be pushed down to the worker.
|
||||||
|
|
||||||
|
@ -1455,9 +1432,6 @@ DETAIL: Select query cannot be pushed down to the worker.
|
||||||
raw_events_first.user_id
|
raw_events_first.user_id
|
||||||
FROM
|
FROM
|
||||||
raw_events_first INNER JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.value_1;
|
raw_events_first INNER JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.value_1;
|
||||||
DEBUG: predicate pruning for shardId 13300001
|
|
||||||
DEBUG: predicate pruning for shardId 13300002
|
|
||||||
DEBUG: predicate pruning for shardId 13300003
|
|
||||||
ERROR: cannot perform distributed planning for the given modification
|
ERROR: cannot perform distributed planning for the given modification
|
||||||
DETAIL: Select query cannot be pushed down to the worker.
|
DETAIL: Select query cannot be pushed down to the worker.
|
||||||
|
|
||||||
|
@ -1477,9 +1451,6 @@ DETAIL: Select query cannot be pushed down to the worker.
|
||||||
raw_events_first.user_id
|
raw_events_first.user_id
|
||||||
FROM
|
FROM
|
||||||
raw_events_first LEFT JOIN raw_events_second ON raw_events_first.value_1 = raw_events_second.value_1;
|
raw_events_first LEFT JOIN raw_events_second ON raw_events_first.value_1 = raw_events_second.value_1;
|
||||||
DEBUG: predicate pruning for shardId 13300001
|
|
||||||
DEBUG: predicate pruning for shardId 13300002
|
|
||||||
DEBUG: predicate pruning for shardId 13300003
|
|
||||||
ERROR: cannot perform distributed planning for the given modification
|
ERROR: cannot perform distributed planning for the given modification
|
||||||
DETAIL: Select query cannot be pushed down to the worker.
|
DETAIL: Select query cannot be pushed down to the worker.
|
||||||
|
|
||||||
|
@ -1489,9 +1460,6 @@ DETAIL: Select query cannot be pushed down to the worker.
|
||||||
raw_events_first.user_id
|
raw_events_first.user_id
|
||||||
FROM
|
FROM
|
||||||
raw_events_first INNER JOIN raw_events_second ON raw_events_first.value_1 = raw_events_second.value_1;
|
raw_events_first INNER JOIN raw_events_second ON raw_events_first.value_1 = raw_events_second.value_1;
|
||||||
DEBUG: predicate pruning for shardId 13300001
|
|
||||||
DEBUG: predicate pruning for shardId 13300002
|
|
||||||
DEBUG: predicate pruning for shardId 13300003
|
|
||||||
ERROR: cannot perform distributed planning for the given modification
|
ERROR: cannot perform distributed planning for the given modification
|
||||||
DETAIL: Select query cannot be pushed down to the worker.
|
DETAIL: Select query cannot be pushed down to the worker.
|
||||||
|
|
||||||
|
@ -1530,9 +1498,6 @@ DETAIL: Select query cannot be pushed down to the worker.
|
||||||
FROM
|
FROM
|
||||||
raw_events_first INNER JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.value_1
|
raw_events_first INNER JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.value_1
|
||||||
WHERE raw_events_first.value_1 IN (10, 11,12) OR raw_events_second.user_id IN (1,2,3,4);
|
WHERE raw_events_first.value_1 IN (10, 11,12) OR raw_events_second.user_id IN (1,2,3,4);
|
||||||
DEBUG: predicate pruning for shardId 13300001
|
|
||||||
DEBUG: predicate pruning for shardId 13300002
|
|
||||||
DEBUG: predicate pruning for shardId 13300003
|
|
||||||
ERROR: cannot perform distributed planning for the given modification
|
ERROR: cannot perform distributed planning for the given modification
|
||||||
DETAIL: Select query cannot be pushed down to the worker.
|
DETAIL: Select query cannot be pushed down to the worker.
|
||||||
|
|
||||||
|
@ -1543,9 +1508,6 @@ DETAIL: Select query cannot be pushed down to the worker.
|
||||||
FROM raw_events_first,
|
FROM raw_events_first,
|
||||||
raw_events_second
|
raw_events_second
|
||||||
WHERE raw_events_second.user_id = raw_events_first.value_1;
|
WHERE raw_events_second.user_id = raw_events_first.value_1;
|
||||||
DEBUG: predicate pruning for shardId 13300001
|
|
||||||
DEBUG: predicate pruning for shardId 13300002
|
|
||||||
DEBUG: predicate pruning for shardId 13300003
|
|
||||||
ERROR: cannot perform distributed planning for the given modification
|
ERROR: cannot perform distributed planning for the given modification
|
||||||
DETAIL: Select query cannot be pushed down to the worker.
|
DETAIL: Select query cannot be pushed down to the worker.
|
||||||
|
|
||||||
|
@ -1559,9 +1521,6 @@ DETAIL: Select query cannot be pushed down to the worker.
|
||||||
raw_events_second
|
raw_events_second
|
||||||
WHERE raw_events_second.user_id = raw_events_first.value_1
|
WHERE raw_events_second.user_id = raw_events_first.value_1
|
||||||
AND raw_events_first.value_2 = 12;
|
AND raw_events_first.value_2 = 12;
|
||||||
DEBUG: predicate pruning for shardId 13300001
|
|
||||||
DEBUG: predicate pruning for shardId 13300002
|
|
||||||
DEBUG: predicate pruning for shardId 13300003
|
|
||||||
ERROR: cannot perform distributed planning for the given modification
|
ERROR: cannot perform distributed planning for the given modification
|
||||||
DETAIL: Select query cannot be pushed down to the worker.
|
DETAIL: Select query cannot be pushed down to the worker.
|
||||||
|
|
||||||
|
@ -1596,12 +1555,6 @@ DETAIL: Select query cannot be pushed down to the worker.
|
||||||
ON (f.id = f2.id)) as outer_most
|
ON (f.id = f2.id)) as outer_most
|
||||||
GROUP BY
|
GROUP BY
|
||||||
outer_most.id;
|
outer_most.id;
|
||||||
DEBUG: predicate pruning for shardId 13300001
|
|
||||||
DEBUG: predicate pruning for shardId 13300002
|
|
||||||
DEBUG: predicate pruning for shardId 13300003
|
|
||||||
DEBUG: predicate pruning for shardId 13300005
|
|
||||||
DEBUG: predicate pruning for shardId 13300006
|
|
||||||
DEBUG: predicate pruning for shardId 13300007
|
|
||||||
ERROR: cannot perform distributed planning for the given modification
|
ERROR: cannot perform distributed planning for the given modification
|
||||||
DETAIL: Select query cannot be pushed down to the worker.
|
DETAIL: Select query cannot be pushed down to the worker.
|
||||||
INSERT INTO agg_events
|
INSERT INTO agg_events
|
||||||
|
@ -1793,12 +1746,6 @@ outer_most.id, max(outer_most.value)
|
||||||
HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2
|
HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2
|
||||||
ON (f.id != f2.id)) as outer_most
|
ON (f.id != f2.id)) as outer_most
|
||||||
GROUP BY outer_most.id;
|
GROUP BY outer_most.id;
|
||||||
DEBUG: predicate pruning for shardId 13300001
|
|
||||||
DEBUG: predicate pruning for shardId 13300002
|
|
||||||
DEBUG: predicate pruning for shardId 13300003
|
|
||||||
DEBUG: predicate pruning for shardId 13300005
|
|
||||||
DEBUG: predicate pruning for shardId 13300006
|
|
||||||
DEBUG: predicate pruning for shardId 13300007
|
|
||||||
ERROR: cannot perform distributed planning for the given modification
|
ERROR: cannot perform distributed planning for the given modification
|
||||||
DETAIL: Select query cannot be pushed down to the worker.
|
DETAIL: Select query cannot be pushed down to the worker.
|
||||||
-- cannot pushdown since foo2 is not join on partition key
|
-- cannot pushdown since foo2 is not join on partition key
|
||||||
|
@ -1861,12 +1808,6 @@ FROM
|
||||||
ON (f.id = f2.id)) as outer_most
|
ON (f.id = f2.id)) as outer_most
|
||||||
GROUP BY
|
GROUP BY
|
||||||
outer_most.id;
|
outer_most.id;
|
||||||
DEBUG: predicate pruning for shardId 13300001
|
|
||||||
DEBUG: predicate pruning for shardId 13300002
|
|
||||||
DEBUG: predicate pruning for shardId 13300003
|
|
||||||
DEBUG: predicate pruning for shardId 13300005
|
|
||||||
DEBUG: predicate pruning for shardId 13300006
|
|
||||||
DEBUG: predicate pruning for shardId 13300007
|
|
||||||
ERROR: cannot perform distributed planning for the given modification
|
ERROR: cannot perform distributed planning for the given modification
|
||||||
DETAIL: Select query cannot be pushed down to the worker.
|
DETAIL: Select query cannot be pushed down to the worker.
|
||||||
-- some unsupported LATERAL JOINs
|
-- some unsupported LATERAL JOINs
|
||||||
|
@ -1925,12 +1866,6 @@ FROM
|
||||||
JOIN LATERAL
|
JOIN LATERAL
|
||||||
(SELECT user_id, value_4 FROM agg_events) as agg_ids ON (agg_ids.value_4 = averages.user_id)
|
(SELECT user_id, value_4 FROM agg_events) as agg_ids ON (agg_ids.value_4 = averages.user_id)
|
||||||
GROUP BY averages.user_id;
|
GROUP BY averages.user_id;
|
||||||
DEBUG: predicate pruning for shardId 13300005
|
|
||||||
DEBUG: predicate pruning for shardId 13300006
|
|
||||||
DEBUG: predicate pruning for shardId 13300007
|
|
||||||
DEBUG: predicate pruning for shardId 13300001
|
|
||||||
DEBUG: predicate pruning for shardId 13300002
|
|
||||||
DEBUG: predicate pruning for shardId 13300003
|
|
||||||
ERROR: cannot perform distributed planning for the given modification
|
ERROR: cannot perform distributed planning for the given modification
|
||||||
DETAIL: Select query cannot be pushed down to the worker.
|
DETAIL: Select query cannot be pushed down to the worker.
|
||||||
-- not supported subqueries in WHERE clause
|
-- not supported subqueries in WHERE clause
|
||||||
|
@ -1942,9 +1877,6 @@ SELECT user_id
|
||||||
FROM raw_events_first
|
FROM raw_events_first
|
||||||
WHERE user_id IN (SELECT value_1
|
WHERE user_id IN (SELECT value_1
|
||||||
FROM raw_events_second);
|
FROM raw_events_second);
|
||||||
DEBUG: predicate pruning for shardId 13300001
|
|
||||||
DEBUG: predicate pruning for shardId 13300002
|
|
||||||
DEBUG: predicate pruning for shardId 13300003
|
|
||||||
ERROR: cannot perform distributed planning for the given modification
|
ERROR: cannot perform distributed planning for the given modification
|
||||||
DETAIL: Select query cannot be pushed down to the worker.
|
DETAIL: Select query cannot be pushed down to the worker.
|
||||||
-- same as above but slightly more complex
|
-- same as above but slightly more complex
|
||||||
|
@ -1973,6 +1905,17 @@ FROM (SELECT SUM(raw_events_second.value_4) AS v4,
|
||||||
ON (f.id = f2.id)
|
ON (f.id = f2.id)
|
||||||
WHERE f.id IN (SELECT value_1
|
WHERE f.id IN (SELECT value_1
|
||||||
FROM raw_events_second);
|
FROM raw_events_second);
|
||||||
|
ERROR: cannot perform distributed planning for the given modification
|
||||||
|
DETAIL: Select query cannot be pushed down to the worker.
|
||||||
|
-- some more semi-anti join tests
|
||||||
|
-- join in where
|
||||||
|
INSERT INTO raw_events_second
|
||||||
|
(user_id)
|
||||||
|
SELECT user_id
|
||||||
|
FROM raw_events_first
|
||||||
|
WHERE user_id IN (SELECT raw_events_second.user_id
|
||||||
|
FROM raw_events_second, raw_events_first
|
||||||
|
WHERE raw_events_second.user_id = raw_events_first.user_id AND raw_events_first.user_id = 200);
|
||||||
DEBUG: predicate pruning for shardId 13300001
|
DEBUG: predicate pruning for shardId 13300001
|
||||||
DEBUG: predicate pruning for shardId 13300002
|
DEBUG: predicate pruning for shardId 13300002
|
||||||
DEBUG: predicate pruning for shardId 13300003
|
DEBUG: predicate pruning for shardId 13300003
|
||||||
|
@ -1982,6 +1925,389 @@ DEBUG: predicate pruning for shardId 13300007
|
||||||
DEBUG: predicate pruning for shardId 13300001
|
DEBUG: predicate pruning for shardId 13300001
|
||||||
DEBUG: predicate pruning for shardId 13300002
|
DEBUG: predicate pruning for shardId 13300002
|
||||||
DEBUG: predicate pruning for shardId 13300003
|
DEBUG: predicate pruning for shardId 13300003
|
||||||
|
DEBUG: distributed statement: INSERT INTO public.raw_events_second_13300004 AS citus_table_alias (user_id) SELECT user_id FROM public.raw_events_first_13300000 raw_events_first WHERE ((user_id IN (SELECT raw_events_second.user_id FROM public.raw_events_second_13300004 raw_events_second, public.raw_events_first_13300000 raw_events_first_1 WHERE ((raw_events_second.user_id = raw_events_first_1.user_id) AND (raw_events_first_1.user_id = 200)))) AND ((hashint4(user_id) >= '-2147483648'::integer) AND (hashint4(user_id) <= '-1073741825'::integer)))
|
||||||
|
DEBUG: predicate pruning for shardId 13300000
|
||||||
|
DEBUG: predicate pruning for shardId 13300001
|
||||||
|
DEBUG: predicate pruning for shardId 13300002
|
||||||
|
DEBUG: predicate pruning for shardId 13300003
|
||||||
|
DEBUG: predicate pruning for shardId 13300004
|
||||||
|
DEBUG: predicate pruning for shardId 13300005
|
||||||
|
DEBUG: predicate pruning for shardId 13300006
|
||||||
|
DEBUG: predicate pruning for shardId 13300007
|
||||||
|
DEBUG: predicate pruning for shardId 13300000
|
||||||
|
DEBUG: predicate pruning for shardId 13300001
|
||||||
|
DEBUG: predicate pruning for shardId 13300002
|
||||||
|
DEBUG: predicate pruning for shardId 13300003
|
||||||
|
DEBUG: Skipping target shard interval 13300005 since SELECT query for it pruned away
|
||||||
|
DEBUG: predicate pruning for shardId 13300000
|
||||||
|
DEBUG: predicate pruning for shardId 13300001
|
||||||
|
DEBUG: predicate pruning for shardId 13300002
|
||||||
|
DEBUG: predicate pruning for shardId 13300003
|
||||||
|
DEBUG: predicate pruning for shardId 13300004
|
||||||
|
DEBUG: predicate pruning for shardId 13300005
|
||||||
|
DEBUG: predicate pruning for shardId 13300006
|
||||||
|
DEBUG: predicate pruning for shardId 13300007
|
||||||
|
DEBUG: predicate pruning for shardId 13300000
|
||||||
|
DEBUG: predicate pruning for shardId 13300001
|
||||||
|
DEBUG: predicate pruning for shardId 13300002
|
||||||
|
DEBUG: predicate pruning for shardId 13300003
|
||||||
|
DEBUG: Skipping target shard interval 13300006 since SELECT query for it pruned away
|
||||||
|
DEBUG: predicate pruning for shardId 13300000
|
||||||
|
DEBUG: predicate pruning for shardId 13300001
|
||||||
|
DEBUG: predicate pruning for shardId 13300002
|
||||||
|
DEBUG: predicate pruning for shardId 13300003
|
||||||
|
DEBUG: predicate pruning for shardId 13300004
|
||||||
|
DEBUG: predicate pruning for shardId 13300005
|
||||||
|
DEBUG: predicate pruning for shardId 13300006
|
||||||
|
DEBUG: predicate pruning for shardId 13300007
|
||||||
|
DEBUG: predicate pruning for shardId 13300000
|
||||||
|
DEBUG: predicate pruning for shardId 13300001
|
||||||
|
DEBUG: predicate pruning for shardId 13300002
|
||||||
|
DEBUG: predicate pruning for shardId 13300003
|
||||||
|
DEBUG: Skipping target shard interval 13300007 since SELECT query for it pruned away
|
||||||
|
DEBUG: Plan is router executable
|
||||||
|
-- we cannot push this down since it is NOT IN
|
||||||
|
INSERT INTO raw_events_second
|
||||||
|
(user_id)
|
||||||
|
SELECT user_id
|
||||||
|
FROM raw_events_first
|
||||||
|
WHERE user_id NOT IN (SELECT raw_events_second.user_id
|
||||||
|
FROM raw_events_second, raw_events_first
|
||||||
|
WHERE raw_events_second.user_id = raw_events_first.user_id AND raw_events_first.user_id = 200);
|
||||||
|
DEBUG: predicate pruning for shardId 13300005
|
||||||
|
DEBUG: predicate pruning for shardId 13300006
|
||||||
|
DEBUG: predicate pruning for shardId 13300007
|
||||||
|
DEBUG: predicate pruning for shardId 13300001
|
||||||
|
DEBUG: predicate pruning for shardId 13300002
|
||||||
|
DEBUG: predicate pruning for shardId 13300003
|
||||||
|
ERROR: cannot perform distributed planning for the given modification
|
||||||
|
DETAIL: Select query cannot be pushed down to the worker.
|
||||||
|
-- safe to push down
|
||||||
|
INSERT INTO raw_events_second
|
||||||
|
(user_id)
|
||||||
|
SELECT user_id
|
||||||
|
FROM raw_events_first
|
||||||
|
WHERE EXISTS (SELECT 1
|
||||||
|
FROM raw_events_second
|
||||||
|
WHERE raw_events_second.user_id =raw_events_first.user_id);
|
||||||
|
DEBUG: predicate pruning for shardId 13300001
|
||||||
|
DEBUG: predicate pruning for shardId 13300002
|
||||||
|
DEBUG: predicate pruning for shardId 13300003
|
||||||
|
DEBUG: predicate pruning for shardId 13300005
|
||||||
|
DEBUG: predicate pruning for shardId 13300006
|
||||||
|
DEBUG: predicate pruning for shardId 13300007
|
||||||
|
DEBUG: distributed statement: INSERT INTO public.raw_events_second_13300004 AS citus_table_alias (user_id) SELECT user_id FROM public.raw_events_first_13300000 raw_events_first WHERE ((EXISTS (SELECT 1 FROM public.raw_events_second_13300004 raw_events_second WHERE (raw_events_second.user_id = raw_events_first.user_id))) AND ((hashint4(user_id) >= '-2147483648'::integer) AND (hashint4(user_id) <= '-1073741825'::integer)))
|
||||||
|
DEBUG: predicate pruning for shardId 13300000
|
||||||
|
DEBUG: predicate pruning for shardId 13300002
|
||||||
|
DEBUG: predicate pruning for shardId 13300003
|
||||||
|
DEBUG: predicate pruning for shardId 13300004
|
||||||
|
DEBUG: predicate pruning for shardId 13300006
|
||||||
|
DEBUG: predicate pruning for shardId 13300007
|
||||||
|
DEBUG: distributed statement: INSERT INTO public.raw_events_second_13300005 AS citus_table_alias (user_id) SELECT user_id FROM public.raw_events_first_13300001 raw_events_first WHERE ((EXISTS (SELECT 1 FROM public.raw_events_second_13300005 raw_events_second WHERE (raw_events_second.user_id = raw_events_first.user_id))) AND ((hashint4(user_id) >= '-1073741824'::integer) AND (hashint4(user_id) <= '-1'::integer)))
|
||||||
|
DEBUG: predicate pruning for shardId 13300000
|
||||||
|
DEBUG: predicate pruning for shardId 13300001
|
||||||
|
DEBUG: predicate pruning for shardId 13300003
|
||||||
|
DEBUG: predicate pruning for shardId 13300004
|
||||||
|
DEBUG: predicate pruning for shardId 13300005
|
||||||
|
DEBUG: predicate pruning for shardId 13300007
|
||||||
|
DEBUG: distributed statement: INSERT INTO public.raw_events_second_13300006 AS citus_table_alias (user_id) SELECT user_id FROM public.raw_events_first_13300002 raw_events_first WHERE ((EXISTS (SELECT 1 FROM public.raw_events_second_13300006 raw_events_second WHERE (raw_events_second.user_id = raw_events_first.user_id))) AND ((hashint4(user_id) >= 0) AND (hashint4(user_id) <= 1073741823)))
|
||||||
|
DEBUG: predicate pruning for shardId 13300000
|
||||||
|
DEBUG: predicate pruning for shardId 13300001
|
||||||
|
DEBUG: predicate pruning for shardId 13300002
|
||||||
|
DEBUG: predicate pruning for shardId 13300004
|
||||||
|
DEBUG: predicate pruning for shardId 13300005
|
||||||
|
DEBUG: predicate pruning for shardId 13300006
|
||||||
|
DEBUG: distributed statement: INSERT INTO public.raw_events_second_13300007 AS citus_table_alias (user_id) SELECT user_id FROM public.raw_events_first_13300003 raw_events_first WHERE ((EXISTS (SELECT 1 FROM public.raw_events_second_13300007 raw_events_second WHERE (raw_events_second.user_id = raw_events_first.user_id))) AND ((hashint4(user_id) >= 1073741824) AND (hashint4(user_id) <= 2147483647)))
|
||||||
|
DEBUG: Plan is router executable
|
||||||
|
-- we cannot push down
|
||||||
|
INSERT INTO raw_events_second
|
||||||
|
(user_id)
|
||||||
|
SELECT user_id
|
||||||
|
FROM raw_events_first
|
||||||
|
WHERE NOT EXISTS (SELECT 1
|
||||||
|
FROM raw_events_second
|
||||||
|
WHERE raw_events_second.user_id =raw_events_first.user_id);
|
||||||
|
DEBUG: predicate pruning for shardId 13300001
|
||||||
|
DEBUG: predicate pruning for shardId 13300002
|
||||||
|
DEBUG: predicate pruning for shardId 13300003
|
||||||
|
DEBUG: predicate pruning for shardId 13300005
|
||||||
|
DEBUG: predicate pruning for shardId 13300006
|
||||||
|
DEBUG: predicate pruning for shardId 13300007
|
||||||
|
DEBUG: distributed statement: INSERT INTO public.raw_events_second_13300004 AS citus_table_alias (user_id) SELECT user_id FROM public.raw_events_first_13300000 raw_events_first WHERE ((NOT (EXISTS (SELECT 1 FROM public.raw_events_second_13300004 raw_events_second WHERE (raw_events_second.user_id = raw_events_first.user_id)))) AND ((hashint4(user_id) >= '-2147483648'::integer) AND (hashint4(user_id) <= '-1073741825'::integer)))
|
||||||
|
DEBUG: predicate pruning for shardId 13300000
|
||||||
|
DEBUG: predicate pruning for shardId 13300002
|
||||||
|
DEBUG: predicate pruning for shardId 13300003
|
||||||
|
DEBUG: predicate pruning for shardId 13300004
|
||||||
|
DEBUG: predicate pruning for shardId 13300006
|
||||||
|
DEBUG: predicate pruning for shardId 13300007
|
||||||
|
DEBUG: distributed statement: INSERT INTO public.raw_events_second_13300005 AS citus_table_alias (user_id) SELECT user_id FROM public.raw_events_first_13300001 raw_events_first WHERE ((NOT (EXISTS (SELECT 1 FROM public.raw_events_second_13300005 raw_events_second WHERE (raw_events_second.user_id = raw_events_first.user_id)))) AND ((hashint4(user_id) >= '-1073741824'::integer) AND (hashint4(user_id) <= '-1'::integer)))
|
||||||
|
DEBUG: predicate pruning for shardId 13300000
|
||||||
|
DEBUG: predicate pruning for shardId 13300001
|
||||||
|
DEBUG: predicate pruning for shardId 13300003
|
||||||
|
DEBUG: predicate pruning for shardId 13300004
|
||||||
|
DEBUG: predicate pruning for shardId 13300005
|
||||||
|
DEBUG: predicate pruning for shardId 13300007
|
||||||
|
DEBUG: distributed statement: INSERT INTO public.raw_events_second_13300006 AS citus_table_alias (user_id) SELECT user_id FROM public.raw_events_first_13300002 raw_events_first WHERE ((NOT (EXISTS (SELECT 1 FROM public.raw_events_second_13300006 raw_events_second WHERE (raw_events_second.user_id = raw_events_first.user_id)))) AND ((hashint4(user_id) >= 0) AND (hashint4(user_id) <= 1073741823)))
|
||||||
|
DEBUG: predicate pruning for shardId 13300000
|
||||||
|
DEBUG: predicate pruning for shardId 13300001
|
||||||
|
DEBUG: predicate pruning for shardId 13300002
|
||||||
|
DEBUG: predicate pruning for shardId 13300004
|
||||||
|
DEBUG: predicate pruning for shardId 13300005
|
||||||
|
DEBUG: predicate pruning for shardId 13300006
|
||||||
|
DEBUG: distributed statement: INSERT INTO public.raw_events_second_13300007 AS citus_table_alias (user_id) SELECT user_id FROM public.raw_events_first_13300003 raw_events_first WHERE ((NOT (EXISTS (SELECT 1 FROM public.raw_events_second_13300007 raw_events_second WHERE (raw_events_second.user_id = raw_events_first.user_id)))) AND ((hashint4(user_id) >= 1073741824) AND (hashint4(user_id) <= 2147483647)))
|
||||||
|
DEBUG: Plan is router executable
|
||||||
|
-- more complex LEFT JOINs
|
||||||
|
INSERT INTO agg_events
|
||||||
|
(user_id, value_4_agg)
|
||||||
|
SELECT
|
||||||
|
outer_most.id, max(outer_most.value)
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
SELECT f2.id as id, f2.v4 as value FROM
|
||||||
|
(SELECT
|
||||||
|
id
|
||||||
|
FROM (SELECT raw_events_first.user_id AS id
|
||||||
|
FROM raw_events_first LEFT JOIN
|
||||||
|
reference_table
|
||||||
|
ON (raw_events_first.user_id = reference_table.user_id)) AS foo) as f
|
||||||
|
LEFT JOIN
|
||||||
|
(SELECT v4,
|
||||||
|
v1,
|
||||||
|
id
|
||||||
|
FROM (SELECT SUM(raw_events_second.value_4) AS v4,
|
||||||
|
SUM(raw_events_first.value_1) AS v1,
|
||||||
|
raw_events_second.user_id AS id
|
||||||
|
FROM raw_events_first,
|
||||||
|
raw_events_second
|
||||||
|
WHERE raw_events_first.user_id = raw_events_second.user_id
|
||||||
|
GROUP BY raw_events_second.user_id
|
||||||
|
HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2
|
||||||
|
ON (f.id = f2.id)) as outer_most
|
||||||
|
GROUP BY
|
||||||
|
outer_most.id;
|
||||||
|
DEBUG: predicate pruning for shardId 13300001
|
||||||
|
DEBUG: predicate pruning for shardId 13300002
|
||||||
|
DEBUG: predicate pruning for shardId 13300003
|
||||||
|
DEBUG: predicate pruning for shardId 13300005
|
||||||
|
DEBUG: predicate pruning for shardId 13300006
|
||||||
|
DEBUG: predicate pruning for shardId 13300007
|
||||||
|
DEBUG: predicate pruning for shardId 13300001
|
||||||
|
DEBUG: predicate pruning for shardId 13300002
|
||||||
|
DEBUG: predicate pruning for shardId 13300003
|
||||||
|
DEBUG: distributed statement: INSERT INTO public.agg_events_13300008 AS citus_table_alias (user_id, value_4_agg) SELECT id, max(value) AS max FROM (SELECT f2.id, f2.v4 AS value FROM ((SELECT foo.id FROM (SELECT raw_events_first.user_id AS id FROM (public.raw_events_first_13300000 raw_events_first LEFT JOIN public.reference_table_13300012 reference_table ON ((raw_events_first.user_id = reference_table.user_id)))) foo) f LEFT JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM public.raw_events_first_13300000 raw_events_first, public.raw_events_second_13300004 raw_events_second WHERE (raw_events_first.user_id = raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) > (10)::numeric)) foo2) f2 ON ((f.id = f2.id)))) outer_most WHERE ((hashint4(id) >= '-2147483648'::integer) AND (hashint4(id) <= '-1073741825'::integer)) GROUP BY id
|
||||||
|
DEBUG: predicate pruning for shardId 13300000
|
||||||
|
DEBUG: predicate pruning for shardId 13300002
|
||||||
|
DEBUG: predicate pruning for shardId 13300003
|
||||||
|
DEBUG: predicate pruning for shardId 13300004
|
||||||
|
DEBUG: predicate pruning for shardId 13300006
|
||||||
|
DEBUG: predicate pruning for shardId 13300007
|
||||||
|
DEBUG: predicate pruning for shardId 13300000
|
||||||
|
DEBUG: predicate pruning for shardId 13300002
|
||||||
|
DEBUG: predicate pruning for shardId 13300003
|
||||||
|
DEBUG: distributed statement: INSERT INTO public.agg_events_13300009 AS citus_table_alias (user_id, value_4_agg) SELECT id, max(value) AS max FROM (SELECT f2.id, f2.v4 AS value FROM ((SELECT foo.id FROM (SELECT raw_events_first.user_id AS id FROM (public.raw_events_first_13300001 raw_events_first LEFT JOIN public.reference_table_13300012 reference_table ON ((raw_events_first.user_id = reference_table.user_id)))) foo) f LEFT JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM public.raw_events_first_13300001 raw_events_first, public.raw_events_second_13300005 raw_events_second WHERE (raw_events_first.user_id = raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) > (10)::numeric)) foo2) f2 ON ((f.id = f2.id)))) outer_most WHERE ((hashint4(id) >= '-1073741824'::integer) AND (hashint4(id) <= '-1'::integer)) GROUP BY id
|
||||||
|
DEBUG: predicate pruning for shardId 13300000
|
||||||
|
DEBUG: predicate pruning for shardId 13300001
|
||||||
|
DEBUG: predicate pruning for shardId 13300003
|
||||||
|
DEBUG: predicate pruning for shardId 13300004
|
||||||
|
DEBUG: predicate pruning for shardId 13300005
|
||||||
|
DEBUG: predicate pruning for shardId 13300007
|
||||||
|
DEBUG: predicate pruning for shardId 13300000
|
||||||
|
DEBUG: predicate pruning for shardId 13300001
|
||||||
|
DEBUG: predicate pruning for shardId 13300003
|
||||||
|
DEBUG: distributed statement: INSERT INTO public.agg_events_13300010 AS citus_table_alias (user_id, value_4_agg) SELECT id, max(value) AS max FROM (SELECT f2.id, f2.v4 AS value FROM ((SELECT foo.id FROM (SELECT raw_events_first.user_id AS id FROM (public.raw_events_first_13300002 raw_events_first LEFT JOIN public.reference_table_13300012 reference_table ON ((raw_events_first.user_id = reference_table.user_id)))) foo) f LEFT JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM public.raw_events_first_13300002 raw_events_first, public.raw_events_second_13300006 raw_events_second WHERE (raw_events_first.user_id = raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) > (10)::numeric)) foo2) f2 ON ((f.id = f2.id)))) outer_most WHERE ((hashint4(id) >= 0) AND (hashint4(id) <= 1073741823)) GROUP BY id
|
||||||
|
DEBUG: predicate pruning for shardId 13300000
|
||||||
|
DEBUG: predicate pruning for shardId 13300001
|
||||||
|
DEBUG: predicate pruning for shardId 13300002
|
||||||
|
DEBUG: predicate pruning for shardId 13300004
|
||||||
|
DEBUG: predicate pruning for shardId 13300005
|
||||||
|
DEBUG: predicate pruning for shardId 13300006
|
||||||
|
DEBUG: predicate pruning for shardId 13300000
|
||||||
|
DEBUG: predicate pruning for shardId 13300001
|
||||||
|
DEBUG: predicate pruning for shardId 13300002
|
||||||
|
DEBUG: distributed statement: INSERT INTO public.agg_events_13300011 AS citus_table_alias (user_id, value_4_agg) SELECT id, max(value) AS max FROM (SELECT f2.id, f2.v4 AS value FROM ((SELECT foo.id FROM (SELECT raw_events_first.user_id AS id FROM (public.raw_events_first_13300003 raw_events_first LEFT JOIN public.reference_table_13300012 reference_table ON ((raw_events_first.user_id = reference_table.user_id)))) foo) f LEFT JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM public.raw_events_first_13300003 raw_events_first, public.raw_events_second_13300007 raw_events_second WHERE (raw_events_first.user_id = raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) > (10)::numeric)) foo2) f2 ON ((f.id = f2.id)))) outer_most WHERE ((hashint4(id) >= 1073741824) AND (hashint4(id) <= 2147483647)) GROUP BY id
|
||||||
|
DEBUG: Plan is router executable
|
||||||
|
-- cannot push down since the f.id IN is matched with value_1
|
||||||
|
INSERT INTO raw_events_second
|
||||||
|
(user_id)
|
||||||
|
SELECT user_id
|
||||||
|
FROM raw_events_first
|
||||||
|
WHERE user_id IN (
|
||||||
|
SELECT f2.id FROM
|
||||||
|
(SELECT
|
||||||
|
id
|
||||||
|
FROM (SELECT reference_table.user_id AS id
|
||||||
|
FROM raw_events_first,
|
||||||
|
reference_table
|
||||||
|
WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f
|
||||||
|
INNER JOIN
|
||||||
|
(SELECT v4,
|
||||||
|
v1,
|
||||||
|
id
|
||||||
|
FROM (SELECT SUM(raw_events_second.value_4) AS v4,
|
||||||
|
SUM(raw_events_first.value_1) AS v1,
|
||||||
|
raw_events_second.user_id AS id
|
||||||
|
FROM raw_events_first,
|
||||||
|
raw_events_second
|
||||||
|
WHERE raw_events_first.user_id = raw_events_second.user_id
|
||||||
|
GROUP BY raw_events_second.user_id
|
||||||
|
HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2
|
||||||
|
ON (f.id = f2.id)
|
||||||
|
WHERE f.id IN (SELECT value_1
|
||||||
|
FROM raw_events_second));
|
||||||
|
ERROR: cannot perform distributed planning for the given modification
|
||||||
|
DETAIL: Select query cannot be pushed down to the worker.
|
||||||
|
-- same as above, but this time is it safe to push down since
|
||||||
|
-- f.id IN is matched with user_id
|
||||||
|
INSERT INTO raw_events_second
|
||||||
|
(user_id)
|
||||||
|
SELECT user_id
|
||||||
|
FROM raw_events_first
|
||||||
|
WHERE user_id IN (
|
||||||
|
SELECT f2.id FROM
|
||||||
|
(SELECT
|
||||||
|
id
|
||||||
|
FROM (SELECT reference_table.user_id AS id
|
||||||
|
FROM raw_events_first,
|
||||||
|
reference_table
|
||||||
|
WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f
|
||||||
|
INNER JOIN
|
||||||
|
(SELECT v4,
|
||||||
|
v1,
|
||||||
|
id
|
||||||
|
FROM (SELECT SUM(raw_events_second.value_4) AS v4,
|
||||||
|
SUM(raw_events_first.value_1) AS v1,
|
||||||
|
raw_events_second.user_id AS id
|
||||||
|
FROM raw_events_first,
|
||||||
|
raw_events_second
|
||||||
|
WHERE raw_events_first.user_id = raw_events_second.user_id
|
||||||
|
GROUP BY raw_events_second.user_id
|
||||||
|
HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2
|
||||||
|
ON (f.id = f2.id)
|
||||||
|
WHERE f.id IN (SELECT user_id
|
||||||
|
FROM raw_events_second));
|
||||||
|
DEBUG: predicate pruning for shardId 13300001
|
||||||
|
DEBUG: predicate pruning for shardId 13300002
|
||||||
|
DEBUG: predicate pruning for shardId 13300003
|
||||||
|
DEBUG: predicate pruning for shardId 13300005
|
||||||
|
DEBUG: predicate pruning for shardId 13300006
|
||||||
|
DEBUG: predicate pruning for shardId 13300007
|
||||||
|
DEBUG: predicate pruning for shardId 13300001
|
||||||
|
DEBUG: predicate pruning for shardId 13300002
|
||||||
|
DEBUG: predicate pruning for shardId 13300003
|
||||||
|
DEBUG: predicate pruning for shardId 13300001
|
||||||
|
DEBUG: predicate pruning for shardId 13300002
|
||||||
|
DEBUG: predicate pruning for shardId 13300003
|
||||||
|
DEBUG: predicate pruning for shardId 13300005
|
||||||
|
DEBUG: predicate pruning for shardId 13300006
|
||||||
|
DEBUG: predicate pruning for shardId 13300007
|
||||||
|
DEBUG: distributed statement: INSERT INTO public.raw_events_second_13300004 AS citus_table_alias (user_id) SELECT user_id FROM public.raw_events_first_13300000 raw_events_first WHERE ((user_id IN (SELECT f2.id FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM public.raw_events_first_13300000 raw_events_first_1, public.reference_table_13300012 reference_table WHERE (raw_events_first_1.user_id = reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first_1.value_1) AS v1, raw_events_second.user_id AS id FROM public.raw_events_first_13300000 raw_events_first_1, public.raw_events_second_13300004 raw_events_second WHERE (raw_events_first_1.user_id = raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) > (10)::numeric)) foo2) f2 ON ((f.id = f2.id))) WHERE (f.id IN (SELECT raw_events_second.user_id FROM public.raw_events_second_13300004 raw_events_second)))) AND ((hashint4(user_id) >= '-2147483648'::integer) AND (hashint4(user_id) <= '-1073741825'::integer)))
|
||||||
|
DEBUG: predicate pruning for shardId 13300000
|
||||||
|
DEBUG: predicate pruning for shardId 13300002
|
||||||
|
DEBUG: predicate pruning for shardId 13300003
|
||||||
|
DEBUG: predicate pruning for shardId 13300004
|
||||||
|
DEBUG: predicate pruning for shardId 13300006
|
||||||
|
DEBUG: predicate pruning for shardId 13300007
|
||||||
|
DEBUG: predicate pruning for shardId 13300000
|
||||||
|
DEBUG: predicate pruning for shardId 13300002
|
||||||
|
DEBUG: predicate pruning for shardId 13300003
|
||||||
|
DEBUG: predicate pruning for shardId 13300000
|
||||||
|
DEBUG: predicate pruning for shardId 13300002
|
||||||
|
DEBUG: predicate pruning for shardId 13300003
|
||||||
|
DEBUG: predicate pruning for shardId 13300004
|
||||||
|
DEBUG: predicate pruning for shardId 13300006
|
||||||
|
DEBUG: predicate pruning for shardId 13300007
|
||||||
|
DEBUG: distributed statement: INSERT INTO public.raw_events_second_13300005 AS citus_table_alias (user_id) SELECT user_id FROM public.raw_events_first_13300001 raw_events_first WHERE ((user_id IN (SELECT f2.id FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM public.raw_events_first_13300001 raw_events_first_1, public.reference_table_13300012 reference_table WHERE (raw_events_first_1.user_id = reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first_1.value_1) AS v1, raw_events_second.user_id AS id FROM public.raw_events_first_13300001 raw_events_first_1, public.raw_events_second_13300005 raw_events_second WHERE (raw_events_first_1.user_id = raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) > (10)::numeric)) foo2) f2 ON ((f.id = f2.id))) WHERE (f.id IN (SELECT raw_events_second.user_id FROM public.raw_events_second_13300005 raw_events_second)))) AND ((hashint4(user_id) >= '-1073741824'::integer) AND (hashint4(user_id) <= '-1'::integer)))
|
||||||
|
DEBUG: predicate pruning for shardId 13300000
|
||||||
|
DEBUG: predicate pruning for shardId 13300001
|
||||||
|
DEBUG: predicate pruning for shardId 13300003
|
||||||
|
DEBUG: predicate pruning for shardId 13300004
|
||||||
|
DEBUG: predicate pruning for shardId 13300005
|
||||||
|
DEBUG: predicate pruning for shardId 13300007
|
||||||
|
DEBUG: predicate pruning for shardId 13300000
|
||||||
|
DEBUG: predicate pruning for shardId 13300001
|
||||||
|
DEBUG: predicate pruning for shardId 13300003
|
||||||
|
DEBUG: predicate pruning for shardId 13300000
|
||||||
|
DEBUG: predicate pruning for shardId 13300001
|
||||||
|
DEBUG: predicate pruning for shardId 13300003
|
||||||
|
DEBUG: predicate pruning for shardId 13300004
|
||||||
|
DEBUG: predicate pruning for shardId 13300005
|
||||||
|
DEBUG: predicate pruning for shardId 13300007
|
||||||
|
DEBUG: distributed statement: INSERT INTO public.raw_events_second_13300006 AS citus_table_alias (user_id) SELECT user_id FROM public.raw_events_first_13300002 raw_events_first WHERE ((user_id IN (SELECT f2.id FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM public.raw_events_first_13300002 raw_events_first_1, public.reference_table_13300012 reference_table WHERE (raw_events_first_1.user_id = reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first_1.value_1) AS v1, raw_events_second.user_id AS id FROM public.raw_events_first_13300002 raw_events_first_1, public.raw_events_second_13300006 raw_events_second WHERE (raw_events_first_1.user_id = raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) > (10)::numeric)) foo2) f2 ON ((f.id = f2.id))) WHERE (f.id IN (SELECT raw_events_second.user_id FROM public.raw_events_second_13300006 raw_events_second)))) AND ((hashint4(user_id) >= 0) AND (hashint4(user_id) <= 1073741823)))
|
||||||
|
DEBUG: predicate pruning for shardId 13300000
|
||||||
|
DEBUG: predicate pruning for shardId 13300001
|
||||||
|
DEBUG: predicate pruning for shardId 13300002
|
||||||
|
DEBUG: predicate pruning for shardId 13300004
|
||||||
|
DEBUG: predicate pruning for shardId 13300005
|
||||||
|
DEBUG: predicate pruning for shardId 13300006
|
||||||
|
DEBUG: predicate pruning for shardId 13300000
|
||||||
|
DEBUG: predicate pruning for shardId 13300001
|
||||||
|
DEBUG: predicate pruning for shardId 13300002
|
||||||
|
DEBUG: predicate pruning for shardId 13300000
|
||||||
|
DEBUG: predicate pruning for shardId 13300001
|
||||||
|
DEBUG: predicate pruning for shardId 13300002
|
||||||
|
DEBUG: predicate pruning for shardId 13300004
|
||||||
|
DEBUG: predicate pruning for shardId 13300005
|
||||||
|
DEBUG: predicate pruning for shardId 13300006
|
||||||
|
DEBUG: distributed statement: INSERT INTO public.raw_events_second_13300007 AS citus_table_alias (user_id) SELECT user_id FROM public.raw_events_first_13300003 raw_events_first WHERE ((user_id IN (SELECT f2.id FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM public.raw_events_first_13300003 raw_events_first_1, public.reference_table_13300012 reference_table WHERE (raw_events_first_1.user_id = reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first_1.value_1) AS v1, raw_events_second.user_id AS id FROM public.raw_events_first_13300003 raw_events_first_1, public.raw_events_second_13300007 raw_events_second WHERE (raw_events_first_1.user_id = raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) > (10)::numeric)) foo2) f2 ON ((f.id = f2.id))) WHERE (f.id IN (SELECT raw_events_second.user_id FROM public.raw_events_second_13300007 raw_events_second)))) AND ((hashint4(user_id) >= 1073741824) AND (hashint4(user_id) <= 2147483647)))
|
||||||
|
DEBUG: Plan is router executable
|
||||||
|
-- cannot push down since top level user_id is matched with NOT IN
|
||||||
|
INSERT INTO raw_events_second
|
||||||
|
(user_id)
|
||||||
|
SELECT user_id
|
||||||
|
FROM raw_events_first
|
||||||
|
WHERE user_id NOT IN (
|
||||||
|
SELECT f2.id FROM
|
||||||
|
(SELECT
|
||||||
|
id
|
||||||
|
FROM (SELECT reference_table.user_id AS id
|
||||||
|
FROM raw_events_first,
|
||||||
|
reference_table
|
||||||
|
WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f
|
||||||
|
INNER JOIN
|
||||||
|
(SELECT v4,
|
||||||
|
v1,
|
||||||
|
id
|
||||||
|
FROM (SELECT SUM(raw_events_second.value_4) AS v4,
|
||||||
|
SUM(raw_events_first.value_1) AS v1,
|
||||||
|
raw_events_second.user_id AS id
|
||||||
|
FROM raw_events_first,
|
||||||
|
raw_events_second
|
||||||
|
WHERE raw_events_first.user_id = raw_events_second.user_id
|
||||||
|
GROUP BY raw_events_second.user_id
|
||||||
|
HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2
|
||||||
|
ON (f.id = f2.id)
|
||||||
|
WHERE f.id IN (SELECT user_id
|
||||||
|
FROM raw_events_second));
|
||||||
|
ERROR: cannot perform distributed planning for the given modification
|
||||||
|
DETAIL: Select query cannot be pushed down to the worker.
|
||||||
|
-- cannot push down since join is not equi join (f.id > f2.id)
|
||||||
|
INSERT INTO raw_events_second
|
||||||
|
(user_id)
|
||||||
|
SELECT user_id
|
||||||
|
FROM raw_events_first
|
||||||
|
WHERE user_id IN (
|
||||||
|
SELECT f2.id FROM
|
||||||
|
(SELECT
|
||||||
|
id
|
||||||
|
FROM (SELECT reference_table.user_id AS id
|
||||||
|
FROM raw_events_first,
|
||||||
|
reference_table
|
||||||
|
WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f
|
||||||
|
INNER JOIN
|
||||||
|
(SELECT v4,
|
||||||
|
v1,
|
||||||
|
id
|
||||||
|
FROM (SELECT SUM(raw_events_second.value_4) AS v4,
|
||||||
|
SUM(raw_events_first.value_1) AS v1,
|
||||||
|
raw_events_second.user_id AS id
|
||||||
|
FROM raw_events_first,
|
||||||
|
raw_events_second
|
||||||
|
WHERE raw_events_first.user_id = raw_events_second.user_id
|
||||||
|
GROUP BY raw_events_second.user_id
|
||||||
|
HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2
|
||||||
|
ON (f.id > f2.id)
|
||||||
|
WHERE f.id IN (SELECT user_id
|
||||||
|
FROM raw_events_second));
|
||||||
ERROR: cannot perform distributed planning for the given modification
|
ERROR: cannot perform distributed planning for the given modification
|
||||||
DETAIL: Select query cannot be pushed down to the worker.
|
DETAIL: Select query cannot be pushed down to the worker.
|
||||||
-- we currently not support grouping sets
|
-- we currently not support grouping sets
|
||||||
|
@ -2054,7 +2380,7 @@ INSERT INTO raw_events_first (user_id, time, value_1, value_2, value_3, value_4)
|
||||||
SELECT count(*) FROM raw_events_second;
|
SELECT count(*) FROM raw_events_second;
|
||||||
count
|
count
|
||||||
-------
|
-------
|
||||||
9
|
18
|
||||||
(1 row)
|
(1 row)
|
||||||
|
|
||||||
INSERT INTO raw_events_second SELECT * FROM test_view;
|
INSERT INTO raw_events_second SELECT * FROM test_view;
|
||||||
|
@ -2064,7 +2390,7 @@ INSERT INTO raw_events_second SELECT * FROM test_view WHERE user_id = 17 GROUP B
|
||||||
SELECT count(*) FROM raw_events_second;
|
SELECT count(*) FROM raw_events_second;
|
||||||
count
|
count
|
||||||
-------
|
-------
|
||||||
11
|
20
|
||||||
(1 row)
|
(1 row)
|
||||||
|
|
||||||
-- inserting into views does not
|
-- inserting into views does not
|
||||||
|
@ -2210,6 +2536,32 @@ DEBUG: predicate pruning for shardId 13300007
|
||||||
DEBUG: Skipping target shard interval 13300003 since SELECT query for it pruned away
|
DEBUG: Skipping target shard interval 13300003 since SELECT query for it pruned away
|
||||||
DEBUG: Plan is router executable
|
DEBUG: Plan is router executable
|
||||||
SET client_min_messages TO INFO;
|
SET client_min_messages TO INFO;
|
||||||
|
-- now do some tests with varchars
|
||||||
|
INSERT INTO insert_select_varchar_test VALUES ('test_1', 10);
|
||||||
|
INSERT INTO insert_select_varchar_test VALUES ('test_2', 30);
|
||||||
|
INSERT INTO insert_select_varchar_test (key, value)
|
||||||
|
SELECT *, 100
|
||||||
|
FROM (SELECT f1.key
|
||||||
|
FROM (SELECT key
|
||||||
|
FROM insert_select_varchar_test
|
||||||
|
GROUP BY 1
|
||||||
|
HAVING Count(key) < 3) AS f1,
|
||||||
|
(SELECT key
|
||||||
|
FROM insert_select_varchar_test
|
||||||
|
GROUP BY 1
|
||||||
|
HAVING Sum(COALESCE(insert_select_varchar_test.value, 0)) >
|
||||||
|
20.0)
|
||||||
|
AS f2
|
||||||
|
WHERE f1.key = f2.key
|
||||||
|
GROUP BY 1) AS foo;
|
||||||
|
SELECT * FROM insert_select_varchar_test;
|
||||||
|
key | value
|
||||||
|
--------+-------
|
||||||
|
test_2 | 30
|
||||||
|
test_2 | 100
|
||||||
|
test_1 | 10
|
||||||
|
(3 rows)
|
||||||
|
|
||||||
-- some tests with DEFAULT columns and constant values
|
-- some tests with DEFAULT columns and constant values
|
||||||
-- this test is mostly importantly intended for deparsing the query correctly
|
-- this test is mostly importantly intended for deparsing the query correctly
|
||||||
-- but still it is preferable to have this test here instead of multi_deparse_shard_query
|
-- but still it is preferable to have this test here instead of multi_deparse_shard_query
|
||||||
|
@ -2233,10 +2585,10 @@ SELECT create_distributed_table('table_with_defaults', 'store_id');
|
||||||
SET client_min_messages TO DEBUG2;
|
SET client_min_messages TO DEBUG2;
|
||||||
-- a very simple query
|
-- a very simple query
|
||||||
INSERT INTO table_with_defaults SELECT * FROM table_with_defaults;
|
INSERT INTO table_with_defaults SELECT * FROM table_with_defaults;
|
||||||
DEBUG: predicate pruning for shardId 13300014
|
DEBUG: predicate pruning for shardId 13300018
|
||||||
DEBUG: distributed statement: INSERT INTO public.table_with_defaults_13300013 AS citus_table_alias (store_id, first_name, default_1, last_name, default_2) SELECT store_id, first_name, default_1, last_name, default_2 FROM public.table_with_defaults_13300013 table_with_defaults WHERE ((hashint4(store_id) >= '-2147483648'::integer) AND (hashint4(store_id) <= '-1'::integer))
|
DEBUG: distributed statement: INSERT INTO public.table_with_defaults_13300017 AS citus_table_alias (store_id, first_name, default_1, last_name, default_2) SELECT store_id, first_name, default_1, last_name, default_2 FROM public.table_with_defaults_13300017 table_with_defaults WHERE ((hashint4(store_id) >= '-2147483648'::integer) AND (hashint4(store_id) <= '-1'::integer))
|
||||||
DEBUG: predicate pruning for shardId 13300013
|
DEBUG: predicate pruning for shardId 13300017
|
||||||
DEBUG: distributed statement: INSERT INTO public.table_with_defaults_13300014 AS citus_table_alias (store_id, first_name, default_1, last_name, default_2) SELECT store_id, first_name, default_1, last_name, default_2 FROM public.table_with_defaults_13300014 table_with_defaults WHERE ((hashint4(store_id) >= 0) AND (hashint4(store_id) <= 2147483647))
|
DEBUG: distributed statement: INSERT INTO public.table_with_defaults_13300018 AS citus_table_alias (store_id, first_name, default_1, last_name, default_2) SELECT store_id, first_name, default_1, last_name, default_2 FROM public.table_with_defaults_13300018 table_with_defaults WHERE ((hashint4(store_id) >= 0) AND (hashint4(store_id) <= 2147483647))
|
||||||
DEBUG: Plan is router executable
|
DEBUG: Plan is router executable
|
||||||
-- see that defaults are filled
|
-- see that defaults are filled
|
||||||
INSERT INTO table_with_defaults (store_id, first_name)
|
INSERT INTO table_with_defaults (store_id, first_name)
|
||||||
|
@ -2244,10 +2596,10 @@ SELECT
|
||||||
store_id, first_name
|
store_id, first_name
|
||||||
FROM
|
FROM
|
||||||
table_with_defaults;
|
table_with_defaults;
|
||||||
DEBUG: predicate pruning for shardId 13300014
|
DEBUG: predicate pruning for shardId 13300018
|
||||||
DEBUG: distributed statement: INSERT INTO public.table_with_defaults_13300013 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT store_id, first_name, 1 AS default_1, '2'::text AS default_2 FROM public.table_with_defaults_13300013 table_with_defaults WHERE ((hashint4(store_id) >= '-2147483648'::integer) AND (hashint4(store_id) <= '-1'::integer))
|
DEBUG: distributed statement: INSERT INTO public.table_with_defaults_13300017 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT store_id, first_name, 1 AS default_1, '2'::text AS default_2 FROM public.table_with_defaults_13300017 table_with_defaults WHERE ((hashint4(store_id) >= '-2147483648'::integer) AND (hashint4(store_id) <= '-1'::integer))
|
||||||
DEBUG: predicate pruning for shardId 13300013
|
DEBUG: predicate pruning for shardId 13300017
|
||||||
DEBUG: distributed statement: INSERT INTO public.table_with_defaults_13300014 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT store_id, first_name, 1 AS default_1, '2'::text AS default_2 FROM public.table_with_defaults_13300014 table_with_defaults WHERE ((hashint4(store_id) >= 0) AND (hashint4(store_id) <= 2147483647))
|
DEBUG: distributed statement: INSERT INTO public.table_with_defaults_13300018 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT store_id, first_name, 1 AS default_1, '2'::text AS default_2 FROM public.table_with_defaults_13300018 table_with_defaults WHERE ((hashint4(store_id) >= 0) AND (hashint4(store_id) <= 2147483647))
|
||||||
DEBUG: Plan is router executable
|
DEBUG: Plan is router executable
|
||||||
-- shuffle one of the defaults and skip the other
|
-- shuffle one of the defaults and skip the other
|
||||||
INSERT INTO table_with_defaults (default_2, store_id, first_name)
|
INSERT INTO table_with_defaults (default_2, store_id, first_name)
|
||||||
|
@ -2255,10 +2607,10 @@ SELECT
|
||||||
default_2, store_id, first_name
|
default_2, store_id, first_name
|
||||||
FROM
|
FROM
|
||||||
table_with_defaults;
|
table_with_defaults;
|
||||||
DEBUG: predicate pruning for shardId 13300014
|
DEBUG: predicate pruning for shardId 13300018
|
||||||
DEBUG: distributed statement: INSERT INTO public.table_with_defaults_13300013 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT store_id, first_name, 1 AS default_1, default_2 FROM public.table_with_defaults_13300013 table_with_defaults WHERE ((hashint4(store_id) >= '-2147483648'::integer) AND (hashint4(store_id) <= '-1'::integer))
|
DEBUG: distributed statement: INSERT INTO public.table_with_defaults_13300017 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT store_id, first_name, 1 AS default_1, default_2 FROM public.table_with_defaults_13300017 table_with_defaults WHERE ((hashint4(store_id) >= '-2147483648'::integer) AND (hashint4(store_id) <= '-1'::integer))
|
||||||
DEBUG: predicate pruning for shardId 13300013
|
DEBUG: predicate pruning for shardId 13300017
|
||||||
DEBUG: distributed statement: INSERT INTO public.table_with_defaults_13300014 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT store_id, first_name, 1 AS default_1, default_2 FROM public.table_with_defaults_13300014 table_with_defaults WHERE ((hashint4(store_id) >= 0) AND (hashint4(store_id) <= 2147483647))
|
DEBUG: distributed statement: INSERT INTO public.table_with_defaults_13300018 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT store_id, first_name, 1 AS default_1, default_2 FROM public.table_with_defaults_13300018 table_with_defaults WHERE ((hashint4(store_id) >= 0) AND (hashint4(store_id) <= 2147483647))
|
||||||
DEBUG: Plan is router executable
|
DEBUG: Plan is router executable
|
||||||
-- shuffle both defaults
|
-- shuffle both defaults
|
||||||
INSERT INTO table_with_defaults (default_2, store_id, default_1, first_name)
|
INSERT INTO table_with_defaults (default_2, store_id, default_1, first_name)
|
||||||
|
@ -2266,10 +2618,10 @@ SELECT
|
||||||
default_2, store_id, default_1, first_name
|
default_2, store_id, default_1, first_name
|
||||||
FROM
|
FROM
|
||||||
table_with_defaults;
|
table_with_defaults;
|
||||||
DEBUG: predicate pruning for shardId 13300014
|
DEBUG: predicate pruning for shardId 13300018
|
||||||
DEBUG: distributed statement: INSERT INTO public.table_with_defaults_13300013 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT store_id, first_name, default_1, default_2 FROM public.table_with_defaults_13300013 table_with_defaults WHERE ((hashint4(store_id) >= '-2147483648'::integer) AND (hashint4(store_id) <= '-1'::integer))
|
DEBUG: distributed statement: INSERT INTO public.table_with_defaults_13300017 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT store_id, first_name, default_1, default_2 FROM public.table_with_defaults_13300017 table_with_defaults WHERE ((hashint4(store_id) >= '-2147483648'::integer) AND (hashint4(store_id) <= '-1'::integer))
|
||||||
DEBUG: predicate pruning for shardId 13300013
|
DEBUG: predicate pruning for shardId 13300017
|
||||||
DEBUG: distributed statement: INSERT INTO public.table_with_defaults_13300014 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT store_id, first_name, default_1, default_2 FROM public.table_with_defaults_13300014 table_with_defaults WHERE ((hashint4(store_id) >= 0) AND (hashint4(store_id) <= 2147483647))
|
DEBUG: distributed statement: INSERT INTO public.table_with_defaults_13300018 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT store_id, first_name, default_1, default_2 FROM public.table_with_defaults_13300018 table_with_defaults WHERE ((hashint4(store_id) >= 0) AND (hashint4(store_id) <= 2147483647))
|
||||||
DEBUG: Plan is router executable
|
DEBUG: Plan is router executable
|
||||||
-- use constants instead of non-default column
|
-- use constants instead of non-default column
|
||||||
INSERT INTO table_with_defaults (default_2, last_name, store_id, first_name)
|
INSERT INTO table_with_defaults (default_2, last_name, store_id, first_name)
|
||||||
|
@ -2277,10 +2629,10 @@ SELECT
|
||||||
default_2, 'Freund', store_id, 'Andres'
|
default_2, 'Freund', store_id, 'Andres'
|
||||||
FROM
|
FROM
|
||||||
table_with_defaults;
|
table_with_defaults;
|
||||||
DEBUG: predicate pruning for shardId 13300014
|
DEBUG: predicate pruning for shardId 13300018
|
||||||
DEBUG: distributed statement: INSERT INTO public.table_with_defaults_13300013 AS citus_table_alias (store_id, first_name, default_1, last_name, default_2) SELECT store_id, 'Andres'::text AS first_name, 1 AS default_1, 'Freund'::text AS last_name, default_2 FROM public.table_with_defaults_13300013 table_with_defaults WHERE ((hashint4(store_id) >= '-2147483648'::integer) AND (hashint4(store_id) <= '-1'::integer))
|
DEBUG: distributed statement: INSERT INTO public.table_with_defaults_13300017 AS citus_table_alias (store_id, first_name, default_1, last_name, default_2) SELECT store_id, 'Andres'::text AS first_name, 1 AS default_1, 'Freund'::text AS last_name, default_2 FROM public.table_with_defaults_13300017 table_with_defaults WHERE ((hashint4(store_id) >= '-2147483648'::integer) AND (hashint4(store_id) <= '-1'::integer))
|
||||||
DEBUG: predicate pruning for shardId 13300013
|
DEBUG: predicate pruning for shardId 13300017
|
||||||
DEBUG: distributed statement: INSERT INTO public.table_with_defaults_13300014 AS citus_table_alias (store_id, first_name, default_1, last_name, default_2) SELECT store_id, 'Andres'::text AS first_name, 1 AS default_1, 'Freund'::text AS last_name, default_2 FROM public.table_with_defaults_13300014 table_with_defaults WHERE ((hashint4(store_id) >= 0) AND (hashint4(store_id) <= 2147483647))
|
DEBUG: distributed statement: INSERT INTO public.table_with_defaults_13300018 AS citus_table_alias (store_id, first_name, default_1, last_name, default_2) SELECT store_id, 'Andres'::text AS first_name, 1 AS default_1, 'Freund'::text AS last_name, default_2 FROM public.table_with_defaults_13300018 table_with_defaults WHERE ((hashint4(store_id) >= 0) AND (hashint4(store_id) <= 2147483647))
|
||||||
DEBUG: Plan is router executable
|
DEBUG: Plan is router executable
|
||||||
-- use constants instead of non-default column and skip both defauls
|
-- use constants instead of non-default column and skip both defauls
|
||||||
INSERT INTO table_with_defaults (last_name, store_id, first_name)
|
INSERT INTO table_with_defaults (last_name, store_id, first_name)
|
||||||
|
@ -2288,10 +2640,10 @@ SELECT
|
||||||
'Freund', store_id, 'Andres'
|
'Freund', store_id, 'Andres'
|
||||||
FROM
|
FROM
|
||||||
table_with_defaults;
|
table_with_defaults;
|
||||||
DEBUG: predicate pruning for shardId 13300014
|
DEBUG: predicate pruning for shardId 13300018
|
||||||
DEBUG: distributed statement: INSERT INTO public.table_with_defaults_13300013 AS citus_table_alias (store_id, first_name, default_1, last_name, default_2) SELECT store_id, 'Andres'::text AS first_name, 1 AS default_1, 'Freund'::text AS last_name, '2'::text AS default_2 FROM public.table_with_defaults_13300013 table_with_defaults WHERE ((hashint4(store_id) >= '-2147483648'::integer) AND (hashint4(store_id) <= '-1'::integer))
|
DEBUG: distributed statement: INSERT INTO public.table_with_defaults_13300017 AS citus_table_alias (store_id, first_name, default_1, last_name, default_2) SELECT store_id, 'Andres'::text AS first_name, 1 AS default_1, 'Freund'::text AS last_name, '2'::text AS default_2 FROM public.table_with_defaults_13300017 table_with_defaults WHERE ((hashint4(store_id) >= '-2147483648'::integer) AND (hashint4(store_id) <= '-1'::integer))
|
||||||
DEBUG: predicate pruning for shardId 13300013
|
DEBUG: predicate pruning for shardId 13300017
|
||||||
DEBUG: distributed statement: INSERT INTO public.table_with_defaults_13300014 AS citus_table_alias (store_id, first_name, default_1, last_name, default_2) SELECT store_id, 'Andres'::text AS first_name, 1 AS default_1, 'Freund'::text AS last_name, '2'::text AS default_2 FROM public.table_with_defaults_13300014 table_with_defaults WHERE ((hashint4(store_id) >= 0) AND (hashint4(store_id) <= 2147483647))
|
DEBUG: distributed statement: INSERT INTO public.table_with_defaults_13300018 AS citus_table_alias (store_id, first_name, default_1, last_name, default_2) SELECT store_id, 'Andres'::text AS first_name, 1 AS default_1, 'Freund'::text AS last_name, '2'::text AS default_2 FROM public.table_with_defaults_13300018 table_with_defaults WHERE ((hashint4(store_id) >= 0) AND (hashint4(store_id) <= 2147483647))
|
||||||
DEBUG: Plan is router executable
|
DEBUG: Plan is router executable
|
||||||
-- use constants instead of default columns
|
-- use constants instead of default columns
|
||||||
INSERT INTO table_with_defaults (default_2, last_name, store_id, first_name, default_1)
|
INSERT INTO table_with_defaults (default_2, last_name, store_id, first_name, default_1)
|
||||||
|
@ -2299,10 +2651,10 @@ SELECT
|
||||||
20, last_name, store_id, first_name, 10
|
20, last_name, store_id, first_name, 10
|
||||||
FROM
|
FROM
|
||||||
table_with_defaults;
|
table_with_defaults;
|
||||||
DEBUG: predicate pruning for shardId 13300014
|
DEBUG: predicate pruning for shardId 13300018
|
||||||
DEBUG: distributed statement: INSERT INTO public.table_with_defaults_13300013 AS citus_table_alias (store_id, first_name, default_1, last_name, default_2) SELECT store_id, first_name, 10, last_name, 20 FROM public.table_with_defaults_13300013 table_with_defaults WHERE ((hashint4(store_id) >= '-2147483648'::integer) AND (hashint4(store_id) <= '-1'::integer))
|
DEBUG: distributed statement: INSERT INTO public.table_with_defaults_13300017 AS citus_table_alias (store_id, first_name, default_1, last_name, default_2) SELECT store_id, first_name, 10, last_name, 20 FROM public.table_with_defaults_13300017 table_with_defaults WHERE ((hashint4(store_id) >= '-2147483648'::integer) AND (hashint4(store_id) <= '-1'::integer))
|
||||||
DEBUG: predicate pruning for shardId 13300013
|
DEBUG: predicate pruning for shardId 13300017
|
||||||
DEBUG: distributed statement: INSERT INTO public.table_with_defaults_13300014 AS citus_table_alias (store_id, first_name, default_1, last_name, default_2) SELECT store_id, first_name, 10, last_name, 20 FROM public.table_with_defaults_13300014 table_with_defaults WHERE ((hashint4(store_id) >= 0) AND (hashint4(store_id) <= 2147483647))
|
DEBUG: distributed statement: INSERT INTO public.table_with_defaults_13300018 AS citus_table_alias (store_id, first_name, default_1, last_name, default_2) SELECT store_id, first_name, 10, last_name, 20 FROM public.table_with_defaults_13300018 table_with_defaults WHERE ((hashint4(store_id) >= 0) AND (hashint4(store_id) <= 2147483647))
|
||||||
DEBUG: Plan is router executable
|
DEBUG: Plan is router executable
|
||||||
-- use constants instead of both default columns and non-default columns
|
-- use constants instead of both default columns and non-default columns
|
||||||
INSERT INTO table_with_defaults (default_2, last_name, store_id, first_name, default_1)
|
INSERT INTO table_with_defaults (default_2, last_name, store_id, first_name, default_1)
|
||||||
|
@ -2310,10 +2662,10 @@ SELECT
|
||||||
20, 'Freund', store_id, 'Andres', 10
|
20, 'Freund', store_id, 'Andres', 10
|
||||||
FROM
|
FROM
|
||||||
table_with_defaults;
|
table_with_defaults;
|
||||||
DEBUG: predicate pruning for shardId 13300014
|
DEBUG: predicate pruning for shardId 13300018
|
||||||
DEBUG: distributed statement: INSERT INTO public.table_with_defaults_13300013 AS citus_table_alias (store_id, first_name, default_1, last_name, default_2) SELECT store_id, 'Andres'::text AS first_name, 10, 'Freund'::text AS last_name, 20 FROM public.table_with_defaults_13300013 table_with_defaults WHERE ((hashint4(store_id) >= '-2147483648'::integer) AND (hashint4(store_id) <= '-1'::integer))
|
DEBUG: distributed statement: INSERT INTO public.table_with_defaults_13300017 AS citus_table_alias (store_id, first_name, default_1, last_name, default_2) SELECT store_id, 'Andres'::text AS first_name, 10, 'Freund'::text AS last_name, 20 FROM public.table_with_defaults_13300017 table_with_defaults WHERE ((hashint4(store_id) >= '-2147483648'::integer) AND (hashint4(store_id) <= '-1'::integer))
|
||||||
DEBUG: predicate pruning for shardId 13300013
|
DEBUG: predicate pruning for shardId 13300017
|
||||||
DEBUG: distributed statement: INSERT INTO public.table_with_defaults_13300014 AS citus_table_alias (store_id, first_name, default_1, last_name, default_2) SELECT store_id, 'Andres'::text AS first_name, 10, 'Freund'::text AS last_name, 20 FROM public.table_with_defaults_13300014 table_with_defaults WHERE ((hashint4(store_id) >= 0) AND (hashint4(store_id) <= 2147483647))
|
DEBUG: distributed statement: INSERT INTO public.table_with_defaults_13300018 AS citus_table_alias (store_id, first_name, default_1, last_name, default_2) SELECT store_id, 'Andres'::text AS first_name, 10, 'Freund'::text AS last_name, 20 FROM public.table_with_defaults_13300018 table_with_defaults WHERE ((hashint4(store_id) >= 0) AND (hashint4(store_id) <= 2147483647))
|
||||||
DEBUG: Plan is router executable
|
DEBUG: Plan is router executable
|
||||||
-- some of the the ultimate queries where we have constants,
|
-- some of the the ultimate queries where we have constants,
|
||||||
-- defaults and group by entry is not on the target entry
|
-- defaults and group by entry is not on the target entry
|
||||||
|
@ -2324,10 +2676,10 @@ FROM
|
||||||
table_with_defaults
|
table_with_defaults
|
||||||
GROUP BY
|
GROUP BY
|
||||||
last_name, store_id;
|
last_name, store_id;
|
||||||
DEBUG: predicate pruning for shardId 13300014
|
DEBUG: predicate pruning for shardId 13300018
|
||||||
DEBUG: distributed statement: INSERT INTO public.table_with_defaults_13300013 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT store_id, 'Andres'::text AS first_name, 1 AS default_1, '2000'::text AS default_2 FROM public.table_with_defaults_13300013 table_with_defaults WHERE ((hashint4(store_id) >= '-2147483648'::integer) AND (hashint4(store_id) <= '-1'::integer)) GROUP BY last_name, store_id
|
DEBUG: distributed statement: INSERT INTO public.table_with_defaults_13300017 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT store_id, 'Andres'::text AS first_name, 1 AS default_1, '2000'::text AS default_2 FROM public.table_with_defaults_13300017 table_with_defaults WHERE ((hashint4(store_id) >= '-2147483648'::integer) AND (hashint4(store_id) <= '-1'::integer)) GROUP BY last_name, store_id
|
||||||
DEBUG: predicate pruning for shardId 13300013
|
DEBUG: predicate pruning for shardId 13300017
|
||||||
DEBUG: distributed statement: INSERT INTO public.table_with_defaults_13300014 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT store_id, 'Andres'::text AS first_name, 1 AS default_1, '2000'::text AS default_2 FROM public.table_with_defaults_13300014 table_with_defaults WHERE ((hashint4(store_id) >= 0) AND (hashint4(store_id) <= 2147483647)) GROUP BY last_name, store_id
|
DEBUG: distributed statement: INSERT INTO public.table_with_defaults_13300018 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT store_id, 'Andres'::text AS first_name, 1 AS default_1, '2000'::text AS default_2 FROM public.table_with_defaults_13300018 table_with_defaults WHERE ((hashint4(store_id) >= 0) AND (hashint4(store_id) <= 2147483647)) GROUP BY last_name, store_id
|
||||||
DEBUG: Plan is router executable
|
DEBUG: Plan is router executable
|
||||||
INSERT INTO table_with_defaults (default_1, store_id, first_name, default_2)
|
INSERT INTO table_with_defaults (default_1, store_id, first_name, default_2)
|
||||||
SELECT
|
SELECT
|
||||||
|
@ -2336,10 +2688,10 @@ FROM
|
||||||
table_with_defaults
|
table_with_defaults
|
||||||
GROUP BY
|
GROUP BY
|
||||||
last_name, store_id, first_name;
|
last_name, store_id, first_name;
|
||||||
DEBUG: predicate pruning for shardId 13300014
|
DEBUG: predicate pruning for shardId 13300018
|
||||||
DEBUG: distributed statement: INSERT INTO public.table_with_defaults_13300013 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT store_id, 'Andres'::text AS first_name, 1000, '2000'::text AS default_2 FROM public.table_with_defaults_13300013 table_with_defaults WHERE ((hashint4(store_id) >= '-2147483648'::integer) AND (hashint4(store_id) <= '-1'::integer)) GROUP BY last_name, store_id, first_name
|
DEBUG: distributed statement: INSERT INTO public.table_with_defaults_13300017 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT store_id, 'Andres'::text AS first_name, 1000, '2000'::text AS default_2 FROM public.table_with_defaults_13300017 table_with_defaults WHERE ((hashint4(store_id) >= '-2147483648'::integer) AND (hashint4(store_id) <= '-1'::integer)) GROUP BY last_name, store_id, first_name
|
||||||
DEBUG: predicate pruning for shardId 13300013
|
DEBUG: predicate pruning for shardId 13300017
|
||||||
DEBUG: distributed statement: INSERT INTO public.table_with_defaults_13300014 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT store_id, 'Andres'::text AS first_name, 1000, '2000'::text AS default_2 FROM public.table_with_defaults_13300014 table_with_defaults WHERE ((hashint4(store_id) >= 0) AND (hashint4(store_id) <= 2147483647)) GROUP BY last_name, store_id, first_name
|
DEBUG: distributed statement: INSERT INTO public.table_with_defaults_13300018 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT store_id, 'Andres'::text AS first_name, 1000, '2000'::text AS default_2 FROM public.table_with_defaults_13300018 table_with_defaults WHERE ((hashint4(store_id) >= 0) AND (hashint4(store_id) <= 2147483647)) GROUP BY last_name, store_id, first_name
|
||||||
DEBUG: Plan is router executable
|
DEBUG: Plan is router executable
|
||||||
INSERT INTO table_with_defaults (default_1, store_id, first_name, default_2)
|
INSERT INTO table_with_defaults (default_1, store_id, first_name, default_2)
|
||||||
SELECT
|
SELECT
|
||||||
|
@ -2348,10 +2700,10 @@ FROM
|
||||||
table_with_defaults
|
table_with_defaults
|
||||||
GROUP BY
|
GROUP BY
|
||||||
last_name, store_id, first_name, default_2;
|
last_name, store_id, first_name, default_2;
|
||||||
DEBUG: predicate pruning for shardId 13300014
|
DEBUG: predicate pruning for shardId 13300018
|
||||||
DEBUG: distributed statement: INSERT INTO public.table_with_defaults_13300013 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT store_id, 'Andres'::text AS first_name, 1000, '2000'::text AS default_2 FROM public.table_with_defaults_13300013 table_with_defaults WHERE ((hashint4(store_id) >= '-2147483648'::integer) AND (hashint4(store_id) <= '-1'::integer)) GROUP BY last_name, store_id, first_name, default_2
|
DEBUG: distributed statement: INSERT INTO public.table_with_defaults_13300017 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT store_id, 'Andres'::text AS first_name, 1000, '2000'::text AS default_2 FROM public.table_with_defaults_13300017 table_with_defaults WHERE ((hashint4(store_id) >= '-2147483648'::integer) AND (hashint4(store_id) <= '-1'::integer)) GROUP BY last_name, store_id, first_name, default_2
|
||||||
DEBUG: predicate pruning for shardId 13300013
|
DEBUG: predicate pruning for shardId 13300017
|
||||||
DEBUG: distributed statement: INSERT INTO public.table_with_defaults_13300014 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT store_id, 'Andres'::text AS first_name, 1000, '2000'::text AS default_2 FROM public.table_with_defaults_13300014 table_with_defaults WHERE ((hashint4(store_id) >= 0) AND (hashint4(store_id) <= 2147483647)) GROUP BY last_name, store_id, first_name, default_2
|
DEBUG: distributed statement: INSERT INTO public.table_with_defaults_13300018 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT store_id, 'Andres'::text AS first_name, 1000, '2000'::text AS default_2 FROM public.table_with_defaults_13300018 table_with_defaults WHERE ((hashint4(store_id) >= 0) AND (hashint4(store_id) <= 2147483647)) GROUP BY last_name, store_id, first_name, default_2
|
||||||
DEBUG: Plan is router executable
|
DEBUG: Plan is router executable
|
||||||
INSERT INTO table_with_defaults (default_1, store_id, first_name)
|
INSERT INTO table_with_defaults (default_1, store_id, first_name)
|
||||||
SELECT
|
SELECT
|
||||||
|
@ -2360,10 +2712,10 @@ FROM
|
||||||
table_with_defaults
|
table_with_defaults
|
||||||
GROUP BY
|
GROUP BY
|
||||||
last_name, store_id, first_name, default_2;
|
last_name, store_id, first_name, default_2;
|
||||||
DEBUG: predicate pruning for shardId 13300014
|
DEBUG: predicate pruning for shardId 13300018
|
||||||
DEBUG: distributed statement: INSERT INTO public.table_with_defaults_13300013 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT store_id, 'Andres'::text AS first_name, 1000, '2'::text AS default_2 FROM public.table_with_defaults_13300013 table_with_defaults WHERE ((hashint4(store_id) >= '-2147483648'::integer) AND (hashint4(store_id) <= '-1'::integer)) GROUP BY last_name, store_id, first_name, default_2
|
DEBUG: distributed statement: INSERT INTO public.table_with_defaults_13300017 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT store_id, 'Andres'::text AS first_name, 1000, '2'::text AS default_2 FROM public.table_with_defaults_13300017 table_with_defaults WHERE ((hashint4(store_id) >= '-2147483648'::integer) AND (hashint4(store_id) <= '-1'::integer)) GROUP BY last_name, store_id, first_name, default_2
|
||||||
DEBUG: predicate pruning for shardId 13300013
|
DEBUG: predicate pruning for shardId 13300017
|
||||||
DEBUG: distributed statement: INSERT INTO public.table_with_defaults_13300014 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT store_id, 'Andres'::text AS first_name, 1000, '2'::text AS default_2 FROM public.table_with_defaults_13300014 table_with_defaults WHERE ((hashint4(store_id) >= 0) AND (hashint4(store_id) <= 2147483647)) GROUP BY last_name, store_id, first_name, default_2
|
DEBUG: distributed statement: INSERT INTO public.table_with_defaults_13300018 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT store_id, 'Andres'::text AS first_name, 1000, '2'::text AS default_2 FROM public.table_with_defaults_13300018 table_with_defaults WHERE ((hashint4(store_id) >= 0) AND (hashint4(store_id) <= 2147483647)) GROUP BY last_name, store_id, first_name, default_2
|
||||||
DEBUG: Plan is router executable
|
DEBUG: Plan is router executable
|
||||||
RESET client_min_messages;
|
RESET client_min_messages;
|
||||||
-- Stable function in default should be allowed
|
-- Stable function in default should be allowed
|
||||||
|
|
|
@ -0,0 +1,449 @@
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- Vanilla funnel query
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
INSERT INTO agg_results (user_id, value_1_agg)
|
||||||
|
SELECT user_id, array_length(events_table, 1)
|
||||||
|
FROM (
|
||||||
|
SELECT user_id, array_agg(event ORDER BY time) AS events_table
|
||||||
|
FROM (
|
||||||
|
SELECT u.user_id, e.event_type::text AS event, e.time
|
||||||
|
FROM users_table AS u,
|
||||||
|
events_table AS e
|
||||||
|
WHERE u.user_id = e.user_id
|
||||||
|
AND u.user_id >= 10
|
||||||
|
AND u.user_id <= 25
|
||||||
|
AND e.event_type IN (100, 101, 102)
|
||||||
|
) t
|
||||||
|
GROUP BY user_id
|
||||||
|
) q;
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
count | count | avg
|
||||||
|
-------+-------+---------------------
|
||||||
|
5 | 5 | 15.6000000000000000
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- Funnel grouped by whether or not a user has done an event
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
INSERT INTO agg_results (user_id, value_1_agg, value_2_agg )
|
||||||
|
SELECT user_id, sum(array_length(events_table, 1)), length(hasdone_event)
|
||||||
|
FROM (
|
||||||
|
SELECT
|
||||||
|
t1.user_id,
|
||||||
|
array_agg(event ORDER BY time) AS events_table,
|
||||||
|
COALESCE(hasdone_event, 'Has not done event') AS hasdone_event
|
||||||
|
FROM (
|
||||||
|
(
|
||||||
|
SELECT u.user_id, 'step=>1'::text AS event, e.time
|
||||||
|
FROM users_table AS u,
|
||||||
|
events_table AS e
|
||||||
|
WHERE u.user_id = e.user_id
|
||||||
|
AND u.user_id >= 10
|
||||||
|
AND u.user_id <= 25
|
||||||
|
AND e.event_type IN (100, 101, 102)
|
||||||
|
)
|
||||||
|
UNION
|
||||||
|
(
|
||||||
|
SELECT u.user_id, 'step=>2'::text AS event, e.time
|
||||||
|
FROM users_table AS u,
|
||||||
|
events_table AS e
|
||||||
|
WHERE u.user_id = e.user_id
|
||||||
|
AND u.user_id >= 10
|
||||||
|
AND u.user_id <= 25
|
||||||
|
AND e.event_type IN (103, 104, 105)
|
||||||
|
)
|
||||||
|
) t1 LEFT JOIN (
|
||||||
|
SELECT DISTINCT user_id,
|
||||||
|
'Has done event'::TEXT AS hasdone_event
|
||||||
|
FROM events_table AS e
|
||||||
|
|
||||||
|
WHERE e.user_id >= 10
|
||||||
|
AND e.user_id <= 25
|
||||||
|
AND e.event_type IN (106, 107, 108)
|
||||||
|
) t2 ON (t1.user_id = t2.user_id)
|
||||||
|
GROUP BY t1.user_id, hasdone_event
|
||||||
|
) t GROUP BY user_id, hasdone_event;
|
||||||
|
ERROR: Set operations are not allowed in INSERT ... SELECT queries
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
-- SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- Funnel, grouped by the number of times a user has done an event
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
INSERT INTO agg_results (user_id, value_1_agg, value_2_agg)
|
||||||
|
SELECT
|
||||||
|
user_id,
|
||||||
|
avg(array_length(events_table, 1)) AS event_average,
|
||||||
|
count_pay
|
||||||
|
FROM (
|
||||||
|
SELECT
|
||||||
|
subquery_1.user_id,
|
||||||
|
array_agg(event ORDER BY time) AS events_table,
|
||||||
|
COALESCE(count_pay, 0) AS count_pay
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
(SELECT
|
||||||
|
users_table.user_id,
|
||||||
|
'action=>1'AS event,
|
||||||
|
events_table.time
|
||||||
|
FROM
|
||||||
|
users_table,
|
||||||
|
events_table
|
||||||
|
WHERE
|
||||||
|
users_table.user_id = events_table.user_id AND
|
||||||
|
users_table.user_id >= 10 AND
|
||||||
|
users_table.user_id <= 70 AND
|
||||||
|
events_table.event_type > 10 AND events_table.event_type < 12
|
||||||
|
)
|
||||||
|
UNION
|
||||||
|
(SELECT
|
||||||
|
users_table.user_id,
|
||||||
|
'action=>2'AS event,
|
||||||
|
events_table.time
|
||||||
|
FROM
|
||||||
|
users_table,
|
||||||
|
events_table
|
||||||
|
WHERE
|
||||||
|
users_table.user_id = events_table.user_id AND
|
||||||
|
users_table.user_id >= 10 AND
|
||||||
|
users_table.user_id <= 70 AND
|
||||||
|
events_table.event_type > 12 AND events_table.event_type < 14
|
||||||
|
)
|
||||||
|
) AS subquery_1
|
||||||
|
LEFT JOIN
|
||||||
|
(SELECT
|
||||||
|
user_id,
|
||||||
|
COUNT(*) AS count_pay
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
WHERE
|
||||||
|
user_id >= 10 AND
|
||||||
|
user_id <= 70 AND
|
||||||
|
users_table.value_1 > 15 AND users_table.value_1 < 17
|
||||||
|
GROUP BY
|
||||||
|
user_id
|
||||||
|
HAVING
|
||||||
|
COUNT(*) > 1) AS subquery_2
|
||||||
|
ON
|
||||||
|
subquery_1.user_id = subquery_2.user_id
|
||||||
|
GROUP BY
|
||||||
|
subquery_1.user_id,
|
||||||
|
count_pay) AS subquery_top
|
||||||
|
WHERE
|
||||||
|
array_ndims(events_table) > 0
|
||||||
|
GROUP BY
|
||||||
|
count_pay, user_id
|
||||||
|
ORDER BY
|
||||||
|
count_pay;
|
||||||
|
ERROR: Set operations are not allowed in INSERT ... SELECT queries
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
-- SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- Most recently seen users_table events_table
|
||||||
|
------------------------------------
|
||||||
|
-- Note that we don't use ORDER BY/LIMIT yet
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
INSERT INTO agg_results (user_id, agg_time, value_2_agg)
|
||||||
|
SELECT
|
||||||
|
user_id,
|
||||||
|
user_lastseen,
|
||||||
|
array_length(event_array, 1)
|
||||||
|
FROM (
|
||||||
|
SELECT
|
||||||
|
user_id,
|
||||||
|
max(u.time) as user_lastseen,
|
||||||
|
array_agg(event_type ORDER BY u.time) AS event_array
|
||||||
|
FROM (
|
||||||
|
|
||||||
|
SELECT user_id, time
|
||||||
|
FROM users_table
|
||||||
|
WHERE
|
||||||
|
user_id >= 10 AND
|
||||||
|
user_id <= 70 AND
|
||||||
|
users_table.value_1 > 10 AND users_table.value_1 < 12
|
||||||
|
) u LEFT JOIN LATERAL (
|
||||||
|
SELECT event_type, time
|
||||||
|
FROM events_table
|
||||||
|
WHERE user_id = u.user_id AND
|
||||||
|
events_table.event_type > 10 AND events_table.event_type < 12
|
||||||
|
) t ON true
|
||||||
|
GROUP BY user_id
|
||||||
|
) AS shard_union
|
||||||
|
ORDER BY user_lastseen DESC;
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
count | count | avg
|
||||||
|
-------+-------+---------------------
|
||||||
|
6 | 6 | 42.0000000000000000
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- Count the number of distinct users_table who are in segment X and Y and Z
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
INSERT INTO agg_results (user_id)
|
||||||
|
SELECT DISTINCT user_id
|
||||||
|
FROM users_table
|
||||||
|
WHERE user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 10 AND value_1 <= 20)
|
||||||
|
AND user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 30 AND value_1 <= 40)
|
||||||
|
AND user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 50 AND value_1 <= 60);
|
||||||
|
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
count | count | avg
|
||||||
|
-------+-------+---------------------
|
||||||
|
33 | 33 | 50.3939393939393939
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- Count the number of distinct users_table who are in at least two of X and Y and Z segments
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
INSERT INTO agg_results(user_id)
|
||||||
|
SELECT user_id
|
||||||
|
FROM users_table
|
||||||
|
WHERE (value_1 = 10
|
||||||
|
OR value_1 = 11
|
||||||
|
OR value_1 = 12)
|
||||||
|
GROUP BY user_id
|
||||||
|
HAVING count(distinct value_1) >= 2;
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
count | count | avg
|
||||||
|
-------+-------+---------------------
|
||||||
|
4 | 4 | 51.0000000000000000
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- Find customers who have done X, and satisfy other customer specific criteria
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
INSERT INTO agg_results(user_id, value_2_agg)
|
||||||
|
SELECT user_id, value_2 FROM users_table WHERE
|
||||||
|
value_1 > 101 AND value_1 < 110
|
||||||
|
AND value_2 >= 5
|
||||||
|
AND EXISTS (SELECT user_id FROM events_table WHERE event_type>101 AND event_type < 110 AND value_3 > 100 AND user_id=users_table.user_id);
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
count | count | avg
|
||||||
|
-------+-------+---------------------
|
||||||
|
34 | 27 | 40.5588235294117647
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- Customers who haven’t done X, and satisfy other customer specific criteria
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
INSERT INTO agg_results(user_id, value_2_agg)
|
||||||
|
SELECT user_id, value_2 FROM users_table WHERE
|
||||||
|
value_1 = 101
|
||||||
|
AND value_2 >= 5
|
||||||
|
AND NOT EXISTS (SELECT user_id FROM events_table WHERE event_type=101 AND value_3 > 100 AND user_id=users_table.user_id);
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
count | count | avg
|
||||||
|
-------+-------+---------------------
|
||||||
|
8 | 7 | 39.7500000000000000
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- Customers who have done X and Y, and satisfy other customer specific criteria
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
INSERT INTO agg_results(user_id, value_2_agg)
|
||||||
|
SELECT user_id, value_2 FROM users_table WHERE
|
||||||
|
value_1 > 100
|
||||||
|
AND value_2 >= 5
|
||||||
|
AND EXISTS (SELECT user_id FROM events_table WHERE event_type!=100 AND value_3 > 100 AND user_id=users_table.user_id)
|
||||||
|
AND EXISTS (SELECT user_id FROM events_table WHERE event_type=101 AND value_3 > 100 AND user_id=users_table.user_id);
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
count | count | avg
|
||||||
|
-------+-------+---------------------
|
||||||
|
1202 | 14 | 47.7462562396006656
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- Customers who have done X and haven’t done Y, and satisfy other customer specific criteria
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
INSERT INTO agg_results(user_id, value_2_agg)
|
||||||
|
SELECT user_id, value_2 FROM users_table WHERE
|
||||||
|
value_2 >= 5
|
||||||
|
AND EXISTS (SELECT user_id FROM events_table WHERE event_type > 100 AND event_type <= 300 AND value_3 > 100 AND user_id=users_table.user_id)
|
||||||
|
AND NOT EXISTS (SELECT user_id FROM events_table WHERE event_type > 300 AND event_type <= 350 AND value_3 > 100 AND user_id=users_table.user_id);
|
||||||
|
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
count | count | avg
|
||||||
|
-------+-------+---------------------
|
||||||
|
205 | 2 | 55.2195121951219512
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- Customers who have done X more than 2 times, and satisfy other customer specific criteria
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
INSERT INTO agg_results(user_id, value_2_agg)
|
||||||
|
SELECT user_id,
|
||||||
|
value_2
|
||||||
|
FROM users_table
|
||||||
|
WHERE value_1 > 100
|
||||||
|
AND value_1 < 124
|
||||||
|
AND value_2 >= 5
|
||||||
|
AND EXISTS (SELECT user_id
|
||||||
|
FROM events_table
|
||||||
|
WHERE event_type > 100
|
||||||
|
AND event_type < 124
|
||||||
|
AND value_3 > 100
|
||||||
|
AND user_id = users_table.user_id
|
||||||
|
GROUP BY user_id
|
||||||
|
HAVING Count(*) > 2);
|
||||||
|
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
count | count | avg
|
||||||
|
-------+-------+---------------------
|
||||||
|
78 | 34 | 52.4230769230769231
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- Find me all users_table who logged in more than once
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
INSERT INTO agg_results(user_id, value_1_agg)
|
||||||
|
SELECT user_id, value_1 from
|
||||||
|
(
|
||||||
|
SELECT user_id, value_1 From users_table
|
||||||
|
WHERE value_2 > 100 and user_id = 15 GROUP BY value_1, user_id HAVING count(*) > 1
|
||||||
|
) as a;
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
count | count | avg
|
||||||
|
-------+-------+---------------------
|
||||||
|
6 | 1 | 15.0000000000000000
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- Find me all users_table who has done some event and has filters
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
INSERT INTO agg_results(user_id)
|
||||||
|
Select user_id
|
||||||
|
From events_table
|
||||||
|
Where event_type = 16
|
||||||
|
And value_2 > 50
|
||||||
|
And user_id in
|
||||||
|
(select user_id
|
||||||
|
From users_table
|
||||||
|
Where value_1 = 15
|
||||||
|
And value_2 > 25);
|
||||||
|
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
count | count | avg
|
||||||
|
-------+-------+---------------------
|
||||||
|
2 | 2 | 30.0000000000000000
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- Which events_table did people who has done some specific events_table
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
INSERT INTO agg_results(user_id, value_1_agg)
|
||||||
|
SELECT user_id, event_type FROM events_table
|
||||||
|
WHERE user_id in (SELECT user_id from events_table WHERE event_type > 500 and event_type < 505)
|
||||||
|
GROUP BY user_id, event_type;
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
count | count | avg
|
||||||
|
-------+-------+---------------------
|
||||||
|
3084 | 32 | 44.1498054474708171
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- Find me all the users_table who has done some event more than three times
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
INSERT INTO agg_results(user_id)
|
||||||
|
select user_id from
|
||||||
|
(
|
||||||
|
select
|
||||||
|
user_id
|
||||||
|
from
|
||||||
|
events_table
|
||||||
|
where event_type = 901 group by user_id having count(*) > 3
|
||||||
|
) as a;
|
||||||
|
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
count | count | avg
|
||||||
|
-------+-------+---------------------
|
||||||
|
1 | 1 | 57.0000000000000000
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- Find my assets that have the highest probability and fetch their metadata
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
INSERT INTO agg_results(user_id, value_1_agg, value_3_agg)
|
||||||
|
SELECT
|
||||||
|
users_table.user_id, users_table.value_1, prob
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
JOIN
|
||||||
|
(SELECT
|
||||||
|
ma.user_id, (GREATEST(coalesce(ma.value_4 / 250, 0.0) + GREATEST(1.0))) / 2 AS prob
|
||||||
|
FROM
|
||||||
|
users_table AS ma, events_table as short_list
|
||||||
|
WHERE
|
||||||
|
short_list.user_id = ma.user_id and ma.value_1 < 50 and short_list.event_type < 50
|
||||||
|
) temp
|
||||||
|
ON users_table.user_id = temp.user_id
|
||||||
|
WHERE users_table.value_1 < 50;
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
count | count | avg
|
||||||
|
-------+-------+---------------------
|
||||||
|
14371 | 101 | 50.5232064574490293
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,420 @@
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- Vanilla funnel query -- single shard
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
TRUNCATE agg_results_second;
|
||||||
|
INSERT INTO agg_results_second (user_id, value_1_agg)
|
||||||
|
SELECT user_id, array_length(events_table, 1)
|
||||||
|
FROM (
|
||||||
|
SELECT user_id, array_agg(event ORDER BY time) AS events_table
|
||||||
|
FROM (
|
||||||
|
SELECT u.user_id, e.event_type::text AS event, e.time
|
||||||
|
FROM users_table AS u,
|
||||||
|
events_table AS e
|
||||||
|
WHERE u.user_id = e.user_id
|
||||||
|
AND u.user_id >= 10
|
||||||
|
AND u.user_id <= 25
|
||||||
|
AND e.event_type IN (100, 101, 102)
|
||||||
|
) t
|
||||||
|
GROUP BY user_id
|
||||||
|
) q
|
||||||
|
WHERE user_id = 20;
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results_second;
|
||||||
|
count | count | avg
|
||||||
|
-------+-------+---------------------
|
||||||
|
1 | 1 | 20.0000000000000000
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- Vanilla funnel query -- two shards
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
TRUNCATE agg_results_second;
|
||||||
|
INSERT INTO agg_results_second (user_id, value_1_agg)
|
||||||
|
SELECT user_id, array_length(events_table, 1)
|
||||||
|
FROM (
|
||||||
|
SELECT user_id, array_agg(event ORDER BY time) AS events_table
|
||||||
|
FROM (
|
||||||
|
SELECT u.user_id, e.event_type::text AS event, e.time
|
||||||
|
FROM users_table AS u,
|
||||||
|
events_table AS e
|
||||||
|
WHERE u.user_id = e.user_id AND
|
||||||
|
(u.user_id = 13 OR u.user_id = 20) AND
|
||||||
|
(e.user_id = 13 OR e.user_id = 20)
|
||||||
|
AND e.event_type IN (100, 101, 102)
|
||||||
|
) t
|
||||||
|
GROUP BY user_id
|
||||||
|
) q
|
||||||
|
WHERE (user_id = 13 OR user_id = 20);
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results_second;
|
||||||
|
count | count | avg
|
||||||
|
-------+-------+---------------------
|
||||||
|
2 | 2 | 16.5000000000000000
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- Funnel grouped by whether or not a user has done an event -- single shard query
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
TRUNCATE agg_results_second;
|
||||||
|
INSERT INTO agg_results_second (user_id, value_1_agg, value_2_agg )
|
||||||
|
SELECT user_id, sum(array_length(events_table, 1)), length(hasdone_event)
|
||||||
|
FROM (
|
||||||
|
SELECT
|
||||||
|
t1.user_id,
|
||||||
|
array_agg(event ORDER BY time) AS events_table,
|
||||||
|
COALESCE(hasdone_event, 'Has not done event') AS hasdone_event
|
||||||
|
FROM (
|
||||||
|
(
|
||||||
|
SELECT u.user_id, 'step=>1'::text AS event, e.time
|
||||||
|
FROM users_table AS u,
|
||||||
|
events_table AS e
|
||||||
|
WHERE u.user_id = e.user_id
|
||||||
|
AND u.user_id >= 10
|
||||||
|
AND u.user_id <= 25
|
||||||
|
AND e.event_type IN (100, 101, 102)
|
||||||
|
)
|
||||||
|
UNION
|
||||||
|
(
|
||||||
|
SELECT u.user_id, 'step=>2'::text AS event, e.time
|
||||||
|
FROM users_table AS u,
|
||||||
|
events_table AS e
|
||||||
|
WHERE u.user_id = e.user_id
|
||||||
|
AND u.user_id >= 10
|
||||||
|
AND u.user_id <= 25
|
||||||
|
AND e.event_type IN (103, 104, 105)
|
||||||
|
)
|
||||||
|
) t1 LEFT JOIN (
|
||||||
|
SELECT DISTINCT user_id,
|
||||||
|
'Has done event'::TEXT AS hasdone_event
|
||||||
|
FROM events_table AS e
|
||||||
|
|
||||||
|
WHERE e.user_id >= 10
|
||||||
|
AND e.user_id <= 25
|
||||||
|
AND e.event_type IN (106, 107, 108)
|
||||||
|
) t2 ON (t1.user_id = t2.user_id)
|
||||||
|
WHERE t1.user_id = 20
|
||||||
|
GROUP BY t1.user_id, hasdone_event
|
||||||
|
) t GROUP BY user_id, hasdone_event;
|
||||||
|
ERROR: Set operations are not allowed in INSERT ... SELECT queries
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- Funnel grouped by whether or not a user has done an event -- two shards query
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
TRUNCATE agg_results_second;
|
||||||
|
INSERT INTO agg_results_second (user_id, value_1_agg, value_2_agg )
|
||||||
|
SELECT user_id, sum(array_length(events_table, 1)), length(hasdone_event)
|
||||||
|
FROM (
|
||||||
|
SELECT
|
||||||
|
t1.user_id,
|
||||||
|
array_agg(event ORDER BY time) AS events_table,
|
||||||
|
COALESCE(hasdone_event, 'Has not done event') AS hasdone_event
|
||||||
|
FROM (
|
||||||
|
(
|
||||||
|
SELECT u.user_id, 'step=>1'::text AS event, e.time
|
||||||
|
FROM users_table AS u,
|
||||||
|
events_table AS e
|
||||||
|
WHERE u.user_id = e.user_id
|
||||||
|
AND (e.user_id = 20 OR e.user_id = 17)
|
||||||
|
AND e.event_type IN (100, 101, 102)
|
||||||
|
)
|
||||||
|
UNION
|
||||||
|
(
|
||||||
|
SELECT u.user_id, 'step=>2'::text AS event, e.time
|
||||||
|
FROM users_table AS u,
|
||||||
|
events_table AS e
|
||||||
|
WHERE u.user_id = e.user_id
|
||||||
|
AND (e.user_id = 20 OR e.user_id = 17)
|
||||||
|
AND e.event_type IN (103, 104, 105)
|
||||||
|
)
|
||||||
|
) t1 LEFT JOIN (
|
||||||
|
SELECT DISTINCT user_id,
|
||||||
|
'Has done event'::TEXT AS hasdone_event
|
||||||
|
FROM events_table AS e
|
||||||
|
|
||||||
|
WHERE
|
||||||
|
(e.user_id = 20 OR e.user_id = 17)
|
||||||
|
AND e.event_type IN (106, 107, 108)
|
||||||
|
) t2 ON (t1.user_id = t2.user_id)
|
||||||
|
WHERE (t1.user_id = 20 OR t1.user_id = 17)
|
||||||
|
GROUP BY t1.user_id, hasdone_event
|
||||||
|
) t GROUP BY user_id, hasdone_event;
|
||||||
|
ERROR: Set operations are not allowed in INSERT ... SELECT queries
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
-- SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results_second;
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- Most recently seen users_table events_table -- single shard query
|
||||||
|
------------------------------------
|
||||||
|
-- Note that we don't use ORDER BY/LIMIT yet
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
TRUNCATE agg_results_second;
|
||||||
|
INSERT INTO agg_results_second (user_id, agg_time, value_2_agg)
|
||||||
|
SELECT
|
||||||
|
user_id,
|
||||||
|
user_lastseen,
|
||||||
|
array_length(event_array, 1)
|
||||||
|
FROM (
|
||||||
|
SELECT
|
||||||
|
user_id,
|
||||||
|
max(u.time) as user_lastseen,
|
||||||
|
array_agg(event_type ORDER BY u.time) AS event_array
|
||||||
|
FROM (
|
||||||
|
|
||||||
|
SELECT user_id, time
|
||||||
|
FROM users_table
|
||||||
|
WHERE
|
||||||
|
user_id >= 10 AND
|
||||||
|
user_id <= 70 AND
|
||||||
|
users_table.value_1 > 10 AND users_table.value_1 < 12
|
||||||
|
) u LEFT JOIN LATERAL (
|
||||||
|
SELECT event_type, time
|
||||||
|
FROM events_table
|
||||||
|
WHERE user_id = u.user_id AND
|
||||||
|
events_table.event_type > 10 AND events_table.event_type < 12
|
||||||
|
) t ON true
|
||||||
|
WHERE user_id = 65
|
||||||
|
GROUP BY user_id
|
||||||
|
) AS shard_union
|
||||||
|
ORDER BY user_lastseen DESC;
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results_second;
|
||||||
|
count | count | avg
|
||||||
|
-------+-------+---------------------
|
||||||
|
1 | 1 | 65.0000000000000000
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- Most recently seen users_table events_table -- two shards query
|
||||||
|
------------------------------------
|
||||||
|
-- Note that we don't use ORDER BY/LIMIT yet
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
TRUNCATE agg_results_second;
|
||||||
|
INSERT INTO agg_results_second (user_id, agg_time, value_2_agg)
|
||||||
|
SELECT
|
||||||
|
user_id,
|
||||||
|
user_lastseen,
|
||||||
|
array_length(event_array, 1)
|
||||||
|
FROM (
|
||||||
|
SELECT
|
||||||
|
user_id,
|
||||||
|
max(u.time) as user_lastseen,
|
||||||
|
array_agg(event_type ORDER BY u.time) AS event_array
|
||||||
|
FROM (
|
||||||
|
|
||||||
|
SELECT user_id, time
|
||||||
|
FROM users_table
|
||||||
|
WHERE
|
||||||
|
user_id >= 10 AND
|
||||||
|
user_id <= 70 AND
|
||||||
|
(user_id = 65 OR user_id = 12) AND
|
||||||
|
users_table.value_1 > 10 AND users_table.value_1 < 12
|
||||||
|
) u LEFT JOIN LATERAL (
|
||||||
|
SELECT event_type, time
|
||||||
|
FROM events_table
|
||||||
|
WHERE user_id = u.user_id AND (user_id = 65 OR user_id = 12) AND
|
||||||
|
events_table.event_type > 10 AND events_table.event_type < 12
|
||||||
|
) t ON true
|
||||||
|
WHERE (user_id = 65 OR user_id = 12)
|
||||||
|
GROUP BY user_id
|
||||||
|
) AS shard_union
|
||||||
|
ORDER BY user_lastseen DESC;
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results_second;
|
||||||
|
count | count | avg
|
||||||
|
-------+-------+---------------------
|
||||||
|
2 | 2 | 38.5000000000000000
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- Count the number of distinct users_table who are in segment X and Y and Z -- single shard
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
TRUNCATE agg_results_second;
|
||||||
|
INSERT INTO agg_results_second (user_id)
|
||||||
|
SELECT DISTINCT user_id
|
||||||
|
FROM users_table
|
||||||
|
WHERE user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 10 AND value_1 <= 20)
|
||||||
|
AND user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 30 AND value_1 <= 40)
|
||||||
|
AND user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 50 AND value_1 <= 60)
|
||||||
|
AND user_id = 7;
|
||||||
|
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results_second;
|
||||||
|
count | count | avg
|
||||||
|
-------+-------+--------------------
|
||||||
|
1 | 1 | 7.0000000000000000
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- Count the number of distinct users_table who are in segment X and Y and Z -- two shards
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
TRUNCATE agg_results_second;
|
||||||
|
INSERT INTO agg_results_second (user_id)
|
||||||
|
SELECT DISTINCT user_id
|
||||||
|
FROM users_table
|
||||||
|
WHERE user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 10 AND value_1 <= 20 AND (user_id = 7 OR user_id = 20))
|
||||||
|
AND user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 30 AND value_1 <= 40 AND (user_id = 7 OR user_id = 20))
|
||||||
|
AND user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 50 AND value_1 <= 60 AND (user_id = 7 OR user_id = 20))
|
||||||
|
AND (user_id = 7 OR user_id = 20);
|
||||||
|
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results_second;
|
||||||
|
count | count | avg
|
||||||
|
-------+-------+---------------------
|
||||||
|
2 | 2 | 13.5000000000000000
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- Find customers who have done X, and satisfy other customer specific criteria -- single shard
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
TRUNCATE agg_results_second;
|
||||||
|
INSERT INTO agg_results_second(user_id, value_2_agg)
|
||||||
|
SELECT user_id, value_2 FROM users_table WHERE
|
||||||
|
value_1 > 101 AND value_1 < 110
|
||||||
|
AND value_2 >= 5
|
||||||
|
AND EXISTS (SELECT user_id FROM events_table WHERE event_type>101 AND event_type < 110 AND value_3 > 100 AND user_id=users_table.user_id)
|
||||||
|
AND user_id = 61;
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results_second;
|
||||||
|
count | count | avg
|
||||||
|
-------+-------+---------------------
|
||||||
|
1 | 1 | 61.0000000000000000
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- Find customers who have done X, and satisfy other customer specific criteria -- two shards
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
TRUNCATE agg_results_second;
|
||||||
|
INSERT INTO agg_results_second(user_id, value_2_agg)
|
||||||
|
SELECT user_id, value_2 FROM users_table WHERE
|
||||||
|
value_1 > 101 AND value_1 < 110
|
||||||
|
AND value_2 >= 5
|
||||||
|
AND EXISTS (SELECT user_id FROM events_table WHERE event_type>101 AND event_type < 110 AND value_3 > 100 AND (user_id = 61 OR user_id = 51) AND user_id=users_table.user_id)
|
||||||
|
AND (user_id = 61 OR user_id = 51);
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results_second;
|
||||||
|
count | count | avg
|
||||||
|
-------+-------+---------------------
|
||||||
|
2 | 2 | 56.0000000000000000
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- Customers who have done X and haven’t done Y, and satisfy other customer specific criteria -- single shard
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
TRUNCATE agg_results_second;
|
||||||
|
INSERT INTO agg_results_second(user_id, value_2_agg)
|
||||||
|
SELECT user_id, value_2 FROM users_table WHERE
|
||||||
|
value_2 >= 5
|
||||||
|
AND user_id = 96
|
||||||
|
AND EXISTS (SELECT user_id FROM events_table WHERE event_type > 100 AND event_type <= 300 AND value_3 > 100 AND user_id=users_table.user_id)
|
||||||
|
AND NOT EXISTS (SELECT user_id FROM events_table WHERE event_type > 300 AND event_type <= 350 AND value_3 > 100 AND user_id=users_table.user_id);
|
||||||
|
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results_second;
|
||||||
|
count | count | avg
|
||||||
|
-------+-------+---------------------
|
||||||
|
110 | 1 | 96.0000000000000000
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- Customers who have done X and haven’t done Y, and satisfy other customer specific criteria -- two shards
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
TRUNCATE agg_results_second;
|
||||||
|
INSERT INTO agg_results_second(user_id, value_2_agg)
|
||||||
|
SELECT user_id, value_2 FROM users_table WHERE
|
||||||
|
value_2 >= 5
|
||||||
|
AND (user_id = 96 OR user_id = 8)
|
||||||
|
AND EXISTS (SELECT user_id FROM events_table WHERE event_type > 100 AND event_type <= 300 AND value_3 > 100 AND user_id=users_table.user_id AND (user_id = 96 OR user_id = 8))
|
||||||
|
AND NOT EXISTS (SELECT user_id FROM events_table WHERE event_type > 300 AND event_type <= 350 AND value_3 > 100 AND user_id=users_table.user_id AND (user_id = 96 OR user_id = 8));
|
||||||
|
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results_second;
|
||||||
|
count | count | avg
|
||||||
|
-------+-------+---------------------
|
||||||
|
205 | 2 | 55.2195121951219512
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- Customers who have done X more than 2 times, and satisfy other customer specific criteria -- single shard
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
TRUNCATE agg_results_second;
|
||||||
|
INSERT INTO agg_results_second(user_id, value_2_agg)
|
||||||
|
SELECT user_id,
|
||||||
|
value_2
|
||||||
|
FROM users_table
|
||||||
|
WHERE value_1 > 100
|
||||||
|
AND value_1 < 124
|
||||||
|
AND value_2 >= 5
|
||||||
|
AND user_id = 47
|
||||||
|
AND EXISTS (SELECT user_id
|
||||||
|
FROM events_table
|
||||||
|
WHERE event_type > 100
|
||||||
|
AND event_type < 124
|
||||||
|
AND value_3 > 100
|
||||||
|
AND user_id = users_table.user_id
|
||||||
|
AND user_id = 47
|
||||||
|
GROUP BY user_id
|
||||||
|
HAVING Count(*) > 2);
|
||||||
|
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results_second;
|
||||||
|
count | count | avg
|
||||||
|
-------+-------+---------------------
|
||||||
|
6 | 1 | 47.0000000000000000
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- Customers who have done X more than 2 times, and satisfy other customer specific criteria -- two shards
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
TRUNCATE agg_results_second;
|
||||||
|
INSERT INTO agg_results_second(user_id, value_2_agg)
|
||||||
|
SELECT user_id,
|
||||||
|
value_2
|
||||||
|
FROM users_table
|
||||||
|
WHERE value_1 > 100
|
||||||
|
AND value_1 < 124
|
||||||
|
AND value_2 >= 5
|
||||||
|
AND (user_id = 47 or user_id = 81)
|
||||||
|
AND EXISTS (SELECT user_id
|
||||||
|
FROM events_table
|
||||||
|
WHERE event_type > 100
|
||||||
|
AND event_type < 124
|
||||||
|
AND value_3 > 100
|
||||||
|
AND user_id = users_table.user_id
|
||||||
|
AND (user_id = 47 or user_id = 81)
|
||||||
|
GROUP BY user_id
|
||||||
|
HAVING Count(*) > 2);
|
||||||
|
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results_second;
|
||||||
|
count | count | avg
|
||||||
|
-------+-------+---------------------
|
||||||
|
7 | 2 | 51.8571428571428571
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,662 @@
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- Vanilla funnel query
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- not pushable since the JOIN is not an equi join
|
||||||
|
INSERT INTO agg_results_third (user_id, value_1_agg)
|
||||||
|
SELECT user_id, array_length(events_table, 1)
|
||||||
|
FROM (
|
||||||
|
SELECT user_id, array_agg(event ORDER BY time) AS events_table
|
||||||
|
FROM (
|
||||||
|
SELECT u.user_id, e.event_type::text AS event, e.time
|
||||||
|
FROM users_table AS u,
|
||||||
|
events_table AS e
|
||||||
|
WHERE u.user_id != e.user_id
|
||||||
|
AND u.user_id >= 10
|
||||||
|
AND u.user_id <= 25
|
||||||
|
AND e.event_type IN (100, 101, 102)
|
||||||
|
) t
|
||||||
|
GROUP BY user_id
|
||||||
|
) q;
|
||||||
|
ERROR: cannot perform distributed planning for the given modification
|
||||||
|
DETAIL: Select query cannot be pushed down to the worker.
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- Funnel grouped by whether or not a user has done an event
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- not pushable since the JOIN is not an equi join left part of the UNION
|
||||||
|
-- is not equi join
|
||||||
|
INSERT INTO agg_results_third (user_id, value_1_agg, value_2_agg )
|
||||||
|
SELECT user_id, sum(array_length(events_table, 1)), length(hasdone_event)
|
||||||
|
FROM (
|
||||||
|
SELECT
|
||||||
|
t1.user_id,
|
||||||
|
array_agg(event ORDER BY time) AS events_table,
|
||||||
|
COALESCE(hasdone_event, 'Has not done event') AS hasdone_event
|
||||||
|
FROM (
|
||||||
|
(
|
||||||
|
SELECT u.user_id, 'step=>1'::text AS event, e.time
|
||||||
|
FROM users_table AS u,
|
||||||
|
events_table AS e
|
||||||
|
WHERE u.user_id != e.user_id
|
||||||
|
AND u.user_id >= 10
|
||||||
|
AND u.user_id <= 25
|
||||||
|
AND e.event_type IN (100, 101, 102)
|
||||||
|
)
|
||||||
|
UNION
|
||||||
|
(
|
||||||
|
SELECT u.user_id, 'step=>2'::text AS event, e.time
|
||||||
|
FROM users_table AS u,
|
||||||
|
events_table AS e
|
||||||
|
WHERE u.user_id = e.user_id
|
||||||
|
AND u.user_id >= 10
|
||||||
|
AND u.user_id <= 25
|
||||||
|
AND e.event_type IN (103, 104, 105)
|
||||||
|
)
|
||||||
|
) t1 LEFT JOIN (
|
||||||
|
SELECT DISTINCT user_id,
|
||||||
|
'Has done event'::TEXT AS hasdone_event
|
||||||
|
FROM events_table AS e
|
||||||
|
|
||||||
|
WHERE e.user_id >= 10
|
||||||
|
AND e.user_id <= 25
|
||||||
|
AND e.event_type IN (106, 107, 108)
|
||||||
|
) t2 ON (t1.user_id = t2.user_id)
|
||||||
|
GROUP BY t1.user_id, hasdone_event
|
||||||
|
) t GROUP BY user_id, hasdone_event;
|
||||||
|
ERROR: Set operations are not allowed in INSERT ... SELECT queries
|
||||||
|
-- not pushable since the JOIN is not an equi join right part of the UNION
|
||||||
|
-- is not joined on the partition key
|
||||||
|
INSERT INTO agg_results_third (user_id, value_1_agg, value_2_agg )
|
||||||
|
SELECT user_id, sum(array_length(events_table, 1)), length(hasdone_event)
|
||||||
|
FROM (
|
||||||
|
SELECT
|
||||||
|
t1.user_id,
|
||||||
|
array_agg(event ORDER BY time) AS events_table,
|
||||||
|
COALESCE(hasdone_event, 'Has not done event') AS hasdone_event
|
||||||
|
FROM (
|
||||||
|
(
|
||||||
|
SELECT u.user_id, 'step=>1'::text AS event, e.time
|
||||||
|
FROM users_table AS u,
|
||||||
|
events_table AS e
|
||||||
|
WHERE u.user_id = e.user_id
|
||||||
|
AND u.user_id >= 10
|
||||||
|
AND u.user_id <= 25
|
||||||
|
AND e.event_type IN (100, 101, 102)
|
||||||
|
)
|
||||||
|
UNION
|
||||||
|
(
|
||||||
|
SELECT u.user_id, 'step=>2'::text AS event, e.time
|
||||||
|
FROM users_table AS u,
|
||||||
|
events_table AS e
|
||||||
|
WHERE u.user_id = e.event_type
|
||||||
|
AND u.user_id >= 10
|
||||||
|
AND u.user_id <= 25
|
||||||
|
AND e.event_type IN (103, 104, 105)
|
||||||
|
)
|
||||||
|
) t1 LEFT JOIN (
|
||||||
|
SELECT DISTINCT user_id,
|
||||||
|
'Has done event'::TEXT AS hasdone_event
|
||||||
|
FROM events_table AS e
|
||||||
|
|
||||||
|
WHERE e.user_id >= 10
|
||||||
|
AND e.user_id <= 25
|
||||||
|
AND e.event_type IN (106, 107, 108)
|
||||||
|
) t2 ON (t1.user_id = t2.user_id)
|
||||||
|
GROUP BY t1.user_id, hasdone_event
|
||||||
|
) t GROUP BY user_id, hasdone_event;
|
||||||
|
ERROR: Set operations are not allowed in INSERT ... SELECT queries
|
||||||
|
-- the LEFT JOIN conditon is not on the partition column (i.e., is it part_key divided by 2)
|
||||||
|
INSERT INTO agg_results_third (user_id, value_1_agg, value_2_agg )
|
||||||
|
SELECT user_id, sum(array_length(events_table, 1)), length(hasdone_event)
|
||||||
|
FROM (
|
||||||
|
SELECT
|
||||||
|
t1.user_id,
|
||||||
|
array_agg(event ORDER BY time) AS events_table,
|
||||||
|
COALESCE(hasdone_event, 'Has not done event') AS hasdone_event
|
||||||
|
FROM (
|
||||||
|
(
|
||||||
|
SELECT u.user_id, 'step=>1'::text AS event, e.time
|
||||||
|
FROM users_table AS u,
|
||||||
|
events_table AS e
|
||||||
|
WHERE u.user_id = e.user_id
|
||||||
|
AND u.user_id >= 10
|
||||||
|
AND u.user_id <= 25
|
||||||
|
AND e.event_type IN (100, 101, 102)
|
||||||
|
)
|
||||||
|
UNION
|
||||||
|
(
|
||||||
|
SELECT u.user_id, 'step=>2'::text AS event, e.time
|
||||||
|
FROM users_table AS u,
|
||||||
|
events_table AS e
|
||||||
|
WHERE u.user_id = e.user_id
|
||||||
|
AND u.user_id >= 10
|
||||||
|
AND u.user_id <= 25
|
||||||
|
AND e.event_type IN (103, 104, 105)
|
||||||
|
)
|
||||||
|
) t1 LEFT JOIN (
|
||||||
|
SELECT DISTINCT user_id,
|
||||||
|
'Has done event'::TEXT AS hasdone_event
|
||||||
|
FROM events_table AS e
|
||||||
|
|
||||||
|
WHERE e.user_id >= 10
|
||||||
|
AND e.user_id <= 25
|
||||||
|
AND e.event_type IN (106, 107, 108)
|
||||||
|
) t2 ON (t1.user_id = (t2.user_id)/2)
|
||||||
|
GROUP BY t1.user_id, hasdone_event
|
||||||
|
) t GROUP BY user_id, hasdone_event;
|
||||||
|
ERROR: Set operations are not allowed in INSERT ... SELECT queries
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- Funnel, grouped by the number of times a user has done an event
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- not pushable since the right of the UNION query is not joined on
|
||||||
|
-- the partition key
|
||||||
|
INSERT INTO agg_results_third (user_id, value_1_agg, value_2_agg)
|
||||||
|
SELECT
|
||||||
|
user_id,
|
||||||
|
avg(array_length(events_table, 1)) AS event_average,
|
||||||
|
count_pay
|
||||||
|
FROM (
|
||||||
|
SELECT
|
||||||
|
subquery_1.user_id,
|
||||||
|
array_agg(event ORDER BY time) AS events_table,
|
||||||
|
COALESCE(count_pay, 0) AS count_pay
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
(SELECT
|
||||||
|
users_table.user_id,
|
||||||
|
'action=>1'AS event,
|
||||||
|
events_table.time
|
||||||
|
FROM
|
||||||
|
users_table,
|
||||||
|
events_table
|
||||||
|
WHERE
|
||||||
|
users_table.user_id = events_table.user_id AND
|
||||||
|
users_table.user_id >= 10 AND
|
||||||
|
users_table.user_id <= 70 AND
|
||||||
|
events_table.event_type > 10 AND events_table.event_type < 12
|
||||||
|
)
|
||||||
|
UNION
|
||||||
|
(SELECT
|
||||||
|
users_table.user_id,
|
||||||
|
'action=>2'AS event,
|
||||||
|
events_table.time
|
||||||
|
FROM
|
||||||
|
users_table,
|
||||||
|
events_table
|
||||||
|
WHERE
|
||||||
|
users_table.user_id != events_table.user_id AND
|
||||||
|
users_table.user_id >= 10 AND
|
||||||
|
users_table.user_id <= 70 AND
|
||||||
|
events_table.event_type > 12 AND events_table.event_type < 14
|
||||||
|
)
|
||||||
|
) AS subquery_1
|
||||||
|
LEFT JOIN
|
||||||
|
(SELECT
|
||||||
|
user_id,
|
||||||
|
COUNT(*) AS count_pay
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
WHERE
|
||||||
|
user_id >= 10 AND
|
||||||
|
user_id <= 70 AND
|
||||||
|
users_table.value_1 > 15 AND users_table.value_1 < 17
|
||||||
|
GROUP BY
|
||||||
|
user_id
|
||||||
|
HAVING
|
||||||
|
COUNT(*) > 1) AS subquery_2
|
||||||
|
ON
|
||||||
|
subquery_1.user_id = subquery_2.user_id
|
||||||
|
GROUP BY
|
||||||
|
subquery_1.user_id,
|
||||||
|
count_pay) AS subquery_top
|
||||||
|
WHERE
|
||||||
|
array_ndims(events_table) > 0
|
||||||
|
GROUP BY
|
||||||
|
count_pay, user_id
|
||||||
|
ORDER BY
|
||||||
|
count_pay;
|
||||||
|
ERROR: Set operations are not allowed in INSERT ... SELECT queries
|
||||||
|
-- not pushable since the JOIN condition is not equi JOIN
|
||||||
|
-- (subquery_1 JOIN subquery_2)
|
||||||
|
INSERT INTO agg_results_third (user_id, value_1_agg, value_2_agg)
|
||||||
|
SELECT
|
||||||
|
user_id,
|
||||||
|
avg(array_length(events_table, 1)) AS event_average,
|
||||||
|
count_pay
|
||||||
|
FROM (
|
||||||
|
SELECT
|
||||||
|
subquery_1.user_id,
|
||||||
|
array_agg(event ORDER BY time) AS events_table,
|
||||||
|
COALESCE(count_pay, 0) AS count_pay
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
(SELECT
|
||||||
|
users_table.user_id,
|
||||||
|
'action=>1'AS event,
|
||||||
|
events_table.time
|
||||||
|
FROM
|
||||||
|
users_table,
|
||||||
|
events_table
|
||||||
|
WHERE
|
||||||
|
users_table.user_id = events_table.user_id AND
|
||||||
|
users_table.user_id >= 10 AND
|
||||||
|
users_table.user_id <= 70 AND
|
||||||
|
events_table.event_type > 10 AND events_table.event_type < 12
|
||||||
|
)
|
||||||
|
UNION
|
||||||
|
(SELECT
|
||||||
|
users_table.user_id,
|
||||||
|
'action=>2'AS event,
|
||||||
|
events_table.time
|
||||||
|
FROM
|
||||||
|
users_table,
|
||||||
|
events_table
|
||||||
|
WHERE
|
||||||
|
users_table.user_id = events_table.user_id AND
|
||||||
|
users_table.user_id >= 10 AND
|
||||||
|
users_table.user_id <= 70 AND
|
||||||
|
events_table.event_type > 12 AND events_table.event_type < 14
|
||||||
|
)
|
||||||
|
) AS subquery_1
|
||||||
|
LEFT JOIN
|
||||||
|
(SELECT
|
||||||
|
user_id,
|
||||||
|
COUNT(*) AS count_pay
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
WHERE
|
||||||
|
user_id >= 10 AND
|
||||||
|
user_id <= 70 AND
|
||||||
|
users_table.value_1 > 15 AND users_table.value_1 < 17
|
||||||
|
GROUP BY
|
||||||
|
user_id
|
||||||
|
HAVING
|
||||||
|
COUNT(*) > 1) AS subquery_2
|
||||||
|
ON
|
||||||
|
subquery_1.user_id > subquery_2.user_id
|
||||||
|
GROUP BY
|
||||||
|
subquery_1.user_id,
|
||||||
|
count_pay) AS subquery_top
|
||||||
|
WHERE
|
||||||
|
array_ndims(events_table) > 0
|
||||||
|
GROUP BY
|
||||||
|
count_pay, user_id
|
||||||
|
ORDER BY
|
||||||
|
count_pay;
|
||||||
|
ERROR: Set operations are not allowed in INSERT ... SELECT queries
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- Most recently seen users_table events_table
|
||||||
|
------------------------------------
|
||||||
|
-- Note that we don't use ORDER BY/LIMIT yet
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- not pushable since lateral join is not an equi join
|
||||||
|
INSERT INTO agg_results_third (user_id, agg_time, value_2_agg)
|
||||||
|
SELECT
|
||||||
|
user_id,
|
||||||
|
user_lastseen,
|
||||||
|
array_length(event_array, 1)
|
||||||
|
FROM (
|
||||||
|
SELECT
|
||||||
|
user_id,
|
||||||
|
max(u.time) as user_lastseen,
|
||||||
|
array_agg(event_type ORDER BY u.time) AS event_array
|
||||||
|
FROM (
|
||||||
|
|
||||||
|
SELECT user_id, time
|
||||||
|
FROM users_table
|
||||||
|
WHERE
|
||||||
|
user_id >= 10 AND
|
||||||
|
user_id <= 70 AND
|
||||||
|
users_table.value_1 > 10 AND users_table.value_1 < 12
|
||||||
|
) u LEFT JOIN LATERAL (
|
||||||
|
SELECT event_type, time
|
||||||
|
FROM events_table
|
||||||
|
WHERE user_id != u.user_id AND
|
||||||
|
events_table.event_type > 10 AND events_table.event_type < 12
|
||||||
|
) t ON true
|
||||||
|
GROUP BY user_id
|
||||||
|
) AS shard_union
|
||||||
|
ORDER BY user_lastseen DESC;
|
||||||
|
ERROR: cannot perform distributed planning for the given modification
|
||||||
|
DETAIL: Select query cannot be pushed down to the worker.
|
||||||
|
-- not pushable since lateral join is not on the partition key
|
||||||
|
INSERT INTO agg_results_third (user_id, agg_time, value_2_agg)
|
||||||
|
SELECT
|
||||||
|
user_id,
|
||||||
|
user_lastseen,
|
||||||
|
array_length(event_array, 1)
|
||||||
|
FROM (
|
||||||
|
SELECT
|
||||||
|
user_id,
|
||||||
|
max(u.time) as user_lastseen,
|
||||||
|
array_agg(event_type ORDER BY u.time) AS event_array
|
||||||
|
FROM (
|
||||||
|
|
||||||
|
SELECT user_id, time
|
||||||
|
FROM users_table
|
||||||
|
WHERE
|
||||||
|
user_id >= 10 AND
|
||||||
|
user_id <= 70 AND
|
||||||
|
users_table.value_1 > 10 AND users_table.value_1 < 12
|
||||||
|
) u LEFT JOIN LATERAL (
|
||||||
|
SELECT event_type, time
|
||||||
|
FROM events_table
|
||||||
|
WHERE event_type = u.user_id AND
|
||||||
|
events_table.event_type > 10 AND events_table.event_type < 12
|
||||||
|
) t ON true
|
||||||
|
GROUP BY user_id
|
||||||
|
) AS shard_union
|
||||||
|
ORDER BY user_lastseen DESC;
|
||||||
|
ERROR: cannot perform distributed planning for the given modification
|
||||||
|
DETAIL: Select query cannot be pushed down to the worker.
|
||||||
|
-- not pushable since lateral join is not on the partition key
|
||||||
|
INSERT INTO agg_results_third (user_id, agg_time, value_2_agg)
|
||||||
|
SELECT
|
||||||
|
user_id,
|
||||||
|
user_lastseen,
|
||||||
|
array_length(event_array, 1)
|
||||||
|
FROM (
|
||||||
|
SELECT
|
||||||
|
user_id,
|
||||||
|
max(u.time) as user_lastseen,
|
||||||
|
array_agg(event_type ORDER BY u.time) AS event_array
|
||||||
|
FROM (
|
||||||
|
|
||||||
|
SELECT user_id, time, value_3 as val_3
|
||||||
|
FROM users_table
|
||||||
|
WHERE
|
||||||
|
user_id >= 10 AND
|
||||||
|
user_id <= 70 AND
|
||||||
|
users_table.value_1 > 10 AND users_table.value_1 < 12
|
||||||
|
) u LEFT JOIN LATERAL (
|
||||||
|
SELECT event_type, time
|
||||||
|
FROM events_table
|
||||||
|
WHERE event_type = u.val_3 AND
|
||||||
|
events_table.event_type > 10 AND events_table.event_type < 12
|
||||||
|
) t ON true
|
||||||
|
GROUP BY user_id
|
||||||
|
) AS shard_union
|
||||||
|
ORDER BY user_lastseen DESC;
|
||||||
|
ERROR: cannot perform distributed planning for the given modification
|
||||||
|
DETAIL: Select query cannot be pushed down to the worker.
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- Count the number of distinct users_table who are in segment X and Y and Z
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- not pushable since partition key is NOT IN
|
||||||
|
INSERT INTO agg_results_third (user_id)
|
||||||
|
SELECT DISTINCT user_id
|
||||||
|
FROM users_table
|
||||||
|
WHERE user_id NOT IN (SELECT user_id FROM users_table WHERE value_1 >= 10 AND value_1 <= 20)
|
||||||
|
AND user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 30 AND value_1 <= 40)
|
||||||
|
AND user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 50 AND value_1 <= 60);
|
||||||
|
ERROR: cannot perform distributed planning for the given modification
|
||||||
|
DETAIL: Select query cannot be pushed down to the worker.
|
||||||
|
-- not pushable since partition key is not selected from the second subquery
|
||||||
|
INSERT INTO agg_results_third (user_id)
|
||||||
|
SELECT DISTINCT user_id
|
||||||
|
FROM users_table
|
||||||
|
WHERE user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 10 AND value_1 <= 20)
|
||||||
|
AND user_id IN (SELECT value_1 FROM users_table WHERE value_1 >= 30 AND value_1 <= 40)
|
||||||
|
AND user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 50 AND value_1 <= 60);
|
||||||
|
ERROR: cannot perform distributed planning for the given modification
|
||||||
|
DETAIL: Select query cannot be pushed down to the worker.
|
||||||
|
-- not pushable since second subquery does not return bare partition key
|
||||||
|
INSERT INTO agg_results_third (user_id)
|
||||||
|
SELECT DISTINCT user_id
|
||||||
|
FROM users_table
|
||||||
|
WHERE user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 10 AND value_1 <= 20)
|
||||||
|
AND user_id IN (SELECT 3 * user_id FROM users_table WHERE value_1 >= 30 AND value_1 <= 40)
|
||||||
|
AND user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 50 AND value_1 <= 60);
|
||||||
|
ERROR: cannot perform distributed planning for the given modification
|
||||||
|
DETAIL: Select query cannot be pushed down to the worker.
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- Find customers who have done X, and satisfy other customer specific criteria
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- not pushable since join is not an euqi join
|
||||||
|
INSERT INTO agg_results_third(user_id, value_2_agg)
|
||||||
|
SELECT user_id, value_2 FROM users_table WHERE
|
||||||
|
value_1 > 101 AND value_1 < 110
|
||||||
|
AND value_2 >= 5
|
||||||
|
AND EXISTS (SELECT user_id FROM events_table WHERE event_type>101 AND event_type < 110 AND value_3 > 100 AND user_id!=users_table.user_id);
|
||||||
|
ERROR: cannot perform distributed planning for the given modification
|
||||||
|
DETAIL: Select query cannot be pushed down to the worker.
|
||||||
|
-- not pushable since the join is not on the partition key
|
||||||
|
INSERT INTO agg_results_third(user_id, value_2_agg)
|
||||||
|
SELECT user_id, value_2 FROM users_table WHERE
|
||||||
|
value_1 > 101 AND value_1 < 110
|
||||||
|
AND value_2 >= 5
|
||||||
|
AND EXISTS (SELECT user_id FROM events_table WHERE event_type>101 AND event_type < 110 AND value_3 > 100 AND event_type = users_table.user_id);
|
||||||
|
ERROR: cannot perform distributed planning for the given modification
|
||||||
|
DETAIL: Select query cannot be pushed down to the worker.
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- Customers who haven’t done X, and satisfy other customer specific criteria
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- not pushable since the join is not an equi join
|
||||||
|
INSERT INTO agg_results_third(user_id, value_2_agg)
|
||||||
|
SELECT user_id, value_2 FROM users_table WHERE
|
||||||
|
value_1 = 101
|
||||||
|
AND value_2 >= 5
|
||||||
|
AND NOT EXISTS (SELECT user_id FROM events_table WHERE event_type=101 AND value_3 > 100 AND user_id!=users_table.user_id);
|
||||||
|
ERROR: cannot perform distributed planning for the given modification
|
||||||
|
DETAIL: Select query cannot be pushed down to the worker.
|
||||||
|
-- not pushable since the join is not the partition key
|
||||||
|
INSERT INTO agg_results_third(user_id, value_2_agg)
|
||||||
|
SELECT user_id, value_2 FROM users_table WHERE
|
||||||
|
value_1 = 101
|
||||||
|
AND value_2 >= 5
|
||||||
|
AND NOT EXISTS (SELECT user_id FROM events_table WHERE event_type=101 AND value_3 > 100 AND event_type=users_table.user_id);
|
||||||
|
ERROR: cannot perform distributed planning for the given modification
|
||||||
|
DETAIL: Select query cannot be pushed down to the worker.
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- Customers who have done X and Y, and satisfy other customer specific criteria
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- not pushable since the second join is not on the partition key
|
||||||
|
INSERT INTO agg_results_third(user_id, value_2_agg)
|
||||||
|
SELECT user_id, value_2 FROM users_table WHERE
|
||||||
|
value_1 > 100
|
||||||
|
AND value_2 >= 5
|
||||||
|
AND EXISTS (SELECT user_id FROM events_table WHERE event_type!=100 AND value_3 > 100 AND user_id=users_table.user_id)
|
||||||
|
AND EXISTS (SELECT user_id FROM events_table WHERE event_type=101 AND value_3 > 100 AND user_id!=users_table.user_id);
|
||||||
|
ERROR: cannot perform distributed planning for the given modification
|
||||||
|
DETAIL: Select query cannot be pushed down to the worker.
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- Customers who have done X and haven’t done Y, and satisfy other customer specific criteria
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- not pushable since the first join is not on the partition key
|
||||||
|
INSERT INTO agg_results_third(user_id, value_2_agg)
|
||||||
|
SELECT user_id, value_2 FROM users_table WHERE
|
||||||
|
value_2 >= 5
|
||||||
|
AND EXISTS (SELECT user_id FROM events_table WHERE event_type > 100 AND event_type <= 300 AND value_3 > 100 AND user_id!=users_table.user_id)
|
||||||
|
AND NOT EXISTS (SELECT user_id FROM events_table WHERE event_type > 300 AND event_type <= 350 AND value_3 > 100 AND user_id=users_table.user_id);
|
||||||
|
ERROR: cannot perform distributed planning for the given modification
|
||||||
|
DETAIL: Select query cannot be pushed down to the worker.
|
||||||
|
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- Customers who have done X more than 2 times, and satisfy other customer specific criteria
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- not pushable since the second join is not an equi join
|
||||||
|
INSERT INTO agg_results_third(user_id, value_2_agg)
|
||||||
|
SELECT user_id,
|
||||||
|
value_2
|
||||||
|
FROM users_table
|
||||||
|
WHERE value_1 > 100
|
||||||
|
AND value_1 < 124
|
||||||
|
AND value_2 >= 5
|
||||||
|
AND EXISTS (SELECT user_id
|
||||||
|
FROM events_table
|
||||||
|
WHERE event_type > 100
|
||||||
|
AND event_type < 124
|
||||||
|
AND value_3 > 100
|
||||||
|
AND user_id != users_table.user_id
|
||||||
|
GROUP BY user_id
|
||||||
|
HAVING Count(*) > 2);
|
||||||
|
ERROR: cannot perform distributed planning for the given modification
|
||||||
|
DETAIL: Select query cannot be pushed down to the worker.
|
||||||
|
-- not pushable since the second join is not on the partition key
|
||||||
|
INSERT INTO agg_results_third(user_id, value_2_agg)
|
||||||
|
SELECT user_id,
|
||||||
|
value_2
|
||||||
|
FROM users_table
|
||||||
|
WHERE value_1 > 100
|
||||||
|
AND value_1 < 124
|
||||||
|
AND value_2 >= 5
|
||||||
|
AND EXISTS (SELECT user_id
|
||||||
|
FROM events_table
|
||||||
|
WHERE event_type > 100
|
||||||
|
AND event_type < 124
|
||||||
|
AND value_3 > 100
|
||||||
|
AND event_type = users_table.user_id
|
||||||
|
GROUP BY user_id
|
||||||
|
HAVING Count(*) > 2);
|
||||||
|
ERROR: cannot perform distributed planning for the given modification
|
||||||
|
DETAIL: Select query cannot be pushed down to the worker.
|
||||||
|
-- not pushable since the second join is not on the partition key
|
||||||
|
INSERT INTO agg_results_third(user_id, value_2_agg)
|
||||||
|
SELECT user_id,
|
||||||
|
value_2
|
||||||
|
FROM users_table
|
||||||
|
WHERE value_1 > 100
|
||||||
|
AND value_1 < 124
|
||||||
|
AND value_2 >= 5
|
||||||
|
AND EXISTS (SELECT user_id
|
||||||
|
FROM events_table
|
||||||
|
WHERE event_type > 100
|
||||||
|
AND event_type < 124
|
||||||
|
AND value_3 > 100
|
||||||
|
AND user_id = users_table.value_1
|
||||||
|
GROUP BY user_id
|
||||||
|
HAVING Count(*) > 2);
|
||||||
|
ERROR: cannot perform distributed planning for the given modification
|
||||||
|
DETAIL: Select query cannot be pushed down to the worker.
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- Find me all users_table who has done some event and has filters
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- not pushable due to NOT IN
|
||||||
|
INSERT INTO agg_results_third(user_id)
|
||||||
|
Select user_id
|
||||||
|
From events_table
|
||||||
|
Where event_type = 16
|
||||||
|
And value_2 > 50
|
||||||
|
And user_id NOT in
|
||||||
|
(select user_id
|
||||||
|
From users_table
|
||||||
|
Where value_1 = 15
|
||||||
|
And value_2 > 25);
|
||||||
|
ERROR: cannot perform distributed planning for the given modification
|
||||||
|
DETAIL: Select query cannot be pushed down to the worker.
|
||||||
|
-- not pushable since we're not selecting the partition key
|
||||||
|
INSERT INTO agg_results_third(user_id)
|
||||||
|
Select user_id
|
||||||
|
From events_table
|
||||||
|
Where event_type = 16
|
||||||
|
And value_2 > 50
|
||||||
|
And user_id in
|
||||||
|
(select value_3
|
||||||
|
From users_table
|
||||||
|
Where value_1 = 15
|
||||||
|
And value_2 > 25);
|
||||||
|
ERROR: cannot perform distributed planning for the given modification
|
||||||
|
DETAIL: Select query cannot be pushed down to the worker.
|
||||||
|
|
||||||
|
-- not pushable since we're not selecting the partition key
|
||||||
|
-- from the events table
|
||||||
|
INSERT INTO agg_results_third(user_id)
|
||||||
|
Select user_id
|
||||||
|
From events_table
|
||||||
|
Where event_type = 16
|
||||||
|
And value_2 > 50
|
||||||
|
And event_type in
|
||||||
|
(select user_id
|
||||||
|
From users_table
|
||||||
|
Where value_1 = 15
|
||||||
|
And value_2 > 25);
|
||||||
|
ERROR: cannot perform distributed planning for the given modification
|
||||||
|
DETAIL: Select query cannot be pushed down to the worker.
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- Which events_table did people who has done some specific events_table
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- not pushable due to NOT IN
|
||||||
|
INSERT INTO agg_results_third(user_id, value_1_agg)
|
||||||
|
SELECT user_id, event_type FROM events_table
|
||||||
|
WHERE user_id NOT IN (SELECT user_id from events_table WHERE event_type > 500 and event_type < 505)
|
||||||
|
GROUP BY user_id, event_type;
|
||||||
|
ERROR: cannot perform distributed planning for the given modification
|
||||||
|
DETAIL: Select query cannot be pushed down to the worker.
|
||||||
|
-- not pushable due to not selecting the partition key
|
||||||
|
INSERT INTO agg_results_third(user_id, value_1_agg)
|
||||||
|
SELECT user_id, event_type FROM events_table
|
||||||
|
WHERE user_id IN (SELECT value_2 from events_table WHERE event_type > 500 and event_type < 505)
|
||||||
|
GROUP BY user_id, event_type;
|
||||||
|
ERROR: cannot perform distributed planning for the given modification
|
||||||
|
DETAIL: Select query cannot be pushed down to the worker.
|
||||||
|
-- not pushable due to not comparing user id from the events table
|
||||||
|
INSERT INTO agg_results_third(user_id, value_1_agg)
|
||||||
|
SELECT user_id, event_type FROM events_table
|
||||||
|
WHERE event_type IN (SELECT user_id from events_table WHERE event_type > 500 and event_type < 505)
|
||||||
|
GROUP BY user_id, event_type;
|
||||||
|
ERROR: cannot perform distributed planning for the given modification
|
||||||
|
DETAIL: Select query cannot be pushed down to the worker.
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- Find my assets that have the highest probability and fetch their metadata
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- not pushable since the join is not an equi join
|
||||||
|
INSERT INTO agg_results_third(user_id, value_1_agg, value_3_agg)
|
||||||
|
SELECT
|
||||||
|
users_table.user_id, users_table.value_1, prob
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
JOIN
|
||||||
|
(SELECT
|
||||||
|
ma.user_id, (GREATEST(coalesce(ma.value_4 / 250, 0.0) + GREATEST(1.0))) / 2 AS prob
|
||||||
|
FROM
|
||||||
|
users_table AS ma, events_table as short_list
|
||||||
|
WHERE
|
||||||
|
short_list.user_id != ma.user_id and ma.value_1 < 50 and short_list.event_type < 50
|
||||||
|
) temp
|
||||||
|
ON users_table.user_id = temp.user_id
|
||||||
|
WHERE users_table.value_1 < 50;
|
||||||
|
ERROR: cannot perform distributed planning for the given modification
|
||||||
|
DETAIL: Select query cannot be pushed down to the worker.
|
||||||
|
-- not pushable since the join is not on the partition key
|
||||||
|
INSERT INTO agg_results_third(user_id, value_1_agg, value_3_agg)
|
||||||
|
SELECT
|
||||||
|
users_table.user_id, users_table.value_1, prob
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
JOIN
|
||||||
|
(SELECT
|
||||||
|
ma.user_id, (GREATEST(coalesce(ma.value_4 / 250, 0.0) + GREATEST(1.0))) / 2 AS prob
|
||||||
|
FROM
|
||||||
|
users_table AS ma, events_table as short_list
|
||||||
|
WHERE
|
||||||
|
short_list.user_id = ma.value_2 and ma.value_1 < 50 and short_list.event_type < 50
|
||||||
|
) temp
|
||||||
|
ON users_table.user_id = temp.user_id
|
||||||
|
WHERE users_table.value_1 < 50;
|
||||||
|
ERROR: cannot perform distributed planning for the given modification
|
||||||
|
DETAIL: Select query cannot be pushed down to the worker.
|
|
@ -0,0 +1,43 @@
|
||||||
|
--
|
||||||
|
-- multi insert select behavioral analytics
|
||||||
|
-- this file is intended to create the table requires for the tests
|
||||||
|
--
|
||||||
|
|
||||||
|
ALTER SEQUENCE pg_catalog.pg_dist_shardid_seq RESTART 1400000;
|
||||||
|
ALTER SEQUENCE pg_catalog.pg_dist_jobid_seq RESTART 1400000;
|
||||||
|
|
||||||
|
SET citus.shard_replication_factor = 1;
|
||||||
|
SET citus.shard_count = 4;
|
||||||
|
|
||||||
|
CREATE TABLE users_table (user_id int, time timestamp, value_1 int, value_2 int, value_3 float, value_4 bigint);
|
||||||
|
SELECT create_distributed_table('users_table', 'user_id');
|
||||||
|
|
||||||
|
CREATE TABLE events_table (user_id int, time timestamp, event_type int, value_2 int, value_3 float, value_4 bigint);
|
||||||
|
SELECT create_distributed_table('events_table', 'user_id');
|
||||||
|
|
||||||
|
CREATE TABLE agg_results (user_id int, value_1_agg int, value_2_agg int, value_3_agg float, value_4_agg bigint, agg_time timestamp);
|
||||||
|
SELECT create_distributed_table('agg_results', 'user_id');
|
||||||
|
|
||||||
|
-- we need this to improve the concurrency on the regression tests
|
||||||
|
CREATE TABLE agg_results_second (user_id int, value_1_agg int, value_2_agg int, value_3_agg float, value_4_agg bigint, agg_time timestamp);
|
||||||
|
SELECT create_distributed_table('agg_results_second', 'user_id');
|
||||||
|
|
||||||
|
-- same as agg_results_second
|
||||||
|
CREATE TABLE agg_results_third (user_id int, value_1_agg int, value_2_agg int, value_3_agg float, value_4_agg bigint, agg_time timestamp);
|
||||||
|
SELECT create_distributed_table('agg_results_third', 'user_id');
|
||||||
|
|
||||||
|
-- same as agg_results_second
|
||||||
|
CREATE TABLE agg_results_fourth (user_id int, value_1_agg int, value_2_agg int, value_3_agg float, value_4_agg bigint, agg_time timestamp);
|
||||||
|
SELECT create_distributed_table('agg_results_fourth', 'user_id');
|
||||||
|
|
||||||
|
COPY users_table FROM '@abs_srcdir@/data/users_table.data' WITH CSV;
|
||||||
|
COPY events_table FROM '@abs_srcdir@/data/events_table.data' WITH CSV;
|
||||||
|
|
||||||
|
-- create indexes for
|
||||||
|
CREATE INDEX is_index1 ON users_table(user_id);
|
||||||
|
CREATE INDEX is_index2 ON events_table(user_id);
|
||||||
|
CREATE INDEX is_index3 ON users_table(value_1);
|
||||||
|
CREATE INDEX is_index4 ON events_table(event_type);
|
||||||
|
CREATE INDEX is_index5 ON users_table(value_2);
|
||||||
|
CREATE INDEX is_index6 ON events_table(value_2);
|
||||||
|
|
|
@ -30,6 +30,8 @@ test: multi_create_table_constraints
|
||||||
test: multi_master_protocol
|
test: multi_master_protocol
|
||||||
test: multi_load_data
|
test: multi_load_data
|
||||||
|
|
||||||
|
test: multi_insert_select_behavioral_analytics_create_table
|
||||||
|
test: multi_insert_select_behavioral_analytics_basics multi_insert_select_behavioral_analytics_single_shard_queries multi_insert_select_non_pushable_queries
|
||||||
test: multi_insert_select
|
test: multi_insert_select
|
||||||
|
|
||||||
# ----------
|
# ----------
|
||||||
|
|
|
@ -0,0 +1,64 @@
|
||||||
|
--
|
||||||
|
-- multi insert select behavioral analytics
|
||||||
|
-- this file is intended to create the table requires for the tests
|
||||||
|
--
|
||||||
|
ALTER SEQUENCE pg_catalog.pg_dist_shardid_seq RESTART 1400000;
|
||||||
|
ALTER SEQUENCE pg_catalog.pg_dist_jobid_seq RESTART 1400000;
|
||||||
|
SET citus.shard_replication_factor = 1;
|
||||||
|
SET citus.shard_count = 4;
|
||||||
|
CREATE TABLE users_table (user_id int, time timestamp, value_1 int, value_2 int, value_3 float, value_4 bigint);
|
||||||
|
SELECT create_distributed_table('users_table', 'user_id');
|
||||||
|
create_distributed_table
|
||||||
|
--------------------------
|
||||||
|
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
CREATE TABLE events_table (user_id int, time timestamp, event_type int, value_2 int, value_3 float, value_4 bigint);
|
||||||
|
SELECT create_distributed_table('events_table', 'user_id');
|
||||||
|
create_distributed_table
|
||||||
|
--------------------------
|
||||||
|
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
CREATE TABLE agg_results (user_id int, value_1_agg int, value_2_agg int, value_3_agg float, value_4_agg bigint, agg_time timestamp);
|
||||||
|
SELECT create_distributed_table('agg_results', 'user_id');
|
||||||
|
create_distributed_table
|
||||||
|
--------------------------
|
||||||
|
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
-- we need this to improve the concurrency on the regression tests
|
||||||
|
CREATE TABLE agg_results_second (user_id int, value_1_agg int, value_2_agg int, value_3_agg float, value_4_agg bigint, agg_time timestamp);
|
||||||
|
SELECT create_distributed_table('agg_results_second', 'user_id');
|
||||||
|
create_distributed_table
|
||||||
|
--------------------------
|
||||||
|
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
-- same as agg_results_second
|
||||||
|
CREATE TABLE agg_results_third (user_id int, value_1_agg int, value_2_agg int, value_3_agg float, value_4_agg bigint, agg_time timestamp);
|
||||||
|
SELECT create_distributed_table('agg_results_third', 'user_id');
|
||||||
|
create_distributed_table
|
||||||
|
--------------------------
|
||||||
|
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
-- same as agg_results_second
|
||||||
|
CREATE TABLE agg_results_fourth (user_id int, value_1_agg int, value_2_agg int, value_3_agg float, value_4_agg bigint, agg_time timestamp);
|
||||||
|
SELECT create_distributed_table('agg_results_fourth', 'user_id');
|
||||||
|
create_distributed_table
|
||||||
|
--------------------------
|
||||||
|
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
COPY users_table FROM '@abs_srcdir@/data/users_table.data' WITH CSV;
|
||||||
|
COPY events_table FROM '@abs_srcdir@/data/events_table.data' WITH CSV;
|
||||||
|
-- create indexes for
|
||||||
|
CREATE INDEX is_index1 ON users_table(user_id);
|
||||||
|
NOTICE: using one-phase commit for distributed DDL commands
|
||||||
|
HINT: You can enable two-phase commit for extra safety with: SET citus.multi_shard_commit_protocol TO '2pc'
|
||||||
|
CREATE INDEX is_index2 ON events_table(user_id);
|
||||||
|
CREATE INDEX is_index3 ON users_table(value_1);
|
||||||
|
CREATE INDEX is_index4 ON events_table(event_type);
|
||||||
|
CREATE INDEX is_index5 ON users_table(value_2);
|
||||||
|
CREATE INDEX is_index6 ON events_table(value_2);
|
|
@ -22,6 +22,9 @@ SELECT create_distributed_table('agg_events', 'user_id');;
|
||||||
CREATE TABLE reference_table (user_id int);
|
CREATE TABLE reference_table (user_id int);
|
||||||
SELECT create_reference_table('reference_table');
|
SELECT create_reference_table('reference_table');
|
||||||
|
|
||||||
|
CREATE TABLE insert_select_varchar_test (key varchar, value int);
|
||||||
|
SELECT create_distributed_table('insert_select_varchar_test', 'key', 'hash');
|
||||||
|
|
||||||
-- set back to the defaults
|
-- set back to the defaults
|
||||||
SET citus.shard_count = DEFAULT;
|
SET citus.shard_count = DEFAULT;
|
||||||
SET citus.shard_replication_factor = DEFAULT;
|
SET citus.shard_replication_factor = DEFAULT;
|
||||||
|
@ -1063,7 +1066,193 @@ ON (f.id = f2.id)
|
||||||
WHERE f.id IN (SELECT value_1
|
WHERE f.id IN (SELECT value_1
|
||||||
FROM raw_events_second);
|
FROM raw_events_second);
|
||||||
|
|
||||||
|
-- some more semi-anti join tests
|
||||||
|
|
||||||
|
-- join in where
|
||||||
|
INSERT INTO raw_events_second
|
||||||
|
(user_id)
|
||||||
|
SELECT user_id
|
||||||
|
FROM raw_events_first
|
||||||
|
WHERE user_id IN (SELECT raw_events_second.user_id
|
||||||
|
FROM raw_events_second, raw_events_first
|
||||||
|
WHERE raw_events_second.user_id = raw_events_first.user_id AND raw_events_first.user_id = 200);
|
||||||
|
|
||||||
|
-- we cannot push this down since it is NOT IN
|
||||||
|
INSERT INTO raw_events_second
|
||||||
|
(user_id)
|
||||||
|
SELECT user_id
|
||||||
|
FROM raw_events_first
|
||||||
|
WHERE user_id NOT IN (SELECT raw_events_second.user_id
|
||||||
|
FROM raw_events_second, raw_events_first
|
||||||
|
WHERE raw_events_second.user_id = raw_events_first.user_id AND raw_events_first.user_id = 200);
|
||||||
|
|
||||||
|
|
||||||
|
-- safe to push down
|
||||||
|
INSERT INTO raw_events_second
|
||||||
|
(user_id)
|
||||||
|
SELECT user_id
|
||||||
|
FROM raw_events_first
|
||||||
|
WHERE EXISTS (SELECT 1
|
||||||
|
FROM raw_events_second
|
||||||
|
WHERE raw_events_second.user_id =raw_events_first.user_id);
|
||||||
|
|
||||||
|
-- we cannot push down
|
||||||
|
INSERT INTO raw_events_second
|
||||||
|
(user_id)
|
||||||
|
SELECT user_id
|
||||||
|
FROM raw_events_first
|
||||||
|
WHERE NOT EXISTS (SELECT 1
|
||||||
|
FROM raw_events_second
|
||||||
|
WHERE raw_events_second.user_id =raw_events_first.user_id);
|
||||||
|
|
||||||
|
|
||||||
|
-- more complex LEFT JOINs
|
||||||
|
INSERT INTO agg_events
|
||||||
|
(user_id, value_4_agg)
|
||||||
|
SELECT
|
||||||
|
outer_most.id, max(outer_most.value)
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
SELECT f2.id as id, f2.v4 as value FROM
|
||||||
|
(SELECT
|
||||||
|
id
|
||||||
|
FROM (SELECT raw_events_first.user_id AS id
|
||||||
|
FROM raw_events_first LEFT JOIN
|
||||||
|
reference_table
|
||||||
|
ON (raw_events_first.user_id = reference_table.user_id)) AS foo) as f
|
||||||
|
LEFT JOIN
|
||||||
|
(SELECT v4,
|
||||||
|
v1,
|
||||||
|
id
|
||||||
|
FROM (SELECT SUM(raw_events_second.value_4) AS v4,
|
||||||
|
SUM(raw_events_first.value_1) AS v1,
|
||||||
|
raw_events_second.user_id AS id
|
||||||
|
FROM raw_events_first,
|
||||||
|
raw_events_second
|
||||||
|
WHERE raw_events_first.user_id = raw_events_second.user_id
|
||||||
|
GROUP BY raw_events_second.user_id
|
||||||
|
HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2
|
||||||
|
ON (f.id = f2.id)) as outer_most
|
||||||
|
GROUP BY
|
||||||
|
outer_most.id;
|
||||||
|
|
||||||
|
|
||||||
|
-- cannot push down since the f.id IN is matched with value_1
|
||||||
|
INSERT INTO raw_events_second
|
||||||
|
(user_id)
|
||||||
|
SELECT user_id
|
||||||
|
FROM raw_events_first
|
||||||
|
WHERE user_id IN (
|
||||||
|
SELECT f2.id FROM
|
||||||
|
(SELECT
|
||||||
|
id
|
||||||
|
FROM (SELECT reference_table.user_id AS id
|
||||||
|
FROM raw_events_first,
|
||||||
|
reference_table
|
||||||
|
WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f
|
||||||
|
INNER JOIN
|
||||||
|
(SELECT v4,
|
||||||
|
v1,
|
||||||
|
id
|
||||||
|
FROM (SELECT SUM(raw_events_second.value_4) AS v4,
|
||||||
|
SUM(raw_events_first.value_1) AS v1,
|
||||||
|
raw_events_second.user_id AS id
|
||||||
|
FROM raw_events_first,
|
||||||
|
raw_events_second
|
||||||
|
WHERE raw_events_first.user_id = raw_events_second.user_id
|
||||||
|
GROUP BY raw_events_second.user_id
|
||||||
|
HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2
|
||||||
|
ON (f.id = f2.id)
|
||||||
|
WHERE f.id IN (SELECT value_1
|
||||||
|
FROM raw_events_second));
|
||||||
|
|
||||||
|
-- same as above, but this time is it safe to push down since
|
||||||
|
-- f.id IN is matched with user_id
|
||||||
|
INSERT INTO raw_events_second
|
||||||
|
(user_id)
|
||||||
|
SELECT user_id
|
||||||
|
FROM raw_events_first
|
||||||
|
WHERE user_id IN (
|
||||||
|
SELECT f2.id FROM
|
||||||
|
(SELECT
|
||||||
|
id
|
||||||
|
FROM (SELECT reference_table.user_id AS id
|
||||||
|
FROM raw_events_first,
|
||||||
|
reference_table
|
||||||
|
WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f
|
||||||
|
INNER JOIN
|
||||||
|
(SELECT v4,
|
||||||
|
v1,
|
||||||
|
id
|
||||||
|
FROM (SELECT SUM(raw_events_second.value_4) AS v4,
|
||||||
|
SUM(raw_events_first.value_1) AS v1,
|
||||||
|
raw_events_second.user_id AS id
|
||||||
|
FROM raw_events_first,
|
||||||
|
raw_events_second
|
||||||
|
WHERE raw_events_first.user_id = raw_events_second.user_id
|
||||||
|
GROUP BY raw_events_second.user_id
|
||||||
|
HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2
|
||||||
|
ON (f.id = f2.id)
|
||||||
|
WHERE f.id IN (SELECT user_id
|
||||||
|
FROM raw_events_second));
|
||||||
|
|
||||||
|
-- cannot push down since top level user_id is matched with NOT IN
|
||||||
|
INSERT INTO raw_events_second
|
||||||
|
(user_id)
|
||||||
|
SELECT user_id
|
||||||
|
FROM raw_events_first
|
||||||
|
WHERE user_id NOT IN (
|
||||||
|
SELECT f2.id FROM
|
||||||
|
(SELECT
|
||||||
|
id
|
||||||
|
FROM (SELECT reference_table.user_id AS id
|
||||||
|
FROM raw_events_first,
|
||||||
|
reference_table
|
||||||
|
WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f
|
||||||
|
INNER JOIN
|
||||||
|
(SELECT v4,
|
||||||
|
v1,
|
||||||
|
id
|
||||||
|
FROM (SELECT SUM(raw_events_second.value_4) AS v4,
|
||||||
|
SUM(raw_events_first.value_1) AS v1,
|
||||||
|
raw_events_second.user_id AS id
|
||||||
|
FROM raw_events_first,
|
||||||
|
raw_events_second
|
||||||
|
WHERE raw_events_first.user_id = raw_events_second.user_id
|
||||||
|
GROUP BY raw_events_second.user_id
|
||||||
|
HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2
|
||||||
|
ON (f.id = f2.id)
|
||||||
|
WHERE f.id IN (SELECT user_id
|
||||||
|
FROM raw_events_second));
|
||||||
|
|
||||||
|
-- cannot push down since join is not equi join (f.id > f2.id)
|
||||||
|
INSERT INTO raw_events_second
|
||||||
|
(user_id)
|
||||||
|
SELECT user_id
|
||||||
|
FROM raw_events_first
|
||||||
|
WHERE user_id IN (
|
||||||
|
SELECT f2.id FROM
|
||||||
|
(SELECT
|
||||||
|
id
|
||||||
|
FROM (SELECT reference_table.user_id AS id
|
||||||
|
FROM raw_events_first,
|
||||||
|
reference_table
|
||||||
|
WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f
|
||||||
|
INNER JOIN
|
||||||
|
(SELECT v4,
|
||||||
|
v1,
|
||||||
|
id
|
||||||
|
FROM (SELECT SUM(raw_events_second.value_4) AS v4,
|
||||||
|
SUM(raw_events_first.value_1) AS v1,
|
||||||
|
raw_events_second.user_id AS id
|
||||||
|
FROM raw_events_first,
|
||||||
|
raw_events_second
|
||||||
|
WHERE raw_events_first.user_id = raw_events_second.user_id
|
||||||
|
GROUP BY raw_events_second.user_id
|
||||||
|
HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2
|
||||||
|
ON (f.id > f2.id)
|
||||||
|
WHERE f.id IN (SELECT user_id
|
||||||
|
FROM raw_events_second));
|
||||||
|
|
||||||
-- we currently not support grouping sets
|
-- we currently not support grouping sets
|
||||||
INSERT INTO agg_events
|
INSERT INTO agg_events
|
||||||
|
@ -1198,8 +1387,30 @@ SET client_min_messages TO DEBUG2;
|
||||||
-- this should also work
|
-- this should also work
|
||||||
INSERT INTO raw_events_first SELECT * FROM raw_events_second WHERE user_id = 5;
|
INSERT INTO raw_events_first SELECT * FROM raw_events_second WHERE user_id = 5;
|
||||||
|
|
||||||
|
|
||||||
SET client_min_messages TO INFO;
|
SET client_min_messages TO INFO;
|
||||||
|
|
||||||
|
-- now do some tests with varchars
|
||||||
|
INSERT INTO insert_select_varchar_test VALUES ('test_1', 10);
|
||||||
|
INSERT INTO insert_select_varchar_test VALUES ('test_2', 30);
|
||||||
|
|
||||||
|
INSERT INTO insert_select_varchar_test (key, value)
|
||||||
|
SELECT *, 100
|
||||||
|
FROM (SELECT f1.key
|
||||||
|
FROM (SELECT key
|
||||||
|
FROM insert_select_varchar_test
|
||||||
|
GROUP BY 1
|
||||||
|
HAVING Count(key) < 3) AS f1,
|
||||||
|
(SELECT key
|
||||||
|
FROM insert_select_varchar_test
|
||||||
|
GROUP BY 1
|
||||||
|
HAVING Sum(COALESCE(insert_select_varchar_test.value, 0)) >
|
||||||
|
20.0)
|
||||||
|
AS f2
|
||||||
|
WHERE f1.key = f2.key
|
||||||
|
GROUP BY 1) AS foo;
|
||||||
|
|
||||||
|
SELECT * FROM insert_select_varchar_test;
|
||||||
|
|
||||||
-- some tests with DEFAULT columns and constant values
|
-- some tests with DEFAULT columns and constant values
|
||||||
-- this test is mostly importantly intended for deparsing the query correctly
|
-- this test is mostly importantly intended for deparsing the query correctly
|
||||||
-- but still it is preferable to have this test here instead of multi_deparse_shard_query
|
-- but still it is preferable to have this test here instead of multi_deparse_shard_query
|
||||||
|
|
|
@ -0,0 +1,420 @@
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- Vanilla funnel query
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
INSERT INTO agg_results (user_id, value_1_agg)
|
||||||
|
SELECT user_id, array_length(events_table, 1)
|
||||||
|
FROM (
|
||||||
|
SELECT user_id, array_agg(event ORDER BY time) AS events_table
|
||||||
|
FROM (
|
||||||
|
SELECT u.user_id, e.event_type::text AS event, e.time
|
||||||
|
FROM users_table AS u,
|
||||||
|
events_table AS e
|
||||||
|
WHERE u.user_id = e.user_id
|
||||||
|
AND u.user_id >= 10
|
||||||
|
AND u.user_id <= 25
|
||||||
|
AND e.event_type IN (100, 101, 102)
|
||||||
|
) t
|
||||||
|
GROUP BY user_id
|
||||||
|
) q;
|
||||||
|
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
|
||||||
|
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- Funnel grouped by whether or not a user has done an event
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
|
||||||
|
INSERT INTO agg_results (user_id, value_1_agg, value_2_agg )
|
||||||
|
SELECT user_id, sum(array_length(events_table, 1)), length(hasdone_event)
|
||||||
|
FROM (
|
||||||
|
SELECT
|
||||||
|
t1.user_id,
|
||||||
|
array_agg(event ORDER BY time) AS events_table,
|
||||||
|
COALESCE(hasdone_event, 'Has not done event') AS hasdone_event
|
||||||
|
FROM (
|
||||||
|
(
|
||||||
|
SELECT u.user_id, 'step=>1'::text AS event, e.time
|
||||||
|
FROM users_table AS u,
|
||||||
|
events_table AS e
|
||||||
|
WHERE u.user_id = e.user_id
|
||||||
|
AND u.user_id >= 10
|
||||||
|
AND u.user_id <= 25
|
||||||
|
AND e.event_type IN (100, 101, 102)
|
||||||
|
)
|
||||||
|
UNION
|
||||||
|
(
|
||||||
|
SELECT u.user_id, 'step=>2'::text AS event, e.time
|
||||||
|
FROM users_table AS u,
|
||||||
|
events_table AS e
|
||||||
|
WHERE u.user_id = e.user_id
|
||||||
|
AND u.user_id >= 10
|
||||||
|
AND u.user_id <= 25
|
||||||
|
AND e.event_type IN (103, 104, 105)
|
||||||
|
)
|
||||||
|
) t1 LEFT JOIN (
|
||||||
|
SELECT DISTINCT user_id,
|
||||||
|
'Has done event'::TEXT AS hasdone_event
|
||||||
|
FROM events_table AS e
|
||||||
|
|
||||||
|
WHERE e.user_id >= 10
|
||||||
|
AND e.user_id <= 25
|
||||||
|
AND e.event_type IN (106, 107, 108)
|
||||||
|
|
||||||
|
) t2 ON (t1.user_id = t2.user_id)
|
||||||
|
GROUP BY t1.user_id, hasdone_event
|
||||||
|
) t GROUP BY user_id, hasdone_event;
|
||||||
|
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
-- SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- Funnel, grouped by the number of times a user has done an event
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
|
||||||
|
INSERT INTO agg_results (user_id, value_1_agg, value_2_agg)
|
||||||
|
SELECT
|
||||||
|
user_id,
|
||||||
|
avg(array_length(events_table, 1)) AS event_average,
|
||||||
|
count_pay
|
||||||
|
FROM (
|
||||||
|
SELECT
|
||||||
|
subquery_1.user_id,
|
||||||
|
array_agg(event ORDER BY time) AS events_table,
|
||||||
|
COALESCE(count_pay, 0) AS count_pay
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
(SELECT
|
||||||
|
users_table.user_id,
|
||||||
|
'action=>1'AS event,
|
||||||
|
events_table.time
|
||||||
|
FROM
|
||||||
|
users_table,
|
||||||
|
events_table
|
||||||
|
WHERE
|
||||||
|
users_table.user_id = events_table.user_id AND
|
||||||
|
users_table.user_id >= 10 AND
|
||||||
|
users_table.user_id <= 70 AND
|
||||||
|
events_table.event_type > 10 AND events_table.event_type < 12
|
||||||
|
)
|
||||||
|
UNION
|
||||||
|
(SELECT
|
||||||
|
users_table.user_id,
|
||||||
|
'action=>2'AS event,
|
||||||
|
events_table.time
|
||||||
|
FROM
|
||||||
|
users_table,
|
||||||
|
events_table
|
||||||
|
WHERE
|
||||||
|
users_table.user_id = events_table.user_id AND
|
||||||
|
users_table.user_id >= 10 AND
|
||||||
|
users_table.user_id <= 70 AND
|
||||||
|
events_table.event_type > 12 AND events_table.event_type < 14
|
||||||
|
)
|
||||||
|
) AS subquery_1
|
||||||
|
LEFT JOIN
|
||||||
|
(SELECT
|
||||||
|
user_id,
|
||||||
|
COUNT(*) AS count_pay
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
WHERE
|
||||||
|
user_id >= 10 AND
|
||||||
|
user_id <= 70 AND
|
||||||
|
users_table.value_1 > 15 AND users_table.value_1 < 17
|
||||||
|
GROUP BY
|
||||||
|
user_id
|
||||||
|
HAVING
|
||||||
|
COUNT(*) > 1) AS subquery_2
|
||||||
|
ON
|
||||||
|
subquery_1.user_id = subquery_2.user_id
|
||||||
|
GROUP BY
|
||||||
|
subquery_1.user_id,
|
||||||
|
count_pay) AS subquery_top
|
||||||
|
WHERE
|
||||||
|
array_ndims(events_table) > 0
|
||||||
|
GROUP BY
|
||||||
|
count_pay, user_id
|
||||||
|
ORDER BY
|
||||||
|
count_pay;
|
||||||
|
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
-- SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- Most recently seen users_table events_table
|
||||||
|
------------------------------------
|
||||||
|
-- Note that we don't use ORDER BY/LIMIT yet
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
|
||||||
|
INSERT INTO agg_results (user_id, agg_time, value_2_agg)
|
||||||
|
SELECT
|
||||||
|
user_id,
|
||||||
|
user_lastseen,
|
||||||
|
array_length(event_array, 1)
|
||||||
|
FROM (
|
||||||
|
SELECT
|
||||||
|
user_id,
|
||||||
|
max(u.time) as user_lastseen,
|
||||||
|
array_agg(event_type ORDER BY u.time) AS event_array
|
||||||
|
FROM (
|
||||||
|
|
||||||
|
SELECT user_id, time
|
||||||
|
FROM users_table
|
||||||
|
WHERE
|
||||||
|
user_id >= 10 AND
|
||||||
|
user_id <= 70 AND
|
||||||
|
users_table.value_1 > 10 AND users_table.value_1 < 12
|
||||||
|
|
||||||
|
) u LEFT JOIN LATERAL (
|
||||||
|
SELECT event_type, time
|
||||||
|
FROM events_table
|
||||||
|
WHERE user_id = u.user_id AND
|
||||||
|
events_table.event_type > 10 AND events_table.event_type < 12
|
||||||
|
) t ON true
|
||||||
|
GROUP BY user_id
|
||||||
|
) AS shard_union
|
||||||
|
ORDER BY user_lastseen DESC;
|
||||||
|
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- Count the number of distinct users_table who are in segment X and Y and Z
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
|
||||||
|
INSERT INTO agg_results (user_id)
|
||||||
|
SELECT DISTINCT user_id
|
||||||
|
FROM users_table
|
||||||
|
WHERE user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 10 AND value_1 <= 20)
|
||||||
|
AND user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 30 AND value_1 <= 40)
|
||||||
|
AND user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 50 AND value_1 <= 60);
|
||||||
|
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- Count the number of distinct users_table who are in at least two of X and Y and Z segments
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
|
||||||
|
INSERT INTO agg_results(user_id)
|
||||||
|
SELECT user_id
|
||||||
|
FROM users_table
|
||||||
|
WHERE (value_1 = 10
|
||||||
|
OR value_1 = 11
|
||||||
|
OR value_1 = 12)
|
||||||
|
GROUP BY user_id
|
||||||
|
HAVING count(distinct value_1) >= 2;
|
||||||
|
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- Find customers who have done X, and satisfy other customer specific criteria
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
|
||||||
|
INSERT INTO agg_results(user_id, value_2_agg)
|
||||||
|
SELECT user_id, value_2 FROM users_table WHERE
|
||||||
|
value_1 > 101 AND value_1 < 110
|
||||||
|
AND value_2 >= 5
|
||||||
|
AND EXISTS (SELECT user_id FROM events_table WHERE event_type>101 AND event_type < 110 AND value_3 > 100 AND user_id=users_table.user_id);
|
||||||
|
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- Customers who haven’t done X, and satisfy other customer specific criteria
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
|
||||||
|
INSERT INTO agg_results(user_id, value_2_agg)
|
||||||
|
SELECT user_id, value_2 FROM users_table WHERE
|
||||||
|
value_1 = 101
|
||||||
|
AND value_2 >= 5
|
||||||
|
AND NOT EXISTS (SELECT user_id FROM events_table WHERE event_type=101 AND value_3 > 100 AND user_id=users_table.user_id);
|
||||||
|
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- Customers who have done X and Y, and satisfy other customer specific criteria
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
|
||||||
|
INSERT INTO agg_results(user_id, value_2_agg)
|
||||||
|
SELECT user_id, value_2 FROM users_table WHERE
|
||||||
|
value_1 > 100
|
||||||
|
AND value_2 >= 5
|
||||||
|
AND EXISTS (SELECT user_id FROM events_table WHERE event_type!=100 AND value_3 > 100 AND user_id=users_table.user_id)
|
||||||
|
AND EXISTS (SELECT user_id FROM events_table WHERE event_type=101 AND value_3 > 100 AND user_id=users_table.user_id);
|
||||||
|
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- Customers who have done X and haven’t done Y, and satisfy other customer specific criteria
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
|
||||||
|
INSERT INTO agg_results(user_id, value_2_agg)
|
||||||
|
SELECT user_id, value_2 FROM users_table WHERE
|
||||||
|
value_2 >= 5
|
||||||
|
AND EXISTS (SELECT user_id FROM events_table WHERE event_type > 100 AND event_type <= 300 AND value_3 > 100 AND user_id=users_table.user_id)
|
||||||
|
AND NOT EXISTS (SELECT user_id FROM events_table WHERE event_type > 300 AND event_type <= 350 AND value_3 > 100 AND user_id=users_table.user_id);
|
||||||
|
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- Customers who have done X more than 2 times, and satisfy other customer specific criteria
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
|
||||||
|
INSERT INTO agg_results(user_id, value_2_agg)
|
||||||
|
SELECT user_id,
|
||||||
|
value_2
|
||||||
|
FROM users_table
|
||||||
|
WHERE value_1 > 100
|
||||||
|
AND value_1 < 124
|
||||||
|
AND value_2 >= 5
|
||||||
|
AND EXISTS (SELECT user_id
|
||||||
|
FROM events_table
|
||||||
|
WHERE event_type > 100
|
||||||
|
AND event_type < 124
|
||||||
|
AND value_3 > 100
|
||||||
|
AND user_id = users_table.user_id
|
||||||
|
GROUP BY user_id
|
||||||
|
HAVING Count(*) > 2);
|
||||||
|
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- Find me all users_table who logged in more than once
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
|
||||||
|
INSERT INTO agg_results(user_id, value_1_agg)
|
||||||
|
SELECT user_id, value_1 from
|
||||||
|
(
|
||||||
|
SELECT user_id, value_1 From users_table
|
||||||
|
WHERE value_2 > 100 and user_id = 15 GROUP BY value_1, user_id HAVING count(*) > 1
|
||||||
|
) as a;
|
||||||
|
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- Find me all users_table who have done some event and have filters
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
|
||||||
|
INSERT INTO agg_results(user_id)
|
||||||
|
Select user_id
|
||||||
|
From events_table
|
||||||
|
Where event_type = 16
|
||||||
|
And value_2 > 50
|
||||||
|
And user_id in
|
||||||
|
(select user_id
|
||||||
|
From users_table
|
||||||
|
Where value_1 = 15
|
||||||
|
And value_2 > 25);
|
||||||
|
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- Which events_table did people who has done some specific events_table
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
|
||||||
|
INSERT INTO agg_results(user_id, value_1_agg)
|
||||||
|
SELECT user_id, event_type FROM events_table
|
||||||
|
WHERE user_id in (SELECT user_id from events_table WHERE event_type > 500 and event_type < 505)
|
||||||
|
GROUP BY user_id, event_type;
|
||||||
|
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- Find me all the users_table who have done some event more than three times
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
|
||||||
|
INSERT INTO agg_results(user_id)
|
||||||
|
select user_id from
|
||||||
|
(
|
||||||
|
select
|
||||||
|
user_id
|
||||||
|
from
|
||||||
|
events_table
|
||||||
|
where event_type = 901 group by user_id having count(*) > 3
|
||||||
|
) as a;
|
||||||
|
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- Find my assets that have the highest probability and fetch their metadata
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
TRUNCATE agg_results;
|
||||||
|
|
||||||
|
INSERT INTO agg_results(user_id, value_1_agg, value_3_agg)
|
||||||
|
SELECT
|
||||||
|
users_table.user_id, users_table.value_1, prob
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
JOIN
|
||||||
|
(SELECT
|
||||||
|
ma.user_id, (GREATEST(coalesce(ma.value_4 / 250, 0.0) + GREATEST(1.0))) / 2 AS prob
|
||||||
|
FROM
|
||||||
|
users_table AS ma, events_table as short_list
|
||||||
|
WHERE
|
||||||
|
short_list.user_id = ma.user_id and ma.value_1 < 50 and short_list.event_type < 50
|
||||||
|
) temp
|
||||||
|
ON users_table.user_id = temp.user_id
|
||||||
|
WHERE users_table.value_1 < 50;
|
||||||
|
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||||
|
|
|
@ -0,0 +1,401 @@
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- Vanilla funnel query -- single shard
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
TRUNCATE agg_results_second;
|
||||||
|
|
||||||
|
INSERT INTO agg_results_second (user_id, value_1_agg)
|
||||||
|
SELECT user_id, array_length(events_table, 1)
|
||||||
|
FROM (
|
||||||
|
SELECT user_id, array_agg(event ORDER BY time) AS events_table
|
||||||
|
FROM (
|
||||||
|
SELECT u.user_id, e.event_type::text AS event, e.time
|
||||||
|
FROM users_table AS u,
|
||||||
|
events_table AS e
|
||||||
|
WHERE u.user_id = e.user_id
|
||||||
|
AND u.user_id >= 10
|
||||||
|
AND u.user_id <= 25
|
||||||
|
AND e.event_type IN (100, 101, 102)
|
||||||
|
) t
|
||||||
|
GROUP BY user_id
|
||||||
|
) q
|
||||||
|
WHERE user_id = 20;
|
||||||
|
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results_second;
|
||||||
|
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- Vanilla funnel query -- two shards
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
TRUNCATE agg_results_second;
|
||||||
|
|
||||||
|
INSERT INTO agg_results_second (user_id, value_1_agg)
|
||||||
|
SELECT user_id, array_length(events_table, 1)
|
||||||
|
FROM (
|
||||||
|
SELECT user_id, array_agg(event ORDER BY time) AS events_table
|
||||||
|
FROM (
|
||||||
|
SELECT u.user_id, e.event_type::text AS event, e.time
|
||||||
|
FROM users_table AS u,
|
||||||
|
events_table AS e
|
||||||
|
WHERE u.user_id = e.user_id AND
|
||||||
|
(u.user_id = 13 OR u.user_id = 20) AND
|
||||||
|
(e.user_id = 13 OR e.user_id = 20)
|
||||||
|
AND e.event_type IN (100, 101, 102)
|
||||||
|
) t
|
||||||
|
GROUP BY user_id
|
||||||
|
) q
|
||||||
|
WHERE (user_id = 13 OR user_id = 20);
|
||||||
|
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results_second;
|
||||||
|
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- Funnel grouped by whether or not a user has done an event -- single shard query
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
TRUNCATE agg_results_second;
|
||||||
|
|
||||||
|
INSERT INTO agg_results_second (user_id, value_1_agg, value_2_agg )
|
||||||
|
SELECT user_id, sum(array_length(events_table, 1)), length(hasdone_event)
|
||||||
|
FROM (
|
||||||
|
SELECT
|
||||||
|
t1.user_id,
|
||||||
|
array_agg(event ORDER BY time) AS events_table,
|
||||||
|
COALESCE(hasdone_event, 'Has not done event') AS hasdone_event
|
||||||
|
FROM (
|
||||||
|
(
|
||||||
|
SELECT u.user_id, 'step=>1'::text AS event, e.time
|
||||||
|
FROM users_table AS u,
|
||||||
|
events_table AS e
|
||||||
|
WHERE u.user_id = e.user_id
|
||||||
|
AND u.user_id >= 10
|
||||||
|
AND u.user_id <= 25
|
||||||
|
AND e.event_type IN (100, 101, 102)
|
||||||
|
)
|
||||||
|
UNION
|
||||||
|
(
|
||||||
|
SELECT u.user_id, 'step=>2'::text AS event, e.time
|
||||||
|
FROM users_table AS u,
|
||||||
|
events_table AS e
|
||||||
|
WHERE u.user_id = e.user_id
|
||||||
|
AND u.user_id >= 10
|
||||||
|
AND u.user_id <= 25
|
||||||
|
AND e.event_type IN (103, 104, 105)
|
||||||
|
)
|
||||||
|
) t1 LEFT JOIN (
|
||||||
|
SELECT DISTINCT user_id,
|
||||||
|
'Has done event'::TEXT AS hasdone_event
|
||||||
|
FROM events_table AS e
|
||||||
|
|
||||||
|
WHERE e.user_id >= 10
|
||||||
|
AND e.user_id <= 25
|
||||||
|
AND e.event_type IN (106, 107, 108)
|
||||||
|
) t2 ON (t1.user_id = t2.user_id)
|
||||||
|
WHERE t1.user_id = 20
|
||||||
|
GROUP BY t1.user_id, hasdone_event
|
||||||
|
) t GROUP BY user_id, hasdone_event;
|
||||||
|
|
||||||
|
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- Funnel grouped by whether or not a user has done an event -- two shards query
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
TRUNCATE agg_results_second;
|
||||||
|
|
||||||
|
INSERT INTO agg_results_second (user_id, value_1_agg, value_2_agg )
|
||||||
|
SELECT user_id, sum(array_length(events_table, 1)), length(hasdone_event)
|
||||||
|
FROM (
|
||||||
|
SELECT
|
||||||
|
t1.user_id,
|
||||||
|
array_agg(event ORDER BY time) AS events_table,
|
||||||
|
COALESCE(hasdone_event, 'Has not done event') AS hasdone_event
|
||||||
|
FROM (
|
||||||
|
(
|
||||||
|
SELECT u.user_id, 'step=>1'::text AS event, e.time
|
||||||
|
FROM users_table AS u,
|
||||||
|
events_table AS e
|
||||||
|
WHERE u.user_id = e.user_id
|
||||||
|
AND (e.user_id = 20 OR e.user_id = 17)
|
||||||
|
AND e.event_type IN (100, 101, 102)
|
||||||
|
)
|
||||||
|
UNION
|
||||||
|
(
|
||||||
|
SELECT u.user_id, 'step=>2'::text AS event, e.time
|
||||||
|
FROM users_table AS u,
|
||||||
|
events_table AS e
|
||||||
|
WHERE u.user_id = e.user_id
|
||||||
|
AND (e.user_id = 20 OR e.user_id = 17)
|
||||||
|
AND e.event_type IN (103, 104, 105)
|
||||||
|
)
|
||||||
|
) t1 LEFT JOIN (
|
||||||
|
SELECT DISTINCT user_id,
|
||||||
|
'Has done event'::TEXT AS hasdone_event
|
||||||
|
FROM events_table AS e
|
||||||
|
|
||||||
|
WHERE
|
||||||
|
(e.user_id = 20 OR e.user_id = 17)
|
||||||
|
AND e.event_type IN (106, 107, 108)
|
||||||
|
) t2 ON (t1.user_id = t2.user_id)
|
||||||
|
WHERE (t1.user_id = 20 OR t1.user_id = 17)
|
||||||
|
GROUP BY t1.user_id, hasdone_event
|
||||||
|
) t GROUP BY user_id, hasdone_event;
|
||||||
|
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
-- SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results_second;
|
||||||
|
|
||||||
|
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- Most recently seen users_table events_table -- single shard query
|
||||||
|
------------------------------------
|
||||||
|
-- Note that we don't use ORDER BY/LIMIT yet
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
TRUNCATE agg_results_second;
|
||||||
|
|
||||||
|
INSERT INTO agg_results_second (user_id, agg_time, value_2_agg)
|
||||||
|
SELECT
|
||||||
|
user_id,
|
||||||
|
user_lastseen,
|
||||||
|
array_length(event_array, 1)
|
||||||
|
FROM (
|
||||||
|
SELECT
|
||||||
|
user_id,
|
||||||
|
max(u.time) as user_lastseen,
|
||||||
|
array_agg(event_type ORDER BY u.time) AS event_array
|
||||||
|
FROM (
|
||||||
|
|
||||||
|
SELECT user_id, time
|
||||||
|
FROM users_table
|
||||||
|
WHERE
|
||||||
|
user_id >= 10 AND
|
||||||
|
user_id <= 70 AND
|
||||||
|
users_table.value_1 > 10 AND users_table.value_1 < 12
|
||||||
|
|
||||||
|
) u LEFT JOIN LATERAL (
|
||||||
|
SELECT event_type, time
|
||||||
|
FROM events_table
|
||||||
|
WHERE user_id = u.user_id AND
|
||||||
|
events_table.event_type > 10 AND events_table.event_type < 12
|
||||||
|
) t ON true
|
||||||
|
WHERE user_id = 65
|
||||||
|
GROUP BY user_id
|
||||||
|
) AS shard_union
|
||||||
|
ORDER BY user_lastseen DESC;
|
||||||
|
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results_second;
|
||||||
|
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- Most recently seen users_table events_table -- two shards query
|
||||||
|
------------------------------------
|
||||||
|
-- Note that we don't use ORDER BY/LIMIT yet
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
TRUNCATE agg_results_second;
|
||||||
|
|
||||||
|
INSERT INTO agg_results_second (user_id, agg_time, value_2_agg)
|
||||||
|
SELECT
|
||||||
|
user_id,
|
||||||
|
user_lastseen,
|
||||||
|
array_length(event_array, 1)
|
||||||
|
FROM (
|
||||||
|
SELECT
|
||||||
|
user_id,
|
||||||
|
max(u.time) as user_lastseen,
|
||||||
|
array_agg(event_type ORDER BY u.time) AS event_array
|
||||||
|
FROM (
|
||||||
|
|
||||||
|
SELECT user_id, time
|
||||||
|
FROM users_table
|
||||||
|
WHERE
|
||||||
|
user_id >= 10 AND
|
||||||
|
user_id <= 70 AND
|
||||||
|
(user_id = 65 OR user_id = 12) AND
|
||||||
|
users_table.value_1 > 10 AND users_table.value_1 < 12
|
||||||
|
|
||||||
|
) u LEFT JOIN LATERAL (
|
||||||
|
SELECT event_type, time
|
||||||
|
FROM events_table
|
||||||
|
WHERE user_id = u.user_id AND (user_id = 65 OR user_id = 12) AND
|
||||||
|
events_table.event_type > 10 AND events_table.event_type < 12
|
||||||
|
) t ON true
|
||||||
|
WHERE (user_id = 65 OR user_id = 12)
|
||||||
|
GROUP BY user_id
|
||||||
|
) AS shard_union
|
||||||
|
ORDER BY user_lastseen DESC;
|
||||||
|
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results_second;
|
||||||
|
|
||||||
|
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- Count the number of distinct users_table who are in segment X and Y and Z -- single shard
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
|
||||||
|
TRUNCATE agg_results_second;
|
||||||
|
|
||||||
|
INSERT INTO agg_results_second (user_id)
|
||||||
|
SELECT DISTINCT user_id
|
||||||
|
FROM users_table
|
||||||
|
WHERE user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 10 AND value_1 <= 20)
|
||||||
|
AND user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 30 AND value_1 <= 40)
|
||||||
|
AND user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 50 AND value_1 <= 60)
|
||||||
|
AND user_id = 7;
|
||||||
|
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results_second;
|
||||||
|
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- Count the number of distinct users_table who are in segment X and Y and Z -- two shards
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
|
||||||
|
TRUNCATE agg_results_second;
|
||||||
|
|
||||||
|
INSERT INTO agg_results_second (user_id)
|
||||||
|
SELECT DISTINCT user_id
|
||||||
|
FROM users_table
|
||||||
|
WHERE user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 10 AND value_1 <= 20 AND (user_id = 7 OR user_id = 20))
|
||||||
|
AND user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 30 AND value_1 <= 40 AND (user_id = 7 OR user_id = 20))
|
||||||
|
AND user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 50 AND value_1 <= 60 AND (user_id = 7 OR user_id = 20))
|
||||||
|
AND (user_id = 7 OR user_id = 20);
|
||||||
|
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results_second;
|
||||||
|
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- Find customers who have done X, and satisfy other customer specific criteria -- single shard
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
TRUNCATE agg_results_second;
|
||||||
|
|
||||||
|
INSERT INTO agg_results_second(user_id, value_2_agg)
|
||||||
|
SELECT user_id, value_2 FROM users_table WHERE
|
||||||
|
value_1 > 101 AND value_1 < 110
|
||||||
|
AND value_2 >= 5
|
||||||
|
AND EXISTS (SELECT user_id FROM events_table WHERE event_type>101 AND event_type < 110 AND value_3 > 100 AND user_id=users_table.user_id)
|
||||||
|
AND user_id = 61;
|
||||||
|
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results_second;
|
||||||
|
|
||||||
|
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- Find customers who have done X, and satisfy other customer specific criteria -- two shards
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
TRUNCATE agg_results_second;
|
||||||
|
|
||||||
|
INSERT INTO agg_results_second(user_id, value_2_agg)
|
||||||
|
SELECT user_id, value_2 FROM users_table WHERE
|
||||||
|
value_1 > 101 AND value_1 < 110
|
||||||
|
AND value_2 >= 5
|
||||||
|
AND EXISTS (SELECT user_id FROM events_table WHERE event_type>101 AND event_type < 110 AND value_3 > 100 AND (user_id = 61 OR user_id = 51) AND user_id=users_table.user_id)
|
||||||
|
AND (user_id = 61 OR user_id = 51);
|
||||||
|
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results_second;
|
||||||
|
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- Customers who have done X and haven’t done Y, and satisfy other customer specific criteria -- single shard
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
TRUNCATE agg_results_second;
|
||||||
|
|
||||||
|
INSERT INTO agg_results_second(user_id, value_2_agg)
|
||||||
|
SELECT user_id, value_2 FROM users_table WHERE
|
||||||
|
value_2 >= 5
|
||||||
|
AND user_id = 96
|
||||||
|
AND EXISTS (SELECT user_id FROM events_table WHERE event_type > 100 AND event_type <= 300 AND value_3 > 100 AND user_id=users_table.user_id)
|
||||||
|
AND NOT EXISTS (SELECT user_id FROM events_table WHERE event_type > 300 AND event_type <= 350 AND value_3 > 100 AND user_id=users_table.user_id);
|
||||||
|
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results_second;
|
||||||
|
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- Customers who have done X and haven’t done Y, and satisfy other customer specific criteria -- two shards
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
TRUNCATE agg_results_second;
|
||||||
|
|
||||||
|
INSERT INTO agg_results_second(user_id, value_2_agg)
|
||||||
|
SELECT user_id, value_2 FROM users_table WHERE
|
||||||
|
value_2 >= 5
|
||||||
|
AND (user_id = 96 OR user_id = 8)
|
||||||
|
AND EXISTS (SELECT user_id FROM events_table WHERE event_type > 100 AND event_type <= 300 AND value_3 > 100 AND user_id=users_table.user_id AND (user_id = 96 OR user_id = 8))
|
||||||
|
AND NOT EXISTS (SELECT user_id FROM events_table WHERE event_type > 300 AND event_type <= 350 AND value_3 > 100 AND user_id=users_table.user_id AND (user_id = 96 OR user_id = 8));
|
||||||
|
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results_second;
|
||||||
|
|
||||||
|
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- Customers who have done X more than 2 times, and satisfy other customer specific criteria -- single shard
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
TRUNCATE agg_results_second;
|
||||||
|
|
||||||
|
INSERT INTO agg_results_second(user_id, value_2_agg)
|
||||||
|
SELECT user_id,
|
||||||
|
value_2
|
||||||
|
FROM users_table
|
||||||
|
WHERE value_1 > 100
|
||||||
|
AND value_1 < 124
|
||||||
|
AND value_2 >= 5
|
||||||
|
AND user_id = 47
|
||||||
|
AND EXISTS (SELECT user_id
|
||||||
|
FROM events_table
|
||||||
|
WHERE event_type > 100
|
||||||
|
AND event_type < 124
|
||||||
|
AND value_3 > 100
|
||||||
|
AND user_id = users_table.user_id
|
||||||
|
AND user_id = 47
|
||||||
|
GROUP BY user_id
|
||||||
|
HAVING Count(*) > 2);
|
||||||
|
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results_second;
|
||||||
|
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- Customers who have done X more than 2 times, and satisfy other customer specific criteria -- two shards
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
TRUNCATE agg_results_second;
|
||||||
|
|
||||||
|
INSERT INTO agg_results_second(user_id, value_2_agg)
|
||||||
|
SELECT user_id,
|
||||||
|
value_2
|
||||||
|
FROM users_table
|
||||||
|
WHERE value_1 > 100
|
||||||
|
AND value_1 < 124
|
||||||
|
AND value_2 >= 5
|
||||||
|
AND (user_id = 47 or user_id = 81)
|
||||||
|
AND EXISTS (SELECT user_id
|
||||||
|
FROM events_table
|
||||||
|
WHERE event_type > 100
|
||||||
|
AND event_type < 124
|
||||||
|
AND value_3 > 100
|
||||||
|
AND user_id = users_table.user_id
|
||||||
|
AND (user_id = 47 or user_id = 81)
|
||||||
|
GROUP BY user_id
|
||||||
|
HAVING Count(*) > 2);
|
||||||
|
|
||||||
|
-- get some statistics from the aggregated results to ensure the results are correct
|
||||||
|
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results_second;
|
||||||
|
|
|
@ -0,0 +1,651 @@
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- Vanilla funnel query
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
|
||||||
|
-- not pushable since the JOIN is not an equi join
|
||||||
|
INSERT INTO agg_results_third (user_id, value_1_agg)
|
||||||
|
SELECT user_id, array_length(events_table, 1)
|
||||||
|
FROM (
|
||||||
|
SELECT user_id, array_agg(event ORDER BY time) AS events_table
|
||||||
|
FROM (
|
||||||
|
SELECT u.user_id, e.event_type::text AS event, e.time
|
||||||
|
FROM users_table AS u,
|
||||||
|
events_table AS e
|
||||||
|
WHERE u.user_id != e.user_id
|
||||||
|
AND u.user_id >= 10
|
||||||
|
AND u.user_id <= 25
|
||||||
|
AND e.event_type IN (100, 101, 102)
|
||||||
|
) t
|
||||||
|
GROUP BY user_id
|
||||||
|
) q;
|
||||||
|
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- Funnel grouped by whether or not a user has done an event
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
|
||||||
|
-- not pushable since the JOIN in the left part of the UNION
|
||||||
|
-- is not equi join
|
||||||
|
INSERT INTO agg_results_third (user_id, value_1_agg, value_2_agg )
|
||||||
|
SELECT user_id, sum(array_length(events_table, 1)), length(hasdone_event)
|
||||||
|
FROM (
|
||||||
|
SELECT
|
||||||
|
t1.user_id,
|
||||||
|
array_agg(event ORDER BY time) AS events_table,
|
||||||
|
COALESCE(hasdone_event, 'Has not done event') AS hasdone_event
|
||||||
|
FROM (
|
||||||
|
(
|
||||||
|
SELECT u.user_id, 'step=>1'::text AS event, e.time
|
||||||
|
FROM users_table AS u,
|
||||||
|
events_table AS e
|
||||||
|
WHERE u.user_id != e.user_id
|
||||||
|
AND u.user_id >= 10
|
||||||
|
AND u.user_id <= 25
|
||||||
|
AND e.event_type IN (100, 101, 102)
|
||||||
|
)
|
||||||
|
UNION
|
||||||
|
(
|
||||||
|
SELECT u.user_id, 'step=>2'::text AS event, e.time
|
||||||
|
FROM users_table AS u,
|
||||||
|
events_table AS e
|
||||||
|
WHERE u.user_id = e.user_id
|
||||||
|
AND u.user_id >= 10
|
||||||
|
AND u.user_id <= 25
|
||||||
|
AND e.event_type IN (103, 104, 105)
|
||||||
|
)
|
||||||
|
) t1 LEFT JOIN (
|
||||||
|
SELECT DISTINCT user_id,
|
||||||
|
'Has done event'::TEXT AS hasdone_event
|
||||||
|
FROM events_table AS e
|
||||||
|
|
||||||
|
WHERE e.user_id >= 10
|
||||||
|
AND e.user_id <= 25
|
||||||
|
AND e.event_type IN (106, 107, 108)
|
||||||
|
|
||||||
|
) t2 ON (t1.user_id = t2.user_id)
|
||||||
|
GROUP BY t1.user_id, hasdone_event
|
||||||
|
) t GROUP BY user_id, hasdone_event;
|
||||||
|
|
||||||
|
-- not pushable since the right part of the UNION
|
||||||
|
-- is not joined on the partition key
|
||||||
|
INSERT INTO agg_results_third (user_id, value_1_agg, value_2_agg )
|
||||||
|
SELECT user_id, sum(array_length(events_table, 1)), length(hasdone_event)
|
||||||
|
FROM (
|
||||||
|
SELECT
|
||||||
|
t1.user_id,
|
||||||
|
array_agg(event ORDER BY time) AS events_table,
|
||||||
|
COALESCE(hasdone_event, 'Has not done event') AS hasdone_event
|
||||||
|
FROM (
|
||||||
|
(
|
||||||
|
SELECT u.user_id, 'step=>1'::text AS event, e.time
|
||||||
|
FROM users_table AS u,
|
||||||
|
events_table AS e
|
||||||
|
WHERE u.user_id = e.user_id
|
||||||
|
AND u.user_id >= 10
|
||||||
|
AND u.user_id <= 25
|
||||||
|
AND e.event_type IN (100, 101, 102)
|
||||||
|
)
|
||||||
|
UNION
|
||||||
|
(
|
||||||
|
SELECT u.user_id, 'step=>2'::text AS event, e.time
|
||||||
|
FROM users_table AS u,
|
||||||
|
events_table AS e
|
||||||
|
WHERE u.user_id = e.event_type
|
||||||
|
AND u.user_id >= 10
|
||||||
|
AND u.user_id <= 25
|
||||||
|
AND e.event_type IN (103, 104, 105)
|
||||||
|
)
|
||||||
|
) t1 LEFT JOIN (
|
||||||
|
SELECT DISTINCT user_id,
|
||||||
|
'Has done event'::TEXT AS hasdone_event
|
||||||
|
FROM events_table AS e
|
||||||
|
|
||||||
|
WHERE e.user_id >= 10
|
||||||
|
AND e.user_id <= 25
|
||||||
|
AND e.event_type IN (106, 107, 108)
|
||||||
|
|
||||||
|
) t2 ON (t1.user_id = t2.user_id)
|
||||||
|
GROUP BY t1.user_id, hasdone_event
|
||||||
|
) t GROUP BY user_id, hasdone_event;
|
||||||
|
|
||||||
|
-- not pushable since the LEFT JOIN condition is not on the partition column (i.e., it is part_key divided by 2)
|
||||||
|
INSERT INTO agg_results_third (user_id, value_1_agg, value_2_agg )
|
||||||
|
SELECT user_id, sum(array_length(events_table, 1)), length(hasdone_event)
|
||||||
|
FROM (
|
||||||
|
SELECT
|
||||||
|
t1.user_id,
|
||||||
|
array_agg(event ORDER BY time) AS events_table,
|
||||||
|
COALESCE(hasdone_event, 'Has not done event') AS hasdone_event
|
||||||
|
FROM (
|
||||||
|
(
|
||||||
|
SELECT u.user_id, 'step=>1'::text AS event, e.time
|
||||||
|
FROM users_table AS u,
|
||||||
|
events_table AS e
|
||||||
|
WHERE u.user_id = e.user_id
|
||||||
|
AND u.user_id >= 10
|
||||||
|
AND u.user_id <= 25
|
||||||
|
AND e.event_type IN (100, 101, 102)
|
||||||
|
)
|
||||||
|
UNION
|
||||||
|
(
|
||||||
|
SELECT u.user_id, 'step=>2'::text AS event, e.time
|
||||||
|
FROM users_table AS u,
|
||||||
|
events_table AS e
|
||||||
|
WHERE u.user_id = e.user_id
|
||||||
|
AND u.user_id >= 10
|
||||||
|
AND u.user_id <= 25
|
||||||
|
AND e.event_type IN (103, 104, 105)
|
||||||
|
)
|
||||||
|
) t1 LEFT JOIN (
|
||||||
|
SELECT DISTINCT user_id,
|
||||||
|
'Has done event'::TEXT AS hasdone_event
|
||||||
|
FROM events_table AS e
|
||||||
|
|
||||||
|
WHERE e.user_id >= 10
|
||||||
|
AND e.user_id <= 25
|
||||||
|
AND e.event_type IN (106, 107, 108)
|
||||||
|
|
||||||
|
) t2 ON (t1.user_id = (t2.user_id)/2)
|
||||||
|
GROUP BY t1.user_id, hasdone_event
|
||||||
|
) t GROUP BY user_id, hasdone_event;
|
||||||
|
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- Funnel, grouped by the number of times a user has done an event
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
|
||||||
|
-- not pushable since the right of the UNION query is not joined on
|
||||||
|
-- the partition key
|
||||||
|
INSERT INTO agg_results_third (user_id, value_1_agg, value_2_agg)
|
||||||
|
SELECT
|
||||||
|
user_id,
|
||||||
|
avg(array_length(events_table, 1)) AS event_average,
|
||||||
|
count_pay
|
||||||
|
FROM (
|
||||||
|
SELECT
|
||||||
|
subquery_1.user_id,
|
||||||
|
array_agg(event ORDER BY time) AS events_table,
|
||||||
|
COALESCE(count_pay, 0) AS count_pay
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
(SELECT
|
||||||
|
users_table.user_id,
|
||||||
|
'action=>1'AS event,
|
||||||
|
events_table.time
|
||||||
|
FROM
|
||||||
|
users_table,
|
||||||
|
events_table
|
||||||
|
WHERE
|
||||||
|
users_table.user_id = events_table.user_id AND
|
||||||
|
users_table.user_id >= 10 AND
|
||||||
|
users_table.user_id <= 70 AND
|
||||||
|
events_table.event_type > 10 AND events_table.event_type < 12
|
||||||
|
)
|
||||||
|
UNION
|
||||||
|
(SELECT
|
||||||
|
users_table.user_id,
|
||||||
|
'action=>2'AS event,
|
||||||
|
events_table.time
|
||||||
|
FROM
|
||||||
|
users_table,
|
||||||
|
events_table
|
||||||
|
WHERE
|
||||||
|
users_table.user_id != events_table.user_id AND
|
||||||
|
users_table.user_id >= 10 AND
|
||||||
|
users_table.user_id <= 70 AND
|
||||||
|
events_table.event_type > 12 AND events_table.event_type < 14
|
||||||
|
)
|
||||||
|
) AS subquery_1
|
||||||
|
LEFT JOIN
|
||||||
|
(SELECT
|
||||||
|
user_id,
|
||||||
|
COUNT(*) AS count_pay
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
WHERE
|
||||||
|
user_id >= 10 AND
|
||||||
|
user_id <= 70 AND
|
||||||
|
users_table.value_1 > 15 AND users_table.value_1 < 17
|
||||||
|
GROUP BY
|
||||||
|
user_id
|
||||||
|
HAVING
|
||||||
|
COUNT(*) > 1) AS subquery_2
|
||||||
|
ON
|
||||||
|
subquery_1.user_id = subquery_2.user_id
|
||||||
|
GROUP BY
|
||||||
|
subquery_1.user_id,
|
||||||
|
count_pay) AS subquery_top
|
||||||
|
WHERE
|
||||||
|
array_ndims(events_table) > 0
|
||||||
|
GROUP BY
|
||||||
|
count_pay, user_id
|
||||||
|
ORDER BY
|
||||||
|
count_pay;
|
||||||
|
|
||||||
|
-- not pushable since the JOIN condition is not equi JOIN
|
||||||
|
-- (subquery_1 JOIN subquery_2)
|
||||||
|
INSERT INTO agg_results_third (user_id, value_1_agg, value_2_agg)
|
||||||
|
SELECT
|
||||||
|
user_id,
|
||||||
|
avg(array_length(events_table, 1)) AS event_average,
|
||||||
|
count_pay
|
||||||
|
FROM (
|
||||||
|
SELECT
|
||||||
|
subquery_1.user_id,
|
||||||
|
array_agg(event ORDER BY time) AS events_table,
|
||||||
|
COALESCE(count_pay, 0) AS count_pay
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
(SELECT
|
||||||
|
users_table.user_id,
|
||||||
|
'action=>1'AS event,
|
||||||
|
events_table.time
|
||||||
|
FROM
|
||||||
|
users_table,
|
||||||
|
events_table
|
||||||
|
WHERE
|
||||||
|
users_table.user_id = events_table.user_id AND
|
||||||
|
users_table.user_id >= 10 AND
|
||||||
|
users_table.user_id <= 70 AND
|
||||||
|
events_table.event_type > 10 AND events_table.event_type < 12
|
||||||
|
)
|
||||||
|
UNION
|
||||||
|
(SELECT
|
||||||
|
users_table.user_id,
|
||||||
|
'action=>2'AS event,
|
||||||
|
events_table.time
|
||||||
|
FROM
|
||||||
|
users_table,
|
||||||
|
events_table
|
||||||
|
WHERE
|
||||||
|
users_table.user_id = events_table.user_id AND
|
||||||
|
users_table.user_id >= 10 AND
|
||||||
|
users_table.user_id <= 70 AND
|
||||||
|
events_table.event_type > 12 AND events_table.event_type < 14
|
||||||
|
)
|
||||||
|
) AS subquery_1
|
||||||
|
LEFT JOIN
|
||||||
|
(SELECT
|
||||||
|
user_id,
|
||||||
|
COUNT(*) AS count_pay
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
WHERE
|
||||||
|
user_id >= 10 AND
|
||||||
|
user_id <= 70 AND
|
||||||
|
users_table.value_1 > 15 AND users_table.value_1 < 17
|
||||||
|
GROUP BY
|
||||||
|
user_id
|
||||||
|
HAVING
|
||||||
|
COUNT(*) > 1) AS subquery_2
|
||||||
|
ON
|
||||||
|
subquery_1.user_id > subquery_2.user_id
|
||||||
|
GROUP BY
|
||||||
|
subquery_1.user_id,
|
||||||
|
count_pay) AS subquery_top
|
||||||
|
WHERE
|
||||||
|
array_ndims(events_table) > 0
|
||||||
|
GROUP BY
|
||||||
|
count_pay, user_id
|
||||||
|
ORDER BY
|
||||||
|
count_pay;
|
||||||
|
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- Most recently seen users_table events_table
|
||||||
|
------------------------------------
|
||||||
|
-- Note that we don't use ORDER BY/LIMIT yet
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- not pushable since lateral join is not an equi join
|
||||||
|
INSERT INTO agg_results_third (user_id, agg_time, value_2_agg)
|
||||||
|
SELECT
|
||||||
|
user_id,
|
||||||
|
user_lastseen,
|
||||||
|
array_length(event_array, 1)
|
||||||
|
FROM (
|
||||||
|
SELECT
|
||||||
|
user_id,
|
||||||
|
max(u.time) as user_lastseen,
|
||||||
|
array_agg(event_type ORDER BY u.time) AS event_array
|
||||||
|
FROM (
|
||||||
|
|
||||||
|
SELECT user_id, time
|
||||||
|
FROM users_table
|
||||||
|
WHERE
|
||||||
|
user_id >= 10 AND
|
||||||
|
user_id <= 70 AND
|
||||||
|
users_table.value_1 > 10 AND users_table.value_1 < 12
|
||||||
|
|
||||||
|
) u LEFT JOIN LATERAL (
|
||||||
|
SELECT event_type, time
|
||||||
|
FROM events_table
|
||||||
|
WHERE user_id != u.user_id AND
|
||||||
|
events_table.event_type > 10 AND events_table.event_type < 12
|
||||||
|
) t ON true
|
||||||
|
GROUP BY user_id
|
||||||
|
) AS shard_union
|
||||||
|
ORDER BY user_lastseen DESC;
|
||||||
|
|
||||||
|
-- not pushable since lateral join is not on the partition key
|
||||||
|
INSERT INTO agg_results_third (user_id, agg_time, value_2_agg)
|
||||||
|
SELECT
|
||||||
|
user_id,
|
||||||
|
user_lastseen,
|
||||||
|
array_length(event_array, 1)
|
||||||
|
FROM (
|
||||||
|
SELECT
|
||||||
|
user_id,
|
||||||
|
max(u.time) as user_lastseen,
|
||||||
|
array_agg(event_type ORDER BY u.time) AS event_array
|
||||||
|
FROM (
|
||||||
|
|
||||||
|
SELECT user_id, time
|
||||||
|
FROM users_table
|
||||||
|
WHERE
|
||||||
|
user_id >= 10 AND
|
||||||
|
user_id <= 70 AND
|
||||||
|
users_table.value_1 > 10 AND users_table.value_1 < 12
|
||||||
|
|
||||||
|
) u LEFT JOIN LATERAL (
|
||||||
|
SELECT event_type, time
|
||||||
|
FROM events_table
|
||||||
|
WHERE event_type = u.user_id AND
|
||||||
|
events_table.event_type > 10 AND events_table.event_type < 12
|
||||||
|
) t ON true
|
||||||
|
GROUP BY user_id
|
||||||
|
) AS shard_union
|
||||||
|
ORDER BY user_lastseen DESC;
|
||||||
|
|
||||||
|
-- not pushable since lateral join is not on the partition key
|
||||||
|
INSERT INTO agg_results_third (user_id, agg_time, value_2_agg)
|
||||||
|
SELECT
|
||||||
|
user_id,
|
||||||
|
user_lastseen,
|
||||||
|
array_length(event_array, 1)
|
||||||
|
FROM (
|
||||||
|
SELECT
|
||||||
|
user_id,
|
||||||
|
max(u.time) as user_lastseen,
|
||||||
|
array_agg(event_type ORDER BY u.time) AS event_array
|
||||||
|
FROM (
|
||||||
|
|
||||||
|
SELECT user_id, time, value_3 as val_3
|
||||||
|
FROM users_table
|
||||||
|
WHERE
|
||||||
|
user_id >= 10 AND
|
||||||
|
user_id <= 70 AND
|
||||||
|
users_table.value_1 > 10 AND users_table.value_1 < 12
|
||||||
|
|
||||||
|
) u LEFT JOIN LATERAL (
|
||||||
|
SELECT event_type, time
|
||||||
|
FROM events_table
|
||||||
|
WHERE event_type = u.val_3 AND
|
||||||
|
events_table.event_type > 10 AND events_table.event_type < 12
|
||||||
|
) t ON true
|
||||||
|
GROUP BY user_id
|
||||||
|
) AS shard_union
|
||||||
|
ORDER BY user_lastseen DESC;
|
||||||
|
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- Count the number of distinct users_table who are in segment X and Y and Z
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
|
||||||
|
-- not pushable since partition key is NOT IN
|
||||||
|
INSERT INTO agg_results_third (user_id)
|
||||||
|
SELECT DISTINCT user_id
|
||||||
|
FROM users_table
|
||||||
|
WHERE user_id NOT IN (SELECT user_id FROM users_table WHERE value_1 >= 10 AND value_1 <= 20)
|
||||||
|
AND user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 30 AND value_1 <= 40)
|
||||||
|
AND user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 50 AND value_1 <= 60);
|
||||||
|
|
||||||
|
-- not pushable since partition key is not selected from the second subquery
|
||||||
|
INSERT INTO agg_results_third (user_id)
|
||||||
|
SELECT DISTINCT user_id
|
||||||
|
FROM users_table
|
||||||
|
WHERE user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 10 AND value_1 <= 20)
|
||||||
|
AND user_id IN (SELECT value_1 FROM users_table WHERE value_1 >= 30 AND value_1 <= 40)
|
||||||
|
AND user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 50 AND value_1 <= 60);
|
||||||
|
|
||||||
|
-- not pushable since second subquery does not return bare partition key
|
||||||
|
INSERT INTO agg_results_third (user_id)
|
||||||
|
SELECT DISTINCT user_id
|
||||||
|
FROM users_table
|
||||||
|
WHERE user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 10 AND value_1 <= 20)
|
||||||
|
AND user_id IN (SELECT 3 * user_id FROM users_table WHERE value_1 >= 30 AND value_1 <= 40)
|
||||||
|
AND user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 50 AND value_1 <= 60);
|
||||||
|
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- Find customers who have done X, and satisfy other customer specific criteria
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
|
||||||
|
-- not pushable since the join is not an equi join
|
||||||
|
INSERT INTO agg_results_third(user_id, value_2_agg)
|
||||||
|
SELECT user_id, value_2 FROM users_table WHERE
|
||||||
|
value_1 > 101 AND value_1 < 110
|
||||||
|
AND value_2 >= 5
|
||||||
|
AND EXISTS (SELECT user_id FROM events_table WHERE event_type>101 AND event_type < 110 AND value_3 > 100 AND user_id!=users_table.user_id);
|
||||||
|
|
||||||
|
-- not pushable since the join is not on the partition key
|
||||||
|
INSERT INTO agg_results_third(user_id, value_2_agg)
|
||||||
|
SELECT user_id, value_2 FROM users_table WHERE
|
||||||
|
value_1 > 101 AND value_1 < 110
|
||||||
|
AND value_2 >= 5
|
||||||
|
AND EXISTS (SELECT user_id FROM events_table WHERE event_type>101 AND event_type < 110 AND value_3 > 100 AND event_type = users_table.user_id);
|
||||||
|
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- Customers who haven’t done X, and satisfy other customer specific criteria
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- not pushable since the join is not an equi join
|
||||||
|
INSERT INTO agg_results_third(user_id, value_2_agg)
|
||||||
|
SELECT user_id, value_2 FROM users_table WHERE
|
||||||
|
value_1 = 101
|
||||||
|
AND value_2 >= 5
|
||||||
|
AND NOT EXISTS (SELECT user_id FROM events_table WHERE event_type=101 AND value_3 > 100 AND user_id!=users_table.user_id);
|
||||||
|
|
||||||
|
-- not pushable since the join is not on the partition key
|
||||||
|
INSERT INTO agg_results_third(user_id, value_2_agg)
|
||||||
|
SELECT user_id, value_2 FROM users_table WHERE
|
||||||
|
value_1 = 101
|
||||||
|
AND value_2 >= 5
|
||||||
|
AND NOT EXISTS (SELECT user_id FROM events_table WHERE event_type=101 AND value_3 > 100 AND event_type=users_table.user_id);
|
||||||
|
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- Customers who have done X and Y, and satisfy other customer specific criteria
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- not pushable since the second join is not an equi join
|
||||||
|
INSERT INTO agg_results_third(user_id, value_2_agg)
|
||||||
|
SELECT user_id, value_2 FROM users_table WHERE
|
||||||
|
value_1 > 100
|
||||||
|
AND value_2 >= 5
|
||||||
|
AND EXISTS (SELECT user_id FROM events_table WHERE event_type!=100 AND value_3 > 100 AND user_id=users_table.user_id)
|
||||||
|
AND EXISTS (SELECT user_id FROM events_table WHERE event_type=101 AND value_3 > 100 AND user_id!=users_table.user_id);
|
||||||
|
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- Customers who have done X and haven’t done Y, and satisfy other customer specific criteria
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
|
||||||
|
-- not pushable since the first join is not an equi join
|
||||||
|
INSERT INTO agg_results_third(user_id, value_2_agg)
|
||||||
|
SELECT user_id, value_2 FROM users_table WHERE
|
||||||
|
value_2 >= 5
|
||||||
|
AND EXISTS (SELECT user_id FROM events_table WHERE event_type > 100 AND event_type <= 300 AND value_3 > 100 AND user_id!=users_table.user_id)
|
||||||
|
AND NOT EXISTS (SELECT user_id FROM events_table WHERE event_type > 300 AND event_type <= 350 AND value_3 > 100 AND user_id=users_table.user_id);
|
||||||
|
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- Customers who have done X more than 2 times, and satisfy other customer specific criteria
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
|
||||||
|
-- not pushable since the second join is not an equi join
|
||||||
|
INSERT INTO agg_results_third(user_id, value_2_agg)
|
||||||
|
SELECT user_id,
|
||||||
|
value_2
|
||||||
|
FROM users_table
|
||||||
|
WHERE value_1 > 100
|
||||||
|
AND value_1 < 124
|
||||||
|
AND value_2 >= 5
|
||||||
|
AND EXISTS (SELECT user_id
|
||||||
|
FROM events_table
|
||||||
|
WHERE event_type > 100
|
||||||
|
AND event_type < 124
|
||||||
|
AND value_3 > 100
|
||||||
|
AND user_id != users_table.user_id
|
||||||
|
GROUP BY user_id
|
||||||
|
HAVING Count(*) > 2);
|
||||||
|
|
||||||
|
-- not pushable since the second join is not on the partition key
|
||||||
|
INSERT INTO agg_results_third(user_id, value_2_agg)
|
||||||
|
SELECT user_id,
|
||||||
|
value_2
|
||||||
|
FROM users_table
|
||||||
|
WHERE value_1 > 100
|
||||||
|
AND value_1 < 124
|
||||||
|
AND value_2 >= 5
|
||||||
|
AND EXISTS (SELECT user_id
|
||||||
|
FROM events_table
|
||||||
|
WHERE event_type > 100
|
||||||
|
AND event_type < 124
|
||||||
|
AND value_3 > 100
|
||||||
|
AND event_type = users_table.user_id
|
||||||
|
GROUP BY user_id
|
||||||
|
HAVING Count(*) > 2);
|
||||||
|
|
||||||
|
-- not pushable since the second join is not on the partition key
|
||||||
|
INSERT INTO agg_results_third(user_id, value_2_agg)
|
||||||
|
SELECT user_id,
|
||||||
|
value_2
|
||||||
|
FROM users_table
|
||||||
|
WHERE value_1 > 100
|
||||||
|
AND value_1 < 124
|
||||||
|
AND value_2 >= 5
|
||||||
|
AND EXISTS (SELECT user_id
|
||||||
|
FROM events_table
|
||||||
|
WHERE event_type > 100
|
||||||
|
AND event_type < 124
|
||||||
|
AND value_3 > 100
|
||||||
|
AND user_id = users_table.value_1
|
||||||
|
GROUP BY user_id
|
||||||
|
HAVING Count(*) > 2);
|
||||||
|
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- Find me all users_table who has done some event and has filters
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
|
||||||
|
-- not pushable due to NOT IN
|
||||||
|
INSERT INTO agg_results_third(user_id)
|
||||||
|
Select user_id
|
||||||
|
From events_table
|
||||||
|
Where event_type = 16
|
||||||
|
And value_2 > 50
|
||||||
|
And user_id NOT in
|
||||||
|
(select user_id
|
||||||
|
From users_table
|
||||||
|
Where value_1 = 15
|
||||||
|
And value_2 > 25);
|
||||||
|
|
||||||
|
-- not pushable since we're not selecting the partition key
|
||||||
|
INSERT INTO agg_results_third(user_id)
|
||||||
|
Select user_id
|
||||||
|
From events_table
|
||||||
|
Where event_type = 16
|
||||||
|
And value_2 > 50
|
||||||
|
And user_id in
|
||||||
|
(select value_3
|
||||||
|
From users_table
|
||||||
|
Where value_1 = 15
|
||||||
|
And value_2 > 25);
|
||||||
|
|
||||||
|
-- not pushable since we're not selecting the partition key
|
||||||
|
-- from the events table
|
||||||
|
INSERT INTO agg_results_third(user_id)
|
||||||
|
Select user_id
|
||||||
|
From events_table
|
||||||
|
Where event_type = 16
|
||||||
|
And value_2 > 50
|
||||||
|
And event_type in
|
||||||
|
(select user_id
|
||||||
|
From users_table
|
||||||
|
Where value_1 = 15
|
||||||
|
And value_2 > 25);
|
||||||
|
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- Which events were done by people who have done some specific events
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
|
||||||
|
-- not pushable due to NOT IN
|
||||||
|
INSERT INTO agg_results_third(user_id, value_1_agg)
|
||||||
|
SELECT user_id, event_type FROM events_table
|
||||||
|
WHERE user_id NOT IN (SELECT user_id from events_table WHERE event_type > 500 and event_type < 505)
|
||||||
|
GROUP BY user_id, event_type;
|
||||||
|
|
||||||
|
-- not pushable due to not selecting the partition key
|
||||||
|
INSERT INTO agg_results_third(user_id, value_1_agg)
|
||||||
|
SELECT user_id, event_type FROM events_table
|
||||||
|
WHERE user_id IN (SELECT value_2 from events_table WHERE event_type > 500 and event_type < 505)
|
||||||
|
GROUP BY user_id, event_type;
|
||||||
|
|
||||||
|
-- not pushable due to not comparing user id from the events table
|
||||||
|
INSERT INTO agg_results_third(user_id, value_1_agg)
|
||||||
|
SELECT user_id, event_type FROM events_table
|
||||||
|
WHERE event_type IN (SELECT user_id from events_table WHERE event_type > 500 and event_type < 505)
|
||||||
|
GROUP BY user_id, event_type;
|
||||||
|
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
-- Find my assets that have the highest probability and fetch their metadata
|
||||||
|
------------------------------------
|
||||||
|
------------------------------------
|
||||||
|
|
||||||
|
-- not pushable since the join is not an equi join
|
||||||
|
INSERT INTO agg_results_third(user_id, value_1_agg, value_3_agg)
|
||||||
|
SELECT
|
||||||
|
users_table.user_id, users_table.value_1, prob
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
JOIN
|
||||||
|
(SELECT
|
||||||
|
ma.user_id, (GREATEST(coalesce(ma.value_4 / 250, 0.0) + GREATEST(1.0))) / 2 AS prob
|
||||||
|
FROM
|
||||||
|
users_table AS ma, events_table as short_list
|
||||||
|
WHERE
|
||||||
|
short_list.user_id != ma.user_id and ma.value_1 < 50 and short_list.event_type < 50
|
||||||
|
) temp
|
||||||
|
ON users_table.user_id = temp.user_id
|
||||||
|
WHERE users_table.value_1 < 50;
|
||||||
|
|
||||||
|
-- not pushable since the join is not on the partition key
|
||||||
|
INSERT INTO agg_results_third(user_id, value_1_agg, value_3_agg)
|
||||||
|
SELECT
|
||||||
|
users_table.user_id, users_table.value_1, prob
|
||||||
|
FROM
|
||||||
|
users_table
|
||||||
|
JOIN
|
||||||
|
(SELECT
|
||||||
|
ma.user_id, (GREATEST(coalesce(ma.value_4 / 250, 0.0) + GREATEST(1.0))) / 2 AS prob
|
||||||
|
FROM
|
||||||
|
users_table AS ma, events_table as short_list
|
||||||
|
WHERE
|
||||||
|
short_list.user_id = ma.value_2 and ma.value_1 < 50 and short_list.event_type < 50
|
||||||
|
) temp
|
||||||
|
ON users_table.user_id = temp.user_id
|
||||||
|
WHERE users_table.value_1 < 50;
|
Loading…
Reference in New Issue