mirror of https://github.com/citusdata/citus.git
Remove uninstantiated qual logic, use attribute equivalences
In this PR, we aim to deduce whether each of the RTE_RELATIONs is joined with at least one other RTE_RELATION on their partition keys. If each RTE_RELATION follows the above rule, we can conclude that all RTE_RELATIONs are joined on their partition keys. In order to do that, we invented a new equivalence class, namely AttributeEquivalenceClass. In very simple words, an AttributeEquivalenceClass is identified by a unique id and consists of a list of AttributeEquivalenceClassMembers. Each AttributeEquivalenceClassMember is designed to identify attributes uniquely within the whole query. The necessity of this arises since varno attributes are defined within a single level of a query. Instead, here we want to identify each RTE_RELATION uniquely and try to find equality among each RTE_RELATION's partition key. Whenever we find an equality clause A = B, where both A and B originate from relation attributes (i.e., not random expressions), we create an AttributeEquivalenceClass to record this knowledge. If we later find another equivalence B = C, we create another AttributeEquivalenceClass. Finally, we can apply transitivity rules and generate a new AttributeEquivalenceClass which includes A, B and C. Note that equality among the members is identified by the varattno and rteIdentity. Each equality among RTE_RELATIONs is saved using an AttributeEquivalenceClass where each member attribute is identified by an AttributeEquivalenceClassMember. In the final step, we try to generate a common attribute equivalence class that holds as many AttributeEquivalenceClassMembers as possible whose attributes are partition keys.
pull/1282/head
parent
cfc0992137
commit
3825d4fd77
|
@ -26,11 +26,14 @@
|
|||
#include "executor/executor.h"
|
||||
#include "nodes/makefuncs.h"
|
||||
#include "nodes/nodeFuncs.h"
|
||||
#include "parser/parsetree.h"
|
||||
#include "optimizer/pathnode.h"
|
||||
#include "optimizer/planner.h"
|
||||
#include "utils/memutils.h"
|
||||
|
||||
|
||||
static List *relationRestrictionContextList = NIL;
|
||||
static List *joinRestrictionContextList = NIL;
|
||||
|
||||
/* create custom scan methods for separate executors */
|
||||
static CustomScanMethods RealTimeCustomScanMethods = {
|
||||
|
@ -57,7 +60,10 @@ static CustomScanMethods DelayedErrorCustomScanMethods = {
|
|||
/* local function forward declarations */
|
||||
static PlannedStmt * CreateDistributedPlan(PlannedStmt *localPlan, Query *originalQuery,
|
||||
Query *query, ParamListInfo boundParams,
|
||||
RelationRestrictionContext *restrictionContext);
|
||||
RelationRestrictionContext *restrictionContext,
|
||||
JoinRestrictionContext *joinRestrictionContext);
|
||||
static void AssignRTEIdentities(Query *queryTree);
|
||||
static void AssignRTEIdentity(RangeTblEntry *rangeTableEntry, int rteIdentifier);
|
||||
static Node * SerializeMultiPlan(struct MultiPlan *multiPlan);
|
||||
static MultiPlan * DeserializeMultiPlan(Node *node);
|
||||
static PlannedStmt * FinalizePlan(PlannedStmt *localPlan, MultiPlan *multiPlan);
|
||||
|
@ -65,9 +71,11 @@ static PlannedStmt * FinalizeNonRouterPlan(PlannedStmt *localPlan, MultiPlan *mu
|
|||
CustomScan *customScan);
|
||||
static PlannedStmt * FinalizeRouterPlan(PlannedStmt *localPlan, CustomScan *customScan);
|
||||
static void CheckNodeIsDumpable(Node *node);
|
||||
static RelationRestrictionContext * CreateAndPushRestrictionContext(void);
|
||||
static List * CopyPlanParamList(List *originalPlanParamList);
|
||||
static void CreateAndPushPlannerContexts(void);
|
||||
static RelationRestrictionContext * CurrentRestrictionContext(void);
|
||||
static void PopRestrictionContext(void);
|
||||
static JoinRestrictionContext * CurrentJoinRestrictionContext(void);
|
||||
static void PopRestrictionContexts(void);
|
||||
static bool HasUnresolvedExternParamsWalker(Node *expression, ParamListInfo boundParams);
|
||||
|
||||
|
||||
|
@ -78,7 +86,8 @@ multi_planner(Query *parse, int cursorOptions, ParamListInfo boundParams)
|
|||
PlannedStmt *result = NULL;
|
||||
bool needsDistributedPlanning = NeedsDistributedPlanning(parse);
|
||||
Query *originalQuery = NULL;
|
||||
RelationRestrictionContext *restrictionContext = NULL;
|
||||
RelationRestrictionContext *relationRestrictionContext = NULL;
|
||||
JoinRestrictionContext *joinRestrictionContext = NULL;
|
||||
|
||||
/*
|
||||
* standard_planner scribbles on its input, but for deparsing we need the
|
||||
|
@ -88,30 +97,14 @@ multi_planner(Query *parse, int cursorOptions, ParamListInfo boundParams)
|
|||
{
|
||||
originalQuery = copyObject(parse);
|
||||
|
||||
/*
|
||||
* We implement INSERT INTO .. SELECT by pushing down the SELECT to
|
||||
* each shard. To compute that we use the router planner, by adding
|
||||
* an "uninstantiated" constraint that the partition column be equal to a
|
||||
* certain value. standard_planner() distributes that constraint to
|
||||
* the baserestrictinfos to all the tables where it knows how to push
|
||||
* the restriction safely. An example is that the tables that are
|
||||
* connected via equi joins.
|
||||
*
|
||||
* The router planner then iterates over the target table's shards,
|
||||
* for each we replace the "uninstantiated" restriction, with one that
|
||||
* PruneShardList() handles, and then generate a query for that
|
||||
* individual shard. If any of the involved tables don't prune down
|
||||
* to a single shard, or if the pruned shards aren't colocated,
|
||||
* we error out.
|
||||
*/
|
||||
if (InsertSelectQuery(parse))
|
||||
{
|
||||
AddUninstantiatedPartitionRestriction(parse);
|
||||
}
|
||||
AssignRTEIdentities(parse);
|
||||
}
|
||||
|
||||
/* create the planner restriction contexts and push them onto the head of the context lists */
|
||||
restrictionContext = CreateAndPushRestrictionContext();
|
||||
CreateAndPushPlannerContexts();
|
||||
|
||||
relationRestrictionContext = CurrentRestrictionContext();
|
||||
joinRestrictionContext = CurrentJoinRestrictionContext();
|
||||
|
||||
PG_TRY();
|
||||
{
|
||||
|
@ -125,23 +118,75 @@ multi_planner(Query *parse, int cursorOptions, ParamListInfo boundParams)
|
|||
if (needsDistributedPlanning)
|
||||
{
|
||||
result = CreateDistributedPlan(result, originalQuery, parse,
|
||||
boundParams, restrictionContext);
|
||||
boundParams, relationRestrictionContext,
|
||||
joinRestrictionContext);
|
||||
}
|
||||
}
|
||||
PG_CATCH();
|
||||
{
|
||||
PopRestrictionContext();
|
||||
PopRestrictionContexts();
|
||||
PG_RE_THROW();
|
||||
}
|
||||
PG_END_TRY();
|
||||
|
||||
/* remove the context from the context list */
|
||||
PopRestrictionContext();
|
||||
PopRestrictionContexts();
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* To be able to track individual RTEs through postgres' query
|
||||
* planning, we need to be able to figure out whether an RTE is
|
||||
* actually a copy of another, rather than a different one. We
|
||||
* simply number the RTEs starting from 1.
|
||||
*
|
||||
* Note that we're only interested in RTE_RELATIONs and thus assigning
|
||||
* identifiers to those RTEs only.
|
||||
*/
|
||||
void
|
||||
AssignRTEIdentities(Query *queryTree)
|
||||
{
|
||||
List *rangeTableList = NIL;
|
||||
ListCell *rangeTableCell = NULL;
|
||||
int rteIdentifier = 1;
|
||||
|
||||
/* extract range table entries for simple relations only */
|
||||
ExtractRangeTableEntryWalker((Node *) queryTree, &rangeTableList);
|
||||
|
||||
foreach(rangeTableCell, rangeTableList)
|
||||
{
|
||||
RangeTblEntry *rangeTableEntry = (RangeTblEntry *) lfirst(rangeTableCell);
|
||||
|
||||
if (rangeTableEntry->rtekind != RTE_RELATION)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
AssignRTEIdentity(rangeTableEntry, rteIdentifier++);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* To be able to track RTEs through postgres' query planning, which copies and
|
||||
* duplicate, and modifies them, we sometimes need to figure out whether two
|
||||
* RTEs are copies of the same original RTE. For that we, hackishly, use a
|
||||
* field normally unused in RTE_RELATION RTEs.
|
||||
*
|
||||
* The assigned identifier better be unique within a plantree.
|
||||
*/
|
||||
static void
|
||||
AssignRTEIdentity(RangeTblEntry *rangeTableEntry, int rteIdentifier)
|
||||
{
|
||||
Assert(rangeTableEntry->rtekind == RTE_RELATION);
|
||||
Assert(rangeTableEntry->values_lists == NIL);
|
||||
|
||||
rangeTableEntry->values_lists = list_make1_int(rteIdentifier);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* IsModifyCommand returns true if the query performs modifications, false
|
||||
* otherwise.
|
||||
|
@ -187,7 +232,8 @@ IsModifyMultiPlan(MultiPlan *multiPlan)
|
|||
static PlannedStmt *
|
||||
CreateDistributedPlan(PlannedStmt *localPlan, Query *originalQuery, Query *query,
|
||||
ParamListInfo boundParams,
|
||||
RelationRestrictionContext *restrictionContext)
|
||||
RelationRestrictionContext *restrictionContext,
|
||||
JoinRestrictionContext *joinRestrictionContext)
|
||||
{
|
||||
MultiPlan *distributedPlan = NULL;
|
||||
PlannedStmt *resultPlan = NULL;
|
||||
|
@ -201,7 +247,9 @@ CreateDistributedPlan(PlannedStmt *localPlan, Query *originalQuery, Query *query
|
|||
if (IsModifyCommand(query))
|
||||
{
|
||||
/* modifications are always routed through the same planner/executor */
|
||||
distributedPlan = CreateModifyPlan(originalQuery, query, restrictionContext);
|
||||
distributedPlan = CreateModifyPlan(originalQuery, query, restrictionContext,
|
||||
joinRestrictionContext);
|
||||
|
||||
Assert(distributedPlan);
|
||||
}
|
||||
else
|
||||
|
@ -566,6 +614,37 @@ CheckNodeIsDumpable(Node *node)
|
|||
}
|
||||
|
||||
|
||||
/*
|
||||
* multi_join_restriction_hook is a hook called by postgresql standard planner
|
||||
* to notify us about various planning information regarding joins. We use
|
||||
* it to learn about the joining column.
|
||||
*/
|
||||
void
|
||||
multi_join_restriction_hook(PlannerInfo *root,
|
||||
RelOptInfo *joinrel,
|
||||
RelOptInfo *outerrel,
|
||||
RelOptInfo *innerrel,
|
||||
JoinType jointype,
|
||||
JoinPathExtraData *extra)
|
||||
{
|
||||
JoinRestrictionContext *joinContext = NULL;
|
||||
JoinRestriction *joinRestriction = palloc0(sizeof(JoinRestriction));
|
||||
List *restrictInfoList = NIL;
|
||||
|
||||
restrictInfoList = extra->restrictlist;
|
||||
joinContext = CurrentJoinRestrictionContext();
|
||||
Assert(joinContext != NULL);
|
||||
|
||||
joinRestriction->joinType = jointype;
|
||||
joinRestriction->joinRestrictInfoList = restrictInfoList;
|
||||
joinRestriction->plannerInfo = root;
|
||||
|
||||
|
||||
joinContext->joinRestrictionList =
|
||||
lappend(joinContext->joinRestrictionList, joinRestriction);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* multi_relation_restriction_hook is a hook called by postgresql standard planner
|
||||
* to notify us about various planning information regarding a relation. We use
|
||||
|
@ -599,8 +678,16 @@ multi_relation_restriction_hook(PlannerInfo *root, RelOptInfo *relOptInfo, Index
|
|||
relationRestriction->relOptInfo = relOptInfo;
|
||||
relationRestriction->distributedRelation = distributedTable;
|
||||
relationRestriction->plannerInfo = root;
|
||||
relationRestriction->parentPlannerInfo = root->parent_root;
|
||||
relationRestriction->prunedShardIntervalList = NIL;
|
||||
|
||||
/* see comments on GetVarFromAssignedParam() */
|
||||
if (relationRestriction->parentPlannerInfo)
|
||||
{
|
||||
relationRestriction->parentPlannerParamList =
|
||||
CopyPlanParamList(root->parent_root->plan_params);
|
||||
}
|
||||
|
||||
restrictionContext->hasDistributedRelation |= distributedTable;
|
||||
restrictionContext->hasLocalRelation |= localTable;
|
||||
|
||||
|
@ -621,23 +708,65 @@ multi_relation_restriction_hook(PlannerInfo *root, RelOptInfo *relOptInfo, Index
|
|||
}
|
||||
|
||||
|
||||
/* GetRTEIdentity returns the identity assigned with AssignRTEIdentity. */
|
||||
int
|
||||
GetRTEIdentity(RangeTblEntry *rte)
|
||||
{
|
||||
Assert(rte->rtekind == RTE_RELATION);
|
||||
Assert(IsA(rte->values_lists, IntList));
|
||||
Assert(list_length(rte->values_lists) == 1);
|
||||
|
||||
return linitial_int(rte->values_lists);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* CreateAndPushRestrictionContext creates a new restriction context, inserts it to the
|
||||
* beginning of the context list, and returns the newly created context.
|
||||
* CopyPlanParamList deep copies the input PlannerParamItem list and returns the newly
|
||||
* allocated list.
|
||||
* Note that we cannot use copyObject() function directly since there is no support for
|
||||
* copying PlannerParamItem structs.
|
||||
*/
|
||||
static RelationRestrictionContext *
|
||||
CreateAndPushRestrictionContext(void)
|
||||
static List *
CopyPlanParamList(List *originalPlanParamList)
{
	List *resultList = NIL;
	ListCell *paramItemCell = NULL;

	foreach(paramItemCell, originalPlanParamList)
	{
		PlannerParamItem *sourceItem = lfirst(paramItemCell);
		PlannerParamItem *targetItem = makeNode(PlannerParamItem);

		/* the item node is deep copied, the param id is copied by assignment */
		targetItem->item = copyObject(sourceItem->item);
		targetItem->paramId = sourceItem->paramId;

		resultList = lappend(resultList, targetItem);
	}

	return resultList;
}
|
||||
|
||||
|
||||
/*
|
||||
* CreateAndPushPlannerContextes creates a new restriction context and a new join context,
|
||||
* inserts it to the beginning of the respective context lists.
|
||||
*/
|
||||
static void
|
||||
CreateAndPushPlannerContexts(void)
|
||||
{
|
||||
RelationRestrictionContext *restrictionContext =
|
||||
palloc0(sizeof(RelationRestrictionContext));
|
||||
|
||||
JoinRestrictionContext *joinContext =
|
||||
palloc0(sizeof(JoinRestrictionContext));
|
||||
|
||||
/* we'll apply logical AND as we add tables */
|
||||
restrictionContext->allReferenceTables = true;
|
||||
|
||||
relationRestrictionContextList = lcons(restrictionContext,
|
||||
relationRestrictionContextList);
|
||||
|
||||
return restrictionContext;
|
||||
joinRestrictionContextList = lcons(joinContext,
|
||||
joinRestrictionContextList);
|
||||
}
|
||||
|
||||
|
||||
|
@ -660,13 +789,31 @@ CurrentRestrictionContext(void)
|
|||
|
||||
|
||||
/*
|
||||
* PopRestrictionContext removes the most recently added restriction context from
|
||||
* context list. The function assumes the list is not empty.
|
||||
* CurrentJoinRestrictionContext returns the most recently added join restriction context from the
|
||||
* list.
|
||||
*/
|
||||
static JoinRestrictionContext *
|
||||
CurrentJoinRestrictionContext(void)
|
||||
{
|
||||
JoinRestrictionContext *joinContext = NULL;
|
||||
|
||||
Assert(joinRestrictionContextList != NIL);
|
||||
|
||||
joinContext = (JoinRestrictionContext *) linitial(joinRestrictionContextList);
|
||||
|
||||
return joinContext;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* PopRestrictionContexts removes the most recently added restriction contexts from
|
||||
* the restriction and join context lists. The function assumes the lists are not empty.
|
||||
*/
|
||||
static void
|
||||
PopRestrictionContext(void)
|
||||
PopRestrictionContexts(void)
|
||||
{
|
||||
relationRestrictionContextList = list_delete_first(relationRestrictionContextList);
|
||||
joinRestrictionContextList = list_delete_first(joinRestrictionContextList);
|
||||
}
|
||||
|
||||
|
||||
|
@ -694,12 +841,6 @@ HasUnresolvedExternParamsWalker(Node *expression, ParamListInfo boundParams)
|
|||
return false;
|
||||
}
|
||||
|
||||
/* don't care about our special parameter, it'll be removed during planning */
|
||||
if (paramId == UNINSTANTIATED_PARAMETER_ID)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
/* check whether parameter is available (and valid) */
|
||||
if (boundParams && paramId > 0 && paramId <= boundParams->numParams)
|
||||
{
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -148,6 +148,7 @@ _PG_init(void)
|
|||
|
||||
/* register for planner hook */
|
||||
set_rel_pathlist_hook = multi_relation_restriction_hook;
|
||||
set_join_pathlist_hook = multi_join_restriction_hook;
|
||||
|
||||
/* organize that task tracker is started once server is up */
|
||||
TaskTrackerRegister();
|
||||
|
|
|
@ -37,9 +37,23 @@ typedef struct RelationRestriction
|
|||
RangeTblEntry *rte;
|
||||
RelOptInfo *relOptInfo;
|
||||
PlannerInfo *plannerInfo;
|
||||
PlannerInfo *parentPlannerInfo;
|
||||
List *parentPlannerParamList;
|
||||
List *prunedShardIntervalList;
|
||||
} RelationRestriction;
|
||||
|
||||
/*
 * JoinRestrictionContext collects the JoinRestriction entries recorded by
 * multi_join_restriction_hook() while a query is being planned.
 */
typedef struct JoinRestrictionContext
|
||||
{
|
||||
List *joinRestrictionList; /* list of JoinRestriction pointers, in append order */
|
||||
} JoinRestrictionContext;
|
||||
|
||||
/*
 * JoinRestriction is a single record captured by multi_join_restriction_hook():
 * the join type, the restriction list and the planner info the hook was
 * called with.
 */
typedef struct JoinRestriction
|
||||
{
|
||||
JoinType joinType; /* the "jointype" argument of the hook */
|
||||
List *joinRestrictInfoList; /* the "restrictlist" of the hook's extra data */
|
||||
PlannerInfo *plannerInfo; /* the "root" argument of the hook */
|
||||
} JoinRestriction;
|
||||
|
||||
typedef struct RelationShard
|
||||
{
|
||||
CitusNode type;
|
||||
|
@ -47,6 +61,42 @@ typedef struct RelationShard
|
|||
uint64 shardId;
|
||||
} RelationShard;
|
||||
|
||||
/*
|
||||
* AttributeEquivalenceClass
|
||||
*
|
||||
* Whenever we find an equality clause A = B, where both A and B originate from
|
||||
* relation attributes (i.e., not random expressions), we create an
|
||||
* AttributeEquivalenceClass to record this knowledge. If we later find another
|
||||
* equivalence B = C, we create another AttributeEquivalenceClass. Finally, we can
|
||||
* apply transitivity rules and generate a new AttributeEquivalenceClass which includes
|
||||
* A, B and C.
|
||||
*
|
||||
* Note that equality among the members is identified by the varattno and rteIdentity.
|
||||
*/
|
||||
typedef struct AttributeEquivalenceClass
|
||||
{
|
||||
uint32 equivalenceId; /* unique id that identifies this equivalence class */
|
||||
List *equivalentAttributes; /* list of AttributeEquivalenceClassMember pointers */
|
||||
} AttributeEquivalenceClass;
|
||||
|
||||
/*
|
||||
* AttributeEquivalenceClassMember - one member expression of an
|
||||
* AttributeEquivalenceClass. The important thing to consider is that
|
||||
* the class member contains the "rteIdendity" (RTE identity) field. Note that each RTE_RELATION
|
||||
* is assigned a unique rteIdentity in AssignRTEIdentities() function.
|
||||
*
|
||||
* "varno" and "varattno" are directly taken from a Var clause that is being added
|
||||
* to the attribute equivalence. Since we only use this class for relations, the member
|
||||
* also includes the relation id field.
|
||||
*/
|
||||
typedef struct AttributeEquivalenceClassMember
|
||||
{
|
||||
Index varno; /* varno of the Var the member was created from */
|
||||
AttrNumber varattno; /* attribute number of that Var */
|
||||
Oid relationId; /* relation id of the RTE_RELATION the attribute belongs to */
|
||||
int rteIdendity; /* RTE identity (field name is misspelled), see AssignRTEIdentities() */
|
||||
} AttributeEquivalenceClassMember;
|
||||
|
||||
|
||||
extern PlannedStmt * multi_planner(Query *parse, int cursorOptions,
|
||||
ParamListInfo boundParams);
|
||||
|
@ -55,9 +105,17 @@ struct MultiPlan;
|
|||
extern struct MultiPlan * GetMultiPlan(CustomScan *node);
|
||||
extern void multi_relation_restriction_hook(PlannerInfo *root, RelOptInfo *relOptInfo,
|
||||
Index index, RangeTblEntry *rte);
|
||||
extern void multi_join_restriction_hook(PlannerInfo *root,
|
||||
RelOptInfo *joinrel,
|
||||
RelOptInfo *outerrel,
|
||||
RelOptInfo *innerrel,
|
||||
JoinType jointype,
|
||||
JoinPathExtraData *extra);
|
||||
extern bool IsModifyCommand(Query *query);
|
||||
extern bool IsModifyMultiPlan(struct MultiPlan *multiPlan);
|
||||
extern RangeTblEntry * RemoteScanRangeTableEntry(List *columnNameList);
|
||||
|
||||
|
||||
extern int GetRTEIdentity(RangeTblEntry *rte);
|
||||
|
||||
#endif /* MULTI_PLANNER_H */
|
||||
|
|
|
@ -21,9 +21,6 @@
|
|||
#include "nodes/parsenodes.h"
|
||||
|
||||
|
||||
/* reserved parameted id, we chose a negative number since it is not assigned by postgres */
|
||||
#define UNINSTANTIATED_PARAMETER_ID INT_MIN
|
||||
|
||||
/* reserved alias name for UPSERTs */
|
||||
#define CITUS_TABLE_ALIAS "citus_table_alias"
|
||||
|
||||
|
@ -32,9 +29,9 @@ extern bool EnableRouterExecution;
|
|||
extern MultiPlan * CreateRouterPlan(Query *originalQuery, Query *query,
|
||||
RelationRestrictionContext *restrictionContext);
|
||||
extern MultiPlan * CreateModifyPlan(Query *originalQuery, Query *query,
|
||||
RelationRestrictionContext *restrictionContext);
|
||||
RelationRestrictionContext *restrictionContext,
|
||||
JoinRestrictionContext *joinRestrictionContext);
|
||||
|
||||
extern void AddUninstantiatedPartitionRestriction(Query *originalQuery);
|
||||
extern DeferredErrorMessage * ModifyQuerySupported(Query *queryTree);
|
||||
extern Query * ReorderInsertSelectTargetLists(Query *originalQuery,
|
||||
RangeTblEntry *insertRte,
|
||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -35,6 +35,13 @@ SELECT create_reference_table('reference_table');
|
|||
|
||||
(1 row)
|
||||
|
||||
CREATE TABLE insert_select_varchar_test (key varchar, value int);
|
||||
SELECT create_distributed_table('insert_select_varchar_test', 'key', 'hash');
|
||||
create_distributed_table
|
||||
--------------------------
|
||||
|
||||
(1 row)
|
||||
|
||||
-- set back to the defaults
|
||||
SET citus.shard_count = DEFAULT;
|
||||
SET citus.shard_replication_factor = DEFAULT;
|
||||
|
@ -1125,13 +1132,14 @@ SELECT
|
|||
FROM
|
||||
((SELECT user_id FROM raw_events_first) UNION
|
||||
(SELECT user_id FROM raw_events_second)) as foo;
|
||||
ERROR: set operations are not allowed in INSERT ... SELECT queries
|
||||
ERROR: cannot perform distributed planning for the given modification
|
||||
DETAIL: Select query cannot be pushed down to the worker.
|
||||
-- We do not support any set operations
|
||||
INSERT INTO
|
||||
raw_events_first(user_id)
|
||||
(SELECT user_id FROM raw_events_first) INTERSECT
|
||||
(SELECT user_id FROM raw_events_first);
|
||||
ERROR: set operations are not allowed in INSERT ... SELECT queries
|
||||
ERROR: INTERSECT and EXCEPT set operations are not allowed in INSERT ... SELECT queries
|
||||
-- We do not support any set operations
|
||||
INSERT INTO
|
||||
raw_events_first(user_id)
|
||||
|
@ -1140,7 +1148,7 @@ SELECT
|
|||
FROM
|
||||
((SELECT user_id FROM raw_events_first WHERE user_id = 15) EXCEPT
|
||||
(SELECT user_id FROM raw_events_second where user_id = 17)) as foo;
|
||||
ERROR: set operations are not allowed in INSERT ... SELECT queries
|
||||
ERROR: INTERSECT and EXCEPT set operations are not allowed in INSERT ... SELECT queries
|
||||
-- some supported LEFT joins
|
||||
INSERT INTO agg_events (user_id)
|
||||
SELECT
|
||||
|
@ -1406,35 +1414,8 @@ DEBUG: Plan is router executable
|
|||
raw_events_second
|
||||
WHERE raw_events_second.user_id = raw_events_first.value_1
|
||||
AND raw_events_first.value_1 = 12;
|
||||
DEBUG: predicate pruning for shardId 13300001
|
||||
DEBUG: predicate pruning for shardId 13300002
|
||||
DEBUG: predicate pruning for shardId 13300003
|
||||
DEBUG: predicate pruning for shardId 13300004
|
||||
DEBUG: predicate pruning for shardId 13300005
|
||||
DEBUG: predicate pruning for shardId 13300006
|
||||
DEBUG: distributed statement: INSERT INTO public.agg_events_13300008 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM public.raw_events_first_13300000 raw_events_first, public.raw_events_second_13300007 raw_events_second WHERE (((raw_events_second.user_id = raw_events_first.value_1) AND (raw_events_first.value_1 = 12)) AND ((hashint4(raw_events_first.user_id) >= '-2147483648'::integer) AND (hashint4(raw_events_first.user_id) <= '-1073741825'::integer)))
|
||||
DEBUG: predicate pruning for shardId 13300000
|
||||
DEBUG: predicate pruning for shardId 13300002
|
||||
DEBUG: predicate pruning for shardId 13300003
|
||||
DEBUG: predicate pruning for shardId 13300004
|
||||
DEBUG: predicate pruning for shardId 13300005
|
||||
DEBUG: predicate pruning for shardId 13300006
|
||||
DEBUG: distributed statement: INSERT INTO public.agg_events_13300009 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM public.raw_events_first_13300001 raw_events_first, public.raw_events_second_13300007 raw_events_second WHERE (((raw_events_second.user_id = raw_events_first.value_1) AND (raw_events_first.value_1 = 12)) AND ((hashint4(raw_events_first.user_id) >= '-1073741824'::integer) AND (hashint4(raw_events_first.user_id) <= '-1'::integer)))
|
||||
DEBUG: predicate pruning for shardId 13300000
|
||||
DEBUG: predicate pruning for shardId 13300001
|
||||
DEBUG: predicate pruning for shardId 13300003
|
||||
DEBUG: predicate pruning for shardId 13300004
|
||||
DEBUG: predicate pruning for shardId 13300005
|
||||
DEBUG: predicate pruning for shardId 13300006
|
||||
DEBUG: distributed statement: INSERT INTO public.agg_events_13300010 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM public.raw_events_first_13300002 raw_events_first, public.raw_events_second_13300007 raw_events_second WHERE (((raw_events_second.user_id = raw_events_first.value_1) AND (raw_events_first.value_1 = 12)) AND ((hashint4(raw_events_first.user_id) >= 0) AND (hashint4(raw_events_first.user_id) <= 1073741823)))
|
||||
DEBUG: predicate pruning for shardId 13300000
|
||||
DEBUG: predicate pruning for shardId 13300001
|
||||
DEBUG: predicate pruning for shardId 13300002
|
||||
DEBUG: predicate pruning for shardId 13300004
|
||||
DEBUG: predicate pruning for shardId 13300005
|
||||
DEBUG: predicate pruning for shardId 13300006
|
||||
DEBUG: distributed statement: INSERT INTO public.agg_events_13300011 AS citus_table_alias (user_id) SELECT raw_events_first.user_id FROM public.raw_events_first_13300003 raw_events_first, public.raw_events_second_13300007 raw_events_second WHERE (((raw_events_second.user_id = raw_events_first.value_1) AND (raw_events_first.value_1 = 12)) AND ((hashint4(raw_events_first.user_id) >= 1073741824) AND (hashint4(raw_events_first.user_id) <= 2147483647)))
|
||||
DEBUG: Plan is router executable
|
||||
ERROR: cannot perform distributed planning for the given modification
|
||||
DETAIL: Select query cannot be pushed down to the worker.
|
||||
|
||||
-- some unsupported LEFT/INNER JOINs
|
||||
-- JOIN on one table with partition column other is not
|
||||
|
@ -1443,9 +1424,6 @@ DEBUG: Plan is router executable
|
|||
raw_events_first.user_id
|
||||
FROM
|
||||
raw_events_first LEFT JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.value_1;
|
||||
DEBUG: predicate pruning for shardId 13300001
|
||||
DEBUG: predicate pruning for shardId 13300002
|
||||
DEBUG: predicate pruning for shardId 13300003
|
||||
ERROR: cannot perform distributed planning for the given modification
|
||||
DETAIL: Select query cannot be pushed down to the worker.
|
||||
|
||||
|
@ -1455,9 +1433,6 @@ DETAIL: Select query cannot be pushed down to the worker.
|
|||
raw_events_first.user_id
|
||||
FROM
|
||||
raw_events_first INNER JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.value_1;
|
||||
DEBUG: predicate pruning for shardId 13300001
|
||||
DEBUG: predicate pruning for shardId 13300002
|
||||
DEBUG: predicate pruning for shardId 13300003
|
||||
ERROR: cannot perform distributed planning for the given modification
|
||||
DETAIL: Select query cannot be pushed down to the worker.
|
||||
|
||||
|
@ -1477,9 +1452,6 @@ DETAIL: Select query cannot be pushed down to the worker.
|
|||
raw_events_first.user_id
|
||||
FROM
|
||||
raw_events_first LEFT JOIN raw_events_second ON raw_events_first.value_1 = raw_events_second.value_1;
|
||||
DEBUG: predicate pruning for shardId 13300001
|
||||
DEBUG: predicate pruning for shardId 13300002
|
||||
DEBUG: predicate pruning for shardId 13300003
|
||||
ERROR: cannot perform distributed planning for the given modification
|
||||
DETAIL: Select query cannot be pushed down to the worker.
|
||||
|
||||
|
@ -1489,9 +1461,6 @@ DETAIL: Select query cannot be pushed down to the worker.
|
|||
raw_events_first.user_id
|
||||
FROM
|
||||
raw_events_first INNER JOIN raw_events_second ON raw_events_first.value_1 = raw_events_second.value_1;
|
||||
DEBUG: predicate pruning for shardId 13300001
|
||||
DEBUG: predicate pruning for shardId 13300002
|
||||
DEBUG: predicate pruning for shardId 13300003
|
||||
ERROR: cannot perform distributed planning for the given modification
|
||||
DETAIL: Select query cannot be pushed down to the worker.
|
||||
|
||||
|
@ -1530,9 +1499,6 @@ DETAIL: Select query cannot be pushed down to the worker.
|
|||
FROM
|
||||
raw_events_first INNER JOIN raw_events_second ON raw_events_first.user_id = raw_events_second.value_1
|
||||
WHERE raw_events_first.value_1 IN (10, 11,12) OR raw_events_second.user_id IN (1,2,3,4);
|
||||
DEBUG: predicate pruning for shardId 13300001
|
||||
DEBUG: predicate pruning for shardId 13300002
|
||||
DEBUG: predicate pruning for shardId 13300003
|
||||
ERROR: cannot perform distributed planning for the given modification
|
||||
DETAIL: Select query cannot be pushed down to the worker.
|
||||
|
||||
|
@ -1543,9 +1509,6 @@ DETAIL: Select query cannot be pushed down to the worker.
|
|||
FROM raw_events_first,
|
||||
raw_events_second
|
||||
WHERE raw_events_second.user_id = raw_events_first.value_1;
|
||||
DEBUG: predicate pruning for shardId 13300001
|
||||
DEBUG: predicate pruning for shardId 13300002
|
||||
DEBUG: predicate pruning for shardId 13300003
|
||||
ERROR: cannot perform distributed planning for the given modification
|
||||
DETAIL: Select query cannot be pushed down to the worker.
|
||||
|
||||
|
@ -1559,9 +1522,6 @@ DETAIL: Select query cannot be pushed down to the worker.
|
|||
raw_events_second
|
||||
WHERE raw_events_second.user_id = raw_events_first.value_1
|
||||
AND raw_events_first.value_2 = 12;
|
||||
DEBUG: predicate pruning for shardId 13300001
|
||||
DEBUG: predicate pruning for shardId 13300002
|
||||
DEBUG: predicate pruning for shardId 13300003
|
||||
ERROR: cannot perform distributed planning for the given modification
|
||||
DETAIL: Select query cannot be pushed down to the worker.
|
||||
|
||||
|
@ -1596,12 +1556,6 @@ DETAIL: Select query cannot be pushed down to the worker.
|
|||
ON (f.id = f2.id)) as outer_most
|
||||
GROUP BY
|
||||
outer_most.id;
|
||||
DEBUG: predicate pruning for shardId 13300001
|
||||
DEBUG: predicate pruning for shardId 13300002
|
||||
DEBUG: predicate pruning for shardId 13300003
|
||||
DEBUG: predicate pruning for shardId 13300005
|
||||
DEBUG: predicate pruning for shardId 13300006
|
||||
DEBUG: predicate pruning for shardId 13300007
|
||||
ERROR: cannot perform distributed planning for the given modification
|
||||
DETAIL: Select query cannot be pushed down to the worker.
|
||||
INSERT INTO agg_events
|
||||
|
@ -1793,12 +1747,6 @@ outer_most.id, max(outer_most.value)
|
|||
HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2
|
||||
ON (f.id != f2.id)) as outer_most
|
||||
GROUP BY outer_most.id;
|
||||
DEBUG: predicate pruning for shardId 13300001
|
||||
DEBUG: predicate pruning for shardId 13300002
|
||||
DEBUG: predicate pruning for shardId 13300003
|
||||
DEBUG: predicate pruning for shardId 13300005
|
||||
DEBUG: predicate pruning for shardId 13300006
|
||||
DEBUG: predicate pruning for shardId 13300007
|
||||
ERROR: cannot perform distributed planning for the given modification
|
||||
DETAIL: Select query cannot be pushed down to the worker.
|
||||
-- cannot pushdown since foo2 is not join on partition key
|
||||
|
@ -1861,12 +1809,6 @@ FROM
|
|||
ON (f.id = f2.id)) as outer_most
|
||||
GROUP BY
|
||||
outer_most.id;
|
||||
DEBUG: predicate pruning for shardId 13300001
|
||||
DEBUG: predicate pruning for shardId 13300002
|
||||
DEBUG: predicate pruning for shardId 13300003
|
||||
DEBUG: predicate pruning for shardId 13300005
|
||||
DEBUG: predicate pruning for shardId 13300006
|
||||
DEBUG: predicate pruning for shardId 13300007
|
||||
ERROR: cannot perform distributed planning for the given modification
|
||||
DETAIL: Select query cannot be pushed down to the worker.
|
||||
-- some unsupported LATERAL JOINs
|
||||
|
@ -1925,12 +1867,6 @@ FROM
|
|||
JOIN LATERAL
|
||||
(SELECT user_id, value_4 FROM agg_events) as agg_ids ON (agg_ids.value_4 = averages.user_id)
|
||||
GROUP BY averages.user_id;
|
||||
DEBUG: predicate pruning for shardId 13300005
|
||||
DEBUG: predicate pruning for shardId 13300006
|
||||
DEBUG: predicate pruning for shardId 13300007
|
||||
DEBUG: predicate pruning for shardId 13300001
|
||||
DEBUG: predicate pruning for shardId 13300002
|
||||
DEBUG: predicate pruning for shardId 13300003
|
||||
ERROR: cannot perform distributed planning for the given modification
|
||||
DETAIL: Select query cannot be pushed down to the worker.
|
||||
-- not supported subqueries in WHERE clause
|
||||
|
@ -1942,9 +1878,6 @@ SELECT user_id
|
|||
FROM raw_events_first
|
||||
WHERE user_id IN (SELECT value_1
|
||||
FROM raw_events_second);
|
||||
DEBUG: predicate pruning for shardId 13300001
|
||||
DEBUG: predicate pruning for shardId 13300002
|
||||
DEBUG: predicate pruning for shardId 13300003
|
||||
ERROR: cannot perform distributed planning for the given modification
|
||||
DETAIL: Select query cannot be pushed down to the worker.
|
||||
-- same as above but slightly more complex
|
||||
|
@ -1973,6 +1906,17 @@ FROM (SELECT SUM(raw_events_second.value_4) AS v4,
|
|||
ON (f.id = f2.id)
|
||||
WHERE f.id IN (SELECT value_1
|
||||
FROM raw_events_second);
|
||||
ERROR: cannot perform distributed planning for the given modification
|
||||
DETAIL: Select query cannot be pushed down to the worker.
|
||||
-- some more semi-anti join tests
|
||||
-- join in where
|
||||
INSERT INTO raw_events_second
|
||||
(user_id)
|
||||
SELECT user_id
|
||||
FROM raw_events_first
|
||||
WHERE user_id IN (SELECT raw_events_second.user_id
|
||||
FROM raw_events_second, raw_events_first
|
||||
WHERE raw_events_second.user_id = raw_events_first.user_id AND raw_events_first.user_id = 200);
|
||||
DEBUG: predicate pruning for shardId 13300001
|
||||
DEBUG: predicate pruning for shardId 13300002
|
||||
DEBUG: predicate pruning for shardId 13300003
|
||||
|
@ -1982,6 +1926,389 @@ DEBUG: predicate pruning for shardId 13300007
|
|||
DEBUG: predicate pruning for shardId 13300001
|
||||
DEBUG: predicate pruning for shardId 13300002
|
||||
DEBUG: predicate pruning for shardId 13300003
|
||||
DEBUG: distributed statement: INSERT INTO public.raw_events_second_13300004 AS citus_table_alias (user_id) SELECT user_id FROM public.raw_events_first_13300000 raw_events_first WHERE ((user_id IN (SELECT raw_events_second.user_id FROM public.raw_events_second_13300004 raw_events_second, public.raw_events_first_13300000 raw_events_first_1 WHERE ((raw_events_second.user_id = raw_events_first_1.user_id) AND (raw_events_first_1.user_id = 200)))) AND ((hashint4(user_id) >= '-2147483648'::integer) AND (hashint4(user_id) <= '-1073741825'::integer)))
|
||||
DEBUG: predicate pruning for shardId 13300000
|
||||
DEBUG: predicate pruning for shardId 13300001
|
||||
DEBUG: predicate pruning for shardId 13300002
|
||||
DEBUG: predicate pruning for shardId 13300003
|
||||
DEBUG: predicate pruning for shardId 13300004
|
||||
DEBUG: predicate pruning for shardId 13300005
|
||||
DEBUG: predicate pruning for shardId 13300006
|
||||
DEBUG: predicate pruning for shardId 13300007
|
||||
DEBUG: predicate pruning for shardId 13300000
|
||||
DEBUG: predicate pruning for shardId 13300001
|
||||
DEBUG: predicate pruning for shardId 13300002
|
||||
DEBUG: predicate pruning for shardId 13300003
|
||||
DEBUG: Skipping target shard interval 13300005 since SELECT query for it pruned away
|
||||
DEBUG: predicate pruning for shardId 13300000
|
||||
DEBUG: predicate pruning for shardId 13300001
|
||||
DEBUG: predicate pruning for shardId 13300002
|
||||
DEBUG: predicate pruning for shardId 13300003
|
||||
DEBUG: predicate pruning for shardId 13300004
|
||||
DEBUG: predicate pruning for shardId 13300005
|
||||
DEBUG: predicate pruning for shardId 13300006
|
||||
DEBUG: predicate pruning for shardId 13300007
|
||||
DEBUG: predicate pruning for shardId 13300000
|
||||
DEBUG: predicate pruning for shardId 13300001
|
||||
DEBUG: predicate pruning for shardId 13300002
|
||||
DEBUG: predicate pruning for shardId 13300003
|
||||
DEBUG: Skipping target shard interval 13300006 since SELECT query for it pruned away
|
||||
DEBUG: predicate pruning for shardId 13300000
|
||||
DEBUG: predicate pruning for shardId 13300001
|
||||
DEBUG: predicate pruning for shardId 13300002
|
||||
DEBUG: predicate pruning for shardId 13300003
|
||||
DEBUG: predicate pruning for shardId 13300004
|
||||
DEBUG: predicate pruning for shardId 13300005
|
||||
DEBUG: predicate pruning for shardId 13300006
|
||||
DEBUG: predicate pruning for shardId 13300007
|
||||
DEBUG: predicate pruning for shardId 13300000
|
||||
DEBUG: predicate pruning for shardId 13300001
|
||||
DEBUG: predicate pruning for shardId 13300002
|
||||
DEBUG: predicate pruning for shardId 13300003
|
||||
DEBUG: Skipping target shard interval 13300007 since SELECT query for it pruned away
|
||||
DEBUG: Plan is router executable
|
||||
-- we cannot push this down since it is NOT IN
|
||||
INSERT INTO raw_events_second
|
||||
(user_id)
|
||||
SELECT user_id
|
||||
FROM raw_events_first
|
||||
WHERE user_id NOT IN (SELECT raw_events_second.user_id
|
||||
FROM raw_events_second, raw_events_first
|
||||
WHERE raw_events_second.user_id = raw_events_first.user_id AND raw_events_first.user_id = 200);
|
||||
DEBUG: predicate pruning for shardId 13300005
|
||||
DEBUG: predicate pruning for shardId 13300006
|
||||
DEBUG: predicate pruning for shardId 13300007
|
||||
DEBUG: predicate pruning for shardId 13300001
|
||||
DEBUG: predicate pruning for shardId 13300002
|
||||
DEBUG: predicate pruning for shardId 13300003
|
||||
ERROR: cannot perform distributed planning for the given modification
|
||||
DETAIL: Select query cannot be pushed down to the worker.
|
||||
-- safe to push down
|
||||
INSERT INTO raw_events_second
|
||||
(user_id)
|
||||
SELECT user_id
|
||||
FROM raw_events_first
|
||||
WHERE EXISTS (SELECT 1
|
||||
FROM raw_events_second
|
||||
WHERE raw_events_second.user_id =raw_events_first.user_id);
|
||||
DEBUG: predicate pruning for shardId 13300001
|
||||
DEBUG: predicate pruning for shardId 13300002
|
||||
DEBUG: predicate pruning for shardId 13300003
|
||||
DEBUG: predicate pruning for shardId 13300005
|
||||
DEBUG: predicate pruning for shardId 13300006
|
||||
DEBUG: predicate pruning for shardId 13300007
|
||||
DEBUG: distributed statement: INSERT INTO public.raw_events_second_13300004 AS citus_table_alias (user_id) SELECT user_id FROM public.raw_events_first_13300000 raw_events_first WHERE ((EXISTS (SELECT 1 FROM public.raw_events_second_13300004 raw_events_second WHERE (raw_events_second.user_id = raw_events_first.user_id))) AND ((hashint4(user_id) >= '-2147483648'::integer) AND (hashint4(user_id) <= '-1073741825'::integer)))
|
||||
DEBUG: predicate pruning for shardId 13300000
|
||||
DEBUG: predicate pruning for shardId 13300002
|
||||
DEBUG: predicate pruning for shardId 13300003
|
||||
DEBUG: predicate pruning for shardId 13300004
|
||||
DEBUG: predicate pruning for shardId 13300006
|
||||
DEBUG: predicate pruning for shardId 13300007
|
||||
DEBUG: distributed statement: INSERT INTO public.raw_events_second_13300005 AS citus_table_alias (user_id) SELECT user_id FROM public.raw_events_first_13300001 raw_events_first WHERE ((EXISTS (SELECT 1 FROM public.raw_events_second_13300005 raw_events_second WHERE (raw_events_second.user_id = raw_events_first.user_id))) AND ((hashint4(user_id) >= '-1073741824'::integer) AND (hashint4(user_id) <= '-1'::integer)))
|
||||
DEBUG: predicate pruning for shardId 13300000
|
||||
DEBUG: predicate pruning for shardId 13300001
|
||||
DEBUG: predicate pruning for shardId 13300003
|
||||
DEBUG: predicate pruning for shardId 13300004
|
||||
DEBUG: predicate pruning for shardId 13300005
|
||||
DEBUG: predicate pruning for shardId 13300007
|
||||
DEBUG: distributed statement: INSERT INTO public.raw_events_second_13300006 AS citus_table_alias (user_id) SELECT user_id FROM public.raw_events_first_13300002 raw_events_first WHERE ((EXISTS (SELECT 1 FROM public.raw_events_second_13300006 raw_events_second WHERE (raw_events_second.user_id = raw_events_first.user_id))) AND ((hashint4(user_id) >= 0) AND (hashint4(user_id) <= 1073741823)))
|
||||
DEBUG: predicate pruning for shardId 13300000
|
||||
DEBUG: predicate pruning for shardId 13300001
|
||||
DEBUG: predicate pruning for shardId 13300002
|
||||
DEBUG: predicate pruning for shardId 13300004
|
||||
DEBUG: predicate pruning for shardId 13300005
|
||||
DEBUG: predicate pruning for shardId 13300006
|
||||
DEBUG: distributed statement: INSERT INTO public.raw_events_second_13300007 AS citus_table_alias (user_id) SELECT user_id FROM public.raw_events_first_13300003 raw_events_first WHERE ((EXISTS (SELECT 1 FROM public.raw_events_second_13300007 raw_events_second WHERE (raw_events_second.user_id = raw_events_first.user_id))) AND ((hashint4(user_id) >= 1073741824) AND (hashint4(user_id) <= 2147483647)))
|
||||
DEBUG: Plan is router executable
|
||||
-- we cannot push down
|
||||
INSERT INTO raw_events_second
|
||||
(user_id)
|
||||
SELECT user_id
|
||||
FROM raw_events_first
|
||||
WHERE NOT EXISTS (SELECT 1
|
||||
FROM raw_events_second
|
||||
WHERE raw_events_second.user_id =raw_events_first.user_id);
|
||||
DEBUG: predicate pruning for shardId 13300001
|
||||
DEBUG: predicate pruning for shardId 13300002
|
||||
DEBUG: predicate pruning for shardId 13300003
|
||||
DEBUG: predicate pruning for shardId 13300005
|
||||
DEBUG: predicate pruning for shardId 13300006
|
||||
DEBUG: predicate pruning for shardId 13300007
|
||||
DEBUG: distributed statement: INSERT INTO public.raw_events_second_13300004 AS citus_table_alias (user_id) SELECT user_id FROM public.raw_events_first_13300000 raw_events_first WHERE ((NOT (EXISTS (SELECT 1 FROM public.raw_events_second_13300004 raw_events_second WHERE (raw_events_second.user_id = raw_events_first.user_id)))) AND ((hashint4(user_id) >= '-2147483648'::integer) AND (hashint4(user_id) <= '-1073741825'::integer)))
|
||||
DEBUG: predicate pruning for shardId 13300000
|
||||
DEBUG: predicate pruning for shardId 13300002
|
||||
DEBUG: predicate pruning for shardId 13300003
|
||||
DEBUG: predicate pruning for shardId 13300004
|
||||
DEBUG: predicate pruning for shardId 13300006
|
||||
DEBUG: predicate pruning for shardId 13300007
|
||||
DEBUG: distributed statement: INSERT INTO public.raw_events_second_13300005 AS citus_table_alias (user_id) SELECT user_id FROM public.raw_events_first_13300001 raw_events_first WHERE ((NOT (EXISTS (SELECT 1 FROM public.raw_events_second_13300005 raw_events_second WHERE (raw_events_second.user_id = raw_events_first.user_id)))) AND ((hashint4(user_id) >= '-1073741824'::integer) AND (hashint4(user_id) <= '-1'::integer)))
|
||||
DEBUG: predicate pruning for shardId 13300000
|
||||
DEBUG: predicate pruning for shardId 13300001
|
||||
DEBUG: predicate pruning for shardId 13300003
|
||||
DEBUG: predicate pruning for shardId 13300004
|
||||
DEBUG: predicate pruning for shardId 13300005
|
||||
DEBUG: predicate pruning for shardId 13300007
|
||||
DEBUG: distributed statement: INSERT INTO public.raw_events_second_13300006 AS citus_table_alias (user_id) SELECT user_id FROM public.raw_events_first_13300002 raw_events_first WHERE ((NOT (EXISTS (SELECT 1 FROM public.raw_events_second_13300006 raw_events_second WHERE (raw_events_second.user_id = raw_events_first.user_id)))) AND ((hashint4(user_id) >= 0) AND (hashint4(user_id) <= 1073741823)))
|
||||
DEBUG: predicate pruning for shardId 13300000
|
||||
DEBUG: predicate pruning for shardId 13300001
|
||||
DEBUG: predicate pruning for shardId 13300002
|
||||
DEBUG: predicate pruning for shardId 13300004
|
||||
DEBUG: predicate pruning for shardId 13300005
|
||||
DEBUG: predicate pruning for shardId 13300006
|
||||
DEBUG: distributed statement: INSERT INTO public.raw_events_second_13300007 AS citus_table_alias (user_id) SELECT user_id FROM public.raw_events_first_13300003 raw_events_first WHERE ((NOT (EXISTS (SELECT 1 FROM public.raw_events_second_13300007 raw_events_second WHERE (raw_events_second.user_id = raw_events_first.user_id)))) AND ((hashint4(user_id) >= 1073741824) AND (hashint4(user_id) <= 2147483647)))
|
||||
DEBUG: Plan is router executable
|
||||
-- more complex LEFT JOINs
|
||||
INSERT INTO agg_events
|
||||
(user_id, value_4_agg)
|
||||
SELECT
|
||||
outer_most.id, max(outer_most.value)
|
||||
FROM
|
||||
(
|
||||
SELECT f2.id as id, f2.v4 as value FROM
|
||||
(SELECT
|
||||
id
|
||||
FROM (SELECT raw_events_first.user_id AS id
|
||||
FROM raw_events_first LEFT JOIN
|
||||
reference_table
|
||||
ON (raw_events_first.user_id = reference_table.user_id)) AS foo) as f
|
||||
LEFT JOIN
|
||||
(SELECT v4,
|
||||
v1,
|
||||
id
|
||||
FROM (SELECT SUM(raw_events_second.value_4) AS v4,
|
||||
SUM(raw_events_first.value_1) AS v1,
|
||||
raw_events_second.user_id AS id
|
||||
FROM raw_events_first,
|
||||
raw_events_second
|
||||
WHERE raw_events_first.user_id = raw_events_second.user_id
|
||||
GROUP BY raw_events_second.user_id
|
||||
HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2
|
||||
ON (f.id = f2.id)) as outer_most
|
||||
GROUP BY
|
||||
outer_most.id;
|
||||
DEBUG: predicate pruning for shardId 13300001
|
||||
DEBUG: predicate pruning for shardId 13300002
|
||||
DEBUG: predicate pruning for shardId 13300003
|
||||
DEBUG: predicate pruning for shardId 13300005
|
||||
DEBUG: predicate pruning for shardId 13300006
|
||||
DEBUG: predicate pruning for shardId 13300007
|
||||
DEBUG: predicate pruning for shardId 13300001
|
||||
DEBUG: predicate pruning for shardId 13300002
|
||||
DEBUG: predicate pruning for shardId 13300003
|
||||
DEBUG: distributed statement: INSERT INTO public.agg_events_13300008 AS citus_table_alias (user_id, value_4_agg) SELECT id, max(value) AS max FROM (SELECT f2.id, f2.v4 AS value FROM ((SELECT foo.id FROM (SELECT raw_events_first.user_id AS id FROM (public.raw_events_first_13300000 raw_events_first LEFT JOIN public.reference_table_13300012 reference_table ON ((raw_events_first.user_id = reference_table.user_id)))) foo) f LEFT JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM public.raw_events_first_13300000 raw_events_first, public.raw_events_second_13300004 raw_events_second WHERE (raw_events_first.user_id = raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) > (10)::numeric)) foo2) f2 ON ((f.id = f2.id)))) outer_most WHERE ((hashint4(id) >= '-2147483648'::integer) AND (hashint4(id) <= '-1073741825'::integer)) GROUP BY id
|
||||
DEBUG: predicate pruning for shardId 13300000
|
||||
DEBUG: predicate pruning for shardId 13300002
|
||||
DEBUG: predicate pruning for shardId 13300003
|
||||
DEBUG: predicate pruning for shardId 13300004
|
||||
DEBUG: predicate pruning for shardId 13300006
|
||||
DEBUG: predicate pruning for shardId 13300007
|
||||
DEBUG: predicate pruning for shardId 13300000
|
||||
DEBUG: predicate pruning for shardId 13300002
|
||||
DEBUG: predicate pruning for shardId 13300003
|
||||
DEBUG: distributed statement: INSERT INTO public.agg_events_13300009 AS citus_table_alias (user_id, value_4_agg) SELECT id, max(value) AS max FROM (SELECT f2.id, f2.v4 AS value FROM ((SELECT foo.id FROM (SELECT raw_events_first.user_id AS id FROM (public.raw_events_first_13300001 raw_events_first LEFT JOIN public.reference_table_13300012 reference_table ON ((raw_events_first.user_id = reference_table.user_id)))) foo) f LEFT JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM public.raw_events_first_13300001 raw_events_first, public.raw_events_second_13300005 raw_events_second WHERE (raw_events_first.user_id = raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) > (10)::numeric)) foo2) f2 ON ((f.id = f2.id)))) outer_most WHERE ((hashint4(id) >= '-1073741824'::integer) AND (hashint4(id) <= '-1'::integer)) GROUP BY id
|
||||
DEBUG: predicate pruning for shardId 13300000
|
||||
DEBUG: predicate pruning for shardId 13300001
|
||||
DEBUG: predicate pruning for shardId 13300003
|
||||
DEBUG: predicate pruning for shardId 13300004
|
||||
DEBUG: predicate pruning for shardId 13300005
|
||||
DEBUG: predicate pruning for shardId 13300007
|
||||
DEBUG: predicate pruning for shardId 13300000
|
||||
DEBUG: predicate pruning for shardId 13300001
|
||||
DEBUG: predicate pruning for shardId 13300003
|
||||
DEBUG: distributed statement: INSERT INTO public.agg_events_13300010 AS citus_table_alias (user_id, value_4_agg) SELECT id, max(value) AS max FROM (SELECT f2.id, f2.v4 AS value FROM ((SELECT foo.id FROM (SELECT raw_events_first.user_id AS id FROM (public.raw_events_first_13300002 raw_events_first LEFT JOIN public.reference_table_13300012 reference_table ON ((raw_events_first.user_id = reference_table.user_id)))) foo) f LEFT JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM public.raw_events_first_13300002 raw_events_first, public.raw_events_second_13300006 raw_events_second WHERE (raw_events_first.user_id = raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) > (10)::numeric)) foo2) f2 ON ((f.id = f2.id)))) outer_most WHERE ((hashint4(id) >= 0) AND (hashint4(id) <= 1073741823)) GROUP BY id
|
||||
DEBUG: predicate pruning for shardId 13300000
|
||||
DEBUG: predicate pruning for shardId 13300001
|
||||
DEBUG: predicate pruning for shardId 13300002
|
||||
DEBUG: predicate pruning for shardId 13300004
|
||||
DEBUG: predicate pruning for shardId 13300005
|
||||
DEBUG: predicate pruning for shardId 13300006
|
||||
DEBUG: predicate pruning for shardId 13300000
|
||||
DEBUG: predicate pruning for shardId 13300001
|
||||
DEBUG: predicate pruning for shardId 13300002
|
||||
DEBUG: distributed statement: INSERT INTO public.agg_events_13300011 AS citus_table_alias (user_id, value_4_agg) SELECT id, max(value) AS max FROM (SELECT f2.id, f2.v4 AS value FROM ((SELECT foo.id FROM (SELECT raw_events_first.user_id AS id FROM (public.raw_events_first_13300003 raw_events_first LEFT JOIN public.reference_table_13300012 reference_table ON ((raw_events_first.user_id = reference_table.user_id)))) foo) f LEFT JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first.value_1) AS v1, raw_events_second.user_id AS id FROM public.raw_events_first_13300003 raw_events_first, public.raw_events_second_13300007 raw_events_second WHERE (raw_events_first.user_id = raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) > (10)::numeric)) foo2) f2 ON ((f.id = f2.id)))) outer_most WHERE ((hashint4(id) >= 1073741824) AND (hashint4(id) <= 2147483647)) GROUP BY id
|
||||
DEBUG: Plan is router executable
|
||||
-- cannot push down since the f.id IN is matched with value_1
|
||||
INSERT INTO raw_events_second
|
||||
(user_id)
|
||||
SELECT user_id
|
||||
FROM raw_events_first
|
||||
WHERE user_id IN (
|
||||
SELECT f2.id FROM
|
||||
(SELECT
|
||||
id
|
||||
FROM (SELECT reference_table.user_id AS id
|
||||
FROM raw_events_first,
|
||||
reference_table
|
||||
WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f
|
||||
INNER JOIN
|
||||
(SELECT v4,
|
||||
v1,
|
||||
id
|
||||
FROM (SELECT SUM(raw_events_second.value_4) AS v4,
|
||||
SUM(raw_events_first.value_1) AS v1,
|
||||
raw_events_second.user_id AS id
|
||||
FROM raw_events_first,
|
||||
raw_events_second
|
||||
WHERE raw_events_first.user_id = raw_events_second.user_id
|
||||
GROUP BY raw_events_second.user_id
|
||||
HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2
|
||||
ON (f.id = f2.id)
|
||||
WHERE f.id IN (SELECT value_1
|
||||
FROM raw_events_second));
|
||||
ERROR: cannot perform distributed planning for the given modification
|
||||
DETAIL: Select query cannot be pushed down to the worker.
|
||||
-- same as above, but this time is it safe to push down since
|
||||
-- f.id IN is matched with user_id
|
||||
INSERT INTO raw_events_second
|
||||
(user_id)
|
||||
SELECT user_id
|
||||
FROM raw_events_first
|
||||
WHERE user_id IN (
|
||||
SELECT f2.id FROM
|
||||
(SELECT
|
||||
id
|
||||
FROM (SELECT reference_table.user_id AS id
|
||||
FROM raw_events_first,
|
||||
reference_table
|
||||
WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f
|
||||
INNER JOIN
|
||||
(SELECT v4,
|
||||
v1,
|
||||
id
|
||||
FROM (SELECT SUM(raw_events_second.value_4) AS v4,
|
||||
SUM(raw_events_first.value_1) AS v1,
|
||||
raw_events_second.user_id AS id
|
||||
FROM raw_events_first,
|
||||
raw_events_second
|
||||
WHERE raw_events_first.user_id = raw_events_second.user_id
|
||||
GROUP BY raw_events_second.user_id
|
||||
HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2
|
||||
ON (f.id = f2.id)
|
||||
WHERE f.id IN (SELECT user_id
|
||||
FROM raw_events_second));
|
||||
DEBUG: predicate pruning for shardId 13300001
|
||||
DEBUG: predicate pruning for shardId 13300002
|
||||
DEBUG: predicate pruning for shardId 13300003
|
||||
DEBUG: predicate pruning for shardId 13300005
|
||||
DEBUG: predicate pruning for shardId 13300006
|
||||
DEBUG: predicate pruning for shardId 13300007
|
||||
DEBUG: predicate pruning for shardId 13300001
|
||||
DEBUG: predicate pruning for shardId 13300002
|
||||
DEBUG: predicate pruning for shardId 13300003
|
||||
DEBUG: predicate pruning for shardId 13300001
|
||||
DEBUG: predicate pruning for shardId 13300002
|
||||
DEBUG: predicate pruning for shardId 13300003
|
||||
DEBUG: predicate pruning for shardId 13300005
|
||||
DEBUG: predicate pruning for shardId 13300006
|
||||
DEBUG: predicate pruning for shardId 13300007
|
||||
DEBUG: distributed statement: INSERT INTO public.raw_events_second_13300004 AS citus_table_alias (user_id) SELECT user_id FROM public.raw_events_first_13300000 raw_events_first WHERE ((user_id IN (SELECT f2.id FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM public.raw_events_first_13300000 raw_events_first_1, public.reference_table_13300012 reference_table WHERE (raw_events_first_1.user_id = reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first_1.value_1) AS v1, raw_events_second.user_id AS id FROM public.raw_events_first_13300000 raw_events_first_1, public.raw_events_second_13300004 raw_events_second WHERE (raw_events_first_1.user_id = raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) > (10)::numeric)) foo2) f2 ON ((f.id = f2.id))) WHERE (f.id IN (SELECT raw_events_second.user_id FROM public.raw_events_second_13300004 raw_events_second)))) AND ((hashint4(user_id) >= '-2147483648'::integer) AND (hashint4(user_id) <= '-1073741825'::integer)))
|
||||
DEBUG: predicate pruning for shardId 13300000
|
||||
DEBUG: predicate pruning for shardId 13300002
|
||||
DEBUG: predicate pruning for shardId 13300003
|
||||
DEBUG: predicate pruning for shardId 13300004
|
||||
DEBUG: predicate pruning for shardId 13300006
|
||||
DEBUG: predicate pruning for shardId 13300007
|
||||
DEBUG: predicate pruning for shardId 13300000
|
||||
DEBUG: predicate pruning for shardId 13300002
|
||||
DEBUG: predicate pruning for shardId 13300003
|
||||
DEBUG: predicate pruning for shardId 13300000
|
||||
DEBUG: predicate pruning for shardId 13300002
|
||||
DEBUG: predicate pruning for shardId 13300003
|
||||
DEBUG: predicate pruning for shardId 13300004
|
||||
DEBUG: predicate pruning for shardId 13300006
|
||||
DEBUG: predicate pruning for shardId 13300007
|
||||
DEBUG: distributed statement: INSERT INTO public.raw_events_second_13300005 AS citus_table_alias (user_id) SELECT user_id FROM public.raw_events_first_13300001 raw_events_first WHERE ((user_id IN (SELECT f2.id FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM public.raw_events_first_13300001 raw_events_first_1, public.reference_table_13300012 reference_table WHERE (raw_events_first_1.user_id = reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first_1.value_1) AS v1, raw_events_second.user_id AS id FROM public.raw_events_first_13300001 raw_events_first_1, public.raw_events_second_13300005 raw_events_second WHERE (raw_events_first_1.user_id = raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) > (10)::numeric)) foo2) f2 ON ((f.id = f2.id))) WHERE (f.id IN (SELECT raw_events_second.user_id FROM public.raw_events_second_13300005 raw_events_second)))) AND ((hashint4(user_id) >= '-1073741824'::integer) AND (hashint4(user_id) <= '-1'::integer)))
|
||||
DEBUG: predicate pruning for shardId 13300000
|
||||
DEBUG: predicate pruning for shardId 13300001
|
||||
DEBUG: predicate pruning for shardId 13300003
|
||||
DEBUG: predicate pruning for shardId 13300004
|
||||
DEBUG: predicate pruning for shardId 13300005
|
||||
DEBUG: predicate pruning for shardId 13300007
|
||||
DEBUG: predicate pruning for shardId 13300000
|
||||
DEBUG: predicate pruning for shardId 13300001
|
||||
DEBUG: predicate pruning for shardId 13300003
|
||||
DEBUG: predicate pruning for shardId 13300000
|
||||
DEBUG: predicate pruning for shardId 13300001
|
||||
DEBUG: predicate pruning for shardId 13300003
|
||||
DEBUG: predicate pruning for shardId 13300004
|
||||
DEBUG: predicate pruning for shardId 13300005
|
||||
DEBUG: predicate pruning for shardId 13300007
|
||||
DEBUG: distributed statement: INSERT INTO public.raw_events_second_13300006 AS citus_table_alias (user_id) SELECT user_id FROM public.raw_events_first_13300002 raw_events_first WHERE ((user_id IN (SELECT f2.id FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM public.raw_events_first_13300002 raw_events_first_1, public.reference_table_13300012 reference_table WHERE (raw_events_first_1.user_id = reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first_1.value_1) AS v1, raw_events_second.user_id AS id FROM public.raw_events_first_13300002 raw_events_first_1, public.raw_events_second_13300006 raw_events_second WHERE (raw_events_first_1.user_id = raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) > (10)::numeric)) foo2) f2 ON ((f.id = f2.id))) WHERE (f.id IN (SELECT raw_events_second.user_id FROM public.raw_events_second_13300006 raw_events_second)))) AND ((hashint4(user_id) >= 0) AND (hashint4(user_id) <= 1073741823)))
|
||||
DEBUG: predicate pruning for shardId 13300000
|
||||
DEBUG: predicate pruning for shardId 13300001
|
||||
DEBUG: predicate pruning for shardId 13300002
|
||||
DEBUG: predicate pruning for shardId 13300004
|
||||
DEBUG: predicate pruning for shardId 13300005
|
||||
DEBUG: predicate pruning for shardId 13300006
|
||||
DEBUG: predicate pruning for shardId 13300000
|
||||
DEBUG: predicate pruning for shardId 13300001
|
||||
DEBUG: predicate pruning for shardId 13300002
|
||||
DEBUG: predicate pruning for shardId 13300000
|
||||
DEBUG: predicate pruning for shardId 13300001
|
||||
DEBUG: predicate pruning for shardId 13300002
|
||||
DEBUG: predicate pruning for shardId 13300004
|
||||
DEBUG: predicate pruning for shardId 13300005
|
||||
DEBUG: predicate pruning for shardId 13300006
|
||||
DEBUG: distributed statement: INSERT INTO public.raw_events_second_13300007 AS citus_table_alias (user_id) SELECT user_id FROM public.raw_events_first_13300003 raw_events_first WHERE ((user_id IN (SELECT f2.id FROM ((SELECT foo.id FROM (SELECT reference_table.user_id AS id FROM public.raw_events_first_13300003 raw_events_first_1, public.reference_table_13300012 reference_table WHERE (raw_events_first_1.user_id = reference_table.user_id)) foo) f JOIN (SELECT foo2.v4, foo2.v1, foo2.id FROM (SELECT sum(raw_events_second.value_4) AS v4, sum(raw_events_first_1.value_1) AS v1, raw_events_second.user_id AS id FROM public.raw_events_first_13300003 raw_events_first_1, public.raw_events_second_13300007 raw_events_second WHERE (raw_events_first_1.user_id = raw_events_second.user_id) GROUP BY raw_events_second.user_id HAVING (sum(raw_events_second.value_4) > (10)::numeric)) foo2) f2 ON ((f.id = f2.id))) WHERE (f.id IN (SELECT raw_events_second.user_id FROM public.raw_events_second_13300007 raw_events_second)))) AND ((hashint4(user_id) >= 1073741824) AND (hashint4(user_id) <= 2147483647)))
|
||||
DEBUG: Plan is router executable
|
||||
-- cannot push down since top level user_id is matched with NOT IN
|
||||
INSERT INTO raw_events_second
|
||||
(user_id)
|
||||
SELECT user_id
|
||||
FROM raw_events_first
|
||||
WHERE user_id NOT IN (
|
||||
SELECT f2.id FROM
|
||||
(SELECT
|
||||
id
|
||||
FROM (SELECT reference_table.user_id AS id
|
||||
FROM raw_events_first,
|
||||
reference_table
|
||||
WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f
|
||||
INNER JOIN
|
||||
(SELECT v4,
|
||||
v1,
|
||||
id
|
||||
FROM (SELECT SUM(raw_events_second.value_4) AS v4,
|
||||
SUM(raw_events_first.value_1) AS v1,
|
||||
raw_events_second.user_id AS id
|
||||
FROM raw_events_first,
|
||||
raw_events_second
|
||||
WHERE raw_events_first.user_id = raw_events_second.user_id
|
||||
GROUP BY raw_events_second.user_id
|
||||
HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2
|
||||
ON (f.id = f2.id)
|
||||
WHERE f.id IN (SELECT user_id
|
||||
FROM raw_events_second));
|
||||
ERROR: cannot perform distributed planning for the given modification
|
||||
DETAIL: Select query cannot be pushed down to the worker.
|
||||
-- cannot push down since join is not equi join (f.id > f2.id)
|
||||
INSERT INTO raw_events_second
|
||||
(user_id)
|
||||
SELECT user_id
|
||||
FROM raw_events_first
|
||||
WHERE user_id IN (
|
||||
SELECT f2.id FROM
|
||||
(SELECT
|
||||
id
|
||||
FROM (SELECT reference_table.user_id AS id
|
||||
FROM raw_events_first,
|
||||
reference_table
|
||||
WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f
|
||||
INNER JOIN
|
||||
(SELECT v4,
|
||||
v1,
|
||||
id
|
||||
FROM (SELECT SUM(raw_events_second.value_4) AS v4,
|
||||
SUM(raw_events_first.value_1) AS v1,
|
||||
raw_events_second.user_id AS id
|
||||
FROM raw_events_first,
|
||||
raw_events_second
|
||||
WHERE raw_events_first.user_id = raw_events_second.user_id
|
||||
GROUP BY raw_events_second.user_id
|
||||
HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2
|
||||
ON (f.id > f2.id)
|
||||
WHERE f.id IN (SELECT user_id
|
||||
FROM raw_events_second));
|
||||
ERROR: cannot perform distributed planning for the given modification
|
||||
DETAIL: Select query cannot be pushed down to the worker.
|
||||
-- we currently not support grouping sets
|
||||
|
@ -2054,7 +2381,7 @@ INSERT INTO raw_events_first (user_id, time, value_1, value_2, value_3, value_4)
|
|||
SELECT count(*) FROM raw_events_second;
|
||||
count
|
||||
-------
|
||||
9
|
||||
18
|
||||
(1 row)
|
||||
|
||||
INSERT INTO raw_events_second SELECT * FROM test_view;
|
||||
|
@ -2064,7 +2391,7 @@ INSERT INTO raw_events_second SELECT * FROM test_view WHERE user_id = 17 GROUP B
|
|||
SELECT count(*) FROM raw_events_second;
|
||||
count
|
||||
-------
|
||||
11
|
||||
20
|
||||
(1 row)
|
||||
|
||||
-- inserting into views does not
|
||||
|
@ -2210,6 +2537,32 @@ DEBUG: predicate pruning for shardId 13300007
|
|||
DEBUG: Skipping target shard interval 13300003 since SELECT query for it pruned away
|
||||
DEBUG: Plan is router executable
|
||||
SET client_min_messages TO INFO;
|
||||
-- now do some tests with varchars
|
||||
INSERT INTO insert_select_varchar_test VALUES ('test_1', 10);
|
||||
INSERT INTO insert_select_varchar_test VALUES ('test_2', 30);
|
||||
INSERT INTO insert_select_varchar_test (key, value)
|
||||
SELECT *, 100
|
||||
FROM (SELECT f1.key
|
||||
FROM (SELECT key
|
||||
FROM insert_select_varchar_test
|
||||
GROUP BY 1
|
||||
HAVING Count(key) < 3) AS f1,
|
||||
(SELECT key
|
||||
FROM insert_select_varchar_test
|
||||
GROUP BY 1
|
||||
HAVING Sum(COALESCE(insert_select_varchar_test.value, 0)) >
|
||||
20.0)
|
||||
AS f2
|
||||
WHERE f1.key = f2.key
|
||||
GROUP BY 1) AS foo;
|
||||
SELECT * FROM insert_select_varchar_test;
|
||||
key | value
|
||||
--------+-------
|
||||
test_2 | 30
|
||||
test_2 | 100
|
||||
test_1 | 10
|
||||
(3 rows)
|
||||
|
||||
-- some tests with DEFAULT columns and constant values
|
||||
-- this test is most importantly intended for deparsing the query correctly
|
||||
-- but still it is preferable to have this test here instead of multi_deparse_shard_query
|
||||
|
@ -2233,10 +2586,10 @@ SELECT create_distributed_table('table_with_defaults', 'store_id');
|
|||
SET client_min_messages TO DEBUG2;
|
||||
-- a very simple query
|
||||
INSERT INTO table_with_defaults SELECT * FROM table_with_defaults;
|
||||
DEBUG: predicate pruning for shardId 13300014
|
||||
DEBUG: distributed statement: INSERT INTO public.table_with_defaults_13300013 AS citus_table_alias (store_id, first_name, default_1, last_name, default_2) SELECT store_id, first_name, default_1, last_name, default_2 FROM public.table_with_defaults_13300013 table_with_defaults WHERE ((hashint4(store_id) >= '-2147483648'::integer) AND (hashint4(store_id) <= '-1'::integer))
|
||||
DEBUG: predicate pruning for shardId 13300013
|
||||
DEBUG: distributed statement: INSERT INTO public.table_with_defaults_13300014 AS citus_table_alias (store_id, first_name, default_1, last_name, default_2) SELECT store_id, first_name, default_1, last_name, default_2 FROM public.table_with_defaults_13300014 table_with_defaults WHERE ((hashint4(store_id) >= 0) AND (hashint4(store_id) <= 2147483647))
|
||||
DEBUG: predicate pruning for shardId 13300018
|
||||
DEBUG: distributed statement: INSERT INTO public.table_with_defaults_13300017 AS citus_table_alias (store_id, first_name, default_1, last_name, default_2) SELECT store_id, first_name, default_1, last_name, default_2 FROM public.table_with_defaults_13300017 table_with_defaults WHERE ((hashint4(store_id) >= '-2147483648'::integer) AND (hashint4(store_id) <= '-1'::integer))
|
||||
DEBUG: predicate pruning for shardId 13300017
|
||||
DEBUG: distributed statement: INSERT INTO public.table_with_defaults_13300018 AS citus_table_alias (store_id, first_name, default_1, last_name, default_2) SELECT store_id, first_name, default_1, last_name, default_2 FROM public.table_with_defaults_13300018 table_with_defaults WHERE ((hashint4(store_id) >= 0) AND (hashint4(store_id) <= 2147483647))
|
||||
DEBUG: Plan is router executable
|
||||
-- see that defaults are filled
|
||||
INSERT INTO table_with_defaults (store_id, first_name)
|
||||
|
@ -2244,10 +2597,10 @@ SELECT
|
|||
store_id, first_name
|
||||
FROM
|
||||
table_with_defaults;
|
||||
DEBUG: predicate pruning for shardId 13300014
|
||||
DEBUG: distributed statement: INSERT INTO public.table_with_defaults_13300013 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT store_id, first_name, 1 AS default_1, '2'::text AS default_2 FROM public.table_with_defaults_13300013 table_with_defaults WHERE ((hashint4(store_id) >= '-2147483648'::integer) AND (hashint4(store_id) <= '-1'::integer))
|
||||
DEBUG: predicate pruning for shardId 13300013
|
||||
DEBUG: distributed statement: INSERT INTO public.table_with_defaults_13300014 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT store_id, first_name, 1 AS default_1, '2'::text AS default_2 FROM public.table_with_defaults_13300014 table_with_defaults WHERE ((hashint4(store_id) >= 0) AND (hashint4(store_id) <= 2147483647))
|
||||
DEBUG: predicate pruning for shardId 13300018
|
||||
DEBUG: distributed statement: INSERT INTO public.table_with_defaults_13300017 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT store_id, first_name, 1 AS default_1, '2'::text AS default_2 FROM public.table_with_defaults_13300017 table_with_defaults WHERE ((hashint4(store_id) >= '-2147483648'::integer) AND (hashint4(store_id) <= '-1'::integer))
|
||||
DEBUG: predicate pruning for shardId 13300017
|
||||
DEBUG: distributed statement: INSERT INTO public.table_with_defaults_13300018 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT store_id, first_name, 1 AS default_1, '2'::text AS default_2 FROM public.table_with_defaults_13300018 table_with_defaults WHERE ((hashint4(store_id) >= 0) AND (hashint4(store_id) <= 2147483647))
|
||||
DEBUG: Plan is router executable
|
||||
-- shuffle one of the defaults and skip the other
|
||||
INSERT INTO table_with_defaults (default_2, store_id, first_name)
|
||||
|
@ -2255,10 +2608,10 @@ SELECT
|
|||
default_2, store_id, first_name
|
||||
FROM
|
||||
table_with_defaults;
|
||||
DEBUG: predicate pruning for shardId 13300014
|
||||
DEBUG: distributed statement: INSERT INTO public.table_with_defaults_13300013 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT store_id, first_name, 1 AS default_1, default_2 FROM public.table_with_defaults_13300013 table_with_defaults WHERE ((hashint4(store_id) >= '-2147483648'::integer) AND (hashint4(store_id) <= '-1'::integer))
|
||||
DEBUG: predicate pruning for shardId 13300013
|
||||
DEBUG: distributed statement: INSERT INTO public.table_with_defaults_13300014 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT store_id, first_name, 1 AS default_1, default_2 FROM public.table_with_defaults_13300014 table_with_defaults WHERE ((hashint4(store_id) >= 0) AND (hashint4(store_id) <= 2147483647))
|
||||
DEBUG: predicate pruning for shardId 13300018
|
||||
DEBUG: distributed statement: INSERT INTO public.table_with_defaults_13300017 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT store_id, first_name, 1 AS default_1, default_2 FROM public.table_with_defaults_13300017 table_with_defaults WHERE ((hashint4(store_id) >= '-2147483648'::integer) AND (hashint4(store_id) <= '-1'::integer))
|
||||
DEBUG: predicate pruning for shardId 13300017
|
||||
DEBUG: distributed statement: INSERT INTO public.table_with_defaults_13300018 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT store_id, first_name, 1 AS default_1, default_2 FROM public.table_with_defaults_13300018 table_with_defaults WHERE ((hashint4(store_id) >= 0) AND (hashint4(store_id) <= 2147483647))
|
||||
DEBUG: Plan is router executable
|
||||
-- shuffle both defaults
|
||||
INSERT INTO table_with_defaults (default_2, store_id, default_1, first_name)
|
||||
|
@ -2266,10 +2619,10 @@ SELECT
|
|||
default_2, store_id, default_1, first_name
|
||||
FROM
|
||||
table_with_defaults;
|
||||
DEBUG: predicate pruning for shardId 13300014
|
||||
DEBUG: distributed statement: INSERT INTO public.table_with_defaults_13300013 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT store_id, first_name, default_1, default_2 FROM public.table_with_defaults_13300013 table_with_defaults WHERE ((hashint4(store_id) >= '-2147483648'::integer) AND (hashint4(store_id) <= '-1'::integer))
|
||||
DEBUG: predicate pruning for shardId 13300013
|
||||
DEBUG: distributed statement: INSERT INTO public.table_with_defaults_13300014 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT store_id, first_name, default_1, default_2 FROM public.table_with_defaults_13300014 table_with_defaults WHERE ((hashint4(store_id) >= 0) AND (hashint4(store_id) <= 2147483647))
|
||||
DEBUG: predicate pruning for shardId 13300018
|
||||
DEBUG: distributed statement: INSERT INTO public.table_with_defaults_13300017 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT store_id, first_name, default_1, default_2 FROM public.table_with_defaults_13300017 table_with_defaults WHERE ((hashint4(store_id) >= '-2147483648'::integer) AND (hashint4(store_id) <= '-1'::integer))
|
||||
DEBUG: predicate pruning for shardId 13300017
|
||||
DEBUG: distributed statement: INSERT INTO public.table_with_defaults_13300018 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT store_id, first_name, default_1, default_2 FROM public.table_with_defaults_13300018 table_with_defaults WHERE ((hashint4(store_id) >= 0) AND (hashint4(store_id) <= 2147483647))
|
||||
DEBUG: Plan is router executable
|
||||
-- use constants instead of non-default column
|
||||
INSERT INTO table_with_defaults (default_2, last_name, store_id, first_name)
|
||||
|
@ -2277,10 +2630,10 @@ SELECT
|
|||
default_2, 'Freund', store_id, 'Andres'
|
||||
FROM
|
||||
table_with_defaults;
|
||||
DEBUG: predicate pruning for shardId 13300014
|
||||
DEBUG: distributed statement: INSERT INTO public.table_with_defaults_13300013 AS citus_table_alias (store_id, first_name, default_1, last_name, default_2) SELECT store_id, 'Andres'::text AS first_name, 1 AS default_1, 'Freund'::text AS last_name, default_2 FROM public.table_with_defaults_13300013 table_with_defaults WHERE ((hashint4(store_id) >= '-2147483648'::integer) AND (hashint4(store_id) <= '-1'::integer))
|
||||
DEBUG: predicate pruning for shardId 13300013
|
||||
DEBUG: distributed statement: INSERT INTO public.table_with_defaults_13300014 AS citus_table_alias (store_id, first_name, default_1, last_name, default_2) SELECT store_id, 'Andres'::text AS first_name, 1 AS default_1, 'Freund'::text AS last_name, default_2 FROM public.table_with_defaults_13300014 table_with_defaults WHERE ((hashint4(store_id) >= 0) AND (hashint4(store_id) <= 2147483647))
|
||||
DEBUG: predicate pruning for shardId 13300018
|
||||
DEBUG: distributed statement: INSERT INTO public.table_with_defaults_13300017 AS citus_table_alias (store_id, first_name, default_1, last_name, default_2) SELECT store_id, 'Andres'::text AS first_name, 1 AS default_1, 'Freund'::text AS last_name, default_2 FROM public.table_with_defaults_13300017 table_with_defaults WHERE ((hashint4(store_id) >= '-2147483648'::integer) AND (hashint4(store_id) <= '-1'::integer))
|
||||
DEBUG: predicate pruning for shardId 13300017
|
||||
DEBUG: distributed statement: INSERT INTO public.table_with_defaults_13300018 AS citus_table_alias (store_id, first_name, default_1, last_name, default_2) SELECT store_id, 'Andres'::text AS first_name, 1 AS default_1, 'Freund'::text AS last_name, default_2 FROM public.table_with_defaults_13300018 table_with_defaults WHERE ((hashint4(store_id) >= 0) AND (hashint4(store_id) <= 2147483647))
|
||||
DEBUG: Plan is router executable
|
||||
-- use constants instead of non-default column and skip both defaults
|
||||
INSERT INTO table_with_defaults (last_name, store_id, first_name)
|
||||
|
@ -2288,10 +2641,10 @@ SELECT
|
|||
'Freund', store_id, 'Andres'
|
||||
FROM
|
||||
table_with_defaults;
|
||||
DEBUG: predicate pruning for shardId 13300014
|
||||
DEBUG: distributed statement: INSERT INTO public.table_with_defaults_13300013 AS citus_table_alias (store_id, first_name, default_1, last_name, default_2) SELECT store_id, 'Andres'::text AS first_name, 1 AS default_1, 'Freund'::text AS last_name, '2'::text AS default_2 FROM public.table_with_defaults_13300013 table_with_defaults WHERE ((hashint4(store_id) >= '-2147483648'::integer) AND (hashint4(store_id) <= '-1'::integer))
|
||||
DEBUG: predicate pruning for shardId 13300013
|
||||
DEBUG: distributed statement: INSERT INTO public.table_with_defaults_13300014 AS citus_table_alias (store_id, first_name, default_1, last_name, default_2) SELECT store_id, 'Andres'::text AS first_name, 1 AS default_1, 'Freund'::text AS last_name, '2'::text AS default_2 FROM public.table_with_defaults_13300014 table_with_defaults WHERE ((hashint4(store_id) >= 0) AND (hashint4(store_id) <= 2147483647))
|
||||
DEBUG: predicate pruning for shardId 13300018
|
||||
DEBUG: distributed statement: INSERT INTO public.table_with_defaults_13300017 AS citus_table_alias (store_id, first_name, default_1, last_name, default_2) SELECT store_id, 'Andres'::text AS first_name, 1 AS default_1, 'Freund'::text AS last_name, '2'::text AS default_2 FROM public.table_with_defaults_13300017 table_with_defaults WHERE ((hashint4(store_id) >= '-2147483648'::integer) AND (hashint4(store_id) <= '-1'::integer))
|
||||
DEBUG: predicate pruning for shardId 13300017
|
||||
DEBUG: distributed statement: INSERT INTO public.table_with_defaults_13300018 AS citus_table_alias (store_id, first_name, default_1, last_name, default_2) SELECT store_id, 'Andres'::text AS first_name, 1 AS default_1, 'Freund'::text AS last_name, '2'::text AS default_2 FROM public.table_with_defaults_13300018 table_with_defaults WHERE ((hashint4(store_id) >= 0) AND (hashint4(store_id) <= 2147483647))
|
||||
DEBUG: Plan is router executable
|
||||
-- use constants instead of default columns
|
||||
INSERT INTO table_with_defaults (default_2, last_name, store_id, first_name, default_1)
|
||||
|
@ -2299,10 +2652,10 @@ SELECT
|
|||
20, last_name, store_id, first_name, 10
|
||||
FROM
|
||||
table_with_defaults;
|
||||
DEBUG: predicate pruning for shardId 13300014
|
||||
DEBUG: distributed statement: INSERT INTO public.table_with_defaults_13300013 AS citus_table_alias (store_id, first_name, default_1, last_name, default_2) SELECT store_id, first_name, 10, last_name, 20 FROM public.table_with_defaults_13300013 table_with_defaults WHERE ((hashint4(store_id) >= '-2147483648'::integer) AND (hashint4(store_id) <= '-1'::integer))
|
||||
DEBUG: predicate pruning for shardId 13300013
|
||||
DEBUG: distributed statement: INSERT INTO public.table_with_defaults_13300014 AS citus_table_alias (store_id, first_name, default_1, last_name, default_2) SELECT store_id, first_name, 10, last_name, 20 FROM public.table_with_defaults_13300014 table_with_defaults WHERE ((hashint4(store_id) >= 0) AND (hashint4(store_id) <= 2147483647))
|
||||
DEBUG: predicate pruning for shardId 13300018
|
||||
DEBUG: distributed statement: INSERT INTO public.table_with_defaults_13300017 AS citus_table_alias (store_id, first_name, default_1, last_name, default_2) SELECT store_id, first_name, 10, last_name, 20 FROM public.table_with_defaults_13300017 table_with_defaults WHERE ((hashint4(store_id) >= '-2147483648'::integer) AND (hashint4(store_id) <= '-1'::integer))
|
||||
DEBUG: predicate pruning for shardId 13300017
|
||||
DEBUG: distributed statement: INSERT INTO public.table_with_defaults_13300018 AS citus_table_alias (store_id, first_name, default_1, last_name, default_2) SELECT store_id, first_name, 10, last_name, 20 FROM public.table_with_defaults_13300018 table_with_defaults WHERE ((hashint4(store_id) >= 0) AND (hashint4(store_id) <= 2147483647))
|
||||
DEBUG: Plan is router executable
|
||||
-- use constants instead of both default columns and non-default columns
|
||||
INSERT INTO table_with_defaults (default_2, last_name, store_id, first_name, default_1)
|
||||
|
@ -2310,10 +2663,10 @@ SELECT
|
|||
20, 'Freund', store_id, 'Andres', 10
|
||||
FROM
|
||||
table_with_defaults;
|
||||
DEBUG: predicate pruning for shardId 13300014
|
||||
DEBUG: distributed statement: INSERT INTO public.table_with_defaults_13300013 AS citus_table_alias (store_id, first_name, default_1, last_name, default_2) SELECT store_id, 'Andres'::text AS first_name, 10, 'Freund'::text AS last_name, 20 FROM public.table_with_defaults_13300013 table_with_defaults WHERE ((hashint4(store_id) >= '-2147483648'::integer) AND (hashint4(store_id) <= '-1'::integer))
|
||||
DEBUG: predicate pruning for shardId 13300013
|
||||
DEBUG: distributed statement: INSERT INTO public.table_with_defaults_13300014 AS citus_table_alias (store_id, first_name, default_1, last_name, default_2) SELECT store_id, 'Andres'::text AS first_name, 10, 'Freund'::text AS last_name, 20 FROM public.table_with_defaults_13300014 table_with_defaults WHERE ((hashint4(store_id) >= 0) AND (hashint4(store_id) <= 2147483647))
|
||||
DEBUG: predicate pruning for shardId 13300018
|
||||
DEBUG: distributed statement: INSERT INTO public.table_with_defaults_13300017 AS citus_table_alias (store_id, first_name, default_1, last_name, default_2) SELECT store_id, 'Andres'::text AS first_name, 10, 'Freund'::text AS last_name, 20 FROM public.table_with_defaults_13300017 table_with_defaults WHERE ((hashint4(store_id) >= '-2147483648'::integer) AND (hashint4(store_id) <= '-1'::integer))
|
||||
DEBUG: predicate pruning for shardId 13300017
|
||||
DEBUG: distributed statement: INSERT INTO public.table_with_defaults_13300018 AS citus_table_alias (store_id, first_name, default_1, last_name, default_2) SELECT store_id, 'Andres'::text AS first_name, 10, 'Freund'::text AS last_name, 20 FROM public.table_with_defaults_13300018 table_with_defaults WHERE ((hashint4(store_id) >= 0) AND (hashint4(store_id) <= 2147483647))
|
||||
DEBUG: Plan is router executable
|
||||
-- some of the ultimate queries where we have constants,
|
||||
-- defaults and group by entry is not on the target entry
|
||||
|
@ -2324,10 +2677,10 @@ FROM
|
|||
table_with_defaults
|
||||
GROUP BY
|
||||
last_name, store_id;
|
||||
DEBUG: predicate pruning for shardId 13300014
|
||||
DEBUG: distributed statement: INSERT INTO public.table_with_defaults_13300013 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT store_id, 'Andres'::text AS first_name, 1 AS default_1, '2000'::text AS default_2 FROM public.table_with_defaults_13300013 table_with_defaults WHERE ((hashint4(store_id) >= '-2147483648'::integer) AND (hashint4(store_id) <= '-1'::integer)) GROUP BY last_name, store_id
|
||||
DEBUG: predicate pruning for shardId 13300013
|
||||
DEBUG: distributed statement: INSERT INTO public.table_with_defaults_13300014 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT store_id, 'Andres'::text AS first_name, 1 AS default_1, '2000'::text AS default_2 FROM public.table_with_defaults_13300014 table_with_defaults WHERE ((hashint4(store_id) >= 0) AND (hashint4(store_id) <= 2147483647)) GROUP BY last_name, store_id
|
||||
DEBUG: predicate pruning for shardId 13300018
|
||||
DEBUG: distributed statement: INSERT INTO public.table_with_defaults_13300017 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT store_id, 'Andres'::text AS first_name, 1 AS default_1, '2000'::text AS default_2 FROM public.table_with_defaults_13300017 table_with_defaults WHERE ((hashint4(store_id) >= '-2147483648'::integer) AND (hashint4(store_id) <= '-1'::integer)) GROUP BY last_name, store_id
|
||||
DEBUG: predicate pruning for shardId 13300017
|
||||
DEBUG: distributed statement: INSERT INTO public.table_with_defaults_13300018 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT store_id, 'Andres'::text AS first_name, 1 AS default_1, '2000'::text AS default_2 FROM public.table_with_defaults_13300018 table_with_defaults WHERE ((hashint4(store_id) >= 0) AND (hashint4(store_id) <= 2147483647)) GROUP BY last_name, store_id
|
||||
DEBUG: Plan is router executable
|
||||
INSERT INTO table_with_defaults (default_1, store_id, first_name, default_2)
|
||||
SELECT
|
||||
|
@ -2336,10 +2689,10 @@ FROM
|
|||
table_with_defaults
|
||||
GROUP BY
|
||||
last_name, store_id, first_name;
|
||||
DEBUG: predicate pruning for shardId 13300014
|
||||
DEBUG: distributed statement: INSERT INTO public.table_with_defaults_13300013 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT store_id, 'Andres'::text AS first_name, 1000, '2000'::text AS default_2 FROM public.table_with_defaults_13300013 table_with_defaults WHERE ((hashint4(store_id) >= '-2147483648'::integer) AND (hashint4(store_id) <= '-1'::integer)) GROUP BY last_name, store_id, first_name
|
||||
DEBUG: predicate pruning for shardId 13300013
|
||||
DEBUG: distributed statement: INSERT INTO public.table_with_defaults_13300014 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT store_id, 'Andres'::text AS first_name, 1000, '2000'::text AS default_2 FROM public.table_with_defaults_13300014 table_with_defaults WHERE ((hashint4(store_id) >= 0) AND (hashint4(store_id) <= 2147483647)) GROUP BY last_name, store_id, first_name
|
||||
DEBUG: predicate pruning for shardId 13300018
|
||||
DEBUG: distributed statement: INSERT INTO public.table_with_defaults_13300017 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT store_id, 'Andres'::text AS first_name, 1000, '2000'::text AS default_2 FROM public.table_with_defaults_13300017 table_with_defaults WHERE ((hashint4(store_id) >= '-2147483648'::integer) AND (hashint4(store_id) <= '-1'::integer)) GROUP BY last_name, store_id, first_name
|
||||
DEBUG: predicate pruning for shardId 13300017
|
||||
DEBUG: distributed statement: INSERT INTO public.table_with_defaults_13300018 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT store_id, 'Andres'::text AS first_name, 1000, '2000'::text AS default_2 FROM public.table_with_defaults_13300018 table_with_defaults WHERE ((hashint4(store_id) >= 0) AND (hashint4(store_id) <= 2147483647)) GROUP BY last_name, store_id, first_name
|
||||
DEBUG: Plan is router executable
|
||||
INSERT INTO table_with_defaults (default_1, store_id, first_name, default_2)
|
||||
SELECT
|
||||
|
@ -2348,10 +2701,10 @@ FROM
|
|||
table_with_defaults
|
||||
GROUP BY
|
||||
last_name, store_id, first_name, default_2;
|
||||
DEBUG: predicate pruning for shardId 13300014
|
||||
DEBUG: distributed statement: INSERT INTO public.table_with_defaults_13300013 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT store_id, 'Andres'::text AS first_name, 1000, '2000'::text AS default_2 FROM public.table_with_defaults_13300013 table_with_defaults WHERE ((hashint4(store_id) >= '-2147483648'::integer) AND (hashint4(store_id) <= '-1'::integer)) GROUP BY last_name, store_id, first_name, default_2
|
||||
DEBUG: predicate pruning for shardId 13300013
|
||||
DEBUG: distributed statement: INSERT INTO public.table_with_defaults_13300014 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT store_id, 'Andres'::text AS first_name, 1000, '2000'::text AS default_2 FROM public.table_with_defaults_13300014 table_with_defaults WHERE ((hashint4(store_id) >= 0) AND (hashint4(store_id) <= 2147483647)) GROUP BY last_name, store_id, first_name, default_2
|
||||
DEBUG: predicate pruning for shardId 13300018
|
||||
DEBUG: distributed statement: INSERT INTO public.table_with_defaults_13300017 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT store_id, 'Andres'::text AS first_name, 1000, '2000'::text AS default_2 FROM public.table_with_defaults_13300017 table_with_defaults WHERE ((hashint4(store_id) >= '-2147483648'::integer) AND (hashint4(store_id) <= '-1'::integer)) GROUP BY last_name, store_id, first_name, default_2
|
||||
DEBUG: predicate pruning for shardId 13300017
|
||||
DEBUG: distributed statement: INSERT INTO public.table_with_defaults_13300018 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT store_id, 'Andres'::text AS first_name, 1000, '2000'::text AS default_2 FROM public.table_with_defaults_13300018 table_with_defaults WHERE ((hashint4(store_id) >= 0) AND (hashint4(store_id) <= 2147483647)) GROUP BY last_name, store_id, first_name, default_2
|
||||
DEBUG: Plan is router executable
|
||||
INSERT INTO table_with_defaults (default_1, store_id, first_name)
|
||||
SELECT
|
||||
|
@ -2360,10 +2713,10 @@ FROM
|
|||
table_with_defaults
|
||||
GROUP BY
|
||||
last_name, store_id, first_name, default_2;
|
||||
DEBUG: predicate pruning for shardId 13300014
|
||||
DEBUG: distributed statement: INSERT INTO public.table_with_defaults_13300013 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT store_id, 'Andres'::text AS first_name, 1000, '2'::text AS default_2 FROM public.table_with_defaults_13300013 table_with_defaults WHERE ((hashint4(store_id) >= '-2147483648'::integer) AND (hashint4(store_id) <= '-1'::integer)) GROUP BY last_name, store_id, first_name, default_2
|
||||
DEBUG: predicate pruning for shardId 13300013
|
||||
DEBUG: distributed statement: INSERT INTO public.table_with_defaults_13300014 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT store_id, 'Andres'::text AS first_name, 1000, '2'::text AS default_2 FROM public.table_with_defaults_13300014 table_with_defaults WHERE ((hashint4(store_id) >= 0) AND (hashint4(store_id) <= 2147483647)) GROUP BY last_name, store_id, first_name, default_2
|
||||
DEBUG: predicate pruning for shardId 13300018
|
||||
DEBUG: distributed statement: INSERT INTO public.table_with_defaults_13300017 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT store_id, 'Andres'::text AS first_name, 1000, '2'::text AS default_2 FROM public.table_with_defaults_13300017 table_with_defaults WHERE ((hashint4(store_id) >= '-2147483648'::integer) AND (hashint4(store_id) <= '-1'::integer)) GROUP BY last_name, store_id, first_name, default_2
|
||||
DEBUG: predicate pruning for shardId 13300017
|
||||
DEBUG: distributed statement: INSERT INTO public.table_with_defaults_13300018 AS citus_table_alias (store_id, first_name, default_1, default_2) SELECT store_id, 'Andres'::text AS first_name, 1000, '2'::text AS default_2 FROM public.table_with_defaults_13300018 table_with_defaults WHERE ((hashint4(store_id) >= 0) AND (hashint4(store_id) <= 2147483647)) GROUP BY last_name, store_id, first_name, default_2
|
||||
DEBUG: Plan is router executable
|
||||
RESET client_min_messages;
|
||||
-- Stable function in default should be allowed
|
||||
|
|
|
@ -0,0 +1,457 @@
|
|||
------------------------------------
|
||||
------------------------------------
|
||||
-- Vanilla funnel query
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
INSERT INTO agg_results (user_id, value_1_agg)
|
||||
SELECT user_id, array_length(events_table, 1)
|
||||
FROM (
|
||||
SELECT user_id, array_agg(event ORDER BY time) AS events_table
|
||||
FROM (
|
||||
SELECT u.user_id, e.event_type::text AS event, e.time
|
||||
FROM users_table AS u,
|
||||
events_table AS e
|
||||
WHERE u.user_id = e.user_id
|
||||
AND u.user_id >= 10
|
||||
AND u.user_id <= 25
|
||||
AND e.event_type IN (100, 101, 102)
|
||||
) t
|
||||
GROUP BY user_id
|
||||
) q;
|
||||
-- get some statistics from the aggregated results to ensure the results are correct
|
||||
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||
count | count | avg
|
||||
-------+-------+---------------------
|
||||
5 | 5 | 15.6000000000000000
|
||||
(1 row)
|
||||
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
-- Funnel grouped by whether or not a user has done an event
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
TRUNCATE agg_results;
|
||||
INSERT INTO agg_results (user_id, value_1_agg, value_2_agg )
|
||||
SELECT user_id, sum(array_length(events_table, 1)), length(hasdone_event)
|
||||
FROM (
|
||||
SELECT
|
||||
t1.user_id,
|
||||
array_agg(event ORDER BY time) AS events_table,
|
||||
COALESCE(hasdone_event, 'Has not done event') AS hasdone_event
|
||||
FROM (
|
||||
(
|
||||
SELECT u.user_id, 'step=>1'::text AS event, e.time
|
||||
FROM users_table AS u,
|
||||
events_table AS e
|
||||
WHERE u.user_id = e.user_id
|
||||
AND u.user_id >= 10
|
||||
AND u.user_id <= 25
|
||||
AND e.event_type IN (100, 101, 102)
|
||||
)
|
||||
UNION
|
||||
(
|
||||
SELECT u.user_id, 'step=>2'::text AS event, e.time
|
||||
FROM users_table AS u,
|
||||
events_table AS e
|
||||
WHERE u.user_id = e.user_id
|
||||
AND u.user_id >= 10
|
||||
AND u.user_id <= 25
|
||||
AND e.event_type IN (103, 104, 105)
|
||||
)
|
||||
) t1 LEFT JOIN (
|
||||
SELECT DISTINCT user_id,
|
||||
'Has done event'::TEXT AS hasdone_event
|
||||
FROM events_table AS e
|
||||
|
||||
WHERE e.user_id >= 10
|
||||
AND e.user_id <= 25
|
||||
AND e.event_type IN (106, 107, 108)
|
||||
) t2 ON (t1.user_id = t2.user_id)
|
||||
GROUP BY t1.user_id, hasdone_event
|
||||
) t GROUP BY user_id, hasdone_event;
|
||||
-- get some statistics from the aggregated results to ensure the results are correct
|
||||
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||
count | count | avg
|
||||
-------+-------+---------------------
|
||||
8 | 8 | 16.1250000000000000
|
||||
(1 row)
|
||||
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
-- Funnel, grouped by the number of times a user has done an event
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
TRUNCATE agg_results;
|
||||
INSERT INTO agg_results (user_id, value_1_agg, value_2_agg)
|
||||
SELECT
|
||||
user_id,
|
||||
avg(array_length(events_table, 1)) AS event_average,
|
||||
count_pay
|
||||
FROM (
|
||||
SELECT
|
||||
subquery_1.user_id,
|
||||
array_agg(event ORDER BY time) AS events_table,
|
||||
COALESCE(count_pay, 0) AS count_pay
|
||||
FROM
|
||||
(
|
||||
(SELECT
|
||||
users_table.user_id,
|
||||
'action=>1'AS event,
|
||||
events_table.time
|
||||
FROM
|
||||
users_table,
|
||||
events_table
|
||||
WHERE
|
||||
users_table.user_id = events_table.user_id AND
|
||||
users_table.user_id >= 10 AND
|
||||
users_table.user_id <= 70 AND
|
||||
events_table.event_type > 10 AND events_table.event_type < 12
|
||||
)
|
||||
UNION
|
||||
(SELECT
|
||||
users_table.user_id,
|
||||
'action=>2'AS event,
|
||||
events_table.time
|
||||
FROM
|
||||
users_table,
|
||||
events_table
|
||||
WHERE
|
||||
users_table.user_id = events_table.user_id AND
|
||||
users_table.user_id >= 10 AND
|
||||
users_table.user_id <= 70 AND
|
||||
events_table.event_type > 12 AND events_table.event_type < 14
|
||||
)
|
||||
) AS subquery_1
|
||||
LEFT JOIN
|
||||
(SELECT
|
||||
user_id,
|
||||
COUNT(*) AS count_pay
|
||||
FROM
|
||||
users_table
|
||||
WHERE
|
||||
user_id >= 10 AND
|
||||
user_id <= 70 AND
|
||||
users_table.value_1 > 15 AND users_table.value_1 < 17
|
||||
GROUP BY
|
||||
user_id
|
||||
HAVING
|
||||
COUNT(*) > 1) AS subquery_2
|
||||
ON
|
||||
subquery_1.user_id = subquery_2.user_id
|
||||
GROUP BY
|
||||
subquery_1.user_id,
|
||||
count_pay) AS subquery_top
|
||||
WHERE
|
||||
array_ndims(events_table) > 0
|
||||
GROUP BY
|
||||
count_pay, user_id
|
||||
ORDER BY
|
||||
count_pay;
|
||||
-- get some statistics from the aggregated results to ensure the results are correct
|
||||
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||
count | count | avg
|
||||
-------+-------+---------------------
|
||||
8 | 8 | 45.0000000000000000
|
||||
(1 row)
|
||||
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
-- Most recently seen users_table events_table
|
||||
------------------------------------
|
||||
-- Note that we don't use ORDER BY/LIMIT yet
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
TRUNCATE agg_results;
|
||||
INSERT INTO agg_results (user_id, agg_time, value_2_agg)
|
||||
SELECT
|
||||
user_id,
|
||||
user_lastseen,
|
||||
array_length(event_array, 1)
|
||||
FROM (
|
||||
SELECT
|
||||
user_id,
|
||||
max(u.time) as user_lastseen,
|
||||
array_agg(event_type ORDER BY u.time) AS event_array
|
||||
FROM (
|
||||
|
||||
SELECT user_id, time
|
||||
FROM users_table
|
||||
WHERE
|
||||
user_id >= 10 AND
|
||||
user_id <= 70 AND
|
||||
users_table.value_1 > 10 AND users_table.value_1 < 12
|
||||
) u LEFT JOIN LATERAL (
|
||||
SELECT event_type, time
|
||||
FROM events_table
|
||||
WHERE user_id = u.user_id AND
|
||||
events_table.event_type > 10 AND events_table.event_type < 12
|
||||
) t ON true
|
||||
GROUP BY user_id
|
||||
) AS shard_union
|
||||
ORDER BY user_lastseen DESC;
|
||||
-- get some statistics from the aggregated results to ensure the results are correct
|
||||
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||
count | count | avg
|
||||
-------+-------+---------------------
|
||||
6 | 6 | 42.0000000000000000
|
||||
(1 row)
|
||||
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
-- Count the number of distinct users_table who are in segment X and Y and Z
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
TRUNCATE agg_results;
|
||||
INSERT INTO agg_results (user_id)
|
||||
SELECT DISTINCT user_id
|
||||
FROM users_table
|
||||
WHERE user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 10 AND value_1 <= 20)
|
||||
AND user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 30 AND value_1 <= 40)
|
||||
AND user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 50 AND value_1 <= 60);
|
||||
|
||||
-- get some statistics from the aggregated results to ensure the results are correct
|
||||
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||
count | count | avg
|
||||
-------+-------+---------------------
|
||||
33 | 33 | 50.3939393939393939
|
||||
(1 row)
|
||||
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
-- Count the number of distinct users_table who are in at least two of X and Y and Z segments
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
TRUNCATE agg_results;
|
||||
INSERT INTO agg_results(user_id)
|
||||
SELECT user_id
|
||||
FROM users_table
|
||||
WHERE (value_1 = 10
|
||||
OR value_1 = 11
|
||||
OR value_1 = 12)
|
||||
GROUP BY user_id
|
||||
HAVING count(distinct value_1) >= 2;
|
||||
-- get some statistics from the aggregated results to ensure the results are correct
|
||||
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||
count | count | avg
|
||||
-------+-------+---------------------
|
||||
4 | 4 | 51.0000000000000000
|
||||
(1 row)
|
||||
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
-- Find customers who have done X, and satisfy other customer specific criteria
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
TRUNCATE agg_results;
|
||||
INSERT INTO agg_results(user_id, value_2_agg)
|
||||
SELECT user_id, value_2 FROM users_table WHERE
|
||||
value_1 > 101 AND value_1 < 110
|
||||
AND value_2 >= 5
|
||||
AND EXISTS (SELECT user_id FROM events_table WHERE event_type>101 AND event_type < 110 AND value_3 > 100 AND user_id=users_table.user_id);
|
||||
-- get some statistics from the aggregated results to ensure the results are correct
|
||||
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||
count | count | avg
|
||||
-------+-------+---------------------
|
||||
34 | 27 | 40.5588235294117647
|
||||
(1 row)
|
||||
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
-- Customers who haven’t done X, and satisfy other customer specific criteria
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
TRUNCATE agg_results;
|
||||
INSERT INTO agg_results(user_id, value_2_agg)
|
||||
SELECT user_id, value_2 FROM users_table WHERE
|
||||
value_1 = 101
|
||||
AND value_2 >= 5
|
||||
AND NOT EXISTS (SELECT user_id FROM events_table WHERE event_type=101 AND value_3 > 100 AND user_id=users_table.user_id);
|
||||
-- get some statistics from the aggregated results to ensure the results are correct
|
||||
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||
count | count | avg
|
||||
-------+-------+---------------------
|
||||
8 | 7 | 39.7500000000000000
|
||||
(1 row)
|
||||
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
-- Customers who have done X and Y, and satisfy other customer specific criteria
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
TRUNCATE agg_results;
|
||||
INSERT INTO agg_results(user_id, value_2_agg)
|
||||
SELECT user_id, value_2 FROM users_table WHERE
|
||||
value_1 > 100
|
||||
AND value_2 >= 5
|
||||
AND EXISTS (SELECT user_id FROM events_table WHERE event_type!=100 AND value_3 > 100 AND user_id=users_table.user_id)
|
||||
AND EXISTS (SELECT user_id FROM events_table WHERE event_type=101 AND value_3 > 100 AND user_id=users_table.user_id);
|
||||
-- get some statistics from the aggregated results to ensure the results are correct
|
||||
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||
count | count | avg
|
||||
-------+-------+---------------------
|
||||
1202 | 14 | 47.7462562396006656
|
||||
(1 row)
|
||||
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
-- Customers who have done X and haven’t done Y, and satisfy other customer specific criteria
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
TRUNCATE agg_results;
|
||||
INSERT INTO agg_results(user_id, value_2_agg)
|
||||
SELECT user_id, value_2 FROM users_table WHERE
|
||||
value_2 >= 5
|
||||
AND EXISTS (SELECT user_id FROM events_table WHERE event_type > 100 AND event_type <= 300 AND value_3 > 100 AND user_id=users_table.user_id)
|
||||
AND NOT EXISTS (SELECT user_id FROM events_table WHERE event_type > 300 AND event_type <= 350 AND value_3 > 100 AND user_id=users_table.user_id);
|
||||
|
||||
-- get some statistics from the aggregated results to ensure the results are correct
|
||||
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||
count | count | avg
|
||||
-------+-------+---------------------
|
||||
205 | 2 | 55.2195121951219512
|
||||
(1 row)
|
||||
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
-- Customers who have done X more than 2 times, and satisfy other customer specific criteria
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
TRUNCATE agg_results;
|
||||
INSERT INTO agg_results(user_id, value_2_agg)
|
||||
SELECT user_id,
|
||||
value_2
|
||||
FROM users_table
|
||||
WHERE value_1 > 100
|
||||
AND value_1 < 124
|
||||
AND value_2 >= 5
|
||||
AND EXISTS (SELECT user_id
|
||||
FROM events_table
|
||||
WHERE event_type > 100
|
||||
AND event_type < 124
|
||||
AND value_3 > 100
|
||||
AND user_id = users_table.user_id
|
||||
GROUP BY user_id
|
||||
HAVING Count(*) > 2);
|
||||
|
||||
-- get some statistics from the aggregated results to ensure the results are correct
|
||||
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||
count | count | avg
|
||||
-------+-------+---------------------
|
||||
78 | 34 | 52.4230769230769231
|
||||
(1 row)
|
||||
|
||||
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
-- Find me all users_table who logged in more than once
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
TRUNCATE agg_results;
|
||||
INSERT INTO agg_results(user_id, value_1_agg)
|
||||
SELECT user_id, value_1 from
|
||||
(
|
||||
SELECT user_id, value_1 From users_table
|
||||
WHERE value_2 > 100 and user_id = 15 GROUP BY value_1, user_id HAVING count(*) > 1
|
||||
) as a;
|
||||
-- get some statistics from the aggregated results to ensure the results are correct
|
||||
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||
count | count | avg
|
||||
-------+-------+---------------------
|
||||
6 | 1 | 15.0000000000000000
|
||||
(1 row)
|
||||
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
-- Find me all users_table who have done some event, with additional filters
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
TRUNCATE agg_results;
|
||||
INSERT INTO agg_results(user_id)
|
||||
Select user_id
|
||||
From events_table
|
||||
Where event_type = 16
|
||||
And value_2 > 50
|
||||
And user_id in
|
||||
(select user_id
|
||||
From users_table
|
||||
Where value_1 = 15
|
||||
And value_2 > 25);
|
||||
|
||||
-- get some statistics from the aggregated results to ensure the results are correct
|
||||
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||
count | count | avg
|
||||
-------+-------+---------------------
|
||||
2 | 2 | 30.0000000000000000
|
||||
(1 row)
|
||||
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
-- Which events_table did people who have done some specific events_table also do
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
TRUNCATE agg_results;
|
||||
INSERT INTO agg_results(user_id, value_1_agg)
|
||||
SELECT user_id, event_type FROM events_table
|
||||
WHERE user_id in (SELECT user_id from events_table WHERE event_type > 500 and event_type < 505)
|
||||
GROUP BY user_id, event_type;
|
||||
-- get some statistics from the aggregated results to ensure the results are correct
|
||||
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||
count | count | avg
|
||||
-------+-------+---------------------
|
||||
3084 | 32 | 44.1498054474708171
|
||||
(1 row)
|
||||
|
||||
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
-- Find me all the users_table who have done some event more than three times
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
TRUNCATE agg_results;
|
||||
INSERT INTO agg_results(user_id)
|
||||
select user_id from
|
||||
(
|
||||
select
|
||||
user_id
|
||||
from
|
||||
events_table
|
||||
where event_type = 901 group by user_id having count(*) > 3
|
||||
) as a;
|
||||
|
||||
-- get some statistics from the aggregated results to ensure the results are correct
|
||||
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||
count | count | avg
|
||||
-------+-------+---------------------
|
||||
1 | 1 | 57.0000000000000000
|
||||
(1 row)
|
||||
|
||||
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
-- Find my assets that have the highest probability and fetch their metadata
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
TRUNCATE agg_results;
|
||||
INSERT INTO agg_results(user_id, value_1_agg, value_3_agg)
|
||||
SELECT
|
||||
users_table.user_id, users_table.value_1, prob
|
||||
FROM
|
||||
users_table
|
||||
JOIN
|
||||
(SELECT
|
||||
ma.user_id, (GREATEST(coalesce(ma.value_4 / 250, 0.0) + GREATEST(1.0))) / 2 AS prob
|
||||
FROM
|
||||
users_table AS ma, events_table as short_list
|
||||
WHERE
|
||||
short_list.user_id = ma.user_id and ma.value_1 < 50 and short_list.event_type < 50
|
||||
) temp
|
||||
ON users_table.user_id = temp.user_id
|
||||
WHERE users_table.value_1 < 50;
|
||||
-- get some statistics from the aggregated results to ensure the results are correct
|
||||
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||
count | count | avg
|
||||
-------+-------+---------------------
|
||||
14371 | 101 | 50.5232064574490293
|
||||
(1 row)
|
||||
|
||||
|
|
@ -0,0 +1,423 @@
|
|||
------------------------------------
|
||||
------------------------------------
|
||||
-- Vanilla funnel query -- single shard
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
TRUNCATE agg_results_second;
|
||||
INSERT INTO agg_results_second (user_id, value_1_agg)
|
||||
SELECT user_id, array_length(events_table, 1)
|
||||
FROM (
|
||||
SELECT user_id, array_agg(event ORDER BY time) AS events_table
|
||||
FROM (
|
||||
SELECT u.user_id, e.event_type::text AS event, e.time
|
||||
FROM users_table AS u,
|
||||
events_table AS e
|
||||
WHERE u.user_id = e.user_id
|
||||
AND u.user_id >= 10
|
||||
AND u.user_id <= 25
|
||||
AND e.event_type IN (100, 101, 102)
|
||||
) t
|
||||
GROUP BY user_id
|
||||
) q
|
||||
WHERE user_id = 20;
|
||||
-- get some statistics from the aggregated results to ensure the results are correct
|
||||
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results_second;
|
||||
count | count | avg
|
||||
-------+-------+---------------------
|
||||
1 | 1 | 20.0000000000000000
|
||||
(1 row)
|
||||
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
-- Vanilla funnel query -- two shards
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
TRUNCATE agg_results_second;
|
||||
INSERT INTO agg_results_second (user_id, value_1_agg)
|
||||
SELECT user_id, array_length(events_table, 1)
|
||||
FROM (
|
||||
SELECT user_id, array_agg(event ORDER BY time) AS events_table
|
||||
FROM (
|
||||
SELECT u.user_id, e.event_type::text AS event, e.time
|
||||
FROM users_table AS u,
|
||||
events_table AS e
|
||||
WHERE u.user_id = e.user_id AND
|
||||
(u.user_id = 13 OR u.user_id = 20) AND
|
||||
(e.user_id = 13 OR e.user_id = 20)
|
||||
AND e.event_type IN (100, 101, 102)
|
||||
) t
|
||||
GROUP BY user_id
|
||||
) q
|
||||
WHERE (user_id = 13 OR user_id = 20);
|
||||
-- get some statistics from the aggregated results to ensure the results are correct
|
||||
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results_second;
|
||||
count | count | avg
|
||||
-------+-------+---------------------
|
||||
2 | 2 | 16.5000000000000000
|
||||
(1 row)
|
||||
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
-- Funnel grouped by whether or not a user has done an event -- single shard query
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
TRUNCATE agg_results_second;
|
||||
INSERT INTO agg_results_second (user_id, value_1_agg, value_2_agg )
|
||||
SELECT user_id, sum(array_length(events_table, 1)), length(hasdone_event)
|
||||
FROM (
|
||||
SELECT
|
||||
t1.user_id,
|
||||
array_agg(event ORDER BY time) AS events_table,
|
||||
COALESCE(hasdone_event, 'Has not done event') AS hasdone_event
|
||||
FROM (
|
||||
(
|
||||
SELECT u.user_id, 'step=>1'::text AS event, e.time
|
||||
FROM users_table AS u,
|
||||
events_table AS e
|
||||
WHERE u.user_id = e.user_id
|
||||
AND u.user_id >= 10
|
||||
AND u.user_id <= 25
|
||||
AND e.event_type IN (100, 101, 102)
|
||||
)
|
||||
UNION
|
||||
(
|
||||
SELECT u.user_id, 'step=>2'::text AS event, e.time
|
||||
FROM users_table AS u,
|
||||
events_table AS e
|
||||
WHERE u.user_id = e.user_id
|
||||
AND u.user_id >= 10
|
||||
AND u.user_id <= 25
|
||||
AND e.event_type IN (103, 104, 105)
|
||||
)
|
||||
) t1 LEFT JOIN (
|
||||
SELECT DISTINCT user_id,
|
||||
'Has done event'::TEXT AS hasdone_event
|
||||
FROM events_table AS e
|
||||
|
||||
WHERE e.user_id >= 10
|
||||
AND e.user_id <= 25
|
||||
AND e.event_type IN (106, 107, 108)
|
||||
) t2 ON (t1.user_id = t2.user_id)
|
||||
WHERE t1.user_id = 20
|
||||
GROUP BY t1.user_id, hasdone_event
|
||||
) t GROUP BY user_id, hasdone_event;
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
-- Funnel grouped by whether or not a user has done an event -- two shards query
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
TRUNCATE agg_results_second;
|
||||
INSERT INTO agg_results_second (user_id, value_1_agg, value_2_agg )
|
||||
SELECT user_id, sum(array_length(events_table, 1)), length(hasdone_event)
|
||||
FROM (
|
||||
SELECT
|
||||
t1.user_id,
|
||||
array_agg(event ORDER BY time) AS events_table,
|
||||
COALESCE(hasdone_event, 'Has not done event') AS hasdone_event
|
||||
FROM (
|
||||
(
|
||||
SELECT u.user_id, 'step=>1'::text AS event, e.time
|
||||
FROM users_table AS u,
|
||||
events_table AS e
|
||||
WHERE u.user_id = e.user_id
|
||||
AND (e.user_id = 20 OR e.user_id = 17)
|
||||
AND e.event_type IN (100, 101, 102)
|
||||
)
|
||||
UNION
|
||||
(
|
||||
SELECT u.user_id, 'step=>2'::text AS event, e.time
|
||||
FROM users_table AS u,
|
||||
events_table AS e
|
||||
WHERE u.user_id = e.user_id
|
||||
AND (e.user_id = 20 OR e.user_id = 17)
|
||||
AND e.event_type IN (103, 104, 105)
|
||||
)
|
||||
) t1 LEFT JOIN (
|
||||
SELECT DISTINCT user_id,
|
||||
'Has done event'::TEXT AS hasdone_event
|
||||
FROM events_table AS e
|
||||
|
||||
WHERE
|
||||
(e.user_id = 20 OR e.user_id = 17)
|
||||
AND e.event_type IN (106, 107, 108)
|
||||
) t2 ON (t1.user_id = t2.user_id)
|
||||
WHERE (t1.user_id = 20 OR t1.user_id = 17)
|
||||
GROUP BY t1.user_id, hasdone_event
|
||||
) t GROUP BY user_id, hasdone_event;
|
||||
-- get some statistics from the aggregated results to ensure the results are correct
|
||||
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results_second;
|
||||
count | count | avg
|
||||
-------+-------+---------------------
|
||||
2 | 2 | 18.5000000000000000
|
||||
(1 row)
|
||||
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
-- Most recently seen users_table events_table -- single shard query
|
||||
------------------------------------
|
||||
-- Note that we don't use ORDER BY/LIMIT yet
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
TRUNCATE agg_results_second;
|
||||
INSERT INTO agg_results_second (user_id, agg_time, value_2_agg)
|
||||
SELECT
|
||||
user_id,
|
||||
user_lastseen,
|
||||
array_length(event_array, 1)
|
||||
FROM (
|
||||
SELECT
|
||||
user_id,
|
||||
max(u.time) as user_lastseen,
|
||||
array_agg(event_type ORDER BY u.time) AS event_array
|
||||
FROM (
|
||||
|
||||
SELECT user_id, time
|
||||
FROM users_table
|
||||
WHERE
|
||||
user_id >= 10 AND
|
||||
user_id <= 70 AND
|
||||
users_table.value_1 > 10 AND users_table.value_1 < 12
|
||||
) u LEFT JOIN LATERAL (
|
||||
SELECT event_type, time
|
||||
FROM events_table
|
||||
WHERE user_id = u.user_id AND
|
||||
events_table.event_type > 10 AND events_table.event_type < 12
|
||||
) t ON true
|
||||
WHERE user_id = 65
|
||||
GROUP BY user_id
|
||||
) AS shard_union
|
||||
ORDER BY user_lastseen DESC;
|
||||
-- get some statistics from the aggregated results to ensure the results are correct
|
||||
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results_second;
|
||||
count | count | avg
|
||||
-------+-------+---------------------
|
||||
1 | 1 | 65.0000000000000000
|
||||
(1 row)
|
||||
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
-- Most recently seen users_table events_table -- two shards query
|
||||
------------------------------------
|
||||
-- Note that we don't use ORDER BY/LIMIT yet
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
TRUNCATE agg_results_second;
|
||||
INSERT INTO agg_results_second (user_id, agg_time, value_2_agg)
|
||||
SELECT
|
||||
user_id,
|
||||
user_lastseen,
|
||||
array_length(event_array, 1)
|
||||
FROM (
|
||||
SELECT
|
||||
user_id,
|
||||
max(u.time) as user_lastseen,
|
||||
array_agg(event_type ORDER BY u.time) AS event_array
|
||||
FROM (
|
||||
|
||||
SELECT user_id, time
|
||||
FROM users_table
|
||||
WHERE
|
||||
user_id >= 10 AND
|
||||
user_id <= 70 AND
|
||||
(user_id = 65 OR user_id = 12) AND
|
||||
users_table.value_1 > 10 AND users_table.value_1 < 12
|
||||
) u LEFT JOIN LATERAL (
|
||||
SELECT event_type, time
|
||||
FROM events_table
|
||||
WHERE user_id = u.user_id AND (user_id = 65 OR user_id = 12) AND
|
||||
events_table.event_type > 10 AND events_table.event_type < 12
|
||||
) t ON true
|
||||
WHERE (user_id = 65 OR user_id = 12)
|
||||
GROUP BY user_id
|
||||
) AS shard_union
|
||||
ORDER BY user_lastseen DESC;
|
||||
-- get some statistics from the aggregated results to ensure the results are correct
|
||||
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results_second;
|
||||
count | count | avg
|
||||
-------+-------+---------------------
|
||||
2 | 2 | 38.5000000000000000
|
||||
(1 row)
|
||||
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
-- Count the number of distinct users_table who are in segment X and Y and Z -- single shard
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
TRUNCATE agg_results_second;
|
||||
INSERT INTO agg_results_second (user_id)
|
||||
SELECT DISTINCT user_id
|
||||
FROM users_table
|
||||
WHERE user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 10 AND value_1 <= 20)
|
||||
AND user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 30 AND value_1 <= 40)
|
||||
AND user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 50 AND value_1 <= 60)
|
||||
AND user_id = 7;
|
||||
|
||||
-- get some statistics from the aggregated results to ensure the results are correct
|
||||
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results_second;
|
||||
count | count | avg
|
||||
-------+-------+--------------------
|
||||
1 | 1 | 7.0000000000000000
|
||||
(1 row)
|
||||
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
-- Count the number of distinct users_table who are in segment X and Y and Z -- two shards
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
TRUNCATE agg_results_second;
|
||||
INSERT INTO agg_results_second (user_id)
|
||||
SELECT DISTINCT user_id
|
||||
FROM users_table
|
||||
WHERE user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 10 AND value_1 <= 20 AND (user_id = 7 OR user_id = 20))
|
||||
AND user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 30 AND value_1 <= 40 AND (user_id = 7 OR user_id = 20))
|
||||
AND user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 50 AND value_1 <= 60 AND (user_id = 7 OR user_id = 20))
|
||||
AND (user_id = 7 OR user_id = 20);
|
||||
|
||||
-- get some statistics from the aggregated results to ensure the results are correct
|
||||
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results_second;
|
||||
count | count | avg
|
||||
-------+-------+---------------------
|
||||
2 | 2 | 13.5000000000000000
|
||||
(1 row)
|
||||
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
-- Find customers who have done X, and satisfy other customer specific criteria -- single shard
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
TRUNCATE agg_results_second;
|
||||
INSERT INTO agg_results_second(user_id, value_2_agg)
|
||||
SELECT user_id, value_2 FROM users_table WHERE
|
||||
value_1 > 101 AND value_1 < 110
|
||||
AND value_2 >= 5
|
||||
AND EXISTS (SELECT user_id FROM events_table WHERE event_type>101 AND event_type < 110 AND value_3 > 100 AND user_id=users_table.user_id)
|
||||
AND user_id = 61;
|
||||
-- get some statistics from the aggregated results to ensure the results are correct
|
||||
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results_second;
|
||||
count | count | avg
|
||||
-------+-------+---------------------
|
||||
1 | 1 | 61.0000000000000000
|
||||
(1 row)
|
||||
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
-- Find customers who have done X, and satisfy other customer specific criteria -- two shards
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
TRUNCATE agg_results_second;
|
||||
INSERT INTO agg_results_second(user_id, value_2_agg)
|
||||
SELECT user_id, value_2 FROM users_table WHERE
|
||||
value_1 > 101 AND value_1 < 110
|
||||
AND value_2 >= 5
|
||||
AND EXISTS (SELECT user_id FROM events_table WHERE event_type>101 AND event_type < 110 AND value_3 > 100 AND (user_id = 61 OR user_id = 51) AND user_id=users_table.user_id)
|
||||
AND (user_id = 61 OR user_id = 51);
|
||||
-- get some statistics from the aggregated results to ensure the results are correct
|
||||
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results_second;
|
||||
count | count | avg
|
||||
-------+-------+---------------------
|
||||
2 | 2 | 56.0000000000000000
|
||||
(1 row)
|
||||
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
-- Customers who have done X and haven’t done Y, and satisfy other customer specific criteria -- single shard
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
TRUNCATE agg_results_second;
|
||||
INSERT INTO agg_results_second(user_id, value_2_agg)
|
||||
SELECT user_id, value_2 FROM users_table WHERE
|
||||
value_2 >= 5
|
||||
AND user_id = 96
|
||||
AND EXISTS (SELECT user_id FROM events_table WHERE event_type > 100 AND event_type <= 300 AND value_3 > 100 AND user_id=users_table.user_id)
|
||||
AND NOT EXISTS (SELECT user_id FROM events_table WHERE event_type > 300 AND event_type <= 350 AND value_3 > 100 AND user_id=users_table.user_id);
|
||||
|
||||
-- get some statistics from the aggregated results to ensure the results are correct
|
||||
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results_second;
|
||||
count | count | avg
|
||||
-------+-------+---------------------
|
||||
110 | 1 | 96.0000000000000000
|
||||
(1 row)
|
||||
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
-- Customers who have done X and haven’t done Y, and satisfy other customer specific criteria -- two shards
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
TRUNCATE agg_results_second;
|
||||
INSERT INTO agg_results_second(user_id, value_2_agg)
|
||||
SELECT user_id, value_2 FROM users_table WHERE
|
||||
value_2 >= 5
|
||||
AND (user_id = 96 OR user_id = 8)
|
||||
AND EXISTS (SELECT user_id FROM events_table WHERE event_type > 100 AND event_type <= 300 AND value_3 > 100 AND user_id=users_table.user_id AND (user_id = 96 OR user_id = 8))
|
||||
AND NOT EXISTS (SELECT user_id FROM events_table WHERE event_type > 300 AND event_type <= 350 AND value_3 > 100 AND user_id=users_table.user_id AND (user_id = 96 OR user_id = 8));
|
||||
|
||||
-- get some statistics from the aggregated results to ensure the results are correct
|
||||
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results_second;
|
||||
count | count | avg
|
||||
-------+-------+---------------------
|
||||
205 | 2 | 55.2195121951219512
|
||||
(1 row)
|
||||
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
-- Customers who have done X more than 2 times, and satisfy other customer specific criteria -- single shard
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
TRUNCATE agg_results_second;
|
||||
INSERT INTO agg_results_second(user_id, value_2_agg)
|
||||
SELECT user_id,
|
||||
value_2
|
||||
FROM users_table
|
||||
WHERE value_1 > 100
|
||||
AND value_1 < 124
|
||||
AND value_2 >= 5
|
||||
AND user_id = 47
|
||||
AND EXISTS (SELECT user_id
|
||||
FROM events_table
|
||||
WHERE event_type > 100
|
||||
AND event_type < 124
|
||||
AND value_3 > 100
|
||||
AND user_id = users_table.user_id
|
||||
AND user_id = 47
|
||||
GROUP BY user_id
|
||||
HAVING Count(*) > 2);
|
||||
|
||||
-- get some statistics from the aggregated results to ensure the results are correct
|
||||
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results_second;
|
||||
count | count | avg
|
||||
-------+-------+---------------------
|
||||
6 | 1 | 47.0000000000000000
|
||||
(1 row)
|
||||
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
-- Customers who have done X more than 2 times, and satisfy other customer specific criteria -- two shards
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
TRUNCATE agg_results_second;
|
||||
INSERT INTO agg_results_second(user_id, value_2_agg)
|
||||
SELECT user_id,
|
||||
value_2
|
||||
FROM users_table
|
||||
WHERE value_1 > 100
|
||||
AND value_1 < 124
|
||||
AND value_2 >= 5
|
||||
AND (user_id = 47 or user_id = 81)
|
||||
AND EXISTS (SELECT user_id
|
||||
FROM events_table
|
||||
WHERE event_type > 100
|
||||
AND event_type < 124
|
||||
AND value_3 > 100
|
||||
AND user_id = users_table.user_id
|
||||
AND (user_id = 47 or user_id = 81)
|
||||
GROUP BY user_id
|
||||
HAVING Count(*) > 2);
|
||||
|
||||
-- get some statistics from the aggregated results to ensure the results are correct
|
||||
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results_second;
|
||||
count | count | avg
|
||||
-------+-------+---------------------
|
||||
7 | 2 | 51.8571428571428571
|
||||
(1 row)
|
||||
|
||||
|
|
@ -0,0 +1,667 @@
|
|||
------------------------------------
|
||||
------------------------------------
|
||||
-- Vanilla funnel query
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
-- not pushable since the JOIN is not an equi join
|
||||
INSERT INTO agg_results_third (user_id, value_1_agg)
|
||||
SELECT user_id, array_length(events_table, 1)
|
||||
FROM (
|
||||
SELECT user_id, array_agg(event ORDER BY time) AS events_table
|
||||
FROM (
|
||||
SELECT u.user_id, e.event_type::text AS event, e.time
|
||||
FROM users_table AS u,
|
||||
events_table AS e
|
||||
WHERE u.user_id != e.user_id
|
||||
AND u.user_id >= 10
|
||||
AND u.user_id <= 25
|
||||
AND e.event_type IN (100, 101, 102)
|
||||
) t
|
||||
GROUP BY user_id
|
||||
) q;
|
||||
ERROR: cannot perform distributed planning for the given modification
|
||||
DETAIL: Select query cannot be pushed down to the worker.
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
-- Funnel grouped by whether or not a user has done an event
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
-- not pushable since the left part of the UNION
|
||||
-- is not an equi join
|
||||
INSERT INTO agg_results_third (user_id, value_1_agg, value_2_agg )
|
||||
SELECT user_id, sum(array_length(events_table, 1)), length(hasdone_event)
|
||||
FROM (
|
||||
SELECT
|
||||
t1.user_id,
|
||||
array_agg(event ORDER BY time) AS events_table,
|
||||
COALESCE(hasdone_event, 'Has not done event') AS hasdone_event
|
||||
FROM (
|
||||
(
|
||||
SELECT u.user_id, 'step=>1'::text AS event, e.time
|
||||
FROM users_table AS u,
|
||||
events_table AS e
|
||||
WHERE u.user_id != e.user_id
|
||||
AND u.user_id >= 10
|
||||
AND u.user_id <= 25
|
||||
AND e.event_type IN (100, 101, 102)
|
||||
)
|
||||
UNION
|
||||
(
|
||||
SELECT u.user_id, 'step=>2'::text AS event, e.time
|
||||
FROM users_table AS u,
|
||||
events_table AS e
|
||||
WHERE u.user_id = e.user_id
|
||||
AND u.user_id >= 10
|
||||
AND u.user_id <= 25
|
||||
AND e.event_type IN (103, 104, 105)
|
||||
)
|
||||
) t1 LEFT JOIN (
|
||||
SELECT DISTINCT user_id,
|
||||
'Has done event'::TEXT AS hasdone_event
|
||||
FROM events_table AS e
|
||||
|
||||
WHERE e.user_id >= 10
|
||||
AND e.user_id <= 25
|
||||
AND e.event_type IN (106, 107, 108)
|
||||
) t2 ON (t1.user_id = t2.user_id)
|
||||
GROUP BY t1.user_id, hasdone_event
|
||||
) t GROUP BY user_id, hasdone_event;
|
||||
ERROR: cannot perform distributed planning for the given modification
|
||||
DETAIL: Select query cannot be pushed down to the worker.
|
||||
-- not pushable since the right part of the UNION
|
||||
-- is not joined on the partition key
|
||||
INSERT INTO agg_results_third (user_id, value_1_agg, value_2_agg )
|
||||
SELECT user_id, sum(array_length(events_table, 1)), length(hasdone_event)
|
||||
FROM (
|
||||
SELECT
|
||||
t1.user_id,
|
||||
array_agg(event ORDER BY time) AS events_table,
|
||||
COALESCE(hasdone_event, 'Has not done event') AS hasdone_event
|
||||
FROM (
|
||||
(
|
||||
SELECT u.user_id, 'step=>1'::text AS event, e.time
|
||||
FROM users_table AS u,
|
||||
events_table AS e
|
||||
WHERE u.user_id = e.user_id
|
||||
AND u.user_id >= 10
|
||||
AND u.user_id <= 25
|
||||
AND e.event_type IN (100, 101, 102)
|
||||
)
|
||||
UNION
|
||||
(
|
||||
SELECT u.user_id, 'step=>2'::text AS event, e.time
|
||||
FROM users_table AS u,
|
||||
events_table AS e
|
||||
WHERE u.user_id = e.event_type
|
||||
AND u.user_id >= 10
|
||||
AND u.user_id <= 25
|
||||
AND e.event_type IN (103, 104, 105)
|
||||
)
|
||||
) t1 LEFT JOIN (
|
||||
SELECT DISTINCT user_id,
|
||||
'Has done event'::TEXT AS hasdone_event
|
||||
FROM events_table AS e
|
||||
|
||||
WHERE e.user_id >= 10
|
||||
AND e.user_id <= 25
|
||||
AND e.event_type IN (106, 107, 108)
|
||||
) t2 ON (t1.user_id = t2.user_id)
|
||||
GROUP BY t1.user_id, hasdone_event
|
||||
) t GROUP BY user_id, hasdone_event;
|
||||
ERROR: cannot perform distributed planning for the given modification
|
||||
DETAIL: Select query cannot be pushed down to the worker.
|
||||
-- the LEFT JOIN condition is not on the partition column (i.e., it is part_key divided by 2)
|
||||
INSERT INTO agg_results_third (user_id, value_1_agg, value_2_agg )
|
||||
SELECT user_id, sum(array_length(events_table, 1)), length(hasdone_event)
|
||||
FROM (
|
||||
SELECT
|
||||
t1.user_id,
|
||||
array_agg(event ORDER BY time) AS events_table,
|
||||
COALESCE(hasdone_event, 'Has not done event') AS hasdone_event
|
||||
FROM (
|
||||
(
|
||||
SELECT u.user_id, 'step=>1'::text AS event, e.time
|
||||
FROM users_table AS u,
|
||||
events_table AS e
|
||||
WHERE u.user_id = e.user_id
|
||||
AND u.user_id >= 10
|
||||
AND u.user_id <= 25
|
||||
AND e.event_type IN (100, 101, 102)
|
||||
)
|
||||
UNION
|
||||
(
|
||||
SELECT u.user_id, 'step=>2'::text AS event, e.time
|
||||
FROM users_table AS u,
|
||||
events_table AS e
|
||||
WHERE u.user_id = e.user_id
|
||||
AND u.user_id >= 10
|
||||
AND u.user_id <= 25
|
||||
AND e.event_type IN (103, 104, 105)
|
||||
)
|
||||
) t1 LEFT JOIN (
|
||||
SELECT DISTINCT user_id,
|
||||
'Has done event'::TEXT AS hasdone_event
|
||||
FROM events_table AS e
|
||||
|
||||
WHERE e.user_id >= 10
|
||||
AND e.user_id <= 25
|
||||
AND e.event_type IN (106, 107, 108)
|
||||
) t2 ON (t1.user_id = (t2.user_id)/2)
|
||||
GROUP BY t1.user_id, hasdone_event
|
||||
) t GROUP BY user_id, hasdone_event;
|
||||
ERROR: cannot perform distributed planning for the given modification
|
||||
DETAIL: Select query cannot be pushed down to the worker.
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
-- Funnel, grouped by the number of times a user has done an event
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
-- not pushable since the right of the UNION query is not joined on
|
||||
-- the partition key
|
||||
INSERT INTO agg_results_third (user_id, value_1_agg, value_2_agg)
|
||||
SELECT
|
||||
user_id,
|
||||
avg(array_length(events_table, 1)) AS event_average,
|
||||
count_pay
|
||||
FROM (
|
||||
SELECT
|
||||
subquery_1.user_id,
|
||||
array_agg(event ORDER BY time) AS events_table,
|
||||
COALESCE(count_pay, 0) AS count_pay
|
||||
FROM
|
||||
(
|
||||
(SELECT
|
||||
users_table.user_id,
|
||||
'action=>1'AS event,
|
||||
events_table.time
|
||||
FROM
|
||||
users_table,
|
||||
events_table
|
||||
WHERE
|
||||
users_table.user_id = events_table.user_id AND
|
||||
users_table.user_id >= 10 AND
|
||||
users_table.user_id <= 70 AND
|
||||
events_table.event_type > 10 AND events_table.event_type < 12
|
||||
)
|
||||
UNION
|
||||
(SELECT
|
||||
users_table.user_id,
|
||||
'action=>2'AS event,
|
||||
events_table.time
|
||||
FROM
|
||||
users_table,
|
||||
events_table
|
||||
WHERE
|
||||
users_table.user_id != events_table.user_id AND
|
||||
users_table.user_id >= 10 AND
|
||||
users_table.user_id <= 70 AND
|
||||
events_table.event_type > 12 AND events_table.event_type < 14
|
||||
)
|
||||
) AS subquery_1
|
||||
LEFT JOIN
|
||||
(SELECT
|
||||
user_id,
|
||||
COUNT(*) AS count_pay
|
||||
FROM
|
||||
users_table
|
||||
WHERE
|
||||
user_id >= 10 AND
|
||||
user_id <= 70 AND
|
||||
users_table.value_1 > 15 AND users_table.value_1 < 17
|
||||
GROUP BY
|
||||
user_id
|
||||
HAVING
|
||||
COUNT(*) > 1) AS subquery_2
|
||||
ON
|
||||
subquery_1.user_id = subquery_2.user_id
|
||||
GROUP BY
|
||||
subquery_1.user_id,
|
||||
count_pay) AS subquery_top
|
||||
WHERE
|
||||
array_ndims(events_table) > 0
|
||||
GROUP BY
|
||||
count_pay, user_id
|
||||
ORDER BY
|
||||
count_pay;
|
||||
ERROR: cannot perform distributed planning for the given modification
|
||||
DETAIL: Select query cannot be pushed down to the worker.
|
||||
-- not pushable since the JOIN condition is not equi JOIN
|
||||
-- (subquery_1 JOIN subquery_2)
|
||||
INSERT INTO agg_results_third (user_id, value_1_agg, value_2_agg)
|
||||
SELECT
|
||||
user_id,
|
||||
avg(array_length(events_table, 1)) AS event_average,
|
||||
count_pay
|
||||
FROM (
|
||||
SELECT
|
||||
subquery_1.user_id,
|
||||
array_agg(event ORDER BY time) AS events_table,
|
||||
COALESCE(count_pay, 0) AS count_pay
|
||||
FROM
|
||||
(
|
||||
(SELECT
|
||||
users_table.user_id,
|
||||
'action=>1'AS event,
|
||||
events_table.time
|
||||
FROM
|
||||
users_table,
|
||||
events_table
|
||||
WHERE
|
||||
users_table.user_id = events_table.user_id AND
|
||||
users_table.user_id >= 10 AND
|
||||
users_table.user_id <= 70 AND
|
||||
events_table.event_type > 10 AND events_table.event_type < 12
|
||||
)
|
||||
UNION
|
||||
(SELECT
|
||||
users_table.user_id,
|
||||
'action=>2'AS event,
|
||||
events_table.time
|
||||
FROM
|
||||
users_table,
|
||||
events_table
|
||||
WHERE
|
||||
users_table.user_id = events_table.user_id AND
|
||||
users_table.user_id >= 10 AND
|
||||
users_table.user_id <= 70 AND
|
||||
events_table.event_type > 12 AND events_table.event_type < 14
|
||||
)
|
||||
) AS subquery_1
|
||||
LEFT JOIN
|
||||
(SELECT
|
||||
user_id,
|
||||
COUNT(*) AS count_pay
|
||||
FROM
|
||||
users_table
|
||||
WHERE
|
||||
user_id >= 10 AND
|
||||
user_id <= 70 AND
|
||||
users_table.value_1 > 15 AND users_table.value_1 < 17
|
||||
GROUP BY
|
||||
user_id
|
||||
HAVING
|
||||
COUNT(*) > 1) AS subquery_2
|
||||
ON
|
||||
subquery_1.user_id > subquery_2.user_id
|
||||
GROUP BY
|
||||
subquery_1.user_id,
|
||||
count_pay) AS subquery_top
|
||||
WHERE
|
||||
array_ndims(events_table) > 0
|
||||
GROUP BY
|
||||
count_pay, user_id
|
||||
ORDER BY
|
||||
count_pay;
|
||||
ERROR: cannot perform distributed planning for the given modification
|
||||
DETAIL: Select query cannot be pushed down to the worker.
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
-- Most recently seen users_table events_table
|
||||
------------------------------------
|
||||
-- Note that we don't use ORDER BY/LIMIT yet
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
-- not pushable since lateral join is not an equi join
|
||||
INSERT INTO agg_results_third (user_id, agg_time, value_2_agg)
|
||||
SELECT
|
||||
user_id,
|
||||
user_lastseen,
|
||||
array_length(event_array, 1)
|
||||
FROM (
|
||||
SELECT
|
||||
user_id,
|
||||
max(u.time) as user_lastseen,
|
||||
array_agg(event_type ORDER BY u.time) AS event_array
|
||||
FROM (
|
||||
|
||||
SELECT user_id, time
|
||||
FROM users_table
|
||||
WHERE
|
||||
user_id >= 10 AND
|
||||
user_id <= 70 AND
|
||||
users_table.value_1 > 10 AND users_table.value_1 < 12
|
||||
) u LEFT JOIN LATERAL (
|
||||
SELECT event_type, time
|
||||
FROM events_table
|
||||
WHERE user_id != u.user_id AND
|
||||
events_table.event_type > 10 AND events_table.event_type < 12
|
||||
) t ON true
|
||||
GROUP BY user_id
|
||||
) AS shard_union
|
||||
ORDER BY user_lastseen DESC;
|
||||
ERROR: cannot perform distributed planning for the given modification
|
||||
DETAIL: Select query cannot be pushed down to the worker.
|
||||
-- not pushable since lateral join is not on the partition key
|
||||
INSERT INTO agg_results_third (user_id, agg_time, value_2_agg)
|
||||
SELECT
|
||||
user_id,
|
||||
user_lastseen,
|
||||
array_length(event_array, 1)
|
||||
FROM (
|
||||
SELECT
|
||||
user_id,
|
||||
max(u.time) as user_lastseen,
|
||||
array_agg(event_type ORDER BY u.time) AS event_array
|
||||
FROM (
|
||||
|
||||
SELECT user_id, time
|
||||
FROM users_table
|
||||
WHERE
|
||||
user_id >= 10 AND
|
||||
user_id <= 70 AND
|
||||
users_table.value_1 > 10 AND users_table.value_1 < 12
|
||||
) u LEFT JOIN LATERAL (
|
||||
SELECT event_type, time
|
||||
FROM events_table
|
||||
WHERE event_type = u.user_id AND
|
||||
events_table.event_type > 10 AND events_table.event_type < 12
|
||||
) t ON true
|
||||
GROUP BY user_id
|
||||
) AS shard_union
|
||||
ORDER BY user_lastseen DESC;
|
||||
ERROR: cannot perform distributed planning for the given modification
|
||||
DETAIL: Select query cannot be pushed down to the worker.
|
||||
-- not pushable since lateral join is not on the partition key
|
||||
INSERT INTO agg_results_third (user_id, agg_time, value_2_agg)
|
||||
SELECT
|
||||
user_id,
|
||||
user_lastseen,
|
||||
array_length(event_array, 1)
|
||||
FROM (
|
||||
SELECT
|
||||
user_id,
|
||||
max(u.time) as user_lastseen,
|
||||
array_agg(event_type ORDER BY u.time) AS event_array
|
||||
FROM (
|
||||
|
||||
SELECT user_id, time, value_3 as val_3
|
||||
FROM users_table
|
||||
WHERE
|
||||
user_id >= 10 AND
|
||||
user_id <= 70 AND
|
||||
users_table.value_1 > 10 AND users_table.value_1 < 12
|
||||
) u LEFT JOIN LATERAL (
|
||||
SELECT event_type, time
|
||||
FROM events_table
|
||||
WHERE event_type = u.val_3 AND
|
||||
events_table.event_type > 10 AND events_table.event_type < 12
|
||||
) t ON true
|
||||
GROUP BY user_id
|
||||
) AS shard_union
|
||||
ORDER BY user_lastseen DESC;
|
||||
ERROR: cannot perform distributed planning for the given modification
|
||||
DETAIL: Select query cannot be pushed down to the worker.
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
-- Count the number of distinct users_table who are in segment X and Y and Z
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
-- not pushable since partition key is NOT IN
|
||||
INSERT INTO agg_results_third (user_id)
|
||||
SELECT DISTINCT user_id
|
||||
FROM users_table
|
||||
WHERE user_id NOT IN (SELECT user_id FROM users_table WHERE value_1 >= 10 AND value_1 <= 20)
|
||||
AND user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 30 AND value_1 <= 40)
|
||||
AND user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 50 AND value_1 <= 60);
|
||||
ERROR: cannot perform distributed planning for the given modification
|
||||
DETAIL: Select query cannot be pushed down to the worker.
|
||||
-- not pushable since partition key is not selected from the second subquery
|
||||
INSERT INTO agg_results_third (user_id)
|
||||
SELECT DISTINCT user_id
|
||||
FROM users_table
|
||||
WHERE user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 10 AND value_1 <= 20)
|
||||
AND user_id IN (SELECT value_1 FROM users_table WHERE value_1 >= 30 AND value_1 <= 40)
|
||||
AND user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 50 AND value_1 <= 60);
|
||||
ERROR: cannot perform distributed planning for the given modification
|
||||
DETAIL: Select query cannot be pushed down to the worker.
|
||||
-- not pushable since second subquery does not return bare partition key
|
||||
INSERT INTO agg_results_third (user_id)
|
||||
SELECT DISTINCT user_id
|
||||
FROM users_table
|
||||
WHERE user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 10 AND value_1 <= 20)
|
||||
AND user_id IN (SELECT 3 * user_id FROM users_table WHERE value_1 >= 30 AND value_1 <= 40)
|
||||
AND user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 50 AND value_1 <= 60);
|
||||
ERROR: cannot perform distributed planning for the given modification
|
||||
DETAIL: Select query cannot be pushed down to the worker.
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
-- Find customers who have done X, and satisfy other customer specific criteria
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
-- not pushable since join is not an equi join
|
||||
INSERT INTO agg_results_third(user_id, value_2_agg)
|
||||
SELECT user_id, value_2 FROM users_table WHERE
|
||||
value_1 > 101 AND value_1 < 110
|
||||
AND value_2 >= 5
|
||||
AND EXISTS (SELECT user_id FROM events_table WHERE event_type>101 AND event_type < 110 AND value_3 > 100 AND user_id!=users_table.user_id);
|
||||
ERROR: cannot perform distributed planning for the given modification
|
||||
DETAIL: Select query cannot be pushed down to the worker.
|
||||
-- not pushable since the join is not on the partition key
|
||||
INSERT INTO agg_results_third(user_id, value_2_agg)
|
||||
SELECT user_id, value_2 FROM users_table WHERE
|
||||
value_1 > 101 AND value_1 < 110
|
||||
AND value_2 >= 5
|
||||
AND EXISTS (SELECT user_id FROM events_table WHERE event_type>101 AND event_type < 110 AND value_3 > 100 AND event_type = users_table.user_id);
|
||||
ERROR: cannot perform distributed planning for the given modification
|
||||
DETAIL: Select query cannot be pushed down to the worker.
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
-- Customers who haven’t done X, and satisfy other customer specific criteria
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
-- not pushable since the join is not an equi join
|
||||
INSERT INTO agg_results_third(user_id, value_2_agg)
|
||||
SELECT user_id, value_2 FROM users_table WHERE
|
||||
value_1 = 101
|
||||
AND value_2 >= 5
|
||||
AND NOT EXISTS (SELECT user_id FROM events_table WHERE event_type=101 AND value_3 > 100 AND user_id!=users_table.user_id);
|
||||
ERROR: cannot perform distributed planning for the given modification
|
||||
DETAIL: Select query cannot be pushed down to the worker.
|
||||
-- not pushable since the join is not the partition key
|
||||
INSERT INTO agg_results_third(user_id, value_2_agg)
|
||||
SELECT user_id, value_2 FROM users_table WHERE
|
||||
value_1 = 101
|
||||
AND value_2 >= 5
|
||||
AND NOT EXISTS (SELECT user_id FROM events_table WHERE event_type=101 AND value_3 > 100 AND event_type=users_table.user_id);
|
||||
ERROR: cannot perform distributed planning for the given modification
|
||||
DETAIL: Select query cannot be pushed down to the worker.
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
-- Customers who have done X and Y, and satisfy other customer specific criteria
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
-- not pushable since the second join is not an equi join
|
||||
INSERT INTO agg_results_third(user_id, value_2_agg)
|
||||
SELECT user_id, value_2 FROM users_table WHERE
|
||||
value_1 > 100
|
||||
AND value_2 >= 5
|
||||
AND EXISTS (SELECT user_id FROM events_table WHERE event_type!=100 AND value_3 > 100 AND user_id=users_table.user_id)
|
||||
AND EXISTS (SELECT user_id FROM events_table WHERE event_type=101 AND value_3 > 100 AND user_id!=users_table.user_id);
|
||||
ERROR: cannot perform distributed planning for the given modification
|
||||
DETAIL: Select query cannot be pushed down to the worker.
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
-- Customers who have done X and haven’t done Y, and satisfy other customer specific criteria
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
-- not pushable since the first join is not an equi join
|
||||
INSERT INTO agg_results_third(user_id, value_2_agg)
|
||||
SELECT user_id, value_2 FROM users_table WHERE
|
||||
value_2 >= 5
|
||||
AND EXISTS (SELECT user_id FROM events_table WHERE event_type > 100 AND event_type <= 300 AND value_3 > 100 AND user_id!=users_table.user_id)
|
||||
AND NOT EXISTS (SELECT user_id FROM events_table WHERE event_type > 300 AND event_type <= 350 AND value_3 > 100 AND user_id=users_table.user_id);
|
||||
ERROR: cannot perform distributed planning for the given modification
|
||||
DETAIL: Select query cannot be pushed down to the worker.
|
||||
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
-- Customers who have done X more than 2 times, and satisfy other customer specific criteria
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
-- not pushable since the second join is not an equi join
|
||||
INSERT INTO agg_results_third(user_id, value_2_agg)
|
||||
SELECT user_id,
|
||||
value_2
|
||||
FROM users_table
|
||||
WHERE value_1 > 100
|
||||
AND value_1 < 124
|
||||
AND value_2 >= 5
|
||||
AND EXISTS (SELECT user_id
|
||||
FROM events_table
|
||||
WHERE event_type > 100
|
||||
AND event_type < 124
|
||||
AND value_3 > 100
|
||||
AND user_id != users_table.user_id
|
||||
GROUP BY user_id
|
||||
HAVING Count(*) > 2);
|
||||
ERROR: cannot perform distributed planning for the given modification
|
||||
DETAIL: Select query cannot be pushed down to the worker.
|
||||
-- not pushable since the second join is not on the partition key
|
||||
INSERT INTO agg_results_third(user_id, value_2_agg)
|
||||
SELECT user_id,
|
||||
value_2
|
||||
FROM users_table
|
||||
WHERE value_1 > 100
|
||||
AND value_1 < 124
|
||||
AND value_2 >= 5
|
||||
AND EXISTS (SELECT user_id
|
||||
FROM events_table
|
||||
WHERE event_type > 100
|
||||
AND event_type < 124
|
||||
AND value_3 > 100
|
||||
AND event_type = users_table.user_id
|
||||
GROUP BY user_id
|
||||
HAVING Count(*) > 2);
|
||||
ERROR: cannot perform distributed planning for the given modification
|
||||
DETAIL: Select query cannot be pushed down to the worker.
|
||||
-- not pushable since the second join is not on the partition key
|
||||
INSERT INTO agg_results_third(user_id, value_2_agg)
|
||||
SELECT user_id,
|
||||
value_2
|
||||
FROM users_table
|
||||
WHERE value_1 > 100
|
||||
AND value_1 < 124
|
||||
AND value_2 >= 5
|
||||
AND EXISTS (SELECT user_id
|
||||
FROM events_table
|
||||
WHERE event_type > 100
|
||||
AND event_type < 124
|
||||
AND value_3 > 100
|
||||
AND user_id = users_table.value_1
|
||||
GROUP BY user_id
|
||||
HAVING Count(*) > 2);
|
||||
ERROR: cannot perform distributed planning for the given modification
|
||||
DETAIL: Select query cannot be pushed down to the worker.
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
-- Find me all users_table who has done some event and has filters
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
-- not pushable due to NOT IN
|
||||
INSERT INTO agg_results_third(user_id)
|
||||
Select user_id
|
||||
From events_table
|
||||
Where event_type = 16
|
||||
And value_2 > 50
|
||||
And user_id NOT in
|
||||
(select user_id
|
||||
From users_table
|
||||
Where value_1 = 15
|
||||
And value_2 > 25);
|
||||
ERROR: cannot perform distributed planning for the given modification
|
||||
DETAIL: Select query cannot be pushed down to the worker.
|
||||
-- not pushable since we're not selecting the partition key
|
||||
INSERT INTO agg_results_third(user_id)
|
||||
Select user_id
|
||||
From events_table
|
||||
Where event_type = 16
|
||||
And value_2 > 50
|
||||
And user_id in
|
||||
(select value_3
|
||||
From users_table
|
||||
Where value_1 = 15
|
||||
And value_2 > 25);
|
||||
ERROR: cannot perform distributed planning for the given modification
|
||||
DETAIL: Select query cannot be pushed down to the worker.
|
||||
|
||||
-- not pushable since we're not selecting the partition key
|
||||
-- from the events table
|
||||
INSERT INTO agg_results_third(user_id)
|
||||
Select user_id
|
||||
From events_table
|
||||
Where event_type = 16
|
||||
And value_2 > 50
|
||||
And event_type in
|
||||
(select user_id
|
||||
From users_table
|
||||
Where value_1 = 15
|
||||
And value_2 > 25);
|
||||
ERROR: cannot perform distributed planning for the given modification
|
||||
DETAIL: Select query cannot be pushed down to the worker.
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
-- Which events_table did people who has done some specific events_table
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
-- not pushable due to NOT IN
|
||||
INSERT INTO agg_results_third(user_id, value_1_agg)
|
||||
SELECT user_id, event_type FROM events_table
|
||||
WHERE user_id NOT IN (SELECT user_id from events_table WHERE event_type > 500 and event_type < 505)
|
||||
GROUP BY user_id, event_type;
|
||||
ERROR: cannot perform distributed planning for the given modification
|
||||
DETAIL: Select query cannot be pushed down to the worker.
|
||||
-- not pushable due to not selecting the partition key
|
||||
INSERT INTO agg_results_third(user_id, value_1_agg)
|
||||
SELECT user_id, event_type FROM events_table
|
||||
WHERE user_id IN (SELECT value_2 from events_table WHERE event_type > 500 and event_type < 505)
|
||||
GROUP BY user_id, event_type;
|
||||
ERROR: cannot perform distributed planning for the given modification
|
||||
DETAIL: Select query cannot be pushed down to the worker.
|
||||
-- not pushable due to not comparing user id from the events table
|
||||
INSERT INTO agg_results_third(user_id, value_1_agg)
|
||||
SELECT user_id, event_type FROM events_table
|
||||
WHERE event_type IN (SELECT user_id from events_table WHERE event_type > 500 and event_type < 505)
|
||||
GROUP BY user_id, event_type;
|
||||
ERROR: cannot perform distributed planning for the given modification
|
||||
DETAIL: Select query cannot be pushed down to the worker.
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
-- Find my assets that have the highest probability and fetch their metadata
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
-- not pushable since the join is not an equi join
|
||||
INSERT INTO agg_results_third(user_id, value_1_agg, value_3_agg)
|
||||
SELECT
|
||||
users_table.user_id, users_table.value_1, prob
|
||||
FROM
|
||||
users_table
|
||||
JOIN
|
||||
(SELECT
|
||||
ma.user_id, (GREATEST(coalesce(ma.value_4 / 250, 0.0) + GREATEST(1.0))) / 2 AS prob
|
||||
FROM
|
||||
users_table AS ma, events_table as short_list
|
||||
WHERE
|
||||
short_list.user_id != ma.user_id and ma.value_1 < 50 and short_list.event_type < 50
|
||||
) temp
|
||||
ON users_table.user_id = temp.user_id
|
||||
WHERE users_table.value_1 < 50;
|
||||
ERROR: cannot perform distributed planning for the given modification
|
||||
DETAIL: Select query cannot be pushed down to the worker.
|
||||
-- not pushable since the join is not on the partition key
|
||||
INSERT INTO agg_results_third(user_id, value_1_agg, value_3_agg)
|
||||
SELECT
|
||||
users_table.user_id, users_table.value_1, prob
|
||||
FROM
|
||||
users_table
|
||||
JOIN
|
||||
(SELECT
|
||||
ma.user_id, (GREATEST(coalesce(ma.value_4 / 250, 0.0) + GREATEST(1.0))) / 2 AS prob
|
||||
FROM
|
||||
users_table AS ma, events_table as short_list
|
||||
WHERE
|
||||
short_list.user_id = ma.value_2 and ma.value_1 < 50 and short_list.event_type < 50
|
||||
) temp
|
||||
ON users_table.user_id = temp.user_id
|
||||
WHERE users_table.value_1 < 50;
|
||||
ERROR: cannot perform distributed planning for the given modification
|
||||
DETAIL: Select query cannot be pushed down to the worker.
|
|
@ -0,0 +1,43 @@
|
|||
--
|
||||
-- multi insert select behavioral analytics
|
||||
-- this file is intended to create the tables required for the tests
|
||||
--
|
||||
|
||||
ALTER SEQUENCE pg_catalog.pg_dist_shardid_seq RESTART 1400000;
|
||||
ALTER SEQUENCE pg_catalog.pg_dist_jobid_seq RESTART 1400000;
|
||||
|
||||
SET citus.shard_replication_factor = 1;
|
||||
SET citus.shard_count = 4;
|
||||
|
||||
CREATE TABLE users_table (user_id int, time timestamp, value_1 int, value_2 int, value_3 float, value_4 bigint);
|
||||
SELECT create_distributed_table('users_table', 'user_id');
|
||||
|
||||
CREATE TABLE events_table (user_id int, time timestamp, event_type int, value_2 int, value_3 float, value_4 bigint);
|
||||
SELECT create_distributed_table('events_table', 'user_id');
|
||||
|
||||
CREATE TABLE agg_results (user_id int, value_1_agg int, value_2_agg int, value_3_agg float, value_4_agg bigint, agg_time timestamp);
|
||||
SELECT create_distributed_table('agg_results', 'user_id');
|
||||
|
||||
-- we need this to improve the concurrency on the regression tests
|
||||
CREATE TABLE agg_results_second (user_id int, value_1_agg int, value_2_agg int, value_3_agg float, value_4_agg bigint, agg_time timestamp);
|
||||
SELECT create_distributed_table('agg_results_second', 'user_id');
|
||||
|
||||
-- same as agg_results_second
|
||||
CREATE TABLE agg_results_third (user_id int, value_1_agg int, value_2_agg int, value_3_agg float, value_4_agg bigint, agg_time timestamp);
|
||||
SELECT create_distributed_table('agg_results_third', 'user_id');
|
||||
|
||||
-- same as agg_results_second
|
||||
CREATE TABLE agg_results_fourth (user_id int, value_1_agg int, value_2_agg int, value_3_agg float, value_4_agg bigint, agg_time timestamp);
|
||||
SELECT create_distributed_table('agg_results_fourth', 'user_id');
|
||||
|
||||
COPY users_table FROM '@abs_srcdir@/data/users_table.data' WITH CSV;
|
||||
COPY events_table FROM '@abs_srcdir@/data/events_table.data' WITH CSV;
|
||||
|
||||
-- create indexes for
|
||||
CREATE INDEX is_index1 ON users_table(user_id);
|
||||
CREATE INDEX is_index2 ON events_table(user_id);
|
||||
CREATE INDEX is_index3 ON users_table(value_1);
|
||||
CREATE INDEX is_index4 ON events_table(event_type);
|
||||
CREATE INDEX is_index5 ON users_table(value_2);
|
||||
CREATE INDEX is_index6 ON events_table(value_2);
|
||||
|
|
@ -30,6 +30,8 @@ test: multi_create_table_constraints
|
|||
test: multi_master_protocol
|
||||
test: multi_load_data
|
||||
|
||||
test: multi_insert_select_behavioral_analytics_create_table
|
||||
test: multi_insert_select_behavioral_analytics_basics multi_insert_select_behavioral_analytics_single_shard_queries multi_insert_select_non_pushable_queries
|
||||
test: multi_insert_select
|
||||
|
||||
# ----------
|
||||
|
|
|
@ -0,0 +1,64 @@
|
|||
--
|
||||
-- multi insert select behavioral analytics
|
||||
-- this file is intended to create the tables required for the tests
|
||||
--
|
||||
ALTER SEQUENCE pg_catalog.pg_dist_shardid_seq RESTART 1400000;
|
||||
ALTER SEQUENCE pg_catalog.pg_dist_jobid_seq RESTART 1400000;
|
||||
SET citus.shard_replication_factor = 1;
|
||||
SET citus.shard_count = 4;
|
||||
CREATE TABLE users_table (user_id int, time timestamp, value_1 int, value_2 int, value_3 float, value_4 bigint);
|
||||
SELECT create_distributed_table('users_table', 'user_id');
|
||||
create_distributed_table
|
||||
--------------------------
|
||||
|
||||
(1 row)
|
||||
|
||||
CREATE TABLE events_table (user_id int, time timestamp, event_type int, value_2 int, value_3 float, value_4 bigint);
|
||||
SELECT create_distributed_table('events_table', 'user_id');
|
||||
create_distributed_table
|
||||
--------------------------
|
||||
|
||||
(1 row)
|
||||
|
||||
CREATE TABLE agg_results (user_id int, value_1_agg int, value_2_agg int, value_3_agg float, value_4_agg bigint, agg_time timestamp);
|
||||
SELECT create_distributed_table('agg_results', 'user_id');
|
||||
create_distributed_table
|
||||
--------------------------
|
||||
|
||||
(1 row)
|
||||
|
||||
-- we need this to improve the concurrency on the regression tests
|
||||
CREATE TABLE agg_results_second (user_id int, value_1_agg int, value_2_agg int, value_3_agg float, value_4_agg bigint, agg_time timestamp);
|
||||
SELECT create_distributed_table('agg_results_second', 'user_id');
|
||||
create_distributed_table
|
||||
--------------------------
|
||||
|
||||
(1 row)
|
||||
|
||||
-- same as agg_results_second
|
||||
CREATE TABLE agg_results_third (user_id int, value_1_agg int, value_2_agg int, value_3_agg float, value_4_agg bigint, agg_time timestamp);
|
||||
SELECT create_distributed_table('agg_results_third', 'user_id');
|
||||
create_distributed_table
|
||||
--------------------------
|
||||
|
||||
(1 row)
|
||||
|
||||
-- same as agg_results_second
|
||||
CREATE TABLE agg_results_fourth (user_id int, value_1_agg int, value_2_agg int, value_3_agg float, value_4_agg bigint, agg_time timestamp);
|
||||
SELECT create_distributed_table('agg_results_fourth', 'user_id');
|
||||
create_distributed_table
|
||||
--------------------------
|
||||
|
||||
(1 row)
|
||||
|
||||
COPY users_table FROM '@abs_srcdir@/data/users_table.data' WITH CSV;
|
||||
COPY events_table FROM '@abs_srcdir@/data/events_table.data' WITH CSV;
|
||||
-- create indexes for
|
||||
CREATE INDEX is_index1 ON users_table(user_id);
|
||||
NOTICE: using one-phase commit for distributed DDL commands
|
||||
HINT: You can enable two-phase commit for extra safety with: SET citus.multi_shard_commit_protocol TO '2pc'
|
||||
CREATE INDEX is_index2 ON events_table(user_id);
|
||||
CREATE INDEX is_index3 ON users_table(value_1);
|
||||
CREATE INDEX is_index4 ON events_table(event_type);
|
||||
CREATE INDEX is_index5 ON users_table(value_2);
|
||||
CREATE INDEX is_index6 ON events_table(value_2);
|
|
@ -22,6 +22,9 @@ SELECT create_distributed_table('agg_events', 'user_id');;
|
|||
CREATE TABLE reference_table (user_id int);
|
||||
SELECT create_reference_table('reference_table');
|
||||
|
||||
CREATE TABLE insert_select_varchar_test (key varchar, value int);
|
||||
SELECT create_distributed_table('insert_select_varchar_test', 'key', 'hash');
|
||||
|
||||
-- set back to the defaults
|
||||
SET citus.shard_count = DEFAULT;
|
||||
SET citus.shard_replication_factor = DEFAULT;
|
||||
|
@ -1063,7 +1066,193 @@ ON (f.id = f2.id)
|
|||
WHERE f.id IN (SELECT value_1
|
||||
FROM raw_events_second);
|
||||
|
||||
-- some more semi-anti join tests
|
||||
|
||||
-- join in where
|
||||
INSERT INTO raw_events_second
|
||||
(user_id)
|
||||
SELECT user_id
|
||||
FROM raw_events_first
|
||||
WHERE user_id IN (SELECT raw_events_second.user_id
|
||||
FROM raw_events_second, raw_events_first
|
||||
WHERE raw_events_second.user_id = raw_events_first.user_id AND raw_events_first.user_id = 200);
|
||||
|
||||
-- we cannot push this down since it is NOT IN
|
||||
INSERT INTO raw_events_second
|
||||
(user_id)
|
||||
SELECT user_id
|
||||
FROM raw_events_first
|
||||
WHERE user_id NOT IN (SELECT raw_events_second.user_id
|
||||
FROM raw_events_second, raw_events_first
|
||||
WHERE raw_events_second.user_id = raw_events_first.user_id AND raw_events_first.user_id = 200);
|
||||
|
||||
|
||||
-- safe to push down
|
||||
INSERT INTO raw_events_second
|
||||
(user_id)
|
||||
SELECT user_id
|
||||
FROM raw_events_first
|
||||
WHERE EXISTS (SELECT 1
|
||||
FROM raw_events_second
|
||||
WHERE raw_events_second.user_id =raw_events_first.user_id);
|
||||
|
||||
-- we cannot push down
|
||||
INSERT INTO raw_events_second
|
||||
(user_id)
|
||||
SELECT user_id
|
||||
FROM raw_events_first
|
||||
WHERE NOT EXISTS (SELECT 1
|
||||
FROM raw_events_second
|
||||
WHERE raw_events_second.user_id =raw_events_first.user_id);
|
||||
|
||||
|
||||
-- more complex LEFT JOINs
|
||||
INSERT INTO agg_events
|
||||
(user_id, value_4_agg)
|
||||
SELECT
|
||||
outer_most.id, max(outer_most.value)
|
||||
FROM
|
||||
(
|
||||
SELECT f2.id as id, f2.v4 as value FROM
|
||||
(SELECT
|
||||
id
|
||||
FROM (SELECT raw_events_first.user_id AS id
|
||||
FROM raw_events_first LEFT JOIN
|
||||
reference_table
|
||||
ON (raw_events_first.user_id = reference_table.user_id)) AS foo) as f
|
||||
LEFT JOIN
|
||||
(SELECT v4,
|
||||
v1,
|
||||
id
|
||||
FROM (SELECT SUM(raw_events_second.value_4) AS v4,
|
||||
SUM(raw_events_first.value_1) AS v1,
|
||||
raw_events_second.user_id AS id
|
||||
FROM raw_events_first,
|
||||
raw_events_second
|
||||
WHERE raw_events_first.user_id = raw_events_second.user_id
|
||||
GROUP BY raw_events_second.user_id
|
||||
HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2
|
||||
ON (f.id = f2.id)) as outer_most
|
||||
GROUP BY
|
||||
outer_most.id;
|
||||
|
||||
|
||||
-- cannot push down since the f.id IN is matched with value_1
|
||||
INSERT INTO raw_events_second
|
||||
(user_id)
|
||||
SELECT user_id
|
||||
FROM raw_events_first
|
||||
WHERE user_id IN (
|
||||
SELECT f2.id FROM
|
||||
(SELECT
|
||||
id
|
||||
FROM (SELECT reference_table.user_id AS id
|
||||
FROM raw_events_first,
|
||||
reference_table
|
||||
WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f
|
||||
INNER JOIN
|
||||
(SELECT v4,
|
||||
v1,
|
||||
id
|
||||
FROM (SELECT SUM(raw_events_second.value_4) AS v4,
|
||||
SUM(raw_events_first.value_1) AS v1,
|
||||
raw_events_second.user_id AS id
|
||||
FROM raw_events_first,
|
||||
raw_events_second
|
||||
WHERE raw_events_first.user_id = raw_events_second.user_id
|
||||
GROUP BY raw_events_second.user_id
|
||||
HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2
|
||||
ON (f.id = f2.id)
|
||||
WHERE f.id IN (SELECT value_1
|
||||
FROM raw_events_second));
|
||||
|
||||
-- same as above, but this time it is safe to push down since
|
||||
-- f.id IN is matched with user_id
|
||||
INSERT INTO raw_events_second
|
||||
(user_id)
|
||||
SELECT user_id
|
||||
FROM raw_events_first
|
||||
WHERE user_id IN (
|
||||
SELECT f2.id FROM
|
||||
(SELECT
|
||||
id
|
||||
FROM (SELECT reference_table.user_id AS id
|
||||
FROM raw_events_first,
|
||||
reference_table
|
||||
WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f
|
||||
INNER JOIN
|
||||
(SELECT v4,
|
||||
v1,
|
||||
id
|
||||
FROM (SELECT SUM(raw_events_second.value_4) AS v4,
|
||||
SUM(raw_events_first.value_1) AS v1,
|
||||
raw_events_second.user_id AS id
|
||||
FROM raw_events_first,
|
||||
raw_events_second
|
||||
WHERE raw_events_first.user_id = raw_events_second.user_id
|
||||
GROUP BY raw_events_second.user_id
|
||||
HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2
|
||||
ON (f.id = f2.id)
|
||||
WHERE f.id IN (SELECT user_id
|
||||
FROM raw_events_second));
|
||||
|
||||
-- cannot push down since top level user_id is matched with NOT IN
|
||||
INSERT INTO raw_events_second
|
||||
(user_id)
|
||||
SELECT user_id
|
||||
FROM raw_events_first
|
||||
WHERE user_id NOT IN (
|
||||
SELECT f2.id FROM
|
||||
(SELECT
|
||||
id
|
||||
FROM (SELECT reference_table.user_id AS id
|
||||
FROM raw_events_first,
|
||||
reference_table
|
||||
WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f
|
||||
INNER JOIN
|
||||
(SELECT v4,
|
||||
v1,
|
||||
id
|
||||
FROM (SELECT SUM(raw_events_second.value_4) AS v4,
|
||||
SUM(raw_events_first.value_1) AS v1,
|
||||
raw_events_second.user_id AS id
|
||||
FROM raw_events_first,
|
||||
raw_events_second
|
||||
WHERE raw_events_first.user_id = raw_events_second.user_id
|
||||
GROUP BY raw_events_second.user_id
|
||||
HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2
|
||||
ON (f.id = f2.id)
|
||||
WHERE f.id IN (SELECT user_id
|
||||
FROM raw_events_second));
|
||||
|
||||
-- cannot push down since join is not equi join (f.id > f2.id)
|
||||
INSERT INTO raw_events_second
|
||||
(user_id)
|
||||
SELECT user_id
|
||||
FROM raw_events_first
|
||||
WHERE user_id IN (
|
||||
SELECT f2.id FROM
|
||||
(SELECT
|
||||
id
|
||||
FROM (SELECT reference_table.user_id AS id
|
||||
FROM raw_events_first,
|
||||
reference_table
|
||||
WHERE raw_events_first.user_id = reference_table.user_id ) AS foo) as f
|
||||
INNER JOIN
|
||||
(SELECT v4,
|
||||
v1,
|
||||
id
|
||||
FROM (SELECT SUM(raw_events_second.value_4) AS v4,
|
||||
SUM(raw_events_first.value_1) AS v1,
|
||||
raw_events_second.user_id AS id
|
||||
FROM raw_events_first,
|
||||
raw_events_second
|
||||
WHERE raw_events_first.user_id = raw_events_second.user_id
|
||||
GROUP BY raw_events_second.user_id
|
||||
HAVING SUM(raw_events_second.value_4) > 10) AS foo2 ) as f2
|
||||
ON (f.id > f2.id)
|
||||
WHERE f.id IN (SELECT user_id
|
||||
FROM raw_events_second));
|
||||
|
||||
-- we currently do not support grouping sets
|
||||
INSERT INTO agg_events
|
||||
|
@ -1198,8 +1387,30 @@ SET client_min_messages TO DEBUG2;
|
|||
-- this should also work
|
||||
INSERT INTO raw_events_first SELECT * FROM raw_events_second WHERE user_id = 5;
|
||||
|
||||
|
||||
SET client_min_messages TO INFO;
|
||||
|
||||
-- now do some tests with varchars
|
||||
INSERT INTO insert_select_varchar_test VALUES ('test_1', 10);
|
||||
INSERT INTO insert_select_varchar_test VALUES ('test_2', 30);
|
||||
|
||||
INSERT INTO insert_select_varchar_test (key, value)
|
||||
SELECT *, 100
|
||||
FROM (SELECT f1.key
|
||||
FROM (SELECT key
|
||||
FROM insert_select_varchar_test
|
||||
GROUP BY 1
|
||||
HAVING Count(key) < 3) AS f1,
|
||||
(SELECT key
|
||||
FROM insert_select_varchar_test
|
||||
GROUP BY 1
|
||||
HAVING Sum(COALESCE(insert_select_varchar_test.value, 0)) >
|
||||
20.0)
|
||||
AS f2
|
||||
WHERE f1.key = f2.key
|
||||
GROUP BY 1) AS foo;
|
||||
|
||||
SELECT * FROM insert_select_varchar_test;
|
||||
|
||||
-- some tests with DEFAULT columns and constant values
|
||||
-- this test is most importantly intended for deparsing the query correctly
|
||||
-- but still it is preferable to have this test here instead of multi_deparse_shard_query
|
||||
|
|
|
@ -0,0 +1,420 @@
|
|||
------------------------------------
|
||||
------------------------------------
|
||||
-- Vanilla funnel query
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
INSERT INTO agg_results (user_id, value_1_agg)
|
||||
SELECT user_id, array_length(events_table, 1)
|
||||
FROM (
|
||||
SELECT user_id, array_agg(event ORDER BY time) AS events_table
|
||||
FROM (
|
||||
SELECT u.user_id, e.event_type::text AS event, e.time
|
||||
FROM users_table AS u,
|
||||
events_table AS e
|
||||
WHERE u.user_id = e.user_id
|
||||
AND u.user_id >= 10
|
||||
AND u.user_id <= 25
|
||||
AND e.event_type IN (100, 101, 102)
|
||||
) t
|
||||
GROUP BY user_id
|
||||
) q;
|
||||
|
||||
-- get some statistics from the aggregated results to ensure the results are correct
|
||||
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||
|
||||
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
-- Funnel grouped by whether or not a user has done an event
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
TRUNCATE agg_results;
|
||||
|
||||
INSERT INTO agg_results (user_id, value_1_agg, value_2_agg )
|
||||
SELECT user_id, sum(array_length(events_table, 1)), length(hasdone_event)
|
||||
FROM (
|
||||
SELECT
|
||||
t1.user_id,
|
||||
array_agg(event ORDER BY time) AS events_table,
|
||||
COALESCE(hasdone_event, 'Has not done event') AS hasdone_event
|
||||
FROM (
|
||||
(
|
||||
SELECT u.user_id, 'step=>1'::text AS event, e.time
|
||||
FROM users_table AS u,
|
||||
events_table AS e
|
||||
WHERE u.user_id = e.user_id
|
||||
AND u.user_id >= 10
|
||||
AND u.user_id <= 25
|
||||
AND e.event_type IN (100, 101, 102)
|
||||
)
|
||||
UNION
|
||||
(
|
||||
SELECT u.user_id, 'step=>2'::text AS event, e.time
|
||||
FROM users_table AS u,
|
||||
events_table AS e
|
||||
WHERE u.user_id = e.user_id
|
||||
AND u.user_id >= 10
|
||||
AND u.user_id <= 25
|
||||
AND e.event_type IN (103, 104, 105)
|
||||
)
|
||||
) t1 LEFT JOIN (
|
||||
SELECT DISTINCT user_id,
|
||||
'Has done event'::TEXT AS hasdone_event
|
||||
FROM events_table AS e
|
||||
|
||||
WHERE e.user_id >= 10
|
||||
AND e.user_id <= 25
|
||||
AND e.event_type IN (106, 107, 108)
|
||||
|
||||
) t2 ON (t1.user_id = t2.user_id)
|
||||
GROUP BY t1.user_id, hasdone_event
|
||||
) t GROUP BY user_id, hasdone_event;
|
||||
|
||||
-- get some statistics from the aggregated results to ensure the results are correct
|
||||
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
-- Funnel, grouped by the number of times a user has done an event
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
|
||||
TRUNCATE agg_results;
|
||||
|
||||
INSERT INTO agg_results (user_id, value_1_agg, value_2_agg)
|
||||
SELECT
|
||||
user_id,
|
||||
avg(array_length(events_table, 1)) AS event_average,
|
||||
count_pay
|
||||
FROM (
|
||||
SELECT
|
||||
subquery_1.user_id,
|
||||
array_agg(event ORDER BY time) AS events_table,
|
||||
COALESCE(count_pay, 0) AS count_pay
|
||||
FROM
|
||||
(
|
||||
(SELECT
|
||||
users_table.user_id,
|
||||
'action=>1'AS event,
|
||||
events_table.time
|
||||
FROM
|
||||
users_table,
|
||||
events_table
|
||||
WHERE
|
||||
users_table.user_id = events_table.user_id AND
|
||||
users_table.user_id >= 10 AND
|
||||
users_table.user_id <= 70 AND
|
||||
events_table.event_type > 10 AND events_table.event_type < 12
|
||||
)
|
||||
UNION
|
||||
(SELECT
|
||||
users_table.user_id,
|
||||
'action=>2'AS event,
|
||||
events_table.time
|
||||
FROM
|
||||
users_table,
|
||||
events_table
|
||||
WHERE
|
||||
users_table.user_id = events_table.user_id AND
|
||||
users_table.user_id >= 10 AND
|
||||
users_table.user_id <= 70 AND
|
||||
events_table.event_type > 12 AND events_table.event_type < 14
|
||||
)
|
||||
) AS subquery_1
|
||||
LEFT JOIN
|
||||
(SELECT
|
||||
user_id,
|
||||
COUNT(*) AS count_pay
|
||||
FROM
|
||||
users_table
|
||||
WHERE
|
||||
user_id >= 10 AND
|
||||
user_id <= 70 AND
|
||||
users_table.value_1 > 15 AND users_table.value_1 < 17
|
||||
GROUP BY
|
||||
user_id
|
||||
HAVING
|
||||
COUNT(*) > 1) AS subquery_2
|
||||
ON
|
||||
subquery_1.user_id = subquery_2.user_id
|
||||
GROUP BY
|
||||
subquery_1.user_id,
|
||||
count_pay) AS subquery_top
|
||||
WHERE
|
||||
array_ndims(events_table) > 0
|
||||
GROUP BY
|
||||
count_pay, user_id
|
||||
ORDER BY
|
||||
count_pay;
|
||||
|
||||
-- get some statistics from the aggregated results to ensure the results are correct
|
||||
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
-- Most recently seen users_table events_table
|
||||
------------------------------------
|
||||
-- Note that we don't use ORDER BY/LIMIT yet
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
TRUNCATE agg_results;
|
||||
|
||||
INSERT INTO agg_results (user_id, agg_time, value_2_agg)
|
||||
SELECT
|
||||
user_id,
|
||||
user_lastseen,
|
||||
array_length(event_array, 1)
|
||||
FROM (
|
||||
SELECT
|
||||
user_id,
|
||||
max(u.time) as user_lastseen,
|
||||
array_agg(event_type ORDER BY u.time) AS event_array
|
||||
FROM (
|
||||
|
||||
SELECT user_id, time
|
||||
FROM users_table
|
||||
WHERE
|
||||
user_id >= 10 AND
|
||||
user_id <= 70 AND
|
||||
users_table.value_1 > 10 AND users_table.value_1 < 12
|
||||
|
||||
) u LEFT JOIN LATERAL (
|
||||
SELECT event_type, time
|
||||
FROM events_table
|
||||
WHERE user_id = u.user_id AND
|
||||
events_table.event_type > 10 AND events_table.event_type < 12
|
||||
) t ON true
|
||||
GROUP BY user_id
|
||||
) AS shard_union
|
||||
ORDER BY user_lastseen DESC;
|
||||
|
||||
-- get some statistics from the aggregated results to ensure the results are correct
|
||||
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
-- Count the number of distinct users_table who are in segment X and Y and Z
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
|
||||
TRUNCATE agg_results;
|
||||
|
||||
INSERT INTO agg_results (user_id)
|
||||
SELECT DISTINCT user_id
|
||||
FROM users_table
|
||||
WHERE user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 10 AND value_1 <= 20)
|
||||
AND user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 30 AND value_1 <= 40)
|
||||
AND user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 50 AND value_1 <= 60);
|
||||
|
||||
-- get some statistics from the aggregated results to ensure the results are correct
|
||||
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
-- Count the number of distinct users_table who are in at least two of X and Y and Z segments
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
TRUNCATE agg_results;
|
||||
|
||||
INSERT INTO agg_results(user_id)
|
||||
SELECT user_id
|
||||
FROM users_table
|
||||
WHERE (value_1 = 10
|
||||
OR value_1 = 11
|
||||
OR value_1 = 12)
|
||||
GROUP BY user_id
|
||||
HAVING count(distinct value_1) >= 2;
|
||||
|
||||
-- get some statistics from the aggregated results to ensure the results are correct
|
||||
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
-- Find customers who have done X, and satisfy other customer specific criteria
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
TRUNCATE agg_results;
|
||||
|
||||
INSERT INTO agg_results(user_id, value_2_agg)
|
||||
SELECT user_id, value_2 FROM users_table WHERE
|
||||
value_1 > 101 AND value_1 < 110
|
||||
AND value_2 >= 5
|
||||
AND EXISTS (SELECT user_id FROM events_table WHERE event_type>101 AND event_type < 110 AND value_3 > 100 AND user_id=users_table.user_id);
|
||||
|
||||
-- get some statistics from the aggregated results to ensure the results are correct
|
||||
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
-- Customers who haven’t done X, and satisfy other customer specific criteria
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
TRUNCATE agg_results;
|
||||
|
||||
INSERT INTO agg_results(user_id, value_2_agg)
|
||||
SELECT user_id, value_2 FROM users_table WHERE
|
||||
value_1 = 101
|
||||
AND value_2 >= 5
|
||||
AND NOT EXISTS (SELECT user_id FROM events_table WHERE event_type=101 AND value_3 > 100 AND user_id=users_table.user_id);
|
||||
|
||||
-- get some statistics from the aggregated results to ensure the results are correct
|
||||
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
-- Customers who have done X and Y, and satisfy other customer specific criteria
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
TRUNCATE agg_results;
|
||||
|
||||
INSERT INTO agg_results(user_id, value_2_agg)
|
||||
SELECT user_id, value_2 FROM users_table WHERE
|
||||
value_1 > 100
|
||||
AND value_2 >= 5
|
||||
AND EXISTS (SELECT user_id FROM events_table WHERE event_type!=100 AND value_3 > 100 AND user_id=users_table.user_id)
|
||||
AND EXISTS (SELECT user_id FROM events_table WHERE event_type=101 AND value_3 > 100 AND user_id=users_table.user_id);
|
||||
|
||||
-- get some statistics from the aggregated results to ensure the results are correct
|
||||
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
-- Customers who have done X and haven’t done Y, and satisfy other customer specific criteria
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
TRUNCATE agg_results;
|
||||
|
||||
INSERT INTO agg_results(user_id, value_2_agg)
|
||||
SELECT user_id, value_2 FROM users_table WHERE
|
||||
value_2 >= 5
|
||||
AND EXISTS (SELECT user_id FROM events_table WHERE event_type > 100 AND event_type <= 300 AND value_3 > 100 AND user_id=users_table.user_id)
|
||||
AND NOT EXISTS (SELECT user_id FROM events_table WHERE event_type > 300 AND event_type <= 350 AND value_3 > 100 AND user_id=users_table.user_id);
|
||||
|
||||
-- get some statistics from the aggregated results to ensure the results are correct
|
||||
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
-- Customers who have done X more than 2 times, and satisfy other customer specific criteria
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
TRUNCATE agg_results;
|
||||
|
||||
INSERT INTO agg_results(user_id, value_2_agg)
|
||||
SELECT user_id,
|
||||
value_2
|
||||
FROM users_table
|
||||
WHERE value_1 > 100
|
||||
AND value_1 < 124
|
||||
AND value_2 >= 5
|
||||
AND EXISTS (SELECT user_id
|
||||
FROM events_table
|
||||
WHERE event_type > 100
|
||||
AND event_type < 124
|
||||
AND value_3 > 100
|
||||
AND user_id = users_table.user_id
|
||||
GROUP BY user_id
|
||||
HAVING Count(*) > 2);
|
||||
|
||||
-- get some statistics from the aggregated results to ensure the results are correct
|
||||
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
-- Find me all users_table who logged in more than once
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
TRUNCATE agg_results;
|
||||
|
||||
INSERT INTO agg_results(user_id, value_1_agg)
|
||||
SELECT user_id, value_1 from
|
||||
(
|
||||
SELECT user_id, value_1 From users_table
|
||||
WHERE value_2 > 100 and user_id = 15 GROUP BY value_1, user_id HAVING count(*) > 1
|
||||
) as a;
|
||||
|
||||
-- get some statistics from the aggregated results to ensure the results are correct
|
||||
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
-- Find me all users_table who have done some event, with additional filters applied
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
TRUNCATE agg_results;
|
||||
|
||||
INSERT INTO agg_results(user_id)
|
||||
Select user_id
|
||||
From events_table
|
||||
Where event_type = 16
|
||||
And value_2 > 50
|
||||
And user_id in
|
||||
(select user_id
|
||||
From users_table
|
||||
Where value_1 = 15
|
||||
And value_2 > 25);
|
||||
|
||||
-- get some statistics from the aggregated results to ensure the results are correct
|
||||
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
-- Which events_table were done by people who have done some specific events_table
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
TRUNCATE agg_results;
|
||||
|
||||
INSERT INTO agg_results(user_id, value_1_agg)
|
||||
SELECT user_id, event_type FROM events_table
|
||||
WHERE user_id in (SELECT user_id from events_table WHERE event_type > 500 and event_type < 505)
|
||||
GROUP BY user_id, event_type;
|
||||
|
||||
-- get some statistics from the aggregated results to ensure the results are correct
|
||||
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
-- Find me all the users_table who have done some event more than three times
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
TRUNCATE agg_results;
|
||||
|
||||
INSERT INTO agg_results(user_id)
|
||||
select user_id from
|
||||
(
|
||||
select
|
||||
user_id
|
||||
from
|
||||
events_table
|
||||
where event_type = 901 group by user_id having count(*) > 3
|
||||
) as a;
|
||||
|
||||
-- get some statistics from the aggregated results to ensure the results are correct
|
||||
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
-- Find my assets that have the highest probability and fetch their metadata
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
TRUNCATE agg_results;
|
||||
|
||||
INSERT INTO agg_results(user_id, value_1_agg, value_3_agg)
|
||||
SELECT
|
||||
users_table.user_id, users_table.value_1, prob
|
||||
FROM
|
||||
users_table
|
||||
JOIN
|
||||
(SELECT
|
||||
ma.user_id, (GREATEST(coalesce(ma.value_4 / 250, 0.0) + GREATEST(1.0))) / 2 AS prob
|
||||
FROM
|
||||
users_table AS ma, events_table as short_list
|
||||
WHERE
|
||||
short_list.user_id = ma.user_id and ma.value_1 < 50 and short_list.event_type < 50
|
||||
) temp
|
||||
ON users_table.user_id = temp.user_id
|
||||
WHERE users_table.value_1 < 50;
|
||||
|
||||
-- get some statistics from the aggregated results to ensure the results are correct
|
||||
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||||
|
|
@ -0,0 +1,401 @@
|
|||
------------------------------------
|
||||
------------------------------------
|
||||
-- Vanilla funnel query -- single shard
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
TRUNCATE agg_results_second;
|
||||
|
||||
INSERT INTO agg_results_second (user_id, value_1_agg)
|
||||
SELECT user_id, array_length(events_table, 1)
|
||||
FROM (
|
||||
SELECT user_id, array_agg(event ORDER BY time) AS events_table
|
||||
FROM (
|
||||
SELECT u.user_id, e.event_type::text AS event, e.time
|
||||
FROM users_table AS u,
|
||||
events_table AS e
|
||||
WHERE u.user_id = e.user_id
|
||||
AND u.user_id >= 10
|
||||
AND u.user_id <= 25
|
||||
AND e.event_type IN (100, 101, 102)
|
||||
) t
|
||||
GROUP BY user_id
|
||||
) q
|
||||
WHERE user_id = 20;
|
||||
|
||||
-- get some statistics from the aggregated results to ensure the results are correct
|
||||
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results_second;
|
||||
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
-- Vanilla funnel query -- two shards
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
TRUNCATE agg_results_second;
|
||||
|
||||
INSERT INTO agg_results_second (user_id, value_1_agg)
|
||||
SELECT user_id, array_length(events_table, 1)
|
||||
FROM (
|
||||
SELECT user_id, array_agg(event ORDER BY time) AS events_table
|
||||
FROM (
|
||||
SELECT u.user_id, e.event_type::text AS event, e.time
|
||||
FROM users_table AS u,
|
||||
events_table AS e
|
||||
WHERE u.user_id = e.user_id AND
|
||||
(u.user_id = 13 OR u.user_id = 20) AND
|
||||
(e.user_id = 13 OR e.user_id = 20)
|
||||
AND e.event_type IN (100, 101, 102)
|
||||
) t
|
||||
GROUP BY user_id
|
||||
) q
|
||||
WHERE (user_id = 13 OR user_id = 20);
|
||||
|
||||
-- get some statistics from the aggregated results to ensure the results are correct
|
||||
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results_second;
|
||||
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
-- Funnel grouped by whether or not a user has done an event -- single shard query
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
TRUNCATE agg_results_second;
|
||||
|
||||
INSERT INTO agg_results_second (user_id, value_1_agg, value_2_agg )
|
||||
SELECT user_id, sum(array_length(events_table, 1)), length(hasdone_event)
|
||||
FROM (
|
||||
SELECT
|
||||
t1.user_id,
|
||||
array_agg(event ORDER BY time) AS events_table,
|
||||
COALESCE(hasdone_event, 'Has not done event') AS hasdone_event
|
||||
FROM (
|
||||
(
|
||||
SELECT u.user_id, 'step=>1'::text AS event, e.time
|
||||
FROM users_table AS u,
|
||||
events_table AS e
|
||||
WHERE u.user_id = e.user_id
|
||||
AND u.user_id >= 10
|
||||
AND u.user_id <= 25
|
||||
AND e.event_type IN (100, 101, 102)
|
||||
)
|
||||
UNION
|
||||
(
|
||||
SELECT u.user_id, 'step=>2'::text AS event, e.time
|
||||
FROM users_table AS u,
|
||||
events_table AS e
|
||||
WHERE u.user_id = e.user_id
|
||||
AND u.user_id >= 10
|
||||
AND u.user_id <= 25
|
||||
AND e.event_type IN (103, 104, 105)
|
||||
)
|
||||
) t1 LEFT JOIN (
|
||||
SELECT DISTINCT user_id,
|
||||
'Has done event'::TEXT AS hasdone_event
|
||||
FROM events_table AS e
|
||||
|
||||
WHERE e.user_id >= 10
|
||||
AND e.user_id <= 25
|
||||
AND e.event_type IN (106, 107, 108)
|
||||
) t2 ON (t1.user_id = t2.user_id)
|
||||
WHERE t1.user_id = 20
|
||||
GROUP BY t1.user_id, hasdone_event
|
||||
) t GROUP BY user_id, hasdone_event;
|
||||
|
||||
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
-- Funnel grouped by whether or not a user has done an event -- two shards query
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
TRUNCATE agg_results_second;
|
||||
|
||||
INSERT INTO agg_results_second (user_id, value_1_agg, value_2_agg )
|
||||
SELECT user_id, sum(array_length(events_table, 1)), length(hasdone_event)
|
||||
FROM (
|
||||
SELECT
|
||||
t1.user_id,
|
||||
array_agg(event ORDER BY time) AS events_table,
|
||||
COALESCE(hasdone_event, 'Has not done event') AS hasdone_event
|
||||
FROM (
|
||||
(
|
||||
SELECT u.user_id, 'step=>1'::text AS event, e.time
|
||||
FROM users_table AS u,
|
||||
events_table AS e
|
||||
WHERE u.user_id = e.user_id
|
||||
AND (e.user_id = 20 OR e.user_id = 17)
|
||||
AND e.event_type IN (100, 101, 102)
|
||||
)
|
||||
UNION
|
||||
(
|
||||
SELECT u.user_id, 'step=>2'::text AS event, e.time
|
||||
FROM users_table AS u,
|
||||
events_table AS e
|
||||
WHERE u.user_id = e.user_id
|
||||
AND (e.user_id = 20 OR e.user_id = 17)
|
||||
AND e.event_type IN (103, 104, 105)
|
||||
)
|
||||
) t1 LEFT JOIN (
|
||||
SELECT DISTINCT user_id,
|
||||
'Has done event'::TEXT AS hasdone_event
|
||||
FROM events_table AS e
|
||||
|
||||
WHERE
|
||||
(e.user_id = 20 OR e.user_id = 17)
|
||||
AND e.event_type IN (106, 107, 108)
|
||||
) t2 ON (t1.user_id = t2.user_id)
|
||||
WHERE (t1.user_id = 20 OR t1.user_id = 17)
|
||||
GROUP BY t1.user_id, hasdone_event
|
||||
) t GROUP BY user_id, hasdone_event;
|
||||
|
||||
-- get some statistics from the aggregated results to ensure the results are correct
|
||||
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results_second;
|
||||
|
||||
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
-- Most recently seen users_table events_table -- single shard query
|
||||
------------------------------------
|
||||
-- Note that we don't use ORDER BY/LIMIT yet
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
TRUNCATE agg_results_second;
|
||||
|
||||
INSERT INTO agg_results_second (user_id, agg_time, value_2_agg)
|
||||
SELECT
|
||||
user_id,
|
||||
user_lastseen,
|
||||
array_length(event_array, 1)
|
||||
FROM (
|
||||
SELECT
|
||||
user_id,
|
||||
max(u.time) as user_lastseen,
|
||||
array_agg(event_type ORDER BY u.time) AS event_array
|
||||
FROM (
|
||||
|
||||
SELECT user_id, time
|
||||
FROM users_table
|
||||
WHERE
|
||||
user_id >= 10 AND
|
||||
user_id <= 70 AND
|
||||
users_table.value_1 > 10 AND users_table.value_1 < 12
|
||||
|
||||
) u LEFT JOIN LATERAL (
|
||||
SELECT event_type, time
|
||||
FROM events_table
|
||||
WHERE user_id = u.user_id AND
|
||||
events_table.event_type > 10 AND events_table.event_type < 12
|
||||
) t ON true
|
||||
WHERE user_id = 65
|
||||
GROUP BY user_id
|
||||
) AS shard_union
|
||||
ORDER BY user_lastseen DESC;
|
||||
|
||||
-- get some statistics from the aggregated results to ensure the results are correct
|
||||
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results_second;
|
||||
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
-- Most recently seen users_table events_table -- two shards query
|
||||
------------------------------------
|
||||
-- Note that we don't use ORDER BY/LIMIT yet
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
TRUNCATE agg_results_second;
|
||||
|
||||
INSERT INTO agg_results_second (user_id, agg_time, value_2_agg)
|
||||
SELECT
|
||||
user_id,
|
||||
user_lastseen,
|
||||
array_length(event_array, 1)
|
||||
FROM (
|
||||
SELECT
|
||||
user_id,
|
||||
max(u.time) as user_lastseen,
|
||||
array_agg(event_type ORDER BY u.time) AS event_array
|
||||
FROM (
|
||||
|
||||
SELECT user_id, time
|
||||
FROM users_table
|
||||
WHERE
|
||||
user_id >= 10 AND
|
||||
user_id <= 70 AND
|
||||
(user_id = 65 OR user_id = 12) AND
|
||||
users_table.value_1 > 10 AND users_table.value_1 < 12
|
||||
|
||||
) u LEFT JOIN LATERAL (
|
||||
SELECT event_type, time
|
||||
FROM events_table
|
||||
WHERE user_id = u.user_id AND (user_id = 65 OR user_id = 12) AND
|
||||
events_table.event_type > 10 AND events_table.event_type < 12
|
||||
) t ON true
|
||||
WHERE (user_id = 65 OR user_id = 12)
|
||||
GROUP BY user_id
|
||||
) AS shard_union
|
||||
ORDER BY user_lastseen DESC;
|
||||
|
||||
-- get some statistics from the aggregated results to ensure the results are correct
|
||||
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results_second;
|
||||
|
||||
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
-- Count the number of distinct users_table who are in segment X and Y and Z -- single shard
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
|
||||
TRUNCATE agg_results_second;
|
||||
|
||||
INSERT INTO agg_results_second (user_id)
|
||||
SELECT DISTINCT user_id
|
||||
FROM users_table
|
||||
WHERE user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 10 AND value_1 <= 20)
|
||||
AND user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 30 AND value_1 <= 40)
|
||||
AND user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 50 AND value_1 <= 60)
|
||||
AND user_id = 7;
|
||||
|
||||
-- get some statistics from the aggregated results to ensure the results are correct
|
||||
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results_second;
|
||||
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
-- Count the number of distinct users_table who are in segment X and Y and Z -- two shards
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
|
||||
TRUNCATE agg_results_second;
|
||||
|
||||
INSERT INTO agg_results_second (user_id)
|
||||
SELECT DISTINCT user_id
|
||||
FROM users_table
|
||||
WHERE user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 10 AND value_1 <= 20 AND (user_id = 7 OR user_id = 20))
|
||||
AND user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 30 AND value_1 <= 40 AND (user_id = 7 OR user_id = 20))
|
||||
AND user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 50 AND value_1 <= 60 AND (user_id = 7 OR user_id = 20))
|
||||
AND (user_id = 7 OR user_id = 20);
|
||||
|
||||
-- get some statistics from the aggregated results to ensure the results are correct
|
||||
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results_second;
|
||||
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
-- Find customers who have done X, and satisfy other customer specific criteria -- single shard
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
TRUNCATE agg_results_second;
|
||||
|
||||
INSERT INTO agg_results_second(user_id, value_2_agg)
|
||||
SELECT user_id, value_2 FROM users_table WHERE
|
||||
value_1 > 101 AND value_1 < 110
|
||||
AND value_2 >= 5
|
||||
AND EXISTS (SELECT user_id FROM events_table WHERE event_type>101 AND event_type < 110 AND value_3 > 100 AND user_id=users_table.user_id)
|
||||
AND user_id = 61;
|
||||
|
||||
-- get some statistics from the aggregated results to ensure the results are correct
|
||||
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results_second;
|
||||
|
||||
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
-- Find customers who have done X, and satisfy other customer specific criteria -- two shards
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
TRUNCATE agg_results_second;
|
||||
|
||||
INSERT INTO agg_results_second(user_id, value_2_agg)
|
||||
SELECT user_id, value_2 FROM users_table WHERE
|
||||
value_1 > 101 AND value_1 < 110
|
||||
AND value_2 >= 5
|
||||
AND EXISTS (SELECT user_id FROM events_table WHERE event_type>101 AND event_type < 110 AND value_3 > 100 AND (user_id = 61 OR user_id = 51) AND user_id=users_table.user_id)
|
||||
AND (user_id = 61 OR user_id = 51);
|
||||
|
||||
-- get some statistics from the aggregated results to ensure the results are correct
|
||||
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results_second;
|
||||
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
-- Customers who have done X and haven’t done Y, and satisfy other customer specific criteria -- single shard
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
TRUNCATE agg_results_second;
|
||||
|
||||
INSERT INTO agg_results_second(user_id, value_2_agg)
|
||||
SELECT user_id, value_2 FROM users_table WHERE
|
||||
value_2 >= 5
|
||||
AND user_id = 96
|
||||
AND EXISTS (SELECT user_id FROM events_table WHERE event_type > 100 AND event_type <= 300 AND value_3 > 100 AND user_id=users_table.user_id)
|
||||
AND NOT EXISTS (SELECT user_id FROM events_table WHERE event_type > 300 AND event_type <= 350 AND value_3 > 100 AND user_id=users_table.user_id);
|
||||
|
||||
-- get some statistics from the aggregated results to ensure the results are correct
|
||||
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results_second;
|
||||
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
-- Customers who have done X and haven’t done Y, and satisfy other customer specific criteria -- two shards
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
TRUNCATE agg_results_second;
|
||||
|
||||
INSERT INTO agg_results_second(user_id, value_2_agg)
|
||||
SELECT user_id, value_2 FROM users_table WHERE
|
||||
value_2 >= 5
|
||||
AND (user_id = 96 OR user_id = 8)
|
||||
AND EXISTS (SELECT user_id FROM events_table WHERE event_type > 100 AND event_type <= 300 AND value_3 > 100 AND user_id=users_table.user_id AND (user_id = 96 OR user_id = 8))
|
||||
AND NOT EXISTS (SELECT user_id FROM events_table WHERE event_type > 300 AND event_type <= 350 AND value_3 > 100 AND user_id=users_table.user_id AND (user_id = 96 OR user_id = 8));
|
||||
|
||||
-- get some statistics from the aggregated results to ensure the results are correct
|
||||
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results_second;
|
||||
|
||||
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
-- Customers who have done X more than 2 times, and satisfy other customer specific criteria -- single shard
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
TRUNCATE agg_results_second;
|
||||
|
||||
INSERT INTO agg_results_second(user_id, value_2_agg)
|
||||
SELECT user_id,
|
||||
value_2
|
||||
FROM users_table
|
||||
WHERE value_1 > 100
|
||||
AND value_1 < 124
|
||||
AND value_2 >= 5
|
||||
AND user_id = 47
|
||||
AND EXISTS (SELECT user_id
|
||||
FROM events_table
|
||||
WHERE event_type > 100
|
||||
AND event_type < 124
|
||||
AND value_3 > 100
|
||||
AND user_id = users_table.user_id
|
||||
AND user_id = 47
|
||||
GROUP BY user_id
|
||||
HAVING Count(*) > 2);
|
||||
|
||||
-- get some statistics from the aggregated results to ensure the results are correct
|
||||
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results_second;
|
||||
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
-- Customers who have done X more than 2 times, and satisfy other customer specific criteria -- two shards
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
TRUNCATE agg_results_second;
|
||||
|
||||
INSERT INTO agg_results_second(user_id, value_2_agg)
|
||||
SELECT user_id,
|
||||
value_2
|
||||
FROM users_table
|
||||
WHERE value_1 > 100
|
||||
AND value_1 < 124
|
||||
AND value_2 >= 5
|
||||
AND (user_id = 47 or user_id = 81)
|
||||
AND EXISTS (SELECT user_id
|
||||
FROM events_table
|
||||
WHERE event_type > 100
|
||||
AND event_type < 124
|
||||
AND value_3 > 100
|
||||
AND user_id = users_table.user_id
|
||||
AND (user_id = 47 or user_id = 81)
|
||||
GROUP BY user_id
|
||||
HAVING Count(*) > 2);
|
||||
|
||||
-- get some statistics from the aggregated results to ensure the results are correct
|
||||
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results_second;
|
||||
|
|
@ -0,0 +1,651 @@
|
|||
------------------------------------
|
||||
------------------------------------
|
||||
-- Vanilla funnel query
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
|
||||
-- not pushable since the JOIN is not an equi join
|
||||
INSERT INTO agg_results_third (user_id, value_1_agg)
|
||||
SELECT user_id, array_length(events_table, 1)
|
||||
FROM (
|
||||
SELECT user_id, array_agg(event ORDER BY time) AS events_table
|
||||
FROM (
|
||||
SELECT u.user_id, e.event_type::text AS event, e.time
|
||||
FROM users_table AS u,
|
||||
events_table AS e
|
||||
WHERE u.user_id != e.user_id
|
||||
AND u.user_id >= 10
|
||||
AND u.user_id <= 25
|
||||
AND e.event_type IN (100, 101, 102)
|
||||
) t
|
||||
GROUP BY user_id
|
||||
) q;
|
||||
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
-- Funnel grouped by whether or not a user has done an event
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
|
||||
-- not pushable since the JOIN in the left part of the UNION
|
||||
-- is not an equi join
|
||||
INSERT INTO agg_results_third (user_id, value_1_agg, value_2_agg )
|
||||
SELECT user_id, sum(array_length(events_table, 1)), length(hasdone_event)
|
||||
FROM (
|
||||
SELECT
|
||||
t1.user_id,
|
||||
array_agg(event ORDER BY time) AS events_table,
|
||||
COALESCE(hasdone_event, 'Has not done event') AS hasdone_event
|
||||
FROM (
|
||||
(
|
||||
SELECT u.user_id, 'step=>1'::text AS event, e.time
|
||||
FROM users_table AS u,
|
||||
events_table AS e
|
||||
WHERE u.user_id != e.user_id
|
||||
AND u.user_id >= 10
|
||||
AND u.user_id <= 25
|
||||
AND e.event_type IN (100, 101, 102)
|
||||
)
|
||||
UNION
|
||||
(
|
||||
SELECT u.user_id, 'step=>2'::text AS event, e.time
|
||||
FROM users_table AS u,
|
||||
events_table AS e
|
||||
WHERE u.user_id = e.user_id
|
||||
AND u.user_id >= 10
|
||||
AND u.user_id <= 25
|
||||
AND e.event_type IN (103, 104, 105)
|
||||
)
|
||||
) t1 LEFT JOIN (
|
||||
SELECT DISTINCT user_id,
|
||||
'Has done event'::TEXT AS hasdone_event
|
||||
FROM events_table AS e
|
||||
|
||||
WHERE e.user_id >= 10
|
||||
AND e.user_id <= 25
|
||||
AND e.event_type IN (106, 107, 108)
|
||||
|
||||
) t2 ON (t1.user_id = t2.user_id)
|
||||
GROUP BY t1.user_id, hasdone_event
|
||||
) t GROUP BY user_id, hasdone_event;
|
||||
|
||||
-- not pushable since the right part of the UNION
|
||||
-- is not joined on the partition key
|
||||
INSERT INTO agg_results_third (user_id, value_1_agg, value_2_agg )
|
||||
SELECT user_id, sum(array_length(events_table, 1)), length(hasdone_event)
|
||||
FROM (
|
||||
SELECT
|
||||
t1.user_id,
|
||||
array_agg(event ORDER BY time) AS events_table,
|
||||
COALESCE(hasdone_event, 'Has not done event') AS hasdone_event
|
||||
FROM (
|
||||
(
|
||||
SELECT u.user_id, 'step=>1'::text AS event, e.time
|
||||
FROM users_table AS u,
|
||||
events_table AS e
|
||||
WHERE u.user_id = e.user_id
|
||||
AND u.user_id >= 10
|
||||
AND u.user_id <= 25
|
||||
AND e.event_type IN (100, 101, 102)
|
||||
)
|
||||
UNION
|
||||
(
|
||||
SELECT u.user_id, 'step=>2'::text AS event, e.time
|
||||
FROM users_table AS u,
|
||||
events_table AS e
|
||||
WHERE u.user_id = e.event_type
|
||||
AND u.user_id >= 10
|
||||
AND u.user_id <= 25
|
||||
AND e.event_type IN (103, 104, 105)
|
||||
)
|
||||
) t1 LEFT JOIN (
|
||||
SELECT DISTINCT user_id,
|
||||
'Has done event'::TEXT AS hasdone_event
|
||||
FROM events_table AS e
|
||||
|
||||
WHERE e.user_id >= 10
|
||||
AND e.user_id <= 25
|
||||
AND e.event_type IN (106, 107, 108)
|
||||
|
||||
) t2 ON (t1.user_id = t2.user_id)
|
||||
GROUP BY t1.user_id, hasdone_event
|
||||
) t GROUP BY user_id, hasdone_event;
|
||||
|
||||
-- the LEFT JOIN condition is not on the partition column (i.e., it is part_key divided by 2)
|
||||
INSERT INTO agg_results_third (user_id, value_1_agg, value_2_agg )
|
||||
SELECT user_id, sum(array_length(events_table, 1)), length(hasdone_event)
|
||||
FROM (
|
||||
SELECT
|
||||
t1.user_id,
|
||||
array_agg(event ORDER BY time) AS events_table,
|
||||
COALESCE(hasdone_event, 'Has not done event') AS hasdone_event
|
||||
FROM (
|
||||
(
|
||||
SELECT u.user_id, 'step=>1'::text AS event, e.time
|
||||
FROM users_table AS u,
|
||||
events_table AS e
|
||||
WHERE u.user_id = e.user_id
|
||||
AND u.user_id >= 10
|
||||
AND u.user_id <= 25
|
||||
AND e.event_type IN (100, 101, 102)
|
||||
)
|
||||
UNION
|
||||
(
|
||||
SELECT u.user_id, 'step=>2'::text AS event, e.time
|
||||
FROM users_table AS u,
|
||||
events_table AS e
|
||||
WHERE u.user_id = e.user_id
|
||||
AND u.user_id >= 10
|
||||
AND u.user_id <= 25
|
||||
AND e.event_type IN (103, 104, 105)
|
||||
)
|
||||
) t1 LEFT JOIN (
|
||||
SELECT DISTINCT user_id,
|
||||
'Has done event'::TEXT AS hasdone_event
|
||||
FROM events_table AS e
|
||||
|
||||
WHERE e.user_id >= 10
|
||||
AND e.user_id <= 25
|
||||
AND e.event_type IN (106, 107, 108)
|
||||
|
||||
) t2 ON (t1.user_id = (t2.user_id)/2)
|
||||
GROUP BY t1.user_id, hasdone_event
|
||||
) t GROUP BY user_id, hasdone_event;
|
||||
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
-- Funnel, grouped by the number of times a user has done an event
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
|
||||
-- not pushable since the right part of the UNION query is not an equi join on
|
||||
-- the partition key
|
||||
INSERT INTO agg_results_third (user_id, value_1_agg, value_2_agg)
|
||||
SELECT
|
||||
user_id,
|
||||
avg(array_length(events_table, 1)) AS event_average,
|
||||
count_pay
|
||||
FROM (
|
||||
SELECT
|
||||
subquery_1.user_id,
|
||||
array_agg(event ORDER BY time) AS events_table,
|
||||
COALESCE(count_pay, 0) AS count_pay
|
||||
FROM
|
||||
(
|
||||
(SELECT
|
||||
users_table.user_id,
|
||||
'action=>1'AS event,
|
||||
events_table.time
|
||||
FROM
|
||||
users_table,
|
||||
events_table
|
||||
WHERE
|
||||
users_table.user_id = events_table.user_id AND
|
||||
users_table.user_id >= 10 AND
|
||||
users_table.user_id <= 70 AND
|
||||
events_table.event_type > 10 AND events_table.event_type < 12
|
||||
)
|
||||
UNION
|
||||
(SELECT
|
||||
users_table.user_id,
|
||||
'action=>2'AS event,
|
||||
events_table.time
|
||||
FROM
|
||||
users_table,
|
||||
events_table
|
||||
WHERE
|
||||
users_table.user_id != events_table.user_id AND
|
||||
users_table.user_id >= 10 AND
|
||||
users_table.user_id <= 70 AND
|
||||
events_table.event_type > 12 AND events_table.event_type < 14
|
||||
)
|
||||
) AS subquery_1
|
||||
LEFT JOIN
|
||||
(SELECT
|
||||
user_id,
|
||||
COUNT(*) AS count_pay
|
||||
FROM
|
||||
users_table
|
||||
WHERE
|
||||
user_id >= 10 AND
|
||||
user_id <= 70 AND
|
||||
users_table.value_1 > 15 AND users_table.value_1 < 17
|
||||
GROUP BY
|
||||
user_id
|
||||
HAVING
|
||||
COUNT(*) > 1) AS subquery_2
|
||||
ON
|
||||
subquery_1.user_id = subquery_2.user_id
|
||||
GROUP BY
|
||||
subquery_1.user_id,
|
||||
count_pay) AS subquery_top
|
||||
WHERE
|
||||
array_ndims(events_table) > 0
|
||||
GROUP BY
|
||||
count_pay, user_id
|
||||
ORDER BY
|
||||
count_pay;
|
||||
|
||||
-- not pushable since the JOIN condition is not equi JOIN
|
||||
-- (subquery_1 JOIN subquery_2)
|
||||
INSERT INTO agg_results_third (user_id, value_1_agg, value_2_agg)
|
||||
SELECT
|
||||
user_id,
|
||||
avg(array_length(events_table, 1)) AS event_average,
|
||||
count_pay
|
||||
FROM (
|
||||
SELECT
|
||||
subquery_1.user_id,
|
||||
array_agg(event ORDER BY time) AS events_table,
|
||||
COALESCE(count_pay, 0) AS count_pay
|
||||
FROM
|
||||
(
|
||||
(SELECT
|
||||
users_table.user_id,
|
||||
'action=>1'AS event,
|
||||
events_table.time
|
||||
FROM
|
||||
users_table,
|
||||
events_table
|
||||
WHERE
|
||||
users_table.user_id = events_table.user_id AND
|
||||
users_table.user_id >= 10 AND
|
||||
users_table.user_id <= 70 AND
|
||||
events_table.event_type > 10 AND events_table.event_type < 12
|
||||
)
|
||||
UNION
|
||||
(SELECT
|
||||
users_table.user_id,
|
||||
'action=>2'AS event,
|
||||
events_table.time
|
||||
FROM
|
||||
users_table,
|
||||
events_table
|
||||
WHERE
|
||||
users_table.user_id = events_table.user_id AND
|
||||
users_table.user_id >= 10 AND
|
||||
users_table.user_id <= 70 AND
|
||||
events_table.event_type > 12 AND events_table.event_type < 14
|
||||
)
|
||||
) AS subquery_1
|
||||
LEFT JOIN
|
||||
(SELECT
|
||||
user_id,
|
||||
COUNT(*) AS count_pay
|
||||
FROM
|
||||
users_table
|
||||
WHERE
|
||||
user_id >= 10 AND
|
||||
user_id <= 70 AND
|
||||
users_table.value_1 > 15 AND users_table.value_1 < 17
|
||||
GROUP BY
|
||||
user_id
|
||||
HAVING
|
||||
COUNT(*) > 1) AS subquery_2
|
||||
ON
|
||||
subquery_1.user_id > subquery_2.user_id
|
||||
GROUP BY
|
||||
subquery_1.user_id,
|
||||
count_pay) AS subquery_top
|
||||
WHERE
|
||||
array_ndims(events_table) > 0
|
||||
GROUP BY
|
||||
count_pay, user_id
|
||||
ORDER BY
|
||||
count_pay;
|
||||
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
-- Most recently seen users_table events_table
|
||||
------------------------------------
|
||||
-- Note that we don't use ORDER BY/LIMIT yet
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
-- not pushable since lateral join is not an equi join
|
||||
INSERT INTO agg_results_third (user_id, agg_time, value_2_agg)
|
||||
SELECT
|
||||
user_id,
|
||||
user_lastseen,
|
||||
array_length(event_array, 1)
|
||||
FROM (
|
||||
SELECT
|
||||
user_id,
|
||||
max(u.time) as user_lastseen,
|
||||
array_agg(event_type ORDER BY u.time) AS event_array
|
||||
FROM (
|
||||
|
||||
SELECT user_id, time
|
||||
FROM users_table
|
||||
WHERE
|
||||
user_id >= 10 AND
|
||||
user_id <= 70 AND
|
||||
users_table.value_1 > 10 AND users_table.value_1 < 12
|
||||
|
||||
) u LEFT JOIN LATERAL (
|
||||
SELECT event_type, time
|
||||
FROM events_table
|
||||
WHERE user_id != u.user_id AND
|
||||
events_table.event_type > 10 AND events_table.event_type < 12
|
||||
) t ON true
|
||||
GROUP BY user_id
|
||||
) AS shard_union
|
||||
ORDER BY user_lastseen DESC;
|
||||
|
||||
-- not pushable since lateral join is not on the partition key
|
||||
INSERT INTO agg_results_third (user_id, agg_time, value_2_agg)
|
||||
SELECT
|
||||
user_id,
|
||||
user_lastseen,
|
||||
array_length(event_array, 1)
|
||||
FROM (
|
||||
SELECT
|
||||
user_id,
|
||||
max(u.time) as user_lastseen,
|
||||
array_agg(event_type ORDER BY u.time) AS event_array
|
||||
FROM (
|
||||
|
||||
SELECT user_id, time
|
||||
FROM users_table
|
||||
WHERE
|
||||
user_id >= 10 AND
|
||||
user_id <= 70 AND
|
||||
users_table.value_1 > 10 AND users_table.value_1 < 12
|
||||
|
||||
) u LEFT JOIN LATERAL (
|
||||
SELECT event_type, time
|
||||
FROM events_table
|
||||
WHERE event_type = u.user_id AND
|
||||
events_table.event_type > 10 AND events_table.event_type < 12
|
||||
) t ON true
|
||||
GROUP BY user_id
|
||||
) AS shard_union
|
||||
ORDER BY user_lastseen DESC;
|
||||
|
||||
-- not pushable since lateral join is not on the partition key
|
||||
INSERT INTO agg_results_third (user_id, agg_time, value_2_agg)
|
||||
SELECT
|
||||
user_id,
|
||||
user_lastseen,
|
||||
array_length(event_array, 1)
|
||||
FROM (
|
||||
SELECT
|
||||
user_id,
|
||||
max(u.time) as user_lastseen,
|
||||
array_agg(event_type ORDER BY u.time) AS event_array
|
||||
FROM (
|
||||
|
||||
SELECT user_id, time, value_3 as val_3
|
||||
FROM users_table
|
||||
WHERE
|
||||
user_id >= 10 AND
|
||||
user_id <= 70 AND
|
||||
users_table.value_1 > 10 AND users_table.value_1 < 12
|
||||
|
||||
) u LEFT JOIN LATERAL (
|
||||
SELECT event_type, time
|
||||
FROM events_table
|
||||
WHERE event_type = u.val_3 AND
|
||||
events_table.event_type > 10 AND events_table.event_type < 12
|
||||
) t ON true
|
||||
GROUP BY user_id
|
||||
) AS shard_union
|
||||
ORDER BY user_lastseen DESC;
|
||||
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
-- Count the number of distinct users_table who are in segment X and Y and Z
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
|
||||
-- not pushable since partition key is NOT IN
|
||||
INSERT INTO agg_results_third (user_id)
|
||||
SELECT DISTINCT user_id
|
||||
FROM users_table
|
||||
WHERE user_id NOT IN (SELECT user_id FROM users_table WHERE value_1 >= 10 AND value_1 <= 20)
|
||||
AND user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 30 AND value_1 <= 40)
|
||||
AND user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 50 AND value_1 <= 60);
|
||||
|
||||
-- not pushable since partition key is not selected from the second subquery
|
||||
INSERT INTO agg_results_third (user_id)
|
||||
SELECT DISTINCT user_id
|
||||
FROM users_table
|
||||
WHERE user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 10 AND value_1 <= 20)
|
||||
AND user_id IN (SELECT value_1 FROM users_table WHERE value_1 >= 30 AND value_1 <= 40)
|
||||
AND user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 50 AND value_1 <= 60);
|
||||
|
||||
-- not pushable since second subquery does not return bare partition key
|
||||
INSERT INTO agg_results_third (user_id)
|
||||
SELECT DISTINCT user_id
|
||||
FROM users_table
|
||||
WHERE user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 10 AND value_1 <= 20)
|
||||
AND user_id IN (SELECT 3 * user_id FROM users_table WHERE value_1 >= 30 AND value_1 <= 40)
|
||||
AND user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 50 AND value_1 <= 60);
|
||||
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
-- Find customers who have done X, and satisfy other customer specific criteria
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
|
||||
-- not pushable since the join is not an equi join
|
||||
INSERT INTO agg_results_third(user_id, value_2_agg)
|
||||
SELECT user_id, value_2 FROM users_table WHERE
|
||||
value_1 > 101 AND value_1 < 110
|
||||
AND value_2 >= 5
|
||||
AND EXISTS (SELECT user_id FROM events_table WHERE event_type>101 AND event_type < 110 AND value_3 > 100 AND user_id!=users_table.user_id);
|
||||
|
||||
-- not pushable since the join is not on the partition key
|
||||
INSERT INTO agg_results_third(user_id, value_2_agg)
|
||||
SELECT user_id, value_2 FROM users_table WHERE
|
||||
value_1 > 101 AND value_1 < 110
|
||||
AND value_2 >= 5
|
||||
AND EXISTS (SELECT user_id FROM events_table WHERE event_type>101 AND event_type < 110 AND value_3 > 100 AND event_type = users_table.user_id);
|
||||
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
-- Customers who haven’t done X, and satisfy other customer specific criteria
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
-- not pushable since the join is not an equi join
|
||||
INSERT INTO agg_results_third(user_id, value_2_agg)
|
||||
SELECT user_id, value_2 FROM users_table WHERE
|
||||
value_1 = 101
|
||||
AND value_2 >= 5
|
||||
AND NOT EXISTS (SELECT user_id FROM events_table WHERE event_type=101 AND value_3 > 100 AND user_id!=users_table.user_id);
|
||||
|
||||
-- not pushable since the join is not on the partition key
|
||||
INSERT INTO agg_results_third(user_id, value_2_agg)
|
||||
SELECT user_id, value_2 FROM users_table WHERE
|
||||
value_1 = 101
|
||||
AND value_2 >= 5
|
||||
AND NOT EXISTS (SELECT user_id FROM events_table WHERE event_type=101 AND value_3 > 100 AND event_type=users_table.user_id);
|
||||
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
-- Customers who have done X and Y, and satisfy other customer specific criteria
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
-- not pushable since the second join is not an equi join
|
||||
INSERT INTO agg_results_third(user_id, value_2_agg)
|
||||
SELECT user_id, value_2 FROM users_table WHERE
|
||||
value_1 > 100
|
||||
AND value_2 >= 5
|
||||
AND EXISTS (SELECT user_id FROM events_table WHERE event_type!=100 AND value_3 > 100 AND user_id=users_table.user_id)
|
||||
AND EXISTS (SELECT user_id FROM events_table WHERE event_type=101 AND value_3 > 100 AND user_id!=users_table.user_id);
|
||||
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
-- Customers who have done X and haven’t done Y, and satisfy other customer specific criteria
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
|
||||
-- not pushable since the first join is not an equi join
|
||||
INSERT INTO agg_results_third(user_id, value_2_agg)
|
||||
SELECT user_id, value_2 FROM users_table WHERE
|
||||
value_2 >= 5
|
||||
AND EXISTS (SELECT user_id FROM events_table WHERE event_type > 100 AND event_type <= 300 AND value_3 > 100 AND user_id!=users_table.user_id)
|
||||
AND NOT EXISTS (SELECT user_id FROM events_table WHERE event_type > 300 AND event_type <= 350 AND value_3 > 100 AND user_id=users_table.user_id);
|
||||
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
-- Customers who have done X more than 2 times, and satisfy other customer specific criteria
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
|
||||
-- not pushable since the second join is not an equi join
|
||||
INSERT INTO agg_results_third(user_id, value_2_agg)
|
||||
SELECT user_id,
|
||||
value_2
|
||||
FROM users_table
|
||||
WHERE value_1 > 100
|
||||
AND value_1 < 124
|
||||
AND value_2 >= 5
|
||||
AND EXISTS (SELECT user_id
|
||||
FROM events_table
|
||||
WHERE event_type > 100
|
||||
AND event_type < 124
|
||||
AND value_3 > 100
|
||||
AND user_id != users_table.user_id
|
||||
GROUP BY user_id
|
||||
HAVING Count(*) > 2);
|
||||
|
||||
-- not pushable since the second join is not on the partition key
|
||||
INSERT INTO agg_results_third(user_id, value_2_agg)
|
||||
SELECT user_id,
|
||||
value_2
|
||||
FROM users_table
|
||||
WHERE value_1 > 100
|
||||
AND value_1 < 124
|
||||
AND value_2 >= 5
|
||||
AND EXISTS (SELECT user_id
|
||||
FROM events_table
|
||||
WHERE event_type > 100
|
||||
AND event_type < 124
|
||||
AND value_3 > 100
|
||||
AND event_type = users_table.user_id
|
||||
GROUP BY user_id
|
||||
HAVING Count(*) > 2);
|
||||
|
||||
-- not pushable since the second join is not on the partition key
|
||||
INSERT INTO agg_results_third(user_id, value_2_agg)
|
||||
SELECT user_id,
|
||||
value_2
|
||||
FROM users_table
|
||||
WHERE value_1 > 100
|
||||
AND value_1 < 124
|
||||
AND value_2 >= 5
|
||||
AND EXISTS (SELECT user_id
|
||||
FROM events_table
|
||||
WHERE event_type > 100
|
||||
AND event_type < 124
|
||||
AND value_3 > 100
|
||||
AND user_id = users_table.value_1
|
||||
GROUP BY user_id
|
||||
HAVING Count(*) > 2);
|
||||
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
-- Find me all users_table who has done some event and has filters
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
|
||||
-- not pushable due to NOT IN
|
||||
INSERT INTO agg_results_third(user_id)
|
||||
Select user_id
|
||||
From events_table
|
||||
Where event_type = 16
|
||||
And value_2 > 50
|
||||
And user_id NOT in
|
||||
(select user_id
|
||||
From users_table
|
||||
Where value_1 = 15
|
||||
And value_2 > 25);
|
||||
|
||||
-- not pushable since we're not selecting the partition key
|
||||
INSERT INTO agg_results_third(user_id)
|
||||
Select user_id
|
||||
From events_table
|
||||
Where event_type = 16
|
||||
And value_2 > 50
|
||||
And user_id in
|
||||
(select value_3
|
||||
From users_table
|
||||
Where value_1 = 15
|
||||
And value_2 > 25);
|
||||
|
||||
-- not pushable since we're not selecting the partition key
|
||||
-- from the events table
|
||||
INSERT INTO agg_results_third(user_id)
|
||||
Select user_id
|
||||
From events_table
|
||||
Where event_type = 16
|
||||
And value_2 > 50
|
||||
And event_type in
|
||||
(select user_id
|
||||
From users_table
|
||||
Where value_1 = 15
|
||||
And value_2 > 25);
|
||||
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
-- Which events_table did people who has done some specific events_table
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
|
||||
-- not pushable due to NOT IN
|
||||
INSERT INTO agg_results_third(user_id, value_1_agg)
|
||||
SELECT user_id, event_type FROM events_table
|
||||
WHERE user_id NOT IN (SELECT user_id from events_table WHERE event_type > 500 and event_type < 505)
|
||||
GROUP BY user_id, event_type;
|
||||
|
||||
-- not pushable due to not selecting the partition key
|
||||
INSERT INTO agg_results_third(user_id, value_1_agg)
|
||||
SELECT user_id, event_type FROM events_table
|
||||
WHERE user_id IN (SELECT value_2 from events_table WHERE event_type > 500 and event_type < 505)
|
||||
GROUP BY user_id, event_type;
|
||||
|
||||
-- not pushable due to not comparing user id from the events table
|
||||
INSERT INTO agg_results_third(user_id, value_1_agg)
|
||||
SELECT user_id, event_type FROM events_table
|
||||
WHERE event_type IN (SELECT user_id from events_table WHERE event_type > 500 and event_type < 505)
|
||||
GROUP BY user_id, event_type;
|
||||
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
-- Find my assets that have the highest probability and fetch their metadata
|
||||
------------------------------------
|
||||
------------------------------------
|
||||
|
||||
-- not pushable since the join is not an equi join
|
||||
INSERT INTO agg_results_third(user_id, value_1_agg, value_3_agg)
|
||||
SELECT
|
||||
users_table.user_id, users_table.value_1, prob
|
||||
FROM
|
||||
users_table
|
||||
JOIN
|
||||
(SELECT
|
||||
ma.user_id, (GREATEST(coalesce(ma.value_4 / 250, 0.0) + GREATEST(1.0))) / 2 AS prob
|
||||
FROM
|
||||
users_table AS ma, events_table as short_list
|
||||
WHERE
|
||||
short_list.user_id != ma.user_id and ma.value_1 < 50 and short_list.event_type < 50
|
||||
) temp
|
||||
ON users_table.user_id = temp.user_id
|
||||
WHERE users_table.value_1 < 50;
|
||||
|
||||
-- not pushable since the join is not on the partition key
|
||||
INSERT INTO agg_results_third(user_id, value_1_agg, value_3_agg)
|
||||
SELECT
|
||||
users_table.user_id, users_table.value_1, prob
|
||||
FROM
|
||||
users_table
|
||||
JOIN
|
||||
(SELECT
|
||||
ma.user_id, (GREATEST(coalesce(ma.value_4 / 250, 0.0) + GREATEST(1.0))) / 2 AS prob
|
||||
FROM
|
||||
users_table AS ma, events_table as short_list
|
||||
WHERE
|
||||
short_list.user_id = ma.value_2 and ma.value_1 < 50 and short_list.event_type < 50
|
||||
) temp
|
||||
ON users_table.user_id = temp.user_id
|
||||
WHERE users_table.value_1 < 50;
|
Loading…
Reference in New Issue