mirror of https://github.com/citusdata/citus.git
516 lines
20 KiB
Plaintext
516 lines
20 KiB
Plaintext
---------------------------------------------------------------------
|
||
---------------------------------------------------------------------
|
||
-- Vanilla funnel query
|
||
---------------------------------------------------------------------
|
||
---------------------------------------------------------------------
|
||
INSERT INTO agg_results (user_id, value_1_agg)
|
||
SELECT user_id, array_length(events_table, 1)
|
||
FROM (
|
||
SELECT user_id, array_agg(event ORDER BY time) AS events_table
|
||
FROM (
|
||
SELECT u.user_id, e.event_type::text AS event, e.time
|
||
FROM users_table AS u,
|
||
events_table AS e
|
||
WHERE u.user_id = e.user_id
|
||
AND u.user_id >= 1
|
||
AND u.user_id <= 2
|
||
AND e.event_type IN (2, 3)
|
||
) t
|
||
GROUP BY user_id
|
||
) q;
|
||
-- get some statistics from the aggregated results to ensure the results are correct
|
||
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||
count | count | avg
|
||
---------------------------------------------------------------------
|
||
2 | 2 | 1.5000000000000000
|
||
(1 row)
|
||
|
||
---------------------------------------------------------------------
|
||
---------------------------------------------------------------------
|
||
-- Funnel grouped by whether or not a user has done an event
|
||
---------------------------------------------------------------------
|
||
---------------------------------------------------------------------
|
||
INSERT INTO agg_results (user_id, value_1_agg, value_2_agg )
|
||
SELECT user_id, sum(array_length(events_table, 1)), length(hasdone_event)
|
||
FROM (
|
||
SELECT
|
||
t1.user_id,
|
||
array_agg(event ORDER BY time) AS events_table,
|
||
COALESCE(hasdone_event, 'Has not done event') AS hasdone_event
|
||
FROM (
|
||
(
|
||
SELECT u.user_id, 'step=>1'::text AS event, e.time
|
||
FROM users_table AS u,
|
||
events_table AS e
|
||
WHERE u.user_id = e.user_id
|
||
AND u.user_id >= 1
|
||
AND u.user_id <= 2
|
||
AND e.event_type IN (1, 2)
|
||
)
|
||
UNION
|
||
(
|
||
SELECT u.user_id, 'step=>2'::text AS event, e.time
|
||
FROM users_table AS u,
|
||
events_table AS e
|
||
WHERE u.user_id = e.user_id
|
||
AND u.user_id >= 1
|
||
AND u.user_id <= 2
|
||
AND e.event_type IN (3, 4)
|
||
)
|
||
) t1 LEFT JOIN (
|
||
SELECT DISTINCT user_id,
|
||
'Has done event'::TEXT AS hasdone_event
|
||
FROM events_table AS e
|
||
WHERE e.user_id >= 1
|
||
AND e.user_id <= 2
|
||
AND e.event_type IN (5, 6)
|
||
) t2 ON (t1.user_id = t2.user_id)
|
||
GROUP BY t1.user_id, hasdone_event
|
||
) t GROUP BY user_id, hasdone_event;
|
||
-- get some statistics from the aggregated results to ensure the results are correct
|
||
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||
count | count | avg
|
||
---------------------------------------------------------------------
|
||
4 | 2 | 1.5000000000000000
|
||
(1 row)
|
||
|
||
---------------------------------------------------------------------
|
||
---------------------------------------------------------------------
|
||
-- Funnel, grouped by the number of times a user has done an event
|
||
---------------------------------------------------------------------
|
||
---------------------------------------------------------------------
|
||
INSERT INTO agg_results (user_id, value_1_agg, value_2_agg)
|
||
SELECT
|
||
user_id,
|
||
avg(array_length(events_table, 1)) AS event_average,
|
||
count_pay
|
||
FROM (
|
||
SELECT
|
||
subquery_1.user_id,
|
||
array_agg(event ORDER BY time) AS events_table,
|
||
COALESCE(count_pay, 0) AS count_pay
|
||
FROM
|
||
(
|
||
(SELECT
|
||
users_table.user_id,
|
||
'action=>1'AS event,
|
||
events_table.time
|
||
FROM
|
||
users_table,
|
||
events_table
|
||
WHERE
|
||
users_table.user_id = events_table.user_id AND
|
||
users_table.user_id >= 1 AND
|
||
users_table.user_id <= 3 AND
|
||
events_table.event_type > 0 AND events_table.event_type < 2
|
||
)
|
||
UNION
|
||
(SELECT
|
||
users_table.user_id,
|
||
'action=>2'AS event,
|
||
events_table.time
|
||
FROM
|
||
users_table,
|
||
events_table
|
||
WHERE
|
||
users_table.user_id = events_table.user_id AND
|
||
users_table.user_id >= 1 AND
|
||
users_table.user_id <= 3 AND
|
||
events_table.event_type > 1 AND events_table.event_type < 3
|
||
)
|
||
) AS subquery_1
|
||
LEFT JOIN
|
||
(SELECT
|
||
user_id,
|
||
COUNT(*) AS count_pay
|
||
FROM
|
||
users_table
|
||
WHERE
|
||
user_id >= 1 AND
|
||
user_id <= 3 AND
|
||
users_table.value_1 > 3 AND users_table.value_1 < 5
|
||
GROUP BY
|
||
user_id
|
||
HAVING
|
||
COUNT(*) > 1) AS subquery_2
|
||
ON
|
||
subquery_1.user_id = subquery_2.user_id
|
||
GROUP BY
|
||
subquery_1.user_id,
|
||
count_pay) AS subquery_top
|
||
WHERE
|
||
array_ndims(events_table) > 0
|
||
GROUP BY
|
||
count_pay, user_id
|
||
ORDER BY
|
||
count_pay;
|
||
-- get some statistics from the aggregated results to ensure the results are correct
|
||
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||
count | count | avg
|
||
---------------------------------------------------------------------
|
||
7 | 3 | 1.7142857142857143
|
||
(1 row)
|
||
|
||
---------------------------------------------------------------------
|
||
---------------------------------------------------------------------
|
||
-- Most recently seen users_table events_table
|
||
---------------------------------------------------------------------
|
||
-- Note that we don't use ORDER BY/LIMIT yet
|
||
---------------------------------------------------------------------
|
||
---------------------------------------------------------------------
|
||
TRUNCATE agg_results;
|
||
INSERT INTO agg_results (user_id, agg_time, value_2_agg)
|
||
SELECT
|
||
user_id,
|
||
user_lastseen,
|
||
array_length(event_array, 1)
|
||
FROM (
|
||
SELECT
|
||
user_id,
|
||
max(u.time) as user_lastseen,
|
||
array_agg(event_type ORDER BY u.time) AS event_array
|
||
FROM (
|
||
SELECT user_id, time
|
||
FROM users_table
|
||
WHERE
|
||
user_id >= 1 AND
|
||
user_id <= 3 AND
|
||
users_table.value_1 > 3 AND users_table.value_1 < 6
|
||
) u LEFT JOIN LATERAL (
|
||
SELECT event_type, time
|
||
FROM events_table
|
||
WHERE user_id = u.user_id AND
|
||
events_table.event_type > 3 AND events_table.event_type < 6
|
||
) t ON true
|
||
GROUP BY user_id
|
||
) AS shard_union
|
||
ORDER BY user_lastseen DESC;
|
||
-- get some statistics from the aggregated results to ensure the results are correct
|
||
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||
count | count | avg
|
||
---------------------------------------------------------------------
|
||
3 | 3 | 2.0000000000000000
|
||
(1 row)
|
||
|
||
---------------------------------------------------------------------
|
||
---------------------------------------------------------------------
|
||
-- Count the number of distinct users_table who are in segment X and Y and Z
|
||
---------------------------------------------------------------------
|
||
---------------------------------------------------------------------
|
||
TRUNCATE agg_results;
|
||
INSERT INTO agg_results (user_id)
|
||
SELECT DISTINCT user_id
|
||
FROM users_table
|
||
WHERE user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 1 AND value_1 <= 2)
|
||
AND user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 3 AND value_1 <= 4)
|
||
AND user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 5 AND value_1 <= 6);
|
||
-- get some statistics from the aggregated results to ensure the results are correct
|
||
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||
count | count | avg
|
||
---------------------------------------------------------------------
|
||
5 | 5 | 3.8000000000000000
|
||
(1 row)
|
||
|
||
---------------------------------------------------------------------
|
||
---------------------------------------------------------------------
|
||
-- Count the number of distinct users_table who are in at least two of X and Y and Z segments
|
||
---------------------------------------------------------------------
|
||
---------------------------------------------------------------------
|
||
TRUNCATE agg_results;
|
||
INSERT INTO agg_results(user_id)
|
||
SELECT user_id
|
||
FROM users_table
|
||
WHERE (value_1 = 1
|
||
OR value_1 = 2
|
||
OR value_1 = 3)
|
||
GROUP BY user_id
|
||
HAVING count(distinct value_1) >= 2;
|
||
-- get some statistics from the aggregated results to ensure the results are correct
|
||
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||
count | count | avg
|
||
---------------------------------------------------------------------
|
||
6 | 6 | 3.5000000000000000
|
||
(1 row)
|
||
|
||
---------------------------------------------------------------------
|
||
---------------------------------------------------------------------
|
||
-- Find customers who have done X, and satisfy other customer specific criteria
|
||
---------------------------------------------------------------------
|
||
---------------------------------------------------------------------
|
||
TRUNCATE agg_results;
|
||
INSERT INTO agg_results(user_id, value_2_agg)
|
||
SELECT user_id, value_2 FROM users_table WHERE
|
||
value_1 > 1 AND value_1 < 4
|
||
AND value_2 >= 3
|
||
AND EXISTS (SELECT user_id FROM events_table WHERE event_type>1 AND event_type < 5 AND value_3 > 2 AND user_id=users_table.user_id);
|
||
-- get some statistics from the aggregated results to ensure the results are correct
|
||
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||
count | count | avg
|
||
---------------------------------------------------------------------
|
||
20 | 6 | 3.7500000000000000
|
||
(1 row)
|
||
|
||
---------------------------------------------------------------------
|
||
---------------------------------------------------------------------
|
||
-- Customers who haven’t done X, and satisfy other customer specific criteria
|
||
---------------------------------------------------------------------
|
||
---------------------------------------------------------------------
|
||
TRUNCATE agg_results;
|
||
INSERT INTO agg_results(user_id, value_2_agg)
|
||
SELECT user_id, value_2 FROM users_table WHERE
|
||
value_1 = 1
|
||
AND value_2 >= 2
|
||
AND NOT EXISTS (SELECT user_id FROM events_table WHERE event_type=1 AND value_3 > 4 AND user_id=users_table.user_id);
|
||
-- get some statistics from the aggregated results to ensure the results are correct
|
||
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||
count | count | avg
|
||
---------------------------------------------------------------------
|
||
4 | 2 | 4.2500000000000000
|
||
(1 row)
|
||
|
||
---------------------------------------------------------------------
|
||
---------------------------------------------------------------------
|
||
-- Customers who have done X and Y, and satisfy other customer specific criteria
|
||
---------------------------------------------------------------------
|
||
---------------------------------------------------------------------
|
||
TRUNCATE agg_results;
|
||
INSERT INTO agg_results(user_id, value_2_agg)
|
||
SELECT user_id, value_2 FROM users_table WHERE
|
||
value_1 > 1
|
||
AND value_2 >= 3
|
||
AND EXISTS (SELECT user_id FROM events_table WHERE event_type!=1 AND value_3 > 1 AND user_id=users_table.user_id)
|
||
AND EXISTS (SELECT user_id FROM events_table WHERE event_type=2 AND value_3 > 1 AND user_id=users_table.user_id);
|
||
-- get some statistics from the aggregated results to ensure the results are correct
|
||
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||
count | count | avg
|
||
---------------------------------------------------------------------
|
||
29 | 5 | 3.1034482758620690
|
||
(1 row)
|
||
|
||
---------------------------------------------------------------------
|
||
---------------------------------------------------------------------
|
||
-- Customers who have done X and haven’t done Y, and satisfy other customer specific criteria
|
||
---------------------------------------------------------------------
|
||
---------------------------------------------------------------------
|
||
TRUNCATE agg_results;
|
||
INSERT INTO agg_results(user_id, value_2_agg)
|
||
SELECT user_id, value_2 FROM users_table WHERE
|
||
value_2 >= 3
|
||
AND EXISTS (SELECT user_id FROM events_table WHERE event_type > 1 AND event_type <= 3 AND value_3 > 1 AND user_id=users_table.user_id)
|
||
AND NOT EXISTS (SELECT user_id FROM events_table WHERE event_type > 3 AND event_type <= 4 AND value_3 > 1 AND user_id=users_table.user_id);
|
||
-- get some statistics from the aggregated results to ensure the results are correct
|
||
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||
count | count | avg
|
||
---------------------------------------------------------------------
|
||
11 | 1 | 5.0000000000000000
|
||
(1 row)
|
||
|
||
---------------------------------------------------------------------
|
||
---------------------------------------------------------------------
|
||
-- Customers who have done X more than 2 times, and satisfy other customer specific criteria
|
||
---------------------------------------------------------------------
|
||
---------------------------------------------------------------------
|
||
TRUNCATE agg_results;
|
||
INSERT INTO agg_results(user_id, value_2_agg)
|
||
SELECT user_id,
|
||
value_2
|
||
FROM users_table
|
||
WHERE value_1 > 1
|
||
AND value_1 < 3
|
||
AND value_2 >= 1
|
||
AND EXISTS (SELECT user_id
|
||
FROM events_table
|
||
WHERE event_type > 1
|
||
AND event_type < 3
|
||
AND value_3 > 1
|
||
AND user_id = users_table.user_id
|
||
GROUP BY user_id
|
||
HAVING Count(*) > 2);
|
||
-- get some statistics from the aggregated results to ensure the results are correct
|
||
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||
count | count | avg
|
||
---------------------------------------------------------------------
|
||
4 | 2 | 3.5000000000000000
|
||
(1 row)
|
||
|
||
---------------------------------------------------------------------
|
||
---------------------------------------------------------------------
|
||
-- Find me all users_table who logged in more than once
|
||
---------------------------------------------------------------------
|
||
---------------------------------------------------------------------
|
||
TRUNCATE agg_results;
|
||
INSERT INTO agg_results(user_id, value_1_agg)
|
||
SELECT user_id, value_1 from
|
||
(
|
||
SELECT user_id, value_1 From users_table
|
||
WHERE value_2 > 1 and user_id = 1 GROUP BY value_1, user_id HAVING count(*) > 1
|
||
) as a;
|
||
-- get some statistics from the aggregated results to ensure the results are correct
|
||
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||
count | count | avg
|
||
---------------------------------------------------------------------
|
||
1 | 1 | 1.00000000000000000000
|
||
(1 row)
|
||
|
||
---------------------------------------------------------------------
|
||
---------------------------------------------------------------------
|
||
-- Find me all users_table who has done some event and has filters
|
||
---------------------------------------------------------------------
|
||
---------------------------------------------------------------------
|
||
TRUNCATE agg_results;
|
||
INSERT INTO agg_results(user_id)
|
||
Select user_id
|
||
From events_table
|
||
Where event_type = 2
|
||
And value_2 > 2
|
||
And user_id in
|
||
(select user_id
|
||
From users_table
|
||
Where value_1 = 2
|
||
And value_2 > 1);
|
||
-- get some statistics from the aggregated results to ensure the results are correct
|
||
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||
count | count | avg
|
||
---------------------------------------------------------------------
|
||
11 | 4 | 3.1818181818181818
|
||
(1 row)
|
||
|
||
---------------------------------------------------------------------
|
||
---------------------------------------------------------------------
|
||
-- Which events_table did people who has done some specific events_table
|
||
---------------------------------------------------------------------
|
||
---------------------------------------------------------------------
|
||
TRUNCATE agg_results;
|
||
INSERT INTO agg_results(user_id, value_1_agg)
|
||
SELECT user_id, event_type FROM events_table
|
||
WHERE user_id in (SELECT user_id from events_table WHERE event_type > 3 and event_type < 5)
|
||
GROUP BY user_id, event_type;
|
||
-- get some statistics from the aggregated results to ensure the results are correct
|
||
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||
count | count | avg
|
||
---------------------------------------------------------------------
|
||
34 | 6 | 3.4411764705882353
|
||
(1 row)
|
||
|
||
---------------------------------------------------------------------
|
||
---------------------------------------------------------------------
|
||
-- Find me all the users_table who has done some event more than three times
|
||
---------------------------------------------------------------------
|
||
---------------------------------------------------------------------
|
||
TRUNCATE agg_results;
|
||
INSERT INTO agg_results(user_id)
|
||
select user_id from
|
||
(
|
||
select
|
||
user_id
|
||
from
|
||
events_table
|
||
where event_type = 4 group by user_id having count(*) > 3
|
||
) as a;
|
||
-- get some statistics from the aggregated results to ensure the results are correct
|
||
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||
count | count | avg
|
||
---------------------------------------------------------------------
|
||
4 | 4 | 2.5000000000000000
|
||
(1 row)
|
||
|
||
---------------------------------------------------------------------
|
||
---------------------------------------------------------------------
|
||
-- Find my assets that have the highest probability and fetch their metadata
|
||
---------------------------------------------------------------------
|
||
---------------------------------------------------------------------
|
||
TRUNCATE agg_results;
|
||
INSERT INTO agg_results(user_id, value_1_agg, value_3_agg)
|
||
SELECT
|
||
users_table.user_id, users_table.value_1, prob
|
||
FROM
|
||
users_table
|
||
JOIN
|
||
(SELECT
|
||
ma.user_id, (GREATEST(coalesce(ma.value_4, 0.0) / 250 + GREATEST(1.0))) / 2 AS prob
|
||
FROM
|
||
users_table AS ma, events_table as short_list
|
||
WHERE
|
||
short_list.user_id = ma.user_id and ma.value_1 < 3 and short_list.event_type < 3
|
||
) temp
|
||
ON users_table.user_id = temp.user_id
|
||
WHERE users_table.value_1 < 3;
|
||
-- get some statistics from the aggregated results to ensure the results are correct
|
||
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||
count | count | avg
|
||
---------------------------------------------------------------------
|
||
3488 | 6 | 3.5372706422018349
|
||
(1 row)
|
||
|
||
-- DISTINCT in the outer query and DISTINCT in the subquery
|
||
TRUNCATE agg_results;
|
||
INSERT INTO agg_results(user_id)
|
||
SELECT
|
||
DISTINCT users_ids.user_id
|
||
FROM
|
||
(SELECT DISTINCT user_id FROM users_table) as users_ids
|
||
JOIN
|
||
(SELECT
|
||
ma.user_id, ma.value_1, (GREATEST(coalesce(ma.value_4 / 250, 0.0) + GREATEST(1.0))) / 2 AS prob
|
||
FROM
|
||
users_table AS ma, events_table as short_list
|
||
WHERE
|
||
short_list.user_id = ma.user_id and ma.value_1 < 3 and short_list.event_type < 2
|
||
) temp
|
||
ON users_ids.user_id = temp.user_id
|
||
WHERE temp.value_1 < 3;
|
||
-- get some statistics from the aggregated results to ensure the results are correct
|
||
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
|
||
count | count | avg
|
||
---------------------------------------------------------------------
|
||
6 | 6 | 3.5000000000000000
|
||
(1 row)
|
||
|
||
-- DISTINCT ON in the outer query and DISTINCT in the subquery
|
||
TRUNCATE agg_results;
|
||
INSERT INTO agg_results(user_id, value_1_agg, value_2_agg)
|
||
SELECT
|
||
DISTINCT ON (users_ids.user_id) users_ids.user_id, temp.value_1, prob
|
||
FROM
|
||
(SELECT DISTINCT user_id FROM users_table) as users_ids
|
||
JOIN
|
||
(SELECT
|
||
ma.user_id, ma.value_1, (GREATEST(coalesce(ma.value_4 / 250, 0.0) + GREATEST(1.0))) / 2 AS prob
|
||
FROM
|
||
users_table AS ma, events_table as short_list
|
||
WHERE
|
||
short_list.user_id = ma.user_id and ma.value_1 < 3 and short_list.event_type < 2
|
||
) temp
|
||
ON users_ids.user_id = temp.user_id
|
||
WHERE temp.value_1 < 3
|
||
ORDER BY 1, 2;
|
||
SELECT count(*), count(DISTINCT user_id), avg(user_id), avg(value_1_agg) FROM agg_results;
|
||
count | count | avg | avg
|
||
---------------------------------------------------------------------
|
||
6 | 6 | 3.5000000000000000 | 0.16666666666666666667
|
||
(1 row)
|
||
|
||
-- DISTINCT ON in the outer query and DISTINCT ON in the subquery
|
||
TRUNCATE agg_results;
|
||
INSERT INTO agg_results(user_id, value_1_agg, value_2_agg)
|
||
SELECT
|
||
DISTINCT ON (users_ids.user_id) users_ids.user_id, temp.value_1, prob
|
||
FROM
|
||
(SELECT DISTINCT ON (user_id) user_id, value_2 FROM users_table ORDER BY 1,2) as users_ids
|
||
JOIN
|
||
(SELECT
|
||
ma.user_id, ma.value_1, (GREATEST(coalesce(ma.value_4 / 250, 0.0) + GREATEST(1.0))) / 2 AS prob
|
||
FROM
|
||
users_table AS ma, events_table as short_list
|
||
WHERE
|
||
short_list.user_id = ma.user_id and ma.value_1 < 10 and short_list.event_type < 2
|
||
) temp
|
||
ON users_ids.user_id = temp.user_id
|
||
ORDER BY 1, 2;
|
||
SELECT count(*), count(DISTINCT user_id), avg(user_id), avg(value_1_agg) FROM agg_results;
|
||
count | count | avg | avg
|
||
---------------------------------------------------------------------
|
||
6 | 6 | 3.5000000000000000 | 0.16666666666666666667
|
||
(1 row)
|
||
|