citus/src/test/regress/expected/multi_behavioral_analytics_...

516 lines
20 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

---------------------------------------------------------------------
---------------------------------------------------------------------
-- Vanilla funnel query
---------------------------------------------------------------------
---------------------------------------------------------------------
INSERT INTO agg_results (user_id, value_1_agg)
SELECT user_id, array_length(events_table, 1)
FROM (
SELECT user_id, array_agg(event ORDER BY time) AS events_table
FROM (
SELECT u.user_id, e.event_type::text AS event, e.time
FROM users_table AS u,
events_table AS e
WHERE u.user_id = e.user_id
AND u.user_id >= 1
AND u.user_id <= 2
AND e.event_type IN (2, 3)
) t
GROUP BY user_id
) q;
-- get some statistics from the aggregated results to ensure the results are correct
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
count | count | avg
---------------------------------------------------------------------
2 | 2 | 1.5000000000000000
(1 row)
---------------------------------------------------------------------
---------------------------------------------------------------------
-- Funnel grouped by whether or not a user has done an event
---------------------------------------------------------------------
---------------------------------------------------------------------
INSERT INTO agg_results (user_id, value_1_agg, value_2_agg )
SELECT user_id, sum(array_length(events_table, 1)), length(hasdone_event)
FROM (
SELECT
t1.user_id,
array_agg(event ORDER BY time) AS events_table,
COALESCE(hasdone_event, 'Has not done event') AS hasdone_event
FROM (
(
SELECT u.user_id, 'step=>1'::text AS event, e.time
FROM users_table AS u,
events_table AS e
WHERE u.user_id = e.user_id
AND u.user_id >= 1
AND u.user_id <= 2
AND e.event_type IN (1, 2)
)
UNION
(
SELECT u.user_id, 'step=>2'::text AS event, e.time
FROM users_table AS u,
events_table AS e
WHERE u.user_id = e.user_id
AND u.user_id >= 1
AND u.user_id <= 2
AND e.event_type IN (3, 4)
)
) t1 LEFT JOIN (
SELECT DISTINCT user_id,
'Has done event'::TEXT AS hasdone_event
FROM events_table AS e
WHERE e.user_id >= 1
AND e.user_id <= 2
AND e.event_type IN (5, 6)
) t2 ON (t1.user_id = t2.user_id)
GROUP BY t1.user_id, hasdone_event
) t GROUP BY user_id, hasdone_event;
-- get some statistics from the aggregated results to ensure the results are correct
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
count | count | avg
---------------------------------------------------------------------
4 | 2 | 1.5000000000000000
(1 row)
---------------------------------------------------------------------
---------------------------------------------------------------------
-- Funnel, grouped by the number of times a user has done an event
---------------------------------------------------------------------
---------------------------------------------------------------------
INSERT INTO agg_results (user_id, value_1_agg, value_2_agg)
SELECT
user_id,
avg(array_length(events_table, 1)) AS event_average,
count_pay
FROM (
SELECT
subquery_1.user_id,
array_agg(event ORDER BY time) AS events_table,
COALESCE(count_pay, 0) AS count_pay
FROM
(
(SELECT
users_table.user_id,
'action=>1'AS event,
events_table.time
FROM
users_table,
events_table
WHERE
users_table.user_id = events_table.user_id AND
users_table.user_id >= 1 AND
users_table.user_id <= 3 AND
events_table.event_type > 0 AND events_table.event_type < 2
)
UNION
(SELECT
users_table.user_id,
'action=>2'AS event,
events_table.time
FROM
users_table,
events_table
WHERE
users_table.user_id = events_table.user_id AND
users_table.user_id >= 1 AND
users_table.user_id <= 3 AND
events_table.event_type > 1 AND events_table.event_type < 3
)
) AS subquery_1
LEFT JOIN
(SELECT
user_id,
COUNT(*) AS count_pay
FROM
users_table
WHERE
user_id >= 1 AND
user_id <= 3 AND
users_table.value_1 > 3 AND users_table.value_1 < 5
GROUP BY
user_id
HAVING
COUNT(*) > 1) AS subquery_2
ON
subquery_1.user_id = subquery_2.user_id
GROUP BY
subquery_1.user_id,
count_pay) AS subquery_top
WHERE
array_ndims(events_table) > 0
GROUP BY
count_pay, user_id
ORDER BY
count_pay;
-- get some statistics from the aggregated results to ensure the results are correct
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
count | count | avg
---------------------------------------------------------------------
7 | 3 | 1.7142857142857143
(1 row)
---------------------------------------------------------------------
---------------------------------------------------------------------
-- Most recently seen users_table events_table
---------------------------------------------------------------------
-- Note that we don't use ORDER BY/LIMIT yet
---------------------------------------------------------------------
---------------------------------------------------------------------
TRUNCATE agg_results;
INSERT INTO agg_results (user_id, agg_time, value_2_agg)
SELECT
user_id,
user_lastseen,
array_length(event_array, 1)
FROM (
SELECT
user_id,
max(u.time) as user_lastseen,
array_agg(event_type ORDER BY u.time) AS event_array
FROM (
SELECT user_id, time
FROM users_table
WHERE
user_id >= 1 AND
user_id <= 3 AND
users_table.value_1 > 3 AND users_table.value_1 < 6
) u LEFT JOIN LATERAL (
SELECT event_type, time
FROM events_table
WHERE user_id = u.user_id AND
events_table.event_type > 3 AND events_table.event_type < 6
) t ON true
GROUP BY user_id
) AS shard_union
ORDER BY user_lastseen DESC;
-- get some statistics from the aggregated results to ensure the results are correct
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
count | count | avg
---------------------------------------------------------------------
3 | 3 | 2.0000000000000000
(1 row)
---------------------------------------------------------------------
---------------------------------------------------------------------
-- Count the number of distinct users_table who are in segment X and Y and Z
---------------------------------------------------------------------
---------------------------------------------------------------------
TRUNCATE agg_results;
INSERT INTO agg_results (user_id)
SELECT DISTINCT user_id
FROM users_table
WHERE user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 1 AND value_1 <= 2)
AND user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 3 AND value_1 <= 4)
AND user_id IN (SELECT user_id FROM users_table WHERE value_1 >= 5 AND value_1 <= 6);
-- get some statistics from the aggregated results to ensure the results are correct
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
count | count | avg
---------------------------------------------------------------------
5 | 5 | 3.8000000000000000
(1 row)
---------------------------------------------------------------------
---------------------------------------------------------------------
-- Count the number of distinct users_table who are in at least two of X and Y and Z segments
---------------------------------------------------------------------
---------------------------------------------------------------------
TRUNCATE agg_results;
INSERT INTO agg_results(user_id)
SELECT user_id
FROM users_table
WHERE (value_1 = 1
OR value_1 = 2
OR value_1 = 3)
GROUP BY user_id
HAVING count(distinct value_1) >= 2;
-- get some statistics from the aggregated results to ensure the results are correct
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
count | count | avg
---------------------------------------------------------------------
6 | 6 | 3.5000000000000000
(1 row)
---------------------------------------------------------------------
---------------------------------------------------------------------
-- Find customers who have done X, and satisfy other customer specific criteria
---------------------------------------------------------------------
---------------------------------------------------------------------
TRUNCATE agg_results;
INSERT INTO agg_results(user_id, value_2_agg)
SELECT user_id, value_2 FROM users_table WHERE
value_1 > 1 AND value_1 < 4
AND value_2 >= 3
AND EXISTS (SELECT user_id FROM events_table WHERE event_type>1 AND event_type < 5 AND value_3 > 2 AND user_id=users_table.user_id);
-- get some statistics from the aggregated results to ensure the results are correct
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
count | count | avg
---------------------------------------------------------------------
20 | 6 | 3.7500000000000000
(1 row)
---------------------------------------------------------------------
---------------------------------------------------------------------
-- Customers who haven’t done X, and satisfy other customer specific criteria
---------------------------------------------------------------------
---------------------------------------------------------------------
TRUNCATE agg_results;
INSERT INTO agg_results(user_id, value_2_agg)
SELECT user_id, value_2 FROM users_table WHERE
value_1 = 1
AND value_2 >= 2
AND NOT EXISTS (SELECT user_id FROM events_table WHERE event_type=1 AND value_3 > 4 AND user_id=users_table.user_id);
-- get some statistics from the aggregated results to ensure the results are correct
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
count | count | avg
---------------------------------------------------------------------
4 | 2 | 4.2500000000000000
(1 row)
---------------------------------------------------------------------
---------------------------------------------------------------------
-- Customers who have done X and Y, and satisfy other customer specific criteria
---------------------------------------------------------------------
---------------------------------------------------------------------
TRUNCATE agg_results;
INSERT INTO agg_results(user_id, value_2_agg)
SELECT user_id, value_2 FROM users_table WHERE
value_1 > 1
AND value_2 >= 3
AND EXISTS (SELECT user_id FROM events_table WHERE event_type!=1 AND value_3 > 1 AND user_id=users_table.user_id)
AND EXISTS (SELECT user_id FROM events_table WHERE event_type=2 AND value_3 > 1 AND user_id=users_table.user_id);
-- get some statistics from the aggregated results to ensure the results are correct
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
count | count | avg
---------------------------------------------------------------------
29 | 5 | 3.1034482758620690
(1 row)
---------------------------------------------------------------------
---------------------------------------------------------------------
-- Customers who have done X and haven’t done Y, and satisfy other customer specific criteria
---------------------------------------------------------------------
---------------------------------------------------------------------
TRUNCATE agg_results;
INSERT INTO agg_results(user_id, value_2_agg)
SELECT user_id, value_2 FROM users_table WHERE
value_2 >= 3
AND EXISTS (SELECT user_id FROM events_table WHERE event_type > 1 AND event_type <= 3 AND value_3 > 1 AND user_id=users_table.user_id)
AND NOT EXISTS (SELECT user_id FROM events_table WHERE event_type > 3 AND event_type <= 4 AND value_3 > 1 AND user_id=users_table.user_id);
-- get some statistics from the aggregated results to ensure the results are correct
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
count | count | avg
---------------------------------------------------------------------
11 | 1 | 5.0000000000000000
(1 row)
---------------------------------------------------------------------
---------------------------------------------------------------------
-- Customers who have done X more than 2 times, and satisfy other customer specific criteria
---------------------------------------------------------------------
---------------------------------------------------------------------
TRUNCATE agg_results;
INSERT INTO agg_results(user_id, value_2_agg)
SELECT user_id,
value_2
FROM users_table
WHERE value_1 > 1
AND value_1 < 3
AND value_2 >= 1
AND EXISTS (SELECT user_id
FROM events_table
WHERE event_type > 1
AND event_type < 3
AND value_3 > 1
AND user_id = users_table.user_id
GROUP BY user_id
HAVING Count(*) > 2);
-- get some statistics from the aggregated results to ensure the results are correct
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
count | count | avg
---------------------------------------------------------------------
4 | 2 | 3.5000000000000000
(1 row)
---------------------------------------------------------------------
---------------------------------------------------------------------
-- Find me all users_table who logged in more than once
---------------------------------------------------------------------
---------------------------------------------------------------------
TRUNCATE agg_results;
INSERT INTO agg_results(user_id, value_1_agg)
SELECT user_id, value_1 from
(
SELECT user_id, value_1 From users_table
WHERE value_2 > 1 and user_id = 1 GROUP BY value_1, user_id HAVING count(*) > 1
) as a;
-- get some statistics from the aggregated results to ensure the results are correct
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
count | count | avg
---------------------------------------------------------------------
1 | 1 | 1.00000000000000000000
(1 row)
---------------------------------------------------------------------
---------------------------------------------------------------------
-- Find me all users_table who has done some event and has filters
---------------------------------------------------------------------
---------------------------------------------------------------------
TRUNCATE agg_results;
INSERT INTO agg_results(user_id)
Select user_id
From events_table
Where event_type = 2
And value_2 > 2
And user_id in
(select user_id
From users_table
Where value_1 = 2
And value_2 > 1);
-- get some statistics from the aggregated results to ensure the results are correct
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
count | count | avg
---------------------------------------------------------------------
11 | 4 | 3.1818181818181818
(1 row)
---------------------------------------------------------------------
---------------------------------------------------------------------
-- Which events_table did people who has done some specific events_table
---------------------------------------------------------------------
---------------------------------------------------------------------
TRUNCATE agg_results;
INSERT INTO agg_results(user_id, value_1_agg)
SELECT user_id, event_type FROM events_table
WHERE user_id in (SELECT user_id from events_table WHERE event_type > 3 and event_type < 5)
GROUP BY user_id, event_type;
-- get some statistics from the aggregated results to ensure the results are correct
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
count | count | avg
---------------------------------------------------------------------
34 | 6 | 3.4411764705882353
(1 row)
---------------------------------------------------------------------
---------------------------------------------------------------------
-- Find me all the users_table who has done some event more than three times
---------------------------------------------------------------------
---------------------------------------------------------------------
TRUNCATE agg_results;
INSERT INTO agg_results(user_id)
select user_id from
(
select
user_id
from
events_table
where event_type = 4 group by user_id having count(*) > 3
) as a;
-- get some statistics from the aggregated results to ensure the results are correct
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
count | count | avg
---------------------------------------------------------------------
4 | 4 | 2.5000000000000000
(1 row)
---------------------------------------------------------------------
---------------------------------------------------------------------
-- Find my assets that have the highest probability and fetch their metadata
---------------------------------------------------------------------
---------------------------------------------------------------------
TRUNCATE agg_results;
INSERT INTO agg_results(user_id, value_1_agg, value_3_agg)
SELECT
users_table.user_id, users_table.value_1, prob
FROM
users_table
JOIN
(SELECT
ma.user_id, (GREATEST(coalesce(ma.value_4, 0.0) / 250 + GREATEST(1.0))) / 2 AS prob
FROM
users_table AS ma, events_table as short_list
WHERE
short_list.user_id = ma.user_id and ma.value_1 < 3 and short_list.event_type < 3
) temp
ON users_table.user_id = temp.user_id
WHERE users_table.value_1 < 3;
-- get some statistics from the aggregated results to ensure the results are correct
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
count | count | avg
---------------------------------------------------------------------
3488 | 6 | 3.5372706422018349
(1 row)
-- DISTINCT in the outer query and DISTINCT in the subquery
TRUNCATE agg_results;
INSERT INTO agg_results(user_id)
SELECT
DISTINCT users_ids.user_id
FROM
(SELECT DISTINCT user_id FROM users_table) as users_ids
JOIN
(SELECT
ma.user_id, ma.value_1, (GREATEST(coalesce(ma.value_4 / 250, 0.0) + GREATEST(1.0))) / 2 AS prob
FROM
users_table AS ma, events_table as short_list
WHERE
short_list.user_id = ma.user_id and ma.value_1 < 3 and short_list.event_type < 2
) temp
ON users_ids.user_id = temp.user_id
WHERE temp.value_1 < 3;
-- get some statistics from the aggregated results to ensure the results are correct
SELECT count(*), count(DISTINCT user_id), avg(user_id) FROM agg_results;
count | count | avg
---------------------------------------------------------------------
6 | 6 | 3.5000000000000000
(1 row)
-- DISTINCT ON in the outer query and DISTINCT in the subquery
TRUNCATE agg_results;
INSERT INTO agg_results(user_id, value_1_agg, value_2_agg)
SELECT
DISTINCT ON (users_ids.user_id) users_ids.user_id, temp.value_1, prob
FROM
(SELECT DISTINCT user_id FROM users_table) as users_ids
JOIN
(SELECT
ma.user_id, ma.value_1, (GREATEST(coalesce(ma.value_4 / 250, 0.0) + GREATEST(1.0))) / 2 AS prob
FROM
users_table AS ma, events_table as short_list
WHERE
short_list.user_id = ma.user_id and ma.value_1 < 3 and short_list.event_type < 2
) temp
ON users_ids.user_id = temp.user_id
WHERE temp.value_1 < 3
ORDER BY 1, 2;
SELECT count(*), count(DISTINCT user_id), avg(user_id), avg(value_1_agg) FROM agg_results;
count | count | avg | avg
---------------------------------------------------------------------
6 | 6 | 3.5000000000000000 | 0.16666666666666666667
(1 row)
-- DISTINCT ON in the outer query and DISTINCT ON in the subquery
TRUNCATE agg_results;
INSERT INTO agg_results(user_id, value_1_agg, value_2_agg)
SELECT
DISTINCT ON (users_ids.user_id) users_ids.user_id, temp.value_1, prob
FROM
(SELECT DISTINCT ON (user_id) user_id, value_2 FROM users_table ORDER BY 1,2) as users_ids
JOIN
(SELECT
ma.user_id, ma.value_1, (GREATEST(coalesce(ma.value_4 / 250, 0.0) + GREATEST(1.0))) / 2 AS prob
FROM
users_table AS ma, events_table as short_list
WHERE
short_list.user_id = ma.user_id and ma.value_1 < 10 and short_list.event_type < 2
) temp
ON users_ids.user_id = temp.user_id
ORDER BY 1, 2;
SELECT count(*), count(DISTINCT user_id), avg(user_id), avg(value_1_agg) FROM agg_results;
count | count | avg | avg
---------------------------------------------------------------------
6 | 6 | 3.5000000000000000 | 0.16666666666666666667
(1 row)