mirror of https://github.com/citusdata/citus.git
289 lines
9.4 KiB
Plaintext
289 lines
9.4 KiB
Plaintext
-- Runs the steps in https://docs.microsoft.com/en-us/azure/postgresql/tutorial-design-database-hyperscale-multi-tenant
|
|
-- Set citus.shard_replication_factor to 1 before any table is distributed.
-- Plain SET (no LOCAL), so the value stays in effect for the rest of the session.
-- NOTE(review): presumably this gives every shard a single placement - confirm against the Citus docs.
SET citus.shard_replication_factor TO 1;
|
|
-- companies: the root table of the schema. Its bigserial id is the primary key,
-- the target of campaigns.company_id, and the column the table is distributed on
-- further down in this file.
CREATE TABLE companies (
|
|
id bigserial PRIMARY KEY,
|
|
name text NOT NULL,
|
|
image_url text,
|
|
created_at timestamp without time zone NOT NULL,
|
|
updated_at timestamp without time zone NOT NULL
|
|
);
|
|
-- campaigns: each row points at its owning company through company_id.
-- The primary key is the composite (company_id, id), so it includes the column
-- this table is distributed on later in the file.
CREATE TABLE campaigns (
|
|
id bigserial,
|
|
company_id bigint REFERENCES companies (id),
|
|
name text NOT NULL,
|
|
cost_model text NOT NULL,
|
|
state text NOT NULL,
|
|
monthly_budget bigint,
|
|
blacklisted_site_urls text[],
|
|
created_at timestamp without time zone NOT NULL,
|
|
updated_at timestamp without time zone NOT NULL,
|
|
PRIMARY KEY (company_id, id)
|
|
);
|
|
-- ads: keyed by (company_id, id). The composite foreign key
-- (company_id, campaign_id) -> campaigns (company_id, id) ties every ad to a
-- campaign carrying the same company_id.
-- impressions_count and clicks_count default to 0; nothing in this file updates them.
CREATE TABLE ads (
|
|
id bigserial,
|
|
company_id bigint,
|
|
campaign_id bigint,
|
|
name text NOT NULL,
|
|
image_url text,
|
|
target_url text,
|
|
impressions_count bigint DEFAULT 0,
|
|
clicks_count bigint DEFAULT 0,
|
|
created_at timestamp without time zone NOT NULL,
|
|
updated_at timestamp without time zone NOT NULL,
|
|
PRIMARY KEY (company_id, id),
|
|
FOREIGN KEY (company_id, campaign_id)
|
|
REFERENCES campaigns (company_id, id)
|
|
);
|
|
-- clicks: one row per recorded click, keyed by (company_id, id).
-- (company_id, ad_id) references ads (company_id, id), so a click row can only
-- point at an ad that has the same company_id.
CREATE TABLE clicks (
|
|
id bigserial,
|
|
company_id bigint,
|
|
ad_id bigint,
|
|
clicked_at timestamp without time zone NOT NULL,
|
|
site_url text NOT NULL,
|
|
cost_per_click_usd numeric(20,10),
|
|
user_ip inet NOT NULL,
|
|
user_data jsonb NOT NULL,
|
|
PRIMARY KEY (company_id, id),
|
|
FOREIGN KEY (company_id, ad_id)
|
|
REFERENCES ads (company_id, id)
|
|
);
|
|
-- impressions: same shape as clicks, with seen_at / cost_per_impression_usd in
-- place of clicked_at / cost_per_click_usd. Keyed by (company_id, id) and
-- linked to ads through the composite (company_id, ad_id) foreign key.
CREATE TABLE impressions (
|
|
id bigserial,
|
|
company_id bigint,
|
|
ad_id bigint,
|
|
seen_at timestamp without time zone NOT NULL,
|
|
site_url text NOT NULL,
|
|
cost_per_impression_usd numeric(20,10),
|
|
user_ip inet NOT NULL,
|
|
user_data jsonb NOT NULL,
|
|
PRIMARY KEY (company_id, id),
|
|
FOREIGN KEY (company_id, ad_id)
|
|
REFERENCES ads (company_id, id)
|
|
);
|
|
-- Distribute companies and the table that references it (campaigns) inside an
-- explicit transaction, then roll the transaction back. The same two calls are
-- issued again right after this block, outside a transaction.
-- NOTE(review): presumably this checks that create_distributed_table works in a
-- transaction block and is undone by ROLLBACK - confirm the intent.
begin;
|
|
SELECT create_distributed_table('companies', 'id');
|
|
create_distributed_table
|
|
---------------------------------------------------------------------
|
|
|
|
(1 row)
|
|
|
|
-- campaigns has a foreign key to companies and is distributed on that same key column.
SELECT create_distributed_table('campaigns', 'company_id');
|
|
create_distributed_table
|
|
---------------------------------------------------------------------
|
|
|
|
(1 row)
|
|
|
|
rollback;
|
|
-- Distribute all five (still empty) tables. companies is distributed on id;
-- every other table is distributed on company_id. The calls are issued with each
-- referenced table ahead of the tables whose foreign keys point at it.
SELECT create_distributed_table('companies', 'id');
|
|
create_distributed_table
|
|
---------------------------------------------------------------------
|
|
|
|
(1 row)
|
|
|
|
SELECT create_distributed_table('campaigns', 'company_id');
|
|
create_distributed_table
|
|
---------------------------------------------------------------------
|
|
|
|
(1 row)
|
|
|
|
SELECT create_distributed_table('ads', 'company_id');
|
|
create_distributed_table
|
|
---------------------------------------------------------------------
|
|
|
|
(1 row)
|
|
|
|
SELECT create_distributed_table('clicks', 'company_id');
|
|
create_distributed_table
|
|
---------------------------------------------------------------------
|
|
|
|
(1 row)
|
|
|
|
SELECT create_distributed_table('impressions', 'company_id');
|
|
create_distributed_table
|
|
---------------------------------------------------------------------
|
|
|
|
(1 row)
|
|
|
|
-- Build one psql variable per CSV file. \set concatenates its arguments, so each
-- value is the contents of :abs_srcdir followed by the quoted relative path.
-- abs_srcdir is not defined anywhere in this file.
-- NOTE(review): presumably the test runner sets abs_srcdir - confirm.
\set companies_csv_file :abs_srcdir '/data/companies.csv'
|
|
\set campaigns_csv_file :abs_srcdir '/data/campaigns.csv'
|
|
\set ads_csv_file :abs_srcdir '/data/ads.csv'
|
|
\set clicks_csv_file :abs_srcdir '/data/clicks.csv'
|
|
\set impressions_csv_file :abs_srcdir '/data/impressions.csv'
|
|
-- Load the CSV files into the tables, which are already distributed at this point.
-- :'name' makes psql substitute the variable as a quoted SQL literal.
-- Referenced tables are loaded before the tables whose foreign keys point at
-- them: companies, then campaigns, then ads, then clicks and impressions.
COPY companies from :'companies_csv_file' with csv;
|
|
COPY campaigns from :'campaigns_csv_file' with csv;
|
|
COPY ads from :'ads_csv_file' with csv;
|
|
COPY clicks from :'clicks_csv_file' with csv;
|
|
COPY impressions from :'impressions_csv_file' with csv;
|
|
-- For company 5, count impressions per ad and rank the ads inside each campaign
-- by that count (highest first); only the first 10 rows are returned.
-- The join is an inner join, so an ad with no impression rows does not appear.
SELECT a.campaign_id,
|
|
RANK() OVER (
|
|
PARTITION BY a.campaign_id
|
|
-- a.campaign_id is constant within a partition, so count(*) desc alone decides the rank
ORDER BY a.campaign_id, count(*) desc
|
|
), count(*) as n_impressions, a.id
|
|
FROM ads as a
|
|
JOIN impressions as i
|
|
-- joins on company_id, the column both tables are distributed on in this file, plus the ad id
ON i.company_id = a.company_id
|
|
AND i.ad_id = a.id
|
|
WHERE a.company_id = 5
|
|
GROUP BY a.campaign_id, a.id
|
|
-- trailing a.id breaks ties between ads with equal counts, keeping the row order deterministic
ORDER BY a.campaign_id, n_impressions desc, a.id
|
|
LIMIT 10;
|
|
campaign_id | rank | n_impressions | id
|
|
---------------------------------------------------------------------
|
|
34 | 1 | 68 | 264
|
|
34 | 2 | 56 | 266
|
|
34 | 3 | 41 | 267
|
|
34 | 4 | 30 | 265
|
|
34 | 5 | 29 | 268
|
|
34 | 6 | 21 | 263
|
|
35 | 1 | 70 | 270
|
|
35 | 2 | 64 | 269
|
|
35 | 3 | 55 | 276
|
|
35 | 4 | 49 | 275
|
|
(10 rows)
|
|
|
|
-- Remove all five tables in one statement; the statements that follow create them again from scratch.
DROP TABLE companies, campaigns, ads, clicks, impressions;
|
|
-- Repeat the scenario, this time loading the data into the local tables before distributing them
|
|
-- companies, recreated after the DROP above with the same columns as before.
-- In this scenario the table is filled by COPY while still a local table and
-- only distributed (on id) afterwards.
CREATE TABLE companies (
|
|
id bigserial PRIMARY KEY,
|
|
name text NOT NULL,
|
|
image_url text,
|
|
created_at timestamp without time zone NOT NULL,
|
|
updated_at timestamp without time zone NOT NULL
|
|
);
|
|
-- campaigns, recreated with the same definition as before: company_id references
-- companies (id) and leads the composite primary key (company_id, id).
CREATE TABLE campaigns (
|
|
id bigserial,
|
|
company_id bigint REFERENCES companies (id),
|
|
name text NOT NULL,
|
|
cost_model text NOT NULL,
|
|
state text NOT NULL,
|
|
monthly_budget bigint,
|
|
blacklisted_site_urls text[],
|
|
created_at timestamp without time zone NOT NULL,
|
|
updated_at timestamp without time zone NOT NULL,
|
|
PRIMARY KEY (company_id, id)
|
|
);
|
|
-- ads, recreated with the same definition as before: primary key
-- (company_id, id) and a composite foreign key (company_id, campaign_id)
-- into campaigns (company_id, id).
CREATE TABLE ads (
|
|
id bigserial,
|
|
company_id bigint,
|
|
campaign_id bigint,
|
|
name text NOT NULL,
|
|
image_url text,
|
|
target_url text,
|
|
impressions_count bigint DEFAULT 0,
|
|
clicks_count bigint DEFAULT 0,
|
|
created_at timestamp without time zone NOT NULL,
|
|
updated_at timestamp without time zone NOT NULL,
|
|
PRIMARY KEY (company_id, id),
|
|
FOREIGN KEY (company_id, campaign_id)
|
|
REFERENCES campaigns (company_id, id)
|
|
);
|
|
-- clicks, recreated with the same definition as before: primary key
-- (company_id, id) and a composite foreign key (company_id, ad_id) into
-- ads (company_id, id).
CREATE TABLE clicks (
|
|
id bigserial,
|
|
company_id bigint,
|
|
ad_id bigint,
|
|
clicked_at timestamp without time zone NOT NULL,
|
|
site_url text NOT NULL,
|
|
cost_per_click_usd numeric(20,10),
|
|
user_ip inet NOT NULL,
|
|
user_data jsonb NOT NULL,
|
|
PRIMARY KEY (company_id, id),
|
|
FOREIGN KEY (company_id, ad_id)
|
|
REFERENCES ads (company_id, id)
|
|
);
|
|
-- impressions, recreated with the same definition as before: primary key
-- (company_id, id) and a composite foreign key (company_id, ad_id) into
-- ads (company_id, id).
CREATE TABLE impressions (
|
|
id bigserial,
|
|
company_id bigint,
|
|
ad_id bigint,
|
|
seen_at timestamp without time zone NOT NULL,
|
|
site_url text NOT NULL,
|
|
cost_per_impression_usd numeric(20,10),
|
|
user_ip inet NOT NULL,
|
|
user_data jsonb NOT NULL,
|
|
PRIMARY KEY (company_id, id),
|
|
FOREIGN KEY (company_id, ad_id)
|
|
REFERENCES ads (company_id, id)
|
|
);
|
|
-- Load the same CSV files again, but into the freshly created tables before any
-- create_distributed_table call, so the rows land in ordinary local tables.
-- The *_csv_file psql variables were set earlier in this file and are reused here.
COPY companies from :'companies_csv_file' with csv;
|
|
COPY campaigns from :'campaigns_csv_file' with csv;
|
|
COPY ads from :'ads_csv_file' with csv;
|
|
COPY clicks from :'clicks_csv_file' with csv;
|
|
COPY impressions from :'impressions_csv_file' with csv;
|
|
-- Distribute the five tables now that they already contain rows. Unlike the
-- earlier calls on empty tables, each call here is followed by NOTICE / DETAIL /
-- HINT lines reporting that the local data was copied, plus a hint naming
-- truncate_local_data_after_distributing_table for removing the local copy.
-- Distribution columns match the first scenario: id for companies, company_id elsewhere.
SELECT create_distributed_table('companies', 'id');
|
|
NOTICE: Copying data from local table...
|
|
NOTICE: copying the data has completed
|
|
DETAIL: The local data in the table is no longer visible, but is still on disk.
|
|
HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$public.companies$$)
|
|
create_distributed_table
|
|
---------------------------------------------------------------------
|
|
|
|
(1 row)
|
|
|
|
SELECT create_distributed_table('campaigns', 'company_id');
|
|
NOTICE: Copying data from local table...
|
|
NOTICE: copying the data has completed
|
|
DETAIL: The local data in the table is no longer visible, but is still on disk.
|
|
HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$public.campaigns$$)
|
|
create_distributed_table
|
|
---------------------------------------------------------------------
|
|
|
|
(1 row)
|
|
|
|
SELECT create_distributed_table('ads', 'company_id');
|
|
NOTICE: Copying data from local table...
|
|
NOTICE: copying the data has completed
|
|
DETAIL: The local data in the table is no longer visible, but is still on disk.
|
|
HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$public.ads$$)
|
|
create_distributed_table
|
|
---------------------------------------------------------------------
|
|
|
|
(1 row)
|
|
|
|
SELECT create_distributed_table('clicks', 'company_id');
|
|
NOTICE: Copying data from local table...
|
|
NOTICE: copying the data has completed
|
|
DETAIL: The local data in the table is no longer visible, but is still on disk.
|
|
HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$public.clicks$$)
|
|
create_distributed_table
|
|
---------------------------------------------------------------------
|
|
|
|
(1 row)
|
|
|
|
SELECT create_distributed_table('impressions', 'company_id');
|
|
NOTICE: Copying data from local table...
|
|
NOTICE: copying the data has completed
|
|
DETAIL: The local data in the table is no longer visible, but is still on disk.
|
|
HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$public.impressions$$)
|
|
create_distributed_table
|
|
---------------------------------------------------------------------
|
|
|
|
(1 row)
|
|
|
|
-- Same ranking query as in the first scenario, but filtered to company 8: count
-- impressions per ad and rank the ads inside each campaign by that count,
-- returning the first 10 rows.
SELECT a.campaign_id,
|
|
-- RANK() gives tied ads the same rank and skips the following rank (visible in the result below: 4, 4, 6)
RANK() OVER (
|
|
PARTITION BY a.campaign_id
|
|
-- a.campaign_id is constant within a partition, so count(*) desc alone decides the rank
ORDER BY a.campaign_id, count(*) desc
|
|
), count(*) as n_impressions, a.id
|
|
FROM ads as a
|
|
JOIN impressions as i
|
|
ON i.company_id = a.company_id
|
|
AND i.ad_id = a.id
|
|
WHERE a.company_id = 8
|
|
GROUP BY a.campaign_id, a.id
|
|
-- trailing a.id orders ads that share a count, so tied rows come out in a stable order
ORDER BY a.campaign_id, n_impressions desc, a.id
|
|
LIMIT 10;
|
|
campaign_id | rank | n_impressions | id
|
|
---------------------------------------------------------------------
|
|
59 | 1 | 70 | 477
|
|
59 | 2 | 69 | 479
|
|
59 | 3 | 63 | 475
|
|
59 | 4 | 52 | 474
|
|
59 | 4 | 52 | 480
|
|
59 | 6 | 32 | 478
|
|
59 | 7 | 29 | 476
|
|
60 | 1 | 70 | 484
|
|
60 | 2 | 68 | 488
|
|
60 | 3 | 67 | 481
|
|
(10 rows)
|
|
|
|
-- Final cleanup: remove all five tables created for the second scenario.
DROP TABLE companies, campaigns, ads, clicks, impressions;
|