-- Runs the steps in https://docs.microsoft.com/en-us/azure/postgresql/tutorial-design-database-hyperscale-multi-tenant SET citus.shard_replication_factor TO 1; CREATE TABLE companies ( id bigserial PRIMARY KEY, name text NOT NULL, image_url text, created_at timestamp without time zone NOT NULL, updated_at timestamp without time zone NOT NULL ); CREATE TABLE campaigns ( id bigserial, company_id bigint REFERENCES companies (id), name text NOT NULL, cost_model text NOT NULL, state text NOT NULL, monthly_budget bigint, blacklisted_site_urls text[], created_at timestamp without time zone NOT NULL, updated_at timestamp without time zone NOT NULL, PRIMARY KEY (company_id, id) ); CREATE TABLE ads ( id bigserial, company_id bigint, campaign_id bigint, name text NOT NULL, image_url text, target_url text, impressions_count bigint DEFAULT 0, clicks_count bigint DEFAULT 0, created_at timestamp without time zone NOT NULL, updated_at timestamp without time zone NOT NULL, PRIMARY KEY (company_id, id), FOREIGN KEY (company_id, campaign_id) REFERENCES campaigns (company_id, id) ); CREATE TABLE clicks ( id bigserial, company_id bigint, ad_id bigint, clicked_at timestamp without time zone NOT NULL, site_url text NOT NULL, cost_per_click_usd numeric(20,10), user_ip inet NOT NULL, user_data jsonb NOT NULL, PRIMARY KEY (company_id, id), FOREIGN KEY (company_id, ad_id) REFERENCES ads (company_id, id) ); CREATE TABLE impressions ( id bigserial, company_id bigint, ad_id bigint, seen_at timestamp without time zone NOT NULL, site_url text NOT NULL, cost_per_impression_usd numeric(20,10), user_ip inet NOT NULL, user_data jsonb NOT NULL, PRIMARY KEY (company_id, id), FOREIGN KEY (company_id, ad_id) REFERENCES ads (company_id, id) ); begin; SELECT create_distributed_table('companies', 'id'); SELECT create_distributed_table('campaigns', 'company_id'); rollback; SELECT create_distributed_table('companies', 'id'); SELECT create_distributed_table('campaigns', 'company_id'); SELECT create_distributed_table('ads', 'company_id'); SELECT create_distributed_table('clicks', 'company_id'); SELECT create_distributed_table('impressions', 'company_id'); \copy companies from '@abs_srcdir@/data/companies.csv' with csv \copy campaigns from '@abs_srcdir@/data/campaigns.csv' with csv \copy ads from '@abs_srcdir@/data/ads.csv' with csv \copy clicks from '@abs_srcdir@/data/clicks.csv' with csv \copy impressions from '@abs_srcdir@/data/impressions.csv' with csv SELECT a.campaign_id, RANK() OVER ( PARTITION BY a.campaign_id ORDER BY a.campaign_id, count(*) desc ), count(*) as n_impressions, a.id FROM ads as a JOIN impressions as i ON i.company_id = a.company_id AND i.ad_id = a.id WHERE a.company_id = 5 GROUP BY a.campaign_id, a.id ORDER BY a.campaign_id, n_impressions desc LIMIT 10; DROP TABLE companies, campaigns, ads, clicks, impressions; -- again with data loaded first CREATE TABLE companies ( id bigserial PRIMARY KEY, name text NOT NULL, image_url text, created_at timestamp without time zone NOT NULL, updated_at timestamp without time zone NOT NULL ); CREATE TABLE campaigns ( id bigserial, company_id bigint REFERENCES companies (id), name text NOT NULL, cost_model text NOT NULL, state text NOT NULL, monthly_budget bigint, blacklisted_site_urls text[], created_at timestamp without time zone NOT NULL, updated_at timestamp without time zone NOT NULL, PRIMARY KEY (company_id, id) ); CREATE TABLE ads ( id bigserial, company_id bigint, campaign_id bigint, name text NOT NULL, image_url text, target_url text, impressions_count bigint DEFAULT 0, clicks_count bigint DEFAULT 0, created_at timestamp without time zone NOT NULL, updated_at timestamp without time zone NOT NULL, PRIMARY KEY (company_id, id), FOREIGN KEY (company_id, campaign_id) REFERENCES campaigns (company_id, id) ); CREATE TABLE clicks ( id bigserial, company_id bigint, ad_id bigint, clicked_at timestamp without time zone NOT NULL, site_url text NOT NULL, cost_per_click_usd numeric(20,10), user_ip inet NOT NULL, user_data jsonb NOT NULL, PRIMARY KEY (company_id, id), FOREIGN KEY (company_id, ad_id) REFERENCES ads (company_id, id) ); CREATE TABLE impressions ( id bigserial, company_id bigint, ad_id bigint, seen_at timestamp without time zone NOT NULL, site_url text NOT NULL, cost_per_impression_usd numeric(20,10), user_ip inet NOT NULL, user_data jsonb NOT NULL, PRIMARY KEY (company_id, id), FOREIGN KEY (company_id, ad_id) REFERENCES ads (company_id, id) ); \copy companies from '@abs_srcdir@/data/companies.csv' with csv \copy campaigns from '@abs_srcdir@/data/campaigns.csv' with csv \copy ads from '@abs_srcdir@/data/ads.csv' with csv \copy clicks from '@abs_srcdir@/data/clicks.csv' with csv \copy impressions from '@abs_srcdir@/data/impressions.csv' with csv SELECT create_distributed_table('companies', 'id'); SELECT create_distributed_table('campaigns', 'company_id'); SELECT create_distributed_table('ads', 'company_id'); SELECT create_distributed_table('clicks', 'company_id'); SELECT create_distributed_table('impressions', 'company_id'); SELECT a.campaign_id, RANK() OVER ( PARTITION BY a.campaign_id ORDER BY a.campaign_id, count(*) desc ), count(*) as n_impressions, a.id FROM ads as a JOIN impressions as i ON i.company_id = a.company_id AND i.ad_id = a.id WHERE a.company_id = 8 GROUP BY a.campaign_id, a.id ORDER BY a.campaign_id, n_impressions desc LIMIT 10; DROP TABLE companies, campaigns, ads, clicks, impressions;