diff --git a/src/backend/distributed/sql/citus--10.1-1--10.2-1.sql b/src/backend/distributed/sql/citus--10.1-1--10.2-1.sql index 546494f3a..825e3aaa1 100644 --- a/src/backend/distributed/sql/citus--10.1-1--10.2-1.sql +++ b/src/backend/distributed/sql/citus--10.1-1--10.2-1.sql @@ -19,3 +19,4 @@ ALTER TABLE pg_catalog.pg_dist_placement ADD CONSTRAINT placement_shardid_groupi #include "../../timeseries/sql/timeseries--10.1-1--10.2-1.sql" #include "../../timeseries/sql/udfs/create_timeseries_table/10.2-1.sql" #include "../../timeseries/sql/udfs/create_missing_partitions/10.2-1.sql" +#include "../../timeseries/sql/udfs/get_missing_partition_ranges/10.2-1.sql" diff --git a/src/backend/timeseries/create_timeseries_table.c b/src/backend/timeseries/create_timeseries_table.c index 70b10cd80..834a7781e 100644 --- a/src/backend/timeseries/create_timeseries_table.c +++ b/src/backend/timeseries/create_timeseries_table.c @@ -189,11 +189,12 @@ InitiateTimeseriesTablePartitions(Oid relationId) SPI_finish(); } + // TODO: If result is not true, error out! (Check metadata tables after erroring out!) + SPI_execute(initiateTimeseriesPartitionsCommand->data, readOnly, 0); SPI_finish(); } - /* * Add tuples for the given table to the citus_timeseries_tables using given params */ diff --git a/src/backend/timeseries/sql/udfs/create_missing_partitions/10.2-1.sql b/src/backend/timeseries/sql/udfs/create_missing_partitions/10.2-1.sql index 5bd30e26b..bf3e41aef 100644 --- a/src/backend/timeseries/sql/udfs/create_missing_partitions/10.2-1.sql +++ b/src/backend/timeseries/sql/udfs/create_missing_partitions/10.2-1.sql @@ -6,87 +6,32 @@ returns boolean LANGUAGE plpgsql AS $$ DECLARE - table_partition_interval INTERVAL; - current_range_from_value timestamptz; - current_range_to_value timestamptz; + range_values_record record; current_partition_name text; current_partition_count int; BEGIN - /* - * First check whether such timeseries table exists. If not, error out. - * - * Then check if start_from is given. If it is, create the partition for that time - * and let remaining of the function fill the gap from start_from to to_date. - * - * TODO: That part is implemented by assuming that there is no partition exist for - * the given table, in other words that function will be called via create_timeseries_table - * only. That will be handled while adding starting time for old data ingestion (Task 1.3) - */ - - SELECT partitioninterval - INTO table_partition_interval - FROM citus_timeseries.citus_timeseries_tables - WHERE logicalrelid = table_name; - - IF NOT found THEN - RAISE '% must be timeseries table', table_name; - END IF; - - IF start_from IS NOT NULL THEN - /* - * Decide on the current_range_from_value of the initial partition according to interval of the timeseries table. - * Since we will create all other partitions by adding intervals, truncating given start time will provide - * more intuitive interval ranges. - */ - IF table_partition_interval < INTERVAL '1 hour' THEN - current_range_from_value = date_trunc('minute', start_from); - ELSIF table_partition_interval < INTERVAL '1 day' THEN - current_range_from_value = date_trunc('hour', start_from); - ELSIF table_partition_interval < INTERVAL '1 week' THEN - current_range_from_value = date_trunc('day', start_from); - ELSIF table_partition_interval < INTERVAL '1 month' THEN - current_range_from_value = date_trunc('week', start_from); - ELSIF table_partition_interval < INTERVAL '1 year' THEN - current_range_from_value = date_trunc('month', start_from); - ELSE - current_range_from_value = date_trunc('year', start_from); - END IF; - - current_partition_count := 0; - current_range_to_value := current_range_from_value + table_partition_interval; - current_partition_name := table_name::text || '_' || current_partition_count::text; - - EXECUTE format('CREATE TABLE %I PARTITION OF %I FOR VALUES FROM (''%I'') TO (''%I'')', current_partition_name, table_name::text, current_range_from_value::text, current_range_to_value::text); - END IF; /* - * At this point, it is assumed that initial partition of the timeseries table - * exists. Remaining partitions till to_date will be created if any partition - * missing + * Get the partition ranges using the get_missing_partition_ranges and create + * partitions for those ranges. Since timeseries table related checks are + * handled by get_missing_partition_range we don't check them here again. + * + * TODO: Create that using attach partition, which can be implemented with another UDF to make sure that it gets right locks. + * Consider different types of tables while implemented that (Task 1.3 on sheet) */ - SELECT from_value::timestamptz, to_value::timestamptz - INTO current_range_from_value, current_range_to_value - FROM pg_catalog.time_partitions - WHERE parent_table = table_name - ORDER BY from_value::timestamptz; SELECT count(*) INTO current_partition_count FROM pg_catalog.time_partitions WHERE parent_table = table_name; - WHILE current_range_to_value < to_date LOOP - current_range_from_value := current_range_to_value; - current_range_to_value := current_range_to_value + table_partition_interval; + FOR range_values_record IN + SELECT * + FROM get_missing_partition_ranges(table_name, to_date, start_from) + LOOP current_partition_name := table_name::text || '_' || current_partition_count::text; - - -- TODO: Create that using attach partition, which can be implemented with another UDF to make sure that it gets light locks. - BEGIN - EXECUTE format('CREATE TABLE %I PARTITION OF %I FOR VALUES FROM (''%I'') TO (''%I'')', current_partition_name, table_name::text, current_range_from_value::text, current_range_to_value::text); - current_partition_count := current_partition_count + 1; - EXCEPTION WHEN OTHERS THEN - raise DEBUG3 'Partition has already been created for the range from % to %', current_range_from_value::text, current_range_to_value::text; - END; + EXECUTE format('CREATE TABLE %I PARTITION OF %I FOR VALUES FROM (''%I'') TO (''%I'')', current_partition_name, table_name::text, range_values_record.range_from_value, range_values_record.range_to_value); + current_partition_count := current_partition_count + 1; END LOOP; RETURN true; @@ -96,4 +41,4 @@ COMMENT ON FUNCTION pg_catalog.create_missing_partitions( table_name regclass, to_date timestamptz, start_from timestamptz) -IS 'create missing partitions for the given timeseries table'; \ No newline at end of file +IS 'create missing partitions for the given timeseries table and range'; diff --git a/src/backend/timeseries/sql/udfs/create_missing_partitions/latest.sql b/src/backend/timeseries/sql/udfs/create_missing_partitions/latest.sql index cb33bfa19..bf3e41aef 100644 --- a/src/backend/timeseries/sql/udfs/create_missing_partitions/latest.sql +++ b/src/backend/timeseries/sql/udfs/create_missing_partitions/latest.sql @@ -6,87 +6,31 @@ returns boolean LANGUAGE plpgsql AS $$ DECLARE - table_partition_interval INTERVAL; - current_range_from_value timestamptz; - current_range_to_value timestamptz; + range_values_record record; current_partition_name text; current_partition_count int; BEGIN - /* - * First check whether such timeseries table exists. If not, error out. - * - * Then check if start_from is given. If it is, create the partition for that time - * and let remaining of the function fill the gap from start_from to to_date. - * - * TODO: That part is implemented by assuming that there is no partition exist for - * the given table, in other words that function will be called via create_timeseries_table - * only. That will be handled while adding starting time for old data ingestion (Task 1.3) - */ - - SELECT partitioninterval - INTO table_partition_interval - FROM citus_timeseries.citus_timeseries_tables - WHERE logicalrelid = table_name; - - IF NOT found THEN - RAISE '% must be timeseries table', table_name; - END IF; - - IF start_from IS NOT NULL THEN - /* - * Decide on the current_range_from_value of the initial partition according to interval of the timeseries table. - * Since we will create all other partitions by adding intervals, truncating given start time will provide - * more intuitive interval ranges. - */ - IF table_partition_interval < INTERVAL '1 hour' THEN - current_range_from_value = date_trunc('minute', start_from); - ELSIF table_partition_interval < INTERVAL '1 day' THEN - current_range_from_value = date_trunc('hour', start_from); - ELSIF table_partition_interval < INTERVAL '1 week' THEN - current_range_from_value = date_trunc('day', start_from); - ELSIF table_partition_interval < INTERVAL '1 month' THEN - current_range_from_value = date_trunc('week', start_from); - ELSIF table_partition_interval < INTERVAL '1 year' THEN - current_range_from_value = date_trunc('month', start_from); - ELSE - current_range_from_value = date_trunc('year', start_from); - END IF; - - current_partition_count := 0; - current_range_to_value := current_range_from_value + table_partition_interval; - current_partition_name := table_name::text || '_' || current_partition_count::text; - - EXECUTE format('CREATE TABLE %I PARTITION OF %I FOR VALUES FROM (''%I'') TO (''%I'')', current_partition_name, table_name::text, current_range_from_value::text, current_range_to_value::text); - END IF; /* - * At this point, it is assumed that initial partition of the timeseries table - * exists. Remaining partitions till to_date will be created if any partition - * missing + * Get the partition ranges using the get_missing_partition_ranges and create + * partitions for those ranges. Since timeseries table related checks are + * handled by get_missing_partition_range we don't check them here again. + * + * TODO: Create that using attach partition, which can be implemented with another UDF to make sure that it gets right locks. + * Consider different types of tables while implemented that (Task 1.3 on sheet) */ - SELECT from_value::timestamptz, to_value::timestamptz - INTO current_range_from_value, current_range_to_value - FROM pg_catalog.time_partitions - WHERE parent_table = table_name - ORDER BY from_value::timestamptz; SELECT count(*) INTO current_partition_count FROM pg_catalog.time_partitions WHERE parent_table = table_name; - WHILE current_range_to_value < to_date LOOP - current_range_from_value := current_range_to_value; - current_range_to_value := current_range_to_value + table_partition_interval; + FOR range_values_record IN + SELECT * + FROM get_missing_partition_ranges(table_name, to_date, start_from) + LOOP current_partition_name := table_name::text || '_' || current_partition_count::text; - - -- TODO: Create that using attach partition, which can be implemented with another UDF to make sure that it gets light locks. - BEGIN - EXECUTE format('CREATE TABLE %I PARTITION OF %I FOR VALUES FROM (''%I'') TO (''%I'')', current_partition_name, table_name::text, current_range_from_value::text, current_range_to_value::text); - EXCEPTION WHEN OTHERS THEN - raise DEBUG3 'Partition has already been created for the range from % to %', current_range_from_value::text, current_range_to_value::text; - END; - + EXECUTE format('CREATE TABLE %I PARTITION OF %I FOR VALUES FROM (''%I'') TO (''%I'')', current_partition_name, table_name::text, range_values_record.range_from_value, range_values_record.range_to_value); current_partition_count := current_partition_count + 1; END LOOP; @@ -97,4 +41,4 @@ COMMENT ON FUNCTION pg_catalog.create_missing_partitions( table_name regclass, to_date timestamptz, start_from timestamptz) -IS 'create missing partitions for the given timeseries table'; \ No newline at end of file +IS 'create missing partitions for the given timeseries table and range'; diff --git a/src/backend/timeseries/sql/udfs/get_missing_partition_ranges/10.2-1.sql b/src/backend/timeseries/sql/udfs/get_missing_partition_ranges/10.2-1.sql new file mode 100644 index 000000000..cc0654516 --- /dev/null +++ b/src/backend/timeseries/sql/udfs/get_missing_partition_ranges/10.2-1.sql @@ -0,0 +1,136 @@ +CREATE OR REPLACE FUNCTION pg_catalog.get_missing_partition_ranges( + table_name regclass, + to_date timestamptz, + start_from timestamptz DEFAULT NULL) +returns table( + range_from_value text, + range_to_value text +) +LANGUAGE plpgsql +AS $$ +DECLARE + table_partition_interval INTERVAL; + table_partition_column_type_name text; + current_range_from_value timestamptz := NULL; + current_range_to_value timestamptz := NULL; +BEGIN + /* + * First check whether such timeseries table exists. If not, error out. + * + * Then check if start_from is given. If it is, create the partition for that time + * and let remaining of the function fill the gap from start_from to to_date. + * + * TODO: That part is implemented by assuming that there is no partition exist for + * the given table, in other words that function will be called via create_timeseries_table + * only when start_from is given. That will be handled while adding starting time for old + * data ingestion. That part will be implemented with Task 1.2. + * + * TODO: Add tests to cover start_from for the PR to handle Task 1.2. + */ + + SELECT partitioninterval + INTO table_partition_interval + FROM citus_timeseries.citus_timeseries_tables + WHERE logicalrelid = table_name; + + IF NOT found THEN + RAISE '% must be timeseries table', table_name; + END IF; + + -- Get datatype here to generate range values in the right data format + -- Since we already check that timeseries tables have single column to partition the table + -- we can directly get the 0th element of the partattrs column + SELECT atttypid::regtype::text INTO table_partition_column_type_name + FROM pg_attribute + WHERE attrelid = table_name::oid + AND attnum = (select partattrs[0] from pg_partitioned_table where partrelid = table_name::oid); + + IF start_from IS NOT NULL THEN + /* + * Decide on the current_range_from_value of the initial partition according to interval of the timeseries table. + * Since we will create all other partitions by adding intervals, truncating given start time will provide + * more intuitive interval ranges, instead of starting from start_from directly. + */ + IF table_partition_interval < INTERVAL '1 hour' THEN + current_range_from_value = date_trunc('minute', start_from); + ELSIF table_partition_interval < INTERVAL '1 day' THEN + current_range_from_value = date_trunc('hour', start_from); + ELSIF table_partition_interval < INTERVAL '1 week' THEN + current_range_from_value = date_trunc('day', start_from); + ELSIF table_partition_interval < INTERVAL '1 month' THEN + current_range_from_value = date_trunc('week', start_from); + ELSIF table_partition_interval < INTERVAL '1 year' THEN + current_range_from_value = date_trunc('month', start_from); + ELSE + current_range_from_value = date_trunc('year', start_from); + END IF; + + current_range_to_value := current_range_from_value + table_partition_interval; + + -- TODO: Check for dynamic way to do it or create a function for it to use for the call at the end + IF table_partition_column_type_name = 'date' THEN + RETURN QUERY SELECT current_range_from_value::date::text, current_range_to_value::date::text; + ELSIF table_partition_column_type_name = 'timestamp without time zone' THEN + RETURN QUERY SELECT current_range_from_value::timestamp::text, current_range_to_value::timestamp::text; + ELSIF table_partition_column_type_name = 'timestamp with time zone' THEN + RETURN QUERY SELECT current_range_from_value::timestamptz::text, current_range_to_value::timestamptz::text; + ELSE + RAISE 'type of the partition column of the table % must be date, timestamp or timestamptz', table_name; + END IF; + END IF; + + /* + * To be able to fill any gaps after the initial partition of the timeseries table, + * we are starting from the first partition instead of the last. + * + * Also note that we must have either start_from or an initial partition for the timeseries + * table, as we call that function while creating timeseries table first. + */ + IF current_range_from_value IS NULL AND current_range_to_value IS NULL THEN + SELECT from_value::timestamptz, to_value::timestamptz + INTO current_range_from_value, current_range_to_value + FROM pg_catalog.time_partitions + WHERE parent_table = table_name + ORDER BY from_value::timestamptz; + END IF; + + WHILE current_range_to_value < to_date LOOP + current_range_from_value := current_range_to_value; + current_range_to_value := current_range_to_value + table_partition_interval; + + -- Check whether partition with given range has already been created + -- Since partition interval can be given as, we are converting all variables to timestamptz to make sure + -- that we are comparing same type of parameters + PERFORM * FROM pg_catalog.time_partitions WHERE from_value::timestamptz = current_range_from_value::timestamptz AND to_value::timestamptz = current_range_to_value::timestamptz; + IF found THEN + CONTINUE; + END IF; + + -- Check whether any other partition covers from_value or to_value + -- That means some partitions have been created manually and we must error out. + PERFORM * FROM pg_catalog.time_partitions + WHERE (current_range_from_value::timestamptz >= from_value::timestamptz AND current_range_from_value < to_value::timestamptz) OR + (current_range_to_value::timestamptz >= from_value::timestamptz AND current_range_to_value::timestamptz < to_value::timestamptz); + IF found THEN + RAISE 'For the table % manual partition(s) has been created, Please remove them to continue using that table as timeseries table', table_name; + END IF; + + IF table_partition_column_type_name = 'date' THEN + RETURN QUERY SELECT current_range_from_value::date::text, current_range_to_value::date::text; + ELSIF table_partition_column_type_name = 'timestamp without time zone' THEN + RETURN QUERY SELECT current_range_from_value::timestamp::text, current_range_to_value::timestamp::text; + ELSIF table_partition_column_type_name = 'timestamp with time zone' THEN + RETURN QUERY SELECT current_range_from_value::timestamptz::text, current_range_to_value::timestamptz::text; + ELSE + RAISE 'type of the partition column of the table % must be date, timestamp or timestamptz', table_name; + END IF; + END LOOP; + + RETURN; +END; +$$; +COMMENT ON FUNCTION pg_catalog.create_missing_partitions( + table_name regclass, + to_date timestamptz, + start_from timestamptz) +IS 'create missing partitions for the given timeseries table'; diff --git a/src/backend/timeseries/sql/udfs/get_missing_partition_ranges/latest.sql b/src/backend/timeseries/sql/udfs/get_missing_partition_ranges/latest.sql new file mode 100644 index 000000000..cc0654516 --- /dev/null +++ b/src/backend/timeseries/sql/udfs/get_missing_partition_ranges/latest.sql @@ -0,0 +1,136 @@ +CREATE OR REPLACE FUNCTION pg_catalog.get_missing_partition_ranges( + table_name regclass, + to_date timestamptz, + start_from timestamptz DEFAULT NULL) +returns table( + range_from_value text, + range_to_value text +) +LANGUAGE plpgsql +AS $$ +DECLARE + table_partition_interval INTERVAL; + table_partition_column_type_name text; + current_range_from_value timestamptz := NULL; + current_range_to_value timestamptz := NULL; +BEGIN + /* + * First check whether such timeseries table exists. If not, error out. + * + * Then check if start_from is given. If it is, create the partition for that time + * and let remaining of the function fill the gap from start_from to to_date. + * + * TODO: That part is implemented by assuming that there is no partition exist for + * the given table, in other words that function will be called via create_timeseries_table + * only when start_from is given. That will be handled while adding starting time for old + * data ingestion. That part will be implemented with Task 1.2. + * + * TODO: Add tests to cover start_from for the PR to handle Task 1.2. + */ + + SELECT partitioninterval + INTO table_partition_interval + FROM citus_timeseries.citus_timeseries_tables + WHERE logicalrelid = table_name; + + IF NOT found THEN + RAISE '% must be timeseries table', table_name; + END IF; + + -- Get datatype here to generate range values in the right data format + -- Since we already check that timeseries tables have single column to partition the table + -- we can directly get the 0th element of the partattrs column + SELECT atttypid::regtype::text INTO table_partition_column_type_name + FROM pg_attribute + WHERE attrelid = table_name::oid + AND attnum = (select partattrs[0] from pg_partitioned_table where partrelid = table_name::oid); + + IF start_from IS NOT NULL THEN + /* + * Decide on the current_range_from_value of the initial partition according to interval of the timeseries table. + * Since we will create all other partitions by adding intervals, truncating given start time will provide + * more intuitive interval ranges, instead of starting from start_from directly. + */ + IF table_partition_interval < INTERVAL '1 hour' THEN + current_range_from_value = date_trunc('minute', start_from); + ELSIF table_partition_interval < INTERVAL '1 day' THEN + current_range_from_value = date_trunc('hour', start_from); + ELSIF table_partition_interval < INTERVAL '1 week' THEN + current_range_from_value = date_trunc('day', start_from); + ELSIF table_partition_interval < INTERVAL '1 month' THEN + current_range_from_value = date_trunc('week', start_from); + ELSIF table_partition_interval < INTERVAL '1 year' THEN + current_range_from_value = date_trunc('month', start_from); + ELSE + current_range_from_value = date_trunc('year', start_from); + END IF; + + current_range_to_value := current_range_from_value + table_partition_interval; + + -- TODO: Check for dynamic way to do it or create a function for it to use for the call at the end + IF table_partition_column_type_name = 'date' THEN + RETURN QUERY SELECT current_range_from_value::date::text, current_range_to_value::date::text; + ELSIF table_partition_column_type_name = 'timestamp without time zone' THEN + RETURN QUERY SELECT current_range_from_value::timestamp::text, current_range_to_value::timestamp::text; + ELSIF table_partition_column_type_name = 'timestamp with time zone' THEN + RETURN QUERY SELECT current_range_from_value::timestamptz::text, current_range_to_value::timestamptz::text; + ELSE + RAISE 'type of the partition column of the table % must be date, timestamp or timestamptz', table_name; + END IF; + END IF; + + /* + * To be able to fill any gaps after the initial partition of the timeseries table, + * we are starting from the first partition instead of the last. + * + * Also note that we must have either start_from or an initial partition for the timeseries + * table, as we call that function while creating timeseries table first. + */ + IF current_range_from_value IS NULL AND current_range_to_value IS NULL THEN + SELECT from_value::timestamptz, to_value::timestamptz + INTO current_range_from_value, current_range_to_value + FROM pg_catalog.time_partitions + WHERE parent_table = table_name + ORDER BY from_value::timestamptz; + END IF; + + WHILE current_range_to_value < to_date LOOP + current_range_from_value := current_range_to_value; + current_range_to_value := current_range_to_value + table_partition_interval; + + -- Check whether partition with given range has already been created + -- Since partition interval can be given as, we are converting all variables to timestamptz to make sure + -- that we are comparing same type of parameters + PERFORM * FROM pg_catalog.time_partitions WHERE from_value::timestamptz = current_range_from_value::timestamptz AND to_value::timestamptz = current_range_to_value::timestamptz; + IF found THEN + CONTINUE; + END IF; + + -- Check whether any other partition covers from_value or to_value + -- That means some partitions have been created manually and we must error out. + PERFORM * FROM pg_catalog.time_partitions + WHERE (current_range_from_value::timestamptz >= from_value::timestamptz AND current_range_from_value < to_value::timestamptz) OR + (current_range_to_value::timestamptz >= from_value::timestamptz AND current_range_to_value::timestamptz < to_value::timestamptz); + IF found THEN + RAISE 'For the table % manual partition(s) has been created, Please remove them to continue using that table as timeseries table', table_name; + END IF; + + IF table_partition_column_type_name = 'date' THEN + RETURN QUERY SELECT current_range_from_value::date::text, current_range_to_value::date::text; + ELSIF table_partition_column_type_name = 'timestamp without time zone' THEN + RETURN QUERY SELECT current_range_from_value::timestamp::text, current_range_to_value::timestamp::text; + ELSIF table_partition_column_type_name = 'timestamp with time zone' THEN + RETURN QUERY SELECT current_range_from_value::timestamptz::text, current_range_to_value::timestamptz::text; + ELSE + RAISE 'type of the partition column of the table % must be date, timestamp or timestamptz', table_name; + END IF; + END LOOP; + + RETURN; +END; +$$; +COMMENT ON FUNCTION pg_catalog.create_missing_partitions( + table_name regclass, + to_date timestamptz, + start_from timestamptz) +IS 'create missing partitions for the given timeseries table';