Adjust the migration.sql script to use range partition instead of list

This allows to partition on the primary key and have all the partitions
be of the same size, instead of having them be by year with different
sizes.

Signed-off-by: Pierre-Yves Chibon <pingou@pingoured.fr>
This commit is contained in:
Pierre-Yves Chibon 2021-02-01 14:54:51 +01:00
parent 56a37c5fb7
commit fd25669785

View file

@ -1,12 +1,5 @@
\timing on
-- Update the current messages table so it stores the year that we'll partition
-- on.
ALTER TABLE messages ADD COLUMN year integer;
update messages set year = extract(year from timestamp);
-- Schema extracted using:
-- sudo -u postgres pg_dump -t 'public.messages' --schema-only datanommer
-- then modified to handle partitioning.
@ -18,7 +11,6 @@ CREATE TABLE messages2 (
id integer NOT NULL,
i integer NOT NULL,
"timestamp" timestamp without time zone NOT NULL,
year integer,
certificate text,
signature text,
topic text,
@ -30,9 +22,11 @@ CREATE TABLE messages2 (
_headers text,
username text,
crypto text,
CONSTRAINT messages2_pkey PRIMARY KEY (id, year),
CONSTRAINT messages2_msg_id_key UNIQUE (msg_id, year)
) PARTITION BY LIST(year);
CONSTRAINT messages2_pkey PRIMARY KEY (id),
-- It is weird that we have to use the 'id' in this constraint but it doesn't seem to
-- work w/o it.
CONSTRAINT messages2_msg_id_key UNIQUE (msg_id, id)
) PARTITION BY RANGE(id);
CREATE SEQUENCE public.messages2_id_seq
START WITH 1
@ -44,30 +38,48 @@ CREATE SEQUENCE public.messages2_id_seq
ALTER TABLE public.messages2_id_seq OWNER TO datanommer;
ALTER SEQUENCE public.messages2_id_seq OWNED BY public.messages2.id;
ALTER TABLE ONLY public.messages2 ALTER COLUMN id SET DEFAULT nextval('public.messages2_id_seq'::regclass);
GRANT SELECT ON TABLE public.messages2 TO datagrepper;
GRANT SELECT ON SEQUENCE public.messages2_id_seq TO datagrepper;
-- Create the partitions
CREATE TABLE messages_010g PARTITION OF messages2 FOR VALUES FROM (MINVALUE) TO (10000000);
CREATE TABLE messages_020g PARTITION OF messages2 FOR VALUES FROM (10000000) TO (20000000);
CREATE TABLE messages_030g PARTITION OF messages2 FOR VALUES FROM (20000000) TO (30000000);
CREATE TABLE messages_040g PARTITION OF messages2 FOR VALUES FROM (30000000) TO (40000000);
CREATE TABLE messages_050g PARTITION OF messages2 FOR VALUES FROM (40000000) TO (50000000);
CREATE TABLE messages_060g PARTITION OF messages2 FOR VALUES FROM (50000000) TO (60000000);
CREATE TABLE messages_070g PARTITION OF messages2 FOR VALUES FROM (60000000) TO (70000000);
CREATE TABLE messages_080g PARTITION OF messages2 FOR VALUES FROM (70000000) TO (80000000);
CREATE TABLE messages_090g PARTITION OF messages2 FOR VALUES FROM (80000000) TO (90000000);
CREATE TABLE messages_100g PARTITION OF messages2 FOR VALUES FROM (90000000) TO (100000000);
CREATE TABLE messages_110g PARTITION OF messages2 FOR VALUES FROM (100000000) TO (110000000);
CREATE TABLE messages_120g PARTITION OF messages2 FOR VALUES FROM (110000000) TO (120000000);
CREATE TABLE messages_130g PARTITION OF messages2 FOR VALUES FROM (120000000) TO (130000000);
CREATE TABLE messages_140g PARTITION OF messages2 FOR VALUES FROM (130000000) TO (140000000);
CREATE TABLE messages_150g PARTITION OF messages2 FOR VALUES FROM (140000000) TO (150000000);
CREATE TABLE messages_160g PARTITION OF messages2 FOR VALUES FROM (150000000) TO (160000000);
CREATE TABLE messages_170g PARTITION OF messages2 FOR VALUES FROM (160000000) TO (170000000);
CREATE TABLE messages_180g PARTITION OF messages2 FOR VALUES FROM (170000000) TO (180000000);
CREATE TABLE messages_190g PARTITION OF messages2 FOR VALUES FROM (180000000) TO (190000000);
CREATE TABLE messages_200g PARTITION OF messages2 FOR VALUES FROM (190000000) TO (200000000);
CREATE TABLE messages_210g PARTITION OF messages2 FOR VALUES FROM (200000000) TO (210000000);
CREATE TABLE messages_220g PARTITION OF messages2 FOR VALUES FROM (210000000) TO (220000000);
-- Insert the data
INSERT INTO messages2 (i,"timestamp",certificate,signature,topic,_msg,category,source_name,source_version,msg_id,_headers,username,crypto)
SELECT i,"timestamp",certificate,signature,topic,_msg,category,source_name,source_version,msg_id,_headers,username,crypto FROM messages ORDER BY messages.id;
-- Create the indexes ones the data is in
CREATE INDEX index_msg2_category ON public.messages2 USING btree (category);
CREATE INDEX index_msg2_timestamp ON public.messages2 USING btree ("timestamp");
CREATE INDEX index_msg2_topic ON public.messages2 USING btree (topic);
CREATE INDEX messages2_datanommer_timestamp_category_idx ON public.messages2 USING btree ("timestamp" DESC, category);
CREATE INDEX messages2_datanommer_timestamp_topic_idx ON public.messages2 USING btree ("timestamp" DESC, topic);
GRANT SELECT ON TABLE public.messages2 TO datagrepper;
GRANT SELECT ON SEQUENCE public.messages2_id_seq TO datagrepper;
-- Create the partitions
CREATE TABLE messages_2012 PARTITION OF messages2 FOR VALUES IN (2012);
CREATE TABLE messages_2013 PARTITION OF messages2 FOR VALUES IN (2013);
CREATE TABLE messages_2014 PARTITION OF messages2 FOR VALUES IN (2014);
CREATE TABLE messages_2015 PARTITION OF messages2 FOR VALUES IN (2015);
CREATE TABLE messages_2016 PARTITION OF messages2 FOR VALUES IN (2016);
CREATE TABLE messages_2017 PARTITION OF messages2 FOR VALUES IN (2017);
CREATE TABLE messages_2018 PARTITION OF messages2 FOR VALUES IN (2018);
CREATE TABLE messages_2019 PARTITION OF messages2 FOR VALUES IN (2019);
CREATE TABLE messages_2020 PARTITION OF messages2 FOR VALUES IN (2020);
--- It seems smart to place each row in the correct table when it's added, and
--- use DEFAULT as the fallback table for items that have unexpected years:
CREATE TABLE messages_2021 PARTITION OF messages2 FOR VALUES in (2021);
CREATE TABLE messages_other PARTITION OF messages2 DEFAULT;
--- The other approach would be to have the DEFAULT be the current year:
---CREATE TABLE messages_2021 PARTITION OF messages2 DEFAULT;
--- But you'd have to rename that table and/or move those rows at the end
--- of the year to make room for the new table, and that seems fragile.