diff --git a/scripts/migration.sql b/scripts/migration.sql index 0882af0..81a205e 100644 --- a/scripts/migration.sql +++ b/scripts/migration.sql @@ -1,12 +1,5 @@ \timing on --- Update the current messages table so it stores the year that we'll partition --- on. - -ALTER TABLE messages ADD COLUMN year integer; -update messages set year = extract(year from timestamp); - - -- Schema extracted using: -- sudo -u postgres pg_dump -t 'public.messages' --schema-only datanommer -- then modified to handle partitioning. @@ -18,7 +11,6 @@ CREATE TABLE messages2 ( id integer NOT NULL, i integer NOT NULL, "timestamp" timestamp without time zone NOT NULL, - year integer, certificate text, signature text, topic text, @@ -30,9 +22,11 @@ CREATE TABLE messages2 ( _headers text, username text, crypto text, - CONSTRAINT messages2_pkey PRIMARY KEY (id, year), - CONSTRAINT messages2_msg_id_key UNIQUE (msg_id, year) -) PARTITION BY LIST(year); + CONSTRAINT messages2_pkey PRIMARY KEY (id), + -- It is weird that we have to use the 'id' in this constraint but it doesn't seem to + -- work w/o it. + CONSTRAINT messages2_msg_id_key UNIQUE (msg_id, id) +) PARTITION BY RANGE(id); CREATE SEQUENCE public.messages2_id_seq START WITH 1 @@ -44,30 +38,48 @@ CREATE SEQUENCE public.messages2_id_seq ALTER TABLE public.messages2_id_seq OWNER TO datanommer; ALTER SEQUENCE public.messages2_id_seq OWNED BY public.messages2.id; ALTER TABLE ONLY public.messages2 ALTER COLUMN id SET DEFAULT nextval('public.messages2_id_seq'::regclass); +GRANT SELECT ON TABLE public.messages2 TO datagrepper; +GRANT SELECT ON SEQUENCE public.messages2_id_seq TO datagrepper; + + +-- Create the partitions + +CREATE TABLE messages_010g PARTITION OF messages2 FOR VALUES FROM (MINVALUE) TO (10000000); +CREATE TABLE messages_020g PARTITION OF messages2 FOR VALUES FROM (10000000) TO (20000000); +CREATE TABLE messages_030g PARTITION OF messages2 FOR VALUES FROM (20000000) TO (30000000); +CREATE TABLE messages_040g PARTITION OF messages2 FOR VALUES FROM (30000000) TO (40000000); +CREATE TABLE messages_050g PARTITION OF messages2 FOR VALUES FROM (40000000) TO (50000000); +CREATE TABLE messages_060g PARTITION OF messages2 FOR VALUES FROM (50000000) TO (60000000); +CREATE TABLE messages_070g PARTITION OF messages2 FOR VALUES FROM (60000000) TO (70000000); +CREATE TABLE messages_080g PARTITION OF messages2 FOR VALUES FROM (70000000) TO (80000000); +CREATE TABLE messages_090g PARTITION OF messages2 FOR VALUES FROM (80000000) TO (90000000); +CREATE TABLE messages_100g PARTITION OF messages2 FOR VALUES FROM (90000000) TO (100000000); +CREATE TABLE messages_110g PARTITION OF messages2 FOR VALUES FROM (100000000) TO (110000000); +CREATE TABLE messages_120g PARTITION OF messages2 FOR VALUES FROM (110000000) TO (120000000); +CREATE TABLE messages_130g PARTITION OF messages2 FOR VALUES FROM (120000000) TO (130000000); +CREATE TABLE messages_140g PARTITION OF messages2 FOR VALUES FROM (130000000) TO (140000000); +CREATE TABLE messages_150g PARTITION OF messages2 FOR VALUES FROM (140000000) TO (150000000); +CREATE TABLE messages_160g PARTITION OF messages2 FOR VALUES FROM (150000000) TO (160000000); +CREATE TABLE messages_170g PARTITION OF messages2 FOR VALUES FROM (160000000) TO (170000000); +CREATE TABLE messages_180g PARTITION OF messages2 FOR VALUES FROM (170000000) TO (180000000); +CREATE TABLE messages_190g PARTITION OF messages2 FOR VALUES FROM (180000000) TO (190000000); +CREATE TABLE messages_200g PARTITION OF messages2 FOR VALUES FROM (190000000) TO (200000000); +CREATE TABLE messages_210g PARTITION OF messages2 FOR VALUES FROM (200000000) TO (210000000); +CREATE TABLE messages_220g PARTITION OF messages2 FOR VALUES FROM (210000000) TO (220000000); + +-- Insert the data + +INSERT INTO messages2 (i,"timestamp",certificate,signature,topic,_msg,category,source_name,source_version,msg_id,_headers,username,crypto) +SELECT i,"timestamp",certificate,signature,topic,_msg,category,source_name,source_version,msg_id,_headers,username,crypto FROM messages ORDER BY messages.id; + + + +-- Create the indexes ones the data is in + CREATE INDEX index_msg2_category ON public.messages2 USING btree (category); CREATE INDEX index_msg2_timestamp ON public.messages2 USING btree ("timestamp"); CREATE INDEX index_msg2_topic ON public.messages2 USING btree (topic); CREATE INDEX messages2_datanommer_timestamp_category_idx ON public.messages2 USING btree ("timestamp" DESC, category); CREATE INDEX messages2_datanommer_timestamp_topic_idx ON public.messages2 USING btree ("timestamp" DESC, topic); -GRANT SELECT ON TABLE public.messages2 TO datagrepper; -GRANT SELECT ON SEQUENCE public.messages2_id_seq TO datagrepper; --- Create the partitions -CREATE TABLE messages_2012 PARTITION OF messages2 FOR VALUES IN (2012); -CREATE TABLE messages_2013 PARTITION OF messages2 FOR VALUES IN (2013); -CREATE TABLE messages_2014 PARTITION OF messages2 FOR VALUES IN (2014); -CREATE TABLE messages_2015 PARTITION OF messages2 FOR VALUES IN (2015); -CREATE TABLE messages_2016 PARTITION OF messages2 FOR VALUES IN (2016); -CREATE TABLE messages_2017 PARTITION OF messages2 FOR VALUES IN (2017); -CREATE TABLE messages_2018 PARTITION OF messages2 FOR VALUES IN (2018); -CREATE TABLE messages_2019 PARTITION OF messages2 FOR VALUES IN (2019); -CREATE TABLE messages_2020 PARTITION OF messages2 FOR VALUES IN (2020); ---- It seems smart to place each row in the correct table when it's added, and ---- use DEFAULT as the fallback table for items that have unexpected years: -CREATE TABLE messages_2021 PARTITION OF messages2 FOR VALUES in (2021); -CREATE TABLE messages_other PARTITION OF messages2 DEFAULT; ---- The other approach would be to have the DEFAULT be the current year: ----CREATE TABLE messages_2021 PARTITION OF messages2 DEFAULT; ---- But you'd have to rename that table and/or move those rows at the end ---- of the year to make room for the new table, and that seems fragile.