diff --git a/scripts/migration.sql b/scripts/migration.sql index 9b89cf3..0882af0 100644 --- a/scripts/migration.sql +++ b/scripts/migration.sql @@ -9,6 +9,7 @@ update messages set year = extract(year from timestamp); -- Schema extracted using: -- sudo -u postgres pg_dump -t 'public.messages' --schema-only datanommer +-- then modified to handle partitioning. -- Create the messages2 table that will be partitioned and will have the messages -- moved to @@ -28,7 +29,9 @@ CREATE TABLE messages2 ( msg_id text, _headers text, username text, - crypto text + crypto text, + CONSTRAINT messages2_pkey PRIMARY KEY (id, year), + CONSTRAINT messages2_msg_id_key UNIQUE (msg_id, year) ) PARTITION BY LIST(year); CREATE SEQUENCE public.messages2_id_seq @@ -41,10 +44,6 @@ CREATE SEQUENCE public.messages2_id_seq ALTER TABLE public.messages2_id_seq OWNER TO datanommer; ALTER SEQUENCE public.messages2_id_seq OWNED BY public.messages2.id; ALTER TABLE ONLY public.messages2 ALTER COLUMN id SET DEFAULT nextval('public.messages2_id_seq'::regclass); -ALTER TABLE ONLY public.messages2 - ADD CONSTRAINT messages2_msg_id_key UNIQUE (msg_id); -ALTER TABLE ONLY public.messages2 - ADD CONSTRAINT messages2_pkey PRIMARY KEY (id); CREATE INDEX index_msg2_category ON public.messages2 USING btree (category); CREATE INDEX index_msg2_timestamp ON public.messages2 USING btree ("timestamp"); CREATE INDEX index_msg2_topic ON public.messages2 USING btree (topic); @@ -55,14 +54,20 @@ GRANT SELECT ON SEQUENCE public.messages2_id_seq TO datagrepper; -- Create the partitions -CREATE TABLE messages_2012 PARTITION OF messages FOR VALUES IN (2012); -CREATE TABLE messages_2013 PARTITION OF messages FOR VALUES IN (2013); -CREATE TABLE messages_2014 PARTITION OF messages FOR VALUES IN (2014); -CREATE TABLE messages_2015 PARTITION OF messages FOR VALUES IN (2015); -CREATE TABLE messages_2016 PARTITION OF messages FOR VALUES IN (2016); -CREATE TABLE messages_2017 PARTITION OF messages FOR VALUES IN (2017); -CREATE TABLE messages_2018 PARTITION OF messages FOR VALUES IN (2018); -CREATE TABLE messages_2019 PARTITION OF messages FOR VALUES IN (2019); -CREATE TABLE messages_2020 PARTITION OF messages FOR VALUES IN (2020); -CREATE TABLE messages_2021 PARTITION OF messages FOR VALUES in (2021); -CREATE TABLE messages_2021 PARTITION OF messages DEFAULT; +CREATE TABLE messages_2012 PARTITION OF messages2 FOR VALUES IN (2012); +CREATE TABLE messages_2013 PARTITION OF messages2 FOR VALUES IN (2013); +CREATE TABLE messages_2014 PARTITION OF messages2 FOR VALUES IN (2014); +CREATE TABLE messages_2015 PARTITION OF messages2 FOR VALUES IN (2015); +CREATE TABLE messages_2016 PARTITION OF messages2 FOR VALUES IN (2016); +CREATE TABLE messages_2017 PARTITION OF messages2 FOR VALUES IN (2017); +CREATE TABLE messages_2018 PARTITION OF messages2 FOR VALUES IN (2018); +CREATE TABLE messages_2019 PARTITION OF messages2 FOR VALUES IN (2019); +CREATE TABLE messages_2020 PARTITION OF messages2 FOR VALUES IN (2020); +--- It seems smart to place each row in the correct table when it's added, and +--- use DEFAULT as the fallback table for items that have unexpected years: +CREATE TABLE messages_2021 PARTITION OF messages2 FOR VALUES in (2021); +CREATE TABLE messages_other PARTITION OF messages2 DEFAULT; +--- The other approach would be to have the DEFAULT be the current year: +---CREATE TABLE messages_2021 PARTITION OF messages2 DEFAULT; +--- But you'd have to rename that table and/or move those rows at the end +--- of the year to make room for the new table, and that seems fragile.