-- Partition-based archival strategy
-- Create partitioned table
CREATE TABLE logs (
id BIGSERIAL,
level VARCHAR(20),
message TEXT,
created_at TIMESTAMP NOT NULL,
PRIMARY KEY (id, created_at)
) PARTITION BY RANGE (created_at);
-- Create monthly partitions
CREATE TABLE logs_2024_01 PARTITION OF logs
FOR VALUES FROM ('2024-01-01') TO ('2024-02-01');
CREATE TABLE logs_2024_02 PARTITION OF logs
FOR VALUES FROM ('2024-02-01') TO ('2024-03-01');
-- Archive old partition
-- 1. Detach partition (quick operation)
ALTER TABLE logs DETACH PARTITION logs_2024_01;
-- 2. Export to file
COPY logs_2024_01 TO '/archive/logs_2024_01.csv' WITH (FORMAT csv, HEADER);
-- Or compress while exporting
COPY logs_2024_01 TO PROGRAM 'gzip > /archive/logs_2024_01.csv.gz'
WITH (FORMAT csv);
-- 3. Drop partition
DROP TABLE logs_2024_01;
-- Automated archival function
CREATE OR REPLACE FUNCTION archive_old_partitions(
table_name TEXT,
retention_months INT,
archive_path TEXT
)
RETURNS VOID AS $$
DECLARE
partition_record RECORD;
cutoff_date DATE;
BEGIN
cutoff_date := CURRENT_DATE - (retention_months || ' months')::INTERVAL;
FOR partition_record IN
SELECT
c.relname AS partition_name,
pg_get_expr(c.relpartbound, c.oid) AS partition_bound
FROM pg_class c
JOIN pg_inherits i ON c.oid = i.inhrelid
JOIN pg_class p ON i.inhparent = p.oid
WHERE p.relname = table_name
AND c.relname ~ '\d{4}_\d{2}$'
AND c.relname < table_name || '_' || TO_CHAR(cutoff_date, 'YYYY_MM')
LOOP
RAISE NOTICE 'Archiving partition: %', partition_record.partition_name;
-- Detach
EXECUTE format('ALTER TABLE %I DETACH PARTITION %I',
table_name, partition_record.partition_name);
-- Export
EXECUTE format(
'COPY %I TO PROGRAM ''gzip > %s/%I.csv.gz'' WITH (FORMAT csv)',
partition_record.partition_name,
archive_path,
partition_record.partition_name
);
-- Drop
EXECUTE format('DROP TABLE %I', partition_record.partition_name);
END LOOP;
END;
$$ LANGUAGE plpgsql;
-- Run archival
SELECT archive_old_partitions('logs', 12, '/archive');
-- Soft delete pattern
ALTER TABLE users ADD COLUMN deleted_at TIMESTAMP;
CREATE INDEX idx_users_deleted_at ON users(deleted_at);
-- Soft delete
UPDATE users SET deleted_at = CURRENT_TIMESTAMP WHERE id = 123;
-- Query active records only
SELECT * FROM users WHERE deleted_at IS NULL;
-- Archive soft-deleted records
CREATE TABLE users_archive (LIKE users INCLUDING ALL);
INSERT INTO users_archive
SELECT * FROM users
WHERE deleted_at < CURRENT_DATE - INTERVAL '90 days';
DELETE FROM users
WHERE deleted_at < CURRENT_DATE - INTERVAL '90 days';
-- Audit log with automatic archival
CREATE TABLE audit_log (
id BIGSERIAL PRIMARY KEY,
table_name VARCHAR(50),
record_id INT,
action VARCHAR(20),
old_data JSONB,
new_data JSONB,
user_id INT,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
-- Partition by month
CREATE TABLE audit_log_2024_01 PARTITION OF audit_log
FOR VALUES FROM ('2024-01-01') TO ('2024-02-01');
-- Retention policy: Keep 2 years
SELECT add_retention_policy('audit_log', INTERVAL '2 years');
-- Compressed archive table
CREATE TABLE orders_archive (
id BIGINT PRIMARY KEY,
user_id INT,
total DECIMAL(10,2),
status VARCHAR(20),
created_at TIMESTAMP,
archived_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
-- Enable compression (TimescaleDB)
ALTER TABLE orders_archive SET (
timescaledb.compress,
timescaledb.compress_segmentby = 'user_id',
timescaledb.compress_orderby = 'created_at DESC'
);
-- Archive completed orders older than 1 year
WITH archived AS (
INSERT INTO orders_archive (id, user_id, total, status, created_at)
SELECT id, user_id, total, status, created_at
FROM orders
WHERE status = 'completed'
AND created_at < CURRENT_DATE - INTERVAL '1 year'
RETURNING id
)
DELETE FROM orders
WHERE id IN (SELECT id FROM archived);
-- Restore archived partition
-- 1. Create partition table
CREATE TABLE logs_2024_01 (LIKE logs INCLUDING ALL);
-- 2. Import data
COPY logs_2024_01 FROM PROGRAM 'gunzip < /archive/logs_2024_01.csv.gz'
WITH (FORMAT csv, HEADER);
-- 3. Attach partition
ALTER TABLE logs ATTACH PARTITION logs_2024_01
FOR VALUES FROM ('2024-01-01') TO ('2024-02-01');
-- Query across active and archived data
CREATE VIEW logs_with_archive AS
SELECT * FROM logs
UNION ALL
SELECT * FROM logs_archive
WHERE created_at >= CURRENT_DATE - INTERVAL '30 days';
-- Foreign table for archived data (query without importing)
CREATE EXTENSION file_fdw;
CREATE SERVER archive_server FOREIGN DATA WRAPPER file_fdw;
CREATE FOREIGN TABLE logs_2024_01_archive (
id BIGINT,
level VARCHAR(20),
message TEXT,
created_at TIMESTAMP
)
SERVER archive_server
OPTIONS (
filename '/archive/logs_2024_01.csv',
format 'csv',
header 'true'
);
-- Query foreign table
SELECT * FROM logs_2024_01_archive WHERE level = 'ERROR';
-- Data lifecycle states
CREATE TYPE data_lifecycle AS ENUM ('active', 'warm', 'cold', 'archived');
ALTER TABLE documents ADD COLUMN lifecycle data_lifecycle DEFAULT 'active';
-- Move to warm storage (less frequently accessed)
UPDATE documents
SET lifecycle = 'warm'
WHERE last_accessed_at < CURRENT_DATE - INTERVAL '90 days'
AND lifecycle = 'active';
-- Move to cold storage
UPDATE documents
SET lifecycle = 'cold'
WHERE last_accessed_at < CURRENT_DATE - INTERVAL '1 year'
AND lifecycle = 'warm';
-- Archive to external storage
INSERT INTO documents_archive
SELECT * FROM documents
WHERE lifecycle = 'cold'
AND updated_at < CURRENT_DATE - INTERVAL '2 years';
UPDATE documents
SET lifecycle = 'archived',
content = NULL -- Clear large content column
WHERE lifecycle = 'cold'
AND updated_at < CURRENT_DATE - INTERVAL '2 years';
-- Compliance retention tracking
CREATE TABLE retention_policies (
table_name VARCHAR(100) PRIMARY KEY,
retention_days INT NOT NULL,
compliance_requirement VARCHAR(100),
last_archived TIMESTAMP
);
INSERT INTO retention_policies VALUES
('audit_log', 2555, 'SOX', NULL), -- 7 years
('transactions', 2555, 'SOX', NULL),
('user_activity', 90, 'GDPR', NULL);
-- Archival report
SELECT
t.tablename,
pg_size_pretty(pg_total_relation_size('public.'||t.tablename)) AS current_size,
rp.retention_days,
rp.compliance_requirement,
COUNT(*) FILTER (
WHERE created_at < CURRENT_DATE - (rp.retention_days || ' days')::INTERVAL
) AS records_to_archive
FROM pg_tables t
JOIN retention_policies rp ON t.tablename = rp.table_name
LEFT JOIN LATERAL (
EXECUTE format('SELECT created_at FROM %I', t.tablename)
) records ON true
WHERE t.schemaname = 'public'
GROUP BY t.tablename, rp.retention_days, rp.compliance_requirement;
-- Scheduled archival job (pg_cron)
SELECT cron.schedule(
'monthly-archival',
'0 3 1 * *', -- 3 AM on 1st of month
$$
SELECT archive_old_partitions('logs', 12, '/archive');
SELECT archive_old_partitions('metrics', 6, '/archive');
$$
);