-- Create materialized view
CREATE MATERIALIZED VIEW user_statistics AS
SELECT
users.id,
users.username,
COUNT(DISTINCT orders.id) AS order_count,
COALESCE(SUM(orders.total), 0) AS total_spent,
COALESCE(AVG(orders.total), 0) AS avg_order_value,
MAX(orders.created_at) AS last_order_date,
COUNT(DISTINCT DATE(orders.created_at)) AS active_days
FROM users
LEFT JOIN orders ON users.id = orders.user_id
GROUP BY users.id, users.username;
-- Create indexes on materialized view
CREATE UNIQUE INDEX idx_user_stats_id ON user_statistics(id);
CREATE INDEX idx_user_stats_order_count ON user_statistics(order_count);
CREATE INDEX idx_user_stats_total_spent ON user_statistics(total_spent DESC);
-- Query materialized view (fast!)
SELECT * FROM user_statistics
WHERE total_spent > 1000
ORDER BY total_spent DESC
LIMIT 10;
-- Refresh materialized view
REFRESH MATERIALIZED VIEW user_statistics;
-- Refresh concurrently (doesn't block reads)
REFRESH MATERIALIZED VIEW CONCURRENTLY user_statistics;
-- Requires unique index
-- Drop materialized view
DROP MATERIALIZED VIEW user_statistics;
-- Materialized view with aggregations
CREATE MATERIALIZED VIEW daily_sales_summary AS
SELECT
DATE(created_at) AS sale_date,
COUNT(*) AS order_count,
SUM(total) AS total_revenue,
AVG(total) AS avg_order_value,
COUNT(DISTINCT user_id) AS unique_customers,
MAX(total) AS max_order,
MIN(total) AS min_order,
PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY total) AS median_order
FROM orders
WHERE created_at >= CURRENT_DATE - INTERVAL '2 years'
GROUP BY DATE(created_at);
CREATE UNIQUE INDEX idx_daily_sales_date ON daily_sales_summary(sale_date);
-- Time-series rollup
CREATE MATERIALIZED VIEW hourly_metrics AS
SELECT
DATE_TRUNC('hour', created_at) AS hour,
COUNT(*) AS event_count,
COUNT(DISTINCT user_id) AS unique_users,
AVG(response_time_ms) AS avg_response_time,
MAX(response_time_ms) AS max_response_time,
PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY response_time_ms) AS p95_response_time
FROM events
WHERE created_at >= CURRENT_TIMESTAMP - INTERVAL '90 days'
GROUP BY DATE_TRUNC('hour', created_at);
CREATE UNIQUE INDEX idx_hourly_metrics_hour ON hourly_metrics(hour);
-- Denormalized product catalog
CREATE MATERIALIZED VIEW product_catalog AS
SELECT
p.id,
p.name,
p.sku,
p.price,
c.name AS category_name,
m.name AS manufacturer_name,
COALESCE(AVG(r.rating), 0) AS avg_rating,
COUNT(r.id) AS review_count,
COALESCE(SUM(oi.quantity), 0) AS units_sold,
p.created_at,
p.updated_at
FROM products p
LEFT JOIN categories c ON p.category_id = c.id
LEFT JOIN manufacturers m ON p.manufacturer_id = m.id
LEFT JOIN reviews r ON p.id = r.product_id
LEFT JOIN order_items oi ON p.id = oi.product_id
GROUP BY p.id, p.name, p.sku, p.price, c.name, m.name, p.created_at, p.updated_at;
CREATE UNIQUE INDEX idx_product_catalog_id ON product_catalog(id);
CREATE INDEX idx_product_catalog_category ON product_catalog(category_name);
CREATE INDEX idx_product_catalog_rating ON product_catalog(avg_rating DESC);
-- Scheduled refresh (with cron or pg_cron extension)
CREATE EXTENSION pg_cron;
-- Refresh daily at 2 AM
SELECT cron.schedule(
'refresh-user-stats',
'0 2 * * *',
$$REFRESH MATERIALIZED VIEW CONCURRENTLY user_statistics$$
);
-- Refresh every hour
SELECT cron.schedule(
'refresh-hourly-metrics',
'0 * * * *',
$$REFRESH MATERIALIZED VIEW CONCURRENTLY hourly_metrics$$
);
-- View scheduled jobs
SELECT * FROM cron.job;
-- Unschedule job
SELECT cron.unschedule('refresh-user-stats');
-- Incremental refresh pattern (manual)
-- Track last refresh time
CREATE TABLE mv_refresh_log (
view_name VARCHAR(100) PRIMARY KEY,
last_refresh TIMESTAMP NOT NULL
);
INSERT INTO mv_refresh_log VALUES ('user_statistics', '1970-01-01');
-- Incremental refresh function
CREATE OR REPLACE FUNCTION refresh_user_statistics_incremental()
RETURNS VOID AS $$
DECLARE
last_refresh TIMESTAMP;
BEGIN
-- Get last refresh time
SELECT mv_refresh_log.last_refresh INTO last_refresh
FROM mv_refresh_log
WHERE view_name = 'user_statistics';
-- Delete changed rows
DELETE FROM user_statistics
WHERE id IN (
SELECT DISTINCT user_id
FROM orders
WHERE updated_at > last_refresh OR created_at > last_refresh
);
-- Recalculate changed rows
INSERT INTO user_statistics
SELECT
users.id,
users.username,
COUNT(DISTINCT orders.id) AS order_count,
COALESCE(SUM(orders.total), 0) AS total_spent,
COALESCE(AVG(orders.total), 0) AS avg_order_value,
MAX(orders.created_at) AS last_order_date,
COUNT(DISTINCT DATE(orders.created_at)) AS active_days
FROM users
LEFT JOIN orders ON users.id = orders.user_id
WHERE users.id IN (
SELECT DISTINCT user_id
FROM orders
WHERE updated_at > last_refresh OR created_at > last_refresh
)
GROUP BY users.id, users.username;
-- Update refresh log
UPDATE mv_refresh_log
SET last_refresh = CURRENT_TIMESTAMP
WHERE view_name = 'user_statistics';
END;
$$ LANGUAGE plpgsql;
-- Run incremental refresh
SELECT refresh_user_statistics_incremental();
-- Compare view vs materialized view performance
EXPLAIN ANALYZE
SELECT
users.id,
COUNT(orders.id) AS order_count,
SUM(orders.total) AS total_spent
FROM users
LEFT JOIN orders ON users.id = orders.user_id
GROUP BY users.id
ORDER BY total_spent DESC
LIMIT 10;
-- Execution time: 2500ms (slow, scans all data)
EXPLAIN ANALYZE
SELECT * FROM user_statistics
ORDER BY total_spent DESC
LIMIT 10;
-- Execution time: 5ms (fast, reads cached data)
-- Materialized view size
SELECT
pg_size_pretty(pg_total_relation_size('user_statistics')) AS total_size,
pg_size_pretty(pg_relation_size('user_statistics')) AS table_size,
pg_size_pretty(pg_total_relation_size('user_statistics') -
pg_relation_size('user_statistics')) AS indexes_size;
-- Regular view (computed on demand)
CREATE VIEW user_stats_view AS
SELECT
users.id,
users.username,
COUNT(DISTINCT orders.id) AS order_count,
COALESCE(SUM(orders.total), 0) AS total_spent
FROM users
LEFT JOIN orders ON users.id = orders.user_id
GROUP BY users.id, users.username;
-- Always current, but slower queries
SELECT * FROM user_stats_view WHERE id = 123;
-- Materialized view dependencies
SELECT
dependent_view.relname AS dependent_view,
source_table.relname AS source_table
FROM pg_depend
JOIN pg_rewrite ON pg_depend.objid = pg_rewrite.oid
JOIN pg_class AS dependent_view ON pg_rewrite.ev_class = dependent_view.oid
JOIN pg_class AS source_table ON pg_depend.refobjid = source_table.oid
WHERE dependent_view.relkind = 'm' -- m = materialized view
AND source_table.relkind = 'r'; -- r = ordinary table
-- Parallel refresh (for very large materialized views)
SET max_parallel_workers_per_gather = 4;
REFRESH MATERIALIZED VIEW CONCURRENTLY large_mv;
-- MySQL alternative: Summary tables with triggers
CREATE TABLE user_statistics (
user_id INT PRIMARY KEY,
order_count INT DEFAULT 0,
total_spent DECIMAL(10,2) DEFAULT 0,
last_updated TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP
);
-- Trigger to update statistics
DELIMITER $$
CREATE TRIGGER update_user_stats_after_order
AFTER INSERT ON orders
FOR EACH ROW
BEGIN
INSERT INTO user_statistics (user_id, order_count, total_spent)
VALUES (NEW.user_id, 1, NEW.total)
ON DUPLICATE KEY UPDATE
order_count = order_count + 1,
total_spent = total_spent + NEW.total;
END$$
DELIMITER ;