Materialized views for performance optimization
-- Create materialized view
CREATE MATERIALIZED VIEW user_statistics AS
SELECT
users.id,
users.username,
COUNT(DISTINCT orders.id) AS order_count,
COALESCE(SUM(orders.total), 0) AS total_spent,
COALESCE(AVG(orders.total), 0) AS avg_order_value,
MAX(orders.created_at) AS last_order_date,
COUNT(DISTINCT DATE(orders.created_at)) AS active_days
FROM users
LEFT JOIN orders ON users.id = orders.user_id
GROUP BY users.id, users.username;
-- Create indexes on materialized view
CREATE UNIQUE INDEX idx_user_stats_id ON user_statistics(id);
CREATE INDEX idx_user_stats_order_count ON user_statistics(order_count);
CREATE INDEX idx_user_stats_total_spent ON user_statistics(total_spent DESC);
-- Query materialized view (fast!)
SELECT * FROM user_statistics
WHERE total_spent > 1000
ORDER BY total_spent DESC
LIMIT 10;
-- Refresh materialized view
REFRESH MATERIALIZED VIEW user_statistics;
-- Refresh concurrently (doesn't block reads)
REFRESH MATERIALIZED VIEW CONCURRENTLY user_statistics;
-- Requires unique index
-- Drop materialized view
DROP MATERIALIZED VIEW user_statistics;
-- Materialized view with aggregations
CREATE MATERIALIZED VIEW daily_sales_summary AS
SELECT
DATE(created_at) AS sale_date,
COUNT(*) AS order_count,
SUM(total) AS total_revenue,
AVG(total) AS avg_order_value,
COUNT(DISTINCT user_id) AS unique_customers,
MAX(total) AS max_order,
MIN(total) AS min_order,
PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY total) AS median_order
FROM orders
WHERE created_at >= CURRENT_DATE - INTERVAL '2 years'
GROUP BY DATE(created_at);
CREATE UNIQUE INDEX idx_daily_sales_date ON daily_sales_summary(sale_date);
-- Time-series rollup
CREATE MATERIALIZED VIEW hourly_metrics AS
SELECT
DATE_TRUNC('hour', created_at) AS hour,
COUNT(*) AS event_count,
COUNT(DISTINCT user_id) AS unique_users,
AVG(response_time_ms) AS avg_response_time,
MAX(response_time_ms) AS max_response_time,
PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY response_time_ms) AS p95_response_time
FROM events
WHERE created_at >= CURRENT_TIMESTAMP - INTERVAL '90 days'
GROUP BY DATE_TRUNC('hour', created_at);
CREATE UNIQUE INDEX idx_hourly_metrics_hour ON hourly_metrics(hour);
-- Denormalized product catalog
CREATE MATERIALIZED VIEW product_catalog AS
SELECT
p.id,
p.name,
p.sku,
p.price,
c.name AS category_name,
m.name AS manufacturer_name,
COALESCE(AVG(r.rating), 0) AS avg_rating,
COUNT(r.id) AS review_count,
COALESCE(SUM(oi.quantity), 0) AS units_sold,
p.created_at,
p.updated_at
FROM products p
LEFT JOIN categories c ON p.category_id = c.id
LEFT JOIN manufacturers m ON p.manufacturer_id = m.id
LEFT JOIN reviews r ON p.id = r.product_id
LEFT JOIN order_items oi ON p.id = oi.product_id
GROUP BY p.id, p.name, p.sku, p.price, c.name, m.name, p.created_at, p.updated_at;
CREATE UNIQUE INDEX idx_product_catalog_id ON product_catalog(id);
CREATE INDEX idx_product_catalog_category ON product_catalog(category_name);
CREATE INDEX idx_product_catalog_rating ON product_catalog(avg_rating DESC);
-- Scheduled refresh (with cron or pg_cron extension)
CREATE EXTENSION pg_cron;
-- Refresh daily at 2 AM
SELECT cron.schedule(
'refresh-user-stats',
'0 2 * * *',
$$REFRESH MATERIALIZED VIEW CONCURRENTLY user_statistics$$
);
-- Refresh every hour
SELECT cron.schedule(
'refresh-hourly-metrics',
'0 * * * *',
$$REFRESH MATERIALIZED VIEW CONCURRENTLY hourly_metrics$$
);
-- View scheduled jobs
SELECT * FROM cron.job;
-- Unschedule job
SELECT cron.unschedule('refresh-user-stats');
-- Incremental refresh pattern (manual)
-- Track last refresh time
CREATE TABLE mv_refresh_log (
view_name VARCHAR(100) PRIMARY KEY,
last_refresh TIMESTAMP NOT NULL
);
INSERT INTO mv_refresh_log VALUES ('user_statistics', '1970-01-01');
-- Incremental refresh function
CREATE OR REPLACE FUNCTION refresh_user_statistics_incremental()
RETURNS VOID AS $$
DECLARE
last_refresh TIMESTAMP;
BEGIN
-- Get last refresh time
SELECT mv_refresh_log.last_refresh INTO last_refresh
FROM mv_refresh_log
WHERE view_name = 'user_statistics';
-- Delete changed rows
DELETE FROM user_statistics
WHERE id IN (
SELECT DISTINCT user_id
FROM orders
WHERE updated_at > last_refresh OR created_at > last_refresh
);
-- Recalculate changed rows
INSERT INTO user_statistics
SELECT
users.id,
users.username,
COUNT(DISTINCT orders.id) AS order_count,
COALESCE(SUM(orders.total), 0) AS total_spent,
COALESCE(AVG(orders.total), 0) AS avg_order_value,
MAX(orders.created_at) AS last_order_date,
COUNT(DISTINCT DATE(orders.created_at)) AS active_days
FROM users
LEFT JOIN orders ON users.id = orders.user_id
WHERE users.id IN (
SELECT DISTINCT user_id
FROM orders
WHERE updated_at > last_refresh OR created_at > last_refresh
)
GROUP BY users.id, users.username;
-- Update refresh log
UPDATE mv_refresh_log
SET last_refresh = CURRENT_TIMESTAMP
WHERE view_name = 'user_statistics';
END;
$$ LANGUAGE plpgsql;
-- Run incremental refresh
SELECT refresh_user_statistics_incremental();
-- Compare view vs materialized view performance
EXPLAIN ANALYZE
SELECT
users.id,
COUNT(orders.id) AS order_count,
SUM(orders.total) AS total_spent
FROM users
LEFT JOIN orders ON users.id = orders.user_id
GROUP BY users.id
ORDER BY total_spent DESC
LIMIT 10;
-- Execution time: 2500ms (slow, scans all data)
EXPLAIN ANALYZE
SELECT * FROM user_statistics
ORDER BY total_spent DESC
LIMIT 10;
-- Execution time: 5ms (fast, reads cached data)
-- Materialized view size
SELECT
pg_size_pretty(pg_total_relation_size('user_statistics')) AS total_size,
pg_size_pretty(pg_relation_size('user_statistics')) AS table_size,
pg_size_pretty(pg_total_relation_size('user_statistics') -
pg_relation_size('user_statistics')) AS indexes_size;
-- Regular view (computed on demand)
CREATE VIEW user_stats_view AS
SELECT
users.id,
users.username,
COUNT(DISTINCT orders.id) AS order_count,
COALESCE(SUM(orders.total), 0) AS total_spent
FROM users
LEFT JOIN orders ON users.id = orders.user_id
GROUP BY users.id, users.username;
-- Always current, but slower queries
SELECT * FROM user_stats_view WHERE id = 123;
-- Materialized view dependencies
SELECT
dependent_view.relname AS dependent_view,
source_table.relname AS source_table
FROM pg_depend
JOIN pg_rewrite ON pg_depend.objid = pg_rewrite.oid
JOIN pg_class AS dependent_view ON pg_rewrite.ev_class = dependent_view.oid
JOIN pg_class AS source_table ON pg_depend.refobjid = source_table.oid
WHERE dependent_view.relkind = 'm' -- m = materialized view
AND source_table.relkind = 'r'; -- r = ordinary table
-- Parallel refresh (for very large materialized views)
SET max_parallel_workers_per_gather = 4;
REFRESH MATERIALIZED VIEW CONCURRENTLY large_mv;
-- MySQL alternative: Summary tables with triggers
CREATE TABLE user_statistics (
user_id INT PRIMARY KEY,
order_count INT DEFAULT 0,
total_spent DECIMAL(10,2) DEFAULT 0,
last_updated TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP
);
-- Trigger to update statistics
DELIMITER $$
CREATE TRIGGER update_user_stats_after_order
AFTER INSERT ON orders
FOR EACH ROW
BEGIN
INSERT INTO user_statistics (user_id, order_count, total_spent)
VALUES (NEW.user_id, 1, NEW.total)
ON DUPLICATE KEY UPDATE
order_count = order_count + 1,
total_spent = total_spent + NEW.total;
END$$
DELIMITER ;
Materialized views store query results physically for fast access. I use them for expensive aggregations, complex joins, reporting queries. Unlike views, materialized views cache data—need manual refresh. REFRESH MATERIALIZED VIEW updates cached data. CONCURRENTLY option allows queries during refresh. Materialized views trade freshness for speed. Partial refresh updates only changed rows. Indexes on materialized views improve query performance. Understanding refresh frequency balances performance and data freshness. Materialized views excel for analytics dashboards, reports, denormalized data. Incremental refresh strategies minimize refresh time. Materialized views are essential when real-time data isn't required and query performance is critical.