-- EXPLAIN ANALYZE (actual execution statistics)
EXPLAIN ANALYZE
SELECT u.username, COUNT(o.id) AS order_count
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
WHERE u.created_at >= '2024-01-01'
GROUP BY u.id, u.username
ORDER BY order_count DESC
LIMIT 10;
-- Key metrics to watch:
-- Planning Time: Time spent planning query
-- Execution Time: Time spent executing
-- Rows: Estimated vs actual rows
-- Cost: Planner's cost estimate
-- Covering index (includes all needed columns)
CREATE INDEX idx_orders_user_id_created_at
ON orders (user_id, created_at)
INCLUDE (total, status);
-- Query uses index-only scan (no table access)
SELECT user_id, created_at, total, status
FROM orders
WHERE user_id = 123
ORDER BY created_at DESC;
-- Partial index (smaller, faster)
CREATE INDEX idx_orders_active
ON orders (user_id, created_at)
WHERE status = 'active';
-- Only active orders in index
SELECT * FROM orders
WHERE user_id = 123
AND status = 'active'
ORDER BY created_at DESC;
-- Expression index
CREATE INDEX idx_users_lower_email
ON users (LOWER(email));
-- Case-insensitive search uses index
SELECT * FROM users
WHERE LOWER(email) = 'user@example.com';
-- Avoid SELECT * (fetch only needed columns)
-- Bad:
SELECT * FROM users WHERE id = 123;
-- Good:
SELECT id, username, email FROM users WHERE id = 123;
-- Limit early to reduce processing
-- Bad:
SELECT * FROM (
SELECT * FROM large_table ORDER BY created_at DESC
) sub
LIMIT 10;
-- Good:
SELECT * FROM large_table
ORDER BY created_at DESC
LIMIT 10;
-- Join optimization: smaller table first
-- Bad join order:
SELECT *
FROM huge_table h
JOIN small_table s ON h.id = s.huge_id;
-- Better: Let planner decide, but ensure statistics are current
ANALYZE huge_table;
ANALYZE small_table;
-- Subquery to CTE transformation
-- Subquery (may execute multiple times):
SELECT
u.username,
(SELECT COUNT(*) FROM orders WHERE user_id = u.id) AS order_count
FROM users u;
-- CTE (executes once):
WITH order_counts AS (
SELECT user_id, COUNT(*) AS count
FROM orders
GROUP BY user_id
)
SELECT u.username, COALESCE(oc.count, 0) AS order_count
FROM users u
LEFT JOIN order_counts oc ON u.id = oc.user_id;
-- EXISTS vs IN
-- EXISTS (often faster, stops at first match):
SELECT username
FROM users u
WHERE EXISTS (
SELECT 1 FROM orders o
WHERE o.user_id = u.id AND o.status = 'completed'
);
-- IN (may be slower for large result sets):
SELECT username
FROM users
WHERE id IN (
SELECT DISTINCT user_id FROM orders WHERE status = 'completed'
);
-- UNION ALL vs UNION
-- UNION ALL (no deduplication, faster):
SELECT id, name FROM products_active
UNION ALL
SELECT id, name FROM products_archived;
-- UNION (deduplicates, slower):
SELECT id, name FROM products_active
UNION
SELECT id, name FROM products_archived;
-- Batch updates (avoid row-by-row)
-- Bad:
-- UPDATE products SET price = price * 1.1 WHERE id = 1;
-- UPDATE products SET price = price * 1.1 WHERE id = 2;
-- ...
-- Good:
UPDATE products
SET price = price * 1.1
WHERE category_id = 5;
-- Filter before join
-- Bad:
SELECT o.*
FROM orders o
JOIN users u ON o.user_id = u.id
WHERE o.created_at >= '2024-01-01';
-- Better:
SELECT o.*
FROM (
SELECT * FROM orders
WHERE created_at >= '2024-01-01'
) o
JOIN users u ON o.user_id = u.id;
-- Or use CTE for clarity:
WITH recent_orders AS (
SELECT * FROM orders
WHERE created_at >= '2024-01-01'
)
SELECT o.*
FROM recent_orders o
JOIN users u ON o.user_id = u.id;
-- Partition pruning
EXPLAIN ANALYZE
SELECT * FROM measurements
WHERE measured_at >= '2024-01-01'
AND measured_at < '2024-02-01';
-- Only scans measurements_2024_01 partition
-- Index scan vs bitmap scan vs sequential scan
SET enable_seqscan = OFF; -- Force index usage (testing only)
SET enable_seqscan = ON; -- Re-enable
-- Parallel query execution
SET max_parallel_workers_per_gather = 4;
EXPLAIN ANALYZE
SELECT COUNT(*) FROM large_table;
-- May use parallel sequential scan
-- Disable parallel for small queries
SET parallel_tuple_cost = 1000;
-- Prepared statements (plan caching)
PREPARE user_lookup (INT) AS
SELECT username, email FROM users WHERE id = $1;
EXECUTE user_lookup(123);
EXECUTE user_lookup(456);
DEALLOCATE user_lookup;
-- Query hints (PostgreSQL doesn't support, but plan can be influenced)
-- Force nested loop join
SET enable_hashjoin = OFF;
SET enable_mergejoin = OFF;
-- Run query
-- Reset
SET enable_hashjoin = ON;
SET enable_mergejoin = ON;
-- Work_mem tuning (per-query memory)
-- Affects sorts, hash joins
SHOW work_mem; -- Default: 4MB
-- Increase for specific query
SET work_mem = '256MB';
SELECT * FROM large_table ORDER BY created_at DESC LIMIT 1000;
RESET work_mem;
-- Effective_cache_size (planner hint)
SET effective_cache_size = '8GB';
-- Tells planner how much data likely cached
-- Materialized CTE (force execution before join)
WITH expensive_calc AS MATERIALIZED (
SELECT user_id, SUM(total) AS lifetime_value
FROM orders
GROUP BY user_id
)
SELECT u.username, ec.lifetime_value
FROM users u
JOIN expensive_calc ec ON u.id = ec.user_id
WHERE ec.lifetime_value > 1000;
-- Index condition vs filter condition
EXPLAIN ANALYZE
SELECT * FROM orders
WHERE user_id = 123 -- Index condition
AND status = 'completed' -- Filter condition (if not in index)
AND total > 100; -- Filter condition
-- Add to index to push more to index condition
CREATE INDEX idx_orders_user_status_total
ON orders (user_id, status, total);
-- Now all conditions are index conditions
-- Avoiding function calls in WHERE (prevents index usage)
-- Bad:
SELECT * FROM users WHERE EXTRACT(YEAR FROM created_at) = 2024;
-- Good:
SELECT * FROM users
WHERE created_at >= '2024-01-01'
AND created_at < '2025-01-01';
-- OR to UNION transformation
-- Bad (may not use indexes efficiently):
SELECT * FROM products
WHERE category_id = 5 OR featured = true;
-- Better:
SELECT * FROM products WHERE category_id = 5
UNION
SELECT * FROM products WHERE featured = true;
-- Avoid OFFSET for pagination (slow on large offsets)
-- Bad:
SELECT * FROM products ORDER BY id LIMIT 10 OFFSET 100000;
-- Good (keyset pagination):
SELECT * FROM products
WHERE id > 100000
ORDER BY id
LIMIT 10;
-- Cardinality estimation
SELECT
tablename,
attname,
n_distinct,
correlation
FROM pg_stats
WHERE tablename = 'orders'
AND schemaname = 'public';
-- If n_distinct is wrong, update statistics
ALTER TABLE orders ALTER COLUMN status SET STATISTICS 1000;
ANALYZE orders;
-- Join removal (unused join)
-- Planner may remove this join if only users.id is used
SELECT u.id
FROM users u
LEFT JOIN orders o ON u.id = o.user_id;
-- Multi-column statistics (correlated columns)
CREATE STATISTICS orders_user_status_stats (dependencies)
ON user_id, status FROM orders;
ANALYZE orders;
-- Now planner understands correlation between user_id and status
-- Query result caching (application level)
/*
const cacheKey = `user:${userId}:orders`;
let orders = await cache.get(cacheKey);
if (!orders) {
orders = await db.query('SELECT * FROM orders WHERE user_id = $1', [userId]);
await cache.set(cacheKey, orders, 300); // 5 min TTL
}
*/
-- Statement timeout (prevent runaway queries)
SET statement_timeout = '30s';
-- Query locks table
LOCK TABLE products IN ACCESS SHARE MODE;
-- Monitoring slow queries
SELECT
query,
mean_exec_time,
calls,
total_exec_time
FROM pg_stat_statements
WHERE mean_exec_time > 1000 -- > 1 second
ORDER BY mean_exec_time DESC
LIMIT 20;