Database observability and monitoring metrics

Maria Garcia Feb 2026
2 tabs
-- Connection statistics
SELECT
  count(*) AS total_connections,
  count(*) FILTER (WHERE state = 'active') AS active,
  count(*) FILTER (WHERE state = 'idle') AS idle,
  count(*) FILTER (WHERE state = 'idle in transaction') AS idle_in_tx,
  count(*) FILTER (WHERE wait_event_type IS NOT NULL) AS waiting
FROM pg_stat_activity;

-- Connection limits and usage
SELECT
  setting::INT AS max_connections,
  (SELECT count(*) FROM pg_stat_activity) AS current_connections,
  setting::INT - (SELECT count(*) FROM pg_stat_activity) AS available_connections,
  ROUND(100.0 * (SELECT count(*) FROM pg_stat_activity) / setting::INT, 2) AS pct_used
FROM pg_settings
WHERE name = 'max_connections';

-- Query performance metrics (pg_stat_statements)
SELECT
  LEFT(query, 100) AS query_preview,
  calls,
  ROUND(total_exec_time::numeric, 2) AS total_ms,
  ROUND(mean_exec_time::numeric, 2) AS mean_ms,
  ROUND(min_exec_time::numeric, 2) AS min_ms,
  ROUND(max_exec_time::numeric, 2) AS max_ms,
  ROUND(stddev_exec_time::numeric, 2) AS stddev_ms,
  rows
FROM pg_stat_statements
ORDER BY total_exec_time DESC
LIMIT 10;

-- Queries per second (QPS)
WITH current_stats AS (
  SELECT SUM(calls) AS total_calls, NOW() AS measured_at
  FROM pg_stat_statements
)
SELECT
  total_calls,
  ROUND(
    total_calls / EXTRACT(EPOCH FROM (measured_at - pg_postmaster_start_time())),
    2
  ) AS queries_per_second
FROM current_stats;

-- Cache hit ratio (should be > 99%)
SELECT
  'index' AS type,
  ROUND(
    100.0 * SUM(idx_blks_hit) / NULLIF(SUM(idx_blks_hit + idx_blks_read), 0),
    2
  ) AS hit_ratio_pct
FROM pg_statio_user_indexes
UNION ALL
SELECT
  'table' AS type,
  ROUND(
    100.0 * SUM(heap_blks_hit) / NULLIF(SUM(heap_blks_hit + heap_blks_read), 0),
    2
  ) AS hit_ratio_pct
FROM pg_statio_user_tables;

-- Database size and growth
SELECT
  datname,
  pg_size_pretty(pg_database_size(datname)) AS size,
  pg_database_size(datname) AS size_bytes
FROM pg_database
WHERE datname NOT IN ('template0', 'template1')
ORDER BY pg_database_size(datname) DESC;

-- Table size and bloat indicators
SELECT
  schemaname,
  tablename,
  pg_size_pretty(pg_total_relation_size(schemaname||'.'||tablename)) AS total_size,
  pg_size_pretty(pg_relation_size(schemaname||'.'||tablename)) AS table_size,
  n_live_tup,
  n_dead_tup,
  ROUND(100.0 * n_dead_tup / NULLIF(n_live_tup + n_dead_tup, 0), 2) AS dead_pct,
  last_vacuum,
  last_autovacuum
FROM pg_stat_user_tables
WHERE n_live_tup > 0
ORDER BY pg_total_relation_size(schemaname||'.'||tablename) DESC
LIMIT 20;

-- Transaction rate
SELECT
  datname,
  xact_commit + xact_rollback AS total_transactions,
  xact_commit,
  xact_rollback,
  ROUND(100.0 * xact_rollback / NULLIF(xact_commit + xact_rollback, 0), 2) AS rollback_pct,
  blks_read,
  blks_hit,
  ROUND(100.0 * blks_hit / NULLIF(blks_read + blks_hit, 0), 2) AS cache_hit_pct
FROM pg_stat_database
WHERE datname = current_database();

-- Lock monitoring
SELECT
  locktype,
  mode,
  COUNT(*) AS lock_count
FROM pg_locks
GROUP BY locktype, mode
ORDER BY lock_count DESC;

-- Blocking queries
SELECT
  blocked.pid AS blocked_pid,
  blocked.query AS blocked_query,
  blocking.pid AS blocking_pid,
  blocking.query AS blocking_query,
  NOW() - blocked.query_start AS blocked_duration
FROM pg_stat_activity blocked
JOIN pg_locks blocked_locks ON blocked.pid = blocked_locks.pid
JOIN pg_locks blocking_locks ON blocked_locks.locktype = blocking_locks.locktype
  AND blocked_locks.database IS NOT DISTINCT FROM blocking_locks.database
  AND blocked_locks.relation IS NOT DISTINCT FROM blocking_locks.relation
  AND blocked_locks.pid != blocking_locks.pid
JOIN pg_stat_activity blocking ON blocking_locks.pid = blocking.pid
WHERE NOT blocked_locks.granted
  AND blocking_locks.granted;

-- Replication lag
SELECT
  client_addr,
  application_name,
  state,
  pg_wal_lsn_diff(pg_current_wal_lsn(), replay_lsn) AS lag_bytes,
  pg_size_pretty(pg_wal_lsn_diff(pg_current_wal_lsn(), replay_lsn)) AS lag,
  EXTRACT(EPOCH FROM (NOW() - pg_last_xact_replay_timestamp())) AS lag_seconds
FROM pg_stat_replication;

-- Checkpoint statistics
SELECT
  checkpoints_timed,
  checkpoints_req,
  ROUND(100.0 * checkpoints_req / NULLIF(checkpoints_timed + checkpoints_req, 0), 2) AS req_checkpoint_pct,
  buffers_checkpoint,
  buffers_clean,
  maxwritten_clean,
  buffers_backend,
  buffers_backend_fsync,
  checkpoint_write_time,
  checkpoint_sync_time
FROM pg_stat_bgwriter;
2 files · sql Explain with highlit

Observability provides insight into database health and performance. I monitor key metrics—queries per second, connection count, cache hit ratio. Slow query logs identify performance problems. Query latency percentiles show user experience. Lock wait time reveals contention. Replication lag affects read consistency. Disk I/O patterns guide hardware decisions. Understanding baseline metrics enables anomaly detection. Database size growth predicts capacity needs. Index usage statistics inform optimization. Proper observability prevents outages through early warning. Essential for production databases, SRE practices. PostgreSQL provides rich statistics views for comprehensive monitoring.