-- Install TimescaleDB extension
CREATE EXTENSION IF NOT EXISTS timescaledb;
-- Create regular table
CREATE TABLE sensor_data (
time TIMESTAMPTZ NOT NULL,
sensor_id INT NOT NULL,
temperature DOUBLE PRECISION,
humidity DOUBLE PRECISION,
pressure DOUBLE PRECISION
);
-- Convert to hypertable (time-series optimized)
SELECT create_hypertable('sensor_data', 'time');
-- Hypertable automatically partitions by time
-- Default: 7-day chunks
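-- Quick check of the configured chunk interval (assumes the
-- TimescaleDB 2.x information views)
SELECT hypertable_name, column_name, time_interval
FROM timescaledb_information.dimensions
WHERE hypertable_name = 'sensor_data';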
-- Alternative: set a custom chunk interval at creation time
-- (this call fails if the table is already a hypertable)
SELECT create_hypertable(
'sensor_data',
'time',
chunk_time_interval => INTERVAL '1 day'
);
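-- For an existing hypertable, change the interval instead
-- (affects newly created chunks only)
SELECT set_chunk_time_interval('sensor_data', INTERVAL '1 day');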
-- Create index on sensor_id
CREATE INDEX idx_sensor_data_sensor_id_time
ON sensor_data (sensor_id, time DESC);
-- Insert time-series data
INSERT INTO sensor_data VALUES
('2024-01-15 10:00:00', 1, 22.5, 45.2, 1013.25),
('2024-01-15 10:05:00', 1, 22.7, 45.0, 1013.30),
('2024-01-15 10:00:00', 2, 21.8, 48.5, 1012.90);
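-- Idempotent re-ingestion sketch: hypertables support ON CONFLICT on
-- uncompressed chunks if a unique index includes the time column
-- (the index name below is illustrative)
CREATE UNIQUE INDEX idx_sensor_data_sensor_id_time_unique
ON sensor_data (sensor_id, time);
INSERT INTO sensor_data VALUES
('2024-01-15 10:00:00', 1, 22.6, 45.1, 1013.20)
ON CONFLICT (sensor_id, time) DO UPDATE
SET temperature = EXCLUDED.temperature,
humidity = EXCLUDED.humidity,
pressure = EXCLUDED.pressure;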
-- Time-bucketing queries
SELECT
time_bucket('15 minutes', time) AS bucket,
sensor_id,
AVG(temperature) AS avg_temp,
MAX(temperature) AS max_temp,
MIN(temperature) AS min_temp,
COUNT(*) AS readings
FROM sensor_data
WHERE time >= NOW() - INTERVAL '1 day'
GROUP BY bucket, sensor_id
ORDER BY bucket DESC, sensor_id;
-- Time-bucket with GAPFILL
SELECT
time_bucket_gapfill('1 hour', time) AS hour,
sensor_id,
AVG(temperature) AS avg_temp,
LOCF(AVG(temperature)) AS filled_temp -- Last observation carried forward
FROM sensor_data
WHERE time >= NOW() - INTERVAL '7 days'
AND time < NOW() -- gapfill needs both bounds (or explicit start/finish arguments)
GROUP BY hour, sensor_id
ORDER BY hour DESC, sensor_id;
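-- interpolate() is the linear-interpolation counterpart to LOCF
SELECT
time_bucket_gapfill('1 hour', time) AS hour,
sensor_id,
interpolate(AVG(temperature)) AS interpolated_temp
FROM sensor_data
WHERE time >= NOW() - INTERVAL '7 days'
AND time < NOW()
GROUP BY hour, sensor_id
ORDER BY hour DESC, sensor_id;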
-- Continuous aggregates (materialized views for time-series)
CREATE MATERIALIZED VIEW sensor_data_hourly
WITH (timescaledb.continuous) AS
SELECT
time_bucket('1 hour', time) AS hour,
sensor_id,
AVG(temperature) AS avg_temperature,
MAX(temperature) AS max_temperature,
MIN(temperature) AS min_temperature,
AVG(humidity) AS avg_humidity,
COUNT(*) AS reading_count
FROM sensor_data
GROUP BY hour, sensor_id;
-- Refresh continuous aggregate
CALL refresh_continuous_aggregate('sensor_data_hourly', NULL, NULL);
-- Automatic refresh policy
SELECT add_continuous_aggregate_policy(
'sensor_data_hourly',
start_offset => INTERVAL '3 hours',
end_offset => INTERVAL '1 hour',
schedule_interval => INTERVAL '1 hour'
);
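-- Query the continuous aggregate like any other relation; depending on
-- version and the materialized_only setting, real-time aggregation may
-- also include not-yet-materialized rows
SELECT hour, sensor_id, avg_temperature, reading_count
FROM sensor_data_hourly
WHERE hour >= NOW() - INTERVAL '7 days'
ORDER BY hour DESC, sensor_id;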
-- Compression (often reduces storage by 90%+ on telemetry-style data)
ALTER TABLE sensor_data SET (
timescaledb.compress,
timescaledb.compress_segmentby = 'sensor_id',
timescaledb.compress_orderby = 'time DESC'
);
-- Add compression policy (compress data older than 7 days)
SELECT add_compression_policy(
'sensor_data',
INTERVAL '7 days'
);
-- Manual compression: show_chunks() returns schema-qualified chunk
-- names that can be passed straight to compress_chunk()
SELECT compress_chunk(c, if_not_compressed => TRUE)
FROM show_chunks(
'sensor_data',
older_than => INTERVAL '7 days'
) c;
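-- To backfill or update already-compressed data on older versions
-- (which reject UPDATE/DELETE on compressed chunks), decompress first
SELECT decompress_chunk(c, if_compressed => TRUE)
FROM show_chunks(
'sensor_data',
older_than => INTERVAL '7 days'
) c;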
-- Data retention policy (auto-delete old data)
SELECT add_retention_policy(
'sensor_data',
INTERVAL '90 days'
);
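-- Policies run as background jobs; inspect them, or remove them when
-- no longer needed
SELECT job_id, application_name, schedule_interval, config
FROM timescaledb_information.jobs
WHERE hypertable_name = 'sensor_data';
SELECT remove_compression_policy('sensor_data');
SELECT remove_retention_policy('sensor_data');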
-- View chunks
SELECT * FROM timescaledb_information.chunks
WHERE hypertable_name = 'sensor_data'
ORDER BY range_start DESC;
-- Chunk compression statistics (TimescaleDB 2.x)
SELECT
chunk_name,
pg_size_pretty(before_compression_total_bytes) AS uncompressed_size,
pg_size_pretty(after_compression_total_bytes) AS compressed_size,
ROUND(
100.0 * after_compression_total_bytes
/ NULLIF(before_compression_total_bytes, 0),
2
) AS pct_of_original_size
FROM chunk_compression_stats('sensor_data')
WHERE compression_status = 'Compressed';
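-- Overall hypertable size across all chunks and indexes
SELECT pg_size_pretty(hypertable_size('sensor_data')) AS total_size;
SELECT * FROM hypertable_detailed_size('sensor_data');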
-- Moving average over time
SELECT
time,
sensor_id,
temperature,
AVG(temperature) OVER (
PARTITION BY sensor_id
ORDER BY time
ROWS BETWEEN 5 PRECEDING AND CURRENT ROW
) AS moving_avg_6_readings
FROM sensor_data
WHERE sensor_id = 1
ORDER BY time DESC;
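-- A time-based (RANGE) window averages over a fixed interval rather
-- than a fixed row count, so irregular sampling is handled correctly
SELECT
time,
temperature,
AVG(temperature) OVER (
ORDER BY time
RANGE BETWEEN INTERVAL '30 minutes' PRECEDING AND CURRENT ROW
) AS moving_avg_30m
FROM sensor_data
WHERE sensor_id = 1
ORDER BY time DESC;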
-- Rate of change
SELECT
time,
temperature,
temperature - LAG(temperature) OVER (ORDER BY time) AS temp_change,
EXTRACT(EPOCH FROM (time - LAG(time) OVER (ORDER BY time))) AS seconds_elapsed,
(temperature - LAG(temperature) OVER (ORDER BY time)) /
NULLIF(EXTRACT(EPOCH FROM (time - LAG(time) OVER (ORDER BY time))), 0)
AS rate_per_second
FROM sensor_data
WHERE sensor_id = 1
ORDER BY time DESC;
-- Time-weighted average (requires the timescaledb_toolkit extension)
CREATE EXTENSION IF NOT EXISTS timescaledb_toolkit;
SELECT
sensor_id,
time_bucket('1 day', time) AS day,
average(
time_weight('LOCF', time, temperature)
) AS time_weighted_avg
FROM sensor_data
GROUP BY sensor_id, day;
-- Detect anomalies (values beyond 2 standard deviations)
WITH stats AS (
SELECT
sensor_id,
AVG(temperature) AS mean_temp,
STDDEV(temperature) AS stddev_temp
FROM sensor_data
WHERE time >= NOW() - INTERVAL '30 days'
GROUP BY sensor_id
)
SELECT
s.time,
s.sensor_id,
s.temperature,
st.mean_temp,
st.stddev_temp,
ABS(s.temperature - st.mean_temp) / NULLIF(st.stddev_temp, 0) AS z_score
FROM sensor_data s
JOIN stats st ON s.sensor_id = st.sensor_id
WHERE ABS(s.temperature - st.mean_temp) > 2 * st.stddev_temp
ORDER BY s.time DESC;
-- Downsampling (reduce granularity)
SELECT
time_bucket('5 minutes', time) AS bucket,
sensor_id,
AVG(temperature) AS avg_temp,
first(temperature, time) AS first_temp,
last(temperature, time) AS last_temp,
MAX(temperature) AS max_temp,
MIN(temperature) AS min_temp
FROM sensor_data
WHERE time >= NOW() - INTERVAL '1 day'
GROUP BY bucket, sensor_id
ORDER BY bucket DESC;
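-- For charting, the toolkit's lttb() downsamples to a fixed number of
-- points while preserving visual shape (requires timescaledb_toolkit)
SELECT time, value
FROM unnest((
SELECT lttb(time, temperature, 100)
FROM sensor_data
WHERE sensor_id = 1
));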
-- Find gaps in data (> 10 minutes between consecutive readings);
-- window functions can't appear in WHERE or HAVING, so filter in an
-- outer query
WITH consecutive AS (
SELECT
time AS gap_start,
LEAD(time) OVER (PARTITION BY sensor_id ORDER BY time) AS gap_end
FROM sensor_data
WHERE sensor_id = 1
AND time >= NOW() - INTERVAL '1 day'
)
SELECT
gap_start,
gap_end,
EXTRACT(EPOCH FROM (gap_end - gap_start)) / 60 AS gap_minutes
FROM consecutive
WHERE gap_end - gap_start > INTERVAL '10 minutes'
ORDER BY gap_start DESC;
-- Histogram of values
SELECT
width_bucket(temperature, 15, 30, 10) AS bucket,
COUNT(*) AS count,
MIN(temperature) AS min_temp,
MAX(temperature) AS max_temp
FROM sensor_data
WHERE time >= NOW() - INTERVAL '7 days'
GROUP BY bucket
ORDER BY bucket;
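-- TimescaleDB's histogram() aggregate returns bucket counts as one
-- array per group (nbuckets + 2 elements, for values below/above range)
SELECT
sensor_id,
histogram(temperature, 15, 30, 10) AS temp_histogram
FROM sensor_data
WHERE time >= NOW() - INTERVAL '7 days'
GROUP BY sensor_id;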
-- Cumulative sum over time
SELECT
time,
sensor_id,
temperature,
SUM(temperature) OVER (
PARTITION BY sensor_id
ORDER BY time
) AS cumulative_temp
FROM sensor_data
WHERE sensor_id = 1
ORDER BY time;
-- Regular time-series table (without TimescaleDB), using native partitioning
CREATE TABLE metrics (
timestamp TIMESTAMPTZ NOT NULL,
metric_name VARCHAR(100) NOT NULL,
value DOUBLE PRECISION NOT NULL,
tags JSONB
) PARTITION BY RANGE (timestamp);
-- Partition by month
CREATE TABLE metrics_2024_01 PARTITION OF metrics
FOR VALUES FROM ('2024-01-01') TO ('2024-02-01');
CREATE INDEX idx_metrics_2024_01_time
ON metrics_2024_01(timestamp DESC);
CREATE INDEX idx_metrics_2024_01_name_time
ON metrics_2024_01(metric_name, timestamp DESC);
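-- A DEFAULT partition (PostgreSQL 11+) catches rows outside any defined
-- range, so inserts don't fail between partition rollouts
CREATE TABLE metrics_default PARTITION OF metrics DEFAULT;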
-- Query with time-series patterns
SELECT
DATE_TRUNC('hour', timestamp) AS hour,
metric_name,
AVG(value) AS avg_value,
PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY value) AS p95_value,
MAX(value) AS max_value
FROM metrics
WHERE timestamp >= NOW() - INTERVAL '24 hours'
AND metric_name = 'response_time_ms'
GROUP BY hour, metric_name
ORDER BY hour DESC;