Database design patterns and anti-patterns

1638 views

                  -- Pattern: Single Table Inheritance (STI)
CREATE TABLE users (
  id SERIAL PRIMARY KEY,
  type VARCHAR(50) NOT NULL, -- 'admin', 'customer', 'vendor'
  username VARCHAR(100) UNIQUE NOT NULL,
  email VARCHAR(100) UNIQUE NOT NULL,
  -- Admin-specific
  permission_level INT,
  -- Customer-specific
  loyalty_points INT,
  -- Vendor-specific
  company_name VARCHAR(200),
  tax_id VARCHAR(50),
  created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  CHECK (
    (type = 'admin' AND permission_level IS NOT NULL) OR
    (type = 'customer' AND loyalty_points IS NOT NULL) OR
    (type = 'vendor' AND company_name IS NOT NULL)
  )
);

-- Query specific type
SELECT * FROM users WHERE type = 'admin';

-- Pattern: Class Table Inheritance (CTI)
CREATE TABLE users_base (
  id SERIAL PRIMARY KEY,
  username VARCHAR(100) UNIQUE NOT NULL,
  email VARCHAR(100) UNIQUE NOT NULL,
  created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);

CREATE TABLE admins (
  user_id INT PRIMARY KEY REFERENCES users_base(id) ON DELETE CASCADE,
  permission_level INT NOT NULL
);

CREATE TABLE customers (
  user_id INT PRIMARY KEY REFERENCES users_base(id) ON DELETE CASCADE,
  loyalty_points INT DEFAULT 0
);

CREATE TABLE vendors (
  user_id INT PRIMARY KEY REFERENCES users_base(id) ON DELETE CASCADE,
  company_name VARCHAR(200) NOT NULL,
  tax_id VARCHAR(50)
);

-- Query with join
SELECT u.*, c.loyalty_points
FROM users_base u
JOIN customers c ON u.id = c.user_id
WHERE u.email = 'customer@example.com';

-- Pattern: Soft deletes
ALTER TABLE products ADD COLUMN deleted_at TIMESTAMP;
CREATE INDEX idx_products_deleted_at ON products(deleted_at) WHERE deleted_at IS NULL;

-- Delete (soft)
UPDATE products SET deleted_at = CURRENT_TIMESTAMP WHERE id = 123;

-- Query active records
SELECT * FROM products WHERE deleted_at IS NULL;

-- Create view for convenience
CREATE VIEW active_products AS
SELECT * FROM products WHERE deleted_at IS NULL;

SELECT * FROM active_products;

-- Pattern: Audit trail
CREATE TABLE products_audit (
  audit_id SERIAL PRIMARY KEY,
  product_id INT NOT NULL,
  operation VARCHAR(10) NOT NULL, -- INSERT, UPDATE, DELETE
  old_data JSONB,
  new_data JSONB,
  changed_by VARCHAR(100),
  changed_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);

CREATE OR REPLACE FUNCTION audit_products()
RETURNS TRIGGER AS $$
BEGIN
  IF TG_OP = 'INSERT' THEN
    INSERT INTO products_audit (product_id, operation, new_data, changed_by)
    VALUES (NEW.id, 'INSERT', row_to_json(NEW)::jsonb, current_user);
  ELSIF TG_OP = 'UPDATE' THEN
    INSERT INTO products_audit (product_id, operation, old_data, new_data, changed_by)
    VALUES (NEW.id, 'UPDATE', row_to_json(OLD)::jsonb, row_to_json(NEW)::jsonb, current_user);
  ELSIF TG_OP = 'DELETE' THEN
    INSERT INTO products_audit (product_id, operation, old_data, changed_by)
    VALUES (OLD.id, 'DELETE', row_to_json(OLD)::jsonb, current_user);
  END IF;
  RETURN NULL;
END;
$$ LANGUAGE plpgsql;

CREATE TRIGGER products_audit_trigger
AFTER INSERT OR UPDATE OR DELETE ON products
FOR EACH ROW EXECUTE FUNCTION audit_products();

-- View audit history
SELECT
  audit_id,
  product_id,
  operation,
  old_data->>'name' AS old_name,
  new_data->>'name' AS new_name,
  changed_by,
  changed_at
FROM products_audit
WHERE product_id = 123
ORDER BY changed_at DESC;

-- Pattern: Optimistic locking with version
CREATE TABLE documents (
  id SERIAL PRIMARY KEY,
  title VARCHAR(200),
  content TEXT,
  version INT DEFAULT 1,
  updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);

-- Update with version check
UPDATE documents
SET
  content = 'New content',
  version = version + 1,
  updated_at = CURRENT_TIMESTAMP
WHERE id = 123 AND version = 5;
-- Returns 0 rows if version changed (conflict)

-- Pattern: Materialized path for hierarchies
CREATE TABLE categories_path (
  id SERIAL PRIMARY KEY,
  name VARCHAR(100),
  path VARCHAR(500), -- '/1/3/7/'
  depth INT
);

CREATE INDEX idx_categories_path ON categories_path(path);

-- Find all descendants
SELECT * FROM categories_path
WHERE path LIKE '/1/3/%';

-- Find all ancestors
-- For node '/1/3/7/15/', extract ancestors
WITH RECURSIVE ancestors AS (
  SELECT 1 AS level, '/1/' AS ancestor_path
  UNION ALL
  SELECT level + 1, ancestor_path || '3/'
  FROM ancestors WHERE level < 3
)
SELECT c.*
FROM ancestors a
JOIN categories_path c ON c.path = a.ancestor_path;

                  -- ANTI-PATTERN: EAV (Entity-Attribute-Value)
-- Problem: Difficult to query, no type safety, poor performance
CREATE TABLE eav_bad (
  entity_id INT,
  attribute_name VARCHAR(100),
  attribute_value TEXT -- Everything is text!
);

INSERT INTO eav_bad VALUES
(1, 'name', 'Product A'),
(1, 'price', '19.99'),
(1, 'in_stock', 'true');

-- Query is nightmare
SELECT
  MAX(CASE WHEN attribute_name = 'name' THEN attribute_value END) AS name,
  MAX(CASE WHEN attribute_name = 'price' THEN attribute_value END)::DECIMAL AS price,
  MAX(CASE WHEN attribute_name = 'in_stock' THEN attribute_value END)::BOOLEAN AS in_stock
FROM eav_bad
WHERE entity_id = 1
GROUP BY entity_id;

-- SOLUTION: Use JSONB or proper columns
CREATE TABLE products_good (
  id SERIAL PRIMARY KEY,
  name VARCHAR(200),
  price DECIMAL(10,2),
  in_stock BOOLEAN,
  extra_attributes JSONB -- For truly dynamic attributes
);

INSERT INTO products_good VALUES
(1, 'Product A', 19.99, true, '{"color": "red", "size": "L"}');

SELECT
  name,
  price,
  in_stock,
  extra_attributes->>'color' AS color
FROM products_good
WHERE id = 1;

-- ANTI-PATTERN: Polymorphic associations without constraints
CREATE TABLE comments_bad (
  id SERIAL PRIMARY KEY,
  commentable_type VARCHAR(50), -- 'Post', 'Photo', etc.
  commentable_id INT,           -- Foreign key to what?
  content TEXT
);

-- No referential integrity!
INSERT INTO comments_bad VALUES
(1, 'Post', 999, 'Nice post!'); -- Post 999 doesn't exist

-- SOLUTION: Separate foreign keys or exclusive arc
CREATE TABLE comments_good (
  id SERIAL PRIMARY KEY,
  post_id INT REFERENCES posts(id),
  photo_id INT REFERENCES photos(id),
  content TEXT,
  CHECK (
    (post_id IS NOT NULL AND photo_id IS NULL) OR
    (post_id IS NULL AND photo_id IS NOT NULL)
  )
);

-- Or: Separate tables
CREATE TABLE post_comments (
  id SERIAL PRIMARY KEY,
  post_id INT NOT NULL REFERENCES posts(id) ON DELETE CASCADE,
  content TEXT
);

CREATE TABLE photo_comments (
  id SERIAL PRIMARY KEY,
  photo_id INT NOT NULL REFERENCES photos(id) ON DELETE CASCADE,
  content TEXT
);

-- ANTI-PATTERN: God tables (too many columns)
CREATE TABLE users_bad (
  id SERIAL PRIMARY KEY,
  -- 100+ columns here...
  username VARCHAR(100),
  email VARCHAR(100),
  -- Billing info
  billing_address TEXT,
  billing_city VARCHAR(100),
  -- Shipping info
  shipping_address TEXT,
  shipping_city VARCHAR(100),
  -- Preferences
  theme VARCHAR(50),
  language VARCHAR(10),
  -- ...50 more columns
  created_at TIMESTAMP
);

-- SOLUTION: Normalize into related tables
CREATE TABLE users_good (
  id SERIAL PRIMARY KEY,
  username VARCHAR(100),
  email VARCHAR(100),
  created_at TIMESTAMP
);

CREATE TABLE user_addresses (
  id SERIAL PRIMARY KEY,
  user_id INT REFERENCES users_good(id),
  address_type VARCHAR(20), -- 'billing', 'shipping'
  street TEXT,
  city VARCHAR(100),
  country VARCHAR(100)
);

CREATE TABLE user_preferences (
  user_id INT PRIMARY KEY REFERENCES users_good(id),
  theme VARCHAR(50),
  language VARCHAR(10),
  timezone VARCHAR(50)
);

-- ANTI-PATTERN: Storing arrays/lists as comma-separated strings
CREATE TABLE posts_bad (
  id SERIAL PRIMARY KEY,
  tags VARCHAR(500) -- 'javascript,react,nodejs'
);

-- Hard to query
SELECT * FROM posts_bad WHERE tags LIKE '%react%';
-- Matches 'react' and 'preact'!

-- SOLUTION: Many-to-many table or array type
CREATE TABLE posts_good (
  id SERIAL PRIMARY KEY,
  tags TEXT[] -- PostgreSQL array
);

INSERT INTO posts_good VALUES
(1, ARRAY['javascript', 'react', 'nodejs']);

-- Query with array operators
SELECT * FROM posts_good WHERE 'react' = ANY(tags);

-- Or proper many-to-many
CREATE TABLE tags (
  id SERIAL PRIMARY KEY,
  name VARCHAR(50) UNIQUE
);

CREATE TABLE post_tags (
  post_id INT REFERENCES posts(id),
  tag_id INT REFERENCES tags(id),
  PRIMARY KEY (post_id, tag_id)
);

-- ANTI-PATTERN: Improper NULL handling
CREATE TABLE orders_bad (
  id SERIAL PRIMARY KEY,
  total DECIMAL(10,2),
  discount DECIMAL(10,2) -- NULL means no discount, but...
);

-- Bug: NULL in calculation
SELECT total - discount AS final_total FROM orders_bad;
-- Returns NULL if discount is NULL!

-- SOLUTION: Use COALESCE or DEFAULT
SELECT total - COALESCE(discount, 0) AS final_total FROM orders_bad;

-- Or better: Use NOT NULL with DEFAULT
CREATE TABLE orders_good (
  id SERIAL PRIMARY KEY,
  total DECIMAL(10,2) NOT NULL,
  discount DECIMAL(10,2) NOT NULL DEFAULT 0
);

-- ANTI-PATTERN: Duplicate data instead of normalization
CREATE TABLE order_items_bad (
  id SERIAL PRIMARY KEY,
  order_id INT,
  product_id INT,
  product_name VARCHAR(200), -- Duplicated!
  product_price DECIMAL(10,2), -- Duplicated!
  quantity INT
);

-- What if product name/price changes?

-- SOLUTION: Normalize or snapshot intentionally
CREATE TABLE order_items_good (
  id SERIAL PRIMARY KEY,
  order_id INT REFERENCES orders(id),
  product_id INT REFERENCES products(id),
  -- Snapshot at order time (intentional denormalization)
  price_at_purchase DECIMAL(10,2) NOT NULL,
  name_at_purchase VARCHAR(200) NOT NULL,
  quantity INT NOT NULL
);

-- ANTI-PATTERN: Magic values instead of foreign keys
CREATE TABLE users_bad2 (
  id SERIAL PRIMARY KEY,
  status INT -- 1=active, 2=suspended, 3=deleted
);

-- SOLUTION: Lookup table or enum
CREATE TYPE user_status AS ENUM ('active', 'suspended', 'deleted');

CREATE TABLE users_good2 (
  id SERIAL PRIMARY KEY,
  status user_status DEFAULT 'active'
);

-- Or lookup table for complex cases
CREATE TABLE statuses (
  id SERIAL PRIMARY KEY,
  name VARCHAR(50) UNIQUE,
  description TEXT,
  display_color VARCHAR(7)
);

CREATE TABLE users_good3 (
  id SERIAL PRIMARY KEY,
  status_id INT REFERENCES statuses(id)
);

Database design patterns solve recurring problems. I use the Repository pattern to abstract data access. Active Record maps objects to tables. Unit of Work tracks changes for batch commits. Identity Map caches loaded entities. The Specification pattern encapsulates query logic. Avoid anti-patterns: EAV (Entity-Attribute-Value) creates query complexity. Polymorphic associations without constraints risk orphans. Massive denormalization causes update anomalies. God tables accumulate too many columns. Improper indexing degrades performance. Understanding patterns prevents technical debt, improves maintainability. Essential for scalable, clean database architecture.

Maria Garcia

More from Maria Garcia

Database design patterns and anti-patterns

0 Comments

More from Maria Garcia