-- Unnormalized (0NF): Repeating groups
CREATE TABLE orders_bad (
order_id INT,
customer_name VARCHAR(100),
customer_email VARCHAR(100),
product1 VARCHAR(100),
product2 VARCHAR(100),
product3 VARCHAR(100)
);
-- Problem: Fixed number of products, redundant customer data
-- First Normal Form (1NF): Atomic values
CREATE TABLE orders_1nf (
order_id INT PRIMARY KEY,
customer_name VARCHAR(100),
customer_email VARCHAR(100),
product VARCHAR(100)
);
-- Better: One product per row, but still redundant customer data
-- Second Normal Form (2NF): No partial dependencies
CREATE TABLE orders (
order_id SERIAL PRIMARY KEY,
customer_id INT REFERENCES customers(id),
order_date TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
CREATE TABLE customers (
id SERIAL PRIMARY KEY,
name VARCHAR(100),
email VARCHAR(100) UNIQUE
);
CREATE TABLE order_items (
id SERIAL PRIMARY KEY,
order_id INT REFERENCES orders(order_id),
product_id INT REFERENCES products(id),
quantity INT,
price DECIMAL(10,2)
);
-- Better: Customer data in separate table
-- Third Normal Form (3NF): No transitive dependencies
CREATE TABLE products (
id SERIAL PRIMARY KEY,
name VARCHAR(200),
category_id INT REFERENCES categories(id),
-- Don't store category_name here (transitive dependency)
price DECIMAL(10,2)
);
CREATE TABLE categories (
id SERIAL PRIMARY KEY,
name VARCHAR(100),
description TEXT
);
-- Proper 3NF: Category details in separate table
-- Denormalization for performance
CREATE TABLE products_denormalized (
id SERIAL PRIMARY KEY,
name VARCHAR(200),
category_id INT REFERENCES categories(id),
category_name VARCHAR(100), -- Denormalized!
price DECIMAL(10,2),
total_orders INT DEFAULT 0, -- Cached aggregate
last_ordered_at TIMESTAMP
);
-- Trade: Faster reads, more storage, must maintain consistency
-- Maintain denormalized data with triggers
CREATE OR REPLACE FUNCTION update_product_stats()
RETURNS TRIGGER AS $$
BEGIN
UPDATE products_denormalized
SET total_orders = total_orders + 1,
last_ordered_at = CURRENT_TIMESTAMP
WHERE id = NEW.product_id;
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
CREATE TRIGGER trg_update_product_stats
AFTER INSERT ON order_items
FOR EACH ROW
EXECUTE FUNCTION update_product_stats();
-- Star Schema (Data Warehouse)
CREATE TABLE fact_sales (
sale_id BIGSERIAL PRIMARY KEY,
date_key INT REFERENCES dim_date(date_key),
product_key INT REFERENCES dim_product(product_key),
customer_key INT REFERENCES dim_customer(customer_key),
store_key INT REFERENCES dim_store(store_key),
quantity INT,
amount DECIMAL(10,2),
cost DECIMAL(10,2),
profit DECIMAL(10,2)
);
CREATE TABLE dim_date (
date_key INT PRIMARY KEY,
date DATE,
year INT,
quarter INT,
month INT,
day_of_week INT,
is_weekend BOOLEAN
);
CREATE TABLE dim_product (
product_key INT PRIMARY KEY,
product_id INT,
product_name VARCHAR(200),
category VARCHAR(100),
brand VARCHAR(100)
);
-- Polymorphic Associations
CREATE TABLE comments (
id SERIAL PRIMARY KEY,
commentable_type VARCHAR(50), -- 'Post', 'Photo', etc.
commentable_id INT,
content TEXT,
user_id INT REFERENCES users(id)
);
CREATE INDEX idx_comments_polymorphic
ON comments(commentable_type, commentable_id);
-- Better: Separate junction tables
CREATE TABLE post_comments (
id SERIAL PRIMARY KEY,
post_id INT REFERENCES posts(id),
comment_id INT REFERENCES comments(id)
);
CREATE TABLE photo_comments (
id SERIAL PRIMARY KEY,
photo_id INT REFERENCES photos(id),
comment_id INT REFERENCES comments(id)
);
-- Soft Deletes
CREATE TABLE posts (
id SERIAL PRIMARY KEY,
title VARCHAR(200),
content TEXT,
deleted_at TIMESTAMP NULL
);
CREATE INDEX idx_posts_active ON posts(id) WHERE deleted_at IS NULL;
-- Soft delete
UPDATE posts SET deleted_at = CURRENT_TIMESTAMP WHERE id = 1;
-- Query only active
SELECT * FROM posts WHERE deleted_at IS NULL;
-- Audit Trail / History Table
CREATE TABLE users_history (
history_id SERIAL PRIMARY KEY,
user_id INT,
name VARCHAR(100),
email VARCHAR(100),
changed_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
changed_by INT,
operation VARCHAR(10) -- 'INSERT', 'UPDATE', 'DELETE'
);
CREATE OR REPLACE FUNCTION audit_users()
RETURNS TRIGGER AS $$
BEGIN
IF TG_OP = 'DELETE' THEN
INSERT INTO users_history (user_id, name, email, operation)
VALUES (OLD.id, OLD.name, OLD.email, 'DELETE');
RETURN OLD;
ELSIF TG_OP = 'UPDATE' THEN
INSERT INTO users_history (user_id, name, email, operation)
VALUES (NEW.id, NEW.name, NEW.email, 'UPDATE');
RETURN NEW;
ELSIF TG_OP = 'INSERT' THEN
INSERT INTO users_history (user_id, name, email, operation)
VALUES (NEW.id, NEW.name, NEW.email, 'INSERT');
RETURN NEW;
END IF;
END;
$$ LANGUAGE plpgsql;
CREATE TRIGGER trg_audit_users
AFTER INSERT OR UPDATE OR DELETE ON users
FOR EACH ROW
EXECUTE FUNCTION audit_users();