# Basic matching
email = "user@example.com"
email =~ /@/ # => 4 (index where the match starts; nil when nothing matches)
email.match?(/@/) # => true (boolean only; skips building a MatchData)
# Capture groups - numbered from 1 in the order the groups open.
# Each .+ is greedy, so the pattern prefers the last "@" and the last "." as split points.
match = email.match(/(.+)@(.+)\.(.+)/)
match[1] # => "user"
match[2] # => "example"
match[3] # => "com"
# Named captures - same pattern, but each group is read back by name
match = email.match(/(?<user>.+)@(?<domain>.+)\.(?<tld>.+)/)
match[:user] # => "user"
match[:domain] # => "example"
match[:tld] # => "com"
# Scan - find all matches
# Without groups, scan returns an array of the matched substrings.
text = "My phone is 555-1234 and backup is 555-5678"
phones = text.scan(/\d{3}-\d{4}/)
# => ["555-1234", "555-5678"]
# Scan with groups
# With groups, scan returns one array of captures per match.
text = "Alice: 25, Bob: 30, Charlie: 35"
text.scan(/(\w+): (\d+)/)
# => [["Alice", "25"], ["Bob", "30"], ["Charlie", "35"]]
# gsub - global substitution (returns a new string; the receiver is unchanged)
text = "Hello World"
text.gsub(/[aeiou]/, '*') # => "H*ll* W*rld"
# Block form: each match is yielded and replaced by the block's return value.
text.gsub(/\w+/) { |word| word.capitalize } # => "Hello World"

# gsub with named captures
# \k<name> inside a single-quoted replacement string refers to a named group,
# so the parts can be reordered without a block or the $~ global.
date = "2026-02-02"
formatted_date = date.gsub(
  /(?<year>\d{4})-(?<month>\d{2})-(?<day>\d{2})/,
  '\k<month>/\k<day>/\k<year>'
)
# => "02/02/2026"

# sub - single substitution (only the first match is replaced)
text.sub(/World/, 'Ruby') # => "Hello Ruby"

# Split with regex - any of the listed characters acts as a separator
"one,two;three:four".split(/[,;:]/)
# => ["one", "two", "three", "four"]
# Email validation
# Pragmatic format check: a local part made of word characters, "+", "-" and
# ".", then "@", then one or more dot-separated domain labels ending in an
# alphabetic TLD. \A and \z anchor the whole string; matching is
# case-insensitive.
EMAIL_REGEX = /\A[\w+\-.]+@[a-z\d\-]+(\.[a-z\d\-]+)*\.[a-z]+\z/i

# Reports whether +email+ has the shape described by EMAIL_REGEX.
#
# @param email [String, nil] candidate address
# @return [Boolean] true on a match; false for nil or non-matching input
def valid_email?(email)
  # Regexp#match? returns false for nil, whereas calling match? on nil
  # would raise NoMethodError.
  EMAIL_REGEX.match?(email)
end
# URL validation
# Accepts http:// or https:// followed by at least two characters, the first
# of which is not whitespace, "/", "$", ".", "?" or "#", with no whitespace
# anywhere after the scheme. Anchored to the whole string; case-insensitive.
URL_REGEX = %r{\Ahttps?://[^\s/$.?\#].[^\s]*\z}i

# Reports whether +url+ has the shape described by URL_REGEX.
#
# @param url [String, nil] candidate URL
# @return [Boolean] true on a match; false for nil or non-matching input
def valid_url?(url)
  # Regexp#match? returns false for nil, whereas calling match? on nil
  # would raise NoMethodError.
  URL_REGEX.match?(url)
end
# Phone number extraction
# Ten digits grouped 3-3-4, where each group boundary may carry one "-" or
# "." separator; word boundaries keep longer digit runs from matching.
PHONE_REGEX = /\b\d{3}[-.]?\d{3}[-.]?\d{4}\b/

# Collects every substring of +text+ that matches PHONE_REGEX, in order.
#
# @param text [String] text to search
# @return [Array<String>] matched phone numbers (empty when none are found)
def extract_phones(text)
  numbers = []
  text.scan(PHONE_REGEX) { |number| numbers << number }
  numbers
end
# Extract hashtags
# A hashtag here is "#" followed by one or more word characters.
#
# @param text [String] text to search
# @return [Array<String>] hashtags including the leading "#", in order
def extract_hashtags(text)
  hashtag_pattern = /#\w+/
  text.scan(hashtag_pattern)
end
# Extract mentions
# A mention is "@" followed by one or more word characters, where the "@" is
# not directly preceded by a word character. The lookbehind keeps the domain
# part of an email address ("bob@example.com") from being reported as a
# mention.
#
# @param text [String] text to search
# @return [Array<String>] mentions including the leading "@", in order
def extract_mentions(text)
  text.scan(/(?<!\w)@\w+/)
end
# Remove HTML tags
# Deletes every "<...>" span that has at least one character between the
# angle brackets; text outside such spans is returned untouched.
#
# @param text [String] markup to clean
# @return [String] a new string with the tag-like spans removed
def strip_html(text)
  tag_pattern = /<[^>]+>/
  text.gsub(tag_pattern) { '' }
end
# Camel to snake case
# Inserts "_" at word boundaries and lowercases the result. Runs of capitals
# are treated as one acronym ("HTTPServer" -> "http_server") rather than
# being split letter by letter.
#
# @param str [String] camelCase or PascalCase identifier
# @return [String] snake_case form of +str+
def camel_to_snake(str)
  str
    .gsub(/([A-Z]+)([A-Z][a-z])/, '\1_\2') # acronym followed by a word: "HTTPServer" -> "HTTP_Server"
    .gsub(/([a-z\d])([A-Z])/, '\1_\2')     # lower/digit followed by upper: "camelCase" -> "camel_Case"
    .downcase
end
# Snake to camel case
# Splits on "_", capitalizes each piece (first letter upper, rest lower) and
# glues the pieces back together, so the first word is capitalized as well.
#
# @param str [String] snake_case identifier
# @return [String] the concatenated, capitalized words
def snake_to_camel(str)
  words = str.split('_')
  words.map { |word| word.capitalize }.join
end
# Credit card number pattern: 16 digits in four groups of four, where each
# group boundary may carry one whitespace character or one hyphen.
# NOTE: this only checks the shape of the number; it does not compute the Luhn checksum.
CREDIT_CARD_REGEX = /\b\d{4}[\s-]?\d{4}[\s-]?\d{4}[\s-]?\d{4}\b/
# Extract version numbers
# Finds the first "major.minor.patch" triple in +text+ (an optional leading
# "v" is allowed but not captured).
#
# @param text [String] text to search
# @return [Array<String>, nil] the three numeric parts as strings, or nil
#   when no version is present
def extract_version(text)
  found = text.match(/v?(\d+)\.(\d+)\.(\d+)/)
  return nil unless found

  found.captures
end
# Parse CSV-like data
# Each field is either a double-quoted run (quotes stripped, commas allowed
# inside) or a run of one or more non-comma characters. Because an unquoted
# field needs at least one character, empty unquoted fields ("a,,c") are
# skipped rather than returned as "".
#
# @param line [String] one line of comma-separated text
# @return [Array<String>] the field values in order
def parse_csv_line(line)
  field_pattern = /"([^"]*)"|([^,]+)/
  # Exactly one of the two groups is set for every match.
  line.scan(field_pattern).map { |quoted, bare| quoted || bare }
end
# Redact sensitive data
# Masks every 3-2-4 digit group written with hyphens (the US SSN layout).
#
# @param text [String] text that may contain SSNs
# @return [String] a new string with each SSN replaced by "XXX-XX-XXXX"
def redact_ssn(text)
  ssn_pattern = /\b\d{3}-\d{2}-\d{4}\b/
  text.gsub(ssn_pattern) { 'XXX-XX-XXXX' }
end
# Masks every card number matched by CREDIT_CARD_REGEX, keeping only its
# last four digits.
#
# @param text [String] text that may contain card numbers
# @return [String] a new string with each card shown as "XXXX-XXXX-XXXX-NNNN"
def redact_credit_card(text)
  text.gsub(CREDIT_CARD_REGEX) do |card|
    # The pattern always ends in four digits, so the final four characters
    # are the digits to keep. String#last is ActiveSupport-only, hence the
    # plain slice.
    "XXXX-XXXX-XXXX-#{card[-4, 4]}"
  end
end
# Lookahead - match if followed by
# Find 'foo' only if followed by 'bar'
# (?=...) asserts without consuming, so "bar" is not part of the returned match.
text = "foobar foobaz"
text.scan(/foo(?=bar)/) # => ["foo"] (only the "foo" in "foobar")
# Negative lookahead
# Find 'foo' only if NOT followed by 'bar'
text.scan(/foo(?!bar)/) # => ["foo"] (from "foobaz")
# Lookbehind - match if preceded by
# Find digits preceded by '$'
# (?<=...) checks the text just before the match; the "$" itself is not returned.
text = "Price: $50, Quantity: 50"
text.scan(/(?<=\$)\d+/) # => ["50"] (only the price)
# Backreferences - reference earlier captures
# Find repeated words
# When the pattern has groups, scan returns one array of captures per match
# ([["the"], ["brown"]] here), so flatten produces the flat list of words.
text = "the the quick brown brown fox"
repeated_words = text.scan(/\b(\w+)\s+\1\b/).flatten # => ["the", "brown"]

# Named backreferences - \k<name> refers back to the group with that name
/(?<word>\w+)\s+\k<word>/

# Conditional regex
# Match different patterns based on condition
# NOTE: the pattern below is plain alternation, not a true conditional: the
# second half accepts "yes ok" or "no cancel" regardless of what the first
# group matched. Ruby's actual conditional syntax is (?(1)then|else), which
# branches on whether group 1 participated in the match.
/^(yes|no)\s+(?:(yes)\s+ok|(no)\s+cancel)$/
# Case-insensitive flag
/hello/i.match("HELLO") # => matches

# Multiline mode
# In Ruby, ^ and $ always match at line boundaries, with or without a flag.
# The m flag only changes ".", letting it match newline characters as well.
text = "line1\nline2\nline3"
text.scan(/^line/) # => ["line", "line", "line"] (no flag needed)
text.match?(/line1.line2/) # => false ("." stops at "\n")
text.match?(/line1.line2/m) # => true ("." also matches "\n")
# Extended mode - allows whitespace and comments
# With the x flag, unescaped whitespace and "#" comments inside the literal are
# ignored by the regex engine, so the pattern can be laid out one part per line.
# This pattern accepts a single domain label followed by a TLD (no subdomains).
email_regex = /
\A
[\w+\-.]+ # Local part
@ # At sign
[a-z\d\-]+ # Domain name
\. # Dot
[a-z]+ # TLD
\z
/ix # i = case insensitive, x = extended
# Global match data
# A successful =~ fills in the special variables shown below.
# Regexp.last_match(n) reads the same data under a more descriptive name.
if "hello@world.com" =~ /(\w+)@(\w+)\.(\w+)/
$1 # => "hello"
$2 # => "world"
$3 # => "com"
$& # => "hello@world.com" (entire match)
$` # => "" (before match; empty because the match starts at index 0)
$' # => "" (after match; empty because the match runs to the end)
end
# MatchData object
# match returns a MatchData (or nil when nothing matches) that carries the
# same information without relying on globals.
match = "hello@world.com".match(/(\w+)@(\w+)\.(\w+)/)
match.captures # => ["hello", "world", "com"]
match.pre_match # Text before match ("" here)
match.post_match # Text after match ("" here)
match.string # Original string ("hello@world.com")
Ruby's regex engine provides powerful text processing. I use =~ or match? for simple matching and match when I need captures. Character classes \d, \w, and \s match digits, word characters, and whitespace. Quantifiers *, +, ?, and {n,m} control repetition. Anchors ^ and $ match the start and end of each line, while \A and \z anchor the whole string. Groups () capture subpatterns; (?:) groups without capturing. Named captures (?<name>) improve readability. Lookaheads (?=) and lookbehinds (?<=) assert without consuming characters. scan finds all matches; sub replaces the first match and gsub replaces every match. Regex literals // and %r{} allow different delimiters. Understanding regex enables text validation, parsing, and transformation. I balance regex power with readability—complex patterns need comments or extraction into methods.