from sklearn.cluster import KMeans, DBSCAN, AgglomerativeClustering
from sklearn.metrics import silhouette_score
from sklearn.preprocessing import StandardScaler
# Standardize features so the distance-based models aren't dominated by
# large-scale columns. NOTE(review): assumes `X` is defined upstream
# (2-D array-like of shape (n_samples, n_features)) — confirm with caller.
X_scaled = StandardScaler().fit_transform(X)

# Three models with deliberately different cluster-shape assumptions:
# KMeans (spherical, fixed k), DBSCAN (density-based, marks noise as -1),
# Agglomerative (hierarchical merge, fixed k).
models = {
    'kmeans': KMeans(n_clusters=5, random_state=42, n_init='auto'),
    'dbscan': DBSCAN(eps=0.7, min_samples=10),
    'agglomerative': AgglomerativeClustering(n_clusters=5),
}

for name, model in models.items():
    labels = model.fit_predict(X_scaled)
    # DBSCAN labels noise as -1. Silhouette is still meaningful on the
    # non-noise subset, so score that subset instead of discarding the
    # whole result whenever any noise is present (the previous check
    # `-1 not in set(labels)` skipped DBSCAN almost always, since
    # min_samples=10 nearly guarantees some noise points).
    mask = labels != -1
    clustered = labels[mask]
    # Silhouette needs at least two distinct clusters among scored points.
    if len(set(clustered)) > 1:
        score = silhouette_score(X_scaled[mask], clustered)
        print(name, round(score, 4))
    else:
        print(name, 'silhouette not meaningful for current labels')
Unsupervised work gets much better when you compare clustering assumptions instead of treating one algorithm as ground truth. KMeans prefers spherical clusters, DBSCAN handles noise and arbitrarily shaped clusters, and hierarchical clustering is useful when you want a multi-resolution view of segments. I evaluate with both internal metrics (such as silhouette score) and domain sanity checks.