"""Group a small set of task descriptions into three clusters and print them.

Each sentence in ``texts`` is embedded with a SentenceTransformer model, the
embeddings are clustered with KMeans, and the sentences are printed cluster by
cluster.
"""

from collections import defaultdict

from sentence_transformers import SentenceTransformer
from sklearn.cluster import KMeans

texts = [
    "Reset a customer password from the account portal.",
    "Resend the password reset email to the customer.",
    "Review two-factor authentication recovery codes.",
    "Build the web release container image.",
    "Deploy the web release to production.",
    "Roll back the web release from production.",
    "Back up the customer database overnight.",
    "Restore the database backup into staging.",
    "Verify the database backup checksum.",
]

# Encode every sentence on the CPU, requesting normalized embeddings and no
# progress bar output.
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2", device="cpu")
embeddings = model.encode(
    texts,
    normalize_embeddings=True,
    show_progress_bar=False,
)

# random_state is pinned so the clustering is seeded the same way on each run.
kmeans = KMeans(n_clusters=3, random_state=7, n_init=10)
labels = kmeans.fit_predict(embeddings)

# Bucket the sentences by cluster label, preserving their order in ``texts``.
# Labels are converted to plain ints before being used as dictionary keys.
groups = defaultdict(list)
for label, text in zip(labels, texts):
    groups[int(label)].append(text)

# Display order: clusters are sorted by the text of their first member, and
# numbered from 1 in that order rather than by their raw KMeans label.
display_order = sorted(groups, key=lambda cluster_label: groups[cluster_label][0])
for cluster_number, label in enumerate(display_order, start=1):
    print(f"Cluster {cluster_number}:")
    for text in groups[label]:
        print(f"- {text}")
    # Blank line separating this cluster from the next.
    print()