from collections import defaultdict

from sentence_transformers import SentenceTransformer
from sklearn.cluster import KMeans

# Sample task descriptions used as the clustering input. The wording loosely
# covers three topics (account access, web releases, database backups).
texts = [
    # Account access / credentials.
    "Reset a customer password from the account portal.",
    "Resend the password reset email to the customer.",
    "Review two-factor authentication recovery codes.",
    # Web release lifecycle.
    "Build the web release container image.",
    "Deploy the web release to production.",
    "Roll back the web release from production.",
    # Database backups.
    "Back up the customer database overnight.",
    "Restore the database backup into staging.",
    "Verify the database backup checksum.",
]

# Load the MiniLM sentence encoder on the CPU and turn every sentence into a
# normalised embedding vector; the progress bar is switched off to keep the
# script's output limited to the cluster report.
model = SentenceTransformer(
    "sentence-transformers/all-MiniLM-L6-v2",
    device="cpu",
)
embeddings = model.encode(
    texts,
    show_progress_bar=False,
    normalize_embeddings=True,
)

# Partition the embeddings into three clusters. The fixed random_state pins
# the centroid seeding, and n_init is spelled out rather than left to the
# library default.
kmeans = KMeans(
    n_clusters=3,
    n_init=10,
    random_state=7,
)
labels = kmeans.fit_predict(embeddings)

# Bucket each sentence under its cluster label. Labels are converted to plain
# Python ints up front so the dictionary keys are ordinary integers.
groups = defaultdict(list)
for cluster_id, sentence in zip(map(int, labels), texts):
    groups[cluster_id].append(sentence)

# Number the clusters by the alphabetical order of each group's first
# sentence, so the printed numbering follows the text rather than the raw
# label values.
ordered_ids = sorted(groups, key=lambda cluster_id: groups[cluster_id][0])

for position, cluster_id in enumerate(ordered_ids, start=1):
    # Header, one bullet per sentence, then an empty entry that yields the
    # blank separator line once print() appends its own newline.
    lines = [f"Cluster {position}:"]
    lines.extend(f"- {sentence}" for sentence in groups[cluster_id])
    lines.append("")
    print("\n".join(lines))