"""Smoke-check a sentence-embedding model on a tiny support-themed STS set.

Six sentence pairs with reference similarity scores are run through
``EmbeddingSimilarityEvaluator``. The script prints the resulting correlation
metrics and exits with an error message when the evaluator's primary metric
is below 0.75.
"""

from sentence_transformers import SentenceTransformer
from sentence_transformers.sentence_transformer.evaluation import (
    EmbeddingSimilarityEvaluator,
)

# One row per evaluated pair: (first sentence, second sentence, reference
# similarity). Rows with high scores restate the same event in other words;
# the two lowest-scored rows pair an IT sentence with an unrelated one.
_LABELLED_PAIRS = [
    (
        "A support agent reset the user's password.",
        "The account password was reset by support.",
        0.95,
    ),
    (
        "The database backup completed overnight.",
        "A nightly backup of the database finished successfully.",
        0.90,
    ),
    (
        "The release was rolled back after the deploy.",
        "The deployment was reverted after the release.",
        0.86,
    ),
    (
        "The customer changed the billing address.",
        "A user updated payment and billing details.",
        0.55,
    ),
    (
        "The web server returned an SSL certificate warning.",
        "The office printer ran out of toner.",
        0.05,
    ),
    (
        "A new search index was built for documents.",
        "The kitchen refrigerator needs cleaning.",
        0.03,
    ),
]

# The evaluator takes three parallel lists, so split the table column-wise.
sentences1 = [left for left, _, _ in _LABELLED_PAIRS]
sentences2 = [right for _, right, _ in _LABELLED_PAIRS]
scores = [reference for _, _, reference in _LABELLED_PAIRS]

# Lowest primary-metric value that still counts as a pass.
_MIN_PRIMARY_SCORE = 0.75

model = SentenceTransformer(
    "sentence-transformers/all-MiniLM-L6-v2",
    device="cpu",
)

evaluator = EmbeddingSimilarityEvaluator(
    sentences1=sentences1,
    sentences2=sentences2,
    scores=scores,
    name="support-sts",
    main_similarity="cosine",
    show_progress_bar=False,
    write_csv=False,
)

# Calling the evaluator returns a mapping of metric name -> value; the keys
# read below carry the evaluator name ("support-sts") as a prefix.
results = evaluator(model)

# Read only after the evaluation call, in the same order as before.
# NOTE(review): presumably the evaluator finalises this name during the call;
# confirm against the library documentation before reordering.
primary_metric = evaluator.primary_metric

print("Pairs evaluated:", len(sentences1))
print("Primary metric:", primary_metric)

primary_score = results[primary_metric]
print(f"Primary score: {primary_score:.3f}")

pearson_cosine = results["support-sts_pearson_cosine"]
print(f"Pearson cosine: {pearson_cosine:.3f}")

spearman_cosine = results["support-sts_spearman_cosine"]
print(f"Spearman cosine: {spearman_cosine:.3f}")

# Acceptance gate: abort with the message as the exit status on a low score.
if primary_score < _MIN_PRIMARY_SCORE:
    raise SystemExit("STS evaluator score is below the acceptance threshold")

print("check: STS evaluation correlation is above 0.75")