"""Mine hard negatives for a tiny help-desk question/answer dataset.

Builds an in-memory dataset of (query, answer) pairs, embeds it with a
pretrained SentenceTransformer model, asks ``mine_hard_negatives`` for one
negative per query in triplet form, and prints the first few triplets.
"""

from datasets import Dataset
from sentence_transformers import SentenceTransformer
from sentence_transformers.util import mine_hard_negatives

# Each entry pairs a query with its correct (positive) answer. Keeping them
# side by side makes it hard to misalign the two columns when editing.
qa_pairs = [
    (
        "reset a user password",
        "Open the user profile, choose Reset password, and send a recovery email.",
    ),
    (
        "enable two-factor authentication",
        "Open account security, scan the authenticator QR code, and save backup codes.",
    ),
    (
        "export audit logs",
        "Open compliance reports, choose Audit logs, and export the CSV file.",
    ),
    (
        "restore a deleted project",
        "Open deleted projects, select the project, and click Restore.",
    ),
    (
        "rotate an API token",
        "Open API tokens, revoke the old token, and create a replacement token.",
    ),
    (
        "invite a new team member",
        "Open team settings, enter the email address, and send the invitation.",
    ),
]

# Split the pairs into the two parallel columns the dataset expects.
dataset = Dataset.from_dict(
    {
        "query": [question for question, _ in qa_pairs],
        "answer": [reply for _, reply in qa_pairs],
    }
)

model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

# One negative per query, taken from the top of the candidate ranking after
# skipping the first-ranked candidate (range_min=1); at most the first five
# ranks are considered (range_max=5). See the mine_hard_negatives API docs
# for the exact semantics of each option.
mined = mine_hard_negatives(
    dataset=dataset,
    model=model,
    anchor_column_name="query",
    positive_column_name="answer",
    range_min=1,
    range_max=5,
    num_negatives=1,
    sampling_strategy="top",
    output_format="triplet",
    verbose=False,
)

print(mined)

# Show at most three mined triplets; fewer if mining produced fewer rows.
preview_size = min(3, len(mined))
for row in mined.select(range(preview_size)):
    print(f"query: {row['query']}")
    print(f"positive: {row['answer']}")
    print(f"negative: {row['negative']}")
    print("---")