# Smoke check for sparse semantic search: encode a small corpus and a single
# query with a SparseEncoder, rank the corpus with semantic_search, print the
# top matches, and exit with an error unless "doc-001" is ranked first.
from sentence_transformers import SparseEncoder
from sentence_transformers.util import semantic_search

model = SparseEncoder("naver/splade-cocondenser-ensembledistil")

# Each record pairs an identifier with the text that is encoded; the list
# position of a record is what a hit's "corpus_id" is used to index below.
documents = [
    {"id": doc_id, "text": text}
    for doc_id, text in (
        ("doc-001", "Reset expired password links from the account security page."),
        ("doc-002", "Rotate SSH deployment keys before a release window."),
        ("doc-003", "Renew TLS certificates before restarting the web server."),
        ("doc-004", "Export invoice PDFs from the billing dashboard."),
        ("doc-005", "Troubleshoot SAML login errors from the identity provider logs."),
    )
]

query = "account password reset link expired"
corpus = [entry["text"] for entry in documents]

# Both encode calls use the same options: tensor output, no progress bar.
encode_options = {"convert_to_tensor": True, "show_progress_bar": False}
corpus_embeddings = model.encode_document(corpus, **encode_options)
query_embedding = model.encode_query(query, **encode_options)

# Scores come from the model's own similarity function. Only one query is
# searched, so only the first entry of the result is kept.
search_results = semantic_search(
    query_embedding,
    corpus_embeddings,
    top_k=3,
    score_function=model.similarity,
)
hits = search_results[0]

print(f"Query: {query}")
print(f"Sparse corpus embeddings: {tuple(corpus_embeddings.shape)}")

for position, match in enumerate(hits, start=1):
    matched = documents[match["corpus_id"]]
    fields = (
        f"{position}.",
        matched["id"],
        f"score={match['score']:.4f}",
        f"text={matched['text']}",
    )
    print(" ".join(fields))

# The password-reset document is the expected best match for this query;
# any other top result aborts the script with a descriptive message.
top_item = documents[hits[0]["corpus_id"]]
top_id = top_item["id"]
if top_id != "doc-001":
    raise SystemExit(f"unexpected top sparse result: {top_id}")

print("Sparse semantic search check: pass")