from sentence_transformers import SparseEncoder
from sentence_transformers.util import semantic_search


# Sparse encoder checkpoint shared by the document and query encoding calls
# further down in this script.
model = SparseEncoder(
    model_name_or_path="naver/splade-cocondenser-ensembledistil",
)

# Small in-memory collection: every record pairs a stable id with the text
# that gets embedded for retrieval.
documents = [
    dict(id=doc_id, text=text)
    for doc_id, text in (
        ("doc-001", "Reset expired password links from the account security page."),
        ("doc-002", "Rotate SSH deployment keys before a release window."),
        ("doc-003", "Renew TLS certificates before restarting the web server."),
        ("doc-004", "Export invoice PDFs from the billing dashboard."),
        ("doc-005", "Troubleshoot SAML login errors from the identity provider logs."),
    )
]

# Free-text query used for the search.
query = "account password reset link expired"

# Texts only, in the same order as `documents`, so a position in `corpus`
# can be used directly as an index into `documents`.
corpus = [record["text"] for record in documents]

# Encode the corpus and the query with the model's dedicated document/query
# entry points, requesting tensors and keeping progress bars quiet.
corpus_embeddings = model.encode_document(corpus, convert_to_tensor=True, show_progress_bar=False)
query_embedding = model.encode_query(query, convert_to_tensor=True, show_progress_bar=False)

# Score the query against every corpus entry with the model's own similarity
# function and keep the three best matches. Index 0 selects the result list
# for the first query, which is the only one passed here.
hits = semantic_search(
    query_embeddings=query_embedding,
    corpus_embeddings=corpus_embeddings,
    score_function=model.similarity,
    top_k=3,
)[0]

# Echo the inputs of the run before listing the ranked results.
print(f"Query: {query}")
print(f"Sparse corpus embeddings: {tuple(corpus_embeddings.shape)}")

# Each hit's "corpus_id" is used as an index into `documents` to recover the
# matching record.
for position, match in enumerate(hits, start=1):
    matched = documents[match["corpus_id"]]
    doc_id, doc_text = matched["id"], matched["text"]
    print(f"{position}. {doc_id} score={match['score']:.4f} text={doc_text}")

# Sanity check: the first hit must be doc-001, otherwise exit with a message.
best_hit = hits[0]
top_item = documents[best_hit["corpus_id"]]
if top_item["id"] == "doc-001":
    print("Sparse semantic search check: pass")
else:
    raise SystemExit(f"unexpected top sparse result: {top_item['id']}")