# Retrieval smoke test: embed a tiny in-memory knowledge base, fetch the two
# chunks closest to a fixed question, assemble a generator prompt from them,
# and exit non-zero if the expected password-reset chunk is not ranked first.
from sentence_transformers import SentenceTransformer, util

model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

# Knowledge-base chunks; list position doubles as the corpus index that the
# search hits refer back to via "corpus_id".
chunks = [
    {
        "id": "kb-001",
        "title": "Reset a forgotten password",
        "text": (
            "Users can reset a forgotten password from Account settings. "
            "Send the reset email, open the link, and choose a new password."
        ),
    },
    {
        "id": "kb-002",
        "title": "Download invoice receipts",
        "text": (
            "Billing admins can download invoice receipts from the billing "
            "history screen after payment is processed."
        ),
    },
    {
        "id": "kb-003",
        "title": "Rotate API tokens",
        "text": (
            "Create a replacement API token, update the integration, and revoke "
            "the old token after traffic moves to the new credential."
        ),
    },
    {
        "id": "kb-004",
        "title": "Change notification email",
        "text": (
            "Profile owners can change the notification email address and "
            "confirm the new address before alerts move."
        ),
    },
]

query = "How does a user reset a forgotten password?"

# Each document is embedded as "<title>: <text>" so the title contributes to
# the match as well as the body.
documents = [f"{entry['title']}: {entry['text']}" for entry in chunks]

# Both sides are encoded with the same options.
encode_options = {
    "normalize_embeddings": True,
    "convert_to_tensor": True,
    "show_progress_bar": False,
}
document_embeddings = model.encode_document(documents, **encode_options)
query_embedding = model.encode_query(query, **encode_options)

# Only one query is searched, so only the first result list is used.
search_results = util.semantic_search(
    query_embedding,
    document_embeddings,
    top_k=2,
)
hits = search_results[0]

# Resolve every hit back to its source chunk, preserving the hit order.
context_chunks = []
context_sections = []
for match in hits:
    source = chunks[match["corpus_id"]]
    context_chunks.append(source)
    context_sections.append(
        f"[{source['id']}] {source['title']}\n{source['text']}"
    )
context = "\n\n".join(context_sections)

prompt = f"Question: {query}\n\nUse only this context:\n{context}\n\nAnswer:"

print(f"indexed chunks: {len(chunks)}")
print(f"query: {query}")
print("retrieved chunks:")
# hits and context_chunks are parallel lists, so they can be walked together.
for rank, (hit, chunk) in enumerate(zip(hits, context_chunks), start=1):
    print(
        f"{rank}. {chunk['id']} score={hit['score']:.4f} title={chunk['title']}"
    )

print("\nrag prompt context:")
print(context)

# Verification: the password-reset chunk must be ranked first and its title
# must appear in the assembled prompt.
top_chunk_id = context_chunks[0]["id"]
if top_chunk_id != "kb-001":
    raise SystemExit(f"unexpected top chunk: {top_chunk_id}")
if "Reset a forgotten password" not in prompt:
    raise SystemExit("prompt context is missing the retrieved password chunk")

print("\nverification: PASS retriever context is ready for the generator prompt")