"""Index a tiny support corpus in Qdrant and verify a semantic query.

The script embeds four short support documents with a Sentence Transformers
model, stores the vectors in a local ``:memory:`` Qdrant collection, runs one
query, prints the two best matches, and exits with an error if the top match
is not the password-reset document (``doc-001``).
"""

import numpy as np
from qdrant_client import QdrantClient, models
from sentence_transformers import SentenceTransformer

collection_name = "support_docs"
query = "password reset instructions"

corpus = [
    {
        "doc_id": "doc-001",
        "title": "Reset a forgotten password",
        "text": "Reset a forgotten password from account settings and confirm the email link.",
    },
    {
        "doc_id": "doc-002",
        "title": "Create an invoice receipt",
        "text": "Create a billing invoice and download a PDF receipt.",
    },
    {
        "doc_id": "doc-003",
        "title": "Rotate API tokens",
        "text": "Rotate API tokens before sharing a new integration with a teammate.",
    },
    {
        "doc_id": "doc-004",
        "title": "Store semantic vectors",
        "text": "Qdrant stores Sentence Transformers embeddings for semantic search.",
    },
]

# NOTE(review): encode_document / encode_query are assumed to be available in
# the installed sentence-transformers release -- confirm the pinned version.
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

# Only the "text" field is embedded; the full record travels as the payload.
documents = [item["text"] for item in corpus]
document_embeddings = model.encode_document(
    documents,
    normalize_embeddings=True,
    convert_to_numpy=True,
    show_progress_bar=False,
)
document_embeddings = np.asarray(document_embeddings, dtype="float32")

# The collection's vector size is taken from the encoded matrix itself so it
# always agrees with whatever the model produced.
dimension = document_embeddings.shape[1]

client = QdrantClient(":memory:")
client.create_collection(
    collection_name=collection_name,
    vectors_config=models.VectorParams(
        size=dimension,
        distance=models.Distance.COSINE,
    ),
)

# Point ids are 1-based positions in ``corpus``.
client.upload_points(
    collection_name=collection_name,
    points=[
        models.PointStruct(
            id=index,
            vector=vector.tolist(),
            payload=item,
        )
        for index, (item, vector) in enumerate(
            zip(corpus, document_embeddings), start=1
        )
    ],
)

query_embedding = model.encode_query(
    query,
    normalize_embeddings=True,
    convert_to_numpy=True,
    show_progress_bar=False,
)

hits = client.query_points(
    collection_name=collection_name,
    query=query_embedding.tolist(),
    limit=2,
    with_payload=True,
).points

point_count = client.count(collection_name=collection_name, exact=True).count

print(f"collection: {collection_name}")
print(f"vector size: {dimension}")
print(f"points: {point_count}")
print(f"query: {query}")
print("top matches:")
for rank, hit in enumerate(hits, start=1):
    payload = hit.payload
    print(
        f"{rank}. {payload['doc_id']} score={hit.score:.4f} "
        f"title={payload['title']}"
    )

# Verification: fail with a clear message rather than an IndexError traceback
# when the query comes back empty.
if not hits:
    raise SystemExit("unexpected top match: query returned no points")

top_doc_id = hits[0].payload["doc_id"]
if top_doc_id != "doc-001":
    raise SystemExit(f"unexpected top match: {top_doc_id}")

print("verification: PASS query returned the password reset document")