"""Build, persist, reload, and query a small FAISS index of sentence embeddings.

The script embeds a tiny in-memory corpus, stores the vectors in a flat
inner-product FAISS index, writes the index and the corpus metadata to disk,
reloads both, and prints the top matches for one example query.

Side effects: writes ``support-faq.faiss`` and ``support-faq.json`` to the
current working directory and prints a short report to stdout.
"""

import json
from pathlib import Path

import faiss
import numpy as np
from sentence_transformers import SentenceTransformer

# On-disk artifacts; each is written once and read back once below.
INDEX_PATH = "support-faq.faiss"
METADATA_PATH = Path("support-faq.json")

# Number of nearest neighbours requested per query.
TOP_K = 2

corpus = [
    {
        "id": "doc-001",
        "text": "Sentence Transformers converts text into dense embeddings.",
    },
    {
        "id": "doc-002",
        "text": "FAISS stores vectors and searches nearest neighbors locally.",
    },
    {
        "id": "doc-003",
        "text": "Cross-encoders rerank a small set of retrieved passages.",
    },
    {
        "id": "doc-004",
        "text": "Qdrant stores vectors behind a database service API.",
    },
]

model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

# --- Build the index -------------------------------------------------------
texts = [item["text"] for item in corpus]
document_embeddings = model.encode_document(
    texts,
    normalize_embeddings=True,
    convert_to_numpy=True,
)
# FAISS operates on float32 matrices, so coerce explicitly.
document_embeddings = np.asarray(document_embeddings, dtype="float32")

dimension = document_embeddings.shape[1]
# Embeddings are L2-normalized above, so inner product equals cosine similarity.
index = faiss.IndexFlatIP(dimension)
index.add(document_embeddings)

# --- Persist index and metadata --------------------------------------------
# Vectors are added in corpus order, so a FAISS row id doubles as the position
# of the matching record in the JSON metadata list.
faiss.write_index(index, INDEX_PATH)
METADATA_PATH.write_text(json.dumps(corpus, indent=2), encoding="utf-8")

# --- Reload and query ------------------------------------------------------
loaded_index = faiss.read_index(INDEX_PATH)

query_embedding = model.encode_query(
    ["Which library searches vectors nearest neighbors locally?"],
    normalize_embeddings=True,
    convert_to_numpy=True,
)
query_embedding = np.asarray(query_embedding, dtype="float32")

scores, row_ids = loaded_index.search(query_embedding, k=TOP_K)
metadata = json.loads(METADATA_PATH.read_text(encoding="utf-8"))

# FAISS pads missing neighbours with label -1 when fewer than k results exist.
# Indexing the metadata list with -1 would silently return the *last* record,
# so drop padded slots before ranking.
hits = [
    (score, int(row_id))
    for score, row_id in zip(scores[0], row_ids[0])
    if row_id >= 0
]

print(f"embedding dimension: {dimension}")
print(f"indexed vectors: {loaded_index.ntotal}")
print("top matches:")
for rank, (score, row_id) in enumerate(hits, start=1):
    record = metadata[row_id]
    print(f"{rank}. {record['id']} score={score:.4f} text={record['text']}")