# Demo script: encode a tiny corpus, quantize the embeddings to int8 using
# ranges taken from a calibration set, then report storage size and the
# best-scoring corpus entry for a query.

import numpy as np
from sentence_transformers import SentenceTransformer
from sentence_transformers.util.quantization import quantize_embeddings

corpus = [
    "Int8 vectors use less storage.",
    "Cross encoders rerank candidate pairs.",
    "Image search compares pictures and captions.",
    "Fine-tuning updates model weights.",
]
query = "Int8 vectors use less storage."

# The calibration set is the corpus itself plus 120 synthetic sentences, so
# every corpus embedding is covered by the ranges computed below.
calibration_sentences = [
    *corpus,
    *(
        f"Calibration sentence {i} about compact vector search."
        for i in range(120)
    ),
]

model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

# All three batches are encoded with normalization requested.
corpus_float = model.encode(corpus, normalize_embeddings=True)
query_float = model.encode([query], normalize_embeddings=True)
calibration = model.encode(calibration_sentences, normalize_embeddings=True)

# Stack the axis-0 minima (row 0) on top of the axis-0 maxima (row 1); the
# same ranges are reused for corpus and query so both share one scale.
lower_bounds = calibration.min(axis=0)
upper_bounds = calibration.max(axis=0)
ranges = np.vstack((lower_bounds, upper_bounds))

corpus_int8 = quantize_embeddings(corpus_float, precision="int8", ranges=ranges)
query_int8 = quantize_embeddings(query_float, precision="int8", ranges=ranges)

# Widen to int32 before multiplying so the products and their sum are not
# computed in 8-bit arithmetic.
corpus_wide = corpus_int8.astype(np.int32)
query_wide = query_int8[0].astype(np.int32)
scores = corpus_wide @ query_wide
best = int(scores.argmax())

print(f"float32: {corpus_float.shape}, {corpus_float.dtype}, {corpus_float.nbytes} bytes")
print(f"int8: {corpus_int8.shape}, {corpus_int8.dtype}, {corpus_int8.nbytes} bytes")
print(f"top match: {corpus[best]}")