from pathlib import Path

import optimum.intel as oi
import sentence_transformers as st


def main() -> None:
    """Export a statically quantized OpenVINO model and reload it.

    Steps, in order:

    1. Load ``sentence-transformers/all-MiniLM-L6-v2`` with the OpenVINO
       backend, selecting ``openvino/openvino_model.xml``.
    2. Save that model into the local ``minilm-ov-int8`` directory.
    3. Call ``export_static_quantized_openvino_model`` against the same
       directory with an ``OVQuantizationConfig(num_samples=16)``.
    4. Reload the model from the directory, selecting the qint8 file, encode
       one sentence, and print a short report (directory, file name,
       whether the file exists, embedding shape and dtype).

    Raises:
        SystemExit: If ``minilm-ov-int8`` already exists, so a previous
            export is never written over.
    """
    checkpoint = "sentence-transformers/all-MiniLM-L6-v2"
    source_xml = "openvino/openvino_model.xml"

    # The same file name is used twice: as a path relative to the model
    # directory when reloading, and as a filesystem path for the final check.
    int8_xml_name = "openvino_model_qint8_quantized.xml"
    int8_xml_relative = f"openvino/{int8_xml_name}"

    export_root = Path("minilm-ov-int8")
    int8_xml_path = export_root / "openvino" / int8_xml_name

    # Bail out before any model is loaded if the target is already there.
    if export_root.exists():
        raise SystemExit("Choose a new output directory.")

    export_root_str = str(export_root)

    source_model = st.SentenceTransformer(
        checkpoint,
        backend="openvino",
        model_kwargs={"file_name": source_xml},
    )
    source_model.save_pretrained(export_root_str)

    st.export_static_quantized_openvino_model(
        model=source_model,
        quantization_config=oi.OVQuantizationConfig(num_samples=16),
        model_name_or_path=export_root_str,
    )

    # Load from disk again, this time pointing at the quantized file, to
    # check that the exported artifact is actually usable.
    quantized_model = st.SentenceTransformer(
        export_root_str,
        backend="openvino",
        model_kwargs={"file_name": int8_xml_relative},
    )

    sentences = ["OpenVINO quantization reduces model precision."]
    vectors = quantized_model.encode(sentences)

    print(f"model directory: {export_root}")
    print("qint8 file:")
    print(int8_xml_path.name)
    print(f"saved: {int8_xml_path.exists()}")
    print(f"embedding shape: {vectors.shape}")
    print(f"embedding dtype: {vectors.dtype}")


if __name__ == "__main__":
    main()