"""Demo: change ``max_seq_length`` on a SentenceTransformer and inspect the result.

The script loads a model, builds a long input by repeating one sentence,
sets ``max_seq_length`` to 128, and prints the preprocessed ``input_ids``
length and the shape of the encoded embedding.
"""

from sentence_transformers import SentenceTransformer

MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
REPEAT_COUNT = 20
NEW_MAX_SEQ_LENGTH = 128

model = SentenceTransformer(MODEL_NAME)

# One sentence (with a trailing space) that is repeated to build a long input.
sentence = (
    "Sentence Transformers truncates input tokens when a document is longer "
    "than the configured sequence length. "
)
long_text = sentence * REPEAT_COUNT
documents = [long_text]

# Report the limit the model was loaded with before overriding it.
print(f"original max_seq_length={model.max_seq_length}")
model.max_seq_length = NEW_MAX_SEQ_LENGTH

# Preprocess after the limit is changed so the reported length reflects it.
# NOTE(review): `preprocess` availability depends on the installed
# sentence-transformers version -- confirm against the pinned release.
features = model.preprocess(documents)
print(f"updated max_seq_length={model.max_seq_length}")
print(f"tokenized sequence length={features['input_ids'].shape[1]}")

# Encode the same single-document batch with the progress bar disabled.
embedding = model.encode(documents, show_progress_bar=False)
print(f"embedding shape={embedding.shape}")