from pathlib import Path

import numpy as np
from PIL import Image, ImageDraw
from sentence_transformers import SentenceTransformer


def make_sample(path, color, shape):
    """Render one filled shape on a white 224x224 canvas and save it to *path*.

    Args:
        path: Destination handed to ``Image.save``.
        color: Fill color for the shape (the script passes RGB tuples).
        shape: Label selecting the geometry. Supported labels are
            ``"red square"``, ``"blue circle"`` and ``"green triangle"``.
            The label only picks the outline; the fill always comes
            from *color*.

    Raises:
        ValueError: If *shape* is not one of the supported labels.
    """
    # Reject unknown labels before touching the canvas or the filesystem;
    # otherwise a typo would silently write a blank white image.
    if shape not in ("red square", "blue circle", "green triangle"):
        raise ValueError(f"unsupported shape label: {shape!r}")

    image = Image.new("RGB", (224, 224), "white")
    draw = ImageDraw.Draw(image)

    if shape == "red square":
        draw.rectangle((46, 46, 178, 178), fill=color)
    elif shape == "blue circle":
        draw.ellipse((42, 42, 182, 182), fill=color)
    else:
        # Only "green triangle" can reach this branch after validation.
        draw.polygon([(112, 34), (38, 188), (186, 188)], fill=color)

    image.save(path)


# Folder that receives the generated demo images; reused if it already exists.
image_dir = Path("demo-images")
image_dir.mkdir(exist_ok=True)

# One entry per demo image: (file name, label, RGB fill color).
samples = [
    ("red-square.png", "red square", (220, 30, 30)),
    ("blue-circle.png", "blue circle", (30, 80, 220)),
    ("green-triangle.png", "green triangle", (40, 155, 75)),
]

# Render every sample to disk; the label doubles as the shape selector.
for sample_file, sample_label, sample_color in samples:
    make_sample(
        path=image_dir / sample_file,
        color=sample_color,
        shape=sample_label,
    )

# The same model encodes both the images and the text queries below.
model = SentenceTransformer("sentence-transformers/clip-ViT-B-32")

# Image.open is lazy and keeps the underlying file open. Read each file
# inside a `with` block and keep an in-memory copy so every handle is
# released right away instead of lingering until the image is loaded.
images = []
for filename, _, _ in samples:
    with Image.open(image_dir / filename) as opened:
        images.append(opened.copy())

image_embeddings = model.encode(
    images,
    normalize_embeddings=True,
    convert_to_numpy=True,
)

# Each entry pairs a text query with the label expected as its best match.
queries = [
    ("a red square", "red square"),
    ("a blue circle", "blue circle"),
    ("a green triangle", "green triangle"),
]
query_embeddings = model.encode(
    [query for query, _ in queries],
    normalize_embeddings=True,
    convert_to_numpy=True,
)

# Both embedding sets were requested with normalize_embeddings=True, so this
# product is a cosine-similarity table: one row per query, one column per image.
scores = query_embeddings @ image_embeddings.T
labels = [sample[1] for sample in samples]

print("model: sentence-transformers/clip-ViT-B-32")
print(f"indexed images: {len(samples)}")
print(f"embedding dimension: {image_embeddings.shape[1]}")
print("top matches:")

# Report the highest-scoring image per query and track whether every query
# landed on its expected label.
all_expected = True
for (query, expected), row_scores in zip(queries, scores):
    best_index = int(np.argmax(row_scores))
    matched = labels[best_index]
    score = row_scores[best_index]
    print(f"- query={query!r} match={matched} score={score:.4f}")
    if matched != expected:
        all_expected = False

print(f"all expected matches: {all_expected}")