Update app.py
app.py
CHANGED
@@ -41,7 +41,7 @@ def calculate_score(image, text, model_name):
     model_type = MODELS[model_name][2]
 
     # Preprocess the image and text
-    inputs = processor(text=labels, images=[image], return_tensors="pt", padding=
+    inputs = processor(text=labels, images=[image], return_tensors="pt", padding="max_length")
     inputs = {k: v.to("cuda") for k, v in inputs.items()}
 
     # Calculate embeddings
@@ -58,14 +58,18 @@ def calculate_score(image, text, model_name):
     image_embeds = F.normalize(image_embeds, p=2, dim=1)
     text_embeds = F.normalize(text_embeds, p=2, dim=1)
 
-    # Calculate
-
-
-
-
+    # Calculate similarity
+    if model_type == "clip":
+        # For CLIP, use cosine similarity
+        similarities = torch.mm(text_embeds, image_embeds.t()).squeeze(1)
+        similarities = torch.clamp(similarities, min=0, max=1)
+    elif model_type == "siglip":
+        # For SigLIP, use sigmoid on dot product
+        logits = torch.mm(text_embeds, image_embeds.t()).squeeze(1)
+        similarities = torch.sigmoid(logits)
 
     # Convert to numpy array
-    similarities =
+    similarities = similarities.cpu().numpy()
 
     results_dict = {label: float(score) for label, score in zip(labels, similarities)}
     return results_dict
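For reference, below is a minimal, self-contained sketch (not part of the commit) of the scoring logic this diff introduces, assuming text_embeds has shape (num_labels, dim) and image_embeds has shape (1, dim): CLIP-style models score with the clamped cosine similarity of the L2-normalized embeddings, while SigLIP-style models pass the same dot product through a sigmoid so each label is scored independently. The helper name score_labels and the shapes are illustrative assumptions, not identifiers from app.py.

# Illustrative sketch only; function name and tensor shapes are assumptions, not code from app.py.
import torch
import torch.nn.functional as F

def score_labels(text_embeds: torch.Tensor, image_embeds: torch.Tensor, model_type: str) -> torch.Tensor:
    """Score one image against N labels; returns a (N,) tensor of scores in [0, 1]."""
    text_embeds = F.normalize(text_embeds, p=2, dim=1)    # (N, dim)
    image_embeds = F.normalize(image_embeds, p=2, dim=1)  # (1, dim)
    logits = torch.mm(text_embeds, image_embeds.t()).squeeze(1)  # (N,)
    if model_type == "clip":
        # CLIP path: cosine similarity, clamped to [0, 1] as in the updated code
        return torch.clamp(logits, min=0, max=1)
    if model_type == "siglip":
        # SigLIP path: sigmoid over the dot product, so each label is scored independently
        return torch.sigmoid(logits)
    raise ValueError(f"Unsupported model_type: {model_type}")

The padding="max_length" argument added in the first hunk matches the preprocessing that Hugging Face SigLIP processors are documented to expect, and CLIP processors also accept it, so one processor call can serve both model types.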