remove check for precomputed_results
app.py CHANGED
@@ -1017,11 +1017,6 @@ openai_en_classes = [
     "toilet paper",
 ]
 
-# language_names = json.load(open("data/language_mapping.json", encoding="utf-8"))
-# main_language_values = sorted(
-#     [[name, code] for code, name in language_names.items()], key=lambda x: x[0]
-# )
-# [[main_language_names[lang], lang] for lang in main_languages+sorted(l for l in main_language_names if l not in main_languages)]
 
 babel_imagenet = json.load(open("data/babel_imagenet-298.json", encoding="utf-8"))
 babelnet_images = json.load(open("data/images.json", encoding="utf-8"))
@@ -1029,8 +1024,7 @@ max_image_choices = 10 # Currently up to 30 images but relevance degrades quick
 no_image_idxs = [i for i, imgs in enumerate(babelnet_images) if len(imgs) == 0]
 IMG_HEIGHT, IMG_WIDTH = 512, 512
 
-
-# if os.path.exists("data/precomputed_results.json"):
+
 precomputed_results = json.load(open("data/precomputed_results.json"))
 
 request_header = {
@@ -1038,15 +1032,15 @@ request_header = {
 }
 ### Loading model; hard-coded to mSigLIP for now.
 
-if not precomputed_results:
-    open_clip_model, open_clip_pretrained = "ViT-B-16-SigLIP-i18n-256", "webli"
-    model, _, transform = open_clip.create_model_and_transforms(
-        open_clip_model, pretrained=open_clip_pretrained
-    )
-    tokenizer = open_clip.get_tokenizer(open_clip_model)
+# if not precomputed_results:
+#     open_clip_model, open_clip_pretrained = "ViT-B-16-SigLIP-i18n-256", "webli"
+#     model, _, transform = open_clip.create_model_and_transforms(
+#         open_clip_model, pretrained=open_clip_pretrained
+#     )
+#     tokenizer = open_clip.get_tokenizer(open_clip_model)
 
-    device = "cuda" if torch.cuda.is_available() else "cpu"
-    model = model.to(device)
+#     device = "cuda" if torch.cuda.is_available() else "cpu"
+#     model = model.to(device)
 
 
 def change_language(randomize_imgs, randomize_labels):
@@ -1055,14 +1049,14 @@ def change_language(randomize_imgs, randomize_labels):
     class_order = list(range(len(labels)))
     np.random.shuffle(class_order)
     ### We use no prompt ensembling for now
-    if not precomputed_results:
-        text_tokens = tokenizer(labels).to(device)
-        with torch.no_grad():
-            text_features = model.encode_text(text_tokens).float()
-            text_features /= text_features.norm(dim=-1, keepdim=True)
-        text_features = text_features.cpu().numpy()
-    else:
-        text_features = None
+    # if not precomputed_results:
+    #     text_tokens = tokenizer(labels).to(device)
+    #     with torch.no_grad():
+    #         text_features = model.encode_text(text_tokens).float()
+    #         text_features /= text_features.norm(dim=-1, keepdim=True)
+    #     text_features = text_features.cpu().numpy()
+    # else:
+    text_features = None
     correct_text = gr.Text(
         f"Correct was: ''. Question 1/{len(babel_imagenet['EN'][0])} ", label="Game"
     )
@@ -1131,32 +1125,32 @@ def prepare(raw_idx, text_embeddings, class_order):
     img_url = babelnet_images[class_idx][img_idx]["url"]
     class_labels = openai_en_classes
 
-    if not precomputed_results:
-        try:
-            image_input = (
-                transform(
-                    Image.open(
-                        requests.get(img_url, stream=True, headers=request_header).raw
-                    ).convert("RGB")
-                )
-                .unsqueeze(0)
-                .to(device)
-            )
-            with torch.no_grad():
-                image_features = model.encode_image(image_input).float()
-                image_features /= image_features.norm(dim=-1, keepdim=True)
-        except:
-            gr.Warning("There is a problem with the next class. Skipping it.")
-            return prepare(
-                raw_idx, text_embeddings, class_order
-            )
+    # if not precomputed_results:
+    #     try:
+    #         image_input = (
+    #             transform(
+    #                 Image.open(
+    #                     requests.get(img_url, stream=True, headers=request_header).raw
+    #                 ).convert("RGB")
+    #             )
+    #             .unsqueeze(0)
+    #             .to(device)
+    #         )
+    #         with torch.no_grad():
+    #             image_features = model.encode_image(image_input).float()
+    #             image_features /= image_features.norm(dim=-1, keepdim=True)
+    #     except:
+    #         gr.Warning("There is a problem with the next class. Skipping it.")
+    #         return prepare(
+    #             raw_idx, text_embeddings, class_order
+    #         )
 
-        similarity = (text_embeddings @ image_features.cpu().numpy().T).squeeze()
-        choices = np.argsort(similarity)[-4:].tolist()
-    else:
-        choices = list(
-            reversed(precomputed_results["EN"][idx][img_idx])
-        ) # precomputing script uses torch.topk which sorts in reverse here
+    #     similarity = (text_embeddings @ image_features.cpu().numpy().T).squeeze()
+    #     choices = np.argsort(similarity)[-4:].tolist()
+    # else:
+    choices = list(
+        reversed(precomputed_results["EN"][idx][img_idx])
+    ) # precomputing script uses torch.topk which sorts in reverse here
     if idx not in choices:
         choices = [idx] + choices[1:]
     model_choice_idx = choices[-1]
@@ -1206,32 +1200,32 @@ def reroll(raw_idx, text_embeddings, class_order):
     img_url = babelnet_images[class_idx][img_idx]["url"]
     class_labels = openai_en_classes
 
-    if not precomputed_results:
-        try:
-            image_input = (
-                transform(
-                    Image.open(
-                        requests.get(img_url, stream=True, headers=request_header).raw
-                    ).convert("RGB")
-                )
-                .unsqueeze(0)
-                .to(device)
-            )
-            with torch.no_grad():
-                image_features = model.encode_image(image_input).float()
-                image_features /= image_features.norm(dim=-1, keepdim=True)
-        except:
-            gr.Warning("There is a problem with the next class. Skipping it.")
-            return prepare(
-                raw_idx, text_embeddings, class_order
-            )
+    # if not precomputed_results:
+    #     try:
+    #         image_input = (
+    #             transform(
+    #                 Image.open(
+    #                     requests.get(img_url, stream=True, headers=request_header).raw
+    #                 ).convert("RGB")
+    #             )
+    #             .unsqueeze(0)
+    #             .to(device)
+    #         )
+    #         with torch.no_grad():
+    #             image_features = model.encode_image(image_input).float()
+    #             image_features /= image_features.norm(dim=-1, keepdim=True)
+    #     except:
+    #         gr.Warning("There is a problem with the next class. Skipping it.")
+    #         return prepare(
+    #             raw_idx, text_embeddings, class_order
+    #         )
 
-        similarity = (text_embeddings @ image_features.cpu().numpy().T).squeeze()
-        choices = np.argsort(similarity)[-4:].tolist()
-    else:
-        choices = list(
-            reversed(precomputed_results["EN"][idx][img_idx])
-        ) # precomputing script uses torch.topk which sorts in reverse here
+    #     similarity = (text_embeddings @ image_features.cpu().numpy().T).squeeze()
+    #     choices = np.argsort(similarity)[-4:].tolist()
+    # else:
+    choices = list(
+        reversed(precomputed_results["EN"][idx][img_idx])
+    ) # precomputing script uses torch.topk which sorts in reverse here
     if idx not in choices:
         choices = [idx] + choices[1:]
     model_choice_idx = choices[-1]
@@ -1384,13 +1378,7 @@ with gr.Blocks(title="Babel-ImageNet Quiz") as demo:
         outputs=[options, image, class_idx, correct_choice, model_choice, choices],
     )
 
-
-# demo.load(fn=change_language,
-#     inputs=[language_select],
-#     outputs=[text_embeddings, class_idx, correct_text, player_score_text, clip_score_text, player_score, clip_score]
-# ).then(fn=prepare,
-#     inputs=[class_idx, language_select, text_embeddings],
-#     outputs=[options, image, class_idx, correct_choice, model_choice])
+
 
 
 demo.launch()
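
The trailing comment on the surviving branch ("precomputing script uses torch.topk which sorts in reverse here") is easy to misread, so here is a minimal standalone sketch, not part of app.py, of why the stored lists are reversed: torch.topk returns indices in descending similarity order, while the now-commented np.argsort(similarity)[-4:] path produced ascending order with the model's best guess last, which is what model_choice_idx = choices[-1] expects. The toy similarity values below are invented for illustration.

import numpy as np
import torch

similarity = np.array([0.1, 0.9, 0.3, 0.7, 0.5])  # toy similarity scores

# On-the-fly path (now commented out): ascending order, best match last.
argsort_choices = np.argsort(similarity)[-4:].tolist()  # [2, 4, 3, 1]

# What a torch.topk-based precomputing script stores: descending order.
topk_choices = torch.topk(torch.tensor(similarity), k=4).indices.tolist()  # [1, 3, 4, 2]

# Reversing the stored list recovers the ordering the app expects,
# so choices[-1] is again the model's top prediction.
assert list(reversed(topk_choices)) == argsort_choices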