kokuma committed
Commit 87d901b · verified · Parent: b16b259

remove check for precomputed_results

Files changed (1)
app.py +68 -80
app.py CHANGED
@@ -1017,11 +1017,6 @@ openai_en_classes = [
     "toilet paper",
 ]
 
-# language_names = json.load(open("data/language_mapping.json", encoding="utf-8"))
-# main_language_values = sorted(
-#     [[name, code] for code, name in language_names.items()], key=lambda x: x[0]
-# )
-# [[main_language_names[lang], lang] for lang in main_languages+sorted(l for l in main_language_names if l not in main_languages)]
 
 babel_imagenet = json.load(open("data/babel_imagenet-298.json", encoding="utf-8"))
 babelnet_images = json.load(open("data/images.json", encoding="utf-8"))
@@ -1029,8 +1024,7 @@ max_image_choices = 10 # Currently up to 30 images but relevance degrades quick
 no_image_idxs = [i for i, imgs in enumerate(babelnet_images) if len(imgs) == 0]
 IMG_HEIGHT, IMG_WIDTH = 512, 512
 
-# precomputed_results = None
-# if os.path.exists("data/precomputed_results.json"):
+
 precomputed_results = json.load(open("data/precomputed_results.json"))
 
 request_header = {
@@ -1038,15 +1032,15 @@ request_header = {
 }
 ### Loading model; hard-coded to mSigLIP for now.
 
-if not precomputed_results:
-    open_clip_model, open_clip_pretrained = "ViT-B-16-SigLIP-i18n-256", "webli"
-    model, _, transform = open_clip.create_model_and_transforms(
-        open_clip_model, pretrained=open_clip_pretrained
-    )
-    tokenizer = open_clip.get_tokenizer(open_clip_model)
+# if not precomputed_results:
+#     open_clip_model, open_clip_pretrained = "ViT-B-16-SigLIP-i18n-256", "webli"
+#     model, _, transform = open_clip.create_model_and_transforms(
+#         open_clip_model, pretrained=open_clip_pretrained
+#     )
+#     tokenizer = open_clip.get_tokenizer(open_clip_model)
 
-    device = "cuda" if torch.cuda.is_available() else "cpu"
-    model = model.to(device)
+# device = "cuda" if torch.cuda.is_available() else "cpu"
+# model = model.to(device)
 
 
 def change_language(randomize_imgs, randomize_labels):
@@ -1055,14 +1049,14 @@ def change_language(randomize_imgs, randomize_labels):
     class_order = list(range(len(labels)))
     np.random.shuffle(class_order)
     ### We use no prompt ensembling for now
-    if not precomputed_results:
-        text_tokens = tokenizer(labels).to(device)
-        with torch.no_grad():
-            text_features = model.encode_text(text_tokens).float()
-            text_features /= text_features.norm(dim=-1, keepdim=True)
-            text_features = text_features.cpu().numpy()
-    else:
-        text_features = None
+    # if not precomputed_results:
+    #     text_tokens = tokenizer(labels).to(device)
+    #     with torch.no_grad():
+    #         text_features = model.encode_text(text_tokens).float()
+    #         text_features /= text_features.norm(dim=-1, keepdim=True)
+    #         text_features = text_features.cpu().numpy()
+    # else:
+    text_features = None
     correct_text = gr.Text(
         f"Correct was: ''. Question 1/{len(babel_imagenet['EN'][0])} ", label="Game"
     )
@@ -1131,32 +1125,32 @@ def prepare(raw_idx, text_embeddings, class_order):
     img_url = babelnet_images[class_idx][img_idx]["url"]
     class_labels = openai_en_classes
 
-    if not precomputed_results:
-        try:
-            image_input = (
-                transform(
-                    Image.open(
-                        requests.get(img_url, stream=True, headers=request_header).raw
-                    ).convert("RGB")
-                )
-                .unsqueeze(0)
-                .to(device)
-            )
-            with torch.no_grad():
-                image_features = model.encode_image(image_input).float()
-                image_features /= image_features.norm(dim=-1, keepdim=True)
-        except:
-            gr.Warning("There is a problem with the next class. Skipping it.")
-            return prepare(
-                raw_idx, text_embeddings, class_order
-            )
+    # if not precomputed_results:
+    #     try:
+    #         image_input = (
+    #             transform(
+    #                 Image.open(
+    #                     requests.get(img_url, stream=True, headers=request_header).raw
+    #                 ).convert("RGB")
+    #             )
+    #             .unsqueeze(0)
+    #             .to(device)
+    #         )
+    #         with torch.no_grad():
+    #             image_features = model.encode_image(image_input).float()
+    #             image_features /= image_features.norm(dim=-1, keepdim=True)
+    #     except:
+    #         gr.Warning("There is a problem with the next class. Skipping it.")
+    #         return prepare(
+    #             raw_idx, text_embeddings, class_order
+    #         )
 
-        similarity = (text_embeddings @ image_features.cpu().numpy().T).squeeze()
-        choices = np.argsort(similarity)[-4:].tolist()
-    else:
-        choices = list(
-            reversed(precomputed_results["EN"][idx][img_idx])
-        )  # precomputing script uses torch.topk which sorts in reverse here
+    # similarity = (text_embeddings @ image_features.cpu().numpy().T).squeeze()
+    # choices = np.argsort(similarity)[-4:].tolist()
+    # else:
+    choices = list(
+        reversed(precomputed_results["EN"][idx][img_idx])
+    )  # precomputing script uses torch.topk which sorts in reverse here
     if idx not in choices:
         choices = [idx] + choices[1:]
     model_choice_idx = choices[-1]
@@ -1206,32 +1200,32 @@ def reroll(raw_idx, text_embeddings, class_order):
     img_url = babelnet_images[class_idx][img_idx]["url"]
     class_labels = openai_en_classes
 
-    if not precomputed_results:
-        try:
-            image_input = (
-                transform(
-                    Image.open(
-                        requests.get(img_url, stream=True, headers=request_header).raw
-                    ).convert("RGB")
-                )
-                .unsqueeze(0)
-                .to(device)
-            )
-            with torch.no_grad():
-                image_features = model.encode_image(image_input).float()
-                image_features /= image_features.norm(dim=-1, keepdim=True)
-        except:
-            gr.Warning("There is a problem with the next class. Skipping it.")
-            return prepare(
-                raw_idx, text_embeddings, class_order
-            )
+    # if not precomputed_results:
+    #     try:
+    #         image_input = (
+    #             transform(
+    #                 Image.open(
+    #                     requests.get(img_url, stream=True, headers=request_header).raw
+    #                 ).convert("RGB")
+    #             )
+    #             .unsqueeze(0)
+    #             .to(device)
+    #         )
+    #         with torch.no_grad():
+    #             image_features = model.encode_image(image_input).float()
+    #             image_features /= image_features.norm(dim=-1, keepdim=True)
+    #     except:
+    #         gr.Warning("There is a problem with the next class. Skipping it.")
+    #         return prepare(
+    #             raw_idx, text_embeddings, class_order
+    #         )
 
-        similarity = (text_embeddings @ image_features.cpu().numpy().T).squeeze()
-        choices = np.argsort(similarity)[-4:].tolist()
-    else:
-        choices = list(
-            reversed(precomputed_results["EN"][idx][img_idx])
-        )  # precomputing script uses torch.topk which sorts in reverse here
+    # similarity = (text_embeddings @ image_features.cpu().numpy().T).squeeze()
+    # choices = np.argsort(similarity)[-4:].tolist()
+    # else:
+    choices = list(
+        reversed(precomputed_results["EN"][idx][img_idx])
+    )  # precomputing script uses torch.topk which sorts in reverse here
     if idx not in choices:
         choices = [idx] + choices[1:]
     model_choice_idx = choices[-1]
@@ -1384,13 +1378,7 @@ with gr.Blocks(title="Babel-ImageNet Quiz") as demo:
         outputs=[options, image, class_idx, correct_choice, model_choice, choices],
     )
 
-    # initialization
-    # demo.load(fn=change_language,
-    #           inputs=[language_select],
-    #           outputs=[text_embeddings, class_idx, correct_text, player_score_text, clip_score_text, player_score, clip_score]
-    # ).then(fn=prepare,
-    #        inputs=[class_idx, language_select, text_embeddings],
-    #        outputs=[options, image, class_idx, correct_choice, model_choice])
+
 
 
 demo.launch()
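
Note on the surviving code path: the in-code comment "precomputing script uses torch.topk which sorts in reverse here" is terse, and its interplay with the removed np.argsort path is easy to miss. The toy sketch below is illustrative only and not part of app.py (the variable names are made up for the demonstration): np.argsort sorts ascending, so the old on-the-fly path placed the model's top pick last, while torch.topk returns indices best-first, which is why the precomputed lists are wrapped in reversed() before choices[-1] is read as the model's answer.

import numpy as np
import torch

# Toy similarity scores for six classes.
sim = np.array([0.1, 0.9, 0.3, 0.7, 0.5, 0.2])

# Removed on-the-fly path: argsort ascends, so the top-4 slice ends
# with the model's best guess at choices[-1].
argsort_choices = np.argsort(sim)[-4:].tolist()  # [2, 4, 3, 1]

# Precomputing script's convention: torch.topk returns indices best-first.
topk_choices = torch.topk(torch.from_numpy(sim), k=4).indices.tolist()  # [1, 3, 4, 2]

# reversed() restores the argsort convention, so choices[-1] is the
# model's top pick in both code paths.
assert list(reversed(topk_choices)) == argsort_choices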