kokuma committed
Commit 87d901b · verified · Parent: b16b259

remove check for precomputed_results

Files changed (1)
app.py +68 -80
app.py CHANGED
@@ -1017,11 +1017,6 @@ openai_en_classes = [
     "toilet paper",
 ]
 
-# language_names = json.load(open("data/language_mapping.json", encoding="utf-8"))
-# main_language_values = sorted(
-#     [[name, code] for code, name in language_names.items()], key=lambda x: x[0]
-# )
-# [[main_language_names[lang], lang] for lang in main_languages+sorted(l for l in main_language_names if l not in main_languages)]
 
 babel_imagenet = json.load(open("data/babel_imagenet-298.json", encoding="utf-8"))
 babelnet_images = json.load(open("data/images.json", encoding="utf-8"))
@@ -1029,8 +1024,7 @@ max_image_choices = 10 # Currently up to 30 images but relevance degrades quick
 no_image_idxs = [i for i, imgs in enumerate(babelnet_images) if len(imgs) == 0]
 IMG_HEIGHT, IMG_WIDTH = 512, 512
 
-# precomputed_results = None
-# if os.path.exists("data/precomputed_results.json"):
+
 precomputed_results = json.load(open("data/precomputed_results.json"))
 
 request_header = {
@@ -1038,15 +1032,15 @@ request_header = {
 }
 ### Loading model; hard-coded to mSigLIP for now.
 
-if not precomputed_results:
-    open_clip_model, open_clip_pretrained = "ViT-B-16-SigLIP-i18n-256", "webli"
-    model, _, transform = open_clip.create_model_and_transforms(
-        open_clip_model, pretrained=open_clip_pretrained
-    )
-    tokenizer = open_clip.get_tokenizer(open_clip_model)
+# if not precomputed_results:
+#     open_clip_model, open_clip_pretrained = "ViT-B-16-SigLIP-i18n-256", "webli"
+#     model, _, transform = open_clip.create_model_and_transforms(
+#         open_clip_model, pretrained=open_clip_pretrained
+#     )
+#     tokenizer = open_clip.get_tokenizer(open_clip_model)
 
-    device = "cuda" if torch.cuda.is_available() else "cpu"
-    model = model.to(device)
+# device = "cuda" if torch.cuda.is_available() else "cpu"
+# model = model.to(device)
 
 
 def change_language(randomize_imgs, randomize_labels):
@@ -1055,14 +1049,14 @@ def change_language(randomize_imgs, randomize_labels):
     class_order = list(range(len(labels)))
     np.random.shuffle(class_order)
     ### We use no prompt ensembling for now
-    if not precomputed_results:
-        text_tokens = tokenizer(labels).to(device)
-        with torch.no_grad():
-            text_features = model.encode_text(text_tokens).float()
-            text_features /= text_features.norm(dim=-1, keepdim=True)
-            text_features = text_features.cpu().numpy()
-    else:
-        text_features = None
+    # if not precomputed_results:
+    #     text_tokens = tokenizer(labels).to(device)
+    #     with torch.no_grad():
+    #         text_features = model.encode_text(text_tokens).float()
+    #         text_features /= text_features.norm(dim=-1, keepdim=True)
+    #         text_features = text_features.cpu().numpy()
+    # else:
+    text_features = None
     correct_text = gr.Text(
         f"Correct was: ''. Question 1/{len(babel_imagenet['EN'][0])} ", label="Game"
     )
@@ -1131,32 +1125,32 @@ def prepare(raw_idx, text_embeddings, class_order):
     img_url = babelnet_images[class_idx][img_idx]["url"]
     class_labels = openai_en_classes
 
-    if not precomputed_results:
-        try:
-            image_input = (
-                transform(
-                    Image.open(
-                        requests.get(img_url, stream=True, headers=request_header).raw
-                    ).convert("RGB")
-                )
-                .unsqueeze(0)
-                .to(device)
-            )
-            with torch.no_grad():
-                image_features = model.encode_image(image_input).float()
-                image_features /= image_features.norm(dim=-1, keepdim=True)
-        except:
-            gr.Warning("There is a problem with the next class. Skipping it.")
-            return prepare(
-                raw_idx, text_embeddings, class_order
-            )
+    # if not precomputed_results:
+    #     try:
+    #         image_input = (
+    #             transform(
+    #                 Image.open(
+    #                     requests.get(img_url, stream=True, headers=request_header).raw
+    #                 ).convert("RGB")
+    #             )
+    #             .unsqueeze(0)
+    #             .to(device)
+    #         )
+    #         with torch.no_grad():
+    #             image_features = model.encode_image(image_input).float()
+    #             image_features /= image_features.norm(dim=-1, keepdim=True)
+    #     except:
+    #         gr.Warning("There is a problem with the next class. Skipping it.")
+    #         return prepare(
+    #             raw_idx, text_embeddings, class_order
+    #         )
 
-        similarity = (text_embeddings @ image_features.cpu().numpy().T).squeeze()
-        choices = np.argsort(similarity)[-4:].tolist()
-    else:
-        choices = list(
-            reversed(precomputed_results["EN"][idx][img_idx])
-        )  # precomputing script uses torch.topk which sorts in reverse here
+    # similarity = (text_embeddings @ image_features.cpu().numpy().T).squeeze()
+    # choices = np.argsort(similarity)[-4:].tolist()
+    # else:
+    choices = list(
+        reversed(precomputed_results["EN"][idx][img_idx])
+    )  # precomputing script uses torch.topk which sorts in reverse here
     if idx not in choices:
         choices = [idx] + choices[1:]
     model_choice_idx = choices[-1]
@@ -1206,32 +1200,32 @@ def reroll(raw_idx, text_embeddings, class_order):
     img_url = babelnet_images[class_idx][img_idx]["url"]
     class_labels = openai_en_classes
 
-    if not precomputed_results:
-        try:
-            image_input = (
-                transform(
-                    Image.open(
-                        requests.get(img_url, stream=True, headers=request_header).raw
-                    ).convert("RGB")
-                )
-                .unsqueeze(0)
-                .to(device)
-            )
-            with torch.no_grad():
-                image_features = model.encode_image(image_input).float()
-                image_features /= image_features.norm(dim=-1, keepdim=True)
-        except:
-            gr.Warning("There is a problem with the next class. Skipping it.")
-            return prepare(
-                raw_idx, text_embeddings, class_order
-            )
+    # if not precomputed_results:
+    #     try:
+    #         image_input = (
+    #             transform(
+    #                 Image.open(
+    #                     requests.get(img_url, stream=True, headers=request_header).raw
+    #                 ).convert("RGB")
+    #             )
+    #             .unsqueeze(0)
+    #             .to(device)
+    #         )
+    #         with torch.no_grad():
+    #             image_features = model.encode_image(image_input).float()
+    #             image_features /= image_features.norm(dim=-1, keepdim=True)
+    #     except:
+    #         gr.Warning("There is a problem with the next class. Skipping it.")
+    #         return prepare(
+    #             raw_idx, text_embeddings, class_order
+    #         )
 
-        similarity = (text_embeddings @ image_features.cpu().numpy().T).squeeze()
-        choices = np.argsort(similarity)[-4:].tolist()
-    else:
-        choices = list(
-            reversed(precomputed_results["EN"][idx][img_idx])
-        )  # precomputing script uses torch.topk which sorts in reverse here
+    # similarity = (text_embeddings @ image_features.cpu().numpy().T).squeeze()
+    # choices = np.argsort(similarity)[-4:].tolist()
+    # else:
+    choices = list(
+        reversed(precomputed_results["EN"][idx][img_idx])
+    )  # precomputing script uses torch.topk which sorts in reverse here
     if idx not in choices:
         choices = [idx] + choices[1:]
     model_choice_idx = choices[-1]
@@ -1384,13 +1378,7 @@ with gr.Blocks(title="Babel-ImageNet Quiz") as demo:
         outputs=[options, image, class_idx, correct_choice, model_choice, choices],
     )
 
-    # initialization
-    # demo.load(fn=change_language,
-    #           inputs=[language_select],
-    #           outputs=[text_embeddings, class_idx, correct_text, player_score_text, clip_score_text, player_score, clip_score]
-    # ).then(fn=prepare,
-    #        inputs=[class_idx, language_select, text_embeddings],
-    #        outputs=[options, image, class_idx, correct_choice, model_choice])
+
 
 
 demo.launch()
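
Note on the surviving code path: the in-code comment "precomputing script uses torch.topk which sorts in reverse here" is terse, and its interplay with the removed np.argsort path is easy to miss. The toy sketch below is illustrative only and not part of app.py (the variable names are made up for the demonstration): np.argsort sorts ascending, so the old on-the-fly path placed the model's top pick last, while torch.topk returns indices best-first, which is why the precomputed lists are wrapped in reversed() before choices[-1] is read as the model's answer.

import numpy as np
import torch

# Toy similarity scores for six classes.
sim = np.array([0.1, 0.9, 0.3, 0.7, 0.5, 0.2])

# Removed on-the-fly path: argsort ascends, so the top-4 slice ends
# with the model's best guess at choices[-1].
argsort_choices = np.argsort(sim)[-4:].tolist()  # [2, 4, 3, 1]

# Precomputing script's convention: torch.topk returns indices best-first.
topk_choices = torch.topk(torch.from_numpy(sim), k=4).indices.tolist()  # [1, 3, 4, 2]

# reversed() restores the argsort convention, so choices[-1] is the
# model's top pick in both code paths.
assert list(reversed(topk_choices)) == argsort_choices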