thomasht86 committed
Commit 8e4fbd2 • 1 Parent(s): be59b6e

Upload backend/colpali.py with huggingface_hub

Files changed (1)
backend/colpali.py +25 -8
backend/colpali.py CHANGED
@@ -132,6 +132,7 @@ def gen_similarity_maps(
         query_embs (torch.Tensor): Query embeddings.
         token_idx_map (dict): Mapping from tokens to their indices.
         images (List[Union[Path, str]]): List of image paths or base64-encoded strings.
+        vespa_sim_maps (List[str]): List of Vespa similarity maps.

     Returns:
         List[Dict[str, str]]: A list where each item is a dictionary mapping tokens to base64-encoded blended images.
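
For reference, the structure each vespa_sim_maps entry is expected to have can be inferred from the cell loop later in this diff; a minimal illustrative sketch (field names taken from the diff, sample values made up):

    # One entry of vespa_sim_maps, as consumed by the loop below.
    # Hypothetical sample values; only the field names come from the diff.
    vespa_sim_map = {
        "similarities": {
            "cells": [
                {"address": {"patch": "0", "querytoken": "0"}, "value": 0.12},
                {"address": {"patch": "1", "querytoken": "0"}, "value": 0.07},
            ]
        }
    }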
@@ -178,8 +179,13 @@ def gen_similarity_maps(
         # ... and so on.
         # Now turn these into a tensor of same shape as previous similarity map
         vespa_sim_map_tensor = torch.zeros(
-            (len(vespa_sim_maps), query_embs.size(dim=1), vit_config.n_patch_per_dim, vit_config.n_patch_per_dim)
+            (
+                len(vespa_sim_maps),
+                query_embs.size(dim=1),
+                vit_config.n_patch_per_dim,
+                vit_config.n_patch_per_dim,
+            )
         )
         for idx, vespa_sim_map in enumerate(vespa_sim_maps):
             for cell in vespa_sim_map["similarities"]["cells"]:
                 patch = int(cell["address"]["patch"])
@@ -187,10 +193,17 @@ def gen_similarity_maps(
                     continue
                 query_token = int(cell["address"]["querytoken"])
                 value = cell["value"]
-                vespa_sim_map_tensor[idx, int(query_token), int(patch) // vit_config.n_patch_per_dim, int(patch) % vit_config.n_patch_per_dim] = value
+                vespa_sim_map_tensor[
+                    idx,
+                    int(query_token),
+                    int(patch) // vit_config.n_patch_per_dim,
+                    int(patch) % vit_config.n_patch_per_dim,
+                ] = value

         # Normalize the similarity map per query token
-        similarity_map_normalized = normalize_similarity_map_per_query_token(vespa_sim_map_tensor)
+        similarity_map_normalized = normalize_similarity_map_per_query_token(
+            vespa_sim_map_tensor
+        )
     else:
         # Preprocess inputs
         print("Computing similarity maps")
@@ -227,7 +240,9 @@ def gen_similarity_maps(
         print(f"Similarity map computation took: {end2 - start2} s")

         # Normalize the similarity map per query token
-        similarity_map_normalized = normalize_similarity_map_per_query_token(similarity_map)
+        similarity_map_normalized = normalize_similarity_map_per_query_token(
+            similarity_map
+        )

     # Collect the blended images
     start3 = time.perf_counter()
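
normalize_similarity_map_per_query_token is not shown in this diff; a plausible min-max sketch of what such a helper might do, scaling each (image, query-token) map to [0, 1] independently (an assumption, not the repo's actual implementation):

    import torch

    def minmax_per_query_token(sim_map: torch.Tensor) -> torch.Tensor:
        # sim_map: (n_images, n_query_tokens, h, w)
        flat = sim_map.flatten(start_dim=2)
        mins = flat.min(dim=2).values[..., None, None]
        maxs = flat.max(dim=2).values[..., None, None]
        # Epsilon guards against division by zero on constant maps
        return (sim_map - mins) / (maxs - mins + 1e-8)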
@@ -242,8 +257,8 @@ def gen_similarity_maps(
             # Get the similarity map for this image and the selected token
             sim_map = similarity_map_normalized[idx, token_idx, :, :]  # Shape: (h, w)

-            # Move the similarity map to CPU and convert to NumPy array
-            sim_map_np = sim_map.cpu().numpy()
+            # Move the similarity map to CPU, convert to float (as BFloat16 not supported by Numpy) and convert to NumPy array
+            sim_map_np = sim_map.cpu().float().numpy()

             # Resize the similarity map to the original image size
             sim_map_img = Image.fromarray(sim_map_np)
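
The .float() call added here matters because NumPy has no native BFloat16 dtype, so calling .numpy() directly on a bfloat16 tensor raises a TypeError; upcasting to float32 first avoids this:

    import torch

    t = torch.rand(2, 2, dtype=torch.bfloat16)
    # t.cpu().numpy() would raise a TypeError (BFloat16 unsupported by NumPy)
    arr = t.cpu().float().numpy()  # float32, safe for PIL.Image.fromarray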
@@ -344,7 +359,9 @@ async def query_vespa_default(
     )
     assert response.is_successful(), response.json
     stop = time.perf_counter()
-    print(f"Query time + data transfer took: {stop - start} s, vespa said searchtime was {response.json.get('timing', {}).get('searchtime', -1)} s")
+    print(
+        f"Query time + data transfer took: {stop - start} s, vespa said searchtime was {response.json.get('timing', {}).get('searchtime', -1)} s"
+    )
     open("response.json", "w").write(json.dumps(response.json))
     return format_query_results(query, response)

@@ -512,7 +529,7 @@ def add_sim_maps_to_result(
         query_embs=q_embs,
         token_idx_map=token_to_idx,
         images=imgs,
-        vespa_sim_maps=vespa_sim_maps
+        vespa_sim_maps=vespa_sim_maps,
     )
     for single_result, sim_map_dict in zip(result["root"]["children"], sim_map_imgs):
         for token, sim_mapb64 in sim_map_dict.items():
 