thomasht86 committed on
Commit
0b432c7
·
verified ·
1 Parent(s): e52f5a6

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. frontend/app.py +5 -5
  2. main.py +30 -6
frontend/app.py CHANGED
@@ -132,7 +132,7 @@ def SearchBox(with_border=False, query_value="", ranking_value="nn+colpali"):
132
  def SampleQueries():
133
  sample_queries = [
134
  "Proportion of female new hires 2021-2023?",
135
- "Total amount of performance-based pay awarded in 2023?",
136
  "What is the percentage distribution of employees with performance-based pay relative to the limit in 2023?",
137
  "What is the breakdown of management costs by investment strategy in 2023?",
138
  "2023 profit loss portfolio",
@@ -167,13 +167,13 @@ def Hero():
167
  return Div(
168
  H1(
169
  "Vespa.ai + ColPali",
170
- cls="text-5xl md:text-7xl font-bold tracking-wide md:tracking-wider bg-clip-text text-transparent bg-gradient-to-r from-black to-gray-700 dark:from-white dark:to-gray-300 animate-fade-in",
171
  ),
172
  P(
173
  "Efficient Document Retrieval with Vision Language Models",
174
  cls="text-lg md:text-2xl text-muted-foreground md:tracking-wide",
175
  ),
176
- cls="grid gap-5 text-center",
177
  )
178
 
179
 
@@ -183,7 +183,7 @@ def Home():
183
  Hero(),
184
  SearchBox(with_border=True),
185
  SampleQueries(),
186
- cls="grid gap-8 -mt-[34vh]",
187
  ),
188
  cls="grid w-full h-full max-w-screen-md items-center gap-4 mx-auto",
189
  )
@@ -319,7 +319,7 @@ def SearchResult(results: list, query_id: Optional[str] = None):
319
  Div(
320
  Img(
321
  src=blur_image_base64,
322
- hx_get=f"/full_image?id={fields['id']}",
323
  style="filter: blur(5px);",
324
  hx_trigger="load",
325
  hx_swap="outerHTML",
 
132
  def SampleQueries():
133
  sample_queries = [
134
  "Proportion of female new hires 2021-2023?",
135
+ "Total amount of fixed salaries paid in 2023?",
136
  "What is the percentage distribution of employees with performance-based pay relative to the limit in 2023?",
137
  "What is the breakdown of management costs by investment strategy in 2023?",
138
  "2023 profit loss portfolio",
 
167
  return Div(
168
  H1(
169
  "Vespa.ai + ColPali",
170
+ cls="text-4xl md:text-7xl font-bold tracking-wide md:tracking-wider bg-clip-text text-transparent bg-gradient-to-r from-black to-gray-700 dark:from-white dark:to-gray-300 animate-fade-in",
171
  ),
172
  P(
173
  "Efficient Document Retrieval with Vision Language Models",
174
  cls="text-lg md:text-2xl text-muted-foreground md:tracking-wide",
175
  ),
176
+ cls="grid gap-5 text-center pt-5",
177
  )
178
 
179
 
 
183
  Hero(),
184
  SearchBox(with_border=True),
185
  SampleQueries(),
186
+ cls="grid gap-8 md:-mt-[34vh]", # Negative margin only on medium and larger screens
187
  ),
188
  cls="grid w-full h-full max-w-screen-md items-center gap-4 mx-auto",
189
  )
 
319
  Div(
320
  Img(
321
  src=blur_image_base64,
322
+ hx_get=f"/full_image?docid={fields['id']}&query_id={query_id}&idx={idx}",
323
  style="filter: blur(5px);",
324
  hx_trigger="load",
325
  hx_swap="outerHTML",
main.py CHANGED
@@ -256,13 +256,29 @@ async def get_sim_map(query_id: str, idx: int, token: str):
256
  )
257
 
258
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
259
  @app.get("/full_image")
260
- async def full_image(id: str):
261
  """
262
  Endpoint to get the full quality image for a given result id.
263
  """
264
- image_data = await get_full_image_from_vespa(vespa_app, id)
265
-
 
266
  # Decode the base64 image data
267
  # image_data = base64.b64decode(image_data)
268
  image_data = "data:image/jpeg;base64," + image_data
@@ -276,11 +292,19 @@ async def full_image(id: str):
276
 
277
  async def message_generator(query_id: str, query: str):
278
  result = None
279
- while result is None:
 
280
  result = result_cache.get(query_id)
 
 
 
 
 
 
 
 
281
  await asyncio.sleep(0.5)
282
- search_results = get_results_children(result)
283
- images = [result["fields"]["blur_image"] for result in search_results]
284
  # from b64 to PIL image
285
  images = [Image.open(io.BytesIO(base64.b64decode(img))) for img in images]
286
 
 
256
  )
257
 
258
 
259
+ async def update_full_image_cache(docid: str, query_id: str, idx: int, image_data: str):
260
+ result = result_cache.get(query_id)
261
+ if result is None:
262
+ await asyncio.sleep(0.5)
263
+ return
264
+ search_results = get_results_children(result)
265
+ # Check if idx exists in list of children
266
+ if idx >= len(search_results):
267
+ await asyncio.sleep(0.5)
268
+ return
269
+ search_results[idx]["fields"]["full_image"] = image_data
270
+ result_cache.set(query_id, result)
271
+ return
272
+
273
+
274
  @app.get("/full_image")
275
+ async def full_image(docid: str, query_id: str, idx: int):
276
  """
277
  Endpoint to get the full quality image for a given result id.
278
  """
279
+ image_data = await get_full_image_from_vespa(vespa_app, docid)
280
+ # Update the cache with the full image data asynchronously to not block the request
281
+ asyncio.create_task(update_full_image_cache(docid, query_id, idx, image_data))
282
  # Decode the base64 image data
283
  # image_data = base64.b64decode(image_data)
284
  image_data = "data:image/jpeg;base64," + image_data
 
292
 
293
  async def message_generator(query_id: str, query: str):
294
  result = None
295
+ images = []
296
+ while len(images) == 0:
297
  result = result_cache.get(query_id)
298
+ if result is None:
299
+ await asyncio.sleep(0.5)
300
+ continue
301
+ search_results = get_results_children(result)
302
+ for single_result in search_results:
303
+ img = single_result["fields"].get("full_image", None)
304
+ if img is not None:
305
+ images.append(img)
306
  await asyncio.sleep(0.5)
307
+
 
308
  # from b64 to PIL image
309
  images = [Image.open(io.BytesIO(base64.b64decode(img))) for img in images]
310