Spaces:

vespa-engine
/

colpali-vespa-visual-retrieval

Running on L40S

App Files Files Community

thomasht86 committed on Oct 23, 2024

Commit

0b432c7

verified ·

1 Parent(s): e52f5a6

Upload folder using huggingface_hub

Browse files

Files changed (2) hide show

frontend/app.py +5 -5
main.py +30 -6

frontend/app.py CHANGED Viewed

@@ -132,7 +132,7 @@ def SearchBox(with_border=False, query_value="", ranking_value="nn+colpali"):
 def SampleQueries():
     sample_queries = [
         "Proportion of female new hires 2021-2023?",
-        "Total amount of performance-based pay awarded in 2023?",
         "What is the percentage distribution of employees with performance-based pay relative to the limit in 2023?",
         "What is the breakdown of management costs by investment strategy in 2023?",
         "2023 profit loss portfolio",
@@ -167,13 +167,13 @@ def Hero():
     return Div(
         H1(
             "Vespa.ai + ColPali",
-            cls="text-5xl md:text-7xl font-bold tracking-wide md:tracking-wider bg-clip-text text-transparent bg-gradient-to-r from-black to-gray-700 dark:from-white dark:to-gray-300 animate-fade-in",
         ),
         P(
             "Efficient Document Retrieval with Vision Language Models",
             cls="text-lg md:text-2xl text-muted-foreground md:tracking-wide",
         ),
-        cls="grid gap-5 text-center",
     )
@@ -183,7 +183,7 @@ def Home():
             Hero(),
             SearchBox(with_border=True),
             SampleQueries(),
-            cls="grid gap-8 -mt-[34vh]",
         ),
         cls="grid w-full h-full max-w-screen-md items-center gap-4 mx-auto",
     )
@@ -319,7 +319,7 @@ def SearchResult(results: list, query_id: Optional[str] = None):
                         Div(
                             Img(
                                 src=blur_image_base64,
-                                hx_get=f"/full_image?id={fields['id']}",
                                 style="filter: blur(5px);",
                                 hx_trigger="load",
                                 hx_swap="outerHTML",

 def SampleQueries():
     sample_queries = [
         "Proportion of female new hires 2021-2023?",
+        "Total amount of fixed salaries paid in 2023?",
         "What is the percentage distribution of employees with performance-based pay relative to the limit in 2023?",
         "What is the breakdown of management costs by investment strategy in 2023?",
         "2023 profit loss portfolio",
     return Div(
         H1(
             "Vespa.ai + ColPali",
+            cls="text-4xl md:text-7xl font-bold tracking-wide md:tracking-wider bg-clip-text text-transparent bg-gradient-to-r from-black to-gray-700 dark:from-white dark:to-gray-300 animate-fade-in",
         ),
         P(
             "Efficient Document Retrieval with Vision Language Models",
             cls="text-lg md:text-2xl text-muted-foreground md:tracking-wide",
         ),
+        cls="grid gap-5 text-center pt-5",
     )
             Hero(),
             SearchBox(with_border=True),
             SampleQueries(),
+            cls="grid gap-8 md:-mt-[34vh]",  # Negative margin only on medium and larger screens
         ),
         cls="grid w-full h-full max-w-screen-md items-center gap-4 mx-auto",
     )
                         Div(
                             Img(
                                 src=blur_image_base64,
+                                hx_get=f"/full_image?docid={fields['id']}&query_id={query_id}&idx={idx}",
                                 style="filter: blur(5px);",
                                 hx_trigger="load",
                                 hx_swap="outerHTML",

main.py CHANGED Viewed

@@ -256,13 +256,29 @@ async def get_sim_map(query_id: str, idx: int, token: str):
         )
 @app.get("/full_image")
-async def full_image(id: str):
     """
     Endpoint to get the full quality image for a given result id.
     """
-    image_data = await get_full_image_from_vespa(vespa_app, id)
     # Decode the base64 image data
     # image_data = base64.b64decode(image_data)
     image_data = "data:image/jpeg;base64," + image_data
@@ -276,11 +292,19 @@ async def full_image(id: str):
 async def message_generator(query_id: str, query: str):
     result = None
-    while result is None:
         result = result_cache.get(query_id)
         await asyncio.sleep(0.5)
-    search_results = get_results_children(result)
-    images = [result["fields"]["blur_image"] for result in search_results]
     # from b64 to PIL image
     images = [Image.open(io.BytesIO(base64.b64decode(img))) for img in images]

         )
+async def update_full_image_cache(docid: str, query_id: str, idx: int, image_data: str):
+    result = result_cache.get(query_id)
+    if result is None:
+        await asyncio.sleep(0.5)
+        return
+    search_results = get_results_children(result)
+    # Check if idx exists in list of children
+    if idx >= len(search_results):
+        await asyncio.sleep(0.5)
+        return
+    search_results[idx]["fields"]["full_image"] = image_data
+    result_cache.set(query_id, result)
+    return
 @app.get("/full_image")
+async def full_image(docid: str, query_id: str, idx: int):
     """
     Endpoint to get the full quality image for a given result id.
     """
+    image_data = await get_full_image_from_vespa(vespa_app, docid)
+    # Update the cache with the full image data asynchronously to not block the request
+    asyncio.create_task(update_full_image_cache(docid, query_id, idx, image_data))
     # Decode the base64 image data
     # image_data = base64.b64decode(image_data)
     image_data = "data:image/jpeg;base64," + image_data
 async def message_generator(query_id: str, query: str):
     result = None
+    images = []
+    while len(images) == 0:
         result = result_cache.get(query_id)
+        if result is None:
+            await asyncio.sleep(0.5)
+            continue
+        search_results = get_results_children(result)
+        for single_result in search_results:
+            img = single_result["fields"].get("full_image", None)
+            if img is not None:
+                images.append(img)
         await asyncio.sleep(0.5)
     # from b64 to PIL image
     images = [Image.open(io.BytesIO(base64.b64decode(img))) for img in images]