Spaces:
Running
on
L40S
Running
on
L40S
Upload folder using huggingface_hub
Browse files- frontend/app.py +5 -5
- main.py +30 -6
frontend/app.py
CHANGED
@@ -132,7 +132,7 @@ def SearchBox(with_border=False, query_value="", ranking_value="nn+colpali"):
|
|
132 |
def SampleQueries():
|
133 |
sample_queries = [
|
134 |
"Proportion of female new hires 2021-2023?",
|
135 |
-
"Total amount of
|
136 |
"What is the percentage distribution of employees with performance-based pay relative to the limit in 2023?",
|
137 |
"What is the breakdown of management costs by investment strategy in 2023?",
|
138 |
"2023 profit loss portfolio",
|
@@ -167,13 +167,13 @@ def Hero():
|
|
167 |
return Div(
|
168 |
H1(
|
169 |
"Vespa.ai + ColPali",
|
170 |
-
cls="text-
|
171 |
),
|
172 |
P(
|
173 |
"Efficient Document Retrieval with Vision Language Models",
|
174 |
cls="text-lg md:text-2xl text-muted-foreground md:tracking-wide",
|
175 |
),
|
176 |
-
cls="grid gap-5 text-center",
|
177 |
)
|
178 |
|
179 |
|
@@ -183,7 +183,7 @@ def Home():
|
|
183 |
Hero(),
|
184 |
SearchBox(with_border=True),
|
185 |
SampleQueries(),
|
186 |
-
cls="grid gap-8
|
187 |
),
|
188 |
cls="grid w-full h-full max-w-screen-md items-center gap-4 mx-auto",
|
189 |
)
|
@@ -319,7 +319,7 @@ def SearchResult(results: list, query_id: Optional[str] = None):
|
|
319 |
Div(
|
320 |
Img(
|
321 |
src=blur_image_base64,
|
322 |
-
hx_get=f"/full_image?
|
323 |
style="filter: blur(5px);",
|
324 |
hx_trigger="load",
|
325 |
hx_swap="outerHTML",
|
|
|
132 |
def SampleQueries():
|
133 |
sample_queries = [
|
134 |
"Proportion of female new hires 2021-2023?",
|
135 |
+
"Total amount of fixed salaries paid in 2023?",
|
136 |
"What is the percentage distribution of employees with performance-based pay relative to the limit in 2023?",
|
137 |
"What is the breakdown of management costs by investment strategy in 2023?",
|
138 |
"2023 profit loss portfolio",
|
|
|
167 |
return Div(
|
168 |
H1(
|
169 |
"Vespa.ai + ColPali",
|
170 |
+
cls="text-4xl md:text-7xl font-bold tracking-wide md:tracking-wider bg-clip-text text-transparent bg-gradient-to-r from-black to-gray-700 dark:from-white dark:to-gray-300 animate-fade-in",
|
171 |
),
|
172 |
P(
|
173 |
"Efficient Document Retrieval with Vision Language Models",
|
174 |
cls="text-lg md:text-2xl text-muted-foreground md:tracking-wide",
|
175 |
),
|
176 |
+
cls="grid gap-5 text-center pt-5",
|
177 |
)
|
178 |
|
179 |
|
|
|
183 |
Hero(),
|
184 |
SearchBox(with_border=True),
|
185 |
SampleQueries(),
|
186 |
+
cls="grid gap-8 md:-mt-[34vh]", # Negative margin only on medium and larger screens
|
187 |
),
|
188 |
cls="grid w-full h-full max-w-screen-md items-center gap-4 mx-auto",
|
189 |
)
|
|
|
319 |
Div(
|
320 |
Img(
|
321 |
src=blur_image_base64,
|
322 |
+
hx_get=f"/full_image?docid={fields['id']}&query_id={query_id}&idx={idx}",
|
323 |
style="filter: blur(5px);",
|
324 |
hx_trigger="load",
|
325 |
hx_swap="outerHTML",
|
main.py
CHANGED
@@ -256,13 +256,29 @@ async def get_sim_map(query_id: str, idx: int, token: str):
|
|
256 |
)
|
257 |
|
258 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
259 |
@app.get("/full_image")
|
260 |
-
async def full_image(id: str):
|
261 |
"""
|
262 |
Endpoint to get the full quality image for a given result id.
|
263 |
"""
|
264 |
-
image_data = await get_full_image_from_vespa(vespa_app, id)
|
265 |
-
|
|
|
266 |
# Decode the base64 image data
|
267 |
# image_data = base64.b64decode(image_data)
|
268 |
image_data = "data:image/jpeg;base64," + image_data
|
@@ -276,11 +292,19 @@ async def full_image(id: str):
|
|
276 |
|
277 |
async def message_generator(query_id: str, query: str):
|
278 |
result = None
|
279 |
-
|
|
|
280 |
result = result_cache.get(query_id)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
281 |
await asyncio.sleep(0.5)
|
282 |
-
|
283 |
-
images = [result["fields"]["blur_image"] for result in search_results]
|
284 |
# from b64 to PIL image
|
285 |
images = [Image.open(io.BytesIO(base64.b64decode(img))) for img in images]
|
286 |
|
|
|
256 |
)
|
257 |
|
258 |
|
259 |
+
async def update_full_image_cache(docid: str, query_id: str, idx: int, image_data: str):
    """
    Store the full-quality image of one hit in the cached search result.

    Fetches the cached result for ``query_id``, writes ``image_data`` into the
    ``full_image`` field of the child at position ``idx`` and puts the updated
    result back into the cache. The call is a no-op when the query is not in
    the cache or when ``idx`` does not address an existing child.

    Args:
        docid: Id of the document the image belongs to. Not used by this
            function; kept so existing callers keep working.
        query_id: Key under which the search result is stored in the cache.
        idx: Zero-based position of the hit among the result's children.
        image_data: Base64-encoded image payload to attach to the hit.
    """
    result = result_cache.get(query_id)
    if result is None:
        # Nothing cached for this query (yet); there is nothing to update.
        return
    search_results = get_results_children(result)
    # Reject negative indices as well: Python would otherwise wrap around and
    # attach the image to the wrong hit.
    if not 0 <= idx < len(search_results):
        return
    search_results[idx]["fields"]["full_image"] = image_data
    # Write the mutated result back so readers of the cache see the new field.
    result_cache.set(query_id, result)
|
272 |
+
|
273 |
+
|
274 |
@app.get("/full_image")
|
275 |
+
async def full_image(docid: str, query_id: str, idx: int):
|
276 |
"""
|
277 |
Endpoint to get the full quality image for a given result id.
|
278 |
"""
|
279 |
+
image_data = await get_full_image_from_vespa(vespa_app, docid)
|
280 |
+
# Update the cache with the full image data asynchronously to not block the request
|
281 |
+
asyncio.create_task(update_full_image_cache(docid, query_id, idx, image_data))
|
282 |
# Decode the base64 image data
|
283 |
# image_data = base64.b64decode(image_data)
|
284 |
image_data = "data:image/jpeg;base64," + image_data
|
|
|
292 |
|
293 |
async def message_generator(query_id: str, query: str):
|
294 |
result = None
|
295 |
+
images = []
|
296 |
+
while len(images) == 0:
|
297 |
result = result_cache.get(query_id)
|
298 |
+
if result is None:
|
299 |
+
await asyncio.sleep(0.5)
|
300 |
+
continue
|
301 |
+
search_results = get_results_children(result)
|
302 |
+
for single_result in search_results:
|
303 |
+
img = single_result["fields"].get("full_image", None)
|
304 |
+
if img is not None:
|
305 |
+
images.append(img)
|
306 |
await asyncio.sleep(0.5)
|
307 |
+
|
|
|
308 |
# from b64 to PIL image
|
309 |
images = [Image.open(io.BytesIO(base64.b64decode(img))) for img in images]
|
310 |
|