thomasht86 committed on
Commit
fa270d9
·
verified ·
1 Parent(s): 9e9d8e8

Upload folder using huggingface_hub

Browse files
.DS_Store CHANGED
Binary files a/.DS_Store and b/.DS_Store differ
 
.gitignore CHANGED
@@ -10,4 +10,5 @@ output/
10
  pdfs/
11
  static/full_images/
12
  static/sim_maps/
13
- embeddings/
 
 
10
  pdfs/
11
  static/full_images/
12
  static/sim_maps/
13
+ embeddings/
14
+ hf_dataset/
backend/colpali.py CHANGED
@@ -79,7 +79,7 @@ def gen_similarity_maps(
79
  vit_config: Configuration for the Vision Transformer.
80
  query (str): The query string.
81
  query_embs (torch.Tensor): Query embeddings.
82
- token_idx_map (dict): Mapping from tokens to their indices.
83
  images (List[Union[Path, str]]): List of image paths or base64-encoded strings.
84
  vespa_sim_maps (List[str]): List of Vespa similarity maps.
85
 
@@ -206,8 +206,8 @@ def gen_similarity_maps(
206
  )
207
 
208
  result_per_image = {}
209
- for token, token_idx in token_idx_map.items():
210
- if is_special_token(token):
211
  continue
212
 
213
  # Get the similarity map for this image and the selected token
@@ -262,7 +262,7 @@ def get_query_embeddings_and_token_map(
262
  processor, model, query
263
  ) -> Tuple[torch.Tensor, dict]:
264
  if model is None: # use static test query data (saves time when testing)
265
- return testquery.q_embs, testquery.token_to_idx
266
 
267
  start_time = time.perf_counter()
268
  inputs = processor.process_queries([query]).to(model.device)
@@ -273,16 +273,36 @@ def get_query_embeddings_and_token_map(
273
  query_tokens = processor.tokenizer.tokenize(processor.decode(inputs.input_ids[0]))
274
  # reverse key, values in dictionary
275
  print(query_tokens)
276
- token_to_idx = {val: idx for idx, val in enumerate(query_tokens)}
277
  end_time = time.perf_counter()
278
  print(f"Query inference took: {end_time - start_time} s")
279
- return q_emb, token_to_idx
280
 
281
 
282
- def is_special_token(token: str) -> bool:
283
- # Pattern for tokens that start with '<', numbers, whitespace, or single characters, or the string 'Question'
284
  # Will exclude these tokens from the similarity map generation
285
- pattern = re.compile(r"^<.*$|^\d+$|^\s+$|^\w$|^Question$")
286
- if (len(token) < 3) or pattern.match(token):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
287
  return True
288
  return False
 
79
  vit_config: Configuration for the Vision Transformer.
80
  query (str): The query string.
81
  query_embs (torch.Tensor): Query embeddings.
82
+ token_idx_map (dict): Mapping from indices to tokens.
83
  images (List[Union[Path, str]]): List of image paths or base64-encoded strings.
84
  vespa_sim_maps (List[str]): List of Vespa similarity maps.
85
 
 
206
  )
207
 
208
  result_per_image = {}
209
+ for token_idx, token in token_idx_map.items():
210
+ if should_filter_token(token):
211
  continue
212
 
213
  # Get the similarity map for this image and the selected token
 
262
  processor, model, query
263
  ) -> Tuple[torch.Tensor, dict]:
264
  if model is None: # use static test query data (saves time when testing)
265
+ return testquery.q_embs, testquery.idx_to_token
266
 
267
  start_time = time.perf_counter()
268
  inputs = processor.process_queries([query]).to(model.device)
 
273
  query_tokens = processor.tokenizer.tokenize(processor.decode(inputs.input_ids[0]))
274
  # reverse key, values in dictionary
275
  print(query_tokens)
276
+ idx_to_token = {idx: val for idx, val in enumerate(query_tokens)}
277
  end_time = time.perf_counter()
278
  print(f"Query inference took: {end_time - start_time} s")
279
+ return q_emb, idx_to_token
280
 
281
 
282
+ def should_filter_token(token: str) -> bool:
283
+ # Pattern to match tokens that start with '<', whitespace-only tokens, digit-free tokens that do not start with '▁' (e.g. punctuation, 'Question'), or a lone '▁'
284
  # Will exclude these tokens from the similarity map generation
285
+ # Does NOT match:
286
+ # 2
287
+ # 0
288
+ # 2
289
+ # 3
290
+ # ▁2
291
+ # ▁hi
292
+ #
293
+ # Do match:
294
+ # <bos>
295
+ # Question
296
+ # :
297
+ # _Percentage
298
+ # <pad>
299
+ # \n
300
+ # ▁
301
+ # ?
302
+ # )
303
+ # %
304
+ # /)
305
+ pattern = re.compile(r"^<.*$|^\s+$|^(?!.*\d)(?!▁)\S+$|^Question$|^▁$")
306
+ if pattern.match(token):
307
  return True
308
  return False
backend/stopwords.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import spacy
2
+ import os
3
+
4
+ # Download the model if it is not already present
5
+ if not spacy.util.is_package("en_core_web_sm"):
6
+ spacy.cli.download("en_core_web_sm")
7
+ nlp = spacy.load("en_core_web_sm")
8
+
9
+ # It would be possible to remove bolding for stopwords without removing them from the query,
10
+ # but that would require a java plugin which we didn't want to complicate this sample app with.
11
+ def filter(text):
12
+ doc = nlp(text)
13
+ tokens = [token.text for token in doc if not token.is_stop]
14
+ if len(tokens) == 0:
15
+ # if we remove all the words we don't have a query at all, so use the original
16
+ return text
17
+ return " ".join(tokens)
backend/testquery.py CHANGED
The diff for this file is too large to render. See raw diff
 
backend/vespa_app.py CHANGED
@@ -7,8 +7,8 @@ import torch
7
  from dotenv import load_dotenv
8
  from vespa.application import Vespa
9
  from vespa.io import VespaQueryResponse
10
- from .colpali import is_special_token
11
-
12
 
13
  class VespaQueryClient:
14
  MAX_QUERY_TERMS = 64
@@ -261,7 +261,7 @@ class VespaQueryClient:
261
  query: str,
262
  q_embs: torch.Tensor,
263
  ranking: str,
264
- token_to_idx: dict,
265
  ) -> Dict[str, Any]:
266
  """
267
  Get query results from Vespa based on the ranking method.
@@ -270,11 +270,15 @@ class VespaQueryClient:
270
  query (str): The query text.
271
  q_embs (torch.Tensor): Query embeddings.
272
  ranking (str): The ranking method to use.
273
- token_to_idx (dict): Token to index mapping.
274
 
275
  Returns:
276
  Dict[str, Any]: The query results.
277
  """
 
 
 
 
278
  rank_method = ranking.split("_")[0]
279
  sim_map: bool = len(ranking.split("_")) > 1 and ranking.split("_")[1] == "sim"
280
  if rank_method == "nn+colpali":
@@ -296,7 +300,7 @@ class VespaQueryClient:
296
  return result
297
 
298
  def get_sim_maps_from_query(
299
- self, query: str, q_embs: torch.Tensor, ranking: str, token_to_idx: dict
300
  ):
301
  """
302
  Get similarity maps from Vespa based on the ranking method.
@@ -305,14 +309,14 @@ class VespaQueryClient:
305
  query (str): The query text.
306
  q_embs (torch.Tensor): Query embeddings.
307
  ranking (str): The ranking method to use.
308
- token_to_idx (dict): Token to index mapping.
309
 
310
  Returns:
311
  Dict[str, Any]: The query results.
312
  """
313
  # Get the result by calling asyncio.run
314
  result = asyncio.run(
315
- self.get_result_from_query(query, q_embs, ranking, token_to_idx)
316
  )
317
  vespa_sim_maps = []
318
  for single_result in result["root"]["children"]:
@@ -354,13 +358,13 @@ class VespaQueryClient:
354
  return result["root"]["children"]
355
 
356
  def results_to_search_results(
357
- self, result: VespaQueryResponse, token_to_idx: dict
358
  ) -> list:
359
  # Initialize sim_map_ fields in the result
360
  fields_to_add = [
361
  f"sim_map_{token}_{idx}"
362
- for idx, token in enumerate(token_to_idx.keys())
363
- if not is_special_token(token)
364
  ]
365
  for child in result["root"]["children"]:
366
  for sim_map_key in fields_to_add:
 
7
  from dotenv import load_dotenv
8
  from vespa.application import Vespa
9
  from vespa.io import VespaQueryResponse
10
+ from .colpali import should_filter_token
11
+ import backend.stopwords
12
 
13
  class VespaQueryClient:
14
  MAX_QUERY_TERMS = 64
 
261
  query: str,
262
  q_embs: torch.Tensor,
263
  ranking: str,
264
+ idx_to_token: dict,
265
  ) -> Dict[str, Any]:
266
  """
267
  Get query results from Vespa based on the ranking method.
 
270
  query (str): The query text.
271
  q_embs (torch.Tensor): Query embeddings.
272
  ranking (str): The ranking method to use.
273
+ idx_to_token (dict): Index to token mapping.
274
 
275
  Returns:
276
  Dict[str, Any]: The query results.
277
  """
278
+
279
+ # Remove stopwords from the query to avoid visual emphasis on irrelevant words (e.g., "the", "and", "of")
280
+ query = backend.stopwords.filter(query)
281
+
282
  rank_method = ranking.split("_")[0]
283
  sim_map: bool = len(ranking.split("_")) > 1 and ranking.split("_")[1] == "sim"
284
  if rank_method == "nn+colpali":
 
300
  return result
301
 
302
  def get_sim_maps_from_query(
303
+ self, query: str, q_embs: torch.Tensor, ranking: str, idx_to_token: dict
304
  ):
305
  """
306
  Get similarity maps from Vespa based on the ranking method.
 
309
  query (str): The query text.
310
  q_embs (torch.Tensor): Query embeddings.
311
  ranking (str): The ranking method to use.
312
+ idx_to_token (dict): Index to token mapping.
313
 
314
  Returns:
315
  Dict[str, Any]: The query results.
316
  """
317
  # Get the result by calling asyncio.run
318
  result = asyncio.run(
319
+ self.get_result_from_query(query, q_embs, ranking, idx_to_token)
320
  )
321
  vespa_sim_maps = []
322
  for single_result in result["root"]["children"]:
 
358
  return result["root"]["children"]
359
 
360
  def results_to_search_results(
361
+ self, result: VespaQueryResponse, idx_to_token: dict
362
  ) -> list:
363
  # Initialize sim_map_ fields in the result
364
  fields_to_add = [
365
  f"sim_map_{token}_{idx}"
366
+ for idx, token in idx_to_token.items()
367
+ if not should_filter_token(token)
368
  ]
369
  for child in result["root"]["children"]:
370
  for sim_map_key in fields_to_add:
frontend/app.py CHANGED
@@ -1,7 +1,7 @@
1
  from typing import Optional
2
  from urllib.parse import quote_plus
3
 
4
- from fasthtml.components import H1, H2, Div, Form, Img, NotStr, P, Span
5
  from fasthtml.xtend import A, Script
6
  from lucide_fasthtml import Lucide
7
  from shad4fast import Badge, Button, Input, Label, RadioGroup, RadioGroupItem, Separator
@@ -106,12 +106,43 @@ autocomplete_script = Script(
106
  """
107
  )
108
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
 
110
  def SearchBox(with_border=False, query_value="", ranking_value="nn+colpali"):
111
- grid_cls = "grid gap-2 items-center p-3 bg-muted/80 dark:bg-muted/40 w-full"
112
 
113
  if with_border:
114
- grid_cls = "grid gap-2 p-3 rounded-md border border-input bg-muted/80 dark:bg-muted/40 w-full ring-offset-background focus-within:outline-none focus-within:ring-2 focus-within:ring-ring focus-within:ring-offset-2 focus-within:border-input"
115
 
116
  return Form(
117
  Div(
@@ -213,7 +244,7 @@ def Hero():
213
  return Div(
214
  H1(
215
  "Vespa.ai + ColPali",
216
- cls="text-5xl md:text-7xl font-bold tracking-wide md:tracking-wider bg-clip-text text-transparent bg-gradient-to-r from-black to-gray-700 dark:from-white dark:to-gray-300 animate-fade-in",
217
  ),
218
  P(
219
  "Efficient Document Retrieval with Vision Language Models",
@@ -235,7 +266,7 @@ def Home():
235
  )
236
 
237
 
238
- def WhatIsThis():
239
  return Div(
240
  Div(
241
  Div(
@@ -313,12 +344,12 @@ def LoadingSkeleton():
313
  )
314
 
315
 
316
- def SimMapButtonReady(query_id, idx, token, img_src):
317
  return Button(
318
  token.replace("\u2581", ""),
319
  size="sm",
320
  data_image_src=img_src,
321
- id=f"sim-map-button-{query_id}-{idx}-{token}",
322
  cls="sim-map-button pointer-events-auto font-mono text-xs h-5 rounded-none px-2",
323
  )
324
 
@@ -356,19 +387,22 @@ def SearchResult(results: list, query_id: Optional[str] = None):
356
  for key, value in fields.items()
357
  if key.startswith(
358
  "sim_map_"
359
- ) # filtering is done before creating with 'is_special_token'-function
360
  }
361
 
362
  # Generate buttons for the sim_map fields
363
  sim_map_buttons = []
364
  for key, value in sim_map_fields.items():
 
 
365
  if value is not None:
366
  sim_map_base64 = f"data:image/jpeg;base64,{value}"
367
  sim_map_buttons.append(
368
  SimMapButtonReady(
369
  query_id=query_id,
370
  idx=idx,
371
- token=key.split("_")[-2],
 
372
  img_src=sim_map_base64,
373
  )
374
  )
@@ -377,8 +411,8 @@ def SearchResult(results: list, query_id: Optional[str] = None):
377
  SimMapButtonPoll(
378
  query_id=query_id,
379
  idx=idx,
380
- token=key.split("_")[-2],
381
- token_idx=int(key.split("_")[-1]),
382
  )
383
  )
384
 
@@ -450,7 +484,7 @@ def SearchResult(results: list, query_id: Optional[str] = None):
450
  ),
451
  cls="relative w-full h-full",
452
  ),
453
- cls="grid bg-border p-2",
454
  ),
455
  cls="block",
456
  ),
@@ -468,19 +502,42 @@ def SearchResult(results: list, query_id: Optional[str] = None):
468
  Div(
469
  Div(
470
  Div(
471
- P(
472
- NotStr(fields.get("snippet", "")),
473
- cls="text-highlight text-muted-foreground",
 
 
 
 
 
 
 
 
474
  ),
475
- P(
476
- NotStr(fields.get("text", "")),
477
- cls="text-highlight text-muted-foreground",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
478
  ),
479
- cls="grid gap-y-3 p-5 text-sm",
480
  ),
481
- cls="grid bg-background content-start ",
482
  ),
483
- cls="grid bg-border p-2",
484
  ),
485
  id=f"text-column-{idx}",
486
  cls="text-column relative bg-background px-3 py-5 hidden md-grid-text-column",
@@ -496,6 +553,7 @@ def SearchResult(results: list, query_id: Optional[str] = None):
496
  *result_items,
497
  image_swapping,
498
  toggle_text_content,
 
499
  id="search-results",
500
  cls="grid grid-cols-1 gap-px bg-border min-h-0",
501
  )
@@ -503,7 +561,7 @@ def SearchResult(results: list, query_id: Optional[str] = None):
503
 
504
  def ChatResult(query_id: str, query: str):
505
  return Div(
506
- Div("AI-generated response", cls="text-xl font-semibold p-3"),
507
  Div(
508
  Div(
509
  Div(
@@ -516,7 +574,7 @@ def ChatResult(query_id: str, query: str):
516
  ),
517
  ),
518
  id="chat-messages",
519
- cls="overflow-auto min-h-0 grid items-end px-3",
520
  ),
521
  cls="h-full grid grid-rows-[auto_1fr_auto] min-h-0 gap-3",
522
  )
 
1
  from typing import Optional
2
  from urllib.parse import quote_plus
3
 
4
+ from fasthtml.components import H1, H2, Div, Form, Img, NotStr, P, Span, H3, Br
5
  from fasthtml.xtend import A, Script
6
  from lucide_fasthtml import Lucide
7
  from shad4fast import Badge, Button, Input, Label, RadioGroup, RadioGroupItem, Separator
 
106
  """
107
  )
108
 
109
+ dynamic_elements_scrollbars = Script(
110
+ """
111
+ (function () {
112
+ const { applyOverlayScrollbars, getScrollbarTheme } = OverlayScrollbarsManager;
113
+
114
+ function applyScrollbarsToDynamicElements() {
115
+ const scrollbarTheme = getScrollbarTheme();
116
+
117
+ // Apply scrollbars to dynamically loaded result-text-full and result-text-snippet elements
118
+ const resultTextFullElements = document.querySelectorAll('[id^="result-text-full"]');
119
+ const resultTextSnippetElements = document.querySelectorAll('[id^="result-text-snippet"]');
120
+
121
+ resultTextFullElements.forEach(element => {
122
+ applyOverlayScrollbars(element, scrollbarTheme);
123
+ });
124
+
125
+ resultTextSnippetElements.forEach(element => {
126
+ applyOverlayScrollbars(element, scrollbarTheme);
127
+ });
128
+ }
129
+
130
+ // Apply scrollbars after dynamic content is loaded (e.g., after search results)
131
+ applyScrollbarsToDynamicElements();
132
+
133
+ // Observe changes in the 'dark' class to adjust the theme dynamically if needed
134
+ const observer = new MutationObserver(applyScrollbarsToDynamicElements);
135
+ observer.observe(document.documentElement, { attributes: true, attributeFilter: ['class'] });
136
+ })();
137
+ """
138
+ )
139
+
140
 
141
  def SearchBox(with_border=False, query_value="", ranking_value="nn+colpali"):
142
+ grid_cls = "grid gap-2 items-center p-3 bg-muted w-full"
143
 
144
  if with_border:
145
+ grid_cls = "grid gap-2 p-3 rounded-md border border-input bg-muted w-full ring-offset-background focus-within:outline-none focus-within:ring-2 focus-within:ring-ring focus-within:ring-offset-2 focus-within:border-input"
146
 
147
  return Form(
148
  Div(
 
244
  return Div(
245
  H1(
246
  "Vespa.ai + ColPali",
247
+ cls="text-5xl md:text-7xl font-bold tracking-wide md:tracking-wider bg-clip-text text-transparent bg-gradient-to-r from-black to-slate-700 dark:from-white dark:to-slate-300 animate-fade-in",
248
  ),
249
  P(
250
  "Efficient Document Retrieval with Vision Language Models",
 
266
  )
267
 
268
 
269
+ def AboutThisDemo():
270
  return Div(
271
  Div(
272
  Div(
 
344
  )
345
 
346
 
347
+ def SimMapButtonReady(query_id, idx, token, token_idx, img_src):
348
  return Button(
349
  token.replace("\u2581", ""),
350
  size="sm",
351
  data_image_src=img_src,
352
+ id=f"sim-map-button-{query_id}-{idx}-{token_idx}-{token}",
353
  cls="sim-map-button pointer-events-auto font-mono text-xs h-5 rounded-none px-2",
354
  )
355
 
 
387
  for key, value in fields.items()
388
  if key.startswith(
389
  "sim_map_"
390
+ ) # filtering is done before creating with 'should_filter_token'-function
391
  }
392
 
393
  # Generate buttons for the sim_map fields
394
  sim_map_buttons = []
395
  for key, value in sim_map_fields.items():
396
+ token = key.split("_")[-2]
397
+ token_idx = int(key.split("_")[-1])
398
  if value is not None:
399
  sim_map_base64 = f"data:image/jpeg;base64,{value}"
400
  sim_map_buttons.append(
401
  SimMapButtonReady(
402
  query_id=query_id,
403
  idx=idx,
404
+ token=token,
405
+ token_idx=token_idx,
406
  img_src=sim_map_base64,
407
  )
408
  )
 
411
  SimMapButtonPoll(
412
  query_id=query_id,
413
  idx=idx,
414
+ token=token,
415
+ token_idx=token_idx,
416
  )
417
  )
418
 
 
484
  ),
485
  cls="relative w-full h-full",
486
  ),
487
+ cls="grid bg-muted p-2",
488
  ),
489
  cls="block",
490
  ),
 
502
  Div(
503
  Div(
504
  Div(
505
+ Div(
506
+ Div(
507
+ H3("Dynamic summary", cls="text-base font-semibold"),
508
+ P(
509
+ NotStr(fields.get("snippet", "")),
510
+ cls="text-highlight text-muted-foreground",
511
+ ),
512
+ cls="grid grid-rows-[auto_0px] content-start gap-y-3",
513
+ ),
514
+ id=f"result-text-snippet-{idx}",
515
+ cls="grid gap-y-3 p-8 border border-dashed",
516
  ),
517
+ Div(
518
+ Div(
519
+ Div(
520
+ H3("Full text", cls="text-base font-semibold"),
521
+ Div(
522
+ P(
523
+ NotStr(fields.get("text", "")),
524
+ cls="text-highlight text-muted-foreground",
525
+ ),
526
+ Br()
527
+ ),
528
+ cls="grid grid-rows-[auto_0px] content-start gap-y-3",
529
+ ),
530
+ id=f"result-text-full-{idx}",
531
+ cls="grid gap-y-3 p-8 border border-dashed",
532
+ ),
533
+ Div(cls="absolute inset-x-0 bottom-0 bg-gradient-to-t from-white dark:from-slate-900 pt-[7%]"),
534
+ cls="relative grid"
535
  ),
536
+ cls="grid grid-rows-[1fr_1fr] gap-y-8 p-8 text-sm",
537
  ),
538
+ cls="grid bg-background",
539
  ),
540
+ cls="grid bg-muted p-2",
541
  ),
542
  id=f"text-column-{idx}",
543
  cls="text-column relative bg-background px-3 py-5 hidden md-grid-text-column",
 
553
  *result_items,
554
  image_swapping,
555
  toggle_text_content,
556
+ dynamic_elements_scrollbars,
557
  id="search-results",
558
  cls="grid grid-cols-1 gap-px bg-border min-h-0",
559
  )
 
561
 
562
  def ChatResult(query_id: str, query: str):
563
  return Div(
564
+ Div("AI-response (Gemini-8B)", cls="text-xl font-semibold p-5"),
565
  Div(
566
  Div(
567
  Div(
 
574
  ),
575
  ),
576
  id="chat-messages",
577
+ cls="overflow-auto min-h-0 grid items-end px-5",
578
  ),
579
  cls="h-full grid grid-rows-[auto_1fr_auto] min-h-0 gap-3",
580
  )
frontend/layout.py CHANGED
@@ -22,7 +22,7 @@ layout_script = Script(
22
  """
23
  )
24
 
25
- overlay_scrollbars = Script(
26
  """
27
  (function () {
28
  const { OverlayScrollbars } = OverlayScrollbarsGlobal;
@@ -40,8 +40,12 @@ overlay_scrollbars = Script(
40
  instance.destroy();
41
  }
42
 
43
- // Reinitialize OverlayScrollbars with the new theme
44
  OverlayScrollbars(element, {
 
 
 
 
45
  scrollbars: {
46
  theme: scrollbarTheme,
47
  visibility: 'auto',
@@ -51,12 +55,31 @@ overlay_scrollbars = Script(
51
  });
52
  }
53
 
54
- function updateScrollbarTheme() {
 
55
  const isDarkMode = getPreferredTheme() === 'dark';
56
- const scrollbarTheme = isDarkMode ? 'os-theme-light' : 'os-theme-dark'; // Light theme in dark mode, dark theme in light mode
 
 
 
 
 
 
 
 
 
 
57
 
 
 
 
 
 
 
58
  const mainElement = document.querySelector('main');
59
- const chatMessagesElement = document.querySelector('#chat-messages'); // Select the chat message container by ID
 
 
60
 
61
  if (mainElement) {
62
  applyOverlayScrollbars(mainElement, scrollbarTheme);
@@ -67,11 +90,11 @@ overlay_scrollbars = Script(
67
  }
68
  }
69
 
70
- // Apply the correct theme immediately when the page loads
71
- updateScrollbarTheme();
72
 
73
- // Observe changes in the 'dark' class on the <html> element
74
- const observer = new MutationObserver(updateScrollbarTheme);
75
  observer.observe(document.documentElement, { attributes: true, attributeFilter: ['class'] });
76
  })();
77
  """
@@ -108,8 +131,8 @@ def ThemeToggle(variant="ghost", cls=None, **kwargs):
108
  def Links():
109
  return Nav(
110
  A(
111
- Button("What's this?", variant="link"),
112
- href="/what-is-this",
113
  ),
114
  Separator(orientation="vertical"),
115
  A(
@@ -142,5 +165,6 @@ def Layout(*c, **kwargs):
142
  cls="grid grid-rows-[minmax(0,55px)_minmax(0,1fr)] min-h-0",
143
  ),
144
  layout_script,
145
- overlay_scrollbars,
 
146
  )
 
22
  """
23
  )
24
 
25
+ overlay_scrollbars_manager = Script(
26
  """
27
  (function () {
28
  const { OverlayScrollbars } = OverlayScrollbarsGlobal;
 
40
  instance.destroy();
41
  }
42
 
43
+ // Reinitialize OverlayScrollbars with the correct theme and settings
44
  OverlayScrollbars(element, {
45
+ overflow: {
46
+ x: 'hidden',
47
+ y: 'scroll'
48
+ },
49
  scrollbars: {
50
  theme: scrollbarTheme,
51
  visibility: 'auto',
 
55
  });
56
  }
57
 
58
+ // Function to get the current scrollbar theme (light or dark)
59
+ function getScrollbarTheme() {
60
  const isDarkMode = getPreferredTheme() === 'dark';
61
+ return isDarkMode ? 'os-theme-light' : 'os-theme-dark'; // Light theme in dark mode, dark theme in light mode
62
+ }
63
+
64
+ // Expose the common functions globally for reuse
65
+ window.OverlayScrollbarsManager = {
66
+ applyOverlayScrollbars: applyOverlayScrollbars,
67
+ getScrollbarTheme: getScrollbarTheme
68
+ };
69
+ })();
70
+ """
71
+ )
72
 
73
+ static_elements_scrollbars = Script(
74
+ """
75
+ (function () {
76
+ const { applyOverlayScrollbars, getScrollbarTheme } = OverlayScrollbarsManager;
77
+
78
+ function applyScrollbarsToStaticElements() {
79
  const mainElement = document.querySelector('main');
80
+ const chatMessagesElement = document.querySelector('#chat-messages');
81
+
82
+ const scrollbarTheme = getScrollbarTheme();
83
 
84
  if (mainElement) {
85
  applyOverlayScrollbars(mainElement, scrollbarTheme);
 
90
  }
91
  }
92
 
93
+ // Apply the scrollbars on page load
94
+ applyScrollbarsToStaticElements();
95
 
96
+ // Observe changes in the 'dark' class on the <html> element to adjust the theme dynamically
97
+ const observer = new MutationObserver(applyScrollbarsToStaticElements);
98
  observer.observe(document.documentElement, { attributes: true, attributeFilter: ['class'] });
99
  })();
100
  """
 
131
  def Links():
132
  return Nav(
133
  A(
134
+ Button("About this demo?", variant="link"),
135
+ href="/about-this-demo",
136
  ),
137
  Separator(orientation="vertical"),
138
  A(
 
165
  cls="grid grid-rows-[minmax(0,55px)_minmax(0,1fr)] min-h-0",
166
  ),
167
  layout_script,
168
+ overlay_scrollbars_manager,
169
+ static_elements_scrollbars,
170
  )
globals.css CHANGED
@@ -2,27 +2,28 @@
2
  @tailwind components;
3
  @tailwind utilities;
4
 
 
5
  @layer base {
6
  :root {
7
  --background: 0 0% 100%;
8
- --foreground: 240 10% 3.9%;
9
  --card: 0 0% 100%;
10
- --card-foreground: 240 10% 3.9%;
11
  --popover: 0 0% 100%;
12
- --popover-foreground: 240 10% 3.9%;
13
- --primary: 240 5.9% 10%;
14
- --primary-foreground: 0 0% 98%;
15
- --secondary: 240 4.8% 95.9%;
16
- --secondary-foreground: 240 5.9% 10%;
17
- --muted: 240 4.8% 95.9%;
18
- --muted-foreground: 240 3.8% 46.1%;
19
- --accent: 240 4.8% 95.9%;
20
- --accent-foreground: 240 5.9% 10%;
21
  --destructive: 0 84.2% 60.2%;
22
- --destructive-foreground: 0 0% 98%;
23
- --border: 240 5.9% 90%;
24
- --input: 240 5.9% 90%;
25
- --ring: 240 5.9% 10%;
26
  --radius: 0.5rem;
27
  --chart-1: 12 76% 61%;
28
  --chart-2: 173 58% 39%;
@@ -32,25 +33,25 @@
32
  }
33
 
34
  .dark {
35
- --background: 240 10% 3.9%;
36
- --foreground: 0 0% 98%;
37
- --card: 240 10% 3.9%;
38
- --card-foreground: 0 0% 98%;
39
- --popover: 240 10% 3.9%;
40
- --popover-foreground: 0 0% 98%;
41
- --primary: 0 0% 98%;
42
- --primary-foreground: 240 5.9% 10%;
43
- --secondary: 240 3.7% 15.9%;
44
- --secondary-foreground: 0 0% 98%;
45
- --muted: 240 3.7% 15.9%;
46
- --muted-foreground: 240 5% 64.9%;
47
- --accent: 240 3.7% 15.9%;
48
- --accent-foreground: 0 0% 98%;
49
  --destructive: 0 62.8% 30.6%;
50
- --destructive-foreground: 0 0% 98%;
51
- --border: 240 3.7% 15.9%;
52
- --input: 240 3.7% 15.9%;
53
- --ring: 240 4.9% 83.9%;
54
  --chart-1: 220 70% 50%;
55
  --chart-2: 160 60% 45%;
56
  --chart-3: 30 80% 55%;
@@ -166,8 +167,11 @@
166
  }
167
 
168
  .text-highlight strong {
169
- background-color: #61D790;
170
- color: #2E2F27;
 
 
 
171
  }
172
 
173
  .tokens-button {
 
2
  @tailwind components;
3
  @tailwind utilities;
4
 
5
+
6
  @layer base {
7
  :root {
8
  --background: 0 0% 100%;
9
+ --foreground: 222.2 84% 4.9%;
10
  --card: 0 0% 100%;
11
+ --card-foreground: 222.2 84% 4.9%;
12
  --popover: 0 0% 100%;
13
+ --popover-foreground: 222.2 84% 4.9%;
14
+ --primary: 222.2 47.4% 11.2%;
15
+ --primary-foreground: 210 40% 98%;
16
+ --secondary: 210 40% 96.1%;
17
+ --secondary-foreground: 222.2 47.4% 11.2%;
18
+ --muted: 210 40% 96.1%;
19
+ --muted-foreground: 215.4 16.3% 26.9%;
20
+ --accent: 210 40% 96.1%;
21
+ --accent-foreground: 222.2 47.4% 11.2%;
22
  --destructive: 0 84.2% 60.2%;
23
+ --destructive-foreground: 210 40% 98%;
24
+ --border: 214.3 31.8% 81.4%;
25
+ --input: 214.3 31.8% 81.4%;
26
+ --ring: 222.2 84% 4.9%;
27
  --radius: 0.5rem;
28
  --chart-1: 12 76% 61%;
29
  --chart-2: 173 58% 39%;
 
33
  }
34
 
35
  .dark {
36
+ --background: 222.2 84% 4.9%;
37
+ --foreground: 210 40% 98%;
38
+ --card: 222.2 84% 4.9%;
39
+ --card-foreground: 210 40% 98%;
40
+ --popover: 222.2 84% 4.9%;
41
+ --popover-foreground: 210 40% 98%;
42
+ --primary: 210 40% 98%;
43
+ --primary-foreground: 222.2 47.4% 11.2%;
44
+ --secondary: 217.2 32.6% 17.5%;
45
+ --secondary-foreground: 210 40% 98%;
46
+ --muted: 217.2 32.6% 17.5%;
47
+ --muted-foreground: 215 20.2% 85.1%;
48
+ --accent: 217.2 32.6% 17.5%;
49
+ --accent-foreground: 210 40% 98%;
50
  --destructive: 0 62.8% 30.6%;
51
+ --destructive-foreground: 210 40% 98%;
52
+ --border: 217.2 32.6% 27.5%;
53
+ --input: 217.2 32.6% 27.5%;
54
+ --ring: 212.7 26.8% 83.9;
55
  --chart-1: 220 70% 50%;
56
  --chart-2: 160 60% 45%;
57
  --chart-3: 30 80% 55%;
 
167
  }
168
 
169
  .text-highlight strong {
170
+ color: black;
171
+
172
+ .dark & {
173
+ color: white;
174
+ }
175
  }
176
 
177
  .tokens-button {
main.py CHANGED
@@ -38,7 +38,7 @@ from frontend.app import (
38
  SearchResult,
39
  SimMapButtonPoll,
40
  SimMapButtonReady,
41
- WhatIsThis,
42
  )
43
  from frontend.layout import Layout
44
 
@@ -134,9 +134,9 @@ def get(session):
134
  return Layout(Main(Home()))
135
 
136
 
137
- @rt("/what-is-this")
138
  def get():
139
- return Layout(Main(WhatIsThis()))
140
 
141
 
142
  @rt("/search")
@@ -205,7 +205,7 @@ async def get(session, request, query: str, ranking: str):
205
  # Run the embedding and query against Vespa app
206
  model = app.manager.model
207
  processor = app.manager.processor
208
- q_embs, token_to_idx = get_query_embeddings_and_token_map(processor, model, query)
209
 
210
  start = time.perf_counter()
211
  # Fetch real search results from Vespa
@@ -213,19 +213,19 @@ async def get(session, request, query: str, ranking: str):
213
  query=query,
214
  q_embs=q_embs,
215
  ranking=ranking,
216
- token_to_idx=token_to_idx,
217
  )
218
  end = time.perf_counter()
219
  print(
220
  f"Search results fetched in {end - start:.2f} seconds, Vespa says searchtime was {result['timing']['searchtime']} seconds"
221
  )
222
- search_results = vespa_app.results_to_search_results(result, token_to_idx)
223
  get_and_store_sim_maps(
224
  query_id=query_id,
225
  query=query,
226
  q_embs=q_embs,
227
  ranking=ranking,
228
- token_to_idx=token_to_idx,
229
  )
230
  return SearchResult(search_results, query_id)
231
 
@@ -247,13 +247,13 @@ async def poll_vespa_keepalive():
247
 
248
 
249
  @threaded
250
- def get_and_store_sim_maps(query_id, query: str, q_embs, ranking, token_to_idx):
251
  ranking_sim = ranking + "_sim"
252
  vespa_sim_maps = vespa_app.get_sim_maps_from_query(
253
  query=query,
254
  q_embs=q_embs,
255
  ranking=ranking_sim,
256
- token_to_idx=token_to_idx,
257
  )
258
  img_paths = [
259
  IMG_DIR / f"{query_id}_{idx}.jpg" for idx in range(len(vespa_sim_maps))
@@ -275,7 +275,7 @@ def get_and_store_sim_maps(query_id, query: str, q_embs, ranking, token_to_idx):
275
  device=app.manager.device,
276
  query=query,
277
  query_embs=q_embs,
278
- token_idx_map=token_to_idx,
279
  images=img_paths,
280
  vespa_sim_maps=vespa_sim_maps,
281
  )
@@ -303,7 +303,11 @@ async def get_sim_map(query_id: str, idx: int, token: str, token_idx: int):
303
  )
304
  else:
305
  return SimMapButtonReady(
306
- query_id=query_id, idx=idx, token=token, img_src=sim_map_path
 
 
 
 
307
  )
308
 
309
 
 
38
  SearchResult,
39
  SimMapButtonPoll,
40
  SimMapButtonReady,
41
+ AboutThisDemo,
42
  )
43
  from frontend.layout import Layout
44
 
 
134
  return Layout(Main(Home()))
135
 
136
 
137
+ @rt("/about-this-demo")
138
  def get():
139
+ return Layout(Main(AboutThisDemo()))
140
 
141
 
142
  @rt("/search")
 
205
  # Run the embedding and query against Vespa app
206
  model = app.manager.model
207
  processor = app.manager.processor
208
+ q_embs, idx_to_token = get_query_embeddings_and_token_map(processor, model, query)
209
 
210
  start = time.perf_counter()
211
  # Fetch real search results from Vespa
 
213
  query=query,
214
  q_embs=q_embs,
215
  ranking=ranking,
216
+ idx_to_token=idx_to_token,
217
  )
218
  end = time.perf_counter()
219
  print(
220
  f"Search results fetched in {end - start:.2f} seconds, Vespa says searchtime was {result['timing']['searchtime']} seconds"
221
  )
222
+ search_results = vespa_app.results_to_search_results(result, idx_to_token)
223
  get_and_store_sim_maps(
224
  query_id=query_id,
225
  query=query,
226
  q_embs=q_embs,
227
  ranking=ranking,
228
+ idx_to_token=idx_to_token,
229
  )
230
  return SearchResult(search_results, query_id)
231
 
 
247
 
248
 
249
  @threaded
250
+ def get_and_store_sim_maps(query_id, query: str, q_embs, ranking, idx_to_token):
251
  ranking_sim = ranking + "_sim"
252
  vespa_sim_maps = vespa_app.get_sim_maps_from_query(
253
  query=query,
254
  q_embs=q_embs,
255
  ranking=ranking_sim,
256
+ idx_to_token=idx_to_token,
257
  )
258
  img_paths = [
259
  IMG_DIR / f"{query_id}_{idx}.jpg" for idx in range(len(vespa_sim_maps))
 
275
  device=app.manager.device,
276
  query=query,
277
  query_embs=q_embs,
278
+ token_idx_map=idx_to_token,
279
  images=img_paths,
280
  vespa_sim_maps=vespa_sim_maps,
281
  )
 
303
  )
304
  else:
305
  return SimMapButtonReady(
306
+ query_id=query_id,
307
+ idx=idx,
308
+ token=token,
309
+ token_idx=token_idx,
310
+ img_src=sim_map_path,
311
  )
312
 
313
 
output.css CHANGED
@@ -556,24 +556,24 @@ video {
556
 
557
  :root {
558
  --background: 0 0% 100%;
559
- --foreground: 240 10% 3.9%;
560
  --card: 0 0% 100%;
561
- --card-foreground: 240 10% 3.9%;
562
  --popover: 0 0% 100%;
563
- --popover-foreground: 240 10% 3.9%;
564
- --primary: 240 5.9% 10%;
565
- --primary-foreground: 0 0% 98%;
566
- --secondary: 240 4.8% 95.9%;
567
- --secondary-foreground: 240 5.9% 10%;
568
- --muted: 240 4.8% 95.9%;
569
- --muted-foreground: 240 3.8% 46.1%;
570
- --accent: 240 4.8% 95.9%;
571
- --accent-foreground: 240 5.9% 10%;
572
  --destructive: 0 84.2% 60.2%;
573
- --destructive-foreground: 0 0% 98%;
574
- --border: 240 5.9% 90%;
575
- --input: 240 5.9% 90%;
576
- --ring: 240 5.9% 10%;
577
  --radius: 0.5rem;
578
  --chart-1: 12 76% 61%;
579
  --chart-2: 173 58% 39%;
@@ -583,25 +583,25 @@ video {
583
  }
584
 
585
  .dark {
586
- --background: 240 10% 3.9%;
587
- --foreground: 0 0% 98%;
588
- --card: 240 10% 3.9%;
589
- --card-foreground: 0 0% 98%;
590
- --popover: 240 10% 3.9%;
591
- --popover-foreground: 0 0% 98%;
592
- --primary: 0 0% 98%;
593
- --primary-foreground: 240 5.9% 10%;
594
- --secondary: 240 3.7% 15.9%;
595
- --secondary-foreground: 0 0% 98%;
596
- --muted: 240 3.7% 15.9%;
597
- --muted-foreground: 240 5% 64.9%;
598
- --accent: 240 3.7% 15.9%;
599
- --accent-foreground: 0 0% 98%;
600
  --destructive: 0 62.8% 30.6%;
601
- --destructive-foreground: 0 0% 98%;
602
- --border: 240 3.7% 15.9%;
603
- --input: 240 3.7% 15.9%;
604
- --ring: 240 4.9% 83.9%;
605
  --chart-1: 220 70% 50%;
606
  --chart-2: 160 60% 45%;
607
  --chart-3: 30 80% 55%;
@@ -813,10 +813,6 @@ body {
813
  margin-top: 8vh;
814
  }
815
 
816
- .mt-\[5vh\] {
817
- margin-top: 5vh;
818
- }
819
-
820
  .block {
821
  display: block;
822
  }
@@ -849,10 +845,6 @@ body {
849
  aspect-ratio: 1 / 1;
850
  }
851
 
852
- .aspect-auto {
853
- aspect-ratio: auto;
854
- }
855
-
856
  .size-4 {
857
  width: 1rem;
858
  height: 1rem;
@@ -915,6 +907,10 @@ body {
915
  height: 27px;
916
  }
917
 
 
 
 
 
918
  .h-\[55px\] {
919
  height: 55px;
920
  }
@@ -931,14 +927,6 @@ body {
931
  height: 1px;
932
  }
933
 
934
- .h-\[377px\] {
935
- height: 377px;
936
- }
937
-
938
- .h-\[610px\] {
939
- height: 610px;
940
- }
941
-
942
  .max-h-96 {
943
  max-height: 24rem;
944
  }
@@ -1003,14 +991,6 @@ body {
1003
  width: 100%;
1004
  }
1005
 
1006
- .w-\[377px\] {
1007
- width: 377px;
1008
- }
1009
-
1010
- .w-\[233px\] {
1011
- width: 233px;
1012
- }
1013
-
1014
  .min-w-0 {
1015
  min-width: 0px;
1016
  }
@@ -1134,6 +1114,14 @@ body {
1134
  grid-template-columns: repeat(1, minmax(0, 1fr));
1135
  }
1136
 
 
 
 
 
 
 
 
 
1137
  .grid-rows-\[auto_1fr\] {
1138
  grid-template-rows: auto 1fr;
1139
  }
@@ -1232,18 +1220,16 @@ body {
1232
  row-gap: 0.75rem;
1233
  }
1234
 
 
 
 
 
1235
  .space-x-2 > :not([hidden]) ~ :not([hidden]) {
1236
  --tw-space-x-reverse: 0;
1237
  margin-right: calc(0.5rem * var(--tw-space-x-reverse));
1238
  margin-left: calc(0.5rem * calc(1 - var(--tw-space-x-reverse)));
1239
  }
1240
 
1241
- .space-x-3 > :not([hidden]) ~ :not([hidden]) {
1242
- --tw-space-x-reverse: 0;
1243
- margin-right: calc(0.75rem * var(--tw-space-x-reverse));
1244
- margin-left: calc(0.75rem * calc(1 - var(--tw-space-x-reverse)));
1245
- }
1246
-
1247
  .space-x-4 > :not([hidden]) ~ :not([hidden]) {
1248
  --tw-space-x-reverse: 0;
1249
  margin-right: calc(1rem * var(--tw-space-x-reverse));
@@ -1330,6 +1316,10 @@ body {
1330
  border-top-width: 1px;
1331
  }
1332
 
 
 
 
 
1333
  .border-destructive {
1334
  border-color: hsl(var(--destructive));
1335
  }
@@ -1395,10 +1385,6 @@ body {
1395
  background-color: hsl(var(--muted) / 0.5);
1396
  }
1397
 
1398
- .bg-muted\/80 {
1399
- background-color: hsl(var(--muted) / 0.8);
1400
- }
1401
-
1402
  .bg-popover {
1403
  background-color: hsl(var(--popover));
1404
  }
@@ -1425,14 +1411,24 @@ body {
1425
  background-image: linear-gradient(to right, var(--tw-gradient-stops));
1426
  }
1427
 
 
 
 
 
1428
  .from-black {
1429
  --tw-gradient-from: #000 var(--tw-gradient-from-position);
1430
  --tw-gradient-to: rgb(0 0 0 / 0) var(--tw-gradient-to-position);
1431
  --tw-gradient-stops: var(--tw-gradient-from), var(--tw-gradient-to);
1432
  }
1433
 
1434
- .to-gray-700 {
1435
- --tw-gradient-to: #374151 var(--tw-gradient-to-position);
 
 
 
 
 
 
1436
  }
1437
 
1438
  .bg-clip-text {
@@ -1477,6 +1473,10 @@ body {
1477
  padding: 1.5rem;
1478
  }
1479
 
 
 
 
 
1480
  .p-\[1px\] {
1481
  padding: 1px;
1482
  }
@@ -1501,6 +1501,11 @@ body {
1501
  padding-right: 1rem;
1502
  }
1503
 
 
 
 
 
 
1504
  .px-8 {
1505
  padding-left: 2rem;
1506
  padding-right: 2rem;
@@ -1564,6 +1569,10 @@ body {
1564
  padding-top: 1rem;
1565
  }
1566
 
 
 
 
 
1567
  .text-left {
1568
  text-align: left;
1569
  }
@@ -1620,11 +1629,6 @@ body {
1620
  line-height: 1rem;
1621
  }
1622
 
1623
- .text-4xl {
1624
- font-size: 2.25rem;
1625
- line-height: 2.5rem;
1626
- }
1627
-
1628
  .font-bold {
1629
  font-weight: 700;
1630
  }
@@ -2055,8 +2059,10 @@ body {
2055
  }
2056
 
2057
  .text-highlight strong {
2058
- background-color: #61D790;
2059
- color: #2E2F27;
 
 
2060
  }
2061
 
2062
  .tokens-button {
@@ -2685,11 +2691,6 @@ aside {
2685
  line-height: 1;
2686
  }
2687
 
2688
- .md\:text-xl {
2689
- font-size: 1.25rem;
2690
- line-height: 1.75rem;
2691
- }
2692
-
2693
  .md\:tracking-wide {
2694
  letter-spacing: 0.025em;
2695
  }
@@ -2715,8 +2716,10 @@ aside {
2715
  border-color: hsl(var(--destructive));
2716
  }
2717
 
2718
- .dark\:bg-muted\/40:where(.dark, .dark *) {
2719
- background-color: hsl(var(--muted) / 0.4);
 
 
2720
  }
2721
 
2722
  .dark\:from-white:where(.dark, .dark *) {
@@ -2725,8 +2728,8 @@ aside {
2725
  --tw-gradient-stops: var(--tw-gradient-from), var(--tw-gradient-to);
2726
  }
2727
 
2728
- .dark\:to-gray-300:where(.dark, .dark *) {
2729
- --tw-gradient-to: #d1d5db var(--tw-gradient-to-position);
2730
  }
2731
 
2732
  .dark\:hover\:border-white:hover:where(.dark, .dark *) {
 
556
 
557
  :root {
558
  --background: 0 0% 100%;
559
+ --foreground: 222.2 84% 4.9%;
560
  --card: 0 0% 100%;
561
+ --card-foreground: 222.2 84% 4.9%;
562
  --popover: 0 0% 100%;
563
+ --popover-foreground: 222.2 84% 4.9%;
564
+ --primary: 222.2 47.4% 11.2%;
565
+ --primary-foreground: 210 40% 98%;
566
+ --secondary: 210 40% 96.1%;
567
+ --secondary-foreground: 222.2 47.4% 11.2%;
568
+ --muted: 210 40% 96.1%;
569
+ --muted-foreground: 215.4 16.3% 26.9%;
570
+ --accent: 210 40% 96.1%;
571
+ --accent-foreground: 222.2 47.4% 11.2%;
572
  --destructive: 0 84.2% 60.2%;
573
+ --destructive-foreground: 210 40% 98%;
574
+ --border: 214.3 31.8% 81.4%;
575
+ --input: 214.3 31.8% 81.4%;
576
+ --ring: 222.2 84% 4.9%;
577
  --radius: 0.5rem;
578
  --chart-1: 12 76% 61%;
579
  --chart-2: 173 58% 39%;
 
583
  }
584
 
585
  .dark {
586
+ --background: 222.2 84% 4.9%;
587
+ --foreground: 210 40% 98%;
588
+ --card: 222.2 84% 4.9%;
589
+ --card-foreground: 210 40% 98%;
590
+ --popover: 222.2 84% 4.9%;
591
+ --popover-foreground: 210 40% 98%;
592
+ --primary: 210 40% 98%;
593
+ --primary-foreground: 222.2 47.4% 11.2%;
594
+ --secondary: 217.2 32.6% 17.5%;
595
+ --secondary-foreground: 210 40% 98%;
596
+ --muted: 217.2 32.6% 17.5%;
597
+ --muted-foreground: 215 20.2% 85.1%;
598
+ --accent: 217.2 32.6% 17.5%;
599
+ --accent-foreground: 210 40% 98%;
600
  --destructive: 0 62.8% 30.6%;
601
+ --destructive-foreground: 210 40% 98%;
602
+ --border: 217.2 32.6% 27.5%;
603
+ --input: 217.2 32.6% 27.5%;
604
+ --ring: 212.7 26.8% 83.9%;
605
  --chart-1: 220 70% 50%;
606
  --chart-2: 160 60% 45%;
607
  --chart-3: 30 80% 55%;
 
813
  margin-top: 8vh;
814
  }
815
 
 
 
 
 
816
  .block {
817
  display: block;
818
  }
 
845
  aspect-ratio: 1 / 1;
846
  }
847
 
 
 
 
 
848
  .size-4 {
849
  width: 1rem;
850
  height: 1rem;
 
907
  height: 27px;
908
  }
909
 
910
+ .h-\[377px\] {
911
+ height: 377px;
912
+ }
913
+
914
  .h-\[55px\] {
915
  height: 55px;
916
  }
 
927
  height: 1px;
928
  }
929
 
 
 
 
 
 
 
 
 
930
  .max-h-96 {
931
  max-height: 24rem;
932
  }
 
991
  width: 100%;
992
  }
993
 
 
 
 
 
 
 
 
 
994
  .min-w-0 {
995
  min-width: 0px;
996
  }
 
1114
  grid-template-columns: repeat(1, minmax(0, 1fr));
1115
  }
1116
 
1117
+ .grid-rows-\[1fr_1fr\] {
1118
+ grid-template-rows: 1fr 1fr;
1119
+ }
1120
+
1121
+ .grid-rows-\[auto_0px\] {
1122
+ grid-template-rows: auto 0px;
1123
+ }
1124
+
1125
  .grid-rows-\[auto_1fr\] {
1126
  grid-template-rows: auto 1fr;
1127
  }
 
1220
  row-gap: 0.75rem;
1221
  }
1222
 
1223
+ .gap-y-8 {
1224
+ row-gap: 2rem;
1225
+ }
1226
+
1227
  .space-x-2 > :not([hidden]) ~ :not([hidden]) {
1228
  --tw-space-x-reverse: 0;
1229
  margin-right: calc(0.5rem * var(--tw-space-x-reverse));
1230
  margin-left: calc(0.5rem * calc(1 - var(--tw-space-x-reverse)));
1231
  }
1232
 
 
 
 
 
 
 
1233
  .space-x-4 > :not([hidden]) ~ :not([hidden]) {
1234
  --tw-space-x-reverse: 0;
1235
  margin-right: calc(1rem * var(--tw-space-x-reverse));
 
1316
  border-top-width: 1px;
1317
  }
1318
 
1319
+ .border-dashed {
1320
+ border-style: dashed;
1321
+ }
1322
+
1323
  .border-destructive {
1324
  border-color: hsl(var(--destructive));
1325
  }
 
1385
  background-color: hsl(var(--muted) / 0.5);
1386
  }
1387
 
 
 
 
 
1388
  .bg-popover {
1389
  background-color: hsl(var(--popover));
1390
  }
 
1411
  background-image: linear-gradient(to right, var(--tw-gradient-stops));
1412
  }
1413
 
1414
+ .bg-gradient-to-t {
1415
+ background-image: linear-gradient(to top, var(--tw-gradient-stops));
1416
+ }
1417
+
1418
  .from-black {
1419
  --tw-gradient-from: #000 var(--tw-gradient-from-position);
1420
  --tw-gradient-to: rgb(0 0 0 / 0) var(--tw-gradient-to-position);
1421
  --tw-gradient-stops: var(--tw-gradient-from), var(--tw-gradient-to);
1422
  }
1423
 
1424
+ .from-white {
1425
+ --tw-gradient-from: #fff var(--tw-gradient-from-position);
1426
+ --tw-gradient-to: rgb(255 255 255 / 0) var(--tw-gradient-to-position);
1427
+ --tw-gradient-stops: var(--tw-gradient-from), var(--tw-gradient-to);
1428
+ }
1429
+
1430
+ .to-slate-700 {
1431
+ --tw-gradient-to: #334155 var(--tw-gradient-to-position);
1432
  }
1433
 
1434
  .bg-clip-text {
 
1473
  padding: 1.5rem;
1474
  }
1475
 
1476
+ .p-8 {
1477
+ padding: 2rem;
1478
+ }
1479
+
1480
  .p-\[1px\] {
1481
  padding: 1px;
1482
  }
 
1501
  padding-right: 1rem;
1502
  }
1503
 
1504
+ .px-5 {
1505
+ padding-left: 1.25rem;
1506
+ padding-right: 1.25rem;
1507
+ }
1508
+
1509
  .px-8 {
1510
  padding-left: 2rem;
1511
  padding-right: 2rem;
 
1569
  padding-top: 1rem;
1570
  }
1571
 
1572
+ .pt-\[7\%\] {
1573
+ padding-top: 7%;
1574
+ }
1575
+
1576
  .text-left {
1577
  text-align: left;
1578
  }
 
1629
  line-height: 1rem;
1630
  }
1631
 
 
 
 
 
 
1632
  .font-bold {
1633
  font-weight: 700;
1634
  }
 
2059
  }
2060
 
2061
  .text-highlight strong {
2062
+ color: black;
2063
+ .dark & {
2064
+ color: white;
2065
+ }
2066
  }
2067
 
2068
  .tokens-button {
 
2691
  line-height: 1;
2692
  }
2693
 
 
 
 
 
 
2694
  .md\:tracking-wide {
2695
  letter-spacing: 0.025em;
2696
  }
 
2716
  border-color: hsl(var(--destructive));
2717
  }
2718
 
2719
+ .dark\:from-slate-900:where(.dark, .dark *) {
2720
+ --tw-gradient-from: #0f172a var(--tw-gradient-from-position);
2721
+ --tw-gradient-to: rgb(15 23 42 / 0) var(--tw-gradient-to-position);
2722
+ --tw-gradient-stops: var(--tw-gradient-from), var(--tw-gradient-to);
2723
  }
2724
 
2725
  .dark\:from-white:where(.dark, .dark *) {
 
2728
  --tw-gradient-stops: var(--tw-gradient-from), var(--tw-gradient-to);
2729
  }
2730
 
2731
+ .dark\:to-slate-300:where(.dark, .dark *) {
2732
+ --tw-gradient-to: #cbd5e1 var(--tw-gradient-to-position);
2733
  }
2734
 
2735
  .dark\:hover\:border-white:hover:where(.dark, .dark *) {
prepare_feed_deploy.py CHANGED
@@ -1,16 +1,16 @@
1
  # %% [markdown]
2
  # # Visual PDF Retrieval - demo application
3
- #
4
  # In this notebook, we will prepare the Vespa backend application for our visual retrieval demo.
5
  # We will use ColPali as the model to extract patch vectors from images of pdf pages.
6
  # At query time, we use MaxSim to retrieve and/or (based on the configuration) rank the page results.
7
- #
8
  # To see the application in action, visit TODO:
9
- #
10
  # The web application is written in FastHTML, meaning the complete application is written in python.
11
- #
12
  # The steps we will take in this notebook are:
13
- #
14
  # 0. Setup and configuration
15
  # 1. Download the data
16
  # 2. Prepare the data
@@ -18,14 +18,14 @@
18
  # 4. Deploy the Vespa application
19
  # 5. Create the Vespa application
20
  # 6. Feed the data to the Vespa application
21
- #
22
  # All the steps that are needed to provision the Vespa application, including feeding the data, can be done from this notebook.
23
  # We have tried to make it easy for others to run this notebook, to create your own PDF Enterprise Search application using Vespa.
24
- #
25
 
26
  # %% [markdown]
27
  # ## 0. Setup and Configuration
28
- #
29
 
30
  # %%
31
  import os
@@ -83,11 +83,11 @@ os.environ["TOKENIZERS_PARALLELISM"] = "false"
83
 
84
  # %% [markdown]
85
  # ### Create a free trial in Vespa Cloud
86
- #
87
  # Create a tenant from [here](https://vespa.ai/free-trial/).
88
  # The trial includes $300 credit.
89
  # Take note of your tenant name.
90
- #
91
 
92
  # %%
93
  VESPA_TENANT_NAME = "vespa-team"
@@ -95,7 +95,7 @@ VESPA_TENANT_NAME = "vespa-team"
95
  # %% [markdown]
96
  # Here, set your desired application name. (Will be created in later steps)
97
  # Note that you cannot have a hyphen `-` or underscore `_` in the application name.
98
- #
99
 
100
  # %%
101
  VESPA_APPLICATION_NAME = "colpalidemo2"
@@ -105,7 +105,7 @@ VESPA_SCHEMA_NAME = "pdf_page"
105
  # Next, you need to create some tokens for feeding data, and querying the application.
106
  # We recommend separate tokens for feeding and querying, (the former with write permission, and the latter with read permission).
107
  # The tokens can be created from the [Vespa Cloud console](https://console.vespa-cloud.com/) in the 'Account' -> 'Tokens' section.
108
- #
109
 
110
  # %%
111
  VESPA_TOKEN_ID_WRITE = "colpalidemo_write"
@@ -113,7 +113,7 @@ VESPA_TOKEN_ID_READ = "colpalidemo_read"
113
 
114
  # %% [markdown]
115
  # We also need to set the value of the write token to be able to feed data to the Vespa application.
116
- #
117
 
118
  # %%
119
  VESPA_CLOUD_SECRET_TOKEN = os.getenv("VESPA_CLOUD_SECRET_TOKEN") or input(
@@ -124,7 +124,7 @@ VESPA_CLOUD_SECRET_TOKEN = os.getenv("VESPA_CLOUD_SECRET_TOKEN") or input(
124
  # We will also use the Gemini API to create sample queries for our images.
125
  # You can also use other VLMs to create these queries.
126
  # Create a Gemini API key from [here](https://aistudio.google.com/app/apikey).
127
- #
128
 
129
  # %%
130
  GEMINI_API_KEY = os.getenv("GEMINI_API_KEY") or input(
@@ -152,21 +152,21 @@ processor = ColPaliProcessor.from_pretrained(MODEL_NAME)
152
 
153
  # %% [markdown]
154
  # ## 1. Download PDFs
155
- #
156
  # We are going to use public reports from the Norwegian Government Pension Fund Global (also known as the Oil Fund).
157
  # The fund puts transparency at the forefront and publishes reports on its investments, holdings, and returns, as well as its strategy and governance.
158
- #
159
  # These reports are the ones we are going to use for this showcase.
160
  # Here are some sample images:
161
- #
162
  # ![Sample1](./static/img/gfpg-sample-1.png)
163
  # ![Sample2](./static/img/gfpg-sample-2.png)
164
- #
165
 
166
  # %% [markdown]
167
  # As we can see, a lot of the information is in the form of tables, charts and numbers.
168
  # These are not easily extractable using pdf-readers or OCR tools.
169
- #
170
 
171
  # %%
172
  import requests
@@ -274,8 +274,7 @@ pdfs
274
 
275
  # %% [markdown]
276
  # ## 2. Convert PDFs to Images
277
- #
278
-
279
 
280
  # %%
281
  def get_pdf_images(pdf_path):
@@ -325,17 +324,17 @@ print(f"Number of text with length == 0: {Counter(text_lengths)[0]}")
325
 
326
  # %% [markdown]
327
  # ## 3. Generate Queries
328
- #
329
  # In this step, we want to generate queries for each page image.
330
  # These will be useful for 2 reasons:
331
- #
332
  # 1. We can use these queries as typeahead suggestions in the search bar.
333
  # 2. We can use the queries to generate an evaluation dataset. See [Improving Retrieval with LLM-as-a-judge](https://blog.vespa.ai/improving-retrieval-with-llm-as-a-judge/) for a deeper dive into this topic.
334
- #
335
  # The prompt for generating queries is taken from [this](https://danielvanstrien.xyz/posts/post-with-code/colpali/2024-09-23-generate_colpali_dataset.html#an-update-retrieval-focused-prompt) wonderful blog post by Daniel van Strien.
336
- #
337
  # We will use the Gemini API to generate these queries, with `gemini-1.5-flash-8b` as the model.
338
- #
339
 
340
  # %%
341
  from pydantic import BaseModel
@@ -414,7 +413,6 @@ def generate_queries(image, prompt_text, pydantic_model):
414
  }
415
  return queries
416
 
417
-
418
  # %%
419
  for pdf in tqdm(pdf_pages):
420
  image = pdf.get("image")
@@ -490,10 +488,9 @@ with open("output/pdf_pages.json", "w") as f:
490
 
491
  # %% [markdown]
492
  # ## 4. Generate embeddings
493
- #
494
  # Now that we have the queries, we can use the ColPali model to generate embeddings for each page image.
495
- #
496
-
497
 
498
  # %%
499
  def generate_embeddings(images, model, processor, batch_size=2) -> np.ndarray:
@@ -533,7 +530,6 @@ def generate_embeddings(images, model, processor, batch_size=2) -> np.ndarray:
533
  all_embeddings = np.concatenate(embeddings_list, axis=0)
534
  return all_embeddings
535
 
536
-
537
  # %%
538
  # Generate embeddings for all images
539
  images = [pdf["image"] for pdf in pdf_pages]
@@ -544,10 +540,9 @@ embeddings.shape
544
 
545
  # %% [markdown]
546
  # ## 5. Prepare Data in Vespa Format
547
- #
548
  # Now that we have all the data we need, all that remains is to make sure it is in the right format for Vespa.
549
- #
550
-
551
 
552
  # %%
553
  def float_to_binary_embedding(float_query_embedding: dict) -> dict:
@@ -560,7 +555,6 @@ def float_to_binary_embedding(float_query_embedding: dict) -> dict:
560
  binary_query_embeddings[k] = binary_vector
561
  return binary_query_embeddings
562
 
563
-
564
  # %%
565
  vespa_feed = []
566
  for pdf, embedding in zip(pdf_pages, embeddings):
@@ -622,7 +616,7 @@ len(vespa_feed)
622
 
623
  # %% [markdown]
624
  # ## 5. Prepare Vespa Application
625
- #
626
 
627
  # %%
628
  # Define the Vespa schema
@@ -762,12 +756,22 @@ mapfunctions = [
762
  # Define the 'bm25' rank profile
763
  colpali_bm25_profile = RankProfile(
764
  name="bm25",
765
- inputs=[("query(qt)", "tensor<float>(querytoken{}, v[128])")],
766
  first_phase="bm25(title) + bm25(text)",
767
  functions=mapfunctions,
768
- summary_features=["quantized"],
769
  )
 
 
 
 
 
 
 
 
 
 
770
  colpali_schema.add_rank_profile(colpali_bm25_profile)
 
771
 
772
  # Update the 'default' rank profile
773
  colpali_profile = RankProfile(
@@ -793,9 +797,9 @@ colpali_profile = RankProfile(
793
  ),
794
  Function(name="bm25_score", expression="bm25(title) + bm25(text)"),
795
  ],
796
- summary_features=["quantized"],
797
  )
798
  colpali_schema.add_rank_profile(colpali_profile)
 
799
 
800
  # Update the 'retrieval-and-rerank' rank profile
801
  input_query_tensors = []
@@ -846,9 +850,9 @@ colpali_retrieval_profile = RankProfile(
846
  """,
847
  ),
848
  ],
849
- summary_features=["quantized"],
850
  )
851
  colpali_schema.add_rank_profile(colpali_retrieval_profile)
 
852
 
853
  # %%
854
  from vespa.configuration.services import (
@@ -937,7 +941,7 @@ vespa_application_package = ApplicationPackage(
937
 
938
  # %% [markdown]
939
  # ## 6. Deploy Vespa Application
940
- #
941
 
942
  # %%
943
  VESPA_TEAM_API_KEY = os.getenv("VESPA_TEAM_API_KEY") or input(
@@ -962,18 +966,17 @@ print(f"Application deployed. Token endpoint URL: {endpoint_url}")
962
  # %% [markdown]
963
  # Make sure to take note of the token endpoint_url.
964
  # You need to put this in your `.env` file - `VESPA_APP_URL=https://abcd.vespa-app.cloud` - to access the Vespa application from your web application.
965
- #
966
 
967
  # %% [markdown]
968
  # ## 8. Feed Data to Vespa
969
- #
970
 
971
  # %%
972
  # Instantiate Vespa connection using token
973
  app = Vespa(url=endpoint_url, vespa_cloud_secret_token=VESPA_CLOUD_SECRET_TOKEN)
974
  app.get_application_status()
975
 
976
-
977
  # %%
978
  def callback(response: VespaResponse, id: str):
979
  if not response.is_successful():
@@ -984,3 +987,5 @@ def callback(response: VespaResponse, id: str):
984
 
985
  # Feed data into Vespa asynchronously
986
  app.feed_async_iterable(vespa_feed, schema=VESPA_SCHEMA_NAME, callback=callback)
 
 
 
1
  # %% [markdown]
2
  # # Visual PDF Retrieval - demo application
3
+ #
4
  # In this notebook, we will prepare the Vespa backend application for our visual retrieval demo.
5
  # We will use ColPali as the model to extract patch vectors from images of pdf pages.
6
  # At query time, we use MaxSim to retrieve and/or (based on the configuration) rank the page results.
7
+ #
8
  # To see the application in action, visit TODO:
9
+ #
10
  # The web application is written in FastHTML, meaning the complete application is written in python.
11
+ #
12
  # The steps we will take in this notebook are:
13
+ #
14
  # 0. Setup and configuration
15
  # 1. Download the data
16
  # 2. Prepare the data
 
18
  # 4. Deploy the Vespa application
19
  # 5. Create the Vespa application
20
  # 6. Feed the data to the Vespa application
21
+ #
22
  # All the steps that are needed to provision the Vespa application, including feeding the data, can be done from this notebook.
23
  # We have tried to make it easy for others to run this notebook, to create your own PDF Enterprise Search application using Vespa.
24
+ #
25
 
26
  # %% [markdown]
27
  # ## 0. Setup and Configuration
28
+ #
29
 
30
  # %%
31
  import os
 
83
 
84
  # %% [markdown]
85
  # ### Create a free trial in Vespa Cloud
86
+ #
87
  # Create a tenant from [here](https://vespa.ai/free-trial/).
88
  # The trial includes $300 credit.
89
  # Take note of your tenant name.
90
+ #
91
 
92
  # %%
93
  VESPA_TENANT_NAME = "vespa-team"
 
95
  # %% [markdown]
96
  # Here, set your desired application name. (Will be created in later steps)
97
  # Note that you cannot have a hyphen `-` or underscore `_` in the application name.
98
+ #
99
 
100
  # %%
101
  VESPA_APPLICATION_NAME = "colpalidemo2"
 
105
  # Next, you need to create some tokens for feeding data, and querying the application.
106
  # We recommend separate tokens for feeding and querying, (the former with write permission, and the latter with read permission).
107
  # The tokens can be created from the [Vespa Cloud console](https://console.vespa-cloud.com/) in the 'Account' -> 'Tokens' section.
108
+ #
109
 
110
  # %%
111
  VESPA_TOKEN_ID_WRITE = "colpalidemo_write"
 
113
 
114
  # %% [markdown]
115
  # We also need to set the value of the write token to be able to feed data to the Vespa application.
116
+ #
117
 
118
  # %%
119
  VESPA_CLOUD_SECRET_TOKEN = os.getenv("VESPA_CLOUD_SECRET_TOKEN") or input(
 
124
  # We will also use the Gemini API to create sample queries for our images.
125
  # You can also use other VLMs to create these queries.
126
  # Create a Gemini API key from [here](https://aistudio.google.com/app/apikey).
127
+ #
128
 
129
  # %%
130
  GEMINI_API_KEY = os.getenv("GEMINI_API_KEY") or input(
 
152
 
153
  # %% [markdown]
154
  # ## 1. Download PDFs
155
+ #
156
  # We are going to use public reports from the Norwegian Government Pension Fund Global (also known as the Oil Fund).
157
  # The fund puts transparency at the forefront and publishes reports on its investments, holdings, and returns, as well as its strategy and governance.
158
+ #
159
  # These reports are the ones we are going to use for this showcase.
160
  # Here are some sample images:
161
+ #
162
  # ![Sample1](./static/img/gfpg-sample-1.png)
163
  # ![Sample2](./static/img/gfpg-sample-2.png)
164
+ #
165
 
166
  # %% [markdown]
167
  # As we can see, a lot of the information is in the form of tables, charts and numbers.
168
  # These are not easily extractable using pdf-readers or OCR tools.
169
+ #
170
 
171
  # %%
172
  import requests
 
274
 
275
  # %% [markdown]
276
  # ## 2. Convert PDFs to Images
277
+ #
 
278
 
279
  # %%
280
  def get_pdf_images(pdf_path):
 
324
 
325
  # %% [markdown]
326
  # ## 3. Generate Queries
327
+ #
328
  # In this step, we want to generate queries for each page image.
329
  # These will be useful for 2 reasons:
330
+ #
331
  # 1. We can use these queries as typeahead suggestions in the search bar.
332
  # 2. We can use the queries to generate an evaluation dataset. See [Improving Retrieval with LLM-as-a-judge](https://blog.vespa.ai/improving-retrieval-with-llm-as-a-judge/) for a deeper dive into this topic.
333
+ #
334
  # The prompt for generating queries is taken from [this](https://danielvanstrien.xyz/posts/post-with-code/colpali/2024-09-23-generate_colpali_dataset.html#an-update-retrieval-focused-prompt) wonderful blog post by Daniel van Strien.
335
+ #
336
  # We will use the Gemini API to generate these queries, with `gemini-1.5-flash-8b` as the model.
337
+ #
338
 
339
  # %%
340
  from pydantic import BaseModel
 
413
  }
414
  return queries
415
 
 
416
  # %%
417
  for pdf in tqdm(pdf_pages):
418
  image = pdf.get("image")
 
488
 
489
  # %% [markdown]
490
  # ## 4. Generate embeddings
491
+ #
492
  # Now that we have the queries, we can use the ColPali model to generate embeddings for each page image.
493
+ #
 
494
 
495
  # %%
496
  def generate_embeddings(images, model, processor, batch_size=2) -> np.ndarray:
 
530
  all_embeddings = np.concatenate(embeddings_list, axis=0)
531
  return all_embeddings
532
 
 
533
  # %%
534
  # Generate embeddings for all images
535
  images = [pdf["image"] for pdf in pdf_pages]
 
540
 
541
  # %% [markdown]
542
  # ## 5. Prepare Data in Vespa Format
543
+ #
544
  # Now that we have all the data we need, all that remains is to make sure it is in the right format for Vespa.
545
+ #
 
546
 
547
  # %%
548
  def float_to_binary_embedding(float_query_embedding: dict) -> dict:
 
555
  binary_query_embeddings[k] = binary_vector
556
  return binary_query_embeddings
557
 
 
558
  # %%
559
  vespa_feed = []
560
  for pdf, embedding in zip(pdf_pages, embeddings):
 
616
 
617
  # %% [markdown]
618
  # ## 5. Prepare Vespa Application
619
+ #
620
 
621
  # %%
622
  # Define the Vespa schema
 
756
  # Define the 'bm25' rank profile
757
  colpali_bm25_profile = RankProfile(
758
  name="bm25",
759
+ inputs=[("query(qt)", "tensor<float>(querytoken{}, v[128])")],
760
  first_phase="bm25(title) + bm25(text)",
761
  functions=mapfunctions,
 
762
  )
763
+
764
+ # A function to create an inherited rank profile which also returns quantized similarity scores
765
def with_quantized_similarity(rank_profile: RankProfile) -> RankProfile:
    """Derive a ``<name>_sim`` rank profile from ``rank_profile``.

    The derived profile inherits from the given profile, reuses its
    first-phase expression, and requests ``quantized`` as a summary feature.

    Args:
        rank_profile: The profile to inherit from; its ``name`` and
            ``first_phase`` attributes are read.

    Returns:
        A new ``RankProfile`` named ``f"{rank_profile.name}_sim"``.
    """
    parent_name = rank_profile.name
    derived_profile = RankProfile(
        name=f"{parent_name}_sim",
        first_phase=rank_profile.first_phase,
        inherits=parent_name,
        # presumably "quantized" is one of the functions the parent profile
        # defines (e.g. via `mapfunctions`) — confirm against the schema
        summary_features=["quantized"],
    )
    return derived_profile
772
+
773
  colpali_schema.add_rank_profile(colpali_bm25_profile)
774
+ colpali_schema.add_rank_profile(with_quantized_similarity(colpali_bm25_profile))
775
 
776
  # Update the 'default' rank profile
777
  colpali_profile = RankProfile(
 
797
  ),
798
  Function(name="bm25_score", expression="bm25(title) + bm25(text)"),
799
  ],
 
800
  )
801
  colpali_schema.add_rank_profile(colpali_profile)
802
+ colpali_schema.add_rank_profile(with_quantized_similarity(colpali_profile))
803
 
804
  # Update the 'retrieval-and-rerank' rank profile
805
  input_query_tensors = []
 
850
  """,
851
  ),
852
  ],
 
853
  )
854
  colpali_schema.add_rank_profile(colpali_retrieval_profile)
855
+ colpali_schema.add_rank_profile(with_quantized_similarity(colpali_retrieval_profile))
856
 
857
  # %%
858
  from vespa.configuration.services import (
 
941
 
942
  # %% [markdown]
943
  # ## 6. Deploy Vespa Application
944
+ #
945
 
946
  # %%
947
  VESPA_TEAM_API_KEY = os.getenv("VESPA_TEAM_API_KEY") or input(
 
966
  # %% [markdown]
967
  # Make sure to take note of the token endpoint_url.
968
  # You need to put this in your `.env` file - `VESPA_APP_URL=https://abcd.vespa-app.cloud` - to access the Vespa application from your web application.
969
+ #
970
 
971
  # %% [markdown]
972
  # ## 8. Feed Data to Vespa
973
+ #
974
 
975
  # %%
976
  # Instantiate Vespa connection using token
977
  app = Vespa(url=endpoint_url, vespa_cloud_secret_token=VESPA_CLOUD_SECRET_TOKEN)
978
  app.get_application_status()
979
 
 
980
  # %%
981
  def callback(response: VespaResponse, id: str):
982
  if not response.is_successful():
 
987
 
988
  # Feed data into Vespa asynchronously
989
  app.feed_async_iterable(vespa_feed, schema=VESPA_SCHEMA_NAME, callback=callback)
990
+
991
+
pyproject.toml CHANGED
@@ -18,7 +18,9 @@ dependencies = [
18
  "setuptools",
19
  "python-dotenv",
20
  "shad4fast>=1.2.1",
21
- "google-generativeai>=0.7.2"
 
 
22
  ]
23
 
24
  # dev-dependencies
 
18
  "setuptools",
19
  "python-dotenv",
20
  "shad4fast>=1.2.1",
21
+ "google-generativeai>=0.7.2",
22
+ "spacy",
23
+ "pip"
24
  ]
25
 
26
  # dev-dependencies
static/.DS_Store CHANGED
Binary files a/static/.DS_Store and b/static/.DS_Store differ
 
uv.lock CHANGED
@@ -198,6 +198,32 @@ wheels = [
198
  { url = "https://files.pythonhosted.org/packages/b1/fe/e8c672695b37eecc5cbf43e1d0638d88d66ba3a44c4d321c796f4e59167f/beautifulsoup4-4.12.3-py3-none-any.whl", hash = "sha256:b80878c9f40111313e55da8ba20bdba06d8fa3969fc68304167741bbf9e082ed", size = 147925 },
199
  ]
200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
201
  [[package]]
202
  name = "cachetools"
203
  version = "5.5.0"
@@ -207,6 +233,15 @@ wheels = [
207
  { url = "https://files.pythonhosted.org/packages/a4/07/14f8ad37f2d12a5ce41206c21820d8cb6561b728e51fad4530dff0552a67/cachetools-5.5.0-py3-none-any.whl", hash = "sha256:02134e8439cdc2ffb62023ce1debca2944c3f289d66bb17ead3ab3dede74b292", size = 9524 },
208
  ]
209
 
 
 
 
 
 
 
 
 
 
210
  [[package]]
211
  name = "certifi"
212
  version = "2024.8.30"
@@ -354,6 +389,18 @@ wheels = [
354
  { url = "https://files.pythonhosted.org/packages/00/2e/d53fa4befbf2cfa713304affc7ca780ce4fc1fd8710527771b58311a3229/click-8.1.7-py3-none-any.whl", hash = "sha256:ae74fb96c20a0277a1d615f1e4d73c8414f5a98db8b799a7931d1582f3390c28", size = 97941 },
355
  ]
356
 
 
 
 
 
 
 
 
 
 
 
 
 
357
  [[package]]
358
  name = "colorama"
359
  version = "0.4.6"
@@ -393,6 +440,19 @@ wheels = [
393
  { url = "https://files.pythonhosted.org/packages/e6/75/49e5bfe642f71f272236b5b2d2691cf915a7283cc0ceda56357b61daa538/comm-0.2.2-py3-none-any.whl", hash = "sha256:e6fb86cb70ff661ee8c9c14e7d36d6de3b4066f1441be4063df9c5009f0a64d3", size = 7180 },
394
  ]
395
 
 
 
 
 
 
 
 
 
 
 
 
 
 
396
  [[package]]
397
  name = "contourpy"
398
  version = "1.3.0"
@@ -497,6 +557,29 @@ wheels = [
497
  { url = "https://files.pythonhosted.org/packages/e7/05/c19819d5e3d95294a6f5947fb9b9629efb316b96de511b418c53d245aae6/cycler-0.12.1-py3-none-any.whl", hash = "sha256:85cef7cff222d8644161529808465972e51340599459b8ac3ccbac5a854e0d30", size = 8321 },
498
  ]
499
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
500
  [[package]]
501
  name = "datasets"
502
  version = "3.0.1"
@@ -1346,6 +1429,30 @@ wheels = [
1346
  { url = "https://files.pythonhosted.org/packages/ea/8b/d7497df4a1cae9367adf21665dd1f896c2a7aeb8769ad77b662c5e2bcce7/kiwisolver-1.4.7-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:eb542fe7933aa09d8d8f9d9097ef37532a7df6497819d16efe4359890a2f417a", size = 55715 },
1347
  ]
1348
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1349
  [[package]]
1350
  name = "loguru"
1351
  version = "0.7.2"
@@ -1455,6 +1562,61 @@ wheels = [
1455
  { url = "https://files.pythonhosted.org/packages/ba/b2/6a22fb5c0885da3b00e116aee81f0b829ec9ac8f736cd414b4a09413fc7d/lxml-5.3.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:6e91cf736959057f7aac7adfc83481e03615a8e8dd5758aa1d95ea69e8931dba", size = 3487557 },
1456
  ]
1457
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1458
  [[package]]
1459
  name = "markdown-it-py"
1460
  version = "3.0.0"
@@ -1728,6 +1890,29 @@ wheels = [
1728
  { url = "https://files.pythonhosted.org/packages/0a/7d/a988f258104dcd2ccf1ed40fdc97e26c4ac351eeaf81d76e266c52d84e2f/multiprocess-0.70.16-py312-none-any.whl", hash = "sha256:fc0544c531920dde3b00c29863377f87e1632601092ea2daca74e4beb40faa2e", size = 146741 },
1729
  ]
1730
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1731
  [[package]]
1732
  name = "nbformat"
1733
  version = "5.10.4"
@@ -2101,6 +2286,15 @@ wheels = [
2101
  { url = "https://files.pythonhosted.org/packages/52/3b/ce7a01026a7cf46e5452afa86f97a5e88ca97f562cafa76570178ab56d8d/pillow-10.4.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:0755ffd4a0c6f267cccbae2e9903d95477ca2f77c4fcf3a3a09570001856c8a5", size = 2554661 },
2102
  ]
2103
 
 
 
 
 
 
 
 
 
 
2104
  [[package]]
2105
  name = "platformdirs"
2106
  version = "4.3.6"
@@ -2123,6 +2317,33 @@ wheels = [
2123
  { url = "https://files.pythonhosted.org/packages/ed/e2/134a4c381f63e8498314f15d5f8db32bdd9ee40806aba34c3e270915a629/polars-1.10.0-cp39-abi3-win_amd64.whl", hash = "sha256:182e03bd3486490c980a59cbae0be53c0688f6f6f6a2bccc28e07cc1b7f8a4b5", size = 32868791 },
2124
  ]
2125
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2126
  [[package]]
2127
  name = "prompt-toolkit"
2128
  version = "3.0.48"
@@ -3163,6 +3384,18 @@ wheels = [
3163
  { url = "https://files.pythonhosted.org/packages/d9/5a/e7c31adbe875f2abbb91bd84cf2dc52d792b5a01506781dbcf25c91daf11/six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254", size = 11053 },
3164
  ]
3165
 
 
 
 
 
 
 
 
 
 
 
 
 
3166
  [[package]]
3167
  name = "sniffio"
3168
  version = "1.3.1"
@@ -3181,6 +3414,68 @@ wheels = [
3181
  { url = "https://files.pythonhosted.org/packages/d1/c2/fe97d779f3ef3b15f05c94a2f1e3d21732574ed441687474db9d342a7315/soupsieve-2.6-py3-none-any.whl", hash = "sha256:e72c4ff06e4fb6e4b5a9f0f55fe6e81514581fca1515028625d0f299c602ccc9", size = 36186 },
3182
  ]
3183
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3184
  [[package]]
3185
  name = "sqlite-minutils"
3186
  version = "3.37.0.post4"
@@ -3193,6 +3488,32 @@ wheels = [
3193
  { url = "https://files.pythonhosted.org/packages/71/fc/c8718f39d96169235e12be7272f96d1365f3193cb46216f3930d3327b292/sqlite_minutils-3.37.0.post4-py3-none-any.whl", hash = "sha256:8de2b6dacf7bc34f7d3d4b3c576dd8f3ab725514b331b53e2b23a5d130fd1d1f", size = 77737 },
3194
  ]
3195
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3196
  [[package]]
3197
  name = "stack-data"
3198
  version = "0.6.3"
@@ -3240,6 +3561,43 @@ wheels = [
3240
  { url = "https://files.pythonhosted.org/packages/b6/cb/b86984bed139586d01532a587464b5805f12e397594f19f931c4c2fbfa61/tenacity-9.0.0-py3-none-any.whl", hash = "sha256:93de0c98785b27fcf659856aa9f54bfbd399e29969b0621bc7f762bd441b4539", size = 28169 },
3241
  ]
3242
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3243
  [[package]]
3244
  name = "threadpoolctl"
3245
  version = "3.5.0"
@@ -3419,7 +3777,7 @@ name = "triton"
3419
  version = "3.1.0"
3420
  source = { registry = "https://pypi.org/simple" }
3421
  dependencies = [
3422
- { name = "filelock" },
3423
  ]
3424
  wheels = [
3425
  { url = "https://files.pythonhosted.org/packages/98/29/69aa56dc0b2eb2602b553881e34243475ea2afd9699be042316842788ff5/triton-3.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6b0dd10a925263abbe9fa37dcde67a5e9b2383fc269fdf59f5657cac38c5d1d8", size = 209460013 },
@@ -3582,12 +3940,14 @@ dependencies = [
3582
  { name = "einops" },
3583
  { name = "google-generativeai" },
3584
  { name = "huggingface-hub" },
 
3585
  { name = "pypdf" },
3586
  { name = "python-dotenv" },
3587
  { name = "python-fasthtml" },
3588
  { name = "pyvespa" },
3589
  { name = "setuptools" },
3590
  { name = "shad4fast" },
 
3591
  { name = "torch" },
3592
  { name = "vespacli" },
3593
  { name = "vidore-benchmark", extra = ["interpretability"] },
@@ -3620,6 +3980,7 @@ requires-dist = [
3620
  { name = "ipykernel", marker = "extra == 'feed'" },
3621
  { name = "jupytext", marker = "extra == 'feed'" },
3622
  { name = "pdf2image", marker = "extra == 'feed'" },
 
3623
  { name = "pydantic", marker = "extra == 'feed'" },
3624
  { name = "pypdf" },
3625
  { name = "python-dotenv" },
@@ -3629,11 +3990,24 @@ requires-dist = [
3629
  { name = "ruff", marker = "extra == 'dev'" },
3630
  { name = "setuptools" },
3631
  { name = "shad4fast", specifier = ">=1.2.1" },
 
3632
  { name = "torch" },
3633
  { name = "vespacli" },
3634
  { name = "vidore-benchmark", extras = ["interpretability"], specifier = ">=4.0.0,<5.0.0" },
3635
  ]
3636
 
 
 
 
 
 
 
 
 
 
 
 
 
3637
  [[package]]
3638
  name = "watchfiles"
3639
  version = "0.24.0"
@@ -3708,6 +4082,26 @@ wheels = [
3708
  { url = "https://files.pythonhosted.org/packages/fd/84/fd2ba7aafacbad3c4201d395674fc6348826569da3c0937e75505ead3528/wcwidth-0.2.13-py2.py3-none-any.whl", hash = "sha256:3da69048e4540d84af32131829ff948f1e022c1c6bdb8d6102117aac784f6859", size = 34166 },
3709
  ]
3710
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3711
  [[package]]
3712
  name = "websockets"
3713
  version = "13.1"
@@ -3776,6 +4170,45 @@ wheels = [
3776
  { url = "https://files.pythonhosted.org/packages/0a/e6/a7d828fef907843b2a5773ebff47fb79ac0c1c88d60c0ca9530ee941e248/win32_setctime-1.1.0-py3-none-any.whl", hash = "sha256:231db239e959c2fe7eb1d7dc129f11172354f98361c4fa2d6d2d7e278baa8aad", size = 3604 },
3777
  ]
3778
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3779
  [[package]]
3780
  name = "xxhash"
3781
  version = "3.5.0"
 
198
  { url = "https://files.pythonhosted.org/packages/b1/fe/e8c672695b37eecc5cbf43e1d0638d88d66ba3a44c4d321c796f4e59167f/beautifulsoup4-4.12.3-py3-none-any.whl", hash = "sha256:b80878c9f40111313e55da8ba20bdba06d8fa3969fc68304167741bbf9e082ed", size = 147925 },
199
  ]
200
 
201
+ [[package]]
202
+ name = "blis"
203
+ version = "0.7.11"
204
+ source = { registry = "https://pypi.org/simple" }
205
+ dependencies = [
206
+ { name = "numpy" },
207
+ ]
208
+ sdist = { url = "https://files.pythonhosted.org/packages/51/8c/60c85350f2e1c9647df580083a0f6acc686ef32d1a91f4ab0c624b3ff867/blis-0.7.11.tar.gz", hash = "sha256:cec6d48f75f7ac328ae1b6fbb372dde8c8a57c89559172277f66e01ff08d4d42", size = 2897107 }
209
+ wheels = [
210
+ { url = "https://files.pythonhosted.org/packages/41/8b/b61978aa36de134d1056c55c2efe818042df68aff211b91fa5b1b9ae3f85/blis-0.7.11-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:cd5fba34c5775e4c440d80e4dea8acb40e2d3855b546e07c4e21fad8f972404c", size = 6127109 },
211
+ { url = "https://files.pythonhosted.org/packages/3d/95/f23fbbf3010bf057302ebbb8ad697fb9a0f8624e833025c4a58bfb8d3389/blis-0.7.11-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:31273d9086cab9c56986d478e3ed6da6752fa4cdd0f7b5e8e5db30827912d90d", size = 1110252 },
212
+ { url = "https://files.pythonhosted.org/packages/fd/82/8d9576904833a8575ae6758dd8c1a2152fdec1705dd3ae65a10e99d8896a/blis-0.7.11-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d06883f83d4c8de8264154f7c4a420b4af323050ed07398c1ff201c34c25c0d2", size = 1711161 },
213
+ { url = "https://files.pythonhosted.org/packages/9b/81/55092e1c016fe05ef7a57623920209012f05e8b897acbad355c9bf854181/blis-0.7.11-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ee493683e3043650d4413d531e79e580d28a3c7bdd184f1b9cfa565497bda1e7", size = 10171589 },
214
+ { url = "https://files.pythonhosted.org/packages/ad/65/d9fd07e11499e0a3162c6d61ae430172125e5c340c89c40504189d5299b9/blis-0.7.11-cp310-cp310-win_amd64.whl", hash = "sha256:a73945a9d635eea528bccfdfcaa59dd35bd5f82a4a40d5ca31f08f507f3a6f81", size = 6620069 },
215
+ { url = "https://files.pythonhosted.org/packages/c7/59/c8010f380a16709e6d3ef5534845d1ca1e689079914ec67ab60f57edfc37/blis-0.7.11-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:1b68df4d01d62f9adaef3dad6f96418787265a6878891fc4e0fabafd6d02afba", size = 6123547 },
216
+ { url = "https://files.pythonhosted.org/packages/a8/73/0a9d4e7f6e78ef270e3a4532b17e060a02087590cf615ba9943fd1a283e9/blis-0.7.11-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:162e60d941a8151418d558a94ee5547cb1bbeed9f26b3b6f89ec9243f111a201", size = 1106895 },
217
+ { url = "https://files.pythonhosted.org/packages/51/f7/a5d9a0be0729f4172248dbae74d7e02b139b3a32cc29650d3ade7ab91fea/blis-0.7.11-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:686a7d0111d5ba727cd62f374748952fd6eb74701b18177f525b16209a253c01", size = 1707389 },
218
+ { url = "https://files.pythonhosted.org/packages/dc/23/eb01450dc284a7ea8ebc0e5296f1f8fdbe5299169f4c318f836b4284a119/blis-0.7.11-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0421d6e44cda202b113a34761f9a062b53f8c2ae8e4ec8325a76e709fca93b6e", size = 10172888 },
219
+ { url = "https://files.pythonhosted.org/packages/2f/09/da0592c74560cc33396504698122f7a56747c82a5e072ca7d2c3397898e1/blis-0.7.11-cp311-cp311-win_amd64.whl", hash = "sha256:0dc9dcb3843045b6b8b00432409fd5ee96b8344a324e031bfec7303838c41a1a", size = 6602835 },
220
+ { url = "https://files.pythonhosted.org/packages/e2/12/90897bc489626cb71e51ce8bb89e492fabe96a57811e53159c0f74ae90ec/blis-0.7.11-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:dadf8713ea51d91444d14ad4104a5493fa7ecc401bbb5f4a203ff6448fadb113", size = 6121528 },
221
+ { url = "https://files.pythonhosted.org/packages/e2/5d/67a3f6b6108c39d3fd1cf55a7dca9267152190dad419c9de6d764b3708ca/blis-0.7.11-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:5bcdaf370f03adaf4171d6405a89fa66cb3c09399d75fc02e1230a78cd2759e4", size = 1105039 },
222
+ { url = "https://files.pythonhosted.org/packages/03/62/0d214dde0703863ed2d3dabb3f10606f7f55ac4eb07a52c3906601331b63/blis-0.7.11-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7de19264b1d49a178bf8035406d0ae77831f3bfaa3ce02942964a81a202abb03", size = 1701009 },
223
+ { url = "https://files.pythonhosted.org/packages/66/aa/bcbd1c6b1c7dfd717ff5c899a1c8adcc6b3e391fb7a0b00fdc64e4e54235/blis-0.7.11-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8ea55c6a4a60fcbf6a0fdce40df6e254451ce636988323a34b9c94b583fc11e5", size = 10161187 },
224
+ { url = "https://files.pythonhosted.org/packages/9a/91/4aea63dccee6491a54c630d9817656a886e086ab97222e2d8101d8cdf894/blis-0.7.11-cp312-cp312-win_amd64.whl", hash = "sha256:5a305dbfc96d202a20d0edd6edf74a406b7e1404f4fa4397d24c68454e60b1b4", size = 6624079 },
225
+ ]
226
+
227
  [[package]]
228
  name = "cachetools"
229
  version = "5.5.0"
 
233
  { url = "https://files.pythonhosted.org/packages/a4/07/14f8ad37f2d12a5ce41206c21820d8cb6561b728e51fad4530dff0552a67/cachetools-5.5.0-py3-none-any.whl", hash = "sha256:02134e8439cdc2ffb62023ce1debca2944c3f289d66bb17ead3ab3dede74b292", size = 9524 },
234
  ]
235
 
236
+ [[package]]
237
+ name = "catalogue"
238
+ version = "2.0.10"
239
+ source = { registry = "https://pypi.org/simple" }
240
+ sdist = { url = "https://files.pythonhosted.org/packages/38/b4/244d58127e1cdf04cf2dc7d9566f0d24ef01d5ce21811bab088ecc62b5ea/catalogue-2.0.10.tar.gz", hash = "sha256:4f56daa940913d3f09d589c191c74e5a6d51762b3a9e37dd53b7437afd6cda15", size = 19561 }
241
+ wheels = [
242
+ { url = "https://files.pythonhosted.org/packages/9e/96/d32b941a501ab566a16358d68b6eb4e4acc373fab3c3c4d7d9e649f7b4bb/catalogue-2.0.10-py3-none-any.whl", hash = "sha256:58c2de0020aa90f4a2da7dfad161bf7b3b054c86a5f09fcedc0b2b740c109a9f", size = 17325 },
243
+ ]
244
+
245
  [[package]]
246
  name = "certifi"
247
  version = "2024.8.30"
 
389
  { url = "https://files.pythonhosted.org/packages/00/2e/d53fa4befbf2cfa713304affc7ca780ce4fc1fd8710527771b58311a3229/click-8.1.7-py3-none-any.whl", hash = "sha256:ae74fb96c20a0277a1d615f1e4d73c8414f5a98db8b799a7931d1582f3390c28", size = 97941 },
390
  ]
391
 
392
+ [[package]]
393
+ name = "cloudpathlib"
394
+ version = "0.20.0"
395
+ source = { registry = "https://pypi.org/simple" }
396
+ dependencies = [
397
+ { name = "typing-extensions", marker = "python_full_version < '3.11'" },
398
+ ]
399
+ sdist = { url = "https://files.pythonhosted.org/packages/71/0b/a47d78ed2816db100543b504fdbfc2070f422aac858e6bcf775713e37b8a/cloudpathlib-0.20.0.tar.gz", hash = "sha256:f6ef7ca409a510f7ba4639ba50ab3fc5b6dee82d6dff0d7f5715fd0c9ab35891", size = 45149 }
400
+ wheels = [
401
+ { url = "https://files.pythonhosted.org/packages/1f/6e/b64600156934dab14cc8b403095a9ea8bd722aad2e775673c68346b76220/cloudpathlib-0.20.0-py3-none-any.whl", hash = "sha256:7af3bcefbf73392ae7f31c08b3660ec31607f8c01b7f6262d4d73469a845f641", size = 52547 },
402
+ ]
403
+
404
  [[package]]
405
  name = "colorama"
406
  version = "0.4.6"
 
440
  { url = "https://files.pythonhosted.org/packages/e6/75/49e5bfe642f71f272236b5b2d2691cf915a7283cc0ceda56357b61daa538/comm-0.2.2-py3-none-any.whl", hash = "sha256:e6fb86cb70ff661ee8c9c14e7d36d6de3b4066f1441be4063df9c5009f0a64d3", size = 7180 },
441
  ]
442
 
443
+ [[package]]
444
+ name = "confection"
445
+ version = "0.1.5"
446
+ source = { registry = "https://pypi.org/simple" }
447
+ dependencies = [
448
+ { name = "pydantic" },
449
+ { name = "srsly" },
450
+ ]
451
+ sdist = { url = "https://files.pythonhosted.org/packages/51/d3/57c6631159a1b48d273b40865c315cf51f89df7a9d1101094ef12e3a37c2/confection-0.1.5.tar.gz", hash = "sha256:8e72dd3ca6bd4f48913cd220f10b8275978e740411654b6e8ca6d7008c590f0e", size = 38924 }
452
+ wheels = [
453
+ { url = "https://files.pythonhosted.org/packages/0c/00/3106b1854b45bd0474ced037dfe6b73b90fe68a68968cef47c23de3d43d2/confection-0.1.5-py3-none-any.whl", hash = "sha256:e29d3c3f8eac06b3f77eb9dfb4bf2fc6bcc9622a98ca00a698e3d019c6430b14", size = 35451 },
454
+ ]
455
+
456
  [[package]]
457
  name = "contourpy"
458
  version = "1.3.0"
 
557
  { url = "https://files.pythonhosted.org/packages/e7/05/c19819d5e3d95294a6f5947fb9b9629efb316b96de511b418c53d245aae6/cycler-0.12.1-py3-none-any.whl", hash = "sha256:85cef7cff222d8644161529808465972e51340599459b8ac3ccbac5a854e0d30", size = 8321 },
558
  ]
559
 
560
+ [[package]]
561
+ name = "cymem"
562
+ version = "2.0.8"
563
+ source = { registry = "https://pypi.org/simple" }
564
+ sdist = { url = "https://files.pythonhosted.org/packages/36/32/f4a457fc6c160a9e72b15dab1ca14ca5c8869074638bca8bfc26120c04e9/cymem-2.0.8.tar.gz", hash = "sha256:8fb09d222e21dcf1c7e907dc85cf74501d4cea6c4ed4ac6c9e016f98fb59cbbf", size = 9836 }
565
+ wheels = [
566
+ { url = "https://files.pythonhosted.org/packages/06/e8/0ab9faadd0911307c4158cc52abcaae6141283abb17275326b4d3b99089f/cymem-2.0.8-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:77b5d3a73c41a394efd5913ab7e48512054cd2dabb9582d489535456641c7666", size = 41618 },
567
+ { url = "https://files.pythonhosted.org/packages/83/bb/21dcb7cb06c97fd99019369071f0b9ad544c3db68343abbceb283e8a5223/cymem-2.0.8-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:bd33da892fb560ba85ea14b1528c381ff474048e861accc3366c8b491035a378", size = 41017 },
568
+ { url = "https://files.pythonhosted.org/packages/42/f0/a5cfe24f98b9fa1c6552e7d6f3e67db3a2bd9d68cc3946651cd53513f588/cymem-2.0.8-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:29a551eda23eebd6d076b855f77a5ed14a1d1cae5946f7b3cb5de502e21b39b0", size = 44037 },
569
+ { url = "https://files.pythonhosted.org/packages/e9/13/3bed1a1d1cce7937eb797d760c0cca973dbdc1891ad7e2f066ae418fd697/cymem-2.0.8-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e8260445652ae5ab19fff6851f32969a7b774f309162e83367dd0f69aac5dbf7", size = 46116 },
570
+ { url = "https://files.pythonhosted.org/packages/51/12/4aa9eec680c6d12b2275d479e159c3d063d7c757175063dd45386e15b39d/cymem-2.0.8-cp310-cp310-win_amd64.whl", hash = "sha256:a63a2bef4c7e0aec7c9908bca0a503bf91ac7ec18d41dd50dc7dff5d994e4387", size = 39048 },
571
+ { url = "https://files.pythonhosted.org/packages/20/1f/2ae07056430a0276e0cbd765652db82ea153c5fb2a3d753fbffd553827d5/cymem-2.0.8-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6b84b780d52cb2db53d4494fe0083c4c5ee1f7b5380ceaea5b824569009ee5bd", size = 41935 },
572
+ { url = "https://files.pythonhosted.org/packages/d7/f6/67babf1439cdd6d46e4e805616bee84981305c80e562320c293712f54034/cymem-2.0.8-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:0d5f83dc3cb5a39f0e32653cceb7c8ce0183d82f1162ca418356f4a8ed9e203e", size = 41235 },
573
+ { url = "https://files.pythonhosted.org/packages/bb/3b/3d6b284c82be7571c0a67b11edce486f404971b4ec849fac4a679f85f93a/cymem-2.0.8-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4ac218cf8a43a761dc6b2f14ae8d183aca2bbb85b60fe316fd6613693b2a7914", size = 44173 },
574
+ { url = "https://files.pythonhosted.org/packages/e5/bc/761acaf88b1fa69a6b75b55c24fbd8b47dab1a3c414d9512e907a646a048/cymem-2.0.8-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:42c993589d1811ec665d37437d5677b8757f53afadd927bf8516ac8ce2d3a50c", size = 46333 },
575
+ { url = "https://files.pythonhosted.org/packages/c1/c3/dd044e6f62a3d317c461f6f0c153c6573ed13025752d779e514000c15dd2/cymem-2.0.8-cp311-cp311-win_amd64.whl", hash = "sha256:ab3cf20e0eabee9b6025ceb0245dadd534a96710d43fb7a91a35e0b9e672ee44", size = 39132 },
576
+ { url = "https://files.pythonhosted.org/packages/a3/f8/030ee2fc2665f7d2e62079299e593a79a661b8a32f69653fee6cc0cd2f30/cymem-2.0.8-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:cb51fddf1b920abb1f2742d1d385469bc7b4b8083e1cfa60255e19bc0900ccb5", size = 42267 },
577
+ { url = "https://files.pythonhosted.org/packages/14/f4/fb926be8f0d826f35eb86e021a1cbdc67966fa0f2ce94cd24ad898260b9c/cymem-2.0.8-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:9235957f8c6bc2574a6a506a1687164ad629d0b4451ded89d49ebfc61b52660c", size = 41391 },
578
+ { url = "https://files.pythonhosted.org/packages/8a/77/70f8b77c4db30e5765092033e283aadd51ad78364f10cd2d331a1f158fcb/cymem-2.0.8-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a2cc38930ff5409f8d61f69a01e39ecb185c175785a1c9bec13bcd3ac8a614ba", size = 44170 },
579
+ { url = "https://files.pythonhosted.org/packages/3b/59/1cc0df0f8a5fb90412cfc7eb084ceeb079f4349232c422e10e502eb255c3/cymem-2.0.8-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7bf49e3ea2c441f7b7848d5c61b50803e8cbd49541a70bb41ad22fce76d87603", size = 46653 },
580
+ { url = "https://files.pythonhosted.org/packages/35/e0/34b11adc80502f0760ce2892dfdfcd8a7f450acd3147156c98620cb4071d/cymem-2.0.8-cp312-cp312-win_amd64.whl", hash = "sha256:ecd12e3bacf3eed5486e4cd8ede3c12da66ee0e0a9d0ae046962bc2bb503acef", size = 39052 },
581
+ ]
582
+
583
  [[package]]
584
  name = "datasets"
585
  version = "3.0.1"
 
1429
  { url = "https://files.pythonhosted.org/packages/ea/8b/d7497df4a1cae9367adf21665dd1f896c2a7aeb8769ad77b662c5e2bcce7/kiwisolver-1.4.7-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:eb542fe7933aa09d8d8f9d9097ef37532a7df6497819d16efe4359890a2f417a", size = 55715 },
1430
  ]
1431
 
1432
+ [[package]]
1433
+ name = "langcodes"
1434
+ version = "3.4.1"
1435
+ source = { registry = "https://pypi.org/simple" }
1436
+ dependencies = [
1437
+ { name = "language-data" },
1438
+ ]
1439
+ sdist = { url = "https://files.pythonhosted.org/packages/f4/79/adb488d97c8bad22fe69a1966c3fb47eb38b22598324d8ffbc5e88bc475d/langcodes-3.4.1.tar.gz", hash = "sha256:a24879fed238013ac3af2424b9d1124e38b4a38b2044fd297c8ff38e5912e718", size = 190832 }
1440
+ wheels = [
1441
+ { url = "https://files.pythonhosted.org/packages/e8/fc/79a44f4bc84b8e669dad3ca5652263477c7ecfc830d09777a214317915f9/langcodes-3.4.1-py3-none-any.whl", hash = "sha256:68f686fc3d358f222674ecf697ddcee3ace3c2fe325083ecad2543fd28a20e77", size = 182392 },
1442
+ ]
1443
+
1444
+ [[package]]
1445
+ name = "language-data"
1446
+ version = "1.2.0"
1447
+ source = { registry = "https://pypi.org/simple" }
1448
+ dependencies = [
1449
+ { name = "marisa-trie" },
1450
+ ]
1451
+ sdist = { url = "https://files.pythonhosted.org/packages/ad/53/d3657025d32bfacc832769ab3c925f8f4ad2165cd2c8467c2446b21400d1/language_data-1.2.0.tar.gz", hash = "sha256:82a86050bbd677bfde87d97885b17566cfe75dad3ac4f5ce44b52c28f752e773", size = 5137321 }
1452
+ wheels = [
1453
+ { url = "https://files.pythonhosted.org/packages/12/5f/139464da89c49afcc8bb97ebad48818a535220ce01b1f24c61fb80dbe4d0/language_data-1.2.0-py3-none-any.whl", hash = "sha256:77d5cab917f91ee0b2f1aa7018443e911cf8985ef734ca2ba3940770f6a3816b", size = 5385777 },
1454
+ ]
1455
+
1456
  [[package]]
1457
  name = "loguru"
1458
  version = "0.7.2"
 
1562
  { url = "https://files.pythonhosted.org/packages/ba/b2/6a22fb5c0885da3b00e116aee81f0b829ec9ac8f736cd414b4a09413fc7d/lxml-5.3.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:6e91cf736959057f7aac7adfc83481e03615a8e8dd5758aa1d95ea69e8931dba", size = 3487557 },
1563
  ]
1564
 
1565
+ [[package]]
1566
+ name = "marisa-trie"
1567
+ version = "1.2.1"
1568
+ source = { registry = "https://pypi.org/simple" }
1569
+ dependencies = [
1570
+ { name = "setuptools" },
1571
+ ]
1572
+ sdist = { url = "https://files.pythonhosted.org/packages/31/15/9d9743897e4450b2de199ee673b50cb018980c4ced477d41cf91304a85e3/marisa_trie-1.2.1.tar.gz", hash = "sha256:3a27c408e2aefc03e0f1d25b2ff2afb85aac3568f6fa2ae2a53b57a2e87ce29d", size = 416124 }
1573
+ wheels = [
1574
+ { url = "https://files.pythonhosted.org/packages/e4/83/ccf5b33f2123f3110705c608f8e0caa82002626511aafafc58f82e50d322/marisa_trie-1.2.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a2eb41d2f9114d8b7bd66772c237111e00d2bae2260824560eaa0a1e291ce9e8", size = 362200 },
1575
+ { url = "https://files.pythonhosted.org/packages/9d/74/f7ce1fc2ee480c7f8ceadd9b992caceaba442a97e5e99d6aea00d3635a0b/marisa_trie-1.2.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:9e956e6a46f604b17d570901e66f5214fb6f658c21e5e7665deace236793cef6", size = 192309 },
1576
+ { url = "https://files.pythonhosted.org/packages/e4/52/5dbbc13e57ce54c2ef0d04962d7d8f66edc69ed34310c734a2913199a581/marisa_trie-1.2.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:bd45142501300e7538b2e544905580918b67b1c82abed1275fe4c682c95635fa", size = 174713 },
1577
+ { url = "https://files.pythonhosted.org/packages/57/49/2580372f3f980aea95c23d05b2c1d3bbb9ee1ab8cfd441545153e44f1be7/marisa_trie-1.2.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a8443d116c612cfd1961fbf76769faf0561a46d8e317315dd13f9d9639ad500c", size = 1314808 },
1578
+ { url = "https://files.pythonhosted.org/packages/5a/ba/e12a4d450f265414cc68df6a116a78beece72b95f774f04d29cd48e08d19/marisa_trie-1.2.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:875a6248e60fbb48d947b574ffa4170f34981f9e579bde960d0f9a49ea393ecc", size = 1346678 },
1579
+ { url = "https://files.pythonhosted.org/packages/b2/81/8e130cb1eea741fd17694d821096f7ec9841f0e3d3c69b740257f5eeafa8/marisa_trie-1.2.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:746a7c60a17fccd3cfcfd4326926f02ea4fcdfc25d513411a0c4fc8e4a1ca51f", size = 1307254 },
1580
+ { url = "https://files.pythonhosted.org/packages/d7/d0/3deb5ea2bf7e4d845339875dbb31f3c3f66c8d6568723db1d137fb08a91c/marisa_trie-1.2.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:e70869737cc0e5bd903f620667da6c330d6737048d1f44db792a6af68a1d35be", size = 2194712 },
1581
+ { url = "https://files.pythonhosted.org/packages/9c/5f/b38d728dd30954816497b53425cfaddaf7b93ac0912db5911888f191b07a/marisa_trie-1.2.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:06b099dd743676dbcd8abd8465ceac8f6d97d8bfaabe2c83b965495523b4cef2", size = 2355625 },
1582
+ { url = "https://files.pythonhosted.org/packages/7e/4f/61c0faa9ae9e53600a1b7a0c367bc9db1a4fdc625402ec232c755a05e094/marisa_trie-1.2.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:d2a82eb21afdaf22b50d9b996472305c05ca67fc4ff5a026a220320c9c961db6", size = 2290290 },
1583
+ { url = "https://files.pythonhosted.org/packages/7c/7d/713b970fb3043248881ed776dbf4d54918398aa5dde843a38711d0d62c8f/marisa_trie-1.2.1-cp310-cp310-win32.whl", hash = "sha256:8951e7ce5d3167fbd085703b4cbb3f47948ed66826bef9a2173c379508776cf5", size = 130743 },
1584
+ { url = "https://files.pythonhosted.org/packages/cc/94/3d619cc82c30daeacd18a88674f4e6540ebfb7b4b7752ca0552793be80cf/marisa_trie-1.2.1-cp310-cp310-win_amd64.whl", hash = "sha256:5685a14b3099b1422c4f59fa38b0bf4b5342ee6cc38ae57df9666a0b28eeaad3", size = 151891 },
1585
+ { url = "https://files.pythonhosted.org/packages/4a/93/ffb01dfa22b6eee918e798e0bc3487427036c608aa4c065725f31aaf4104/marisa_trie-1.2.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:ed3fb4ed7f2084597e862bcd56c56c5529e773729a426c083238682dba540e98", size = 362823 },
1586
+ { url = "https://files.pythonhosted.org/packages/6d/1d/5c36500ac350c278c9bdfd88e17fa846fa4136d75597c167141ed973cdf2/marisa_trie-1.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:0fe69fb9ffb2767746181f7b3b29bbd3454d1d24717b5958e030494f3d3cddf3", size = 192741 },
1587
+ { url = "https://files.pythonhosted.org/packages/e8/04/87dd0840f3f720e511eba56193c02bf64d7d96df1ca9f6d19994f55154be/marisa_trie-1.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4728ed3ae372d1ea2cdbd5eaa27b8f20a10e415d1f9d153314831e67d963f281", size = 174995 },
1588
+ { url = "https://files.pythonhosted.org/packages/c9/51/9e903a7e13b7593e2e675d0ec4c390ca076dc5df1c1a0d5e85a513b886a3/marisa_trie-1.2.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8cf4f25cf895692b232f49aa5397af6aba78bb679fb917a05fce8d3cb1ee446d", size = 1384728 },
1589
+ { url = "https://files.pythonhosted.org/packages/e8/3f/7362a5ac60c2b0aad0f52cd57e7bd0c708f20d2660d8df85360f3d8f1c4b/marisa_trie-1.2.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7cca7f96236ffdbf49be4b2e42c132e3df05968ac424544034767650913524de", size = 1412620 },
1590
+ { url = "https://files.pythonhosted.org/packages/1f/bc/aaa3eaf6875f78a204a8da9692d56e3a36f89997dad2c388628385614576/marisa_trie-1.2.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d7eb20bf0e8b55a58d2a9b518aabc4c18278787bdba476c551dd1c1ed109e509", size = 1361555 },
1591
+ { url = "https://files.pythonhosted.org/packages/18/98/e11b5a6206c5d110f32adab37fa84a85410d684e9c731acdd5c9250e2ce4/marisa_trie-1.2.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:b1ec93f0d1ee6d7ab680a6d8ea1a08bf264636358e92692072170032dda652ba", size = 2257717 },
1592
+ { url = "https://files.pythonhosted.org/packages/d2/9d/6b4a40867875e738a67c5b29f83e2e490a66bd9067ace3dd9a5c497e2b7f/marisa_trie-1.2.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:e2699255d7ac610dee26d4ae7bda5951d05c7d9123a22e1f7c6a6f1964e0a4e4", size = 2417044 },
1593
+ { url = "https://files.pythonhosted.org/packages/fe/61/e25613c72f2931757334b8bcf6b501569ef713f5ee9c6c7688ec460bd720/marisa_trie-1.2.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:c484410911182457a8a1a0249d0c09c01e2071b78a0a8538cd5f7fa45589b13a", size = 2351960 },
1594
+ { url = "https://files.pythonhosted.org/packages/19/0a/a90ccaf3eb476d13ec261f80c6c52defaf10ebc7f35eb2bcd7dfb533aef7/marisa_trie-1.2.1-cp311-cp311-win32.whl", hash = "sha256:ad548117744b2bcf0e3d97374608be0a92d18c2af13d98b728d37cd06248e571", size = 130446 },
1595
+ { url = "https://files.pythonhosted.org/packages/fc/98/574b4e143e0a2f5f71af8716b6c4a8a46220f75a6e0847ce7d11ee0ba4aa/marisa_trie-1.2.1-cp311-cp311-win_amd64.whl", hash = "sha256:436f62d27714970b9cdd3b3c41bdad046f260e62ebb0daa38125ef70536fc73b", size = 152037 },
1596
+ { url = "https://files.pythonhosted.org/packages/4e/bf/8bd4ac8436b33fd46c9e1ffe3c2a131cd9744cc1649dbbe13308f744ef2b/marisa_trie-1.2.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:638506eacf20ca503fff72221a7e66a6eadbf28d6a4a6f949fcf5b1701bb05ec", size = 360041 },
1597
+ { url = "https://files.pythonhosted.org/packages/ab/dd/4d3151e302e66ae387885f6ec265bd189e096b0c43c1379bfd9a3b9d2543/marisa_trie-1.2.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:de1665eaafefa48a308e4753786519888021740501a15461c77bdfd57638e6b4", size = 190520 },
1598
+ { url = "https://files.pythonhosted.org/packages/00/28/ae5991c74fb90b173167a366a634c83445f948ad044d37287b478d6b457e/marisa_trie-1.2.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f713af9b8aa66a34cd3a78c7d150a560a75734713abe818a69021fd269e927fa", size = 174175 },
1599
+ { url = "https://files.pythonhosted.org/packages/5a/6a/fbfa89a8680eaabc6847a6c421e65427c43182db0c4bdb60e1516c81c822/marisa_trie-1.2.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b2a7d00f53f4945320b551bccb826b3fb26948bde1a10d50bb9802fabb611b10", size = 1354995 },
1600
+ { url = "https://files.pythonhosted.org/packages/9e/4c/2ba0b385e5f64ca4ddb0c10ec52ddf881bc4521f135948786fc339d1d6c8/marisa_trie-1.2.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:98042040d1d6085792e8d0f74004fc0f5f9ca6091c298f593dd81a22a4643854", size = 1390989 },
1601
+ { url = "https://files.pythonhosted.org/packages/6b/22/0791ed3045c91d0938345a86be472fc7c188b894f16c5dfad2ef31e7f882/marisa_trie-1.2.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6532615111eec2c79e711965ece0bc95adac1ff547a7fff5ffca525463116deb", size = 1328810 },
1602
+ { url = "https://files.pythonhosted.org/packages/9d/7d/3f566e563abae6efce7fc311c63282a447c611739b3cd66c0e36077c86f8/marisa_trie-1.2.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:20948e40ab2038e62b7000ca6b4a913bc16c91a2c2e6da501bd1f917eeb28d51", size = 2230222 },
1603
+ { url = "https://files.pythonhosted.org/packages/a5/0b/38fbb4611b5d1030242ddc2aa62e524438c8076e26f87395dbbf222dc62d/marisa_trie-1.2.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:66b23e5b35dd547f85bf98db7c749bc0ffc57916ade2534a6bbc32db9a4abc44", size = 2383620 },
1604
+ { url = "https://files.pythonhosted.org/packages/ae/17/4553c63de29904d5d2521a24cad817bc7883cfa90506ab702ec4dae59a7b/marisa_trie-1.2.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:6704adf0247d2dda42e876b793be40775dff46624309ad99bc7537098bee106d", size = 2329202 },
1605
+ { url = "https://files.pythonhosted.org/packages/45/08/6307a630e63cd763fe77ac56516faa67fa9cd342060691e40fabc84be6b0/marisa_trie-1.2.1-cp312-cp312-win32.whl", hash = "sha256:3ad356442c2fea4c2a6f514738ddf213d23930f942299a2b2c05df464a00848a", size = 129652 },
1606
+ { url = "https://files.pythonhosted.org/packages/a1/fe/67c357bfd92710d95a16b86e1453c663d565415d7f7838781c79ff7e1a7e/marisa_trie-1.2.1-cp312-cp312-win_amd64.whl", hash = "sha256:f2806f75817392cedcacb24ac5d80b0350dde8d3861d67d045c1d9b109764114", size = 150845 },
1607
+ { url = "https://files.pythonhosted.org/packages/2a/a4/a110cd9952f0e72da7bafea1f0084b18b9e03952110d9083bfda52279f5c/marisa_trie-1.2.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:b5ea16e69bfda0ac028c921b58de1a4aaf83d43934892977368579cd3c0a2554", size = 354439 },
1608
+ { url = "https://files.pythonhosted.org/packages/3c/a5/a6099eb1c3fd8d7e93408c45501e1d08536ac57dfef02ec331f78e1ace18/marisa_trie-1.2.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:9f627f4e41be710b6cb6ed54b0128b229ac9d50e2054d9cde3af0fef277c23cf", size = 188187 },
1609
+ { url = "https://files.pythonhosted.org/packages/7c/cc/f637127e2beffa920d21f7fc45b4029575bcd1b28a90c0d90cb2b08c2205/marisa_trie-1.2.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:5e649f3dc8ab5476732094f2828cc90cac3be7c79bc0c8318b6fda0c1d248db4", size = 171484 },
1610
+ { url = "https://files.pythonhosted.org/packages/6d/0f/29f2ad7260b956570f69f25a542efa51ba76eb76ecd53c63ee9d21987c3d/marisa_trie-1.2.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:46e528ee71808c961baf8c3ce1c46a8337ec7a96cc55389d11baafe5b632f8e9", size = 1319770 },
1611
+ { url = "https://files.pythonhosted.org/packages/f2/12/0b69ed61fba59551a5f3d569af367afae614db7214ce1da12946ba9a433a/marisa_trie-1.2.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:36aa4401a1180615f74d575571a6550081d84fc6461e9aefc0bb7b2427af098e", size = 1356488 },
1612
+ { url = "https://files.pythonhosted.org/packages/33/23/483b110db7ffe8729d6ebea2bf74258aef51f10fef5775f99e4bac7aef69/marisa_trie-1.2.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ce59bcd2cda9bb52b0e90cc7f36413cd86c3d0ce7224143447424aafb9f4aa48", size = 1302334 },
1613
+ { url = "https://files.pythonhosted.org/packages/1c/6f/46c2be99ce925985127fdf78900f1673bce8cb72debfebee6dccd11032c6/marisa_trie-1.2.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f4cd800704a5fc57e53c39c3a6b0c9b1519ebdbcb644ede3ee67a06eb542697d", size = 2202624 },
1614
+ { url = "https://files.pythonhosted.org/packages/fd/b6/ef642327dbd4ec35be55d5682520b8f70fca98a54024f441ef2732f6b305/marisa_trie-1.2.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:2428b495003c189695fb91ceeb499f9fcced3a2dce853e17fa475519433c67ff", size = 2364206 },
1615
+ { url = "https://files.pythonhosted.org/packages/69/04/ef8197a79d0ab5043b781cc9b457bd11b81d4204fe78adf7625a67f48c21/marisa_trie-1.2.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:735c363d9aaac82eaf516a28f7c6b95084c2e176d8231c87328dc80e112a9afa", size = 2304801 },
1616
+ { url = "https://files.pythonhosted.org/packages/03/72/f87564d653daf31d8f33d9bf0121e99ccc21f18f5c485fb404ba06abc10e/marisa_trie-1.2.1-cp313-cp313-win32.whl", hash = "sha256:eba6ca45500ca1a042466a0684aacc9838e7f20fe2605521ee19f2853062798f", size = 128799 },
1617
+ { url = "https://files.pythonhosted.org/packages/27/40/5f9eb8b73030cc4b0d6817176e66079a62a2ddd9d5530da54f8011473428/marisa_trie-1.2.1-cp313-cp313-win_amd64.whl", hash = "sha256:aa7cd17e1c690ce96c538b2f4aae003d9a498e65067dd433c52dd069009951d4", size = 149035 },
1618
+ ]
1619
+
1620
  [[package]]
1621
  name = "markdown-it-py"
1622
  version = "3.0.0"
 
1890
  { url = "https://files.pythonhosted.org/packages/0a/7d/a988f258104dcd2ccf1ed40fdc97e26c4ac351eeaf81d76e266c52d84e2f/multiprocess-0.70.16-py312-none-any.whl", hash = "sha256:fc0544c531920dde3b00c29863377f87e1632601092ea2daca74e4beb40faa2e", size = 146741 },
1891
  ]
1892
 
1893
+ [[package]]
1894
+ name = "murmurhash"
1895
+ version = "1.0.10"
1896
+ source = { registry = "https://pypi.org/simple" }
1897
+ sdist = { url = "https://files.pythonhosted.org/packages/6e/07/56f98a57698e6abf80e58d6c93a0422fd3f443f5b4dad06e83e8a3729ab1/murmurhash-1.0.10.tar.gz", hash = "sha256:5282aab1317804c6ebd6dd7f69f15ba9075aee671c44a34be2bde0f1b11ef88a", size = 12629 }
1898
+ wheels = [
1899
+ { url = "https://files.pythonhosted.org/packages/4b/13/522e3366c44474e43a192390f2622ae514605c1cfe6277a657e641823692/murmurhash-1.0.10-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:3e90eef568adca5e17a91f96975e9a782ace3a617bbb3f8c8c2d917096e9bfeb", size = 26111 },
1900
+ { url = "https://files.pythonhosted.org/packages/d2/77/f185f6bd526ed6c893a72f2ec3fab90dc3af207a8949faf93228bb99ad26/murmurhash-1.0.10-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f8ecb00cc1ab57e4b065f9fb3ea923b55160c402d959c69a0b6dbbe8bc73efc3", size = 26308 },
1901
+ { url = "https://files.pythonhosted.org/packages/a7/fc/c0a61fcd51e4551e0404dba5444be4bd476276bc4fd80389b54d81a6d785/murmurhash-1.0.10-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3310101004d9e2e0530c2fed30174448d998ffd1b50dcbfb7677e95db101aa4b", size = 29036 },
1902
+ { url = "https://files.pythonhosted.org/packages/a8/ca/359ae4246cccaf3f6386b66bd9ba4a39e6ec342f89e2c4def361a8cbe7cf/murmurhash-1.0.10-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c65401a6f1778676253cbf89c1f45a8a7feb7d73038e483925df7d5943c08ed9", size = 29212 },
1903
+ { url = "https://files.pythonhosted.org/packages/ed/9d/d62d12e3ecc6f99eddea6289413669a905d2ebb15cf9fe075336ca6cceaa/murmurhash-1.0.10-cp310-cp310-win_amd64.whl", hash = "sha256:f23f2dfc7174de2cdc5007c0771ab8376a2a3f48247f32cac4a5563e40c6adcc", size = 25169 },
1904
+ { url = "https://files.pythonhosted.org/packages/05/29/5f48eea8712697f66531c4b6018b1713a3aec2b4eddbce1c63f93adbf6b1/murmurhash-1.0.10-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:90ed37ee2cace9381b83d56068334f77e3e30bc521169a1f886a2a2800e965d6", size = 26273 },
1905
+ { url = "https://files.pythonhosted.org/packages/7a/05/4a3b5c3043c6d84c00bf0f574d326660702b1c10174fe6b44cef3c3dff08/murmurhash-1.0.10-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:22e9926fdbec9d24ced9b0a42f0fee68c730438be3cfb00c2499fd495caec226", size = 26419 },
1906
+ { url = "https://files.pythonhosted.org/packages/69/32/f5327150e02af00e67badb50d9e230a631f920b9c926027b36aa93b53ec0/murmurhash-1.0.10-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:54bfbfd68baa99717239b8844600db627f336a08b1caf4df89762999f681cdd1", size = 29263 },
1907
+ { url = "https://files.pythonhosted.org/packages/93/1b/d880be7ac028cab6bf980acf005c16c0ff381f0c0ba1fd60c284626df3fd/murmurhash-1.0.10-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:18b9d200a09d48ef67f6840b77c14f151f2b6c48fd69661eb75c7276ebdb146c", size = 29326 },
1908
+ { url = "https://files.pythonhosted.org/packages/71/46/af01a20ec368bd9cb49a1d2df15e3eca113bbf6952cc1f2a47f1c6801a7f/murmurhash-1.0.10-cp311-cp311-win_amd64.whl", hash = "sha256:e5d7cfe392c0a28129226271008e61e77bf307afc24abf34f386771daa7b28b0", size = 25271 },
1909
+ { url = "https://files.pythonhosted.org/packages/da/64/58433fc6266a32dda0e5a776952d74b533614b8706f07c731fee00194bb6/murmurhash-1.0.10-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:96f0a070344d4802ea76a160e0d4c88b7dc10454d2426f48814482ba60b38b9e", size = 26572 },
1910
+ { url = "https://files.pythonhosted.org/packages/ae/19/60df81c070283a0fefb659af7f6b0b5396f34307bc10731640efa556ac9d/murmurhash-1.0.10-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:9f61862060d677c84556610ac0300a0776cb13cb3155f5075ed97e80f86e55d9", size = 26537 },
1911
+ { url = "https://files.pythonhosted.org/packages/48/e7/6627e0f8173f0bd2463e3aa50dff6c71e951a0cbd835db18843e0546bc0c/murmurhash-1.0.10-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b3b6d2d877d8881a08be66d906856d05944be0faf22b9a0390338bcf45299989", size = 28311 },
1912
+ { url = "https://files.pythonhosted.org/packages/e8/1b/6a6a3d942a31a37e6e30e18e8ddf284098a50cc4e9a7e2742a83c66845e8/murmurhash-1.0.10-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d8f54b0031d8696fed17ed6e9628f339cdea0ba2367ca051e18ff59193f52687", size = 29130 },
1913
+ { url = "https://files.pythonhosted.org/packages/3b/56/8630be974aeb05868f2058db0ce6f19d85c27adb9b8f733cf69c856afdaa/murmurhash-1.0.10-cp312-cp312-win_amd64.whl", hash = "sha256:97e09d675de2359e586f09de1d0de1ab39f9911edffc65c9255fb5e04f7c1f85", size = 25350 },
1914
+ ]
1915
+
1916
  [[package]]
1917
  name = "nbformat"
1918
  version = "5.10.4"
 
2286
  { url = "https://files.pythonhosted.org/packages/52/3b/ce7a01026a7cf46e5452afa86f97a5e88ca97f562cafa76570178ab56d8d/pillow-10.4.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:0755ffd4a0c6f267cccbae2e9903d95477ca2f77c4fcf3a3a09570001856c8a5", size = 2554661 },
2287
  ]
2288
 
2289
+ [[package]]
2290
+ name = "pip"
2291
+ version = "24.3.1"
2292
+ source = { registry = "https://pypi.org/simple" }
2293
+ sdist = { url = "https://files.pythonhosted.org/packages/f4/b1/b422acd212ad7eedddaf7981eee6e5de085154ff726459cf2da7c5a184c1/pip-24.3.1.tar.gz", hash = "sha256:ebcb60557f2aefabc2e0f918751cd24ea0d56d8ec5445fe1807f1d2109660b99", size = 1931073 }
2294
+ wheels = [
2295
+ { url = "https://files.pythonhosted.org/packages/ef/7d/500c9ad20238fcfcb4cb9243eede163594d7020ce87bd9610c9e02771876/pip-24.3.1-py3-none-any.whl", hash = "sha256:3790624780082365f47549d032f3770eeb2b1e8bd1f7b2e02dace1afa361b4ed", size = 1822182 },
2296
+ ]
2297
+
2298
  [[package]]
2299
  name = "platformdirs"
2300
  version = "4.3.6"
 
2317
  { url = "https://files.pythonhosted.org/packages/ed/e2/134a4c381f63e8498314f15d5f8db32bdd9ee40806aba34c3e270915a629/polars-1.10.0-cp39-abi3-win_amd64.whl", hash = "sha256:182e03bd3486490c980a59cbae0be53c0688f6f6f6a2bccc28e07cc1b7f8a4b5", size = 32868791 },
2318
  ]
2319
 
2320
+ [[package]]
2321
+ name = "preshed"
2322
+ version = "3.0.9"
2323
+ source = { registry = "https://pypi.org/simple" }
2324
+ dependencies = [
2325
+ { name = "cymem" },
2326
+ { name = "murmurhash" },
2327
+ ]
2328
+ sdist = { url = "https://files.pythonhosted.org/packages/f2/4e/76dbf784e7d4ed069f91a4c249b1d6ec6856ef0c0b2fd96992895d458b15/preshed-3.0.9.tar.gz", hash = "sha256:721863c5244ffcd2651ad0928951a2c7c77b102f4e11a251ad85d37ee7621660", size = 14478 }
2329
+ wheels = [
2330
+ { url = "https://files.pythonhosted.org/packages/38/7f/a7d3eeaee67ecebbe51866c1aae6310e34cefa0a64821aed963a0a167b51/preshed-3.0.9-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:4f96ef4caf9847b2bb9868574dcbe2496f974e41c2b83d6621c24fb4c3fc57e3", size = 132225 },
2331
+ { url = "https://files.pythonhosted.org/packages/61/4e/f251271ee9f0e0eb0ebe219a8df57ff8511a3b7a83e79e24d37105034164/preshed-3.0.9-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a61302cf8bd30568631adcdaf9e6b21d40491bd89ba8ebf67324f98b6c2a2c05", size = 127791 },
2332
+ { url = "https://files.pythonhosted.org/packages/eb/8b/6c8a153ea39b4750c20ed48dd9be4bf9d8c0b4e7822fc63c68cd2891703d/preshed-3.0.9-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:99499e8a58f58949d3f591295a97bca4e197066049c96f5d34944dd21a497193", size = 150279 },
2333
+ { url = "https://files.pythonhosted.org/packages/42/59/8f65ad22c13020ff281529e415c32a56cfa691d24b0eca2eb3d756e4d644/preshed-3.0.9-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ea6b6566997dc3acd8c6ee11a89539ac85c77275b4dcefb2dc746d11053a5af8", size = 156914 },
2334
+ { url = "https://files.pythonhosted.org/packages/f3/72/108426ca3b6e7f16db30b3b9396e3fa45a3fd5a76f6532ab04beada2e4e3/preshed-3.0.9-cp310-cp310-win_amd64.whl", hash = "sha256:bfd523085a84b1338ff18f61538e1cfcdedc4b9e76002589a301c364d19a2e36", size = 122224 },
2335
+ { url = "https://files.pythonhosted.org/packages/c0/1e/05fa559f53b635d96b233b63e93accb75215025b997486f7290991bec6c3/preshed-3.0.9-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:e7c2364da27f2875524ce1ca754dc071515a9ad26eb5def4c7e69129a13c9a59", size = 132972 },
2336
+ { url = "https://files.pythonhosted.org/packages/a8/b3/1a73ba16bab53043fd19dd0a7838ae05c705dccb329404dd4ad5925767f1/preshed-3.0.9-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:182138033c0730c683a6d97e567ceb8a3e83f3bff5704f300d582238dbd384b3", size = 128751 },
2337
+ { url = "https://files.pythonhosted.org/packages/2c/9a/919d3708f6fa98d9eab1a186e6b30ab25a4595907bbc1fea5c1e8faa9b9d/preshed-3.0.9-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:345a10be3b86bcc6c0591d343a6dc2bfd86aa6838c30ced4256dfcfa836c3a64", size = 150050 },
2338
+ { url = "https://files.pythonhosted.org/packages/db/69/d9ab108dc670b5be9e292bbd555f39e6eb0a4baab25cd28f792850d5e65b/preshed-3.0.9-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:51d0192274aa061699b284f9fd08416065348edbafd64840c3889617ee1609de", size = 157159 },
2339
+ { url = "https://files.pythonhosted.org/packages/e4/fc/78cdbdb79f5d6d45949e72c32445d6c060977ad50a1dcfc0392622165f7c/preshed-3.0.9-cp311-cp311-win_amd64.whl", hash = "sha256:96b857d7a62cbccc3845ac8c41fd23addf052821be4eb987f2eb0da3d8745aa1", size = 122323 },
2340
+ { url = "https://files.pythonhosted.org/packages/fe/7e/a41595876f644d8bd2c3d5422d7211e876b1848a8cc0c03cce33d9cd048a/preshed-3.0.9-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:b4fe6720012c62e6d550d6a5c1c7ad88cacef8388d186dad4bafea4140d9d198", size = 133196 },
2341
+ { url = "https://files.pythonhosted.org/packages/e7/68/1b4772ff3232e71b63a9206936eb1f75e976ebf4e4e24dc9b3ea7b68369b/preshed-3.0.9-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:e04f05758875be9751e483bd3c519c22b00d3b07f5a64441ec328bb9e3c03700", size = 128594 },
2342
+ { url = "https://files.pythonhosted.org/packages/f3/52/48eefe876a3841c5850bd955daf145d0e408567c8f46a997bce136dc259d/preshed-3.0.9-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4a55091d0e395f1fdb62ab43401bb9f8b46c7d7794d5b071813c29dc1ab22fd0", size = 149220 },
2343
+ { url = "https://files.pythonhosted.org/packages/55/ea/9e6c1a7b1d623f6340379290d603a3b8a71ce52a93f842fbf7547f7f1812/preshed-3.0.9-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7de8f5138bcac7870424e09684dc3dd33c8e30e81b269f6c9ede3d8c7bb8e257", size = 156809 },
2344
+ { url = "https://files.pythonhosted.org/packages/db/e4/d074efb7e8a8873d346d2fb8dd43e19b1eae0697351c0d79cff947cba46e/preshed-3.0.9-cp312-cp312-win_amd64.whl", hash = "sha256:24229c77364628743bc29c5620c5d6607ed104f0e02ae31f8a030f99a78a5ceb", size = 122428 },
2345
+ ]
2346
+
2347
  [[package]]
2348
  name = "prompt-toolkit"
2349
  version = "3.0.48"
 
3384
  { url = "https://files.pythonhosted.org/packages/d9/5a/e7c31adbe875f2abbb91bd84cf2dc52d792b5a01506781dbcf25c91daf11/six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254", size = 11053 },
3385
  ]
3386
 
3387
+ [[package]]
3388
+ name = "smart-open"
3389
+ version = "7.0.5"
3390
+ source = { registry = "https://pypi.org/simple" }
3391
+ dependencies = [
3392
+ { name = "wrapt" },
3393
+ ]
3394
+ sdist = { url = "https://files.pythonhosted.org/packages/a3/d8/1481294b2d110b805c0f5d23ef34158b7d5d4283633c0d34c69ea89bb76b/smart_open-7.0.5.tar.gz", hash = "sha256:d3672003b1dbc85e2013e4983b88eb9a5ccfd389b0d4e5015f39a9ee5620ec18", size = 71693 }
3395
+ wheels = [
3396
+ { url = "https://files.pythonhosted.org/packages/06/bc/706838af28a542458bffe74a5d0772ca7f207b5495cd9fccfce61ef71f2a/smart_open-7.0.5-py3-none-any.whl", hash = "sha256:8523ed805c12dff3eaa50e9c903a6cb0ae78800626631c5fe7ea073439847b89", size = 61387 },
3397
+ ]
3398
+
3399
  [[package]]
3400
  name = "sniffio"
3401
  version = "1.3.1"
 
3414
  { url = "https://files.pythonhosted.org/packages/d1/c2/fe97d779f3ef3b15f05c94a2f1e3d21732574ed441687474db9d342a7315/soupsieve-2.6-py3-none-any.whl", hash = "sha256:e72c4ff06e4fb6e4b5a9f0f55fe6e81514581fca1515028625d0f299c602ccc9", size = 36186 },
3415
  ]
3416
 
3417
+ [[package]]
3418
+ name = "spacy"
3419
+ version = "3.7.5"
3420
+ source = { registry = "https://pypi.org/simple" }
3421
+ dependencies = [
3422
+ { name = "catalogue" },
3423
+ { name = "cymem" },
3424
+ { name = "jinja2" },
3425
+ { name = "langcodes" },
3426
+ { name = "murmurhash" },
3427
+ { name = "numpy" },
3428
+ { name = "packaging" },
3429
+ { name = "preshed" },
3430
+ { name = "pydantic" },
3431
+ { name = "requests" },
3432
+ { name = "setuptools" },
3433
+ { name = "spacy-legacy" },
3434
+ { name = "spacy-loggers" },
3435
+ { name = "srsly" },
3436
+ { name = "thinc" },
3437
+ { name = "tqdm" },
3438
+ { name = "typer" },
3439
+ { name = "wasabi" },
3440
+ { name = "weasel" },
3441
+ ]
3442
+ sdist = { url = "https://files.pythonhosted.org/packages/21/1e/94e3981516db6fcd6685f058c43c3fa81805c120b04829596367dad1aa4e/spacy-3.7.5.tar.gz", hash = "sha256:a648c6cbf2acc7a55a69ee9e7fa4f22bdf69aa828a587a1bc5cfff08cf3c2dd3", size = 1274806 }
3443
+ wheels = [
3444
+ { url = "https://files.pythonhosted.org/packages/c2/5e/f3a851f4c90e61c64956c952387db9b6557863a15050616929886cdcab45/spacy-3.7.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:8002897701429ee2ab5ff6921ae43560f4cd17184cb1e10dad761901c12dcb85", size = 6847778 },
3445
+ { url = "https://files.pythonhosted.org/packages/c6/3a/8c5446c40306f876f12c2f9c814c731913f775c9342348333342312bf202/spacy-3.7.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:43acd19efc845e9126b61a05ed7508a0aff509e96e15563f30f810c19e636b7c", size = 6609906 },
3446
+ { url = "https://files.pythonhosted.org/packages/18/91/2fbd1c23467cbad666dbcdb9cf7d3c04d620a2f470281a9d341acccad9b2/spacy-3.7.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f044522b1271ea54718dc43b6f593b5dad349cd31b3827764c501529b599e09a", size = 6250741 },
3447
+ { url = "https://files.pythonhosted.org/packages/07/52/117eae6b96e79207234bf546271bc4d8bb1ec5bf5dd1d8ddf15f12cdbf2e/spacy-3.7.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6a7dbfbca42c1c128fefa6832631fe49e11c850e963af99229f14e2d0ae94f34", size = 6606864 },
3448
+ { url = "https://files.pythonhosted.org/packages/df/9d/b46b6f0a4ad66498c388a94e7efbff51044be92ecc1d0f5ea02dc45ef2d1/spacy-3.7.5-cp310-cp310-win_amd64.whl", hash = "sha256:2a21b2a1e1e5d10d15c6f75990b7341d0fc9b454083dfd4222fdd75b9164831c", size = 12081318 },
3449
+ { url = "https://files.pythonhosted.org/packages/80/36/53a831d2e82a432d785823cdff56f84737aed26e8f7667d423ee32c3983d/spacy-3.7.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:cd93c34bf2a02bbed7df73d42aed8df5e3eb9688c4ea84ec576f740ba939cce5", size = 6750524 },
3450
+ { url = "https://files.pythonhosted.org/packages/72/49/bd65abe76607c86dc1f104ad545eeb3e771f474b7e259e64e5a16614615b/spacy-3.7.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:190ba0032a5efdb138487c587c0ebb7a98f86adb917f464b252ee8766b8eec4a", size = 6517941 },
3451
+ { url = "https://files.pythonhosted.org/packages/51/83/ec38e9bddb17b8f07539a49a19f2b30ce8e7d7a3d4f94dda31ea9bd043f7/spacy-3.7.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:38de1c9bbb73b8cdfea2dd6e57450f093c1a1af47515870c1c8640b85b35ab16", size = 6236788 },
3452
+ { url = "https://files.pythonhosted.org/packages/e0/ce/b5e6b02165881547ad251b0b172ebf496b9181a95833f94012af82d044df/spacy-3.7.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3dad4853950a2fe6c7a0bdfd791a762d1f8cedd2915c4ae41b2e0ca3a850eefc", size = 6585757 },
3453
+ { url = "https://files.pythonhosted.org/packages/39/e1/08681583569f435347ced0535b27c073fcc9a927d9b4293c963092f2d01c/spacy-3.7.5-cp311-cp311-win_amd64.whl", hash = "sha256:4e00d076871af784c2e43185a71ee676b58893853a05c5b81717b8af2b666c07", size = 12078792 },
3454
+ { url = "https://files.pythonhosted.org/packages/3d/c8/413225de79e71dd9ca353d597ea4890a43fa60ff98cf9615b1606680ab95/spacy-3.7.5-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:bf54c3c2425428b328b53a65913d47eb4cb27a1429aa4e8ed979ffc97d4663e0", size = 6324302 },
3455
+ { url = "https://files.pythonhosted.org/packages/60/f9/726e977c5430c44912ed97d7d965ef35d2563978b38076b254379652a522/spacy-3.7.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:4145cea7f9814fa7d86b2028c2dd83e02f13f80d5ac604a400b2f7d7b26a0e8c", size = 6112434 },
3456
+ { url = "https://files.pythonhosted.org/packages/53/ff/4b3a9d3063ba98d3ce27a0c2a60e3c25e4650b7c3c7555a47179dac5c282/spacy-3.7.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:262f8ebb71f7ed5ffe8e4f384b2594b7a296be50241ce9fbd9277b5da2f46f38", size = 6065925 },
3457
+ { url = "https://files.pythonhosted.org/packages/ef/9f/70bed4cb66629ad1fa5f45220d47bbbf6c858e70e5d69f7ca1de95dd2b92/spacy-3.7.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:faa1e2b6234ae33c0b1f8dfa5a8dcb66fb891f19231725dfcff4b2666125c250", size = 6455942 },
3458
+ { url = "https://files.pythonhosted.org/packages/58/42/b6bb76b08f4a0ccb0e2d0e4f3524acadf1ba929e2b93f90e4652d1c3cbd3/spacy-3.7.5-cp312-cp312-win_amd64.whl", hash = "sha256:07677e270a6d729453cc04b5e2247a96a86320b8845e6428d9f90f217eff0f56", size = 11673681 },
3459
+ ]
3460
+
3461
+ [[package]]
3462
+ name = "spacy-legacy"
3463
+ version = "3.0.12"
3464
+ source = { registry = "https://pypi.org/simple" }
3465
+ sdist = { url = "https://files.pythonhosted.org/packages/d9/79/91f9d7cc8db5642acad830dcc4b49ba65a7790152832c4eceb305e46d681/spacy-legacy-3.0.12.tar.gz", hash = "sha256:b37d6e0c9b6e1d7ca1cf5bc7152ab64a4c4671f59c85adaf7a3fcb870357a774", size = 23806 }
3466
+ wheels = [
3467
+ { url = "https://files.pythonhosted.org/packages/c3/55/12e842c70ff8828e34e543a2c7176dac4da006ca6901c9e8b43efab8bc6b/spacy_legacy-3.0.12-py2.py3-none-any.whl", hash = "sha256:476e3bd0d05f8c339ed60f40986c07387c0a71479245d6d0f4298dbd52cda55f", size = 29971 },
3468
+ ]
3469
+
3470
+ [[package]]
3471
+ name = "spacy-loggers"
3472
+ version = "1.0.5"
3473
+ source = { registry = "https://pypi.org/simple" }
3474
+ sdist = { url = "https://files.pythonhosted.org/packages/67/3d/926db774c9c98acf66cb4ed7faf6c377746f3e00b84b700d0868b95d0712/spacy-loggers-1.0.5.tar.gz", hash = "sha256:d60b0bdbf915a60e516cc2e653baeff946f0cfc461b452d11a4d5458c6fe5f24", size = 20811 }
3475
+ wheels = [
3476
+ { url = "https://files.pythonhosted.org/packages/33/78/d1a1a026ef3af911159398c939b1509d5c36fe524c7b644f34a5146c4e16/spacy_loggers-1.0.5-py3-none-any.whl", hash = "sha256:196284c9c446cc0cdb944005384270d775fdeaf4f494d8e269466cfa497ef645", size = 22343 },
3477
+ ]
3478
+
3479
  [[package]]
3480
  name = "sqlite-minutils"
3481
  version = "3.37.0.post4"
 
3488
  { url = "https://files.pythonhosted.org/packages/71/fc/c8718f39d96169235e12be7272f96d1365f3193cb46216f3930d3327b292/sqlite_minutils-3.37.0.post4-py3-none-any.whl", hash = "sha256:8de2b6dacf7bc34f7d3d4b3c576dd8f3ab725514b331b53e2b23a5d130fd1d1f", size = 77737 },
3489
  ]
3490
 
3491
+ [[package]]
3492
+ name = "srsly"
3493
+ version = "2.4.8"
3494
+ source = { registry = "https://pypi.org/simple" }
3495
+ dependencies = [
3496
+ { name = "catalogue" },
3497
+ ]
3498
+ sdist = { url = "https://files.pythonhosted.org/packages/59/7f/17259e0962bb9433f39aa99ec45fd36851961491c562bc2f8c731cc476a6/srsly-2.4.8.tar.gz", hash = "sha256:b24d95a65009c2447e0b49cda043ac53fecf4f09e358d87a57446458f91b8a91", size = 351651 }
3499
+ wheels = [
3500
+ { url = "https://files.pythonhosted.org/packages/f6/48/363ffe49690ff5cd8597a2fce311890825595c20153b5fd1db7477d1e2cd/srsly-2.4.8-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:17f3bcb418bb4cf443ed3d4dcb210e491bd9c1b7b0185e6ab10b6af3271e63b2", size = 492893 },
3501
+ { url = "https://files.pythonhosted.org/packages/b2/19/39c39e1ed436852946924fb043cbf1f7bf96682d8ef6ad0c2b14fee235c0/srsly-2.4.8-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0b070a58e21ab0e878fd949f932385abb4c53dd0acb6d3a7ee75d95d447bc609", size = 491198 },
3502
+ { url = "https://files.pythonhosted.org/packages/56/2b/e4ea56011ed3b66b372ff55463b4f0f8db7245b95cec2fb2042ffec291f0/srsly-2.4.8-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:98286d20014ed2067ad02b0be1e17c7e522255b188346e79ff266af51a54eb33", size = 488980 },
3503
+ { url = "https://files.pythonhosted.org/packages/32/69/2c054c6c5dc5daf5648f994f22377f3be44f79d643f3c3db255b4e86b391/srsly-2.4.8-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:18685084e2e0cc47c25158cbbf3e44690e494ef77d6418c2aae0598c893f35b0", size = 493019 },
3504
+ { url = "https://files.pythonhosted.org/packages/0a/ed/d2c37221fe1975f4b6e8e3cf200d25b905b77e18f6a660b3dc149ade6192/srsly-2.4.8-cp310-cp310-win_amd64.whl", hash = "sha256:980a179cbf4eb5bc56f7507e53f76720d031bcf0cef52cd53c815720eb2fc30c", size = 481871 },
3505
+ { url = "https://files.pythonhosted.org/packages/40/fe/baa4056b7e8585f4c3478d3d1d3a2c1c3095ff066e4fb420bb000abb6cc2/srsly-2.4.8-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:5472ed9f581e10c32e79424c996cf54c46c42237759f4224806a0cd4bb770993", size = 490026 },
3506
+ { url = "https://files.pythonhosted.org/packages/1b/d7/0800af1a75008b3a6a6a24f3efd165f2d2208076e9b8a4b11b66f16217f3/srsly-2.4.8-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:50f10afe9230072c5aad9f6636115ea99b32c102f4c61e8236d8642c73ec7a13", size = 488409 },
3507
+ { url = "https://files.pythonhosted.org/packages/0e/05/006dd2fdd74248d3fad492e864c2dc75260d52759d526a7cb9c7c08b0fe9/srsly-2.4.8-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c994a89ba247a4d4f63ef9fdefb93aa3e1f98740e4800d5351ebd56992ac75e3", size = 487672 },
3508
+ { url = "https://files.pythonhosted.org/packages/e2/a0/153375ade1ca9d33543da7d512329ea9a7d40dc0e0832599f4228b9d761b/srsly-2.4.8-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ace7ed4a0c20fa54d90032be32f9c656b6d75445168da78d14fe9080a0c208ad", size = 490912 },
3509
+ { url = "https://files.pythonhosted.org/packages/eb/f5/e3f29993f673d91623df6413ba64e815dd2676fd7932cbc5e7347402ddae/srsly-2.4.8-cp311-cp311-win_amd64.whl", hash = "sha256:7a919236a090fb93081fbd1cec030f675910f3863825b34a9afbcae71f643127", size = 479719 },
3510
+ { url = "https://files.pythonhosted.org/packages/b1/1a/d96117461e16203ee35dda67153db00572935e5d7fc211d091a34fec24c8/srsly-2.4.8-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:7583c03d114b4478b7a357a1915305163e9eac2dfe080da900555c975cca2a11", size = 488406 },
3511
+ { url = "https://files.pythonhosted.org/packages/9a/47/13fbea357e7eb9ee823b54cbead30a6adc6686bb3f73e76563b13dcbb2f8/srsly-2.4.8-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:94ccdd2f6db824c31266aaf93e0f31c1c43b8bc531cd2b3a1d924e3c26a4f294", size = 486434 },
3512
+ { url = "https://files.pythonhosted.org/packages/0e/3d/462cec40c9ce15f8a3a97c972058ce1d2688abcad2dfc4eea3c888391c11/srsly-2.4.8-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:db72d2974f91aee652d606c7def98744ca6b899bd7dd3009fd75ebe0b5a51034", size = 486968 },
3513
+ { url = "https://files.pythonhosted.org/packages/a1/1d/c4b28e95d9ec4c2e7dad201fa415a483e173fcce444d52dd53be0b0469f3/srsly-2.4.8-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6a60c905fd2c15e848ce1fc315fd34d8a9cc72c1dee022a0d8f4c62991131307", size = 491730 },
3514
+ { url = "https://files.pythonhosted.org/packages/06/b4/d620235df9104c9049c5378027fb2692a8a51fafc775e2feae815ff99599/srsly-2.4.8-cp312-cp312-win_amd64.whl", hash = "sha256:e0b8d5722057000694edf105b8f492e7eb2f3aa6247a5f0c9170d1e0d074151c", size = 478845 },
3515
+ ]
3516
+
3517
  [[package]]
3518
  name = "stack-data"
3519
  version = "0.6.3"
 
3561
  { url = "https://files.pythonhosted.org/packages/b6/cb/b86984bed139586d01532a587464b5805f12e397594f19f931c4c2fbfa61/tenacity-9.0.0-py3-none-any.whl", hash = "sha256:93de0c98785b27fcf659856aa9f54bfbd399e29969b0621bc7f762bd441b4539", size = 28169 },
3562
  ]
3563
 
3564
+ [[package]]
3565
+ name = "thinc"
3566
+ version = "8.2.5"
3567
+ source = { registry = "https://pypi.org/simple" }
3568
+ dependencies = [
3569
+ { name = "blis" },
3570
+ { name = "catalogue" },
3571
+ { name = "confection" },
3572
+ { name = "cymem" },
3573
+ { name = "murmurhash" },
3574
+ { name = "numpy" },
3575
+ { name = "packaging" },
3576
+ { name = "preshed" },
3577
+ { name = "pydantic" },
3578
+ { name = "setuptools" },
3579
+ { name = "srsly" },
3580
+ { name = "wasabi" },
3581
+ ]
3582
+ sdist = { url = "https://files.pythonhosted.org/packages/3b/2a/0e2e961e6152bedecca70e6833f6e827ee621efcee7496643242b506d54f/thinc-8.2.5.tar.gz", hash = "sha256:c2963791c934cc7fbd8f9b942d571cac79892ad11630bfca690a868c32752b75", size = 193031 }
3583
+ wheels = [
3584
+ { url = "https://files.pythonhosted.org/packages/a2/76/1994abe2bf5cbe5f68231fd2c177e384b19bed86268d56c1b6b2dc19e203/thinc-8.2.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:dc267f6aad80a681a85f50383afe91da9e2bec56fefdda86bfa2e4f529bef191", size = 843631 },
3585
+ { url = "https://files.pythonhosted.org/packages/e5/76/47e94af32943bd92d7cda4e92d185331a89116a0bf87123cc71796f21a00/thinc-8.2.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d80f1e497971c9fa0938f5cc8fe607bbe87356b405fb7bbc3ff9f32fb4eed3bb", size = 779045 },
3586
+ { url = "https://files.pythonhosted.org/packages/c0/76/7dcce5cd2b5a9fe92b76767d688f17fe459543c13893e588545db3c0dc85/thinc-8.2.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0933adbd3e65e30d3bef903e77a368bc8a41bed34b0d18df6d4fc0536908e21f", size = 868726 },
3587
+ { url = "https://files.pythonhosted.org/packages/d7/b5/ad029dc7346381922b7fd655b3fc39009cf675766345d8e13f9f8282bde8/thinc-8.2.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:54bac2ba23b208fdaf267cd6113d26a5ecbb3b0e0c6015dff784ae6a9c5e78ca", size = 922374 },
3588
+ { url = "https://files.pythonhosted.org/packages/f6/3e/fb96407db92a15b5c0feb0deb930c4c223bdb772e04b51b9798a86059a26/thinc-8.2.5-cp310-cp310-win_amd64.whl", hash = "sha256:399260197ef3f8d9600315fc5b5a1d5940400fceb0361de642e9fe3506d82385", size = 1482115 },
3589
+ { url = "https://files.pythonhosted.org/packages/76/37/8acfeba6bb25b08c2a33bfae5301a5df4dc164d2d17040bebbcf66d783a1/thinc-8.2.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a75c0de3340afed594beda293661de145f3842873df56d9989bc338148f13fab", size = 839072 },
3590
+ { url = "https://files.pythonhosted.org/packages/e9/eb/753a85875fb0261c83ca87a1a36d41346bde662c3a029ace9d68fe32bc5b/thinc-8.2.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:6b166d1a22003ee03bc236370fff2884744c1fb758a6209a2512d305773d07d7", size = 773885 },
3591
+ { url = "https://files.pythonhosted.org/packages/34/47/06810a1bd9d3287076ba17299abec82c8c643563661b1af9b1d5d9aeab38/thinc-8.2.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:34db8a023b9f70645fdf06c510584ba6d8b97ec53c1e094f42d95652bf8c875f", size = 868332 },
3592
+ { url = "https://files.pythonhosted.org/packages/1a/19/cd73e3b5f22d5d9399f6f2931ab0fb985415f34030dcfead070181866761/thinc-8.2.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8901b30db1071ea8d5e4437429c8632535bf5ed87938ce3bb5057bed9f15aed8", size = 920152 },
3593
+ { url = "https://files.pythonhosted.org/packages/5e/0e/5e7b24e046e0725eafc37ded0cd9bfaf789efb894101a7aca8a73dba81de/thinc-8.2.5-cp311-cp311-win_amd64.whl", hash = "sha256:8ef5d46d62e31f2450224ab22391a606cf427b13e20cfc570f70422e2f333872", size = 1480120 },
3594
+ { url = "https://files.pythonhosted.org/packages/a4/9d/d2ed3aef9bb75ab86c521bde58f897db6a572c9fd639448173b516269a69/thinc-8.2.5-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:9fc26697e2358c71a5fe243d52e98ae67ee1a3b314eead5031845b6d1c0d121c", size = 824150 },
3595
+ { url = "https://files.pythonhosted.org/packages/66/a6/30ed1edb2adab585b5f7d5d99e89b5be3014dcbf3f4e263997b2c2426681/thinc-8.2.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8e299d4dc41107385d6d14d8604a060825798a031cabe2b894b22f9d75d9eaad", size = 760640 },
3596
+ { url = "https://files.pythonhosted.org/packages/82/ce/aaff1f39bcc1e9a97bec5f3d20aa771c005a9faff3944fc56c7492c24466/thinc-8.2.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e8a8f2f249f2be9a5ce2a81a6efe7503b68be7b57e47ad54ab28204e1f0c723b", size = 818820 },
3597
+ { url = "https://files.pythonhosted.org/packages/d7/fa/c96b01e46e5962d02de1206e497fda2902aef2b8ffb2926d66d5f0159040/thinc-8.2.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:87e729f33c76ec6df9b375989743252ab880d79f3a2b4175169b21dece90f102", size = 865047 },
3598
+ { url = "https://files.pythonhosted.org/packages/cd/26/306b8bedb678c52464ed00e576edf9d365fce0bcae597a333bdad9fb5d67/thinc-8.2.5-cp312-cp312-win_amd64.whl", hash = "sha256:c5f750ea2dd32ca6d46947025dacfc0f6037340c4e5f7adb9af84c75f65aa7d8", size = 1447893 },
3599
+ ]
3600
+
3601
  [[package]]
3602
  name = "threadpoolctl"
3603
  version = "3.5.0"
 
3777
  version = "3.1.0"
3778
  source = { registry = "https://pypi.org/simple" }
3779
  dependencies = [
3780
+ { name = "filelock", marker = "python_full_version < '3.13'" },
3781
  ]
3782
  wheels = [
3783
  { url = "https://files.pythonhosted.org/packages/98/29/69aa56dc0b2eb2602b553881e34243475ea2afd9699be042316842788ff5/triton-3.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6b0dd10a925263abbe9fa37dcde67a5e9b2383fc269fdf59f5657cac38c5d1d8", size = 209460013 },
 
3940
  { name = "einops" },
3941
  { name = "google-generativeai" },
3942
  { name = "huggingface-hub" },
3943
+ { name = "pip" },
3944
  { name = "pypdf" },
3945
  { name = "python-dotenv" },
3946
  { name = "python-fasthtml" },
3947
  { name = "pyvespa" },
3948
  { name = "setuptools" },
3949
  { name = "shad4fast" },
3950
+ { name = "spacy" },
3951
  { name = "torch" },
3952
  { name = "vespacli" },
3953
  { name = "vidore-benchmark", extra = ["interpretability"] },
 
3980
  { name = "ipykernel", marker = "extra == 'feed'" },
3981
  { name = "jupytext", marker = "extra == 'feed'" },
3982
  { name = "pdf2image", marker = "extra == 'feed'" },
3983
+ { name = "pip" },
3984
  { name = "pydantic", marker = "extra == 'feed'" },
3985
  { name = "pypdf" },
3986
  { name = "python-dotenv" },
 
3990
  { name = "ruff", marker = "extra == 'dev'" },
3991
  { name = "setuptools" },
3992
  { name = "shad4fast", specifier = ">=1.2.1" },
3993
+ { name = "spacy" },
3994
  { name = "torch" },
3995
  { name = "vespacli" },
3996
  { name = "vidore-benchmark", extras = ["interpretability"], specifier = ">=4.0.0,<5.0.0" },
3997
  ]
3998
 
3999
+ [[package]]
4000
+ name = "wasabi"
4001
+ version = "1.1.3"
4002
+ source = { registry = "https://pypi.org/simple" }
4003
+ dependencies = [
4004
+ { name = "colorama", marker = "sys_platform == 'win32'" },
4005
+ ]
4006
+ sdist = { url = "https://files.pythonhosted.org/packages/ac/f9/054e6e2f1071e963b5e746b48d1e3727470b2a490834d18ad92364929db3/wasabi-1.1.3.tar.gz", hash = "sha256:4bb3008f003809db0c3e28b4daf20906ea871a2bb43f9914197d540f4f2e0878", size = 30391 }
4007
+ wheels = [
4008
+ { url = "https://files.pythonhosted.org/packages/06/7c/34330a89da55610daa5f245ddce5aab81244321101614751e7537f125133/wasabi-1.1.3-py3-none-any.whl", hash = "sha256:f76e16e8f7e79f8c4c8be49b4024ac725713ab10cd7f19350ad18a8e3f71728c", size = 27880 },
4009
+ ]
4010
+
4011
  [[package]]
4012
  name = "watchfiles"
4013
  version = "0.24.0"
 
4082
  { url = "https://files.pythonhosted.org/packages/fd/84/fd2ba7aafacbad3c4201d395674fc6348826569da3c0937e75505ead3528/wcwidth-0.2.13-py2.py3-none-any.whl", hash = "sha256:3da69048e4540d84af32131829ff948f1e022c1c6bdb8d6102117aac784f6859", size = 34166 },
4083
  ]
4084
 
4085
+ [[package]]
4086
+ name = "weasel"
4087
+ version = "0.4.1"
4088
+ source = { registry = "https://pypi.org/simple" }
4089
+ dependencies = [
4090
+ { name = "cloudpathlib" },
4091
+ { name = "confection" },
4092
+ { name = "packaging" },
4093
+ { name = "pydantic" },
4094
+ { name = "requests" },
4095
+ { name = "smart-open" },
4096
+ { name = "srsly" },
4097
+ { name = "typer" },
4098
+ { name = "wasabi" },
4099
+ ]
4100
+ sdist = { url = "https://files.pythonhosted.org/packages/a7/1a/9c522dd61b52939c217925d3e55c95f9348b73a66a956f52608e1e59a2c0/weasel-0.4.1.tar.gz", hash = "sha256:aabc210f072e13f6744e5c3a28037f93702433405cd35673f7c6279147085aa9", size = 38417 }
4101
+ wheels = [
4102
+ { url = "https://files.pythonhosted.org/packages/2a/87/abd57374044e1f627f0a905ac33c1a7daab35a3a815abfea4e1bafd3fdb1/weasel-0.4.1-py3-none-any.whl", hash = "sha256:24140a090ea1ac512a2b2f479cc64192fd1d527a7f3627671268d08ed5ac418c", size = 50270 },
4103
+ ]
4104
+
4105
  [[package]]
4106
  name = "websockets"
4107
  version = "13.1"
 
4170
  { url = "https://files.pythonhosted.org/packages/0a/e6/a7d828fef907843b2a5773ebff47fb79ac0c1c88d60c0ca9530ee941e248/win32_setctime-1.1.0-py3-none-any.whl", hash = "sha256:231db239e959c2fe7eb1d7dc129f11172354f98361c4fa2d6d2d7e278baa8aad", size = 3604 },
4171
  ]
4172
 
4173
+ [[package]]
4174
+ name = "wrapt"
4175
+ version = "1.16.0"
4176
+ source = { registry = "https://pypi.org/simple" }
4177
+ sdist = { url = "https://files.pythonhosted.org/packages/95/4c/063a912e20bcef7124e0df97282a8af3ff3e4b603ce84c481d6d7346be0a/wrapt-1.16.0.tar.gz", hash = "sha256:5f370f952971e7d17c7d1ead40e49f32345a7f7a5373571ef44d800d06b1899d", size = 53972 }
4178
+ wheels = [
4179
+ { url = "https://files.pythonhosted.org/packages/a8/c6/5375258add3777494671d8cec27cdf5402abd91016dee24aa2972c61fedf/wrapt-1.16.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ffa565331890b90056c01db69c0fe634a776f8019c143a5ae265f9c6bc4bd6d4", size = 37315 },
4180
+ { url = "https://files.pythonhosted.org/packages/32/12/e11adfde33444986135d8881b401e4de6cbb4cced046edc6b464e6ad7547/wrapt-1.16.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e4fdb9275308292e880dcbeb12546df7f3e0f96c6b41197e0cf37d2826359020", size = 38160 },
4181
+ { url = "https://files.pythonhosted.org/packages/70/7d/3dcc4a7e96f8d3e398450ec7703db384413f79bd6c0196e0e139055ce00f/wrapt-1.16.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bb2dee3874a500de01c93d5c71415fcaef1d858370d405824783e7a8ef5db440", size = 80419 },
4182
+ { url = "https://files.pythonhosted.org/packages/d1/c4/8dfdc3c2f0b38be85c8d9fdf0011ebad2f54e40897f9549a356bebb63a97/wrapt-1.16.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2a88e6010048489cda82b1326889ec075a8c856c2e6a256072b28eaee3ccf487", size = 72669 },
4183
+ { url = "https://files.pythonhosted.org/packages/49/83/b40bc1ad04a868b5b5bcec86349f06c1ee1ea7afe51dc3e46131e4f39308/wrapt-1.16.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ac83a914ebaf589b69f7d0a1277602ff494e21f4c2f743313414378f8f50a4cf", size = 80271 },
4184
+ { url = "https://files.pythonhosted.org/packages/19/d4/cd33d3a82df73a064c9b6401d14f346e1d2fb372885f0295516ec08ed2ee/wrapt-1.16.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:73aa7d98215d39b8455f103de64391cb79dfcad601701a3aa0dddacf74911d72", size = 84748 },
4185
+ { url = "https://files.pythonhosted.org/packages/ef/58/2fde309415b5fa98fd8f5f4a11886cbf276824c4c64d45a39da342fff6fe/wrapt-1.16.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:807cc8543a477ab7422f1120a217054f958a66ef7314f76dd9e77d3f02cdccd0", size = 77522 },
4186
+ { url = "https://files.pythonhosted.org/packages/07/44/359e4724a92369b88dbf09878a7cde7393cf3da885567ea898e5904049a3/wrapt-1.16.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:bf5703fdeb350e36885f2875d853ce13172ae281c56e509f4e6eca049bdfb136", size = 84780 },
4187
+ { url = "https://files.pythonhosted.org/packages/88/8f/706f2fee019360cc1da652353330350c76aa5746b4e191082e45d6838faf/wrapt-1.16.0-cp310-cp310-win32.whl", hash = "sha256:f6b2d0c6703c988d334f297aa5df18c45e97b0af3679bb75059e0e0bd8b1069d", size = 35335 },
4188
+ { url = "https://files.pythonhosted.org/packages/19/2b/548d23362e3002ebbfaefe649b833fa43f6ca37ac3e95472130c4b69e0b4/wrapt-1.16.0-cp310-cp310-win_amd64.whl", hash = "sha256:decbfa2f618fa8ed81c95ee18a387ff973143c656ef800c9f24fb7e9c16054e2", size = 37528 },
4189
+ { url = "https://files.pythonhosted.org/packages/fd/03/c188ac517f402775b90d6f312955a5e53b866c964b32119f2ed76315697e/wrapt-1.16.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:1a5db485fe2de4403f13fafdc231b0dbae5eca4359232d2efc79025527375b09", size = 37313 },
4190
+ { url = "https://files.pythonhosted.org/packages/0f/16/ea627d7817394db04518f62934a5de59874b587b792300991b3c347ff5e0/wrapt-1.16.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:75ea7d0ee2a15733684badb16de6794894ed9c55aa5e9903260922f0482e687d", size = 38164 },
4191
+ { url = "https://files.pythonhosted.org/packages/7f/a7/f1212ba098f3de0fd244e2de0f8791ad2539c03bef6c05a9fcb03e45b089/wrapt-1.16.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a452f9ca3e3267cd4d0fcf2edd0d035b1934ac2bd7e0e57ac91ad6b95c0c6389", size = 80890 },
4192
+ { url = "https://files.pythonhosted.org/packages/b7/96/bb5e08b3d6db003c9ab219c487714c13a237ee7dcc572a555eaf1ce7dc82/wrapt-1.16.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:43aa59eadec7890d9958748db829df269f0368521ba6dc68cc172d5d03ed8060", size = 73118 },
4193
+ { url = "https://files.pythonhosted.org/packages/6e/52/2da48b35193e39ac53cfb141467d9f259851522d0e8c87153f0ba4205fb1/wrapt-1.16.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:72554a23c78a8e7aa02abbd699d129eead8b147a23c56e08d08dfc29cfdddca1", size = 80746 },
4194
+ { url = "https://files.pythonhosted.org/packages/11/fb/18ec40265ab81c0e82a934de04596b6ce972c27ba2592c8b53d5585e6bcd/wrapt-1.16.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:d2efee35b4b0a347e0d99d28e884dfd82797852d62fcd7ebdeee26f3ceb72cf3", size = 85668 },
4195
+ { url = "https://files.pythonhosted.org/packages/0f/ef/0ecb1fa23145560431b970418dce575cfaec555ab08617d82eb92afc7ccf/wrapt-1.16.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:6dcfcffe73710be01d90cae08c3e548d90932d37b39ef83969ae135d36ef3956", size = 78556 },
4196
+ { url = "https://files.pythonhosted.org/packages/25/62/cd284b2b747f175b5a96cbd8092b32e7369edab0644c45784871528eb852/wrapt-1.16.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:eb6e651000a19c96f452c85132811d25e9264d836951022d6e81df2fff38337d", size = 85712 },
4197
+ { url = "https://files.pythonhosted.org/packages/e5/a7/47b7ff74fbadf81b696872d5ba504966591a3468f1bc86bca2f407baef68/wrapt-1.16.0-cp311-cp311-win32.whl", hash = "sha256:66027d667efe95cc4fa945af59f92c5a02c6f5bb6012bff9e60542c74c75c362", size = 35327 },
4198
+ { url = "https://files.pythonhosted.org/packages/cf/c3/0084351951d9579ae83a3d9e38c140371e4c6b038136909235079f2e6e78/wrapt-1.16.0-cp311-cp311-win_amd64.whl", hash = "sha256:aefbc4cb0a54f91af643660a0a150ce2c090d3652cf4052a5397fb2de549cd89", size = 37523 },
4199
+ { url = "https://files.pythonhosted.org/packages/92/17/224132494c1e23521868cdd57cd1e903f3b6a7ba6996b7b8f077ff8ac7fe/wrapt-1.16.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:5eb404d89131ec9b4f748fa5cfb5346802e5ee8836f57d516576e61f304f3b7b", size = 37614 },
4200
+ { url = "https://files.pythonhosted.org/packages/6a/d7/cfcd73e8f4858079ac59d9db1ec5a1349bc486ae8e9ba55698cc1f4a1dff/wrapt-1.16.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:9090c9e676d5236a6948330e83cb89969f433b1943a558968f659ead07cb3b36", size = 38316 },
4201
+ { url = "https://files.pythonhosted.org/packages/7e/79/5ff0a5c54bda5aec75b36453d06be4f83d5cd4932cc84b7cb2b52cee23e2/wrapt-1.16.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:94265b00870aa407bd0cbcfd536f17ecde43b94fb8d228560a1e9d3041462d73", size = 86322 },
4202
+ { url = "https://files.pythonhosted.org/packages/c4/81/e799bf5d419f422d8712108837c1d9bf6ebe3cb2a81ad94413449543a923/wrapt-1.16.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f2058f813d4f2b5e3a9eb2eb3faf8f1d99b81c3e51aeda4b168406443e8ba809", size = 79055 },
4203
+ { url = "https://files.pythonhosted.org/packages/62/62/30ca2405de6a20448ee557ab2cd61ab9c5900be7cbd18a2639db595f0b98/wrapt-1.16.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:98b5e1f498a8ca1858a1cdbffb023bfd954da4e3fa2c0cb5853d40014557248b", size = 87291 },
4204
+ { url = "https://files.pythonhosted.org/packages/49/4e/5d2f6d7b57fc9956bf06e944eb00463551f7d52fc73ca35cfc4c2cdb7aed/wrapt-1.16.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:14d7dc606219cdd7405133c713f2c218d4252f2a469003f8c46bb92d5d095d81", size = 90374 },
4205
+ { url = "https://files.pythonhosted.org/packages/a6/9b/c2c21b44ff5b9bf14a83252a8b973fb84923764ff63db3e6dfc3895cf2e0/wrapt-1.16.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:49aac49dc4782cb04f58986e81ea0b4768e4ff197b57324dcbd7699c5dfb40b9", size = 83896 },
4206
+ { url = "https://files.pythonhosted.org/packages/14/26/93a9fa02c6f257df54d7570dfe8011995138118d11939a4ecd82cb849613/wrapt-1.16.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:418abb18146475c310d7a6dc71143d6f7adec5b004ac9ce08dc7a34e2babdc5c", size = 91738 },
4207
+ { url = "https://files.pythonhosted.org/packages/a2/5b/4660897233eb2c8c4de3dc7cefed114c61bacb3c28327e64150dc44ee2f6/wrapt-1.16.0-cp312-cp312-win32.whl", hash = "sha256:685f568fa5e627e93f3b52fda002c7ed2fa1800b50ce51f6ed1d572d8ab3e7fc", size = 35568 },
4208
+ { url = "https://files.pythonhosted.org/packages/5c/cc/8297f9658506b224aa4bd71906447dea6bb0ba629861a758c28f67428b91/wrapt-1.16.0-cp312-cp312-win_amd64.whl", hash = "sha256:dcdba5c86e368442528f7060039eda390cc4091bfd1dca41e8046af7c910dda8", size = 37653 },
4209
+ { url = "https://files.pythonhosted.org/packages/ff/21/abdedb4cdf6ff41ebf01a74087740a709e2edb146490e4d9beea054b0b7a/wrapt-1.16.0-py3-none-any.whl", hash = "sha256:6906c4100a8fcbf2fa735f6059214bb13b97f75b1a61777fcf6432121ef12ef1", size = 23362 },
4210
+ ]
4211
+
4212
  [[package]]
4213
  name = "xxhash"
4214
  version = "3.5.0"