geekyrakshit committed on
Commit
8992b40
·
1 Parent(s): d889dc6

add: ImageLoader

Browse files
docs/document_loader/load_image.md ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ # Load PDF pages as images
2
+
3
+ ::: medrag_multi_modal.document_loader.load_image
medrag_multi_modal/document_loader/__init__.py CHANGED
@@ -1,4 +1,5 @@
 
1
  from .load_text import TextLoader
2
  from .load_text_image import TextImageLoader
3
 
4
- __all__ = ["TextLoader", "TextImageLoader"]
 
1
+ from .load_image import ImageLoader
2
  from .load_text import TextLoader
3
  from .load_text_image import TextImageLoader
4
 
5
+ __all__ = ["TextLoader", "TextImageLoader", "ImageLoader"]
medrag_multi_modal/document_loader/load_image.py ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+
3
+ import rich
4
+ import weave
5
+ from pdf2image.pdf2image import convert_from_path
6
+ from PIL import Image
7
+
8
+ from medrag_multi_modal.document_loader.load_text import TextLoader
9
+
10
+
11
class ImageLoader(TextLoader):
    """
    Load pages of a PDF document as images.

    `ImageLoader` extends `TextLoader`: instead of extracting text it renders each
    requested page to a `PIL.Image.Image` using `pdf2image`, and can optionally
    publish the resulting rows to a Weave dataset.

    !!! example "Example Usage"
        ```python
        import asyncio

        import weave
        from dotenv import load_dotenv

        from medrag_multi_modal.document_loader import ImageLoader

        load_dotenv()
        weave.init(project_name="ml-colabs/medrag-multi-modal")
        url = "https://archive.org/download/GraysAnatomy41E2015PDF/Grays%20Anatomy-41%20E%20%282015%29%20%5BPDF%5D.pdf"
        loader = ImageLoader(
            url=url,
            document_name="Gray's Anatomy",
            document_file_path="grays_anatomy.pdf",
        )
        asyncio.run(
            loader.load_data(
                start_page=31,
                end_page=33,
                weave_dataset_name="grays-anatomy-images",
            )
        )
        ```

    Args:
        url (str): The URL of the PDF document.
        document_name (str): The name of the document.
        document_file_path (str): The path to the PDF file.
    """

    def __init__(self, url: str, document_name: str, document_file_path: str):
        # All state (url, document_name, document_file_path) is handled by TextLoader.
        super().__init__(url, document_name, document_file_path)

    def extract_data_from_pdf_file(
        self, pdf_file: str, page_number: int
    ) -> Image.Image:
        """
        Render a single page of a PDF file as an image.

        Args:
            pdf_file (str): Path to the PDF file on disk.
            page_number (int): 0-based index of the page to render.

        Returns:
            Image.Image: The rendered page.
        """
        # The page index is shifted by one because `first_page`/`last_page` are
        # passed as 1-based page numbers; requesting a single-page range yields a
        # one-element list.
        return convert_from_path(
            pdf_file, first_page=page_number + 1, last_page=page_number + 1
        )[0]

    async def load_data(
        self,
        start_page: int,
        end_page: int,
        weave_dataset_name: str | None = None,
    ) -> list[dict]:
        """
        Asynchronously render a range of PDF pages to images and optionally publish
        them to a Weave dataset.

        Each page is converted with `pdf2image` in a worker thread (the conversion is
        a blocking call), so several pages can be rendered concurrently without
        blocking the event loop. The returned rows are always ordered by page index.

        Args:
            start_page (int): The starting page index (0-based) to process.
            end_page (int): The ending page index (0-based). The pair is normalised
                by `get_page_indices` (inherited, not shown here) and the resulting
                range is iterated as `range(start_page, end_page)`.
            weave_dataset_name (str | None): Name of the Weave dataset to publish the
                rows to. Nothing is published when it is `None` or empty.

        Returns:
            list[dict]: One dictionary per processed page with the keys `image`,
                `page_idx`, `document_name`, `file_path` and `file_url`.

        Raises:
            ValueError: Presumably raised by the inherited `get_page_indices` when the
                requested pages are out of bounds -- confirm against `TextLoader`.
        """
        start_page, end_page = self.get_page_indices(start_page, end_page)
        total_pages = end_page - start_page
        processed_pages_counter = 0

        async def process_page(page_idx: int) -> dict:
            nonlocal processed_pages_counter
            # Rendering is blocking work; run it in a thread so that other pages
            # can make progress while this one is being converted.
            image = await asyncio.to_thread(
                self.extract_data_from_pdf_file, self.document_file_path, page_idx
            )
            # Back on the event-loop thread: updating the counter here is race-free.
            processed_pages_counter += 1
            rich.print(f"Processed pages {processed_pages_counter}/{total_pages}")
            return {
                "image": image,
                "page_idx": page_idx,
                "document_name": self.document_name,
                "file_path": self.document_file_path,
                "file_url": self.url,
            }

        # `gather` returns results in the order the awaitables were passed, so the
        # rows come back sorted by page index regardless of completion order.
        pages = list(
            await asyncio.gather(
                *(process_page(page_idx) for page_idx in range(start_page, end_page))
            )
        )

        if weave_dataset_name:
            weave.publish(weave.Dataset(name=weave_dataset_name, rows=pages))
        return pages
mkdocs.yml CHANGED
@@ -62,5 +62,6 @@ nav:
62
  - Document Loader:
63
  - Text Loader: 'document_loader/load_text.md'
64
  - Text and Image Loader: 'document_loader/load_text_image.md'
 
65
 
66
  repo_url: https://github.com/soumik12345/medrag-multi-modal
 
62
  - Document Loader:
63
  - Text Loader: 'document_loader/load_text.md'
64
  - Text and Image Loader: 'document_loader/load_text_image.md'
65
+ - Image Loader: 'document_loader/load_image.md'
66
 
67
  repo_url: https://github.com/soumik12345/medrag-multi-modal
pyproject.toml CHANGED
@@ -7,10 +7,11 @@ requires-python = ">=3.10"
7
  dependencies = [
8
  "Byaldi>=0.0.5",
9
  "firerequests>=0.0.7",
 
10
  "python-dotenv>=1.0.1",
11
  "pymupdf4llm>=0.0.17",
12
  "torch>=2.4.1",
13
- "weave>=0.51.12",
14
  "pip>=24.2",
15
  "uv>=0.4.20",
16
  "pytest>=8.3.3",
@@ -32,10 +33,11 @@ dependencies = [
32
  core = [
33
  "Byaldi>=0.0.5",
34
  "firerequests>=0.0.7",
 
35
  "python-dotenv>=1.0.1",
36
  "pymupdf4llm>=0.0.17",
37
  "torch>=2.4.1",
38
- "weave>=0.51.12",
39
  ]
40
 
41
  dev = [
 
7
  dependencies = [
8
  "Byaldi>=0.0.5",
9
  "firerequests>=0.0.7",
10
+ "pdf2image>=1.17.0",
11
  "python-dotenv>=1.0.1",
12
  "pymupdf4llm>=0.0.17",
13
  "torch>=2.4.1",
14
+ "weave>=0.51.14",
15
  "pip>=24.2",
16
  "uv>=0.4.20",
17
  "pytest>=8.3.3",
 
33
  core = [
34
  "Byaldi>=0.0.5",
35
  "firerequests>=0.0.7",
36
+ "pdf2image>=1.17.0",
37
  "python-dotenv>=1.0.1",
38
  "pymupdf4llm>=0.0.17",
39
  "torch>=2.4.1",
40
+ "weave>=0.51.14",
41
  ]
42
 
43
  dev = [
uv.lock CHANGED
@@ -124,15 +124,6 @@ wheels = [
124
  { url = "https://files.pythonhosted.org/packages/71/c0/6d33ac32bfbf9dd91a16c26bc37dd4763084d7f991dc848655d34e31291a/aiohttp-3.10.9-cp313-cp313-win_amd64.whl", hash = "sha256:a35ed3d03910785f7d9d6f5381f0c24002b2b888b298e6f941b2fc94c5055fcd", size = 377205 },
125
  ]
126
 
127
- [[package]]
128
- name = "aioprocessing"
129
- version = "2.0.1"
130
- source = { registry = "https://pypi.org/simple" }
131
- sdist = { url = "https://files.pythonhosted.org/packages/4d/85/9a75151e7049bf144c01384279201d82d99484bd658f8e6fb013552d8724/aioprocessing-2.0.1.tar.gz", hash = "sha256:fe01c7b1a38c78168611d3040e73d93036c3b7c8a649d636dc9ed7a3bc9b1ba2", size = 12818 }
132
- wheels = [
133
- { url = "https://files.pythonhosted.org/packages/ea/7b/34129c3bb87078f37b1ca64b547e8669fdde00db9fa724f0b3a8ec54bb27/aioprocessing-2.0.1-py3-none-any.whl", hash = "sha256:8fcac4b0108b72eb9df76e06a9d7e05720ee1e8330829d3fd53fa059879be586", size = 14415 },
134
- ]
135
-
136
  [[package]]
137
  name = "aiosignal"
138
  version = "1.3.1"
@@ -145,20 +136,6 @@ wheels = [
145
  { url = "https://files.pythonhosted.org/packages/76/ac/a7305707cb852b7e16ff80eaf5692309bde30e2b1100a1fcacdc8f731d97/aiosignal-1.3.1-py3-none-any.whl", hash = "sha256:f8376fb07dd1e86a584e4fcdec80b36b7f81aac666ebc724e2c090300dd83b17", size = 7617 },
146
  ]
147
 
148
- [[package]]
149
- name = "analytics-python"
150
- version = "1.2.9"
151
- source = { registry = "https://pypi.org/simple" }
152
- dependencies = [
153
- { name = "python-dateutil" },
154
- { name = "requests" },
155
- { name = "six" },
156
- ]
157
- sdist = { url = "https://files.pythonhosted.org/packages/6d/ae/affa8190ad884f9654483201f6fe71465bd59263b3365c0e3b544cd36203/analytics-python-1.2.9.tar.gz", hash = "sha256:f3d1ca27cb277da67c10d71a5c9c593d2a9ec99109e31409ab771b44821a86bf", size = 9706 }
158
- wheels = [
159
- { url = "https://files.pythonhosted.org/packages/d3/37/c49d052f88655cd96445c36979fb63f69ef859e167eaff5706ca7c8a8ee3/analytics_python-1.2.9-py2.py3-none-any.whl", hash = "sha256:69d88b2d3e2c350e6803487a1a802e0fd111e86665d4c9b16c3c6f5fbc6c445f", size = 13445 },
160
- ]
161
-
162
  [[package]]
163
  name = "annotated-types"
164
  version = "0.7.0"
@@ -650,15 +627,6 @@ wheels = [
650
  { url = "https://files.pythonhosted.org/packages/f5/3a/74a29b11cf2cdfcd6ba89c0cecd70b37cd1ba7b77978ce611eb7a146a832/dill-0.3.7-py3-none-any.whl", hash = "sha256:76b122c08ef4ce2eedcd4d1abd8e641114bfc6c2867f49f3c41facf65bf19f5e", size = 115254 },
651
  ]
652
 
653
- [[package]]
654
- name = "distro"
655
- version = "1.9.0"
656
- source = { registry = "https://pypi.org/simple" }
657
- sdist = { url = "https://files.pythonhosted.org/packages/fc/f8/98eea607f65de6527f8a2e8885fc8015d3e6f5775df186e443e0964a11c3/distro-1.9.0.tar.gz", hash = "sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed", size = 60722 }
658
- wheels = [
659
- { url = "https://files.pythonhosted.org/packages/12/b3/231ffd4ab1fc9d679809f356cebee130ac7daa00d6d6f3206dd4fd137e9e/distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2", size = 20277 },
660
- ]
661
-
662
  [[package]]
663
  name = "docker-pycreds"
664
  version = "0.4.0"
@@ -887,6 +855,9 @@ wheels = [
887
  ]
888
 
889
  [package.optional-dependencies]
 
 
 
890
  requests = [
891
  { name = "requests" },
892
  { name = "requests-toolbelt" },
@@ -1078,18 +1049,6 @@ wheels = [
1078
  { url = "https://files.pythonhosted.org/packages/d1/b3/8def84f539e7d2289a02f0524b944b15d7c75dab7628bedf1c4f0992029c/isort-5.13.2-py3-none-any.whl", hash = "sha256:8ca5e72a8d85860d5a3fa69b8745237f2939afe12dbf656afbcb47fe72d947a6", size = 92310 },
1079
  ]
1080
 
1081
- [[package]]
1082
- name = "janus"
1083
- version = "1.0.0"
1084
- source = { registry = "https://pypi.org/simple" }
1085
- dependencies = [
1086
- { name = "typing-extensions" },
1087
- ]
1088
- sdist = { url = "https://files.pythonhosted.org/packages/b8/a8/facab7275d7d3d2032f375843fe46fad1cfa604a108b5a238638d4615bdc/janus-1.0.0.tar.gz", hash = "sha256:df976f2cdcfb034b147a2d51edfc34ff6bfb12d4e2643d3ad0e10de058cb1612", size = 19043 }
1089
- wheels = [
1090
- { url = "https://files.pythonhosted.org/packages/c1/84/7bfe436fa6a4943eecb17c2cca9c84215299684575376d664ea6bf294439/janus-1.0.0-py3-none-any.whl", hash = "sha256:2596ea5482711c1ee3ef2df6c290aaf370a13c55a007826e8f7c32d696d1d00a", size = 6895 },
1091
- ]
1092
-
1093
  [[package]]
1094
  name = "jedi"
1095
  version = "0.19.1"
@@ -1114,62 +1073,6 @@ wheels = [
1114
  { url = "https://files.pythonhosted.org/packages/31/80/3a54838c3fb461f6fec263ebf3a3a41771bd05190238de3486aae8540c36/jinja2-3.1.4-py3-none-any.whl", hash = "sha256:bc5dd2abb727a5319567b7a813e6a2e7318c39f4f487cfe6c89c6f9c7d25197d", size = 133271 },
1115
  ]
1116
 
1117
- [[package]]
1118
- name = "jiter"
1119
- version = "0.6.1"
1120
- source = { registry = "https://pypi.org/simple" }
1121
- sdist = { url = "https://files.pythonhosted.org/packages/26/ef/64458dfad180debd70d9dd1ca4f607e52bb6de748e5284d748556a0d5173/jiter-0.6.1.tar.gz", hash = "sha256:e19cd21221fc139fb032e4112986656cb2739e9fe6d84c13956ab30ccc7d4449", size = 161306 }
1122
- wheels = [
1123
- { url = "https://files.pythonhosted.org/packages/0c/1d/9dede54580112c1403a9b6ef0cab33d10c58e3e7e55548d6b97bfd890748/jiter-0.6.1-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:d08510593cb57296851080018006dfc394070178d238b767b1879dc1013b106c", size = 290507 },
1124
- { url = "https://files.pythonhosted.org/packages/b2/28/cf5586637c8c21ad1d68bcc3361d60ade8e81524340454f21c68e8368b70/jiter-0.6.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:adef59d5e2394ebbad13b7ed5e0306cceb1df92e2de688824232a91588e77aa7", size = 301642 },
1125
- { url = "https://files.pythonhosted.org/packages/6b/ab/07e67b0a9ad816f5130def05537177f2efdfe451480a584ae9fbb31cdaf8/jiter-0.6.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b3e02f7a27f2bcc15b7d455c9df05df8ffffcc596a2a541eeda9a3110326e7a3", size = 337364 },
1126
- { url = "https://files.pythonhosted.org/packages/25/3a/bb625446b95b7f964ac8c5e9260190262b629c1aecc9f7e9fd7730e2e2b1/jiter-0.6.1-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ed69a7971d67b08f152c17c638f0e8c2aa207e9dd3a5fcd3cba294d39b5a8d2d", size = 353782 },
1127
- { url = "https://files.pythonhosted.org/packages/44/78/fb2bf870418360ac523ac1591a7418add2e9385e207ca6320907d22a0699/jiter-0.6.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b2019d966e98f7c6df24b3b8363998575f47d26471bfb14aade37630fae836a1", size = 370761 },
1128
- { url = "https://files.pythonhosted.org/packages/ae/c3/4e68a0e52a3790df68b95a5fa0d70aae3f6d1f376adf515fb9016080ccf3/jiter-0.6.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:36c0b51a285b68311e207a76c385650322734c8717d16c2eb8af75c9d69506e7", size = 392957 },
1129
- { url = "https://files.pythonhosted.org/packages/bd/5a/d2fe7904a3f12cb2a425e83382186d23325c3316d40382cd17cd4a2205b9/jiter-0.6.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:220e0963b4fb507c525c8f58cde3da6b1be0bfddb7ffd6798fb8f2531226cdb1", size = 325211 },
1130
- { url = "https://files.pythonhosted.org/packages/d6/4a/9db9f1f7034187290ffb370c9b579e647b3e5889a541b54d113353d29a14/jiter-0.6.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:aa25c7a9bf7875a141182b9c95aed487add635da01942ef7ca726e42a0c09058", size = 366109 },
1131
- { url = "https://files.pythonhosted.org/packages/0c/4b/487e2623703da76405d3ccd5f6047a7c7f9e238eda7a3043b806542e53ac/jiter-0.6.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:e90552109ca8ccd07f47ca99c8a1509ced93920d271bb81780a973279974c5ab", size = 514433 },
1132
- { url = "https://files.pythonhosted.org/packages/33/18/ed55ecd669f5ce963045f9cd3404c937d51509324070af5bba17cda789fd/jiter-0.6.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:67723a011964971864e0b484b0ecfee6a14de1533cff7ffd71189e92103b38a8", size = 496282 },
1133
- { url = "https://files.pythonhosted.org/packages/c1/8e/2854fe24b38e7180396a991e34363f3e7a72ea99c4a05f2c3940ae01fda8/jiter-0.6.1-cp310-none-win32.whl", hash = "sha256:33af2b7d2bf310fdfec2da0177eab2fedab8679d1538d5b86a633ebfbbac4edd", size = 197413 },
1134
- { url = "https://files.pythonhosted.org/packages/5b/bd/ff2f6a84574e0e01759dd81255c3145cacd9f374d01efc49574b03638105/jiter-0.6.1-cp310-none-win_amd64.whl", hash = "sha256:7cea41c4c673353799906d940eee8f2d8fd1d9561d734aa921ae0f75cb9732f4", size = 200042 },
1135
- { url = "https://files.pythonhosted.org/packages/95/91/d1605f3cabcf47193ecab3712e5a4c55a19cf1a4d86ef67402325e28a44e/jiter-0.6.1-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:b03c24e7da7e75b170c7b2b172d9c5e463aa4b5c95696a368d52c295b3f6847f", size = 290963 },
1136
- { url = "https://files.pythonhosted.org/packages/91/35/85ef9eaef7dec14f28dd9b8a2116c07075bb2731a405b650a55fda4c74d7/jiter-0.6.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:47fee1be677b25d0ef79d687e238dc6ac91a8e553e1a68d0839f38c69e0ee491", size = 302639 },
1137
- { url = "https://files.pythonhosted.org/packages/3b/c7/87a809bf95eb6fbcd8b30ea1d0f922c2187590de64a7f0944615008fde45/jiter-0.6.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:25f0d2f6e01a8a0fb0eab6d0e469058dab2be46ff3139ed2d1543475b5a1d8e7", size = 337048 },
1138
- { url = "https://files.pythonhosted.org/packages/bf/70/c31f21c109a01e6ebb0e032c8296d24761b5244b37d16bb3e9b0789a0eb0/jiter-0.6.1-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:0b809e39e342c346df454b29bfcc7bca3d957f5d7b60e33dae42b0e5ec13e027", size = 354239 },
1139
- { url = "https://files.pythonhosted.org/packages/b9/86/6e4ef77c86175bbcc2cff6e8c6a8f98a554f88ce99b9c892c9330858d07c/jiter-0.6.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e9ac7c2f092f231f5620bef23ce2e530bd218fc046098747cc390b21b8738a7a", size = 370842 },
1140
- { url = "https://files.pythonhosted.org/packages/ba/e3/ef93fc307278d98c981b09b4f965f49312d0639ba31c2db4fe073b78a833/jiter-0.6.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e51a2d80d5fe0ffb10ed2c82b6004458be4a3f2b9c7d09ed85baa2fbf033f54b", size = 392489 },
1141
- { url = "https://files.pythonhosted.org/packages/63/6d/bff2bce7cc17bd7e0f517490cfa4444ad94d20720eb2ccd3152a6cd57a30/jiter-0.6.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3343d4706a2b7140e8bd49b6c8b0a82abf9194b3f0f5925a78fc69359f8fc33c", size = 325493 },
1142
- { url = "https://files.pythonhosted.org/packages/49/4b/56e8a5e2be5439e503b77d2c9479197e0d8199827d7f79b06592747c5210/jiter-0.6.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:82521000d18c71e41c96960cb36e915a357bc83d63a8bed63154b89d95d05ad1", size = 365974 },
1143
- { url = "https://files.pythonhosted.org/packages/d3/9b/967752fb36ddb4b6ea7a2a8cd0ef3f167a112a2d3a2131ee544969203659/jiter-0.6.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:3c843e7c1633470708a3987e8ce617ee2979ee18542d6eb25ae92861af3f1d62", size = 514144 },
1144
- { url = "https://files.pythonhosted.org/packages/58/55/9b7e0021e567731b076a8bf017a1df7d6f148bb175be2ac647a0c6433bbd/jiter-0.6.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:a2e861658c3fe849efc39b06ebb98d042e4a4c51a8d7d1c3ddc3b1ea091d0784", size = 496072 },
1145
- { url = "https://files.pythonhosted.org/packages/ca/37/9e0638d2a129a1b72344a90a03b2b518c048066db0858aaf0877cb9d4acd/jiter-0.6.1-cp311-none-win32.whl", hash = "sha256:7d72fc86474862c9c6d1f87b921b70c362f2b7e8b2e3c798bb7d58e419a6bc0f", size = 197571 },
1146
- { url = "https://files.pythonhosted.org/packages/65/8a/78d337464e2b2e552d2988148e3e51da5445d910345c0d00f1982fd9aad4/jiter-0.6.1-cp311-none-win_amd64.whl", hash = "sha256:3e36a320634f33a07794bb15b8da995dccb94f944d298c8cfe2bd99b1b8a574a", size = 201994 },
1147
- { url = "https://files.pythonhosted.org/packages/2e/d5/fcdfbcea637f8b9b833597797d6b77fd7e22649b4794fc571674477c8520/jiter-0.6.1-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:1fad93654d5a7dcce0809aff66e883c98e2618b86656aeb2129db2cd6f26f867", size = 289279 },
1148
- { url = "https://files.pythonhosted.org/packages/9a/47/8e4a7704a267b8d1d3287b4353fc07f1f4a3541b27988ea3e49ccbf3164a/jiter-0.6.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:4e6e340e8cd92edab7f6a3a904dbbc8137e7f4b347c49a27da9814015cc0420c", size = 300931 },
1149
- { url = "https://files.pythonhosted.org/packages/ea/4f/fbb1e11fcc3881d108359d3db8456715c9d30ddfce84dc5f9e0856e08e11/jiter-0.6.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:691352e5653af84ed71763c3c427cff05e4d658c508172e01e9c956dfe004aba", size = 336534 },
1150
- { url = "https://files.pythonhosted.org/packages/29/8a/4c1e1229f89127187df166de760438b2a20e5a311391ba10d2b69db0da6f/jiter-0.6.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:defee3949313c1f5b55e18be45089970cdb936eb2a0063f5020c4185db1b63c9", size = 354266 },
1151
- { url = "https://files.pythonhosted.org/packages/19/15/3f27f4b9d40bc7709a30fda99876cbe9e9f75a0ea2ef7d55f3dd4d04f927/jiter-0.6.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:26d2bdd5da097e624081c6b5d416d3ee73e5b13f1703bcdadbb1881f0caa1933", size = 370492 },
1152
- { url = "https://files.pythonhosted.org/packages/1f/9d/9ec03c07325bc3a3c5b5082840b8ecb7e7ad38f3071c149b7c6fb9e78706/jiter-0.6.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:18aa9d1626b61c0734b973ed7088f8a3d690d0b7f5384a5270cd04f4d9f26c86", size = 390330 },
1153
- { url = "https://files.pythonhosted.org/packages/bd/3b/612ea6daa52d64bc0cc46f2bd2e138952c58f1edbe86b17fd89e07c33d86/jiter-0.6.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7a3567c8228afa5ddcce950631c6b17397ed178003dc9ee7e567c4c4dcae9fa0", size = 324245 },
1154
- { url = "https://files.pythonhosted.org/packages/21/0f/f3a1ffd9f203d4014b4e5045c0ea2c67ee71a7eee8bf3408dbf11007cf07/jiter-0.6.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e5c0507131c922defe3f04c527d6838932fcdfd69facebafd7d3574fa3395314", size = 368232 },
1155
- { url = "https://files.pythonhosted.org/packages/62/12/5d75729e0a57804852de0affc6f03b3df8518259e47ed4cd89aeeb671a71/jiter-0.6.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:540fcb224d7dc1bcf82f90f2ffb652df96f2851c031adca3c8741cb91877143b", size = 513820 },
1156
- { url = "https://files.pythonhosted.org/packages/5f/e8/e47734280e19cd465832e610e1c69367ee72947de738785c4b6fc4031e25/jiter-0.6.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:e7b75436d4fa2032b2530ad989e4cb0ca74c655975e3ff49f91a1a3d7f4e1df2", size = 496023 },
1157
- { url = "https://files.pythonhosted.org/packages/52/01/5f65dd1387d39aa3fd4a98a5be1d8470e929a0cb0dd6cbfebaccd9a20ac5/jiter-0.6.1-cp312-none-win32.whl", hash = "sha256:883d2ced7c21bf06874fdeecab15014c1c6d82216765ca6deef08e335fa719e0", size = 197425 },
1158
- { url = "https://files.pythonhosted.org/packages/43/b2/bd6665030f7d7cd5d9182c62a869c3d5ceadd7bff9f1b305de9192e7dbf8/jiter-0.6.1-cp312-none-win_amd64.whl", hash = "sha256:91e63273563401aadc6c52cca64a7921c50b29372441adc104127b910e98a5b6", size = 198966 },
1159
- { url = "https://files.pythonhosted.org/packages/23/38/7b48e0149778ff4b893567c9fd997ecfcc013e290375aa7823e1f681b3d3/jiter-0.6.1-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:852508a54fe3228432e56019da8b69208ea622a3069458252f725d634e955b31", size = 288674 },
1160
- { url = "https://files.pythonhosted.org/packages/85/3b/96d15b483d82a637279da53a1d299dd5da6e029b9905bcd1a4e1f89b8e4f/jiter-0.6.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f491cc69ff44e5a1e8bc6bf2b94c1f98d179e1aaf4a554493c171a5b2316b701", size = 301531 },
1161
- { url = "https://files.pythonhosted.org/packages/cf/54/9681f112cbec4e197259e9db679bd4bc314f4bd24f74b9aa5e93073990b5/jiter-0.6.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cc56c8f0b2a28ad4d8047f3ae62d25d0e9ae01b99940ec0283263a04724de1f3", size = 335954 },
1162
- { url = "https://files.pythonhosted.org/packages/4a/4d/f9c0ba82b154c66278e28348086086264ccf50622ae468ec215e4bbc2873/jiter-0.6.1-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:51b58f7a0d9e084a43b28b23da2b09fc5e8df6aa2b6a27de43f991293cab85fd", size = 353996 },
1163
- { url = "https://files.pythonhosted.org/packages/ee/be/7f26b258ef190f6d582e21c76c7dd1097753a2203bad3e1643f45392720a/jiter-0.6.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5f79ce15099154c90ef900d69c6b4c686b64dfe23b0114e0971f2fecd306ec6c", size = 369733 },
1164
- { url = "https://files.pythonhosted.org/packages/5f/85/037ed5261fa622312471ef5520b2135c26b29256c83adc16c8cc55dc4108/jiter-0.6.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:03a025b52009f47e53ea619175d17e4ded7c035c6fbd44935cb3ada11e1fd592", size = 389920 },
1165
- { url = "https://files.pythonhosted.org/packages/a8/f3/2e01294712faa476be9e6ceb49e424c3919e03415ded76d103378a06bb80/jiter-0.6.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c74a8d93718137c021d9295248a87c2f9fdc0dcafead12d2930bc459ad40f885", size = 324138 },
1166
- { url = "https://files.pythonhosted.org/packages/00/45/50377814f21b6412c7785be27f2dace225af52e0af20be7af899a7e3f264/jiter-0.6.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:40b03b75f903975f68199fc4ec73d546150919cb7e534f3b51e727c4d6ccca5a", size = 367610 },
1167
- { url = "https://files.pythonhosted.org/packages/af/fc/51ba30875125381bfe21a1572c176de1a7dd64a386a7498355fc100decc4/jiter-0.6.1-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:825651a3f04cf92a661d22cad61fc913400e33aa89b3e3ad9a6aa9dc8a1f5a71", size = 512945 },
1168
- { url = "https://files.pythonhosted.org/packages/69/60/af26168bd4916f9199ed433161e9f8a4eeda581a4e5982560d0f22dd146c/jiter-0.6.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:928bf25eb69ddb292ab8177fe69d3fbf76c7feab5fce1c09265a7dccf25d3991", size = 494963 },
1169
- { url = "https://files.pythonhosted.org/packages/f3/2f/4f3cc5c9067a6fd1020d3c4365546535a69ed77da7fba2bec24368f3662c/jiter-0.6.1-cp313-none-win32.whl", hash = "sha256:352cd24121e80d3d053fab1cc9806258cad27c53cad99b7a3cac57cf934b12e4", size = 196869 },
1170
- { url = "https://files.pythonhosted.org/packages/7a/fc/8709ee90837e94790d8b50db51c7b8a70e86e41b2c81e824c20b0ecfeba7/jiter-0.6.1-cp313-none-win_amd64.whl", hash = "sha256:be7503dd6f4bf02c2a9bacb5cc9335bc59132e7eee9d3e931b13d76fd80d7fda", size = 198919 },
1171
- ]
1172
-
1173
  [[package]]
1174
  name = "joblib"
1175
  version = "1.4.2"
@@ -1600,6 +1503,7 @@ dependencies = [
1600
  { name = "mkdocs-minify-plugin" },
1601
  { name = "mkdocstrings" },
1602
  { name = "mkdocstrings-python" },
 
1603
  { name = "pip" },
1604
  { name = "pymupdf4llm" },
1605
  { name = "pypdf2" },
@@ -1615,6 +1519,7 @@ dependencies = [
1615
  core = [
1616
  { name = "byaldi" },
1617
  { name = "firerequests" },
 
1618
  { name = "pymupdf4llm" },
1619
  { name = "python-dotenv" },
1620
  { name = "torch" },
@@ -1664,6 +1569,8 @@ requires-dist = [
1664
  { name = "mkdocstrings", marker = "extra == 'docs'", specifier = ">=0.26.1" },
1665
  { name = "mkdocstrings-python", specifier = ">=1.11.1" },
1666
  { name = "mkdocstrings-python", marker = "extra == 'docs'", specifier = ">=1.11.1" },
 
 
1667
  { name = "pip", specifier = ">=24.2" },
1668
  { name = "pymupdf4llm", specifier = ">=0.0.17" },
1669
  { name = "pymupdf4llm", marker = "extra == 'core'", specifier = ">=0.0.17" },
@@ -1678,8 +1585,8 @@ requires-dist = [
1678
  { name = "torch", specifier = ">=2.4.1" },
1679
  { name = "torch", marker = "extra == 'core'", specifier = ">=2.4.1" },
1680
  { name = "uv", specifier = ">=0.4.20" },
1681
- { name = "weave", specifier = ">=0.51.12" },
1682
- { name = "weave", marker = "extra == 'core'", specifier = ">=0.51.12" },
1683
  ]
1684
 
1685
  [[package]]
@@ -2316,25 +2223,6 @@ wheels = [
2316
  { url = "https://files.pythonhosted.org/packages/da/d3/8057f0587683ed2fcd4dbfbdfdfa807b9160b809976099d36b8f60d08f03/nvidia_nvtx_cu12-12.1.105-py3-none-manylinux1_x86_64.whl", hash = "sha256:dc21cf308ca5691e7c04d962e213f8a4aa9bbfa23d95412f452254c2caeb09e5", size = 99138 },
2317
  ]
2318
 
2319
- [[package]]
2320
- name = "openai"
2321
- version = "1.51.2"
2322
- source = { registry = "https://pypi.org/simple" }
2323
- dependencies = [
2324
- { name = "anyio" },
2325
- { name = "distro" },
2326
- { name = "httpx" },
2327
- { name = "jiter" },
2328
- { name = "pydantic" },
2329
- { name = "sniffio" },
2330
- { name = "tqdm" },
2331
- { name = "typing-extensions" },
2332
- ]
2333
- sdist = { url = "https://files.pythonhosted.org/packages/95/64/9a5279138b5ea6c2f0e5443d5d93b4510cb87fa6fe7be0c92b837087124e/openai-1.51.2.tar.gz", hash = "sha256:c6a51fac62a1ca9df85a522e462918f6bb6bc51a8897032217e453a0730123a6", size = 307755 }
2334
- wheels = [
2335
- { url = "https://files.pythonhosted.org/packages/3d/49/72198d0941b3a0264b6d13033823025c01c497f1cbfd83db310392c49c0e/openai-1.51.2-py3-none-any.whl", hash = "sha256:5c5954711cba931423e471c37ff22ae0fd3892be9b083eee36459865fbbb83fa", size = 383687 },
2336
- ]
2337
-
2338
  [[package]]
2339
  name = "overrides"
2340
  version = "7.7.0"
@@ -3737,42 +3625,6 @@ wheels = [
3737
  { url = "https://files.pythonhosted.org/packages/4b/2c/ffbf7a134b9ab11a67b0cf0726453cedd9c5043a4fe7a35d1cefa9a1bcfb/threadpoolctl-3.5.0-py3-none-any.whl", hash = "sha256:56c1e26c150397e58c4926da8eeee87533b1e32bef131bd4bf6a2f45f3185467", size = 18414 },
3738
  ]
3739
 
3740
- [[package]]
3741
- name = "tiktoken"
3742
- version = "0.8.0"
3743
- source = { registry = "https://pypi.org/simple" }
3744
- dependencies = [
3745
- { name = "regex" },
3746
- { name = "requests" },
3747
- ]
3748
- sdist = { url = "https://files.pythonhosted.org/packages/37/02/576ff3a6639e755c4f70997b2d315f56d6d71e0d046f4fb64cb81a3fb099/tiktoken-0.8.0.tar.gz", hash = "sha256:9ccbb2740f24542534369c5635cfd9b2b3c2490754a78ac8831d99f89f94eeb2", size = 35107 }
3749
- wheels = [
3750
- { url = "https://files.pythonhosted.org/packages/c9/ba/a35fad753bbca8ba0cc1b0f3402a70256a110ced7ac332cf84ba89fc87ab/tiktoken-0.8.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b07e33283463089c81ef1467180e3e00ab00d46c2c4bbcef0acab5f771d6695e", size = 1039905 },
3751
- { url = "https://files.pythonhosted.org/packages/91/05/13dab8fd7460391c387b3e69e14bf1e51ff71fe0a202cd2933cc3ea93fb6/tiktoken-0.8.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9269348cb650726f44dd3bbb3f9110ac19a8dcc8f54949ad3ef652ca22a38e21", size = 982417 },
3752
- { url = "https://files.pythonhosted.org/packages/e9/98/18ec4a8351a6cf4537e40cd6e19a422c10cce1ef00a2fcb716e0a96af58b/tiktoken-0.8.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:25e13f37bc4ef2d012731e93e0fef21dc3b7aea5bb9009618de9a4026844e560", size = 1144915 },
3753
- { url = "https://files.pythonhosted.org/packages/2e/28/cf3633018cbcc6deb7805b700ccd6085c9a5a7f72b38974ee0bffd56d311/tiktoken-0.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f13d13c981511331eac0d01a59b5df7c0d4060a8be1e378672822213da51e0a2", size = 1177221 },
3754
- { url = "https://files.pythonhosted.org/packages/57/81/8a5be305cbd39d4e83a794f9e80c7f2c84b524587b7feb27c797b2046d51/tiktoken-0.8.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:6b2ddbc79a22621ce8b1166afa9f9a888a664a579350dc7c09346a3b5de837d9", size = 1237398 },
3755
- { url = "https://files.pythonhosted.org/packages/dc/da/8d1cc3089a83f5cf11c2e489332752981435280285231924557350523a59/tiktoken-0.8.0-cp310-cp310-win_amd64.whl", hash = "sha256:d8c2d0e5ba6453a290b86cd65fc51fedf247e1ba170191715b049dac1f628005", size = 884215 },
3756
- { url = "https://files.pythonhosted.org/packages/f6/1e/ca48e7bfeeccaf76f3a501bd84db1fa28b3c22c9d1a1f41af9fb7579c5f6/tiktoken-0.8.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d622d8011e6d6f239297efa42a2657043aaed06c4f68833550cac9e9bc723ef1", size = 1039700 },
3757
- { url = "https://files.pythonhosted.org/packages/8c/f8/f0101d98d661b34534769c3818f5af631e59c36ac6d07268fbfc89e539ce/tiktoken-0.8.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2efaf6199717b4485031b4d6edb94075e4d79177a172f38dd934d911b588d54a", size = 982413 },
3758
- { url = "https://files.pythonhosted.org/packages/ac/3c/2b95391d9bd520a73830469f80a96e3790e6c0a5ac2444f80f20b4b31051/tiktoken-0.8.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5637e425ce1fc49cf716d88df3092048359a4b3bbb7da762840426e937ada06d", size = 1144242 },
3759
- { url = "https://files.pythonhosted.org/packages/01/c4/c4a4360de845217b6aa9709c15773484b50479f36bb50419c443204e5de9/tiktoken-0.8.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9fb0e352d1dbe15aba082883058b3cce9e48d33101bdaac1eccf66424feb5b47", size = 1176588 },
3760
- { url = "https://files.pythonhosted.org/packages/f8/a3/ef984e976822cd6c2227c854f74d2e60cf4cd6fbfca46251199914746f78/tiktoken-0.8.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:56edfefe896c8f10aba372ab5706b9e3558e78db39dd497c940b47bf228bc419", size = 1237261 },
3761
- { url = "https://files.pythonhosted.org/packages/1e/86/eea2309dc258fb86c7d9b10db536434fc16420feaa3b6113df18b23db7c2/tiktoken-0.8.0-cp311-cp311-win_amd64.whl", hash = "sha256:326624128590def898775b722ccc327e90b073714227175ea8febbc920ac0a99", size = 884537 },
3762
- { url = "https://files.pythonhosted.org/packages/c1/22/34b2e136a6f4af186b6640cbfd6f93400783c9ef6cd550d9eab80628d9de/tiktoken-0.8.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:881839cfeae051b3628d9823b2e56b5cc93a9e2efb435f4cf15f17dc45f21586", size = 1039357 },
3763
- { url = "https://files.pythonhosted.org/packages/04/d2/c793cf49c20f5855fd6ce05d080c0537d7418f22c58e71f392d5e8c8dbf7/tiktoken-0.8.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:fe9399bdc3f29d428f16a2f86c3c8ec20be3eac5f53693ce4980371c3245729b", size = 982616 },
3764
- { url = "https://files.pythonhosted.org/packages/b3/a1/79846e5ef911cd5d75c844de3fa496a10c91b4b5f550aad695c5df153d72/tiktoken-0.8.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9a58deb7075d5b69237a3ff4bb51a726670419db6ea62bdcd8bd80c78497d7ab", size = 1144011 },
3765
- { url = "https://files.pythonhosted.org/packages/26/32/e0e3a859136e95c85a572e4806dc58bf1ddf651108ae8b97d5f3ebe1a244/tiktoken-0.8.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2908c0d043a7d03ebd80347266b0e58440bdef5564f84f4d29fb235b5df3b04", size = 1175432 },
3766
- { url = "https://files.pythonhosted.org/packages/c7/89/926b66e9025b97e9fbabeaa59048a736fe3c3e4530a204109571104f921c/tiktoken-0.8.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:294440d21a2a51e12d4238e68a5972095534fe9878be57d905c476017bff99fc", size = 1236576 },
3767
- { url = "https://files.pythonhosted.org/packages/45/e2/39d4aa02a52bba73b2cd21ba4533c84425ff8786cc63c511d68c8897376e/tiktoken-0.8.0-cp312-cp312-win_amd64.whl", hash = "sha256:d8f3192733ac4d77977432947d563d7e1b310b96497acd3c196c9bddb36ed9db", size = 883824 },
3768
- { url = "https://files.pythonhosted.org/packages/e3/38/802e79ba0ee5fcbf240cd624143f57744e5d411d2e9d9ad2db70d8395986/tiktoken-0.8.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:02be1666096aff7da6cbd7cdaa8e7917bfed3467cd64b38b1f112e96d3b06a24", size = 1039648 },
3769
- { url = "https://files.pythonhosted.org/packages/b1/da/24cdbfc302c98663fbea66f5866f7fa1048405c7564ab88483aea97c3b1a/tiktoken-0.8.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:c94ff53c5c74b535b2cbf431d907fc13c678bbd009ee633a2aca269a04389f9a", size = 982763 },
3770
- { url = "https://files.pythonhosted.org/packages/e4/f0/0ecf79a279dfa41fc97d00adccf976ecc2556d3c08ef3e25e45eb31f665b/tiktoken-0.8.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6b231f5e8982c245ee3065cd84a4712d64692348bc609d84467c57b4b72dcbc5", size = 1144417 },
3771
- { url = "https://files.pythonhosted.org/packages/ab/d3/155d2d4514f3471a25dc1d6d20549ef254e2aa9bb5b1060809b1d3b03d3a/tiktoken-0.8.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4177faa809bd55f699e88c96d9bb4635d22e3f59d635ba6fd9ffedf7150b9953", size = 1175108 },
3772
- { url = "https://files.pythonhosted.org/packages/19/eb/5989e16821ee8300ef8ee13c16effc20dfc26c777d05fbb6825e3c037b81/tiktoken-0.8.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:5376b6f8dc4753cd81ead935c5f518fa0fbe7e133d9e25f648d8c4dabdd4bad7", size = 1236520 },
3773
- { url = "https://files.pythonhosted.org/packages/40/59/14b20465f1d1cb89cfbc96ec27e5617b2d41c79da12b5e04e96d689be2a7/tiktoken-0.8.0-cp313-cp313-win_amd64.whl", hash = "sha256:18228d624807d66c87acd8f25fc135665617cab220671eb65b50f5d70fa51f69", size = 883849 },
3774
- ]
3775
-
3776
  [[package]]
3777
  name = "tinycss2"
3778
  version = "1.3.0"
@@ -4146,35 +3998,22 @@ wheels = [
4146
 
4147
  [[package]]
4148
  name = "weave"
4149
- version = "0.51.12"
4150
  source = { registry = "https://pypi.org/simple" }
4151
  dependencies = [
4152
- { name = "aiofiles" },
4153
- { name = "aiohttp" },
4154
- { name = "aioprocessing" },
4155
- { name = "analytics-python" },
4156
  { name = "emoji" },
4157
- { name = "gql", extra = ["requests"] },
4158
- { name = "graphql-core" },
4159
- { name = "janus" },
4160
  { name = "numpy" },
4161
- { name = "openai" },
4162
  { name = "packaging" },
4163
- { name = "pyarrow" },
4164
  { name = "pydantic" },
4165
- { name = "python-dateutil" },
4166
- { name = "python-json-logger" },
4167
  { name = "rich" },
4168
  { name = "tenacity" },
4169
- { name = "tiktoken" },
4170
- { name = "typing-extensions" },
4171
  { name = "uuid-utils" },
4172
  { name = "wandb" },
4173
- { name = "werkzeug" },
4174
  ]
4175
- sdist = { url = "https://files.pythonhosted.org/packages/b2/26/731eb697a0033aac04b9aff91c0ceb3bc13cb354cd15e86fe3c0305c0585/weave-0.51.12.tar.gz", hash = "sha256:aef8068889cd88c0a3928ca06059077cf1ba1a05bfb40b21e907d2a2b9b1ac67", size = 29494789 }
4176
  wheels = [
4177
- { url = "https://files.pythonhosted.org/packages/3e/ea/cacdbc795cd310aa9dffd7b5483b97321e35664bf3b4533258e6bd0ca7e7/weave-0.51.12-py3-none-any.whl", hash = "sha256:d508359419496064d80fe3161016504fc787c36807735245c0708f2cd1fa44a6", size = 982281 },
4178
  ]
4179
 
4180
  [[package]]
@@ -4204,18 +4043,6 @@ wheels = [
4204
  { url = "https://files.pythonhosted.org/packages/5a/84/44687a29792a70e111c5c477230a72c4b957d88d16141199bf9acb7537a3/websocket_client-1.8.0-py3-none-any.whl", hash = "sha256:17b44cc997f5c498e809b22cdf2d9c7a9e71c02c8cc2b6c56e7c2d1239bfa526", size = 58826 },
4205
  ]
4206
 
4207
- [[package]]
4208
- name = "werkzeug"
4209
- version = "3.0.4"
4210
- source = { registry = "https://pypi.org/simple" }
4211
- dependencies = [
4212
- { name = "markupsafe" },
4213
- ]
4214
- sdist = { url = "https://files.pythonhosted.org/packages/0f/e2/6dbcaab07560909ff8f654d3a2e5a60552d937c909455211b1b36d7101dc/werkzeug-3.0.4.tar.gz", hash = "sha256:34f2371506b250df4d4f84bfe7b0921e4762525762bbd936614909fe25cd7306", size = 803966 }
4215
- wheels = [
4216
- { url = "https://files.pythonhosted.org/packages/4b/84/997bbf7c2bf2dc3f09565c6d0b4959fefe5355c18c4096cfd26d83e0785b/werkzeug-3.0.4-py3-none-any.whl", hash = "sha256:02c9eb92b7d6c06f31a782811505d2157837cea66aaede3e217c7c27c039476c", size = 227554 },
4217
- ]
4218
-
4219
  [[package]]
4220
  name = "widgetsnbextension"
4221
  version = "4.0.13"
 
124
  { url = "https://files.pythonhosted.org/packages/71/c0/6d33ac32bfbf9dd91a16c26bc37dd4763084d7f991dc848655d34e31291a/aiohttp-3.10.9-cp313-cp313-win_amd64.whl", hash = "sha256:a35ed3d03910785f7d9d6f5381f0c24002b2b888b298e6f941b2fc94c5055fcd", size = 377205 },
125
  ]
126
 
 
 
 
 
 
 
 
 
 
127
  [[package]]
128
  name = "aiosignal"
129
  version = "1.3.1"
 
136
  { url = "https://files.pythonhosted.org/packages/76/ac/a7305707cb852b7e16ff80eaf5692309bde30e2b1100a1fcacdc8f731d97/aiosignal-1.3.1-py3-none-any.whl", hash = "sha256:f8376fb07dd1e86a584e4fcdec80b36b7f81aac666ebc724e2c090300dd83b17", size = 7617 },
137
  ]
138
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
139
  [[package]]
140
  name = "annotated-types"
141
  version = "0.7.0"
 
627
  { url = "https://files.pythonhosted.org/packages/f5/3a/74a29b11cf2cdfcd6ba89c0cecd70b37cd1ba7b77978ce611eb7a146a832/dill-0.3.7-py3-none-any.whl", hash = "sha256:76b122c08ef4ce2eedcd4d1abd8e641114bfc6c2867f49f3c41facf65bf19f5e", size = 115254 },
628
  ]
629
 
 
 
 
 
 
 
 
 
 
630
  [[package]]
631
  name = "docker-pycreds"
632
  version = "0.4.0"
 
855
  ]
856
 
857
  [package.optional-dependencies]
858
+ aiohttp = [
859
+ { name = "aiohttp" },
860
+ ]
861
  requests = [
862
  { name = "requests" },
863
  { name = "requests-toolbelt" },
 
1049
  { url = "https://files.pythonhosted.org/packages/d1/b3/8def84f539e7d2289a02f0524b944b15d7c75dab7628bedf1c4f0992029c/isort-5.13.2-py3-none-any.whl", hash = "sha256:8ca5e72a8d85860d5a3fa69b8745237f2939afe12dbf656afbcb47fe72d947a6", size = 92310 },
1050
  ]
1051
 
 
 
 
 
 
 
 
 
 
 
 
 
1052
  [[package]]
1053
  name = "jedi"
1054
  version = "0.19.1"
 
1073
  { url = "https://files.pythonhosted.org/packages/31/80/3a54838c3fb461f6fec263ebf3a3a41771bd05190238de3486aae8540c36/jinja2-3.1.4-py3-none-any.whl", hash = "sha256:bc5dd2abb727a5319567b7a813e6a2e7318c39f4f487cfe6c89c6f9c7d25197d", size = 133271 },
1074
  ]
1075
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1076
  [[package]]
1077
  name = "joblib"
1078
  version = "1.4.2"
 
1503
  { name = "mkdocs-minify-plugin" },
1504
  { name = "mkdocstrings" },
1505
  { name = "mkdocstrings-python" },
1506
+ { name = "pdf2image" },
1507
  { name = "pip" },
1508
  { name = "pymupdf4llm" },
1509
  { name = "pypdf2" },
 
1519
  core = [
1520
  { name = "byaldi" },
1521
  { name = "firerequests" },
1522
+ { name = "pdf2image" },
1523
  { name = "pymupdf4llm" },
1524
  { name = "python-dotenv" },
1525
  { name = "torch" },
 
1569
  { name = "mkdocstrings", marker = "extra == 'docs'", specifier = ">=0.26.1" },
1570
  { name = "mkdocstrings-python", specifier = ">=1.11.1" },
1571
  { name = "mkdocstrings-python", marker = "extra == 'docs'", specifier = ">=1.11.1" },
1572
+ { name = "pdf2image", specifier = ">=1.17.0" },
1573
+ { name = "pdf2image", marker = "extra == 'core'", specifier = ">=1.17.0" },
1574
  { name = "pip", specifier = ">=24.2" },
1575
  { name = "pymupdf4llm", specifier = ">=0.0.17" },
1576
  { name = "pymupdf4llm", marker = "extra == 'core'", specifier = ">=0.0.17" },
 
1585
  { name = "torch", specifier = ">=2.4.1" },
1586
  { name = "torch", marker = "extra == 'core'", specifier = ">=2.4.1" },
1587
  { name = "uv", specifier = ">=0.4.20" },
1588
+ { name = "weave", specifier = ">=0.51.14" },
1589
+ { name = "weave", marker = "extra == 'core'", specifier = ">=0.51.14" },
1590
  ]
1591
 
1592
  [[package]]
 
2223
  { url = "https://files.pythonhosted.org/packages/da/d3/8057f0587683ed2fcd4dbfbdfdfa807b9160b809976099d36b8f60d08f03/nvidia_nvtx_cu12-12.1.105-py3-none-manylinux1_x86_64.whl", hash = "sha256:dc21cf308ca5691e7c04d962e213f8a4aa9bbfa23d95412f452254c2caeb09e5", size = 99138 },
2224
  ]
2225
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2226
  [[package]]
2227
  name = "overrides"
2228
  version = "7.7.0"
 
3625
  { url = "https://files.pythonhosted.org/packages/4b/2c/ffbf7a134b9ab11a67b0cf0726453cedd9c5043a4fe7a35d1cefa9a1bcfb/threadpoolctl-3.5.0-py3-none-any.whl", hash = "sha256:56c1e26c150397e58c4926da8eeee87533b1e32bef131bd4bf6a2f45f3185467", size = 18414 },
3626
  ]
3627
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3628
  [[package]]
3629
  name = "tinycss2"
3630
  version = "1.3.0"
 
3998
 
3999
  [[package]]
4000
  name = "weave"
4001
+ version = "0.51.14"
4002
  source = { registry = "https://pypi.org/simple" }
4003
  dependencies = [
 
 
 
 
4004
  { name = "emoji" },
4005
+ { name = "gql", extra = ["aiohttp", "requests"] },
 
 
4006
  { name = "numpy" },
 
4007
  { name = "packaging" },
 
4008
  { name = "pydantic" },
 
 
4009
  { name = "rich" },
4010
  { name = "tenacity" },
 
 
4011
  { name = "uuid-utils" },
4012
  { name = "wandb" },
 
4013
  ]
4014
+ sdist = { url = "https://files.pythonhosted.org/packages/e5/78/4b6b415d73b51e48defefc80136c0e3673db2d518167a0c54666447ba067/weave-0.51.14.tar.gz", hash = "sha256:a14c6d1877b38e4ad780f9fb8a40d19514696d8f450decce7b18529907d800d9", size = 200324 }
4015
  wheels = [
4016
+ { url = "https://files.pythonhosted.org/packages/f0/57/53a022e364d5b62375966f7302c86b08fcb0ce90f99653f7d49e3fa7269c/weave-0.51.14-py3-none-any.whl", hash = "sha256:9328a787e393fd7e4b8807680be2c590b0ef9c60d9ef04268c19630791af1712", size = 251898 },
4017
  ]
4018
 
4019
  [[package]]
 
4043
  { url = "https://files.pythonhosted.org/packages/5a/84/44687a29792a70e111c5c477230a72c4b957d88d16141199bf9acb7537a3/websocket_client-1.8.0-py3-none-any.whl", hash = "sha256:17b44cc997f5c498e809b22cdf2d9c7a9e71c02c8cc2b6c56e7c2d1239bfa526", size = 58826 },
4044
  ]
4045
 
 
 
 
 
 
 
 
 
 
 
 
 
4046
  [[package]]
4047
  name = "widgetsnbextension"
4048
  version = "4.0.13"