Spaces:
Sleeping
Sleeping
geekyrakshit
committed on
Commit
·
8992b40
1
Parent(s):
d889dc6
add: ImageLoader
Browse files- docs/document_loader/load_image.md +3 -0
- medrag_multi_modal/document_loader/__init__.py +2 -1
- medrag_multi_modal/document_loader/load_image.py +116 -0
- mkdocs.yml +1 -0
- pyproject.toml +4 -2
- uv.lock +13 -186
docs/document_loader/load_image.md
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
# Load PDF pages as images
|
2 |
+
|
3 |
+
::: medrag_multi_modal.document_loader.load_image
|
medrag_multi_modal/document_loader/__init__.py
CHANGED
@@ -1,4 +1,5 @@
|
|
|
|
1 |
from .load_text import TextLoader
|
2 |
from .load_text_image import TextImageLoader
|
3 |
|
4 |
-
__all__ = ["TextLoader", "TextImageLoader"]
|
|
|
1 |
+
from .load_image import ImageLoader
|
2 |
from .load_text import TextLoader
|
3 |
from .load_text_image import TextImageLoader
|
4 |
|
5 |
+
__all__ = ["TextLoader", "TextImageLoader", "ImageLoader"]
|
medrag_multi_modal/document_loader/load_image.py
ADDED
@@ -0,0 +1,116 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import asyncio
|
2 |
+
|
3 |
+
import rich
|
4 |
+
import weave
|
5 |
+
from pdf2image.pdf2image import convert_from_path
|
6 |
+
from PIL import Image
|
7 |
+
|
8 |
+
from medrag_multi_modal.document_loader.load_text import TextLoader
|
9 |
+
|
10 |
+
|
11 |
+
class ImageLoader(TextLoader):
    """
    ImageLoader is a class that extends the TextLoader class to handle the extraction and
    loading of images from a PDF file.

    This class provides functionality to convert specific pages of a PDF document into images
    and optionally publish these images to a Weave dataset.

    !!! example "Example Usage"
        ```python
        import asyncio

        import weave
        from dotenv import load_dotenv

        from medrag_multi_modal.document_loader import ImageLoader

        load_dotenv()
        weave.init(project_name="ml-colabs/medrag-multi-modal")
        url = "https://archive.org/download/GraysAnatomy41E2015PDF/Grays%20Anatomy-41%20E%20%282015%29%20%5BPDF%5D.pdf"
        loader = ImageLoader(
            url=url,
            document_name="Gray's Anatomy",
            document_file_path="grays_anatomy.pdf",
        )
        asyncio.run(
            loader.load_data(
                start_page=31,
                end_page=33,
                weave_dataset_name="grays-anatomy-images",
            )
        )
        ```

    Args:
        url (str): The URL of the PDF document.
        document_name (str): The name of the document.
        document_file_path (str): The path to the PDF file.
    """

    def __init__(self, url: str, document_name: str, document_file_path: str):
        super().__init__(url, document_name, document_file_path)

    def extract_data_from_pdf_file(
        self, pdf_file: str, page_number: int
    ) -> Image.Image:
        """
        Render a single page of a PDF file as an image.

        Args:
            pdf_file (str): The path to the PDF file.
            page_number (int): The 0-based index of the page to render.

        Returns:
            Image.Image: The rendered page.
        """
        # Callers pass 0-based indices; the page bounds handed to
        # `convert_from_path` are shifted by one, and requesting a single-page
        # range yields a one-element list.
        image = convert_from_path(
            pdf_file, first_page=page_number + 1, last_page=page_number + 1
        )[0]
        return image

    async def load_data(
        self,
        start_page: int,
        end_page: int,
        weave_dataset_name: str | None = None,
    ) -> list[dict]:
        """
        Asynchronously converts a range of pages of the PDF document into images and
        optionally publishes them to a Weave dataset.

        Each page in `range(start_page, end_page)` (after the indices have been
        normalized by `get_page_indices`) is rendered with the `pdf2image` library.
        The blocking conversions are dispatched to worker threads via
        `asyncio.to_thread` so that they actually overlap instead of running one
        after another on the event loop. The returned rows are ordered by page
        index regardless of the order in which the conversions finish.

        Args:
            start_page (int): The starting page index (0-based) to process.
            end_page (int): The ending page index (0-based) to process.
            weave_dataset_name (str | None): The name of the Weave dataset to publish
                the pages to. Nothing is published when it is `None` or empty.

        Returns:
            list[dict]: One dictionary per processed page with the keys `image`,
                `page_idx`, `document_name`, `file_path` and `file_url`.

        Raises:
            ValueError: If the specified start_page or end_page is out of bounds of the
                document's page count (presumably raised by `get_page_indices`, which
                is inherited and not defined in this class).
        """
        start_page, end_page = self.get_page_indices(start_page, end_page)
        total_pages = end_page - start_page
        processed_pages_counter: int = 0

        async def process_page(page_idx: int) -> dict:
            nonlocal processed_pages_counter
            # `convert_from_path` blocks until the page is rendered; running it in
            # a worker thread keeps the event loop free to start other pages.
            image = await asyncio.to_thread(
                self.extract_data_from_pdf_file, self.document_file_path, page_idx
            )
            # The counter is only touched from the event-loop thread (after the
            # await), so no locking is needed.
            processed_pages_counter += 1
            rich.print(f"Processed pages {processed_pages_counter}/{total_pages}")
            return {
                "image": image,
                "page_idx": page_idx,
                "document_name": self.document_name,
                "file_path": self.document_file_path,
                "file_url": self.url,
            }

        # `gather` returns results in the order the awaitables were passed in,
        # which keeps the rows sorted by page index.
        pages = list(
            await asyncio.gather(
                *(process_page(page_idx) for page_idx in range(start_page, end_page))
            )
        )
        if weave_dataset_name:
            weave.publish(weave.Dataset(name=weave_dataset_name, rows=pages))
        return pages
|
mkdocs.yml
CHANGED
@@ -62,5 +62,6 @@ nav:
|
|
62 |
- Document Loader:
|
63 |
- Text Loader: 'document_loader/load_text.md'
|
64 |
- Text and Image Loader: 'document_loader/load_text_image.md'
|
|
|
65 |
|
66 |
repo_url: https://github.com/soumik12345/medrag-multi-modal
|
|
|
62 |
- Document Loader:
|
63 |
- Text Loader: 'document_loader/load_text.md'
|
64 |
- Text and Image Loader: 'document_loader/load_text_image.md'
|
65 |
+
- Image Loader: 'document_loader/load_image.md'
|
66 |
|
67 |
repo_url: https://github.com/soumik12345/medrag-multi-modal
|
pyproject.toml
CHANGED
@@ -7,10 +7,11 @@ requires-python = ">=3.10"
|
|
7 |
dependencies = [
|
8 |
"Byaldi>=0.0.5",
|
9 |
"firerequests>=0.0.7",
|
|
|
10 |
"python-dotenv>=1.0.1",
|
11 |
"pymupdf4llm>=0.0.17",
|
12 |
"torch>=2.4.1",
|
13 |
-
"weave>=0.51.
|
14 |
"pip>=24.2",
|
15 |
"uv>=0.4.20",
|
16 |
"pytest>=8.3.3",
|
@@ -32,10 +33,11 @@ dependencies = [
|
|
32 |
core = [
|
33 |
"Byaldi>=0.0.5",
|
34 |
"firerequests>=0.0.7",
|
|
|
35 |
"python-dotenv>=1.0.1",
|
36 |
"pymupdf4llm>=0.0.17",
|
37 |
"torch>=2.4.1",
|
38 |
-
"weave>=0.51.
|
39 |
]
|
40 |
|
41 |
dev = [
|
|
|
7 |
dependencies = [
|
8 |
"Byaldi>=0.0.5",
|
9 |
"firerequests>=0.0.7",
|
10 |
+
"pdf2image>=1.17.0",
|
11 |
"python-dotenv>=1.0.1",
|
12 |
"pymupdf4llm>=0.0.17",
|
13 |
"torch>=2.4.1",
|
14 |
+
"weave>=0.51.14",
|
15 |
"pip>=24.2",
|
16 |
"uv>=0.4.20",
|
17 |
"pytest>=8.3.3",
|
|
|
33 |
core = [
|
34 |
"Byaldi>=0.0.5",
|
35 |
"firerequests>=0.0.7",
|
36 |
+
"pdf2image>=1.17.0",
|
37 |
"python-dotenv>=1.0.1",
|
38 |
"pymupdf4llm>=0.0.17",
|
39 |
"torch>=2.4.1",
|
40 |
+
"weave>=0.51.14",
|
41 |
]
|
42 |
|
43 |
dev = [
|
uv.lock
CHANGED
@@ -124,15 +124,6 @@ wheels = [
|
|
124 |
{ url = "https://files.pythonhosted.org/packages/71/c0/6d33ac32bfbf9dd91a16c26bc37dd4763084d7f991dc848655d34e31291a/aiohttp-3.10.9-cp313-cp313-win_amd64.whl", hash = "sha256:a35ed3d03910785f7d9d6f5381f0c24002b2b888b298e6f941b2fc94c5055fcd", size = 377205 },
|
125 |
]
|
126 |
|
127 |
-
[[package]]
|
128 |
-
name = "aioprocessing"
|
129 |
-
version = "2.0.1"
|
130 |
-
source = { registry = "https://pypi.org/simple" }
|
131 |
-
sdist = { url = "https://files.pythonhosted.org/packages/4d/85/9a75151e7049bf144c01384279201d82d99484bd658f8e6fb013552d8724/aioprocessing-2.0.1.tar.gz", hash = "sha256:fe01c7b1a38c78168611d3040e73d93036c3b7c8a649d636dc9ed7a3bc9b1ba2", size = 12818 }
|
132 |
-
wheels = [
|
133 |
-
{ url = "https://files.pythonhosted.org/packages/ea/7b/34129c3bb87078f37b1ca64b547e8669fdde00db9fa724f0b3a8ec54bb27/aioprocessing-2.0.1-py3-none-any.whl", hash = "sha256:8fcac4b0108b72eb9df76e06a9d7e05720ee1e8330829d3fd53fa059879be586", size = 14415 },
|
134 |
-
]
|
135 |
-
|
136 |
[[package]]
|
137 |
name = "aiosignal"
|
138 |
version = "1.3.1"
|
@@ -145,20 +136,6 @@ wheels = [
|
|
145 |
{ url = "https://files.pythonhosted.org/packages/76/ac/a7305707cb852b7e16ff80eaf5692309bde30e2b1100a1fcacdc8f731d97/aiosignal-1.3.1-py3-none-any.whl", hash = "sha256:f8376fb07dd1e86a584e4fcdec80b36b7f81aac666ebc724e2c090300dd83b17", size = 7617 },
|
146 |
]
|
147 |
|
148 |
-
[[package]]
|
149 |
-
name = "analytics-python"
|
150 |
-
version = "1.2.9"
|
151 |
-
source = { registry = "https://pypi.org/simple" }
|
152 |
-
dependencies = [
|
153 |
-
{ name = "python-dateutil" },
|
154 |
-
{ name = "requests" },
|
155 |
-
{ name = "six" },
|
156 |
-
]
|
157 |
-
sdist = { url = "https://files.pythonhosted.org/packages/6d/ae/affa8190ad884f9654483201f6fe71465bd59263b3365c0e3b544cd36203/analytics-python-1.2.9.tar.gz", hash = "sha256:f3d1ca27cb277da67c10d71a5c9c593d2a9ec99109e31409ab771b44821a86bf", size = 9706 }
|
158 |
-
wheels = [
|
159 |
-
{ url = "https://files.pythonhosted.org/packages/d3/37/c49d052f88655cd96445c36979fb63f69ef859e167eaff5706ca7c8a8ee3/analytics_python-1.2.9-py2.py3-none-any.whl", hash = "sha256:69d88b2d3e2c350e6803487a1a802e0fd111e86665d4c9b16c3c6f5fbc6c445f", size = 13445 },
|
160 |
-
]
|
161 |
-
|
162 |
[[package]]
|
163 |
name = "annotated-types"
|
164 |
version = "0.7.0"
|
@@ -650,15 +627,6 @@ wheels = [
|
|
650 |
{ url = "https://files.pythonhosted.org/packages/f5/3a/74a29b11cf2cdfcd6ba89c0cecd70b37cd1ba7b77978ce611eb7a146a832/dill-0.3.7-py3-none-any.whl", hash = "sha256:76b122c08ef4ce2eedcd4d1abd8e641114bfc6c2867f49f3c41facf65bf19f5e", size = 115254 },
|
651 |
]
|
652 |
|
653 |
-
[[package]]
|
654 |
-
name = "distro"
|
655 |
-
version = "1.9.0"
|
656 |
-
source = { registry = "https://pypi.org/simple" }
|
657 |
-
sdist = { url = "https://files.pythonhosted.org/packages/fc/f8/98eea607f65de6527f8a2e8885fc8015d3e6f5775df186e443e0964a11c3/distro-1.9.0.tar.gz", hash = "sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed", size = 60722 }
|
658 |
-
wheels = [
|
659 |
-
{ url = "https://files.pythonhosted.org/packages/12/b3/231ffd4ab1fc9d679809f356cebee130ac7daa00d6d6f3206dd4fd137e9e/distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2", size = 20277 },
|
660 |
-
]
|
661 |
-
|
662 |
[[package]]
|
663 |
name = "docker-pycreds"
|
664 |
version = "0.4.0"
|
@@ -887,6 +855,9 @@ wheels = [
|
|
887 |
]
|
888 |
|
889 |
[package.optional-dependencies]
|
|
|
|
|
|
|
890 |
requests = [
|
891 |
{ name = "requests" },
|
892 |
{ name = "requests-toolbelt" },
|
@@ -1078,18 +1049,6 @@ wheels = [
|
|
1078 |
{ url = "https://files.pythonhosted.org/packages/d1/b3/8def84f539e7d2289a02f0524b944b15d7c75dab7628bedf1c4f0992029c/isort-5.13.2-py3-none-any.whl", hash = "sha256:8ca5e72a8d85860d5a3fa69b8745237f2939afe12dbf656afbcb47fe72d947a6", size = 92310 },
|
1079 |
]
|
1080 |
|
1081 |
-
[[package]]
|
1082 |
-
name = "janus"
|
1083 |
-
version = "1.0.0"
|
1084 |
-
source = { registry = "https://pypi.org/simple" }
|
1085 |
-
dependencies = [
|
1086 |
-
{ name = "typing-extensions" },
|
1087 |
-
]
|
1088 |
-
sdist = { url = "https://files.pythonhosted.org/packages/b8/a8/facab7275d7d3d2032f375843fe46fad1cfa604a108b5a238638d4615bdc/janus-1.0.0.tar.gz", hash = "sha256:df976f2cdcfb034b147a2d51edfc34ff6bfb12d4e2643d3ad0e10de058cb1612", size = 19043 }
|
1089 |
-
wheels = [
|
1090 |
-
{ url = "https://files.pythonhosted.org/packages/c1/84/7bfe436fa6a4943eecb17c2cca9c84215299684575376d664ea6bf294439/janus-1.0.0-py3-none-any.whl", hash = "sha256:2596ea5482711c1ee3ef2df6c290aaf370a13c55a007826e8f7c32d696d1d00a", size = 6895 },
|
1091 |
-
]
|
1092 |
-
|
1093 |
[[package]]
|
1094 |
name = "jedi"
|
1095 |
version = "0.19.1"
|
@@ -1114,62 +1073,6 @@ wheels = [
|
|
1114 |
{ url = "https://files.pythonhosted.org/packages/31/80/3a54838c3fb461f6fec263ebf3a3a41771bd05190238de3486aae8540c36/jinja2-3.1.4-py3-none-any.whl", hash = "sha256:bc5dd2abb727a5319567b7a813e6a2e7318c39f4f487cfe6c89c6f9c7d25197d", size = 133271 },
|
1115 |
]
|
1116 |
|
1117 |
-
[[package]]
|
1118 |
-
name = "jiter"
|
1119 |
-
version = "0.6.1"
|
1120 |
-
source = { registry = "https://pypi.org/simple" }
|
1121 |
-
sdist = { url = "https://files.pythonhosted.org/packages/26/ef/64458dfad180debd70d9dd1ca4f607e52bb6de748e5284d748556a0d5173/jiter-0.6.1.tar.gz", hash = "sha256:e19cd21221fc139fb032e4112986656cb2739e9fe6d84c13956ab30ccc7d4449", size = 161306 }
|
1122 |
-
wheels = [
|
1123 |
-
{ url = "https://files.pythonhosted.org/packages/0c/1d/9dede54580112c1403a9b6ef0cab33d10c58e3e7e55548d6b97bfd890748/jiter-0.6.1-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:d08510593cb57296851080018006dfc394070178d238b767b1879dc1013b106c", size = 290507 },
|
1124 |
-
{ url = "https://files.pythonhosted.org/packages/b2/28/cf5586637c8c21ad1d68bcc3361d60ade8e81524340454f21c68e8368b70/jiter-0.6.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:adef59d5e2394ebbad13b7ed5e0306cceb1df92e2de688824232a91588e77aa7", size = 301642 },
|
1125 |
-
{ url = "https://files.pythonhosted.org/packages/6b/ab/07e67b0a9ad816f5130def05537177f2efdfe451480a584ae9fbb31cdaf8/jiter-0.6.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b3e02f7a27f2bcc15b7d455c9df05df8ffffcc596a2a541eeda9a3110326e7a3", size = 337364 },
|
1126 |
-
{ url = "https://files.pythonhosted.org/packages/25/3a/bb625446b95b7f964ac8c5e9260190262b629c1aecc9f7e9fd7730e2e2b1/jiter-0.6.1-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ed69a7971d67b08f152c17c638f0e8c2aa207e9dd3a5fcd3cba294d39b5a8d2d", size = 353782 },
|
1127 |
-
{ url = "https://files.pythonhosted.org/packages/44/78/fb2bf870418360ac523ac1591a7418add2e9385e207ca6320907d22a0699/jiter-0.6.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b2019d966e98f7c6df24b3b8363998575f47d26471bfb14aade37630fae836a1", size = 370761 },
|
1128 |
-
{ url = "https://files.pythonhosted.org/packages/ae/c3/4e68a0e52a3790df68b95a5fa0d70aae3f6d1f376adf515fb9016080ccf3/jiter-0.6.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:36c0b51a285b68311e207a76c385650322734c8717d16c2eb8af75c9d69506e7", size = 392957 },
|
1129 |
-
{ url = "https://files.pythonhosted.org/packages/bd/5a/d2fe7904a3f12cb2a425e83382186d23325c3316d40382cd17cd4a2205b9/jiter-0.6.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:220e0963b4fb507c525c8f58cde3da6b1be0bfddb7ffd6798fb8f2531226cdb1", size = 325211 },
|
1130 |
-
{ url = "https://files.pythonhosted.org/packages/d6/4a/9db9f1f7034187290ffb370c9b579e647b3e5889a541b54d113353d29a14/jiter-0.6.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:aa25c7a9bf7875a141182b9c95aed487add635da01942ef7ca726e42a0c09058", size = 366109 },
|
1131 |
-
{ url = "https://files.pythonhosted.org/packages/0c/4b/487e2623703da76405d3ccd5f6047a7c7f9e238eda7a3043b806542e53ac/jiter-0.6.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:e90552109ca8ccd07f47ca99c8a1509ced93920d271bb81780a973279974c5ab", size = 514433 },
|
1132 |
-
{ url = "https://files.pythonhosted.org/packages/33/18/ed55ecd669f5ce963045f9cd3404c937d51509324070af5bba17cda789fd/jiter-0.6.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:67723a011964971864e0b484b0ecfee6a14de1533cff7ffd71189e92103b38a8", size = 496282 },
|
1133 |
-
{ url = "https://files.pythonhosted.org/packages/c1/8e/2854fe24b38e7180396a991e34363f3e7a72ea99c4a05f2c3940ae01fda8/jiter-0.6.1-cp310-none-win32.whl", hash = "sha256:33af2b7d2bf310fdfec2da0177eab2fedab8679d1538d5b86a633ebfbbac4edd", size = 197413 },
|
1134 |
-
{ url = "https://files.pythonhosted.org/packages/5b/bd/ff2f6a84574e0e01759dd81255c3145cacd9f374d01efc49574b03638105/jiter-0.6.1-cp310-none-win_amd64.whl", hash = "sha256:7cea41c4c673353799906d940eee8f2d8fd1d9561d734aa921ae0f75cb9732f4", size = 200042 },
|
1135 |
-
{ url = "https://files.pythonhosted.org/packages/95/91/d1605f3cabcf47193ecab3712e5a4c55a19cf1a4d86ef67402325e28a44e/jiter-0.6.1-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:b03c24e7da7e75b170c7b2b172d9c5e463aa4b5c95696a368d52c295b3f6847f", size = 290963 },
|
1136 |
-
{ url = "https://files.pythonhosted.org/packages/91/35/85ef9eaef7dec14f28dd9b8a2116c07075bb2731a405b650a55fda4c74d7/jiter-0.6.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:47fee1be677b25d0ef79d687e238dc6ac91a8e553e1a68d0839f38c69e0ee491", size = 302639 },
|
1137 |
-
{ url = "https://files.pythonhosted.org/packages/3b/c7/87a809bf95eb6fbcd8b30ea1d0f922c2187590de64a7f0944615008fde45/jiter-0.6.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:25f0d2f6e01a8a0fb0eab6d0e469058dab2be46ff3139ed2d1543475b5a1d8e7", size = 337048 },
|
1138 |
-
{ url = "https://files.pythonhosted.org/packages/bf/70/c31f21c109a01e6ebb0e032c8296d24761b5244b37d16bb3e9b0789a0eb0/jiter-0.6.1-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:0b809e39e342c346df454b29bfcc7bca3d957f5d7b60e33dae42b0e5ec13e027", size = 354239 },
|
1139 |
-
{ url = "https://files.pythonhosted.org/packages/b9/86/6e4ef77c86175bbcc2cff6e8c6a8f98a554f88ce99b9c892c9330858d07c/jiter-0.6.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e9ac7c2f092f231f5620bef23ce2e530bd218fc046098747cc390b21b8738a7a", size = 370842 },
|
1140 |
-
{ url = "https://files.pythonhosted.org/packages/ba/e3/ef93fc307278d98c981b09b4f965f49312d0639ba31c2db4fe073b78a833/jiter-0.6.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e51a2d80d5fe0ffb10ed2c82b6004458be4a3f2b9c7d09ed85baa2fbf033f54b", size = 392489 },
|
1141 |
-
{ url = "https://files.pythonhosted.org/packages/63/6d/bff2bce7cc17bd7e0f517490cfa4444ad94d20720eb2ccd3152a6cd57a30/jiter-0.6.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3343d4706a2b7140e8bd49b6c8b0a82abf9194b3f0f5925a78fc69359f8fc33c", size = 325493 },
|
1142 |
-
{ url = "https://files.pythonhosted.org/packages/49/4b/56e8a5e2be5439e503b77d2c9479197e0d8199827d7f79b06592747c5210/jiter-0.6.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:82521000d18c71e41c96960cb36e915a357bc83d63a8bed63154b89d95d05ad1", size = 365974 },
|
1143 |
-
{ url = "https://files.pythonhosted.org/packages/d3/9b/967752fb36ddb4b6ea7a2a8cd0ef3f167a112a2d3a2131ee544969203659/jiter-0.6.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:3c843e7c1633470708a3987e8ce617ee2979ee18542d6eb25ae92861af3f1d62", size = 514144 },
|
1144 |
-
{ url = "https://files.pythonhosted.org/packages/58/55/9b7e0021e567731b076a8bf017a1df7d6f148bb175be2ac647a0c6433bbd/jiter-0.6.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:a2e861658c3fe849efc39b06ebb98d042e4a4c51a8d7d1c3ddc3b1ea091d0784", size = 496072 },
|
1145 |
-
{ url = "https://files.pythonhosted.org/packages/ca/37/9e0638d2a129a1b72344a90a03b2b518c048066db0858aaf0877cb9d4acd/jiter-0.6.1-cp311-none-win32.whl", hash = "sha256:7d72fc86474862c9c6d1f87b921b70c362f2b7e8b2e3c798bb7d58e419a6bc0f", size = 197571 },
|
1146 |
-
{ url = "https://files.pythonhosted.org/packages/65/8a/78d337464e2b2e552d2988148e3e51da5445d910345c0d00f1982fd9aad4/jiter-0.6.1-cp311-none-win_amd64.whl", hash = "sha256:3e36a320634f33a07794bb15b8da995dccb94f944d298c8cfe2bd99b1b8a574a", size = 201994 },
|
1147 |
-
{ url = "https://files.pythonhosted.org/packages/2e/d5/fcdfbcea637f8b9b833597797d6b77fd7e22649b4794fc571674477c8520/jiter-0.6.1-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:1fad93654d5a7dcce0809aff66e883c98e2618b86656aeb2129db2cd6f26f867", size = 289279 },
|
1148 |
-
{ url = "https://files.pythonhosted.org/packages/9a/47/8e4a7704a267b8d1d3287b4353fc07f1f4a3541b27988ea3e49ccbf3164a/jiter-0.6.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:4e6e340e8cd92edab7f6a3a904dbbc8137e7f4b347c49a27da9814015cc0420c", size = 300931 },
|
1149 |
-
{ url = "https://files.pythonhosted.org/packages/ea/4f/fbb1e11fcc3881d108359d3db8456715c9d30ddfce84dc5f9e0856e08e11/jiter-0.6.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:691352e5653af84ed71763c3c427cff05e4d658c508172e01e9c956dfe004aba", size = 336534 },
|
1150 |
-
{ url = "https://files.pythonhosted.org/packages/29/8a/4c1e1229f89127187df166de760438b2a20e5a311391ba10d2b69db0da6f/jiter-0.6.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:defee3949313c1f5b55e18be45089970cdb936eb2a0063f5020c4185db1b63c9", size = 354266 },
|
1151 |
-
{ url = "https://files.pythonhosted.org/packages/19/15/3f27f4b9d40bc7709a30fda99876cbe9e9f75a0ea2ef7d55f3dd4d04f927/jiter-0.6.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:26d2bdd5da097e624081c6b5d416d3ee73e5b13f1703bcdadbb1881f0caa1933", size = 370492 },
|
1152 |
-
{ url = "https://files.pythonhosted.org/packages/1f/9d/9ec03c07325bc3a3c5b5082840b8ecb7e7ad38f3071c149b7c6fb9e78706/jiter-0.6.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:18aa9d1626b61c0734b973ed7088f8a3d690d0b7f5384a5270cd04f4d9f26c86", size = 390330 },
|
1153 |
-
{ url = "https://files.pythonhosted.org/packages/bd/3b/612ea6daa52d64bc0cc46f2bd2e138952c58f1edbe86b17fd89e07c33d86/jiter-0.6.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7a3567c8228afa5ddcce950631c6b17397ed178003dc9ee7e567c4c4dcae9fa0", size = 324245 },
|
1154 |
-
{ url = "https://files.pythonhosted.org/packages/21/0f/f3a1ffd9f203d4014b4e5045c0ea2c67ee71a7eee8bf3408dbf11007cf07/jiter-0.6.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e5c0507131c922defe3f04c527d6838932fcdfd69facebafd7d3574fa3395314", size = 368232 },
|
1155 |
-
{ url = "https://files.pythonhosted.org/packages/62/12/5d75729e0a57804852de0affc6f03b3df8518259e47ed4cd89aeeb671a71/jiter-0.6.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:540fcb224d7dc1bcf82f90f2ffb652df96f2851c031adca3c8741cb91877143b", size = 513820 },
|
1156 |
-
{ url = "https://files.pythonhosted.org/packages/5f/e8/e47734280e19cd465832e610e1c69367ee72947de738785c4b6fc4031e25/jiter-0.6.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:e7b75436d4fa2032b2530ad989e4cb0ca74c655975e3ff49f91a1a3d7f4e1df2", size = 496023 },
|
1157 |
-
{ url = "https://files.pythonhosted.org/packages/52/01/5f65dd1387d39aa3fd4a98a5be1d8470e929a0cb0dd6cbfebaccd9a20ac5/jiter-0.6.1-cp312-none-win32.whl", hash = "sha256:883d2ced7c21bf06874fdeecab15014c1c6d82216765ca6deef08e335fa719e0", size = 197425 },
|
1158 |
-
{ url = "https://files.pythonhosted.org/packages/43/b2/bd6665030f7d7cd5d9182c62a869c3d5ceadd7bff9f1b305de9192e7dbf8/jiter-0.6.1-cp312-none-win_amd64.whl", hash = "sha256:91e63273563401aadc6c52cca64a7921c50b29372441adc104127b910e98a5b6", size = 198966 },
|
1159 |
-
{ url = "https://files.pythonhosted.org/packages/23/38/7b48e0149778ff4b893567c9fd997ecfcc013e290375aa7823e1f681b3d3/jiter-0.6.1-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:852508a54fe3228432e56019da8b69208ea622a3069458252f725d634e955b31", size = 288674 },
|
1160 |
-
{ url = "https://files.pythonhosted.org/packages/85/3b/96d15b483d82a637279da53a1d299dd5da6e029b9905bcd1a4e1f89b8e4f/jiter-0.6.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f491cc69ff44e5a1e8bc6bf2b94c1f98d179e1aaf4a554493c171a5b2316b701", size = 301531 },
|
1161 |
-
{ url = "https://files.pythonhosted.org/packages/cf/54/9681f112cbec4e197259e9db679bd4bc314f4bd24f74b9aa5e93073990b5/jiter-0.6.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cc56c8f0b2a28ad4d8047f3ae62d25d0e9ae01b99940ec0283263a04724de1f3", size = 335954 },
|
1162 |
-
{ url = "https://files.pythonhosted.org/packages/4a/4d/f9c0ba82b154c66278e28348086086264ccf50622ae468ec215e4bbc2873/jiter-0.6.1-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:51b58f7a0d9e084a43b28b23da2b09fc5e8df6aa2b6a27de43f991293cab85fd", size = 353996 },
|
1163 |
-
{ url = "https://files.pythonhosted.org/packages/ee/be/7f26b258ef190f6d582e21c76c7dd1097753a2203bad3e1643f45392720a/jiter-0.6.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5f79ce15099154c90ef900d69c6b4c686b64dfe23b0114e0971f2fecd306ec6c", size = 369733 },
|
1164 |
-
{ url = "https://files.pythonhosted.org/packages/5f/85/037ed5261fa622312471ef5520b2135c26b29256c83adc16c8cc55dc4108/jiter-0.6.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:03a025b52009f47e53ea619175d17e4ded7c035c6fbd44935cb3ada11e1fd592", size = 389920 },
|
1165 |
-
{ url = "https://files.pythonhosted.org/packages/a8/f3/2e01294712faa476be9e6ceb49e424c3919e03415ded76d103378a06bb80/jiter-0.6.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c74a8d93718137c021d9295248a87c2f9fdc0dcafead12d2930bc459ad40f885", size = 324138 },
|
1166 |
-
{ url = "https://files.pythonhosted.org/packages/00/45/50377814f21b6412c7785be27f2dace225af52e0af20be7af899a7e3f264/jiter-0.6.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:40b03b75f903975f68199fc4ec73d546150919cb7e534f3b51e727c4d6ccca5a", size = 367610 },
|
1167 |
-
{ url = "https://files.pythonhosted.org/packages/af/fc/51ba30875125381bfe21a1572c176de1a7dd64a386a7498355fc100decc4/jiter-0.6.1-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:825651a3f04cf92a661d22cad61fc913400e33aa89b3e3ad9a6aa9dc8a1f5a71", size = 512945 },
|
1168 |
-
{ url = "https://files.pythonhosted.org/packages/69/60/af26168bd4916f9199ed433161e9f8a4eeda581a4e5982560d0f22dd146c/jiter-0.6.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:928bf25eb69ddb292ab8177fe69d3fbf76c7feab5fce1c09265a7dccf25d3991", size = 494963 },
|
1169 |
-
{ url = "https://files.pythonhosted.org/packages/f3/2f/4f3cc5c9067a6fd1020d3c4365546535a69ed77da7fba2bec24368f3662c/jiter-0.6.1-cp313-none-win32.whl", hash = "sha256:352cd24121e80d3d053fab1cc9806258cad27c53cad99b7a3cac57cf934b12e4", size = 196869 },
|
1170 |
-
{ url = "https://files.pythonhosted.org/packages/7a/fc/8709ee90837e94790d8b50db51c7b8a70e86e41b2c81e824c20b0ecfeba7/jiter-0.6.1-cp313-none-win_amd64.whl", hash = "sha256:be7503dd6f4bf02c2a9bacb5cc9335bc59132e7eee9d3e931b13d76fd80d7fda", size = 198919 },
|
1171 |
-
]
|
1172 |
-
|
1173 |
[[package]]
|
1174 |
name = "joblib"
|
1175 |
version = "1.4.2"
|
@@ -1600,6 +1503,7 @@ dependencies = [
|
|
1600 |
{ name = "mkdocs-minify-plugin" },
|
1601 |
{ name = "mkdocstrings" },
|
1602 |
{ name = "mkdocstrings-python" },
|
|
|
1603 |
{ name = "pip" },
|
1604 |
{ name = "pymupdf4llm" },
|
1605 |
{ name = "pypdf2" },
|
@@ -1615,6 +1519,7 @@ dependencies = [
|
|
1615 |
core = [
|
1616 |
{ name = "byaldi" },
|
1617 |
{ name = "firerequests" },
|
|
|
1618 |
{ name = "pymupdf4llm" },
|
1619 |
{ name = "python-dotenv" },
|
1620 |
{ name = "torch" },
|
@@ -1664,6 +1569,8 @@ requires-dist = [
|
|
1664 |
{ name = "mkdocstrings", marker = "extra == 'docs'", specifier = ">=0.26.1" },
|
1665 |
{ name = "mkdocstrings-python", specifier = ">=1.11.1" },
|
1666 |
{ name = "mkdocstrings-python", marker = "extra == 'docs'", specifier = ">=1.11.1" },
|
|
|
|
|
1667 |
{ name = "pip", specifier = ">=24.2" },
|
1668 |
{ name = "pymupdf4llm", specifier = ">=0.0.17" },
|
1669 |
{ name = "pymupdf4llm", marker = "extra == 'core'", specifier = ">=0.0.17" },
|
@@ -1678,8 +1585,8 @@ requires-dist = [
|
|
1678 |
{ name = "torch", specifier = ">=2.4.1" },
|
1679 |
{ name = "torch", marker = "extra == 'core'", specifier = ">=2.4.1" },
|
1680 |
{ name = "uv", specifier = ">=0.4.20" },
|
1681 |
-
{ name = "weave", specifier = ">=0.51.
|
1682 |
-
{ name = "weave", marker = "extra == 'core'", specifier = ">=0.51.
|
1683 |
]
|
1684 |
|
1685 |
[[package]]
|
@@ -2316,25 +2223,6 @@ wheels = [
|
|
2316 |
{ url = "https://files.pythonhosted.org/packages/da/d3/8057f0587683ed2fcd4dbfbdfdfa807b9160b809976099d36b8f60d08f03/nvidia_nvtx_cu12-12.1.105-py3-none-manylinux1_x86_64.whl", hash = "sha256:dc21cf308ca5691e7c04d962e213f8a4aa9bbfa23d95412f452254c2caeb09e5", size = 99138 },
|
2317 |
]
|
2318 |
|
2319 |
-
[[package]]
|
2320 |
-
name = "openai"
|
2321 |
-
version = "1.51.2"
|
2322 |
-
source = { registry = "https://pypi.org/simple" }
|
2323 |
-
dependencies = [
|
2324 |
-
{ name = "anyio" },
|
2325 |
-
{ name = "distro" },
|
2326 |
-
{ name = "httpx" },
|
2327 |
-
{ name = "jiter" },
|
2328 |
-
{ name = "pydantic" },
|
2329 |
-
{ name = "sniffio" },
|
2330 |
-
{ name = "tqdm" },
|
2331 |
-
{ name = "typing-extensions" },
|
2332 |
-
]
|
2333 |
-
sdist = { url = "https://files.pythonhosted.org/packages/95/64/9a5279138b5ea6c2f0e5443d5d93b4510cb87fa6fe7be0c92b837087124e/openai-1.51.2.tar.gz", hash = "sha256:c6a51fac62a1ca9df85a522e462918f6bb6bc51a8897032217e453a0730123a6", size = 307755 }
|
2334 |
-
wheels = [
|
2335 |
-
{ url = "https://files.pythonhosted.org/packages/3d/49/72198d0941b3a0264b6d13033823025c01c497f1cbfd83db310392c49c0e/openai-1.51.2-py3-none-any.whl", hash = "sha256:5c5954711cba931423e471c37ff22ae0fd3892be9b083eee36459865fbbb83fa", size = 383687 },
|
2336 |
-
]
|
2337 |
-
|
2338 |
[[package]]
|
2339 |
name = "overrides"
|
2340 |
version = "7.7.0"
|
@@ -3737,42 +3625,6 @@ wheels = [
|
|
3737 |
{ url = "https://files.pythonhosted.org/packages/4b/2c/ffbf7a134b9ab11a67b0cf0726453cedd9c5043a4fe7a35d1cefa9a1bcfb/threadpoolctl-3.5.0-py3-none-any.whl", hash = "sha256:56c1e26c150397e58c4926da8eeee87533b1e32bef131bd4bf6a2f45f3185467", size = 18414 },
|
3738 |
]
|
3739 |
|
3740 |
-
[[package]]
|
3741 |
-
name = "tiktoken"
|
3742 |
-
version = "0.8.0"
|
3743 |
-
source = { registry = "https://pypi.org/simple" }
|
3744 |
-
dependencies = [
|
3745 |
-
{ name = "regex" },
|
3746 |
-
{ name = "requests" },
|
3747 |
-
]
|
3748 |
-
sdist = { url = "https://files.pythonhosted.org/packages/37/02/576ff3a6639e755c4f70997b2d315f56d6d71e0d046f4fb64cb81a3fb099/tiktoken-0.8.0.tar.gz", hash = "sha256:9ccbb2740f24542534369c5635cfd9b2b3c2490754a78ac8831d99f89f94eeb2", size = 35107 }
|
3749 |
-
wheels = [
|
3750 |
-
{ url = "https://files.pythonhosted.org/packages/c9/ba/a35fad753bbca8ba0cc1b0f3402a70256a110ced7ac332cf84ba89fc87ab/tiktoken-0.8.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b07e33283463089c81ef1467180e3e00ab00d46c2c4bbcef0acab5f771d6695e", size = 1039905 },
|
3751 |
-
{ url = "https://files.pythonhosted.org/packages/91/05/13dab8fd7460391c387b3e69e14bf1e51ff71fe0a202cd2933cc3ea93fb6/tiktoken-0.8.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9269348cb650726f44dd3bbb3f9110ac19a8dcc8f54949ad3ef652ca22a38e21", size = 982417 },
|
3752 |
-
{ url = "https://files.pythonhosted.org/packages/e9/98/18ec4a8351a6cf4537e40cd6e19a422c10cce1ef00a2fcb716e0a96af58b/tiktoken-0.8.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:25e13f37bc4ef2d012731e93e0fef21dc3b7aea5bb9009618de9a4026844e560", size = 1144915 },
|
3753 |
-
{ url = "https://files.pythonhosted.org/packages/2e/28/cf3633018cbcc6deb7805b700ccd6085c9a5a7f72b38974ee0bffd56d311/tiktoken-0.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f13d13c981511331eac0d01a59b5df7c0d4060a8be1e378672822213da51e0a2", size = 1177221 },
|
3754 |
-
{ url = "https://files.pythonhosted.org/packages/57/81/8a5be305cbd39d4e83a794f9e80c7f2c84b524587b7feb27c797b2046d51/tiktoken-0.8.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:6b2ddbc79a22621ce8b1166afa9f9a888a664a579350dc7c09346a3b5de837d9", size = 1237398 },
|
3755 |
-
{ url = "https://files.pythonhosted.org/packages/dc/da/8d1cc3089a83f5cf11c2e489332752981435280285231924557350523a59/tiktoken-0.8.0-cp310-cp310-win_amd64.whl", hash = "sha256:d8c2d0e5ba6453a290b86cd65fc51fedf247e1ba170191715b049dac1f628005", size = 884215 },
|
3756 |
-
{ url = "https://files.pythonhosted.org/packages/f6/1e/ca48e7bfeeccaf76f3a501bd84db1fa28b3c22c9d1a1f41af9fb7579c5f6/tiktoken-0.8.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d622d8011e6d6f239297efa42a2657043aaed06c4f68833550cac9e9bc723ef1", size = 1039700 },
|
3757 |
-
{ url = "https://files.pythonhosted.org/packages/8c/f8/f0101d98d661b34534769c3818f5af631e59c36ac6d07268fbfc89e539ce/tiktoken-0.8.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2efaf6199717b4485031b4d6edb94075e4d79177a172f38dd934d911b588d54a", size = 982413 },
|
3758 |
-
{ url = "https://files.pythonhosted.org/packages/ac/3c/2b95391d9bd520a73830469f80a96e3790e6c0a5ac2444f80f20b4b31051/tiktoken-0.8.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5637e425ce1fc49cf716d88df3092048359a4b3bbb7da762840426e937ada06d", size = 1144242 },
|
3759 |
-
{ url = "https://files.pythonhosted.org/packages/01/c4/c4a4360de845217b6aa9709c15773484b50479f36bb50419c443204e5de9/tiktoken-0.8.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9fb0e352d1dbe15aba082883058b3cce9e48d33101bdaac1eccf66424feb5b47", size = 1176588 },
|
3760 |
-
{ url = "https://files.pythonhosted.org/packages/f8/a3/ef984e976822cd6c2227c854f74d2e60cf4cd6fbfca46251199914746f78/tiktoken-0.8.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:56edfefe896c8f10aba372ab5706b9e3558e78db39dd497c940b47bf228bc419", size = 1237261 },
|
3761 |
-
{ url = "https://files.pythonhosted.org/packages/1e/86/eea2309dc258fb86c7d9b10db536434fc16420feaa3b6113df18b23db7c2/tiktoken-0.8.0-cp311-cp311-win_amd64.whl", hash = "sha256:326624128590def898775b722ccc327e90b073714227175ea8febbc920ac0a99", size = 884537 },
|
3762 |
-
{ url = "https://files.pythonhosted.org/packages/c1/22/34b2e136a6f4af186b6640cbfd6f93400783c9ef6cd550d9eab80628d9de/tiktoken-0.8.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:881839cfeae051b3628d9823b2e56b5cc93a9e2efb435f4cf15f17dc45f21586", size = 1039357 },
|
3763 |
-
{ url = "https://files.pythonhosted.org/packages/04/d2/c793cf49c20f5855fd6ce05d080c0537d7418f22c58e71f392d5e8c8dbf7/tiktoken-0.8.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:fe9399bdc3f29d428f16a2f86c3c8ec20be3eac5f53693ce4980371c3245729b", size = 982616 },
|
3764 |
-
{ url = "https://files.pythonhosted.org/packages/b3/a1/79846e5ef911cd5d75c844de3fa496a10c91b4b5f550aad695c5df153d72/tiktoken-0.8.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9a58deb7075d5b69237a3ff4bb51a726670419db6ea62bdcd8bd80c78497d7ab", size = 1144011 },
|
3765 |
-
{ url = "https://files.pythonhosted.org/packages/26/32/e0e3a859136e95c85a572e4806dc58bf1ddf651108ae8b97d5f3ebe1a244/tiktoken-0.8.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2908c0d043a7d03ebd80347266b0e58440bdef5564f84f4d29fb235b5df3b04", size = 1175432 },
|
3766 |
-
{ url = "https://files.pythonhosted.org/packages/c7/89/926b66e9025b97e9fbabeaa59048a736fe3c3e4530a204109571104f921c/tiktoken-0.8.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:294440d21a2a51e12d4238e68a5972095534fe9878be57d905c476017bff99fc", size = 1236576 },
|
3767 |
-
{ url = "https://files.pythonhosted.org/packages/45/e2/39d4aa02a52bba73b2cd21ba4533c84425ff8786cc63c511d68c8897376e/tiktoken-0.8.0-cp312-cp312-win_amd64.whl", hash = "sha256:d8f3192733ac4d77977432947d563d7e1b310b96497acd3c196c9bddb36ed9db", size = 883824 },
|
3768 |
-
{ url = "https://files.pythonhosted.org/packages/e3/38/802e79ba0ee5fcbf240cd624143f57744e5d411d2e9d9ad2db70d8395986/tiktoken-0.8.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:02be1666096aff7da6cbd7cdaa8e7917bfed3467cd64b38b1f112e96d3b06a24", size = 1039648 },
|
3769 |
-
{ url = "https://files.pythonhosted.org/packages/b1/da/24cdbfc302c98663fbea66f5866f7fa1048405c7564ab88483aea97c3b1a/tiktoken-0.8.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:c94ff53c5c74b535b2cbf431d907fc13c678bbd009ee633a2aca269a04389f9a", size = 982763 },
|
3770 |
-
{ url = "https://files.pythonhosted.org/packages/e4/f0/0ecf79a279dfa41fc97d00adccf976ecc2556d3c08ef3e25e45eb31f665b/tiktoken-0.8.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6b231f5e8982c245ee3065cd84a4712d64692348bc609d84467c57b4b72dcbc5", size = 1144417 },
|
3771 |
-
{ url = "https://files.pythonhosted.org/packages/ab/d3/155d2d4514f3471a25dc1d6d20549ef254e2aa9bb5b1060809b1d3b03d3a/tiktoken-0.8.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4177faa809bd55f699e88c96d9bb4635d22e3f59d635ba6fd9ffedf7150b9953", size = 1175108 },
|
3772 |
-
{ url = "https://files.pythonhosted.org/packages/19/eb/5989e16821ee8300ef8ee13c16effc20dfc26c777d05fbb6825e3c037b81/tiktoken-0.8.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:5376b6f8dc4753cd81ead935c5f518fa0fbe7e133d9e25f648d8c4dabdd4bad7", size = 1236520 },
|
3773 |
-
{ url = "https://files.pythonhosted.org/packages/40/59/14b20465f1d1cb89cfbc96ec27e5617b2d41c79da12b5e04e96d689be2a7/tiktoken-0.8.0-cp313-cp313-win_amd64.whl", hash = "sha256:18228d624807d66c87acd8f25fc135665617cab220671eb65b50f5d70fa51f69", size = 883849 },
|
3774 |
-
]
|
3775 |
-
|
3776 |
[[package]]
|
3777 |
name = "tinycss2"
|
3778 |
version = "1.3.0"
|
@@ -4146,35 +3998,22 @@ wheels = [
|
|
4146 |
|
4147 |
[[package]]
|
4148 |
name = "weave"
|
4149 |
-
version = "0.51.
|
4150 |
source = { registry = "https://pypi.org/simple" }
|
4151 |
dependencies = [
|
4152 |
-
{ name = "aiofiles" },
|
4153 |
-
{ name = "aiohttp" },
|
4154 |
-
{ name = "aioprocessing" },
|
4155 |
-
{ name = "analytics-python" },
|
4156 |
{ name = "emoji" },
|
4157 |
-
{ name = "gql", extra = ["requests"] },
|
4158 |
-
{ name = "graphql-core" },
|
4159 |
-
{ name = "janus" },
|
4160 |
{ name = "numpy" },
|
4161 |
-
{ name = "openai" },
|
4162 |
{ name = "packaging" },
|
4163 |
-
{ name = "pyarrow" },
|
4164 |
{ name = "pydantic" },
|
4165 |
-
{ name = "python-dateutil" },
|
4166 |
-
{ name = "python-json-logger" },
|
4167 |
{ name = "rich" },
|
4168 |
{ name = "tenacity" },
|
4169 |
-
{ name = "tiktoken" },
|
4170 |
-
{ name = "typing-extensions" },
|
4171 |
{ name = "uuid-utils" },
|
4172 |
{ name = "wandb" },
|
4173 |
-
{ name = "werkzeug" },
|
4174 |
]
|
4175 |
-
sdist = { url = "https://files.pythonhosted.org/packages/
|
4176 |
wheels = [
|
4177 |
-
{ url = "https://files.pythonhosted.org/packages/
|
4178 |
]
|
4179 |
|
4180 |
[[package]]
|
@@ -4204,18 +4043,6 @@ wheels = [
|
|
4204 |
{ url = "https://files.pythonhosted.org/packages/5a/84/44687a29792a70e111c5c477230a72c4b957d88d16141199bf9acb7537a3/websocket_client-1.8.0-py3-none-any.whl", hash = "sha256:17b44cc997f5c498e809b22cdf2d9c7a9e71c02c8cc2b6c56e7c2d1239bfa526", size = 58826 },
|
4205 |
]
|
4206 |
|
4207 |
-
[[package]]
|
4208 |
-
name = "werkzeug"
|
4209 |
-
version = "3.0.4"
|
4210 |
-
source = { registry = "https://pypi.org/simple" }
|
4211 |
-
dependencies = [
|
4212 |
-
{ name = "markupsafe" },
|
4213 |
-
]
|
4214 |
-
sdist = { url = "https://files.pythonhosted.org/packages/0f/e2/6dbcaab07560909ff8f654d3a2e5a60552d937c909455211b1b36d7101dc/werkzeug-3.0.4.tar.gz", hash = "sha256:34f2371506b250df4d4f84bfe7b0921e4762525762bbd936614909fe25cd7306", size = 803966 }
|
4215 |
-
wheels = [
|
4216 |
-
{ url = "https://files.pythonhosted.org/packages/4b/84/997bbf7c2bf2dc3f09565c6d0b4959fefe5355c18c4096cfd26d83e0785b/werkzeug-3.0.4-py3-none-any.whl", hash = "sha256:02c9eb92b7d6c06f31a782811505d2157837cea66aaede3e217c7c27c039476c", size = 227554 },
|
4217 |
-
]
|
4218 |
-
|
4219 |
[[package]]
|
4220 |
name = "widgetsnbextension"
|
4221 |
version = "4.0.13"
|
|
|
124 |
{ url = "https://files.pythonhosted.org/packages/71/c0/6d33ac32bfbf9dd91a16c26bc37dd4763084d7f991dc848655d34e31291a/aiohttp-3.10.9-cp313-cp313-win_amd64.whl", hash = "sha256:a35ed3d03910785f7d9d6f5381f0c24002b2b888b298e6f941b2fc94c5055fcd", size = 377205 },
|
125 |
]
|
126 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
127 |
[[package]]
|
128 |
name = "aiosignal"
|
129 |
version = "1.3.1"
|
|
|
136 |
{ url = "https://files.pythonhosted.org/packages/76/ac/a7305707cb852b7e16ff80eaf5692309bde30e2b1100a1fcacdc8f731d97/aiosignal-1.3.1-py3-none-any.whl", hash = "sha256:f8376fb07dd1e86a584e4fcdec80b36b7f81aac666ebc724e2c090300dd83b17", size = 7617 },
|
137 |
]
|
138 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
139 |
[[package]]
|
140 |
name = "annotated-types"
|
141 |
version = "0.7.0"
|
|
|
627 |
{ url = "https://files.pythonhosted.org/packages/f5/3a/74a29b11cf2cdfcd6ba89c0cecd70b37cd1ba7b77978ce611eb7a146a832/dill-0.3.7-py3-none-any.whl", hash = "sha256:76b122c08ef4ce2eedcd4d1abd8e641114bfc6c2867f49f3c41facf65bf19f5e", size = 115254 },
|
628 |
]
|
629 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
630 |
[[package]]
|
631 |
name = "docker-pycreds"
|
632 |
version = "0.4.0"
|
|
|
855 |
]
|
856 |
|
857 |
[package.optional-dependencies]
|
858 |
+
aiohttp = [
|
859 |
+
{ name = "aiohttp" },
|
860 |
+
]
|
861 |
requests = [
|
862 |
{ name = "requests" },
|
863 |
{ name = "requests-toolbelt" },
|
|
|
1049 |
{ url = "https://files.pythonhosted.org/packages/d1/b3/8def84f539e7d2289a02f0524b944b15d7c75dab7628bedf1c4f0992029c/isort-5.13.2-py3-none-any.whl", hash = "sha256:8ca5e72a8d85860d5a3fa69b8745237f2939afe12dbf656afbcb47fe72d947a6", size = 92310 },
|
1050 |
]
|
1051 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1052 |
[[package]]
|
1053 |
name = "jedi"
|
1054 |
version = "0.19.1"
|
|
|
1073 |
{ url = "https://files.pythonhosted.org/packages/31/80/3a54838c3fb461f6fec263ebf3a3a41771bd05190238de3486aae8540c36/jinja2-3.1.4-py3-none-any.whl", hash = "sha256:bc5dd2abb727a5319567b7a813e6a2e7318c39f4f487cfe6c89c6f9c7d25197d", size = 133271 },
|
1074 |
]
|
1075 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1076 |
[[package]]
|
1077 |
name = "joblib"
|
1078 |
version = "1.4.2"
|
|
|
1503 |
{ name = "mkdocs-minify-plugin" },
|
1504 |
{ name = "mkdocstrings" },
|
1505 |
{ name = "mkdocstrings-python" },
|
1506 |
+
{ name = "pdf2image" },
|
1507 |
{ name = "pip" },
|
1508 |
{ name = "pymupdf4llm" },
|
1509 |
{ name = "pypdf2" },
|
|
|
1519 |
core = [
|
1520 |
{ name = "byaldi" },
|
1521 |
{ name = "firerequests" },
|
1522 |
+
{ name = "pdf2image" },
|
1523 |
{ name = "pymupdf4llm" },
|
1524 |
{ name = "python-dotenv" },
|
1525 |
{ name = "torch" },
|
|
|
1569 |
{ name = "mkdocstrings", marker = "extra == 'docs'", specifier = ">=0.26.1" },
|
1570 |
{ name = "mkdocstrings-python", specifier = ">=1.11.1" },
|
1571 |
{ name = "mkdocstrings-python", marker = "extra == 'docs'", specifier = ">=1.11.1" },
|
1572 |
+
{ name = "pdf2image", specifier = ">=1.17.0" },
|
1573 |
+
{ name = "pdf2image", marker = "extra == 'core'", specifier = ">=1.17.0" },
|
1574 |
{ name = "pip", specifier = ">=24.2" },
|
1575 |
{ name = "pymupdf4llm", specifier = ">=0.0.17" },
|
1576 |
{ name = "pymupdf4llm", marker = "extra == 'core'", specifier = ">=0.0.17" },
|
|
|
1585 |
{ name = "torch", specifier = ">=2.4.1" },
|
1586 |
{ name = "torch", marker = "extra == 'core'", specifier = ">=2.4.1" },
|
1587 |
{ name = "uv", specifier = ">=0.4.20" },
|
1588 |
+
{ name = "weave", specifier = ">=0.51.14" },
|
1589 |
+
{ name = "weave", marker = "extra == 'core'", specifier = ">=0.51.14" },
|
1590 |
]
|
1591 |
|
1592 |
[[package]]
|
|
|
2223 |
{ url = "https://files.pythonhosted.org/packages/da/d3/8057f0587683ed2fcd4dbfbdfdfa807b9160b809976099d36b8f60d08f03/nvidia_nvtx_cu12-12.1.105-py3-none-manylinux1_x86_64.whl", hash = "sha256:dc21cf308ca5691e7c04d962e213f8a4aa9bbfa23d95412f452254c2caeb09e5", size = 99138 },
|
2224 |
]
|
2225 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2226 |
[[package]]
|
2227 |
name = "overrides"
|
2228 |
version = "7.7.0"
|
|
|
3625 |
{ url = "https://files.pythonhosted.org/packages/4b/2c/ffbf7a134b9ab11a67b0cf0726453cedd9c5043a4fe7a35d1cefa9a1bcfb/threadpoolctl-3.5.0-py3-none-any.whl", hash = "sha256:56c1e26c150397e58c4926da8eeee87533b1e32bef131bd4bf6a2f45f3185467", size = 18414 },
|
3626 |
]
|
3627 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3628 |
[[package]]
|
3629 |
name = "tinycss2"
|
3630 |
version = "1.3.0"
|
|
|
3998 |
|
3999 |
[[package]]
|
4000 |
name = "weave"
|
4001 |
+
version = "0.51.14"
|
4002 |
source = { registry = "https://pypi.org/simple" }
|
4003 |
dependencies = [
|
|
|
|
|
|
|
|
|
4004 |
{ name = "emoji" },
|
4005 |
+
{ name = "gql", extra = ["aiohttp", "requests"] },
|
|
|
|
|
4006 |
{ name = "numpy" },
|
|
|
4007 |
{ name = "packaging" },
|
|
|
4008 |
{ name = "pydantic" },
|
|
|
|
|
4009 |
{ name = "rich" },
|
4010 |
{ name = "tenacity" },
|
|
|
|
|
4011 |
{ name = "uuid-utils" },
|
4012 |
{ name = "wandb" },
|
|
|
4013 |
]
|
4014 |
+
sdist = { url = "https://files.pythonhosted.org/packages/e5/78/4b6b415d73b51e48defefc80136c0e3673db2d518167a0c54666447ba067/weave-0.51.14.tar.gz", hash = "sha256:a14c6d1877b38e4ad780f9fb8a40d19514696d8f450decce7b18529907d800d9", size = 200324 }
|
4015 |
wheels = [
|
4016 |
+
{ url = "https://files.pythonhosted.org/packages/f0/57/53a022e364d5b62375966f7302c86b08fcb0ce90f99653f7d49e3fa7269c/weave-0.51.14-py3-none-any.whl", hash = "sha256:9328a787e393fd7e4b8807680be2c590b0ef9c60d9ef04268c19630791af1712", size = 251898 },
|
4017 |
]
|
4018 |
|
4019 |
[[package]]
|
|
|
4043 |
{ url = "https://files.pythonhosted.org/packages/5a/84/44687a29792a70e111c5c477230a72c4b957d88d16141199bf9acb7537a3/websocket_client-1.8.0-py3-none-any.whl", hash = "sha256:17b44cc997f5c498e809b22cdf2d9c7a9e71c02c8cc2b6c56e7c2d1239bfa526", size = 58826 },
|
4044 |
]
|
4045 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4046 |
[[package]]
|
4047 |
name = "widgetsnbextension"
|
4048 |
version = "4.0.13"
|