Commit
·
6d2b0a3
1
Parent(s):
64cd544
improve description
Browse files
app.py
CHANGED
@@ -56,14 +56,21 @@ def process_pdfs(
|
|
56 |
progress=gr.Progress(),
|
57 |
):
|
58 |
if not pdf_files:
|
59 |
-
return
|
|
|
|
|
|
|
|
|
|
|
|
|
60 |
|
61 |
if oauth_token is None:
|
62 |
-
gr.Info("Please log in to upload to Hugging Face.")
|
63 |
return (
|
64 |
None,
|
65 |
None,
|
66 |
-
|
|
|
|
|
67 |
)
|
68 |
|
69 |
try:
|
@@ -71,6 +78,7 @@ def process_pdfs(
|
|
71 |
images_dir = os.path.join(temp_dir, "images")
|
72 |
os.makedirs(images_dir)
|
73 |
|
|
|
74 |
images, message = pdf_to_images(pdf_files, sample_size, images_dir)
|
75 |
|
76 |
# Create a zip file of the images
|
@@ -106,7 +114,10 @@ def process_pdfs(
|
|
106 |
|
107 |
# Define the Gradio interface
|
108 |
with gr.Blocks() as demo:
|
109 |
-
gr.
|
|
|
|
|
|
|
110 |
gr.Markdown(
|
111 |
"Upload PDF(s), convert pages to images, and optionally upload them to a Hugging Face repo. If a sample size is specified, random pages will be selected."
|
112 |
)
|
@@ -121,17 +132,20 @@ with gr.Blocks() as demo:
|
|
121 |
with gr.Row():
|
122 |
sample_size = gr.Number(
|
123 |
value=None,
|
124 |
-
label="
|
|
|
125 |
)
|
126 |
hf_repo = gr.Textbox(
|
127 |
-
label="Hugging Face Repo",
|
|
|
|
|
128 |
)
|
129 |
-
|
130 |
-
|
131 |
status_text = gr.Markdown(label="Status")
|
132 |
download_button = gr.File(label="Download Converted Images")
|
133 |
|
134 |
-
submit_button = gr.Button("
|
135 |
submit_button.click(
|
136 |
process_pdfs,
|
137 |
inputs=[pdf_files, sample_size, hf_repo],
|
|
|
56 |
progress=gr.Progress(),
|
57 |
):
|
58 |
if not pdf_files:
|
59 |
+
return (
|
60 |
+
None,
|
61 |
+
None,
|
62 |
+
gr.Markdown(
|
63 |
+
"⚠️ No PDF files uploaded. Please upload at least one PDF file."
|
64 |
+
),
|
65 |
+
)
|
66 |
|
67 |
if oauth_token is None:
|
|
|
68 |
return (
|
69 |
None,
|
70 |
None,
|
71 |
+
gr.Markdown(
|
72 |
+
"⚠️ Not logged in to Hugging Face. Please log in to upload to a Hugging Face dataset."
|
73 |
+
),
|
74 |
)
|
75 |
|
76 |
try:
|
|
|
78 |
images_dir = os.path.join(temp_dir, "images")
|
79 |
os.makedirs(images_dir)
|
80 |
|
81 |
+
progress(0, desc="Starting PDF processing")
|
82 |
images, message = pdf_to_images(pdf_files, sample_size, images_dir)
|
83 |
|
84 |
# Create a zip file of the images
|
|
|
114 |
|
115 |
# Define the Gradio interface
|
116 |
with gr.Blocks() as demo:
|
117 |
+
gr.HTML(
|
118 |
+
"""<h1 style='text-align: center;'> PDFs to Page Images Converter</h1>
|
119 |
+
<center><i> 📁 Convert PDFs to an image dataset 📁 </i></center>"""
|
120 |
+
)
|
121 |
gr.Markdown(
|
122 |
"Upload PDF(s), convert pages to images, and optionally upload them to a Hugging Face repo. If a sample size is specified, random pages will be selected."
|
123 |
)
|
|
|
132 |
with gr.Row():
|
133 |
sample_size = gr.Number(
|
134 |
value=None,
|
135 |
+
label="Pages per PDF (0 for all pages)",
|
136 |
+
info="Specify how many pages to convert from each PDF. Use 0 to convert all pages.",
|
137 |
)
|
138 |
hf_repo = gr.Textbox(
|
139 |
+
label="Hugging Face Repo",
|
140 |
+
placeholder="username/repo-name",
|
141 |
+
info="Enter the Hugging Face repository name in the format 'username/repo-name'",
|
142 |
)
|
143 |
+
with gr.Accordion("View converted images", open=False):
|
144 |
+
output_gallery = gr.Gallery(label="Converted Images")
|
145 |
status_text = gr.Markdown(label="Status")
|
146 |
download_button = gr.File(label="Download Converted Images")
|
147 |
|
148 |
+
submit_button = gr.Button("Convert PDFs to page images")
|
149 |
submit_button.click(
|
150 |
process_pdfs,
|
151 |
inputs=[pdf_files, sample_size, hf_repo],
|