Update app.py
Browse files
app.py
CHANGED
@@ -8,7 +8,6 @@ logging.basicConfig(
|
|
8 |
format="%(asctime)s - %(levelname)s - %(message)s",
|
9 |
)
|
10 |
|
11 |
-
|
12 |
import gradio as gr
|
13 |
import nltk
|
14 |
import torch
|
@@ -23,10 +22,8 @@ nltk.download("stopwords") # TODO=find where this requirement originates from
|
|
23 |
def load_uploaded_file(file_obj, temp_dir: Path = None):
|
24 |
"""
|
25 |
load_uploaded_file - process an uploaded file
|
26 |
-
|
27 |
Args:
|
28 |
file_obj (file or list): Gradio file object, possibly wrapped in a list
|
29 |
-
|
30 |
Returns:
|
31 |
str, the uploaded file contents
|
32 |
"""
|
@@ -62,11 +59,9 @@ def convert_PDF(
|
|
62 |
):
|
63 |
"""
|
64 |
convert_PDF - convert a PDF file to text
|
65 |
-
|
66 |
Args:
|
67 |
pdf_bytes_obj (bytes): PDF file contents
|
68 |
language (str, optional): Language to use for OCR. Defaults to "en".
|
69 |
-
|
70 |
Returns:
|
71 |
str, the PDF file contents as text
|
72 |
"""
|
@@ -95,7 +90,6 @@ def convert_PDF(
|
|
95 |
converted_txt = conversion_stats["converted_text"]
|
96 |
num_pages = conversion_stats["num_pages"]
|
97 |
was_truncated = conversion_stats["truncated"]
|
98 |
-
# if alt_lang: # TODO: fix this
|
99 |
|
100 |
rt = round((time.perf_counter() - st) / 60, 2)
|
101 |
print(f"Runtime: {rt} minutes")
|
@@ -174,10 +168,13 @@ if __name__ == "__main__":
|
|
174 |
interactive=False,
|
175 |
)
|
176 |
|
|
|
177 |
convert_button.click(
|
178 |
fn=convert_PDF,
|
179 |
inputs=[uploaded_file],
|
180 |
outputs=[OCR_text, out_placeholder, text_file],
|
|
|
181 |
)
|
182 |
|
183 |
-
|
|
|
|
8 |
format="%(asctime)s - %(levelname)s - %(message)s",
|
9 |
)
|
10 |
|
|
|
11 |
import gradio as gr
|
12 |
import nltk
|
13 |
import torch
|
|
|
22 |
def load_uploaded_file(file_obj, temp_dir: Path = None):
|
23 |
"""
|
24 |
load_uploaded_file - process an uploaded file
|
|
|
25 |
Args:
|
26 |
file_obj (file or list): Gradio file object, possibly wrapped in a list
|
|
|
27 |
Returns:
|
28 |
str, the uploaded file contents
|
29 |
"""
|
|
|
59 |
):
|
60 |
"""
|
61 |
convert_PDF - convert a PDF file to text
|
|
|
62 |
Args:
|
63 |
pdf_bytes_obj (bytes): PDF file contents
|
64 |
language (str, optional): Language to use for OCR. Defaults to "en".
|
|
|
65 |
Returns:
|
66 |
str, the PDF file contents as text
|
67 |
"""
|
|
|
90 |
converted_txt = conversion_stats["converted_text"]
|
91 |
num_pages = conversion_stats["num_pages"]
|
92 |
was_truncated = conversion_stats["truncated"]
|
|
|
93 |
|
94 |
rt = round((time.perf_counter() - st) / 60, 2)
|
95 |
print(f"Runtime: {rt} minutes")
|
|
|
168 |
interactive=False,
|
169 |
)
|
170 |
|
171 |
+
# Set api_name to expose the function as an API route
|
172 |
convert_button.click(
|
173 |
fn=convert_PDF,
|
174 |
inputs=[uploaded_file],
|
175 |
outputs=[OCR_text, out_placeholder, text_file],
|
176 |
+
api_name="convert_pdf", # Exposing this function as an API route
|
177 |
)
|
178 |
|
179 |
+
# Expose the API page with show_api=True
|
180 |
+
demo.launch(enable_queue=True, show_api=True)
|