pdf-ocr2

Running

GAS17 committed on Dec 21, 2024

Commit

70e6398

verified ·

1 Parent(s): 8bf3eb1

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -8,7 +8,6 @@ logging.basicConfig(
     format="%(asctime)s - %(levelname)s - %(message)s",
 )
 import gradio as gr
 import nltk
 import torch
@@ -23,10 +22,8 @@ nltk.download("stopwords")  # TODO=find where this requirement originates from
 def load_uploaded_file(file_obj, temp_dir: Path = None):
     """
     load_uploaded_file - process an uploaded file
     Args:
         file_obj (POTENTIALLY list): Gradio file object inside a list
     Returns:
         str, the uploaded file contents
     """
@@ -62,11 +59,9 @@ def convert_PDF(
 ):
     """
     convert_PDF - convert a PDF file to text
     Args:
         pdf_bytes_obj (bytes): PDF file contents
         language (str, optional): Language to use for OCR. Defaults to "en".
     Returns:
         str, the PDF file contents as text
     """
@@ -95,7 +90,6 @@ def convert_PDF(
     converted_txt = conversion_stats["converted_text"]
     num_pages = conversion_stats["num_pages"]
     was_truncated = conversion_stats["truncated"]
-    # if alt_lang: # TODO: fix this
     rt = round((time.perf_counter() - st) / 60, 2)
     print(f"Runtime: {rt} minutes")
@@ -174,10 +168,13 @@ if __name__ == "__main__":
                 interactive=False,
             )
         convert_button.click(
             fn=convert_PDF,
             inputs=[uploaded_file],
             outputs=[OCR_text, out_placeholder, text_file],
         )
-    demo.launch(enable_queue=True)

     format="%(asctime)s - %(levelname)s - %(message)s",
 )
 import gradio as gr
 import nltk
 import torch
 def load_uploaded_file(file_obj, temp_dir: Path = None):
     """
     load_uploaded_file - process an uploaded file
     Args:
         file_obj (POTENTIALLY list): Gradio file object inside a list
     Returns:
         str, the uploaded file contents
     """
 ):
     """
     convert_PDF - convert a PDF file to text
     Args:
         pdf_bytes_obj (bytes): PDF file contents
         language (str, optional): Language to use for OCR. Defaults to "en".
     Returns:
         str, the PDF file contents as text
     """
     converted_txt = conversion_stats["converted_text"]
     num_pages = conversion_stats["num_pages"]
     was_truncated = conversion_stats["truncated"]
     rt = round((time.perf_counter() - st) / 60, 2)
     print(f"Runtime: {rt} minutes")
                 interactive=False,
             )
+        # Set api_name to expose the function as an API route
         convert_button.click(
             fn=convert_PDF,
             inputs=[uploaded_file],
             outputs=[OCR_text, out_placeholder, text_file],
+            api_name="convert_pdf",  # Exposing this function as an API route
         )
+    # Expose the API page with show_api=True
+    demo.launch(enable_queue=True, show_api=True)