pacman2223 committed on
Commit
24d5b1d
1 Parent(s): a1fcd2a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -3
app.py CHANGED
@@ -61,7 +61,7 @@ import re
61
  import gradio as gr
62
  import torch
63
  from transformers import DonutProcessor, VisionEncoderDecoderModel
64
- from pdf2image import convert_from_bytes
65
  import io
66
 
67
  processor = DonutProcessor.from_pretrained("pacman2223/univ-docu-model-v3")
@@ -72,14 +72,17 @@ model.to(device)
72
  def pdf_to_images(pdf_file):
73
  if pdf_file is None:
74
  return None
75
- images = convert_from_bytes(pdf_file.read())
76
- return images
77
 
78
  def process_document(pdf_file, page_number, question):
79
  if pdf_file is None:
80
  return "Please upload a PDF file."
81
 
82
  images = pdf_to_images(pdf_file)
 
 
 
83
  if page_number < 1 or page_number > len(images):
84
  return f"Invalid page number. The PDF has {len(images)} pages."
85
 
@@ -121,6 +124,8 @@ def update_page_slider(pdf_file):
121
  if pdf_file is None:
122
  return gr.Slider(minimum=1, maximum=1, value=1, step=1, label="Page Number")
123
  images = pdf_to_images(pdf_file)
 
 
124
  return gr.Slider(minimum=1, maximum=len(images), value=1, step=1, label="Page Number")
125
 
126
  with gr.Blocks() as demo:
 
61
  import gradio as gr
62
  import torch
63
  from transformers import DonutProcessor, VisionEncoderDecoderModel
64
+ from pdf2image import convert_from_path
65
  import io
66
 
67
  processor = DonutProcessor.from_pretrained("pacman2223/univ-docu-model-v3")
 
72
  def pdf_to_images(pdf_file):
73
  if pdf_file is None:
74
  return None
75
+ pdf_path = pdf_file.name # Get the file path
76
+ return convert_from_path(pdf_path)
77
 
78
  def process_document(pdf_file, page_number, question):
79
  if pdf_file is None:
80
  return "Please upload a PDF file."
81
 
82
  images = pdf_to_images(pdf_file)
83
+ if images is None:
84
+ return "Failed to process the PDF file."
85
+
86
  if page_number < 1 or page_number > len(images):
87
  return f"Invalid page number. The PDF has {len(images)} pages."
88
 
 
124
  if pdf_file is None:
125
  return gr.Slider(minimum=1, maximum=1, value=1, step=1, label="Page Number")
126
  images = pdf_to_images(pdf_file)
127
+ if images is None:
128
+ return gr.Slider(minimum=1, maximum=1, value=1, step=1, label="Page Number")
129
  return gr.Slider(minimum=1, maximum=len(images), value=1, step=1, label="Page Number")
130
 
131
  with gr.Blocks() as demo: