jackkuo committed on
Commit
606ffde
·
verified ·
1 Parent(s): e34b25a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -5
app.py CHANGED
@@ -83,14 +83,29 @@ def predict(input_text, pdf_file):
83
  return extract_result or "Too many users. Please wait a moment!"
84
 
85
 
86
- def view_pdf(pdf_file):
87
  if pdf_file is None:
88
  return "Please upload a PDF file to view."
89
 
90
- with open(pdf_file.name, 'rb') as f:
91
- pdf_data = f.read()
92
- b64_data = base64.b64encode(pdf_data).decode('utf-8')
93
- return f"<embed src='data:application/pdf;base64,{b64_data}' type='application/pdf' width='100%' height='700px' />"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94
 
95
 
96
  en_1 = """Could you please help me extract the information of 'title'/'journal'/'year'/'author'/'institution'/'email' from the previous content in a markdown table format?
 
83
  return extract_result or "Too many users. Please wait a moment!"
84
 
85
 
86
+ def view_pdf(pdf_file, max_pages=3):
87
  if pdf_file is None:
88
  return "Please upload a PDF file to view."
89
 
90
+ try:
91
+ # Open the PDF file
92
+ doc = fitz.open(pdf_file.name)
93
+
94
+ # Only read up to `max_pages` pages to reduce size for large PDFs
95
+ preview_pdf = fitz.open() # Create an empty PDF for the preview
96
+ for page_num in range(min(max_pages, doc.page_count)):
97
+ preview_pdf.insert_pdf(doc, from_page=page_num, to_page=page_num)
98
+
99
+ # Save the preview as a temporary in-memory file
100
+ pdf_data = preview_pdf.tobytes()
101
+
102
+ # Encode as base64 for embedding in HTML
103
+ b64_data = base64.b64encode(pdf_data).decode('utf-8')
104
+ return f"<embed src='data:application/pdf;base64,{b64_data}' type='application/pdf' width='100%' height='700px' />"
105
+
106
+ except Exception as e:
107
+ print(f"Error displaying PDF: {e}")
108
+ return "Error displaying PDF. Please try re-uploading."
109
 
110
 
111
  en_1 = """Could you please help me extract the information of 'title'/'journal'/'year'/'author'/'institution'/'email' from the previous content in a markdown table format?