Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -83,14 +83,29 @@ def predict(input_text, pdf_file):
|
|
83 |
return extract_result or "Too many users. Please wait a moment!"
|
84 |
|
85 |
|
86 |
-
def view_pdf(pdf_file):
|
87 |
if pdf_file is None:
|
88 |
return "Please upload a PDF file to view."
|
89 |
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
94 |
|
95 |
|
96 |
en_1 = """Could you please help me extract the information of 'title'/'journal'/'year'/'author'/'institution'/'email' from the previous content in a markdown table format?
|
|
|
83 |
return extract_result or "Too many users. Please wait a moment!"
|
84 |
|
85 |
|
86 |
+
def view_pdf(pdf_file, max_pages=3):
    """Return an HTML snippet that embeds a preview of the uploaded PDF.

    Only the first ``max_pages`` pages are copied into the preview so the
    base64 payload stays small for large PDFs.

    Args:
        pdf_file: Uploaded file object whose ``.name`` attribute is passed
            to ``fitz.open`` (presumably the path of the upload; confirm
            against the caller), or ``None`` when nothing was uploaded.
        max_pages: Maximum number of leading pages to include in the preview.

    Returns:
        An ``<embed>`` tag with the preview inlined as a base64 data URI,
        or a plain message when no file was given or the preview failed.
    """
    if pdf_file is None:
        return "Please upload a PDF file to view."

    try:
        # Context managers close both documents on success and on error,
        # so repeated previews do not leak open file handles.
        with fitz.open(pdf_file.name) as doc, fitz.open() as preview_pdf:
            # Copy only the leading pages into the empty preview document.
            for page_num in range(min(max_pages, doc.page_count)):
                preview_pdf.insert_pdf(doc, from_page=page_num, to_page=page_num)

            # Serialize the preview in memory; no temporary file is written.
            pdf_data = preview_pdf.tobytes()

        # Encode as base64 for embedding in HTML.
        b64_data = base64.b64encode(pdf_data).decode('utf-8')
        return f"<embed src='data:application/pdf;base64,{b64_data}' type='application/pdf' width='100%' height='700px' />"

    except Exception as e:
        # UI boundary: report the failure and show a friendly message
        # instead of propagating the error to the caller.
        print(f"Error displaying PDF: {e}")
        return "Error displaying PDF. Please try re-uploading."
|
109 |
|
110 |
|
111 |
en_1 = """Could you please help me extract the information of 'title'/'journal'/'year'/'author'/'institution'/'email' from the previous content in a markdown table format?
|