Spaces:
Runtime error
Runtime error
bhagatsuryainatom
committed on
Commit
•
76a5996
1
Parent(s):
e1bd857
Update app.py
Browse files
app.py
CHANGED
@@ -24,6 +24,9 @@ def image_to_latex(image):
|
|
24 |
image.save(image_path)
|
25 |
result = subprocess.run(["pix2tex", image_path], capture_output=True, text=True)
|
26 |
return result.stdout
|
|
|
|
|
|
|
27 |
def pdf_to_text(file):
|
28 |
with open(file.name, 'rb') as f:
|
29 |
reader = PdfReader(f)
|
@@ -36,13 +39,15 @@ def pdf_to_text(file):
|
|
36 |
page_text = image_to_latex(image)
|
37 |
page_text = clean_text(page_text)
|
38 |
if len(page_text.split()) > 5:
|
39 |
-
|
|
|
40 |
full_text += page_text + "\n\n"
|
41 |
base_name = os.path.splitext(os.path.basename(file.name))[0]
|
42 |
output_file_name = base_name + ".txt"
|
43 |
with open(output_file_name, 'w') as f:
|
44 |
f.write(full_text)
|
45 |
-
return output_file_name
|
|
|
46 |
|
47 |
|
48 |
iface = gr.Interface(fn=pdf_to_text,
|
|
|
24 |
image.save(image_path)
|
25 |
result = subprocess.run(["pix2tex", image_path], capture_output=True, text=True)
|
26 |
return result.stdout
|
27 |
+
|
28 |
+
|
29 |
+
|
30 |
def pdf_to_text(file):
|
31 |
with open(file.name, 'rb') as f:
|
32 |
reader = PdfReader(f)
|
|
|
39 |
page_text = image_to_latex(image)
|
40 |
page_text = clean_text(page_text)
|
41 |
if len(page_text.split()) > 5:
|
42 |
+
page_number = i + 1
|
43 |
+
page_text = "## Metadata: Page Number " + str(page_number) + "\n" + page_text
|
44 |
full_text += page_text + "\n\n"
|
45 |
base_name = os.path.splitext(os.path.basename(file.name))[0]
|
46 |
output_file_name = base_name + ".txt"
|
47 |
with open(output_file_name, 'w') as f:
|
48 |
f.write(full_text)
|
49 |
+
return output_file_name, page_number
|
50 |
+
|
51 |
|
52 |
|
53 |
iface = gr.Interface(fn=pdf_to_text,
|