BhagatSurya committed on
Commit
5abf32d
1 Parent(s): e1bd857

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -2
app.py CHANGED
@@ -24,6 +24,8 @@ def image_to_latex(image):
24
  image.save(image_path)
25
  result = subprocess.run(["pix2tex", image_path], capture_output=True, text=True)
26
  return result.stdout
 
 
27
  def pdf_to_text(file):
28
  with open(file.name, 'rb') as f:
29
  reader = PdfReader(f)
@@ -36,13 +38,15 @@ def pdf_to_text(file):
36
  page_text = image_to_latex(image)
37
  page_text = clean_text(page_text)
38
  if len(page_text.split()) > 5:
39
- page_text = "## Metadata: Page Number " + str(i+1) + "\n" + page_text
 
40
  full_text += page_text + "\n\n"
41
  base_name = os.path.splitext(os.path.basename(file.name))[0]
42
  output_file_name = base_name + ".txt"
43
  with open(output_file_name, 'w') as f:
44
  f.write(full_text)
45
- return output_file_name
 
46
 
47
 
48
  iface = gr.Interface(fn=pdf_to_text,
 
24
  image.save(image_path)
25
  result = subprocess.run(["pix2tex", image_path], capture_output=True, text=True)
26
  return result.stdout
27
+
28
+
29
  def pdf_to_text(file):
30
  with open(file.name, 'rb') as f:
31
  reader = PdfReader(f)
 
38
  page_text = image_to_latex(image)
39
  page_text = clean_text(page_text)
40
  if len(page_text.split()) > 5:
41
+ page_number = i + 1
42
+ page_text = "## Metadata: Page Number " + str(page_number) + "\n" + page_text
43
  full_text += page_text + "\n\n"
44
  base_name = os.path.splitext(os.path.basename(file.name))[0]
45
  output_file_name = base_name + ".txt"
46
  with open(output_file_name, 'w') as f:
47
  f.write(full_text)
48
+ return output_file_name, page_number
49
+
50
 
51
 
52
  iface = gr.Interface(fn=pdf_to_text,