Files changed (1) hide show
  1. app.py +6 -4
app.py CHANGED
@@ -39,8 +39,9 @@ def pdf_to_text(file):
39
  image_list = page.get_images(full=True)
40
  for img in image_list:
41
  xref, name, ext, color_space, width, height, bpc, image_data, image_mask, smask_data = img
42
- # Decode image_data from base64 before opening it
43
- image_data = base64.b64decode(image_data)
 
44
  image = Image.open(io.BytesIO(image_data))
45
  latex_code = image_to_latex(image)
46
  page_text += "\n" + latex_code # Add LaTeX code to page text
@@ -56,12 +57,13 @@ def pdf_to_text(file):
56
  with open(output_file_name, 'w') as f:
57
  f.write(full_text)
58
 
59
- return output_file_name, page_number
60
 
61
 
62
  iface = gr.Interface(fn=pdf_to_text,
63
  inputs=gr.inputs.File(label="Your PDF"),
64
- outputs=gr.outputs.File(label="Download TXT"),
65
  title="PDF to TXT",
66
  description="Convert your PDF files to clean text")
67
  iface.launch()
 
 
39
  image_list = page.get_images(full=True)
40
  for img in image_list:
41
  xref, name, ext, color_space, width, height, bpc, image_data, image_mask, smask_data = img
42
+ # Ensure image_data is a bytes object before opening it
43
+ if isinstance(image_data, str):
44
+ image_data = image_data.encode() # Convert string to bytes if necessary
45
  image = Image.open(io.BytesIO(image_data))
46
  latex_code = image_to_latex(image)
47
  page_text += "\n" + latex_code # Add LaTeX code to page text
 
57
  with open(output_file_name, 'w') as f:
58
  f.write(full_text)
59
 
60
+ return f"{output_file_name},{page_number}"
61
 
62
 
63
  iface = gr.Interface(fn=pdf_to_text,
64
  inputs=gr.inputs.File(label="Your PDF"),
65
+ outputs="text",
66
  title="PDF to TXT",
67
  description="Convert your PDF files to clean text")
68
  iface.launch()
69
+