pdf_to_text / app.py
Rahul8827's picture
Upload 2 files
72e4a80
raw
history blame
576 Bytes
import fitz # PyMuPDF
import pytesseract
import gradio as gr
from PIL import Image
def pdf_to_text(pdf_file) -> str:
    """Extract text from a PDF by running OCR on a rendering of every page.

    Each page is rasterized with PyMuPDF, wrapped in a PIL image and handed
    to Tesseract; the per-page results are concatenated in page order.

    Args:
        pdf_file: The PDF to read; forwarded unchanged to ``fitz.open``.

    Returns:
        The OCR output of all pages, joined with no extra separator.
    """
    page_texts = []
    # The context manager closes the document even if rendering or OCR
    # raises part-way through, so the handle is never leaked.
    with fitz.open(pdf_file) as doc:
        for page in doc:
            pix = page.get_pixmap()
            # get_pixmap() renders RGB without an alpha channel by default,
            # which is why the raw samples are interpreted as "RGB" here.
            img = Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
            page_texts.append(pytesseract.image_to_string(img))
    return "".join(page_texts)
def pdf_to_text_interface(pdf_file):
    """Gradio callback: return the OCR text of the given PDF.

    Delegates directly to ``pdf_to_text`` and returns its result unchanged.
    """
    return pdf_to_text(pdf_file)
# Build the web UI: a single file input wired to the OCR callback, with the
# result shown as text.
iface = gr.Interface(
    fn=pdf_to_text_interface,
    inputs="file",
    outputs="text",
    title="PDF to Text Converter",
)
# Start serving the interface when this module is executed.
iface.launch()