Spaces:
Runtime error
Runtime error
import fitz # PyMuPDF | |
import pytesseract | |
import gradio as gr | |
from PIL import Image | |
def pdf_to_text(pdf_file):
    """Extract text from a PDF by rendering each page and running OCR on it.

    Each page is rasterized with PyMuPDF, wrapped in a PIL image and handed
    to pytesseract; the per-page results are concatenated in page order.

    Args:
        pdf_file: Path of the PDF to read, or an uploaded-file object that
            exposes the path through a string ``name`` attribute. ``None``
            (nothing uploaded) yields an empty string.

    Returns:
        The OCR text of all pages joined together; no separator is inserted
        between pages.
    """
    if pdf_file is None:
        return ""

    # Upload widgets may hand over a temp-file wrapper instead of a path.
    # Plain strings and path-like objects are passed through untouched.
    source = pdf_file
    if not isinstance(source, str) and not hasattr(source, "__fspath__"):
        wrapped_path = getattr(source, "name", None)
        if isinstance(wrapped_path, str):
            source = wrapped_path

    page_texts = []
    # The context manager closes the document even if rendering or OCR raises.
    with fitz.open(source) as doc:
        for page in doc:
            pix = page.get_pixmap()
            img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
            page_texts.append(pytesseract.image_to_string(img))
    return "".join(page_texts)
def pdf_to_text_interface(pdf_file):
    """Callback wired into the Gradio interface.

    Forwards the uploaded file to ``pdf_to_text`` and returns its result
    unchanged.
    """
    return pdf_to_text(pdf_file)
# Build the web UI: a single file input mapped to a text output.
iface = gr.Interface(
    fn=pdf_to_text_interface,
    inputs="file",
    outputs="text",
    title="PDF to Text Converter",
)

# Start serving the interface.
iface.launch()