pdf-to-table / src /pdfParser.py
regraded01's picture
feat: store model_id as a config variable
5d4bf7d
raw
history blame contribute delete
374 Bytes
import fitz
def extract_text_from_pdf(pdf_file) -> str:
    """Return the concatenated text of every page in a PDF.

    Args:
        pdf_file: An object exposing ``read()`` whose result is passed to
            ``fitz.open`` as the ``stream`` argument (i.e. the raw PDF
            content), such as an uploaded file or an open binary file.

    Returns:
        The text of all pages joined in page order with no separator.
        If anything goes wrong while reading or parsing, no exception is
        raised; instead a string of the form
        ``"Failed to load in text: <error>"`` is returned.
    """
    try:
        # Use the document as a context manager so its underlying resources
        # are released on both the success path and when a page fails to
        # parse part-way through.
        with fitz.open(stream=pdf_file.read(), filetype="pdf") as document:
            # Iterating the document yields pages in order; join once rather
            # than growing a string with repeated ``+=``.
            return "".join(page.get_text() for page in document)
    except Exception as e:
        # Deliberately broad: this function reports every failure to the
        # caller as a message string rather than propagating the exception.
        # NOTE(review): callers can only tell an error from real text by
        # this prefix -- confirm that is the intended contract.
        return f"Failed to load in text: {e}"