Spaces:
Sleeping
Sleeping
import PyPDF2
def extract_text_from_pdf(pdf_path):
    """Extract the text of every page of a PDF file.

    Args:
        pdf_path: Path of the PDF file; it is opened in binary mode.

    Returns:
        A list with one entry per page, in page order, holding whatever
        ``page.extract_text()`` returned for that page.
    """
    # Pages are extracted inside the ``with`` block, while the file object
    # handed to the reader is still open.
    with open(pdf_path, 'rb') as file:
        pdf_reader = PyPDF2.PdfReader(file)
        return [page.extract_text() for page in pdf_reader.pages]
# Example usage: extract every page of the sample PDF and print each page's
# text under a "Page N:" header, followed by a line of 100 '=' characters.
pdf_path = 'data/07-VF2_UDM_Oneframe_A4-2023.pdf'
pdf_text = extract_text_from_pdf(pdf_path)
for page_num, page_text in enumerate(pdf_text, start=1):  # pages numbered from 1
    print(f"Page {page_num}:\n{page_text}\n{'='*100}\n")