|
import os |
|
from docx import Document |
|
import streamlit as st |
|
from utils import create_or_empty_dir, convert_pdf_to_images, create_docx_with_text |
|
|
|
extracted_images_dir = "extracted_images" |
|
|
|
current_dir = os.path.dirname(os.path.abspath(__file__)) |
|
uploads_dir = os.path.join(current_dir, "uploads") |
|
os.makedirs(uploads_dir, exist_ok=True) |
|
converted_docx_dir = os.path.join(current_dir, "converted_docx") |
|
os.makedirs(converted_docx_dir, exist_ok=True) |
|
|
|
|
|
uploaded_file = st.file_uploader("Upload a PDF file", type="pdf") |
|
|
|
|
|
if uploaded_file is not None: |
|
|
|
with open(os.path.join(uploads_dir, uploaded_file.name), "wb") as file: |
|
file.write(uploaded_file.getbuffer()) |
|
st.success("File uploaded successfully!") |
|
else: |
|
st.info("Please upload a PDF file.") |
|
|
|
|
|
pdf_files = [file for file in os.listdir(uploads_dir) if file.endswith(".pdf")] |
|
|
|
|
|
col1, col2 = st.columns(2) |
|
|
|
|
|
with col1: |
|
selected_files = [] |
|
for file in pdf_files: |
|
checkbox = st.checkbox(file) |
|
if checkbox: |
|
selected_files.append(file) |
|
|
|
|
|
if selected_files: |
|
|
|
if st.button("Convert"): |
|
|
|
print(f"Creating or emptying the {extracted_images_dir} directory") |
|
create_or_empty_dir(extracted_images_dir) |
|
|
|
|
|
for file in selected_files: |
|
pdf_path = os.path.join(uploads_dir, file) |
|
print(f"Converting {file} to images in {extracted_images_dir}") |
|
convert_pdf_to_images(pdf_path, extracted_images_dir) |
|
|
|
output_docx = os.path.join( |
|
converted_docx_dir, f'{file.replace(".pdf", "")}.docx' |
|
) |
|
image_folder = os.path.join(current_dir, extracted_images_dir) |
|
print( |
|
f'Creating {file.replace(".pdf", "")}.docx with text extracted from images in the {extracted_images_dir}' |
|
) |
|
create_docx_with_text(image_folder, output_docx) |
|
|
|
st.success("Conversion completed successfully!") |
|
|
|
|
|
with col2: |
|
docx_files = [ |
|
file for file in os.listdir(converted_docx_dir) if file.endswith(".docx") |
|
] |
|
for file in docx_files: |
|
st.download_button( |
|
f"Download {file}", |
|
open(os.path.join(converted_docx_dir, file), "rb").read(), |
|
file_name=file, |
|
mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document", |
|
) |
|
|