Spaces:
Sleeping
Sleeping
import argparse
from pathlib import Path

import pypandoc
from pdf2docx import Converter
def pdf_to_epub(pdf_path, epub_path, ignore_header_footer=True):
    """Convert a PDF file to EPUB by way of an intermediate DOCX file.

    The PDF is first converted to DOCX with pdf2docx, then the DOCX is
    converted to EPUB with pandoc (through pypandoc). The intermediate DOCX
    is written next to the PDF, with the same name and a ``.docx``
    extension, and is left on disk afterwards.

    Args:
        pdf_path: Path of the PDF file to read.
        epub_path: Path of the EPUB file to write.
        ignore_header_footer: Value forwarded to the PDF converter as both
            the ``ignore_header`` and ``ignore_footer`` settings.

    Raises:
        FileNotFoundError: If ``pdf_path`` is not an existing file.
        ValueError: If ``pdf_path`` already has a ``.docx`` extension, in
            which case the intermediate file would overwrite the input.
    """
    source = Path(pdf_path)
    if not source.is_file():
        raise FileNotFoundError(f"PDF file not found: {pdf_path}")
    if source.suffix.lower() == ".docx":
        raise ValueError(
            f"Input already has a .docx extension, refusing to overwrite it: {pdf_path}"
        )

    # Swap only the final extension. A plain str.replace('.pdf', ...) is
    # case-sensitive (so 'x.PDF' would map onto itself and be overwritten)
    # and would also rewrite '.pdf' occurring earlier in the path.
    docx_path = str(source.with_suffix(".docx"))

    # NOTE(review): confirm that the installed pdf2docx version honours
    # these two setting names; they are passed through unchanged.
    convert_settings = {
        "ignore_footer": ignore_header_footer,
        "ignore_header": ignore_header_footer,
    }

    # Step 1: convert PDF to DOCX. Close the converter even when the
    # conversion fails so the underlying PDF handle is not leaked.
    cv = Converter(pdf_path)
    try:
        cv.convert(docx_path, **convert_settings)
    finally:
        cv.close()

    # Step 2: convert DOCX to EPUB. With ``outputfile`` set the result is
    # written to disk and the return value is empty, so it is not printed.
    pypandoc.convert_file(docx_path, 'epub', outputfile=epub_path)
def main(argv=None):
    """Command-line entry point: convert one PDF file to EPUB.

    The EPUB is written next to the PDF, with the same name and an
    ``.epub`` extension.

    Args:
        argv: Optional list of command-line arguments (without the program
            name). When ``None``, argparse reads ``sys.argv[1:]``.

    Raises:
        SystemExit: With status 2 when the arguments are invalid or the
            PDF file does not exist (raised by ``parser.error``).
    """
    # Parse command-line arguments
    parser = argparse.ArgumentParser(description='Convert a PDF file to EPUB format.')
    parser.add_argument('pdf_path', type=str, help='Path to the PDF file to convert.')
    args = parser.parse_args(argv)

    source = Path(args.pdf_path)
    if not source.is_file():
        # Report a missing input as a usage error rather than letting the
        # conversion libraries fail with a traceback.
        parser.error(f"PDF file not found: {args.pdf_path}")

    # Derive the EPUB path by swapping only the final extension. A plain
    # str.replace('.pdf', '.epub') is case-sensitive, so 'x.PDF' would map
    # onto itself and the output would overwrite the input.
    epub_path = str(source.with_suffix('.epub'))

    # Perform conversion
    pdf_to_epub(args.pdf_path, epub_path)
    print(f"Conversion complete. EPUB file saved to: {epub_path}")


if __name__ == '__main__':
    main()