|
import gradio as gr |
|
import os |
|
from docx2pdf import convert |
|
import subprocess |
|
from platform import system |
|
import logging |
|
from pathlib import Path |
|
|
|
class WordToPDFConverter:
    """Cross-platform Word to PDF converter.

    On Linux the conversion shells out to a headless LibreOffice process;
    on every other platform it delegates to ``docx2pdf.convert``.
    """

    def __init__(self):
        """Detect the host platform and prepare logging.

        Raises:
            SystemError: On Linux, when the ``libreoffice`` executable
                cannot be launched.
        """
        self.platform = system()
        self.logger = self._setup_logger()

        if self.platform == "Linux":
            self._verify_libreoffice()

    def _setup_logger(self):
        """Return the shared ``WordToPDFConverter`` logger set to INFO.

        A stream handler is attached only when the logger has none yet, so
        building several converters does not duplicate every log line.
        """
        logger = logging.getLogger('WordToPDFConverter')
        logger.setLevel(logging.INFO)

        if not logger.handlers:
            handler = logging.StreamHandler()
            handler.setFormatter(
                logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
            )
            logger.addHandler(handler)

        return logger

    def _verify_libreoffice(self):
        """Check that the ``libreoffice`` executable can be started.

        Raises:
            SystemError: If the executable is not found on ``PATH``.
        """
        try:
            # The version text itself is irrelevant, so it is discarded
            # instead of being buffered through pipes.
            subprocess.run(
                ['libreoffice', '--version'],
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
            )
        except FileNotFoundError as err:
            raise SystemError(
                "LibreOffice is required for Linux systems. "
                "Please install it using: sudo apt-get install libreoffice"
            ) from err

    def _convert_with_libreoffice(self, input_path, output_path=None):
        """Convert a Word document to PDF with headless LibreOffice.

        Args:
            input_path: Path to the Word document.
            output_path: Desired PDF path. Defaults to ``input_path`` with
                its suffix replaced by ``.pdf``.

        Returns:
            str: Absolute path of the PDF.

        Raises:
            RuntimeError: If LibreOffice exits with a non-zero status or
                finishes without producing the expected PDF.
        """
        input_path = Path(input_path).absolute()

        if output_path:
            output_path = Path(output_path).absolute()
        else:
            output_path = input_path.with_suffix('.pdf')

        output_path.parent.mkdir(parents=True, exist_ok=True)

        cmd = [
            'libreoffice',
            '--headless',
            '--convert-to',
            'pdf',
            '--outdir',
            str(output_path.parent),
            str(input_path),
        ]

        try:
            process = subprocess.run(
                cmd,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True,
            )

            if process.returncode != 0:
                raise RuntimeError(
                    f"LibreOffice conversion failed: {process.stderr}"
                )

            # Only the output directory is passed to LibreOffice; the file
            # is expected under the input's base name with a .pdf suffix.
            created_pdf = output_path.parent / input_path.with_suffix('.pdf').name

            # A zero exit status alone does not prove a PDF was written, so
            # confirm the file exists before handing its path to the caller.
            if not created_pdf.is_file():
                details = (process.stderr or process.stdout or "").strip()
                raise RuntimeError(
                    "LibreOffice conversion failed: no PDF was produced"
                    + (f" ({details})" if details else "")
                )

            if created_pdf != output_path:
                # replace() overwrites an existing target on every platform,
                # whereas rename() refuses to on Windows.
                created_pdf.replace(output_path)

            return str(output_path)

        except Exception as e:
            self.logger.error("Error during LibreOffice conversion: %s", e)
            raise

    def convert_to_pdf(self, input_path, output_path=None):
        """Convert a Word document to PDF.

        Args:
            input_path (str): Path to the input Word document.
            output_path (str, optional): Path for the output PDF. Defaults
                to the input path with its extension replaced by ``.pdf``.

        Returns:
            str: Path to the created PDF file.

        Raises:
            FileNotFoundError: If ``input_path`` is not an existing file.
            Exception: Any error raised by the underlying converter is
                logged and re-raised unchanged.
        """
        input_path = os.path.abspath(input_path)

        # isfile() rather than exists(): a directory is not a document.
        if not os.path.isfile(input_path):
            raise FileNotFoundError(f"Word document not found: {input_path}")

        if output_path:
            output_path = os.path.abspath(output_path)
        else:
            output_path = os.path.splitext(input_path)[0] + '.pdf'

        try:
            if self.platform == "Linux":
                self.logger.info("Converting %s using LibreOffice...", input_path)
                return self._convert_with_libreoffice(input_path, output_path)

            self.logger.info("Converting %s using docx2pdf...", input_path)
            # Mirror the LibreOffice branch, which also creates the target
            # directory before converting.
            os.makedirs(os.path.dirname(output_path), exist_ok=True)
            convert(input_path, output_path)
            return output_path

        except Exception as e:
            self.logger.error("Conversion failed: %s", e)
            raise
|
|
|
def convert_word_to_pdf(input_file):
    """Gradio-friendly wrapper for Word to PDF conversion.

    Args:
        input_file (str): Path to the uploaded Word document.

    Returns:
        str: Path to the converted PDF file, written next to the upload
        with a ``.pdf`` extension.

    Raises:
        gr.Error: If no file was supplied or the conversion fails; the
            message carries the underlying error text.
    """
    # An empty submission would otherwise reach os.path.splitext and
    # surface as an opaque TypeError message.
    # NOTE(review): Gradio appears to pass None when nothing is uploaded -
    # confirm against the installed Gradio version.
    if not input_file:
        raise gr.Error("Conversion failed: no Word document was uploaded.")

    try:
        converter = WordToPDFConverter()
        output_file = os.path.splitext(input_file)[0] + '.pdf'
        return converter.convert_to_pdf(input_file, output_file)
    except Exception as e:
        # Chain the cause so the original traceback stays in the logs.
        raise gr.Error(f"Conversion failed: {str(e)}") from e
|
|
|
|
|
def create_gradio_interface():
    """Build the Gradio UI that wraps ``convert_word_to_pdf``.

    Returns:
        gr.Interface: An interface with one file-upload input restricted to
        ``.doc``/``.docx`` and one file output for the resulting PDF.
    """
    upload_box = gr.File(
        label="Upload Word Document",
        type="filepath",
        file_types=['.doc', '.docx'],
    )
    download_box = gr.File(label="Download PDF")

    return gr.Interface(
        fn=convert_word_to_pdf,
        inputs=upload_box,
        outputs=download_box,
        title="Word to PDF Converter",
        description="Upload a Word document and convert it to PDF while preserving formatting and hyperlinks.",
        theme="soft",
    )
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: build the interface and start serving it.
    # NOTE(review): share=True presumably requests a public Gradio link -
    # confirm that exposure is intended for this deployment.
    create_gradio_interface().launch(share=True)