"""Word-to-PDF conversion with an optional Gradio web front end.

The converter uses docx2pdf on non-Linux platforms and a headless
LibreOffice process on Linux.  Both third-party imports are guarded so the
converter class can still be imported when only one backend (or no web UI)
is installed; a clear error is raised at the point of use instead.
"""
import logging
import os
import subprocess
from pathlib import Path
from platform import system

try:
    import gradio as gr
except ImportError:  # Only the web UI functions need gradio.
    gr = None

try:
    from docx2pdf import convert
except ImportError:  # Not needed on Linux, where LibreOffice is used.
    convert = None


# Candidate example documents offered in the UI.  These are placeholders;
# only entries that exist on disk are actually passed to Gradio.
EXAMPLE_DOCUMENTS = (
    "/path/to/sample1.docx",
    "/path/to/sample2.doc",
)


class WordToPDFConverter:
    """
    A cross-platform Word to PDF converter that preserves formatting and hyperlinks.
    Uses docx2pdf for Windows/Mac and LibreOffice for Linux.
    """

    def __init__(self):
        self.platform = system()
        self.logger = self._setup_logger()

        # Verify LibreOffice installation on Linux
        if self.platform == "Linux":
            self._verify_libreoffice()

    def _setup_logger(self):
        """Return the shared 'WordToPDFConverter' logger, configuring it once.

        The handler is only attached when the logger has none, so creating
        several converter instances does not duplicate log output.
        """
        logger = logging.getLogger('WordToPDFConverter')
        logger.setLevel(logging.INFO)

        if not logger.handlers:
            handler = logging.StreamHandler()
            formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
            handler.setFormatter(formatter)
            logger.addHandler(handler)

        return logger

    def _verify_libreoffice(self):
        """Verify that the ``libreoffice`` executable can be launched.

        Raises:
            SystemError: If the executable is not found on PATH.
        """
        try:
            subprocess.run(
                ['libreoffice', '--version'],
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
            )
        except FileNotFoundError as err:
            # SystemError is kept for backward compatibility with callers
            # that already catch it.
            raise SystemError(
                "LibreOffice is required for Linux systems. \n"
                "Please install it using: sudo apt-get install libreoffice"
            ) from err

    def _convert_with_libreoffice(self, input_path, output_path=None):
        """Convert Word to PDF using LibreOffice (for Linux).

        Args:
            input_path: Path to the source document.
            output_path: Desired PDF path.  Defaults to the input path with
                a ``.pdf`` suffix.

        Returns:
            str: Absolute path of the resulting PDF.

        Raises:
            RuntimeError: If LibreOffice exits with a non-zero status or
                finishes without producing the expected PDF.
        """
        source = Path(input_path).absolute()
        if output_path:
            target = Path(output_path).absolute()
        else:
            target = source.with_suffix('.pdf')

        # Ensure output directory exists
        target.parent.mkdir(parents=True, exist_ok=True)

        cmd = [
            'libreoffice',
            '--headless',
            '--convert-to',
            'pdf',
            '--outdir',
            str(target.parent),
            str(source),
        ]

        try:
            process = subprocess.run(
                cmd,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True,
            )

            if process.returncode != 0:
                raise RuntimeError(f"LibreOffice conversion failed: {process.stderr}")

            # LibreOffice creates PDF with the same name in the output directory
            created_pdf = target.parent / source.with_suffix('.pdf').name

            # The exit status alone is not trusted: if no file was written,
            # fail here with LibreOffice's own output instead of returning
            # a path that does not exist.
            if not created_pdf.exists():
                details = (process.stderr or process.stdout or "").strip()
                raise RuntimeError(
                    "LibreOffice conversion failed: expected output "
                    f"{created_pdf} was not created. {details}"
                )

            # Rename if a specific output path was requested.  replace()
            # overwrites an existing target on every platform.
            if created_pdf != target:
                created_pdf.replace(target)

            return str(target)

        except Exception as e:
            self.logger.error("Error during LibreOffice conversion: %s", e)
            raise

    def convert_to_pdf(self, input_path, output_path=None):
        """
        Convert a Word document to PDF while preserving formatting and hyperlinks.

        Args:
            input_path (str): Path to the input Word document
            output_path (str, optional): Path for the output PDF

        Returns:
            str: Path to the created PDF file

        Raises:
            FileNotFoundError: If ``input_path`` does not exist.
            ImportError: If docx2pdf is needed (non-Linux) but not installed.
        """
        input_path = os.path.abspath(input_path)

        if not os.path.exists(input_path):
            raise FileNotFoundError(f"Word document not found: {input_path}")

        if output_path:
            output_path = os.path.abspath(output_path)
        else:
            output_path = os.path.splitext(input_path)[0] + '.pdf'

        try:
            if self.platform == "Linux":
                self.logger.info("Converting %s using LibreOffice...", input_path)
                return self._convert_with_libreoffice(input_path, output_path)

            if convert is None:
                raise ImportError(
                    "docx2pdf is required for conversion on this platform. "
                    "Install it using: pip install docx2pdf"
                )

            self.logger.info("Converting %s using docx2pdf...", input_path)
            convert(input_path, output_path)
            return output_path

        except Exception as e:
            self.logger.error("Conversion failed: %s", e)
            raise


def _require_gradio():
    """Raise ImportError with a clear message when gradio is unavailable."""
    if gr is None:
        raise ImportError(
            "gradio is required for the web interface. "
            "Install it using: pip install gradio"
        )


def available_examples(paths=EXAMPLE_DOCUMENTS):
    """Return Gradio example rows for the example files that exist.

    Args:
        paths: Iterable of candidate example file paths.

    Returns:
        list: One single-element row ``[path]`` per existing file, in the
        order given.  Empty when none of the files exist.
    """
    return [[path] for path in paths if os.path.isfile(path)]


def convert_word_to_pdf(input_file):
    """
    Gradio-friendly wrapper for Word to PDF conversion.

    Args:
        input_file (str): Path to the uploaded Word document

    Returns:
        str: Path to the converted PDF file

    Raises:
        gr.Error: If no file was supplied or the conversion fails.
    """
    _require_gradio()

    # Submitting the form without a file passes None; report that plainly
    # instead of surfacing a TypeError message to the user.
    if not input_file:
        raise gr.Error("Conversion failed: no Word document was uploaded.")

    try:
        converter = WordToPDFConverter()

        # Write the PDF next to the uploaded file, with the same base name.
        output_file = os.path.splitext(input_file)[0] + '.pdf'

        # Convert the file
        return converter.convert_to_pdf(input_file, output_file)

    except Exception as e:
        raise gr.Error(f"Conversion failed: {e}") from e


# Create Gradio Interface
def create_gradio_interface():
    """
    Create a Gradio interface for Word to PDF conversion.

    Returns:
        gr.Interface: Configured Gradio interface
    """
    _require_gradio()

    # Only offer examples that are really on disk; the configured
    # placeholder paths would otherwise be handed to Gradio as-is.
    examples = available_examples()

    interface = gr.Interface(
        fn=convert_word_to_pdf,
        inputs=gr.File(
            label="Upload Word Document",
            type="filepath",
            file_types=['.doc', '.docx'],
        ),
        outputs=gr.File(label="Download PDF"),
        title="Word to PDF Converter",
        description="Upload a Word document and convert it to PDF while preserving formatting and hyperlinks.",
        theme="soft",
        examples=examples or None,
    )
    return interface


# Launch the Gradio app
if __name__ == "__main__":
    app = create_gradio_interface()
    # NOTE(review): share=True asks Gradio for a public share link; confirm
    # this exposure is intended before deploying.
    app.launch(share=True)