import gradio as gr
import os
import sys
import subprocess
from docx2pdf import convert
from platform import system
import logging
from pathlib import Path


def install_libreoffice():
    """
    Install LibreOffice on Linux systems if not already installed.

    Probes for an existing installation by running ``libreoffice --version``.
    If the probe fails and the platform is Linux, runs ``apt-get update``
    followed by ``apt-get install -y libreoffice``. Installation failures are
    reported on stdout and never raised, so the app can still start.
    """
    try:
        # Check if LibreOffice is already installed
        subprocess.run(
            ['libreoffice', '--version'],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            check=True,
        )
        print("LibreOffice is already installed.")
        return
    except (subprocess.CalledProcessError, FileNotFoundError):
        # Not installed (or not runnable): fall through to the install attempt.
        pass

    if system() != "Linux":
        print("LibreOffice installation is only supported on Linux systems.")
        return

    try:
        print("Installing LibreOffice...")
        # Update package lists
        subprocess.run(['apt-get', 'update'], check=True)
        # Install LibreOffice
        subprocess.run(['apt-get', 'install', '-y', 'libreoffice'], check=True)
        print("LibreOffice installed successfully.")
    except (subprocess.CalledProcessError, OSError) as e:
        # OSError covers a missing or non-executable apt-get, which would
        # otherwise escape this best-effort installer.
        print(f"Failed to install LibreOffice: {e}")
        # Don't exit, as this might prevent the app from starting
        print("Continuing without LibreOffice. Conversion on Linux may fail.")


# Install LibreOffice if on Linux
try:
    if system() == "Linux":
        install_libreoffice()
except Exception as e:
    print(f"Error during LibreOffice installation: {e}")


class WordToPDFConverter:
    """
    A cross-platform Word to PDF converter.

    Uses LibreOffice (headless) when the platform is Linux and docx2pdf on
    every other platform.
    """

    def __init__(self):
        """Record the platform, set up logging and, on Linux, check LibreOffice.

        Raises:
            SystemError: On Linux when the ``libreoffice`` executable is not found.
        """
        self.platform = system()
        self.logger = self._setup_logger()

        # Verify LibreOffice installation on Linux
        if self.platform == "Linux":
            self._verify_libreoffice()

    def _setup_logger(self):
        """Return the shared 'WordToPDFConverter' logger, configured once."""
        logger = logging.getLogger('WordToPDFConverter')
        logger.setLevel(logging.INFO)

        # The logger is shared by name across instances; only attach a handler
        # the first time so messages are not duplicated.
        if not logger.handlers:
            handler = logging.StreamHandler()
            formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
            handler.setFormatter(formatter)
            logger.addHandler(handler)

        return logger

    def _verify_libreoffice(self):
        """Verify the ``libreoffice`` executable can be launched.

        Only a missing executable is treated as an error; the exit status of
        ``libreoffice --version`` is not inspected.

        Raises:
            SystemError: If the executable is not found.
        """
        try:
            subprocess.run(
                ['libreoffice', '--version'],
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
            )
        except FileNotFoundError as err:
            raise SystemError(
                "LibreOffice is required for Linux systems. "
                "Please install it using: sudo apt-get install libreoffice"
            ) from err

    def _convert_with_libreoffice(self, input_path, output_path=None):
        """Convert Word to PDF using LibreOffice (for Linux).

        Args:
            input_path: Path to the Word document.
            output_path: Desired PDF path. Defaults to the input path with a
                ``.pdf`` suffix.

        Returns:
            str: Absolute path of the resulting PDF.

        Raises:
            RuntimeError: If LibreOffice exits with a non-zero status or the
                expected PDF is missing afterwards.
        """
        input_path = Path(input_path).absolute()

        if output_path:
            output_path = Path(output_path).absolute()
        else:
            output_path = input_path.with_suffix('.pdf')

        # Ensure output directory exists
        output_path.parent.mkdir(parents=True, exist_ok=True)

        cmd = [
            'libreoffice',
            '--headless',
            '--convert-to', 'pdf',
            '--outdir', str(output_path.parent),
            str(input_path),
        ]

        try:
            process = subprocess.run(
                cmd,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True,
            )

            if process.returncode != 0:
                raise RuntimeError(f"LibreOffice conversion failed: {process.stderr}")

            # LibreOffice creates PDF with the same name in the output directory
            created_pdf = output_path.parent / input_path.with_suffix('.pdf').name

            # Guard against a zero exit status with no output file, which would
            # otherwise surface later as a confusing rename/download error.
            if not created_pdf.exists():
                details = (process.stderr or process.stdout or "").strip()
                raise RuntimeError(
                    f"LibreOffice conversion failed: no PDF was produced. {details}"
                )

            # Rename if a specific output path was requested
            if output_path.name != created_pdf.name:
                # replace() overwrites an existing target on every platform.
                created_pdf.replace(output_path)

            return str(output_path)

        except Exception as e:
            self.logger.error(f"Error during LibreOffice conversion: {str(e)}")
            raise

    def convert_to_pdf(self, input_path, output_path=None):
        """
        Convert a Word document to PDF.

        Args:
            input_path (str): Path to the input Word document
            output_path (str, optional): Path for the output PDF. Defaults to
                the input path with its extension replaced by ``.pdf``.

        Returns:
            str: Path to the created PDF file

        Raises:
            FileNotFoundError: If ``input_path`` does not exist.
            Exception: Any error raised by the underlying converter is logged
                and re-raised.
        """
        input_path = os.path.abspath(input_path)
        if not os.path.exists(input_path):
            raise FileNotFoundError(f"Word document not found: {input_path}")

        if output_path:
            output_path = os.path.abspath(output_path)
        else:
            output_path = os.path.splitext(input_path)[0] + '.pdf'

        try:
            if self.platform == "Linux":
                self.logger.info(f"Converting {input_path} using LibreOffice...")
                return self._convert_with_libreoffice(input_path, output_path)

            self.logger.info(f"Converting {input_path} using docx2pdf...")
            convert(input_path, output_path)
            return output_path

        except Exception as e:
            self.logger.error(f"Conversion failed: {str(e)}")
            raise


def convert_word_to_pdf(input_file):
    """
    Gradio-friendly wrapper for Word to PDF conversion.

    Args:
        input_file (str): Path to the uploaded Word document

    Returns:
        str: Path to the converted PDF file

    Raises:
        gr.Error: If no file was provided or the conversion fails.
    """
    # Without an upload the path is empty/None; report that directly instead
    # of letting a TypeError from os.path surface as a conversion failure.
    if not input_file:
        raise gr.Error("Please upload a Word document before converting.")

    try:
        converter = WordToPDFConverter()

        # Write the PDF next to the uploaded file, reusing its base name
        output_file = os.path.splitext(input_file)[0] + '.pdf'

        # Convert the file
        pdf_path = converter.convert_to_pdf(input_file, output_file)

        return pdf_path
    except Exception as e:
        raise gr.Error(f"Conversion failed: {str(e)}") from e


# Create Gradio Interface
def create_gradio_interface():
    """
    Create a Gradio interface for Word to PDF conversion.

    Returns:
        gr.Interface: Configured Gradio interface
    """
    interface = gr.Interface(
        fn=convert_word_to_pdf,
        inputs=gr.File(
            label="Upload Word Document",
            type="filepath",
            file_types=['.doc', '.docx'],
        ),
        outputs=gr.File(label="Download PDF"),
        title="Word to PDF Converter",
        description="Upload a Word document and convert it to PDF while preserving formatting and hyperlinks.",
        theme="soft",
    )
    return interface


# Launch the Gradio app
if __name__ == "__main__":
    app = create_gradio_interface()
    app.launch(share=True)