"""Gradio app: turn an uploaded .docx into extracted text, a PDF and speech."""

import html
import os

import gradio as gr
import pdfkit
from docx import Document  # For .docx handling
from gtts import gTTS

# Path to the wkhtmltopdf executable (update this path based on the
# Dockerfile output).
WKHTMLTOPDF_PATH = "/usr/local/bin/wkhtmltopdf"

# Configure pdfkit to use the correct wkhtmltopdf path.
config = pdfkit.configuration(wkhtmltopdf=WKHTMLTOPDF_PATH)


def _build_html(paragraphs):
    """Return a minimal HTML page with one escaped <p> per paragraph."""
    # Escape so that '<', '>' and '&' from the document are rendered
    # literally instead of being parsed as markup by wkhtmltopdf.
    body = "\n".join(f"<p>{html.escape(text)}</p>" for text in paragraphs)
    # Declare the charset explicitly so non-ASCII text is decoded as UTF-8.
    return (
        "<!DOCTYPE html>\n"
        '<html><head><meta charset="utf-8"></head>\n'
        f"<body>\n{body}\n</body></html>"
    )


def docx_to_pdf(docx_file):
    """Extract text from a .docx upload and render it as a PDF and as speech.

    Args:
        docx_file: The upload handed over by ``gr.File``. Either an object
            exposing the file path as ``.name`` or the path itself.

    Returns:
        A ``(audio_path, text, pdf_path)`` tuple. On failure the paths are
        ``None`` and ``text`` holds a message starting with
        ``"An error occurred: "``.
    """
    if docx_file is None:
        return None, "An error occurred: no file was uploaded.", None

    try:
        # Accept both a file wrapper (with .name) and a plain path string.
        docx_path = getattr(docx_file, "name", docx_file)

        # Extract text from the .docx file.
        paragraphs = [para.text for para in Document(docx_path).paragraphs]
        extracted_text = "\n".join(paragraphs)

        # gTTS refuses empty input; report that clearly instead of letting
        # its internal assertion message reach the user.
        if not extracted_text.strip():
            return None, "An error occurred: the document contains no text.", None

        # Generate the PDF using pdfkit with the custom wkhtmltopdf path.
        # NOTE(review): output paths are fixed, so concurrent requests
        # overwrite each other's files.
        pdf_output_path = "document_output.pdf"
        pdfkit.from_string(
            _build_html(paragraphs), pdf_output_path, configuration=config
        )

        # Convert the text to audio using gTTS. gTTS produces MP3 data, so
        # the file carries an .mp3 extension rather than .wav.
        tts = gTTS(text=extracted_text, lang="en", slow=False)
        audio_output_path = "document_audio.mp3"
        tts.save(audio_output_path)

        return audio_output_path, extracted_text, pdf_output_path
    except Exception as e:
        # UI boundary: surface the failure in the text box instead of
        # crashing the request.
        return None, f"An error occurred: {e}", None


# Gradio interface
iface = gr.Interface(
    fn=docx_to_pdf,
    inputs=gr.File(label="Upload .docx File"),
    outputs=[
        gr.Audio(label="Generated Audio"),
        gr.Textbox(label="Extracted Text"),
        gr.File(label="Generated PDF"),
    ],
)

if __name__ == "__main__":
    iface.launch()