Spaces:

arithescientist
/

lincolnlegal

Sleeping

Ari committed on Sep 5, 2024

Commit

e249de1

verified ·

1 Parent(s): 3aeb3ce

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,7 +1,8 @@
 import gradio as gr
 from docx import Document  # For .docx handling
-from fpdf import FPDF  # Python-native PDF generation
 from gtts import gTTS
 # Function to extract text from a .docx file and convert to PDF
 def docx_to_pdf(docx_file):
@@ -13,13 +14,17 @@ def docx_to_pdf(docx_file):
             full_text.append(para.text)
         extracted_text = '\n'.join(full_text)
-        # Generate the PDF using fpdf
-        pdf = FPDF()
-        pdf.add_page()
-        pdf.set_font("Arial", size=12)
-        pdf.multi_cell(190, 10, txt=extracted_text)
         pdf_output_path = "document_output.pdf"
-        pdf.output(pdf_output_path)
         # Convert the text to audio using gTTS
         tts = gTTS(text=extracted_text, lang='en', slow=False)

 import gradio as gr
 from docx import Document  # For .docx handling
 from gtts import gTTS
+import os
+import pdfkit  # For converting text to PDF
 # Function to extract text from a .docx file and convert to PDF
 def docx_to_pdf(docx_file):
             full_text.append(para.text)
         extracted_text = '\n'.join(full_text)
+        # Convert the extracted text into an HTML format for pdfkit
+        html_content = f"""
+        <html>
+        <head><meta charset="UTF-8"></head>
+        <body><pre>{extracted_text}</pre></body>
+        </html>
+        """
+        # Generate the PDF using pdfkit
         pdf_output_path = "document_output.pdf"
+        pdfkit.from_string(html_content, pdf_output_path)
         # Convert the text to audio using gTTS
         tts = gTTS(text=extracted_text, lang='en', slow=False)