Ari committed on
Commit
e249de1
·
verified ·
1 Parent(s): 3aeb3ce

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -7
app.py CHANGED
@@ -1,7 +1,8 @@
1
  import gradio as gr
2
  from docx import Document # For .docx handling
3
- from fpdf import FPDF # Python-native PDF generation
4
  from gtts import gTTS
 
 
5
 
6
  # Function to extract text from a .docx file and convert to PDF
7
  def docx_to_pdf(docx_file):
@@ -13,13 +14,17 @@ def docx_to_pdf(docx_file):
13
  full_text.append(para.text)
14
  extracted_text = '\n'.join(full_text)
15
 
16
- # Generate the PDF using fpdf
17
- pdf = FPDF()
18
- pdf.add_page()
19
- pdf.set_font("Arial", size=12)
20
- pdf.multi_cell(190, 10, txt=extracted_text)
 
 
 
 
21
  pdf_output_path = "document_output.pdf"
22
- pdf.output(pdf_output_path)
23
 
24
  # Convert the text to audio using gTTS
25
  tts = gTTS(text=extracted_text, lang='en', slow=False)
 
1
  import gradio as gr
2
  from docx import Document # For .docx handling
 
3
  from gtts import gTTS
4
+ import os
5
+ import pdfkit # For converting text to PDF
6
 
7
  # Function to extract text from a .docx file and convert to PDF
8
  def docx_to_pdf(docx_file):
 
14
  full_text.append(para.text)
15
  extracted_text = '\n'.join(full_text)
16
 
17
+ # Convert the extracted text into an HTML format for pdfkit
18
+ html_content = f"""
19
+ <html>
20
+ <head><meta charset="UTF-8"></head>
21
+ <body><pre>{extracted_text}</pre></body>
22
+ </html>
23
+ """
24
+
25
+ # Generate the PDF using pdfkit
26
  pdf_output_path = "document_output.pdf"
27
+ pdfkit.from_string(html_content, pdf_output_path)
28
 
29
  # Convert the text to audio using gTTS
30
  tts = gTTS(text=extracted_text, lang='en', slow=False)