Spaces:
Sleeping
Sleeping
Ari
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -1,7 +1,8 @@
|
|
1 |
import gradio as gr
|
2 |
from docx import Document # For .docx handling
|
3 |
-
from fpdf import FPDF # Python-native PDF generation
|
4 |
from gtts import gTTS
|
|
|
|
|
5 |
|
6 |
# Function to extract text from a .docx file and convert to PDF
|
7 |
def docx_to_pdf(docx_file):
|
@@ -13,13 +14,17 @@ def docx_to_pdf(docx_file):
|
|
13 |
full_text.append(para.text)
|
14 |
extracted_text = '\n'.join(full_text)
|
15 |
|
16 |
-
#
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
|
|
|
|
|
|
|
|
21 |
pdf_output_path = "document_output.pdf"
|
22 |
-
|
23 |
|
24 |
# Convert the text to audio using gTTS
|
25 |
tts = gTTS(text=extracted_text, lang='en', slow=False)
|
|
|
1 |
import gradio as gr
|
2 |
from docx import Document # For .docx handling
|
|
|
3 |
from gtts import gTTS
|
4 |
+
import os
|
5 |
+
import pdfkit # For converting text to PDF
|
6 |
|
7 |
# Function to extract text from a .docx file and convert to PDF
|
8 |
def docx_to_pdf(docx_file):
|
|
|
14 |
full_text.append(para.text)
|
15 |
extracted_text = '\n'.join(full_text)
|
16 |
|
17 |
+
# Convert the extracted text into an HTML format for pdfkit
|
18 |
+
html_content = f"""
|
19 |
+
<html>
|
20 |
+
<head><meta charset="UTF-8"></head>
|
21 |
+
<body><pre>{extracted_text}</pre></body>
|
22 |
+
</html>
|
23 |
+
"""
|
24 |
+
|
25 |
+
# Generate the PDF using pdfkit
|
26 |
pdf_output_path = "document_output.pdf"
|
27 |
+
pdfkit.from_string(html_content, pdf_output_path)
|
28 |
|
29 |
# Convert the text to audio using gTTS
|
30 |
tts = gTTS(text=extracted_text, lang='en', slow=False)
|