Spaces:
Running
Running
Ari
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -13,7 +13,7 @@ nltk.download('punkt')
|
|
13 |
tokenizer = AutoTokenizer.from_pretrained("facebook/bart-large-cnn")
|
14 |
model = AutoModelForSeq2SeqLM.from_pretrained("facebook/bart-large-cnn")
|
15 |
|
16 |
-
# Function to convert DOCX to PDF
|
17 |
def docx_to_pdf(docx_file, output_pdf="converted_doc.pdf"):
|
18 |
doc = Document(docx_file)
|
19 |
full_text = []
|
@@ -23,7 +23,12 @@ def docx_to_pdf(docx_file, output_pdf="converted_doc.pdf"):
|
|
23 |
# Create a PDF and write the extracted text
|
24 |
pdf = FPDF()
|
25 |
pdf.add_page()
|
26 |
-
|
|
|
|
|
|
|
|
|
|
|
27 |
pdf.multi_cell(190, 10, txt="\n".join(full_text), align='C')
|
28 |
pdf.output(output_pdf)
|
29 |
return output_pdf
|
|
|
13 |
tokenizer = AutoTokenizer.from_pretrained("facebook/bart-large-cnn")
|
14 |
model = AutoModelForSeq2SeqLM.from_pretrained("facebook/bart-large-cnn")
|
15 |
|
16 |
+
# Function to convert DOCX to PDF with UTF-8 support
|
17 |
def docx_to_pdf(docx_file, output_pdf="converted_doc.pdf"):
|
18 |
doc = Document(docx_file)
|
19 |
full_text = []
|
|
|
23 |
# Create a PDF and write the extracted text
|
24 |
pdf = FPDF()
|
25 |
pdf.add_page()
|
26 |
+
|
27 |
+
# Set a UTF-8 compatible font (DejaVuSans)
|
28 |
+
pdf.add_font('DejaVu', '', 'DejaVuSans.ttf', uni=True)
|
29 |
+
pdf.set_font("DejaVu", size=12)
|
30 |
+
|
31 |
+
# Write the content, ensuring UTF-8 encoding is supported
|
32 |
pdf.multi_cell(190, 10, txt="\n".join(full_text), align='C')
|
33 |
pdf.output(output_pdf)
|
34 |
return output_pdf
|