Ari committed on
Commit
ec8c26c
·
verified ·
1 Parent(s): ac28e59

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -2
app.py CHANGED
@@ -13,7 +13,7 @@ nltk.download('punkt')
13
  tokenizer = AutoTokenizer.from_pretrained("facebook/bart-large-cnn")
14
  model = AutoModelForSeq2SeqLM.from_pretrained("facebook/bart-large-cnn")
15
 
16
- # Function to convert DOCX to PDF
17
  def docx_to_pdf(docx_file, output_pdf="converted_doc.pdf"):
18
  doc = Document(docx_file)
19
  full_text = []
@@ -23,7 +23,12 @@ def docx_to_pdf(docx_file, output_pdf="converted_doc.pdf"):
23
  # Create a PDF and write the extracted text
24
  pdf = FPDF()
25
  pdf.add_page()
26
- pdf.set_font("Times", size=12)
 
 
 
 
 
27
  pdf.multi_cell(190, 10, txt="\n".join(full_text), align='C')
28
  pdf.output(output_pdf)
29
  return output_pdf
 
13
  tokenizer = AutoTokenizer.from_pretrained("facebook/bart-large-cnn")
14
  model = AutoModelForSeq2SeqLM.from_pretrained("facebook/bart-large-cnn")
15
 
16
+ # Function to convert DOCX to PDF with UTF-8 support
17
  def docx_to_pdf(docx_file, output_pdf="converted_doc.pdf"):
18
  doc = Document(docx_file)
19
  full_text = []
 
23
  # Create a PDF and write the extracted text
24
  pdf = FPDF()
25
  pdf.add_page()
26
+
27
+ # Set a UTF-8 compatible font (DejaVuSans)
28
+ pdf.add_font('DejaVu', '', 'DejaVuSans.ttf', uni=True)
29
+ pdf.set_font("DejaVu", size=12)
30
+
31
+ # Write the content, ensuring UTF-8 encoding is supported
32
  pdf.multi_cell(190, 10, txt="\n".join(full_text), align='C')
33
  pdf.output(output_pdf)
34
  return output_pdf