arithescientist committed on
Commit
4bdda66
·
1 Parent(s): 869dd18

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -4
app.py CHANGED
@@ -34,10 +34,31 @@ from pdfminer.high_level import extract_text
34
  def pdf_to_text(file_obj):
35
  text = extract_text(file_obj.name)
36
 
 
 
 
 
 
37
 
38
- myobj = gTTS(text=text, lang='en', slow=False)
39
- myobj.save("test.wav")
40
- return 'test.wav'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
 
42
 
43
 
@@ -46,7 +67,7 @@ def pdf_to_text(file_obj):
46
  # return path
47
  #pageObject.extractText()
48
  iface = gr.Interface(fn = pdf_to_text,
49
- inputs = "file", outputs="audio" )
50
 
51
  if __name__ == "__main__":
52
  iface.launch(share=True)
 
34
  def pdf_to_text(file_obj):
35
  text = extract_text(file_obj.name)
36
 
37
+ output_text= bert_legal_model(text, min_length = 8, ratio = 0.05)
38
+ output_text = output_text.replace(' ',' ')
39
+ output_text = output_text .replace(',.',',')
40
+ output_text = output_text .replace('\n',' ')
41
+ output_text = output_text .replace('..','.')
42
 
43
+ pdf = FPDF()
44
+
45
+ # Add a page
46
+ pdf.add_page()
47
+
48
+ pdf.set_font("Times", size = 12)
49
+
50
+ # open the text file in read mode
51
+ f = output_text
52
+ # insert the texts in pdf
53
+ pdf.multi_cell(190, 10, txt = f, align = 'C')
54
+ # save the pdf with name .pdf
55
+ pdf.output("legal.pdf")
56
+ all_text
57
+
58
+ myobj = gTTS(text=output_text, lang='en', slow=False)
59
+ myobj.save("legal.wav")
60
+
61
+ return 'legal.wav', output_text, "legal.pdf"
62
 
63
 
64
 
 
67
  # return path
68
  #pageObject.extractText()
69
  iface = gr.Interface(fn = pdf_to_text,
70
+ inputs = "file", outputs=["audio","text", "file"] )
71
 
72
  if __name__ == "__main__":
73
  iface.launch(share=True)