Spaces:
Running
Running
arithescientist
committed on
Commit
·
4bdda66
1
Parent(s):
869dd18
Update app.py
Browse files
app.py
CHANGED
@@ -34,10 +34,31 @@ from pdfminer.high_level import extract_text
|
|
34 |
def pdf_to_text(file_obj):
|
35 |
text = extract_text(file_obj.name)
|
36 |
|
|
|
|
|
|
|
|
|
|
|
37 |
|
38 |
-
|
39 |
-
|
40 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
41 |
|
42 |
|
43 |
|
@@ -46,7 +67,7 @@ def pdf_to_text(file_obj):
|
|
46 |
# return path
|
47 |
#pageObject.extractText()
|
48 |
iface = gr.Interface(fn = pdf_to_text,
|
49 |
-
inputs = "file", outputs="audio" )
|
50 |
|
51 |
if __name__ == "__main__":
|
52 |
iface.launch(share=True)
|
|
|
34 |
def pdf_to_text(file_obj):
|
35 |
text = extract_text(file_obj.name)
|
36 |
|
37 |
+
output_text= bert_legal_model(text, min_length = 8, ratio = 0.05)
|
38 |
+
output_text = output_text.replace(' ',' ')
|
39 |
+
output_text = output_text .replace(',.',',')
|
40 |
+
output_text = output_text .replace('\n',' ')
|
41 |
+
output_text = output_text .replace('..','.')
|
42 |
|
43 |
+
pdf = FPDF()
|
44 |
+
|
45 |
+
# Add a page
|
46 |
+
pdf.add_page()
|
47 |
+
|
48 |
+
pdf.set_font("Times", size = 12)
|
49 |
+
|
50 |
+
# use the processed summary text as the PDF body (no file is opened here)
|
51 |
+
f = output_text
|
52 |
+
# insert the texts in pdf
|
53 |
+
pdf.multi_cell(190, 10, txt = f, align = 'C')
|
54 |
+
# save the pdf as legal.pdf
|
55 |
+
pdf.output("legal.pdf")
|
56 |
+
all_text
|
57 |
+
|
58 |
+
myobj = gTTS(text=output_text, lang='en', slow=False)
|
59 |
+
myobj.save("legal.wav")
|
60 |
+
|
61 |
+
return 'legal.wav', output_text, "legal.pdf"
|
62 |
|
63 |
|
64 |
|
|
|
67 |
# return path
|
68 |
#pageObject.extractText()
|
69 |
iface = gr.Interface(fn = pdf_to_text,
|
70 |
+
inputs = "file", outputs=["audio","text", "file"] )
|
71 |
|
72 |
if __name__ == "__main__":
|
73 |
iface.launch(share=True)
|