Spaces:
Sleeping
Sleeping
Commit
·
f351a77
1
Parent(s):
5e3b3bb
Update app.py
Browse files
app.py
CHANGED
@@ -40,24 +40,24 @@ def pdf(file):
|
|
40 |
# OCR the image using Google's tesseract
|
41 |
content += pt.image_to_string(pages[i])
|
42 |
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
|
62 |
|
63 |
|
|
|
40 |
# OCR the image using Google's tesseract
|
41 |
content += pt.image_to_string(pages[i])
|
42 |
|
43 |
+
output_text= bert_legal_model(content, min_length = 8, ratio = 0.05)
|
44 |
+
output_text = output_text.replace(' ',' ')
|
45 |
+
output_text = output_text .replace(',.',',')
|
46 |
+
output_text = output_text .replace('\n',' ')
|
47 |
+
output_text = output_text .replace('..','.')
|
48 |
+
|
49 |
+
pdf = FPDF()
|
50 |
+
|
51 |
+
# Add a page
|
52 |
+
pdf.add_page()
|
53 |
+
pdf.set_font("Times", size = 12)
|
54 |
+
|
55 |
+
# open the text file in read mode
|
56 |
+
f = all_text2
|
57 |
+
|
58 |
+
|
59 |
+
return pdf.output("legal.pdf")
|
60 |
+
|
61 |
|
62 |
|
63 |
|