Spaces:
Running
Running
arithescientist
committed on
Commit
·
d468541
1
Parent(s):
630e8c3
Update app.py
Browse files
app.py
CHANGED
@@ -27,25 +27,13 @@ bert_legal_model = Summarizer(custom_model=custom_model, custom_tokenizer=custom
|
|
27 |
|
28 |
|
29 |
def pdf(file):
|
30 |
-
temp = open(
|
31 |
PDF_read = PDFFileReader(temp)
|
32 |
first_page = PDF_read.getPage
|
|
|
33 |
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
pdfReader = PyPDF2.PdfFileReader(pdfFileObject)
|
38 |
-
|
39 |
-
print(" No. Of Pages :", pdfReader.numPages)
|
40 |
-
|
41 |
-
pageObject = pdfReader.getPage(0)
|
42 |
-
|
43 |
-
print(pageObject.extractText())
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
return pageObject.extractText()
|
49 |
|
50 |
|
51 |
|
@@ -54,7 +42,7 @@ def pdf(file):
|
|
54 |
iface = gr.Interface(
|
55 |
pdf,
|
56 |
"file",
|
57 |
-
|
58 |
)
|
59 |
|
60 |
if __name__ == "__main__":
|
|
|
27 |
|
28 |
|
29 |
def pdf(file):
|
30 |
+
temp = open(file, 'rb')
|
31 |
PDF_read = PDFFileReader(temp)
|
32 |
first_page = PDF_read.getPage
|
33 |
+
|
34 |
|
35 |
+
return first_page
|
36 |
+
#pageObject.extractText()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
37 |
|
38 |
|
39 |
|
|
|
42 |
iface = gr.Interface(
|
43 |
pdf,
|
44 |
"file",
|
45 |
+
first_page
|
46 |
)
|
47 |
|
48 |
if __name__ == "__main__":
|