Update app.py
Browse files
app.py
CHANGED
@@ -40,16 +40,26 @@ headers1 = {"Authorization": "Bearer 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"}
|
|
40 |
API_URL2 = "https://api-inference.huggingface.co/models/gpt2"
|
41 |
headers2 = {"Authorization": "Bearer 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"}
|
42 |
|
43 |
-
def read_pdf(file):
|
44 |
-
# images=pdf2image.convert_from_path(file)
|
45 |
-
# # print(type(images))
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
53 |
|
54 |
def engsum(output):
|
55 |
def query(payload):
|
@@ -90,7 +100,7 @@ def main():
|
|
90 |
#file = uploaded_photo.read() # Read the data
|
91 |
#image_result = open(uploaded_photo.name, 'wb') # creates a writable image and later we can write the decoded result
|
92 |
#image_result.write(file)
|
93 |
-
tet =
|
94 |
#tet = pytesseract.image_to_string(img, lang="ben") if st.checkbox("Mark to see Bangla Image's Text") else pytesseract.image_to_string(img)
|
95 |
values = st.slider('Select a approximate number of lines to see and summarize',value=[0, len(tet)//(7*100)])
|
96 |
text = tet[values[0]*7*10:values[1]*7*100] if values[0]!=len(tet)//(7*100) else tet[len(tet)//(7*100):]
|
|
|
40 |
API_URL2 = "https://api-inference.huggingface.co/models/gpt2"
|
41 |
headers2 = {"Authorization": "Bearer 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"}
|
42 |
|
43 |
+
# def read_pdf(file):
|
44 |
+
# # images=pdf2image.convert_from_path(file)
|
45 |
+
# # # print(type(images))
|
46 |
+
# pdfReader = PdfFileReader(file)
|
47 |
+
# count = pdfReader.numPages
|
48 |
+
# all_page_text = " "
|
49 |
+
# for i in range(count):
|
50 |
+
# page = pdfReader.getPage(i)
|
51 |
+
# all_page_text += page.extractText()+" "
|
52 |
+
# return all_page_text
|
53 |
+
def read_pdf_with_pdfplumber(file):
    """Extract the text of every page of a PDF and return it as one string.

    Args:
        file: Anything ``pdfplumber.open`` accepts. The caller in this file
            passes the uploaded file object directly.

    Returns:
        The text of all pages concatenated in page order. No separator is
        inserted between pages. Pages that yield no text contribute an
        empty string.
    """
    # Open the uploaded PDF with pdfplumber; the context manager closes it
    # again even if extraction fails part-way through.
    with pdfplumber.open(file) as pdf:
        # extract_text() can return None for a page without a text layer
        # (e.g. a scanned image) on some pdfplumber versions; fall back to ""
        # so such a page does not raise TypeError during concatenation.
        page_texts = [page.extract_text() or "" for page in pdf.pages]

    return "".join(page_texts)
|
63 |
|
64 |
def engsum(output):
|
65 |
def query(payload):
|
|
|
100 |
#file = uploaded_photo.read() # Read the data
|
101 |
#image_result = open(uploaded_photo.name, 'wb') # creates a writable image and later we can write the decoded result
|
102 |
#image_result.write(file)
|
103 |
+
tet = read_pdf_with_pdfplumber(uploaded_photo)
|
104 |
#tet = pytesseract.image_to_string(img, lang="ben") if st.checkbox("Mark to see Bangla Image's Text") else pytesseract.image_to_string(img)
|
105 |
values = st.slider('Select a approximate number of lines to see and summarize',value=[0, len(tet)//(7*100)])
|
106 |
text = tet[values[0]*7*10:values[1]*7*100] if values[0]!=len(tet)//(7*100) else tet[len(tet)//(7*100):]
|