Update app.py
Browse files
app.py
CHANGED
@@ -2,7 +2,7 @@
|
|
2 |
#App: NLP App with Streamlit
|
3 |
Description
|
4 |
This is a Natural Language Processing (NLP) based application that is useful for
|
5 |
-
Document/Text Summarization from Bangla and English Images
|
6 |
"""
|
7 |
# Core Pkgs
|
8 |
import os
|
@@ -55,16 +55,16 @@ def read_pdf(file):
|
|
55 |
# text = pytesseract.image_to_string(image_name, lang="ben") if st.checkbox("Mark to see Bangla Image's Text") else pytesseract.image_to_string(image_name)
|
56 |
all_page_text += page.extractText()+" "
|
57 |
return all_page_text
|
58 |
-
def read_pdf_with_pdfplumber(file):
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
|
69 |
|
70 |
st.title("NLP APPLICATION")
|
@@ -93,11 +93,11 @@ def main():
|
|
93 |
#file = uploaded_photo.read() # Read the data
|
94 |
#image_result = open(uploaded_photo.name, 'wb') # creates a writable image and later we can write the decoded result
|
95 |
#image_result.write(file)
|
96 |
-
tet =
|
97 |
#tet = pytesseract.image_to_string(img, lang="ben") if st.checkbox("Mark to see Bangla Image's Text") else pytesseract.image_to_string(img)
|
98 |
values = st.slider('Select a approximate number of lines to see and summarize',value=[0, len(tet)//(7*100)])
|
99 |
text = tet[values[0]*7*10:values[1]*7*10] if values[0]!=len(tet)//(7*10) else tet[len(tet)//(7*100):]
|
100 |
-
|
101 |
elif uploaded_photo:
|
102 |
img = Image.open(uploaded_photo)
|
103 |
img = img.save("img.png")
|
|
|
2 |
#App: NLP App with Streamlit
|
3 |
Description
|
4 |
This is a Natural Language Processing (NLP) based application that is useful for
|
5 |
+
Document/Text Summarization from Bangla images and English images/PDF files.
|
6 |
"""
|
7 |
# Core Pkgs
|
8 |
import os
|
|
|
55 |
# text = pytesseract.image_to_string(image_name, lang="ben") if st.checkbox("Mark to see Bangla Image's Text") else pytesseract.image_to_string(image_name)
|
56 |
all_page_text += page.extractText()+" "
|
57 |
return all_page_text
|
58 |
+
# def read_pdf_with_pdfplumber(file):
|
59 |
+
# # Open the uploaded PDF file with pdfplumber
|
60 |
+
# with pdfplumber.open(file) as pdf:
|
61 |
+
# extracted_text = ''
|
62 |
+
# for page in pdf.pages:
|
63 |
+
# extracted_text += page.extract_text()
|
64 |
|
65 |
+
# # Display the extracted text
|
66 |
+
# #st.text(extracted_text)
|
67 |
+
# return extracted_text
|
68 |
|
69 |
|
70 |
st.title("NLP APPLICATION")
|
|
|
93 |
#file = uploaded_photo.read() # Read the data
|
94 |
#image_result = open(uploaded_photo.name, 'wb') # creates a writable image and later we can write the decoded result
|
95 |
#image_result.write(file)
|
96 |
+
tet = read_pdf(uploaded_photo)
|
97 |
#tet = pytesseract.image_to_string(img, lang="ben") if st.checkbox("Mark to see Bangla Image's Text") else pytesseract.image_to_string(img)
|
98 |
values = st.slider('Select a approximate number of lines to see and summarize',value=[0, len(tet)//(7*100)])
|
99 |
text = tet[values[0]*7*10:values[1]*7*10] if values[0]!=len(tet)//(7*10) else tet[len(tet)//(7*100):]
|
100 |
+
st.success(text)
|
101 |
elif uploaded_photo:
|
102 |
img = Image.open(uploaded_photo)
|
103 |
img = img.save("img.png")
|