Update app.py
Browse files
app.py
CHANGED
@@ -55,22 +55,17 @@ def read_pdf(file):
|
|
55 |
# text = pytesseract.image_to_string(image_name, lang="ben") if st.checkbox("Mark to see Bangla Image's Text") else pytesseract.image_to_string(image_name)
|
56 |
all_page_text += page.extractText()+" "
|
57 |
return all_page_text
|
58 |
-
|
59 |
-
#
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
#
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
# # get co-ordinates to cr
|
70 |
-
# # # get co-ordinates to cr
|
71 |
-
# text = pytesseract.image_to_string(image_name, lang="ben") if st.checkbox("Mark to see Bangla Image's Text") else pytesseract.image_to_string(image_name)
|
72 |
-
# all_page_text += text + " " #page.extractText()
|
73 |
-
# return all_page_text
|
74 |
st.title("NLP APPLICATION")
|
75 |
#@st.cache_resource(experimental_allow_widgets=True)
|
76 |
def main():
|
@@ -97,7 +92,7 @@ def main():
|
|
97 |
#file = uploaded_photo.read() # Read the data
|
98 |
#image_result = open(uploaded_photo.name, 'wb') # creates a writable image and later we can write the decoded result
|
99 |
#image_result.write(file)
|
100 |
-
tet =
|
101 |
#tet = pytesseract.image_to_string(img, lang="ben") if st.checkbox("Mark to see Bangla Image's Text") else pytesseract.image_to_string(img)
|
102 |
values = st.slider('Select a approximate number of lines to see and summarize',value=[0, len(tet)//(7*10)])
|
103 |
text = tet[values[0]*7*10:values[1]*7*10] if values[0]!=len(tet)//(7*10) else tet[len(tet)//(7*10):]
|
|
|
55 |
# text = pytesseract.image_to_string(image_name, lang="ben") if st.checkbox("Mark to see Bangla Image's Text") else pytesseract.image_to_string(image_name)
|
56 |
all_page_text += page.extractText()+" "
|
57 |
return all_page_text
|
58 |
+
def read_pdf_with_pdfplumber(file):
    """Extract the text of every page of a PDF, display it, and return it.

    Args:
        file: Whatever ``pdfplumber.open`` accepts; the caller in this file
            passes the uploaded file object.

    Returns:
        str: The text of all pages concatenated in page order (no separator
        is inserted between pages). Empty string if no page yields text.
    """
    # Open the uploaded PDF file with pdfplumber; the context manager closes
    # the document once extraction is done.
    with pdfplumber.open(file) as pdf:
        # `extract_text()` may yield nothing for a page without a text layer
        # (e.g. a scanned image); fall back to "" so joining cannot fail.
        extracted_text = "".join(
            page.extract_text() or "" for page in pdf.pages
        )

    # Display the extracted text
    st.text(extracted_text)

    # The caller slices this value and takes its length, so it must be
    # returned rather than only displayed.
    return extracted_text
|
67 |
+
|
68 |
+
|
|
|
|
|
|
|
|
|
|
|
69 |
st.title("NLP APPLICATION")
|
70 |
#@st.cache_resource(experimental_allow_widgets=True)
|
71 |
def main():
|
|
|
92 |
#file = uploaded_photo.read() # Read the data
|
93 |
#image_result = open(uploaded_photo.name, 'wb') # creates a writable image and later we can write the decoded result
|
94 |
#image_result.write(file)
|
95 |
+
tet = read_pdf_with_pdfplumber(uploaded_photo)
|
96 |
#tet = pytesseract.image_to_string(img, lang="ben") if st.checkbox("Mark to see Bangla Image's Text") else pytesseract.image_to_string(img)
|
97 |
values = st.slider('Select a approximate number of lines to see and summarize',value=[0, len(tet)//(7*10)])
|
98 |
text = tet[values[0]*7*10:values[1]*7*10] if values[0]!=len(tet)//(7*10) else tet[len(tet)//(7*10):]
|