Spaces:

Soumen
/

Text-Summarization-and-NLP-tasks

Running

App Files Files Community

Soumen committed on Sep 6, 2023

Commit

d82d18a

1 Parent(s): 6d5b302

Update app.py

Browse files

Files changed (1) hide show

app.py +12 -12

app.py CHANGED Viewed

@@ -2,7 +2,7 @@
 #App: NLP App with Streamlit
 Description
 This is a Natural Language Processing(NLP) base Application that is useful for
-Document/Text Summarization from Bangla and English Images and PDF files.
 """
 # Core Pkgs
 import os
@@ -55,16 +55,16 @@ def read_pdf(file):
 #         text = pytesseract.image_to_string(image_name, lang="ben") if st.checkbox("Mark to see Bangla Image's Text") else pytesseract.image_to_string(image_name)
         all_page_text += page.extractText()+" "
     return all_page_text
-def read_pdf_with_pdfplumber(file):
-    # Open the uploaded PDF file with pdfplumber
-    with pdfplumber.open(file) as pdf:
-        extracted_text = ''
-        for page in pdf.pages:
-            extracted_text += page.extract_text()
-    # Display the extracted text
-    #st.text(extracted_text)
-    return extracted_text
 st.title("NLP APPLICATION")
@@ -93,11 +93,11 @@ def main():
             #file = uploaded_photo.read() # Read the data
             #image_result = open(uploaded_photo.name, 'wb') # creates a writable image and later we can write the decoded result
             #image_result.write(file)
-            tet = read_pdf_with_pdfplumber(uploaded_photo)
             #tet = pytesseract.image_to_string(img, lang="ben") if st.checkbox("Mark to see Bangla Image's Text") else pytesseract.image_to_string(img)
             values = st.slider('Select a approximate number of lines to see and summarize',value=[0, len(tet)//(7*100)])
             text = tet[values[0]*7*10:values[1]*7*10] if values[0]!=len(tet)//(7*10) else tet[len(tet)//(7*100):]
-            #st.success(text)
         elif uploaded_photo:
             img = Image.open(uploaded_photo)
             img = img.save("img.png")

 #App: NLP App with Streamlit
 Description
 This is a Natural Language Processing(NLP) base Application that is useful for
+Document/Text Summarization from Bangla images and English Images/PDF files.
 """
 # Core Pkgs
 import os
 #         text = pytesseract.image_to_string(image_name, lang="ben") if st.checkbox("Mark to see Bangla Image's Text") else pytesseract.image_to_string(image_name)
         all_page_text += page.extractText()+" "
     return all_page_text
+# def read_pdf_with_pdfplumber(file):
+#     # Open the uploaded PDF file with pdfplumber
+#     with pdfplumber.open(file) as pdf:
+#         extracted_text = ''
+#         for page in pdf.pages:
+#             extracted_text += page.extract_text()
+#     # Display the extracted text
+#     #st.text(extracted_text)
+#     return extracted_text
 st.title("NLP APPLICATION")
             #file = uploaded_photo.read() # Read the data
             #image_result = open(uploaded_photo.name, 'wb') # creates a writable image and later we can write the decoded result
             #image_result.write(file)
+            tet = read_pdf(uploaded_photo)
             #tet = pytesseract.image_to_string(img, lang="ben") if st.checkbox("Mark to see Bangla Image's Text") else pytesseract.image_to_string(img)
             values = st.slider('Select a approximate number of lines to see and summarize',value=[0, len(tet)//(7*100)])
             text = tet[values[0]*7*10:values[1]*7*10] if values[0]!=len(tet)//(7*10) else tet[len(tet)//(7*100):]
+            st.success(text)
         elif uploaded_photo:
             img = Image.open(uploaded_photo)
             img = img.save("img.png")