Soumen committed on
Commit
d82d18a
·
1 Parent(s): 6d5b302

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -12
app.py CHANGED
@@ -2,7 +2,7 @@
2
  #App: NLP App with Streamlit
3
  Description
4
  This is a Natural Language Processing(NLP) base Application that is useful for
5
- Document/Text Summarization from Bangla and English Images and PDF files.
6
  """
7
  # Core Pkgs
8
  import os
@@ -55,16 +55,16 @@ def read_pdf(file):
55
  # text = pytesseract.image_to_string(image_name, lang="ben") if st.checkbox("Mark to see Bangla Image's Text") else pytesseract.image_to_string(image_name)
56
  all_page_text += page.extractText()+" "
57
  return all_page_text
58
- def read_pdf_with_pdfplumber(file):
59
- # Open the uploaded PDF file with pdfplumber
60
- with pdfplumber.open(file) as pdf:
61
- extracted_text = ''
62
- for page in pdf.pages:
63
- extracted_text += page.extract_text()
64
 
65
- # Display the extracted text
66
- #st.text(extracted_text)
67
- return extracted_text
68
 
69
 
70
  st.title("NLP APPLICATION")
@@ -93,11 +93,11 @@ def main():
93
  #file = uploaded_photo.read() # Read the data
94
  #image_result = open(uploaded_photo.name, 'wb') # creates a writable image and later we can write the decoded result
95
  #image_result.write(file)
96
- tet = read_pdf_with_pdfplumber(uploaded_photo)
97
  #tet = pytesseract.image_to_string(img, lang="ben") if st.checkbox("Mark to see Bangla Image's Text") else pytesseract.image_to_string(img)
98
  values = st.slider('Select a approximate number of lines to see and summarize',value=[0, len(tet)//(7*100)])
99
  text = tet[values[0]*7*10:values[1]*7*10] if values[0]!=len(tet)//(7*10) else tet[len(tet)//(7*100):]
100
- #st.success(text)
101
  elif uploaded_photo:
102
  img = Image.open(uploaded_photo)
103
  img = img.save("img.png")
 
2
  #App: NLP App with Streamlit
3
  Description
4
  This is a Natural Language Processing(NLP) base Application that is useful for
5
+ Document/Text Summarization from Bangla images and English Images/PDF files.
6
  """
7
  # Core Pkgs
8
  import os
 
55
  # text = pytesseract.image_to_string(image_name, lang="ben") if st.checkbox("Mark to see Bangla Image's Text") else pytesseract.image_to_string(image_name)
56
  all_page_text += page.extractText()+" "
57
  return all_page_text
58
+ # def read_pdf_with_pdfplumber(file):
59
+ # # Open the uploaded PDF file with pdfplumber
60
+ # with pdfplumber.open(file) as pdf:
61
+ # extracted_text = ''
62
+ # for page in pdf.pages:
63
+ # extracted_text += page.extract_text()
64
 
65
+ # # Display the extracted text
66
+ # #st.text(extracted_text)
67
+ # return extracted_text
68
 
69
 
70
  st.title("NLP APPLICATION")
 
93
  #file = uploaded_photo.read() # Read the data
94
  #image_result = open(uploaded_photo.name, 'wb') # creates a writable image and later we can write the decoded result
95
  #image_result.write(file)
96
+ tet = read_pdf(uploaded_photo)
97
  #tet = pytesseract.image_to_string(img, lang="ben") if st.checkbox("Mark to see Bangla Image's Text") else pytesseract.image_to_string(img)
98
  values = st.slider('Select a approximate number of lines to see and summarize',value=[0, len(tet)//(7*100)])
99
  text = tet[values[0]*7*10:values[1]*7*10] if values[0]!=len(tet)//(7*10) else tet[len(tet)//(7*100):]
100
+ st.success(text)
101
  elif uploaded_photo:
102
  img = Image.open(uploaded_photo)
103
  img = img.save("img.png")