Soumen committed on
Commit
1a9fa41
·
1 Parent(s): f176b84

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -17
app.py CHANGED
@@ -55,22 +55,17 @@ def read_pdf(file):
55
  # text = pytesseract.image_to_string(image_name, lang="ben") if st.checkbox("Mark to see Bangla Image's Text") else pytesseract.image_to_string(image_name)
56
  all_page_text += page.extractText()+" "
57
  return all_page_text
58
- # def read_pdf_with_pdfplumber(file):
59
- # all_page_text=" "
60
- # # all_page_text = ""
61
- # with pdfplumber.open(file) as pdf:
62
- # page = pdf.pages[0]
63
- # ge=page.to_image()
64
- # img = Image.open(ge)
65
- # img = img.save("img.png")
66
- # image_name = cv2.imread("img.png")
67
- # # get co-ordinates to c
68
- # # return page.extract_text()
69
- # # get co-ordinates to cr
70
- # # # get co-ordinates to cr
71
- # text = pytesseract.image_to_string(image_name, lang="ben") if st.checkbox("Mark to see Bangla Image's Text") else pytesseract.image_to_string(image_name)
72
- # all_page_text += text + " " #page.extractText()
73
- # return all_page_text
74
  st.title("NLP APPLICATION")
75
  #@st.cache_resource(experimental_allow_widgets=True)
76
  def main():
@@ -97,7 +92,7 @@ def main():
97
  #file = uploaded_photo.read() # Read the data
98
  #image_result = open(uploaded_photo.name, 'wb') # creates a writable image and later we can write the decoded result
99
  #image_result.write(file)
100
- tet = read_pdf(uploaded_photo)
101
  #tet = pytesseract.image_to_string(img, lang="ben") if st.checkbox("Mark to see Bangla Image's Text") else pytesseract.image_to_string(img)
102
  values = st.slider('Select a approximate number of lines to see and summarize',value=[0, len(tet)//(7*10)])
103
  text = tet[values[0]*7*10:values[1]*7*10] if values[0]!=len(tet)//(7*10) else tet[len(tet)//(7*10):]
 
55
  # text = pytesseract.image_to_string(image_name, lang="ben") if st.checkbox("Mark to see Bangla Image's Text") else pytesseract.image_to_string(image_name)
56
  all_page_text += page.extractText()+" "
57
  return all_page_text
58
def read_pdf_with_pdfplumber(file):
    """Extract the text of every page of a PDF with pdfplumber.

    The extracted text is shown in the Streamlit app and also returned,
    because the caller in main() takes len() of the result and slices it.

    Args:
        file: The PDF to read, passed straight to ``pdfplumber.open``
            (main() passes the uploaded file object).

    Returns:
        str: The text of all pages joined by a single space; an empty
        string when no page yields any text.
    """
    # Open the uploaded PDF file with pdfplumber
    with pdfplumber.open(file) as pdf:
        # extract_text() may return None for a page without extractable
        # text in some pdfplumber versions; treat that as an empty page
        # so the join below cannot fail.
        page_texts = [page.extract_text() or "" for page in pdf.pages]

    # Separate pages with a space (as read_pdf does) so the last word of
    # one page does not fuse with the first word of the next.
    extracted_text = " ".join(page_texts)

    # Display the extracted text
    st.text(extracted_text)

    # Previously the function fell off the end and returned None, which
    # made len(tet) in main() raise TypeError.
    return extracted_text
67
+
68
+
 
 
 
 
 
69
  st.title("NLP APPLICATION")
70
  #@st.cache_resource(experimental_allow_widgets=True)
71
  def main():
 
92
  #file = uploaded_photo.read() # Read the data
93
  #image_result = open(uploaded_photo.name, 'wb') # creates a writable image and later we can write the decoded result
94
  #image_result.write(file)
95
+ tet = read_pdf_with_pdfplumber(uploaded_photo)
96
  #tet = pytesseract.image_to_string(img, lang="ben") if st.checkbox("Mark to see Bangla Image's Text") else pytesseract.image_to_string(img)
97
  values = st.slider('Select a approximate number of lines to see and summarize',value=[0, len(tet)//(7*10)])
98
  text = tet[values[0]*7*10:values[1]*7*10] if values[0]!=len(tet)//(7*10) else tet[len(tet)//(7*10):]