Soumen committed on
Commit
9d5dc1c
1 Parent(s): b6fbd57

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -11
app.py CHANGED
@@ -40,16 +40,26 @@ headers1 = {"Authorization": "Bearer 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"}
40
  API_URL2 = "https://api-inference.huggingface.co/models/gpt2"
41
  headers2 = {"Authorization": "Bearer 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"}
42
 
43
def read_pdf(file):
    """Collect the text of every page of a PDF into a single string.

    The returned string begins with one space, and the text of each page
    is followed by one space.
    """
    reader = PdfFileReader(file)
    page_texts = (
        reader.getPage(index).extractText()
        for index in range(reader.numPages)
    )
    # One leading space, then "<page text> " for each page in order.
    return " " + "".join(text + " " for text in page_texts)
 
 
 
 
 
 
 
 
 
 
53
 
54
  def engsum(output):
55
  def query(payload):
@@ -90,7 +100,7 @@ def main():
90
  #file = uploaded_photo.read() # Read the data
91
  #image_result = open(uploaded_photo.name, 'wb') # creates a writable image and later we can write the decoded result
92
  #image_result.write(file)
93
- tet = read_pdf(uploaded_photo)
94
  #tet = pytesseract.image_to_string(img, lang="ben") if st.checkbox("Mark to see Bangla Image's Text") else pytesseract.image_to_string(img)
95
  values = st.slider('Select a approximate number of lines to see and summarize',value=[0, len(tet)//(7*100)])
96
  text = tet[values[0]*7*10:values[1]*7*100] if values[0]!=len(tet)//(7*100) else tet[len(tet)//(7*100):]
 
40
  API_URL2 = "https://api-inference.huggingface.co/models/gpt2"
41
  headers2 = {"Authorization": "Bearer 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"}
42
 
43
+ # def read_pdf(file):
44
+ # # images=pdf2image.convert_from_path(file)
45
+ # # # print(type(images))
46
+ # pdfReader = PdfFileReader(file)
47
+ # count = pdfReader.numPages
48
+ # all_page_text = " "
49
+ # for i in range(count):
50
+ # page = pdfReader.getPage(i)
51
+ # all_page_text += page.extractText()+" "
52
+ # return all_page_text
53
def read_pdf_with_pdfplumber(file):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        file: The PDF to read, passed straight to ``pdfplumber.open``.
            The visible caller passes the uploaded file object.

    Returns:
        str: The page texts joined with no separator. Pages with no
        extractable text contribute an empty string, so the result is
        ``""`` when nothing can be extracted.
    """
    with pdfplumber.open(file) as pdf:
        # extract_text() can come back as None for a page that has no
        # extractable text (e.g. a scanned image or blank page, depending
        # on the pdfplumber version). Coerce that to "" so one such page
        # does not abort the whole extraction with a TypeError.
        return "".join(page.extract_text() or "" for page in pdf.pages)
63
 
64
  def engsum(output):
65
  def query(payload):
 
100
  #file = uploaded_photo.read() # Read the data
101
  #image_result = open(uploaded_photo.name, 'wb') # creates a writable image and later we can write the decoded result
102
  #image_result.write(file)
103
+ tet = read_pdf_with_pdfplumber(uploaded_photo)
104
  #tet = pytesseract.image_to_string(img, lang="ben") if st.checkbox("Mark to see Bangla Image's Text") else pytesseract.image_to_string(img)
105
  values = st.slider('Select a approximate number of lines to see and summarize',value=[0, len(tet)//(7*100)])
106
  text = tet[values[0]*7*10:values[1]*7*100] if values[0]!=len(tet)//(7*100) else tet[len(tet)//(7*100):]