Soumen committed on
Commit
a2c3102
1 Parent(s): 9d5dc1c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -18
app.py CHANGED
@@ -20,7 +20,10 @@ st.set_page_config(page_title="Summarization Tool", layout="wide", initial_sideb
20
  import torch
21
  import docx2txt
22
  from PIL import Image
23
- from PyPDF2 import PdfFileReader
 
 
 
24
  from pdf2image import convert_from_bytes
25
  import pdfplumber
26
  #from line_cor import mark_region
@@ -96,24 +99,32 @@ def main():
96
  st.session_state["photo"]="not done"
97
  if st.session_state["photo"]=="done" or message:
98
  if uploaded_photo and uploaded_photo.type=='application/pdf':
99
- text=None
100
- #file = uploaded_photo.read() # Read the data
101
- #image_result = open(uploaded_photo.name, 'wb') # creates a writable image and later we can write the decoded result
102
- #image_result.write(file)
103
- tet = read_pdf_with_pdfplumber(uploaded_photo)
104
- #tet = pytesseract.image_to_string(img, lang="ben") if st.checkbox("Mark to see Bangla Image's Text") else pytesseract.image_to_string(img)
105
- values = st.slider('Select a approximate number of lines to see and summarize',value=[0, len(tet)//(7*100)])
106
- text = tet[values[0]*7*10:values[1]*7*100] if values[0]!=len(tet)//(7*100) else tet[len(tet)//(7*100):]
 
 
 
 
 
 
 
107
  st.text("Selected text for summarize: ")
108
- if st.button("BENGALI"):
109
- st.success(text)
110
- st.text("Summarized text: ")
111
- bansum(text)
112
- if st.button("ENGLISH"):
113
- st.success(text)
114
- st.text("Summarized text: ")
115
- engsum(text)
116
-
 
117
  elif uploaded_photo and uploaded_photo.type !='application/pdf':
118
  text=None
119
  img = Image.open(uploaded_photo)
 
20
  import torch
21
  import docx2txt
22
  from PIL import Image
23
+ from langchain.document_loaders import PyPDFLoader
24
+ from langchain.text_splitter import CharacterTextSplitter
25
+ import tempfile
26
+ #from PyPDF2 import PdfFileReader
27
  from pdf2image import convert_from_bytes
28
  import pdfplumber
29
  #from line_cor import mark_region
 
99
  st.session_state["photo"]="not done"
100
  if st.session_state["photo"]=="done" or message:
101
  if uploaded_photo and uploaded_photo.type=='application/pdf':
102
+ text=[]
103
+ loader = None
104
+ #tet = read_pdf(uploaded_photo)
105
+ with tempfile.NamedTemporaryFile(delete=False) as temp_file:
106
+ temp_file.write(uploaded_photo.read())
107
+ temp_file_path = temp_file.name
108
+
109
+ loader = PyPDFLoader(temp_file_path)
110
+ if loader:
111
+ text.extend(loader.load())
112
+ os.remove(temp_file_path)
113
+ text_splitter = CharacterTextSplitter(separator="\n", chunk_size=1000, chunk_overlap=100, length_function=len)
114
+ text_chunks = text_splitter.split_documents(text)
115
+ # values = st.slider('Select a approximate number of lines to see and summarize',value=[0, len(tet)//(7*100)])
116
+ #text = tet[values[0]*7*10:values[1]*7*100] if values[0]!=len(tet)//(7*100) else tet[len(tet)//(7*100):]
117
  st.text("Selected text for summarize: ")
118
+ # if st.button("BENGALI"):
119
+ # st.success(text)
120
+ # st.text("Summarized text: ")
121
+ # bansum(text)
122
+ #if st.button("ENGLISH"):
123
+ st.success(type(text_chunks))
124
+ st.success(text_chunks[0])
125
+ st.text("Summarized text: ")
126
+ engsum(text_chunks[0])
127
+
128
  elif uploaded_photo and uploaded_photo.type !='application/pdf':
129
  text=None
130
  img = Image.open(uploaded_photo)