Soumen committed on
Commit
6589236
1 Parent(s): 47d9c5a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +74 -75
app.py CHANGED
@@ -33,6 +33,7 @@ import line_cor
33
  import altair as alt
34
  #pytesseract.pytesseract.tesseract_cmd = r"./Tesseract-OCR/tesseract.exe"
35
  from PIL import Image
 
36
  API_URL0 = "https://api-inference.huggingface.co/models/csebuetnlp/mT5_multilingual_XLSum"
37
  headers0 = {"Authorization": "Bearer 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"}
38
  API_URL1 = "https://api-inference.huggingface.co/models/Michael-Vptn/text-summarization-t5-base"
@@ -82,7 +83,6 @@ def bansum(text):
82
  text_output = out[0]["summary_text"]
83
  st.success(text_output)
84
 
85
- st.title("Bangla and English Summarizer: Upload Images/Pdf or input texts to summarize!")
86
  @st.cache
87
  def load(x):
88
  return x
@@ -94,79 +94,78 @@ def change_photo_state():
94
  st.session_state["photo"]="done"
95
  message = st.text_input("Type your text here!")
96
  c2, c3 = st.columns([2,1])
97
- uploaded_photo = load(c2.file_uploader("Upload your Images/PDF",type=['jpg','png','jpeg','pdf'], on_change=change_photo_state))
98
- camera_photo = load(c3.camera_input("Capture a photo to summarize: ", on_change=change_photo_state))
99
- if st.checkbox("Summarization"):
100
- if st.session_state["photo"]=="done" or message:
101
- if uploaded_photo and uploaded_photo.type=='application/pdf':
102
- tet = read_pdf(uploaded_photo)
103
- # with tempfile.NamedTemporaryFile(delete=False) as temp_file:
104
- # temp_file.write(uploaded_photo.read())
105
- # temp_file_path = temp_file.name
106
-
107
- # loader = PyPDFLoader(temp_file_path)
108
- # if loader:
109
- # text.extend(loader.load())
110
- # os.remove(temp_file_path)
111
- # text_splitter = CharacterTextSplitter(separator="\n", chunk_size=1000, chunk_overlap=100, length_function=len)
112
- # text_chunks = text_splitter.split_documents(text)
113
- values = st.slider('Select a approximate number of lines to see and summarize',value=[0, len(tet)//(7*100)])
114
- text = tet[values[0]*7*10:values[1]*10*100] if values[0]!=len(tet)//(10*100) else tet[len(tet)//(10*100):]
115
- #st.success(type(text_chunks))
116
- if st.button("English Pdf Summarize"):
117
- st.subheader("Selected text for summarize: ")
118
- st.success(text)
119
- st.subheader("Summarized Text: ")
120
- engsum(text)
121
-
122
- elif uploaded_photo and uploaded_photo.type !='application/pdf':
123
- text=None
124
- img = Image.open(uploaded_photo)
125
- img = img.save("img.png")
126
- img = cv2.imread("img.png")
127
- st.text("Select the summarization type:")
128
- if st.button("BENGALI"):
129
- text = pytesseract.image_to_string(img, lang="ben")
130
- st.subheader("সারাংশ/সারমর্ম")
131
- bansum(text)
132
- if st.button("ENGLISH"):
133
- text=pytesseract.image_to_string(img)
134
- st.subheader("Summarized Text")
135
- engsum(text)
136
- #st.success(text)
137
- elif camera_photo:
138
- text=None
139
- img = Image.open(camera_photo)
140
- img = img.save("img.png")
141
- img = cv2.imread("img.png")
142
- #text = pytesseract.image_to_string(img) if st.checkbox("Bangla") else pytesseract.image_to_string(img, lang="ben")
143
- st.text("Select the summarization type:")
144
- if st.button("Bangla"):
145
- text = pytesseract.image_to_string(img, lang="ben")
146
- st.subheader("সারাংশ/সারমর্ম")
147
- bansum(text)
148
- if st.button("English"):
149
- text=pytesseract.image_to_string(img)
150
- st.subheader("Summarized Text")
151
- engsum(text)
152
- else:
153
- text=None
154
- text = message
155
- if st.button("Bangla"):
156
- bansum(text)
157
- if st.button("English"):
158
- engsum(text)
159
- # if st.button("English Text Generation"):
160
- # def query(payload):
161
- # response = requests.post(API_URL2, headers=headers2, json=payload)
162
- # return response.json()
163
-
164
- # out = query({
165
- # "inputs": text,
166
- # })
167
- # if isinstance(out, list) and out[0].get("generated_text"):
168
- # text_output = out[0]["generated_text"]
169
- # st.success(text_output)
170
- # #text=text_output
171
 
172
 
 
33
  import altair as alt
34
  #pytesseract.pytesseract.tesseract_cmd = r"./Tesseract-OCR/tesseract.exe"
35
  from PIL import Image
36
+ st.title("Bangla and English Summarizer: Upload Images/Pdf or input texts to summarize!")
37
  API_URL0 = "https://api-inference.huggingface.co/models/csebuetnlp/mT5_multilingual_XLSum"
38
  headers0 = {"Authorization": "Bearer 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"}
39
  API_URL1 = "https://api-inference.huggingface.co/models/Michael-Vptn/text-summarization-t5-base"
 
83
  text_output = out[0]["summary_text"]
84
  st.success(text_output)
85
 
 
86
  @st.cache
87
  def load(x):
88
  return x
 
94
  st.session_state["photo"]="done"
95
  message = st.text_input("Type your text here!")
96
  c2, c3 = st.columns([2,1])
97
+ uploaded_photo = load(c3.file_uploader("Upload your Images/PDF",type=['jpg','png','jpeg','pdf'], on_change=change_photo_state))
98
+ camera_photo = load(c2.camera_input("Capture a photo to summarize: ", on_change=change_photo_state))
99
+ if st.session_state["photo"]=="done" or message:
100
+ if uploaded_photo and uploaded_photo.type=='application/pdf':
101
+ tet = read_pdf(uploaded_photo)
102
+ # with tempfile.NamedTemporaryFile(delete=False) as temp_file:
103
+ # temp_file.write(uploaded_photo.read())
104
+ # temp_file_path = temp_file.name
105
+
106
+ # loader = PyPDFLoader(temp_file_path)
107
+ # if loader:
108
+ # text.extend(loader.load())
109
+ # os.remove(temp_file_path)
110
+ # text_splitter = CharacterTextSplitter(separator="\n", chunk_size=1000, chunk_overlap=100, length_function=len)
111
+ # text_chunks = text_splitter.split_documents(text)
112
+ values = st.slider('Select a approximate number of lines to see and summarize',value=[0, len(tet)//(7*100)])
113
+ text = tet[values[0]*7*10:values[1]*10*100] if values[0]!=len(tet)//(10*100) else tet[len(tet)//(10*100):]
114
+ #st.success(type(text_chunks))
115
+ if st.button("English Pdf Summarize"):
116
+ st.subheader("Selected text for summarize: ")
117
+ st.success(text)
118
+ st.subheader("Summarized Text: ")
119
+ engsum(text)
120
+
121
+ elif uploaded_photo and uploaded_photo.type !='application/pdf':
122
+ text=None
123
+ img = Image.open(uploaded_photo)
124
+ img = img.save("img.png")
125
+ img = cv2.imread("img.png")
126
+ st.text("Select the summarization type:")
127
+ if st.button("BENGALI"):
128
+ text = pytesseract.image_to_string(img, lang="ben")
129
+ st.subheader("সারাংশ/সারমর্ম")
130
+ bansum(text)
131
+ if st.button("ENGLISH"):
132
+ text=pytesseract.image_to_string(img)
133
+ st.subheader("Summarized Text")
134
+ engsum(text)
135
+ #st.success(text)
136
+ elif camera_photo:
137
+ text=None
138
+ img = Image.open(camera_photo)
139
+ img = img.save("img.png")
140
+ img = cv2.imread("img.png")
141
+ #text = pytesseract.image_to_string(img) if st.checkbox("Bangla") else pytesseract.image_to_string(img, lang="ben")
142
+ st.text("Select the summarization type:")
143
+ if st.button("Bangla"):
144
+ text = pytesseract.image_to_string(img, lang="ben")
145
+ st.subheader("সারাংশ/সারমর্ম")
146
+ bansum(text)
147
+ if st.button("English"):
148
+ text=pytesseract.image_to_string(img)
149
+ st.subheader("Summarized Text")
150
+ engsum(text)
151
+ else:
152
+ text=None
153
+ text = message
154
+ if st.button("Bangla"):
155
+ bansum(text)
156
+ if st.button("English"):
157
+ engsum(text)
158
+ # if st.button("English Text Generation"):
159
+ # def query(payload):
160
+ # response = requests.post(API_URL2, headers=headers2, json=payload)
161
+ # return response.json()
162
+
163
+ # out = query({
164
+ # "inputs": text,
165
+ # })
166
+ # if isinstance(out, list) and out[0].get("generated_text"):
167
+ # text_output = out[0]["generated_text"]
168
+ # st.success(text_output)
169
+ # #text=text_output
 
170
 
171