ppsingh committed on
Commit
8c4c590
·
1 Parent(s): b0f3060

Adding Download to main branch

Browse files
app.py CHANGED
@@ -5,6 +5,7 @@ import appStore.info as info
5
  from appStore.multiapp import MultiApp
6
  import streamlit as st
7
 
 
8
  st.set_page_config(f'SDSN x GIZ Policy Action Tracking v0.1', layout="wide")
9
 
10
  app = MultiApp()
 
5
  from appStore.multiapp import MultiApp
6
  import streamlit as st
7
 
8
+ # This branch is before the download option was implemented
9
  st.set_page_config(f'SDSN x GIZ Policy Action Tracking v0.1', layout="wide")
10
 
11
  app = MultiApp()
appStore/coherence.py CHANGED
@@ -28,6 +28,11 @@ import sqlite3
28
  import json
29
  import urllib.request
30
  import ast
 
 
 
 
 
31
  def app():
32
  # Sidebar
33
  st.sidebar.title('Check Coherence')
@@ -222,10 +227,41 @@ def app():
222
  #label_indices, paragraph_indices = np.where(similarity_matrix>similarity_high_threshold)
223
 
224
  #positive_indices = list(zip(label_indices.tolist(), paragraph_indices.tolist()))
225
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
226
  for _label_idx, _paragraph_idx in positive_indices:
227
  st.write("This paragraph: \n")
 
228
  st.write(paraList[_paragraph_idx])
229
  st.write(f"Is relevant to: \n {list(sent_dict.keys())[_label_idx]}")
 
230
  st.write('-'*10)
 
 
 
 
 
 
 
 
 
 
231
 
 
28
  import json
29
  import urllib.request
30
  import ast
31
+ import docx
32
+ from docx.shared import Inches
33
+ from docx.shared import Pt
34
+ from docx.enum.style import WD_STYLE_TYPE
35
+
36
  def app():
37
  # Sidebar
38
  st.sidebar.title('Check Coherence')
 
227
  #label_indices, paragraph_indices = np.where(similarity_matrix>similarity_high_threshold)
228
 
229
  #positive_indices = list(zip(label_indices.tolist(), paragraph_indices.tolist()))
230
+ document = docx.Document()
231
+ document.add_heading('Document name:{}'.format(file_name), 2)
232
+ section = document.sections[0]
233
+
234
+ # Calling the footer
235
+ footer = section.footer
236
+
237
+ # Calling the paragraph already present in
238
+ # the footer section
239
+ footer_para = footer.paragraphs[0]
240
+
241
+ font_styles = document.styles
242
+ font_charstyle = font_styles.add_style('CommentsStyle', WD_STYLE_TYPE.CHARACTER)
243
+ font_object = font_charstyle.font
244
+ font_object.size = Pt(7)
245
+ # Adding the centered zoned footer
246
+ footer_para.add_run('''\tPowered by GIZ Data and the Sustainable Development Solution Network hosted at Hugging-Face spaces: https://huggingface.co/spaces/ppsingh/streamlit_dev''', style='CommentsStyle')
247
+
248
+ document.add_paragraph("Country Code for which NDC is carried out {}".format(countryCode))
249
+
250
  for _label_idx, _paragraph_idx in positive_indices:
251
  st.write("This paragraph: \n")
252
+ document.add_paragraph("This paragraph: \n")
253
  st.write(paraList[_paragraph_idx])
254
  st.write(f"Is relevant to: \n {list(sent_dict.keys())[_label_idx]}")
255
+ document.add_paragraph(f"Is relevant to: \n {list(sent_dict.keys())[_label_idx]}")
256
  st.write('-'*10)
257
+ document.add_paragraph('-'*10)
258
+
259
+ document.save('demo.docx')
260
+ with open("demo.docx", "rb") as file:
261
+ btn = st.download_button(
262
+ label="Download file",
263
+ data=file,
264
+ file_name="demo.docx",
265
+ mime="txt/docx"
266
+ )
267
 
appStore/keyword_search.py CHANGED
@@ -20,6 +20,10 @@ from sklearn.feature_extraction import _stop_words
20
  import string
21
  from tqdm.autonotebook import tqdm
22
  import numpy as np
 
 
 
 
23
 
24
  import tempfile
25
  import sqlite3
@@ -100,8 +104,29 @@ def app():
100
  return bm25_hits, hits
101
 
102
  def show_results(keywordList):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
103
  for keyword in keywordList:
 
104
  st.write("Results for Query: {}".format(keyword))
 
 
105
  bm25_hits, hits = search(keyword)
106
 
107
  st.markdown("""
@@ -109,24 +134,36 @@ def app():
109
  """)
110
  # In the semantic search part we provide two kind of results one with only Retriever (Bi-Encoder) and other the ReRanker (Cross Encoder)
111
  st.markdown("Top few lexical search (BM25) hits")
 
 
112
  for hit in bm25_hits[0:5]:
113
  if hit['score'] > 0.00:
114
  st.write("\t Score: {:.3f}: \t{}".format(hit['score'], paraList[hit['corpus_id']].replace("\n", " ")))
 
115
 
116
-
117
-
118
 
119
 
120
  # st.table(bm25_hits[0:3])
121
 
122
  st.markdown("\n-------------------------\n")
123
  st.markdown("Top few Bi-Encoder Retrieval hits")
124
-
 
 
125
  hits = sorted(hits, key=lambda x: x['score'], reverse=True)
126
  for hit in hits[0:5]:
127
  # if hit['score'] > 0.45:
128
  st.write("\t Score: {:.3f}: \t{}".format(hit['score'], paraList[hit['corpus_id']].replace("\n", " ")))
 
129
  #st.table(hits[0:3]
 
 
 
 
 
 
 
 
130
 
131
 
132
  @st.cache(allow_output_mutation=True)
@@ -206,6 +243,7 @@ def app():
206
  if st.button("Find them."):
207
  keywordList = [keyword]
208
  if keywordList is not None:
 
209
  show_results(keywordList)
210
 
211
 
 
20
  import string
21
  from tqdm.autonotebook import tqdm
22
  import numpy as np
23
+ import docx
24
+ from docx.shared import Inches
25
+ from docx.shared import Pt
26
+ from docx.enum.style import WD_STYLE_TYPE
27
 
28
  import tempfile
29
  import sqlite3
 
104
  return bm25_hits, hits
105
 
106
  def show_results(keywordList):
107
+ document = docx.Document()
108
+ document.add_heading('Document name:{}'.format(file_name), 2)
109
+ section = document.sections[0]
110
+
111
+ # Calling the footer
112
+ footer = section.footer
113
+
114
+ # Calling the paragraph already present in
115
+ # the footer section
116
+ footer_para = footer.paragraphs[0]
117
+
118
+ font_styles = document.styles
119
+ font_charstyle = font_styles.add_style('CommentsStyle', WD_STYLE_TYPE.CHARACTER)
120
+ font_object = font_charstyle.font
121
+ font_object.size = Pt(7)
122
+ # Adding the centered zoned footer
123
+ footer_para.add_run('''\tPowered by GIZ Data and the Sustainable Development Solution Network hosted at Hugging-Face spaces: https://huggingface.co/spaces/ppsingh/streamlit_dev''', style='CommentsStyle')
124
+ document.add_heading('Your Seacrhed for {}'.format(keywordList), level=1)
125
  for keyword in keywordList:
126
+
127
  st.write("Results for Query: {}".format(keyword))
128
+ para = document.add_paragraph().add_run("Results for Query: {}".format(keyword))
129
+ para.font.size = Pt(12)
130
  bm25_hits, hits = search(keyword)
131
 
132
  st.markdown("""
 
134
  """)
135
  # In the semantic search part we provide two kind of results one with only Retriever (Bi-Encoder) and other the ReRanker (Cross Encoder)
136
  st.markdown("Top few lexical search (BM25) hits")
137
+ document.add_paragraph("Top few lexical search (BM25) hits")
138
+
139
  for hit in bm25_hits[0:5]:
140
  if hit['score'] > 0.00:
141
  st.write("\t Score: {:.3f}: \t{}".format(hit['score'], paraList[hit['corpus_id']].replace("\n", " ")))
142
+ document.add_paragraph("\t Score: {:.3f}: \t{}".format(hit['score'], paraList[hit['corpus_id']].replace("\n", " ")))
143
 
 
 
144
 
145
 
146
  # st.table(bm25_hits[0:3])
147
 
148
  st.markdown("\n-------------------------\n")
149
  st.markdown("Top few Bi-Encoder Retrieval hits")
150
+ document.add_paragraph("\n-------------------------\n")
151
+ document.add_paragraph("Top few Bi-Encoder Retrieval hits")
152
+
153
  hits = sorted(hits, key=lambda x: x['score'], reverse=True)
154
  for hit in hits[0:5]:
155
  # if hit['score'] > 0.45:
156
  st.write("\t Score: {:.3f}: \t{}".format(hit['score'], paraList[hit['corpus_id']].replace("\n", " ")))
157
+ document.add_paragraph("\t Score: {:.3f}: \t{}".format(hit['score'], paraList[hit['corpus_id']].replace("\n", " ")))
158
  #st.table(hits[0:3]
159
+ document.save('demo.docx')
160
+ with open("demo.docx", "rb") as file:
161
+ btn = st.download_button(
162
+ label="Download file",
163
+ data=file,
164
+ file_name="demo.docx",
165
+ mime="txt/docx"
166
+ )
167
 
168
 
169
  @st.cache(allow_output_mutation=True)
 
243
  if st.button("Find them."):
244
  keywordList = [keyword]
245
  if keywordList is not None:
246
+
247
  show_results(keywordList)
248
 
249
 
appStore/sdg_analysis.py CHANGED
@@ -13,7 +13,11 @@ from transformers import pipeline
13
  import matplotlib.pyplot as plt
14
  import numpy as np
15
  import streamlit as st
16
- import pandas as pd
 
 
 
 
17
 
18
  import tempfile
19
  import sqlite3
@@ -111,7 +115,11 @@ def app():
111
  .sort_values(by="Relevancy", ascending=False)
112
  .reset_index(drop=True)
113
  )
114
-
 
 
 
 
115
  df.index += 1
116
 
117
  # Add styling
@@ -162,12 +170,13 @@ def app():
162
 
163
  labels = classifier(par_list)
164
  labels_= [(l['label'],l['score']) for l in labels]
165
- df = DataFrame(labels_, columns=["SDG", "Relevancy"])
166
- df['text'] = par_list
167
- df = df.sort_values(by="Relevancy", ascending=False).reset_index(drop=True)
168
- df.index += 1
169
- df =df[df['Relevancy']>.85]
170
- x = df['SDG'].value_counts()
 
171
 
172
  plt.rcParams['font.size'] = 25
173
  colors = plt.get_cmap('Blues')(np.linspace(0.2, 0.7, len(x)))
@@ -175,7 +184,7 @@ def app():
175
  fig, ax = plt.subplots()
176
  ax.pie(x, colors=colors, radius=2, center=(4, 4),
177
  wedgeprops={"linewidth": 1, "edgecolor": "white"}, frame=False,labels =list(x.index))
178
-
179
  st.markdown("## 🎈 Anything related to SDGs?")
180
 
181
  c4, c5, c6 = st.columns([2, 2, 2])
@@ -183,7 +192,7 @@ def app():
183
  # Add styling
184
  cmGreen = sns.light_palette("green", as_cmap=True)
185
  cmRed = sns.light_palette("red", as_cmap=True)
186
- df = df.style.background_gradient(
187
  cmap=cmGreen,
188
  subset=[
189
  "Relevancy",
@@ -194,13 +203,91 @@ def app():
194
  "Relevancy": "{:.1%}",
195
  }
196
 
197
- df = df.format(format_dictionary)
198
 
199
  with c5:
200
  st.pyplot(fig)
201
 
202
  c7, c8, c9 = st.columns([1, 10, 1])
203
  with c8:
204
- st.table(df)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
205
 
206
 
 
13
  import matplotlib.pyplot as plt
14
  import numpy as np
15
  import streamlit as st
16
+ import pandas as pd
17
+ import docx
18
+ from docx.shared import Inches
19
+ from docx.shared import Pt
20
+ from docx.enum.style import WD_STYLE_TYPE
21
 
22
  import tempfile
23
  import sqlite3
 
115
  .sort_values(by="Relevancy", ascending=False)
116
  .reset_index(drop=True)
117
  )
118
+ df1 = (
119
+ DataFrame(keywords, columns=["Keyword/Keyphrase", "Relevancy"])
120
+ .sort_values(by="Relevancy", ascending=False)
121
+ .reset_index(drop=True)
122
+ )
123
  df.index += 1
124
 
125
  # Add styling
 
170
 
171
  labels = classifier(par_list)
172
  labels_= [(l['label'],l['score']) for l in labels]
173
+ df2 = DataFrame(labels_, columns=["SDG", "Relevancy"])
174
+ df2['text'] = par_list
175
+ df2 = df2.sort_values(by="Relevancy", ascending=False).reset_index(drop=True)
176
+ df2.index += 1
177
+ df2 =df2[df2['Relevancy']>.85]
178
+ x = df2['SDG'].value_counts()
179
+ df3 = df2.copy()
180
 
181
  plt.rcParams['font.size'] = 25
182
  colors = plt.get_cmap('Blues')(np.linspace(0.2, 0.7, len(x)))
 
184
  fig, ax = plt.subplots()
185
  ax.pie(x, colors=colors, radius=2, center=(4, 4),
186
  wedgeprops={"linewidth": 1, "edgecolor": "white"}, frame=False,labels =list(x.index))
187
+ fig.savefig('temp.png', bbox_inches='tight',dpi= 100)
188
  st.markdown("## 🎈 Anything related to SDGs?")
189
 
190
  c4, c5, c6 = st.columns([2, 2, 2])
 
192
  # Add styling
193
  cmGreen = sns.light_palette("green", as_cmap=True)
194
  cmRed = sns.light_palette("red", as_cmap=True)
195
+ df2 = df2.style.background_gradient(
196
  cmap=cmGreen,
197
  subset=[
198
  "Relevancy",
 
203
  "Relevancy": "{:.1%}",
204
  }
205
 
206
+ df2 = df2.format(format_dictionary)
207
 
208
  with c5:
209
  st.pyplot(fig)
210
 
211
  c7, c8, c9 = st.columns([1, 10, 1])
212
  with c8:
213
+ st.table(df2)
214
+
215
+ document = docx.Document()
216
+ document.add_heading('Document name:{}'.format(file_name), 2)
217
+ # Choosing the top most section of the page
218
+ section = document.sections[0]
219
+
220
+ # Calling the footer
221
+ footer = section.footer
222
+
223
+ # Calling the paragraph already present in
224
+ # the footer section
225
+ footer_para = footer.paragraphs[0]
226
+
227
+ font_styles = document.styles
228
+ font_charstyle = font_styles.add_style('CommentsStyle', WD_STYLE_TYPE.CHARACTER)
229
+ font_object = font_charstyle.font
230
+ font_object.size = Pt(7)
231
+ # Adding the centered zoned footer
232
+ footer_para.add_run('''\tPowered by GIZ Data and the Sustainable Development Solution Network hosted at Hugging-Face spaces: https://huggingface.co/spaces/ppsingh/streamlit_dev''', style='CommentsStyle')
233
+
234
+ #footer_para.text = "\tPowered by GIZ Data and the Sustainable Development Solution Network\
235
+ # hosted at Hugging-Face spaces: https://huggingface.co/spaces/ppsingh/streamlit_dev"
236
+ #footer_para.font.size = docx.shared.Pt(6)
237
+
238
+ document.add_heading('What is the document about', level=1)
239
+ t = document.add_table(df1.shape[0]+1, df1.shape[1])
240
+
241
+
242
+ # add the header rows.
243
+ for j in range(df1.shape[-1]):
244
+ t.cell(0,j).text = df1.columns[j]
245
+
246
+
247
+ # add the rest of the data frame
248
+ for i in range(df1.shape[0]):
249
+ for j in range(df1.shape[-1]):
250
+ t.cell(i+1,j).text = str(df1.values[i,j])
251
+
252
+
253
+
254
+ document.add_heading('Anything Related to SDG', level=1)
255
+ document.add_picture('temp.png', width=Inches(3), height=Inches(3))
256
+ t = document.add_table(df3.shape[0]+1, df3.shape[1])
257
+
258
+ widths = [Inches(0.4), Inches(0.4), Inches(4.5)]
259
+ # add the header rows.
260
+ for j in range(df3.shape[-1]):
261
+ t.cell(0,j).text = df3.columns[j]
262
+ t.cell(0,j).width = widths[j]
263
+
264
+ # add the rest of the data frame
265
+ for i in range(df3.shape[0]):
266
+ for j in range(df3.shape[-1]):
267
+ t.cell(i+1,j).width = widths[j]
268
+ t.cell(i+1,j).text = str(df3.values[i,j])
269
+
270
+
271
+ document.save('demo.docx')
272
+
273
+ #with open('summary.txt', 'w') as f:
274
+ # f.write(df1.to_string())
275
+ # f.write(fig)
276
+ #f.write(df2)
277
+ # f.write(df3.to_string())
278
+
279
+ with open("demo.docx", "rb") as file:
280
+ btn = st.download_button(
281
+ label="Download file",
282
+ data=file,
283
+ file_name="demo.docx",
284
+ mime="txt/docx"
285
+ )
286
+ #with document st.download_button(
287
+ # label="Download data as docx",
288
+ # data=document,
289
+ #file_name='test.docx',
290
+ #mime='text/docx',
291
+ # )
292
 
293
 
requirements.txt CHANGED
@@ -11,4 +11,5 @@ pdfplumber==0.6.2
11
  Pillow==9.1.1
12
  seaborn==0.11.2
13
  transformers==4.13.0
14
- rank_bm25
 
 
11
  Pillow==9.1.1
12
  seaborn==0.11.2
13
  transformers==4.13.0
14
+ rank_bm25
15
+ python-docx