m. polinsky committed on
Commit
9604970
·
unverified ·
1 Parent(s): e037e33

Update streamlit_app.py

Browse files
Files changed (1) hide show
  1. streamlit_app.py +58 -62
streamlit_app.py CHANGED
@@ -15,10 +15,7 @@ from source import Source
15
  from scrape_sources import NPRLite, CNNText, stub
16
  import random
17
 
18
- # EDIT: before doing NER check time of last scrape and just read in from JSON store instead of rescraping
19
- # can force rescrape
20
- # This may take a config to get sources as input
21
- @st.cache()
22
  def initialize(limit, rando, use_cache=True):
23
  clusters: dict[str:List[namedtuple]] = dict()
24
  # This is a container for the source classes.
@@ -157,7 +154,7 @@ def ner_results(ner_object, groups=True, NER_THRESHOLD=0.5) -> List[str]:
157
  NER_API_URL = "https://api-inference.huggingface.co/models/dbmdz/bert-large-cased-finetuned-conll03-english"
158
  headers = {"Authorization": f"""Bearer {st.secrets['ato']}"""}
159
 
160
- LIMIT = None # Controls time and number of clusters.
161
  USE_CACHE = True
162
 
163
  if not USE_CACHE:
@@ -176,60 +173,59 @@ article_dict, clusters = initialize(LIMIT, USE_CACHE)
176
  # We now have clusters and cluster data. Redundancy.
177
  # We call a display function and get the user input.
178
  # For this it's still streamlit.
179
- loop_control = 'y'
180
- while loop_control == 'y':
181
- selections = []
182
- choices = list(clusters.keys())
183
- choices.insert(0,'None')
184
- # Form used to take 3 menu inputs
185
- with st.form(key='columns_in_form'):
186
- cols = st.columns(3)
187
- for i, col in enumerate(cols):
188
- selections.append(col.selectbox(f'Make a Selection', choices, key=i))
189
- submitted = st.form_submit_button('Submit')
190
- if submitted:
191
- selections = [i for i in selections if i is not None]
192
- with st.spinner(text="Digesting...please wait, this will take a few moments...Maybe check some messages or start reading the latest papers on summarization with transformers...."):
193
- found = False
194
-
195
- # Check if we already have this digest.
196
- for i in digests:
197
- if set(selections) == set(list(i)):
198
- digestor = digests[i]
199
- found = True
200
- break
201
-
202
- # If we need a new digest
203
- if not found:
204
- chosen = []
205
- # Why not just use answers.values()?
206
- for i in selections: # i is supposed to be a list of stubs, mostly one
207
- if i != 'None':
208
- for j in clusters[i]:
209
- if j not in chosen:
210
- chosen.append(j) # j is supposed to be a stub.
211
-
212
- # Article dict contains stubs for unprocessed articles and lists of summarized chunks for processed ones.
213
- # Here we put together a list of article stubs and/or summary chunks and let the digestor sort out what it does with them,
214
- chosen = [i if isinstance(article_dict[i.hed], stub) else article_dict[i.hed] for i in chosen]
215
- # Digestor uses 'chosen', passed through 'stubs' to create digest.
216
- # 'user_choices' is passed for reference.
217
- # Passing list(answers.values()) includes 'None' choices.
218
- digestor = Digestor(timer=Timer(), cache = USE_CACHE, stubs=chosen, user_choices=list(selections))
219
- # happens internally but may be used differently so it isn't automatic upon digestor creation.
220
- # Easily turn caching off for testing.
221
- digestor.digest() # creates summaries and stores them associated with the digest
222
-
223
-
224
-
225
- # Get displayable digest and digest data
226
- digestor.build_digest()# only returns for data collection
227
- digests[tuple(digestor.user_choices)] = digestor
228
-
229
- if len(digestor.text) == 0:
230
- st.write("You didn't select a topic!")
231
- else:
232
- st.write("Your digest is ready:\n")
233
-
234
- st.write(digestor.text)
235
- loop_control = input('Y to continue...')
 
15
  from scrape_sources import NPRLite, CNNText, stub
16
  import random
17
 
18
+
 
 
 
19
  def initialize(limit, rando, use_cache=True):
20
  clusters: dict[str:List[namedtuple]] = dict()
21
  # This is a container for the source classes.
 
154
  NER_API_URL = "https://api-inference.huggingface.co/models/dbmdz/bert-large-cased-finetuned-conll03-english"
155
  headers = {"Authorization": f"""Bearer {st.secrets['ato']}"""}
156
 
157
+ LIMIT = 20 # Controls time and number of clusters.
158
  USE_CACHE = True
159
 
160
  if not USE_CACHE:
 
173
  # We now have clusters and cluster data. Redundancy.
174
  # We call a display function and get the user input.
175
  # For this it's still streamlit.
176
+
177
+ selections = []
178
+ choices = list(clusters.keys())
179
+ choices.insert(0,'None')
180
+ # Form used to take 3 menu inputs
181
+ with st.form(key='columns_in_form'):
182
+ cols = st.columns(3)
183
+ for i, col in enumerate(cols):
184
+ selections.append(col.selectbox(f'Make a Selection', choices, key=i))
185
+ submitted = st.form_submit_button('Submit')
186
+ if submitted:
187
+ selections = [i for i in selections if i is not None]
188
+ with st.spinner(text="Digesting...please wait, this will take a few moments...Maybe check some messages or start reading the latest papers on summarization with transformers...."):
189
+ found = False
190
+
191
+ # Check if we already have this digest.
192
+ for i in digests:
193
+ if set(selections) == set(list(i)):
194
+ digestor = digests[i]
195
+ found = True
196
+ break
197
+
198
+ # If we need a new digest
199
+ if not found:
200
+ chosen = []
201
+ # Why not just use answers.values()?
202
+ for i in selections: # i is supposed to be a list of stubs, mostly one
203
+ if i != 'None':
204
+ for j in clusters[i]:
205
+ if j not in chosen:
206
+ chosen.append(j) # j is supposed to be a stub.
207
+
208
+ # Article dict contains stubs for unprocessed articles and lists of summarized chunks for processed ones.
209
+ # Here we put together a list of article stubs and/or summary chunks and let the digestor sort out what it does with them,
210
+ chosen = [i if isinstance(article_dict[i.hed], stub) else article_dict[i.hed] for i in chosen]
211
+ # Digestor uses 'chosen', passed through 'stubs' to create digest.
212
+ # 'user_choices' is passed for reference.
213
+ # Passing list(answers.values()) includes 'None' choices.
214
+ digestor = Digestor(timer=Timer(), cache = USE_CACHE, stubs=chosen, user_choices=list(selections))
215
+ # happens internally but may be used differently so it isn't automatic upon digestor creation.
216
+ # Easily turn caching off for testing.
217
+ digestor.digest() # creates summaries and stores them associated with the digest
218
+
219
+
220
+
221
+ # Get displayable digest and digest data
222
+ digestor.build_digest()# only returns for data collection
223
+ digests[tuple(digestor.user_choices)] = digestor
224
+
225
+ if len(digestor.text) == 0:
226
+ st.write("You didn't select a topic!")
227
+ else:
228
+ st.write("Your digest is ready:\n")
229
+
230
+ st.write(digestor.text)
231
+