rdose committed on
Commit
8144261
·
1 Parent(s): 18c6669

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -12
app.py CHANGED
@@ -161,7 +161,7 @@ def get_company_sectors(extracted_names, threshold=0.95):
161
  standard_names_tuples.append(name_match[:2])
162
 
163
  for std_comp_name, _ in standard_names_tuples:
164
- sectors = list(DF_SP500[['Name','Sector']].where( (DF_SP500.Name == std_comp_name) | (DF_SP500.Symbol == std_comp_name)).dropna().itertuples(index=False, name=None))
165
  output += sectors
166
  return output
167
 
@@ -194,10 +194,8 @@ def _inference_ner_spancat(text, limit_outputs=10):
194
  for doc in nlp.pipe(text):
195
  spans = doc.spans["sc"]
196
  #comp_raw_text = dict( sorted( dict(zip([str(x) for x in spans],[float(x)*penalty for x in spans.attrs['scores']])).items(), key=lambda x: x[1], reverse=True) )
197
-
198
- company_list = list(set([str(span).replace('\'s', '') for span in filter_spans(spans, keep_longest=True)]))[:limit_outputs]
199
  out.append(get_company_sectors(company_list))
200
-
201
  return out
202
 
203
  #def _inference_summary_model_pipeline(text):
@@ -342,19 +340,20 @@ def inference(input_batch,isurl,use_archive,filt_companies_topic,limit_companies
342
  df['topic'] = pd.DataFrame(news_sectors).iloc[:, 0]
343
  #df['sector_pred'] = pd.DataFrame(_topic2sector(topics)).iloc[:, 0]
344
  print("[i] Pandas output shape:",df.shape)
345
-
346
  #[[], [('Nvidia', 'Information Technology')], [('Twitter', 'Communication Services'), ('Apple', 'Information Technology')], [], [], [], [], [], []]
347
  df["company"] = np.nan
348
  df["sector"] = np.nan
 
 
349
  for idx in range(len(df.index)):
350
  if ner_labels[idx]: #not empty
351
  for ner in ner_labels[idx]:
352
  if filt_companies_topic:
353
  if news_sectors[idx] != ner[1]:
354
  continue
355
- df = pd.concat( [df, df.loc[[idx]].assign(company=ner[0], sector=ner[1])], join='outer', ignore_index=True) #axis=0
356
-
357
- return df #ner_labels, {'E':float(prob_outs[0]),"S":float(prob_outs[1]),"G":float(prob_outs[2])},{sentiment['label']:float(sentiment['score'])},"**Summary:**\n\n" + summary
358
 
359
  title = "ESG API Demo"
360
  description = """This is a demonstration of the full ESG pipeline backend where given a list of URL (english, news) the news contents are extracted, using extractnet, and fed to three models:
@@ -372,13 +371,21 @@ API input parameters:
372
 
373
  """
374
  examples = [[ [['https://www.bbc.com/news/uk-62732447'],
375
- ['https://www.bbc.com/news/business-62747401'],
376
- ['https://www.bbc.com/news/technology-62744858'],
377
  ['https://www.bbc.com/news/science-environment-62758811'],
378
- ['https://www.theguardian.com/business/2022/sep/02/nord-stream-1-gazprom-announces-indefinite-shutdown-of-pipeline'],
379
- ['https://www.bbc.com/news/world-europe-62766867'],
380
  ['https://www.bbc.com/news/business-62524031'],
381
  ['https://www.bbc.com/news/business-62728621'],
 
 
 
 
 
 
 
 
 
 
 
 
382
  ['https://www.bbc.com/news/science-environment-62680423']],'url',False,False,5]]
383
  demo = gr.Interface(fn=inference,
384
  inputs=[gr.Dataframe(label='input batch', col_count=1, datatype='str', type='array', wrap=True),
 
161
  standard_names_tuples.append(name_match[:2])
162
 
163
  for std_comp_name, _ in standard_names_tuples:
164
+ sectors = list(DF_SP500[['Name','Sector','Symbol']].where( (DF_SP500.Name == std_comp_name) | (DF_SP500.Symbol == std_comp_name)).dropna().itertuples(index=False, name=None))
165
  output += sectors
166
  return output
167
 
 
194
  for doc in nlp.pipe(text):
195
  spans = doc.spans["sc"]
196
  #comp_raw_text = dict( sorted( dict(zip([str(x) for x in spans],[float(x)*penalty for x in spans.attrs['scores']])).items(), key=lambda x: x[1], reverse=True) )
197
+ company_list = list(set([str(span).replace('\'s', '').replace('’s','') for span in filter_spans(spans, keep_longest=True)]))[:limit_outputs]
 
198
  out.append(get_company_sectors(company_list))
 
199
  return out
200
 
201
  #def _inference_summary_model_pipeline(text):
 
340
  df['topic'] = pd.DataFrame(news_sectors).iloc[:, 0]
341
  #df['sector_pred'] = pd.DataFrame(_topic2sector(topics)).iloc[:, 0]
342
  print("[i] Pandas output shape:",df.shape)
 
343
  #[[], [('Nvidia', 'Information Technology')], [('Twitter', 'Communication Services'), ('Apple', 'Information Technology')], [], [], [], [], [], []]
344
  df["company"] = np.nan
345
  df["sector"] = np.nan
346
+ df["symbol"] = np.nan
347
+ dfo = pd.DataFrame(columns=['E','S','G','URL','sent_lbl','sent_score','sector_pred','company','sector','symbol'])
348
  for idx in range(len(df.index)):
349
  if ner_labels[idx]: #not empty
350
  for ner in ner_labels[idx]:
351
  if filt_companies_topic:
352
  if news_sectors[idx] != ner[1]:
353
  continue
354
+ dfo = pd.concat( [dfo, df.loc[[idx]].assign(company=ner[0], sector=ner[1], symbol=ner[2])], join='outer', ignore_index=True) #axis=0
355
+ print("[i] Pandas output shape:",dfo.shape)
356
+ return dfo #ner_labels, {'E':float(prob_outs[0]),"S":float(prob_outs[1]),"G":float(prob_outs[2])},{sentiment['label']:float(sentiment['score'])},"**Summary:**\n\n" + summary
357
 
358
  title = "ESG API Demo"
359
  description = """This is a demonstration of the full ESG pipeline backend where given a list of URL (english, news) the news contents are extracted, using extractnet, and fed to three models:
 
371
 
372
  """
373
  examples = [[ [['https://www.bbc.com/news/uk-62732447'],
 
 
374
  ['https://www.bbc.com/news/science-environment-62758811'],
 
 
375
  ['https://www.bbc.com/news/business-62524031'],
376
  ['https://www.bbc.com/news/business-62728621'],
377
+ ["https://www.knowesg.com/investors/blackstone-and-sphera-work-together-for-portfolio-decarbonization-program-17022022"],
378
+ ["https://www.esgtoday.com/amazon-partners-with-matt-damons-water-org-to-provide-water-access-to-100-million-people/"],
379
+ ["https://www.esgtoday.com/walmart-allocates-over-1-billion-to-renewable-energy-sustainable-buildings-circular-economy/"],
380
+ ["https://www.esgtoday.com/anglo-american-ties-interest-on-745-million-bond-to-climate-water-job-creation-goals/"],
381
+ ["https://www.esgtoday.com/blackrock-acquires-new-zealand-solar-as-a-service-provider-solarzero/"],
382
+ ["https://www.esgtoday.com/blackrock-strikes-back-against-climate-activism-claims/"],
383
+ ["https://www.esgtoday.com/hm-to-remove-sustainability-labels-from-products-following-investigation-by-regulator/"],
384
+ ["https://www.knowesg.com/sustainable-finance/exxonmobil-fails-the-energy-transition-due-to-failed-governance-structure-04122021"],
385
+ ["https://www.knowesg.com/companies/tesla-is-investigated-by-the-securities-and-exchange-commission-sec-on-solar-07122021"],
386
+ ["https://www.knowesg.com/tech/pcg-and-exxonmobil-will-collaborate-on-plastic-recycling-in-malaysia-20092022"],
387
+ ["https://esgnews.com/nike-launches-community-climate-resilience-program-with-2-million-grant-to-trust-for-public-land/"],
388
+ ["https://esgnews.com/walmart-and-unitedhealth-group-collaborate-to-deliver-access-to-high-quality-affordable-health-care/"],
389
  ['https://www.bbc.com/news/science-environment-62680423']],'url',False,False,5]]
390
  demo = gr.Interface(fn=inference,
391
  inputs=[gr.Dataframe(label='input batch', col_count=1, datatype='str', type='array', wrap=True),