Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
@@ -161,7 +161,7 @@ def get_company_sectors(extracted_names, threshold=0.95):
|
|
161 |
standard_names_tuples.append(name_match[:2])
|
162 |
|
163 |
for std_comp_name, _ in standard_names_tuples:
|
164 |
-
sectors = list(DF_SP500[['Name','Sector']].where( (DF_SP500.Name == std_comp_name) | (DF_SP500.Symbol == std_comp_name)).dropna().itertuples(index=False, name=None))
|
165 |
output += sectors
|
166 |
return output
|
167 |
|
@@ -194,10 +194,8 @@ def _inference_ner_spancat(text, limit_outputs=10):
|
|
194 |
for doc in nlp.pipe(text):
|
195 |
spans = doc.spans["sc"]
|
196 |
#comp_raw_text = dict( sorted( dict(zip([str(x) for x in spans],[float(x)*penalty for x in spans.attrs['scores']])).items(), key=lambda x: x[1], reverse=True) )
|
197 |
-
|
198 |
-
company_list = list(set([str(span).replace('\'s', '') for span in filter_spans(spans, keep_longest=True)]))[:limit_outputs]
|
199 |
out.append(get_company_sectors(company_list))
|
200 |
-
|
201 |
return out
|
202 |
|
203 |
#def _inference_summary_model_pipeline(text):
|
@@ -342,19 +340,20 @@ def inference(input_batch,isurl,use_archive,filt_companies_topic,limit_companies
|
|
342 |
df['topic'] = pd.DataFrame(news_sectors).iloc[:, 0]
|
343 |
#df['sector_pred'] = pd.DataFrame(_topic2sector(topics)).iloc[:, 0]
|
344 |
print("[i] Pandas output shape:",df.shape)
|
345 |
-
|
346 |
#[[], [('Nvidia', 'Information Technology')], [('Twitter', 'Communication Services'), ('Apple', 'Information Technology')], [], [], [], [], [], []]
|
347 |
df["company"] = np.nan
|
348 |
df["sector"] = np.nan
|
|
|
|
|
349 |
for idx in range(len(df.index)):
|
350 |
if ner_labels[idx]: #not empty
|
351 |
for ner in ner_labels[idx]:
|
352 |
if filt_companies_topic:
|
353 |
if news_sectors[idx] != ner[1]:
|
354 |
continue
|
355 |
-
|
356 |
-
|
357 |
-
|
358 |
|
359 |
title = "ESG API Demo"
|
360 |
description = """This is a demonstration of the full ESG pipeline backend where given a list of URL (english, news) the news contents are extracted, using extractnet, and fed to three models:
|
@@ -372,13 +371,21 @@ API input parameters:
|
|
372 |
|
373 |
"""
|
374 |
examples = [[ [['https://www.bbc.com/news/uk-62732447'],
|
375 |
-
['https://www.bbc.com/news/business-62747401'],
|
376 |
-
['https://www.bbc.com/news/technology-62744858'],
|
377 |
['https://www.bbc.com/news/science-environment-62758811'],
|
378 |
-
['https://www.theguardian.com/business/2022/sep/02/nord-stream-1-gazprom-announces-indefinite-shutdown-of-pipeline'],
|
379 |
-
['https://www.bbc.com/news/world-europe-62766867'],
|
380 |
['https://www.bbc.com/news/business-62524031'],
|
381 |
['https://www.bbc.com/news/business-62728621'],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
382 |
['https://www.bbc.com/news/science-environment-62680423']],'url',False,False,5]]
|
383 |
demo = gr.Interface(fn=inference,
|
384 |
inputs=[gr.Dataframe(label='input batch', col_count=1, datatype='str', type='array', wrap=True),
|
|
|
161 |
standard_names_tuples.append(name_match[:2])
|
162 |
|
163 |
for std_comp_name, _ in standard_names_tuples:
|
164 |
+
sectors = list(DF_SP500[['Name','Sector','Symbol']].where( (DF_SP500.Name == std_comp_name) | (DF_SP500.Symbol == std_comp_name)).dropna().itertuples(index=False, name=None))
|
165 |
output += sectors
|
166 |
return output
|
167 |
|
|
|
194 |
for doc in nlp.pipe(text):
|
195 |
spans = doc.spans["sc"]
|
196 |
#comp_raw_text = dict( sorted( dict(zip([str(x) for x in spans],[float(x)*penalty for x in spans.attrs['scores']])).items(), key=lambda x: x[1], reverse=True) )
|
197 |
+
company_list = list(set([str(span).replace('\'s', '').replace('’s','') for span in filter_spans(spans, keep_longest=True)]))[:limit_outputs]
|
|
|
198 |
out.append(get_company_sectors(company_list))
|
|
|
199 |
return out
|
200 |
|
201 |
#def _inference_summary_model_pipeline(text):
|
|
|
340 |
df['topic'] = pd.DataFrame(news_sectors).iloc[:, 0]
|
341 |
#df['sector_pred'] = pd.DataFrame(_topic2sector(topics)).iloc[:, 0]
|
342 |
print("[i] Pandas output shape:",df.shape)
|
|
|
343 |
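# Example shape of ner_labels (one list per df row; each tuple is (Name, Sector, Symbol)), e.g.: [[], [('Nvidia', 'Information Technology', 'NVDA')], [('Twitter', 'Communication Services', 'TWTR'), ('Apple', 'Information Technology', 'AAPL')], [], [], [], [], [], []]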
|
344 |
df["company"] = np.nan
|
345 |
df["sector"] = np.nan
|
346 |
+
df["symbol"] = np.nan
|
347 |
+
dfo = pd.DataFrame(columns=['E','S','G','URL','sent_lbl','sent_score','sector_pred','company','sector','symbol'])
|
348 |
for idx in range(len(df.index)):
|
349 |
if ner_labels[idx]: #not empty
|
350 |
for ner in ner_labels[idx]:
|
351 |
if filt_companies_topic:
|
352 |
if news_sectors[idx] != ner[1]:
|
353 |
continue
|
354 |
+
dfo = pd.concat( [dfo, df.loc[[idx]].assign(company=ner[0], sector=ner[1], symbol=ner[2])], join='outer', ignore_index=True) #axis=0
|
355 |
+
print("[i] Pandas output shape:",dfo.shape)
|
356 |
+
return dfo #ner_labels, {'E':float(prob_outs[0]),"S":float(prob_outs[1]),"G":float(prob_outs[2])},{sentiment['label']:float(sentiment['score'])},"**Summary:**\n\n" + summary
|
357 |
|
358 |
title = "ESG API Demo"
|
359 |
description = """This is a demonstration of the full ESG pipeline backend where given a list of URL (english, news) the news contents are extracted, using extractnet, and fed to three models:
|
|
|
371 |
|
372 |
"""
|
373 |
examples = [[ [['https://www.bbc.com/news/uk-62732447'],
|
|
|
|
|
374 |
['https://www.bbc.com/news/science-environment-62758811'],
|
|
|
|
|
375 |
['https://www.bbc.com/news/business-62524031'],
|
376 |
['https://www.bbc.com/news/business-62728621'],
|
377 |
+
["https://www.knowesg.com/investors/blackstone-and-sphera-work-together-for-portfolio-decarbonization-program-17022022"],
|
378 |
+
["https://www.esgtoday.com/amazon-partners-with-matt-damons-water-org-to-provide-water-access-to-100-million-people/"],
|
379 |
+
["https://www.esgtoday.com/walmart-allocates-over-1-billion-to-renewable-energy-sustainable-buildings-circular-economy/"],
|
380 |
+
["https://www.esgtoday.com/anglo-american-ties-interest-on-745-million-bond-to-climate-water-job-creation-goals/"],
|
381 |
+
["https://www.esgtoday.com/blackrock-acquires-new-zealand-solar-as-a-service-provider-solarzero/"],
|
382 |
+
["https://www.esgtoday.com/blackrock-strikes-back-against-climate-activism-claims/"],
|
383 |
+
["https://www.esgtoday.com/hm-to-remove-sustainability-labels-from-products-following-investigation-by-regulator/"],
|
384 |
+
["https://www.knowesg.com/sustainable-finance/exxonmobil-fails-the-energy-transition-due-to-failed-governance-structure-04122021"],
|
385 |
+
["https://www.knowesg.com/companies/tesla-is-investigated-by-the-securities-and-exchange-commission-sec-on-solar-07122021"],
|
386 |
+
["https://www.knowesg.com/tech/pcg-and-exxonmobil-will-collaborate-on-plastic-recycling-in-malaysia-20092022"],
|
387 |
+
["https://esgnews.com/nike-launches-community-climate-resilience-program-with-2-million-grant-to-trust-for-public-land/"],
|
388 |
+
["https://esgnews.com/walmart-and-unitedhealth-group-collaborate-to-deliver-access-to-high-quality-affordable-health-care/"],
|
389 |
['https://www.bbc.com/news/science-environment-62680423']],'url',False,False,5]]
|
390 |
demo = gr.Interface(fn=inference,
|
391 |
inputs=[gr.Dataframe(label='input batch', col_count=1, datatype='str', type='array', wrap=True),
|