Spaces:

ESG-TFM-UV
/

ESG_API_BATCH

Build error

App Files Community

rdose committed on Sep 21, 2022

Commit

8144261

1 Parent(s): 18c6669

Update app.py

Browse files

Files changed (1) hide show

app.py +19 -12

app.py CHANGED Viewed

@@ -161,7 +161,7 @@ def get_company_sectors(extracted_names, threshold=0.95):
       standard_names_tuples.append(name_match[:2])
   for std_comp_name, _ in standard_names_tuples:
-    sectors = list(DF_SP500[['Name','Sector']].where( (DF_SP500.Name == std_comp_name) | (DF_SP500.Symbol == std_comp_name)).dropna().itertuples(index=False, name=None))
     output += sectors
   return output
@@ -194,10 +194,8 @@ def _inference_ner_spancat(text, limit_outputs=10):
     for doc in nlp.pipe(text):
         spans = doc.spans["sc"]
         #comp_raw_text = dict( sorted( dict(zip([str(x) for x in spans],[float(x)*penalty for x in spans.attrs['scores']])).items(), key=lambda x: x[1], reverse=True) )
-        company_list = list(set([str(span).replace('\'s', '') for span in filter_spans(spans, keep_longest=True)]))[:limit_outputs]
         out.append(get_company_sectors(company_list))
     return out
 #def _inference_summary_model_pipeline(text):
@@ -342,19 +340,20 @@ def inference(input_batch,isurl,use_archive,filt_companies_topic,limit_companies
     df['topic'] = pd.DataFrame(news_sectors).iloc[:, 0]
     #df['sector_pred'] = pd.DataFrame(_topic2sector(topics)).iloc[:, 0]
     print("[i] Pandas output shape:",df.shape)
     #[[], [('Nvidia', 'Information Technology')], [('Twitter', 'Communication Services'), ('Apple', 'Information Technology')], [], [], [], [], [], []]
     df["company"] = np.nan
     df["sector"] = np.nan
     for idx in range(len(df.index)):
       if ner_labels[idx]: #not empty
         for ner in ner_labels[idx]:
           if filt_companies_topic:
               if news_sectors[idx] != ner[1]:
                   continue
-          df = pd.concat( [df, df.loc[[idx]].assign(company=ner[0], sector=ner[1])], join='outer', ignore_index=True) #axis=0
-    return df #ner_labels, {'E':float(prob_outs[0]),"S":float(prob_outs[1]),"G":float(prob_outs[2])},{sentiment['label']:float(sentiment['score'])},"**Summary:**\n\n" + summary
 title = "ESG API Demo"
 description = """This is a demonstration of the full ESG pipeline backend where given a list of URL (english, news) the news contents are extracted, using extractnet, and fed to three models:
@@ -372,13 +371,21 @@ API input parameters:
 """
 examples = [[ [['https://www.bbc.com/news/uk-62732447'],
-            ['https://www.bbc.com/news/business-62747401'],
-            ['https://www.bbc.com/news/technology-62744858'],
             ['https://www.bbc.com/news/science-environment-62758811'],
-            ['https://www.theguardian.com/business/2022/sep/02/nord-stream-1-gazprom-announces-indefinite-shutdown-of-pipeline'],
-            ['https://www.bbc.com/news/world-europe-62766867'],
             ['https://www.bbc.com/news/business-62524031'],
             ['https://www.bbc.com/news/business-62728621'],
             ['https://www.bbc.com/news/science-environment-62680423']],'url',False,False,5]]
 demo = gr.Interface(fn=inference,
                     inputs=[gr.Dataframe(label='input batch', col_count=1, datatype='str', type='array', wrap=True),

       standard_names_tuples.append(name_match[:2])
   for std_comp_name, _ in standard_names_tuples:
+    sectors = list(DF_SP500[['Name','Sector','Symbol']].where( (DF_SP500.Name == std_comp_name) | (DF_SP500.Symbol == std_comp_name)).dropna().itertuples(index=False, name=None))
     output += sectors
   return output
     for doc in nlp.pipe(text):
         spans = doc.spans["sc"]
         #comp_raw_text = dict( sorted( dict(zip([str(x) for x in spans],[float(x)*penalty for x in spans.attrs['scores']])).items(), key=lambda x: x[1], reverse=True) )
+        company_list = list(set([str(span).replace('\'s', '').replace('’s','') for span in filter_spans(spans, keep_longest=True)]))[:limit_outputs]
         out.append(get_company_sectors(company_list))
     return out
 #def _inference_summary_model_pipeline(text):
     df['topic'] = pd.DataFrame(news_sectors).iloc[:, 0]
     #df['sector_pred'] = pd.DataFrame(_topic2sector(topics)).iloc[:, 0]
     print("[i] Pandas output shape:",df.shape)
     #[[], [('Nvidia', 'Information Technology')], [('Twitter', 'Communication Services'), ('Apple', 'Information Technology')], [], [], [], [], [], []]
     df["company"] = np.nan
     df["sector"] = np.nan
+    df["symbol"] = np.nan
+    dfo = pd.DataFrame(columns=['E','S','G','URL','sent_lbl','sent_score','sector_pred','company','sector','symbol'])
     for idx in range(len(df.index)):
       if ner_labels[idx]: #not empty
         for ner in ner_labels[idx]:
           if filt_companies_topic:
               if news_sectors[idx] != ner[1]:
                   continue
+          dfo = pd.concat( [dfo, df.loc[[idx]].assign(company=ner[0], sector=ner[1], symbol=ner[2])], join='outer', ignore_index=True) #axis=0
+     print("[i] Pandas output shape:",dfo.shape)
+     return dfo #ner_labels, {'E':float(prob_outs[0]),"S":float(prob_outs[1]),"G":float(prob_outs[2])},{sentiment['label']:float(sentiment['score'])},"**Summary:**\n\n" + summary
 title = "ESG API Demo"
 description = """This is a demonstration of the full ESG pipeline backend where given a list of URL (english, news) the news contents are extracted, using extractnet, and fed to three models:
 """
 examples = [[ [['https://www.bbc.com/news/uk-62732447'],
             ['https://www.bbc.com/news/science-environment-62758811'],
             ['https://www.bbc.com/news/business-62524031'],
             ['https://www.bbc.com/news/business-62728621'],
+            ["https://www.knowesg.com/investors/blackstone-and-sphera-work-together-for-portfolio-decarbonization-program-17022022"],
+            ["https://www.esgtoday.com/amazon-partners-with-matt-damons-water-org-to-provide-water-access-to-100-million-people/"],
+            ["https://www.esgtoday.com/walmart-allocates-over-1-billion-to-renewable-energy-sustainable-buildings-circular-economy/"],
+            ["https://www.esgtoday.com/anglo-american-ties-interest-on-745-million-bond-to-climate-water-job-creation-goals/"],
+            ["https://www.esgtoday.com/blackrock-acquires-new-zealand-solar-as-a-service-provider-solarzero/"],
+            ["https://www.esgtoday.com/blackrock-strikes-back-against-climate-activism-claims/"],
+            ["https://www.esgtoday.com/hm-to-remove-sustainability-labels-from-products-following-investigation-by-regulator/"],
+            ["https://www.knowesg.com/sustainable-finance/exxonmobil-fails-the-energy-transition-due-to-failed-governance-structure-04122021"],
+            ["https://www.knowesg.com/companies/tesla-is-investigated-by-the-securities-and-exchange-commission-sec-on-solar-07122021"],
+            ["https://www.knowesg.com/tech/pcg-and-exxonmobil-will-collaborate-on-plastic-recycling-in-malaysia-20092022"],
+            ["https://esgnews.com/nike-launches-community-climate-resilience-program-with-2-million-grant-to-trust-for-public-land/"],
+            ["https://esgnews.com/walmart-and-unitedhealth-group-collaborate-to-deliver-access-to-high-quality-affordable-health-care/"],
             ['https://www.bbc.com/news/science-environment-62680423']],'url',False,False,5]]
 demo = gr.Interface(fn=inference,
                     inputs=[gr.Dataframe(label='input batch', col_count=1, datatype='str', type='array', wrap=True),