Spaces:

Jayeshbhaal
/

news_filter_for_social_wellbeing

Runtime error

File size: 6,587 Bytes

import requests
import gradio as gr
import pandas as pd
import os

from newsapi import NewsApiClient
from datetime import date, timedelta
from transformers import pipeline

# NOTE: despite its name, HF_TOKEN holds the NewsAPI key. It is read from the
# "newsapi" environment variable; a KeyError is raised when it is not set.
HF_TOKEN = os.environ["newsapi"]
# Initialization
newsapi = NewsApiClient(api_key=HF_TOKEN)

classifier = pipeline(model="cardiffnlp/twitter-roberta-base-sentiment")
today = str(date.today())

# Display names for the raw classifier labels. Any label other than LABEL_0 /
# LABEL_1 is reported as 'Positive' (see the .get() default below).
_SENTIMENT_BY_LABEL = {'LABEL_0': 'Negative', 'LABEL_1': 'Neutral'}


def _article_sentiment(entry):
  """Return 'Negative', 'Neutral' or 'Positive' for one article dict.

  The article's 'content' is classified; when it is missing or None the
  'description' and then the 'title' are tried instead. An article with no
  text at all is reported as 'Neutral' so that the sentiment list always has
  exactly one item per article (the inference functions zip the two together
  into a DataFrame). The classifier is invoked at most once per article.
  """
  text = entry.get('content') or entry.get('description') or entry.get('title')
  if not text:
    return 'Neutral'
  label = classifier(text)[0]['label']
  return _SENTIMENT_BY_LABEL.get(label, 'Positive')


print("******** Outside Inference function ********")
# Never echo the key itself: application logs may be visible to other people.
print(f"HF_TOKEN is set - {bool(HF_TOKEN)}")

#top-headlines
all_top_headlines = newsapi.get_top_headlines(country='in')
sentiment_tophead = [_article_sentiment(entry) for entry in all_top_headlines['articles']]
print(f"sentiment_tophead length is {len(sentiment_tophead)}")
print(f"all_top_headlines length is {len(all_top_headlines['articles'])}")
print("************** sentiment start ****************")
print(sentiment_tophead)
print("************** sentiment end ****************")

#times of india
# NOTE(review): `domains` is given with an 'http://' scheme; the bare host
# 'timesofindia.indiatimes.com' was the previously tried alternative. Left
# unchanged here -- confirm against the NewsAPI documentation which is expected.
all_articles_toi = newsapi.get_everything(sources='the-times-of-india',
                                    domains= 'http://timesofindia.indiatimes.com',
                                    from_param=today,
                                    to=today,
                                    language='en',
                                    sort_by='relevancy',)
sentiment_toi = [_article_sentiment(entry) for entry in all_articles_toi['articles']]
print(f"sentiment_toi length is {len(sentiment_toi)}")
print(f"all_articles_toi length is {len(all_articles_toi['articles'])}")


#Driver positive
# Column order of every frame returned by the inference_* functions.
_NEWS_COLUMNS = ['description', 'content', 'url', 'urlToImage', 'sentiment']


def _news_by_sentiment(newssource, wanted):
  """Build the news table for `newssource`, keeping only `wanted` rows.

  Parameters:
    newssource: "Times Of India" or "Top Headlines"; any other value
      (including None, i.e. nothing selected) yields an empty table.
    wanted: the sentiment to keep - 'Positive', 'Negative' or 'Neutral'.

  Returns:
    A pandas DataFrame with the columns description, content, url,
    urlToImage and sentiment. 'url' and 'urlToImage' hold small HTML
    snippets (a link and an <img> tag); they are empty strings when the
    article has no such value.

  Reads the module-level article and sentiment lists prepared at start-up.
  """
  # Local import so that this block does not depend on the file's import list.
  from html import escape

  if newssource == "Times Of India":
    sentiment, all_articles = sentiment_toi, all_articles_toi
  elif newssource == "Top Headlines":
    sentiment, all_articles = sentiment_tophead, all_top_headlines
  else:
    # No (or an unknown) outlet selected: return an empty table rather than
    # failing on unassigned locals.
    print(f"Newssource is - {newssource}")
    return pd.DataFrame(columns=_NEWS_COLUMNS)

  articles = all_articles['articles']

  description = [entry.get('description') for entry in articles]
  content = [entry.get('content') for entry in articles]
  # Attribute values come from an external service, so they are quoted and
  # escaped; a missing value gives an empty cell instead of a TypeError.
  url = ['<a href="' + escape(entry['url'], quote=True) + '" target="_blank">Click here for the original news article</a>'
         if entry.get('url') else ''
         for entry in articles]
  urlToImage = ['<img src="' + escape(entry['urlToImage'], quote=True) + '">'
                if entry.get('urlToImage') else ''
                for entry in articles]

  print(f"Newssource is - {newssource}")
  print(f"description length is - {len(description)}")
  print(f"content length is - {len(content)}")
  print(f"url length is - {len(url)}")
  print(f"urlToImage length is - {len(urlToImage)}")
  print(f"sentiment length is - {len(sentiment)}")

  dictnews = { 'description' : description, 'content' : content, 'url' : url, 'urlToImage' : urlToImage, 'sentiment' : sentiment}

  df = pd.DataFrame.from_dict(dictnews)
  df = df.loc[df['sentiment'] == wanted]
  print(f"dataframe shape is :,{df.shape}")
  return df


def inference_pos(newssource): #, date):
  """Return the articles of `newssource` that were classified as Positive."""
  print("********************* Positive News **************************")
  return _news_by_sentiment(newssource, 'Positive')

#Driver - negative
def inference_neg(newssource): #, date):
  """Return the articles of `newssource` that were classified as Negative."""
  print("********************* Negative News ***********************")
  return _news_by_sentiment(newssource, 'Negative')

#Driver - neutral
def inference_neut(newssource): #, date):
  """Return the articles of `newssource` that were classified as Neutral."""
  print("********************* Neutral News ***********************")
  return _news_by_sentiment(newssource, 'Neutral')


#Gradio Blocks
with gr.Blocks() as demo:
  # Row 1: outlet selector. Its value is the single input of every handler.
  with gr.Row():
    in_newssource = gr.Dropdown(choices=["Times Of India", "Top Headlines"], label='Choose a News Outlet')

  # Row 2: one button per sentiment class.
  with gr.Row():
    b1 = gr.Button("Get Positive News")
    b2 = gr.Button("Get Negative News")
    b3 = gr.Button("Get Neutral News")

  # Row 3: result table. The five datatypes line up with the columns the
  # handlers return (description, content, url, urlToImage, sentiment); the
  # two HTML-snippet columns are declared as "markdown".
  with gr.Row():
    out_dataframe = gr.Dataframe(
      wrap=True,
      datatype=["str", "str", "markdown", "markdown", "str"],
    )

  # Wire each button to its handler; all share the same input and output.
  for button, handler in ((b1, inference_pos), (b2, inference_neg), (b3, inference_neut)):
    button.click(fn=handler, inputs=in_newssource, outputs=out_dataframe)

demo.launch(debug=True, show_error=True)