File size: 6,587 Bytes
5430213
 
bd02e1e
ecec571
bd02e1e
 
 
 
283921a
 
4ced569
 
283921a
 
80a2499
199825b
9072761
ee98c85
 
132ce6b
 
 
 
 
ae55747
 
 
132ce6b
199825b
4252e62
a6d76f1
80a2499
 
 
 
4252e62
ee98c85
eba9c51
4252e62
283921a
dabe490
 
199825b
 
 
4252e62
132ce6b
 
 
 
cb360ef
0721d35
 
793fd68
6e8b49f
0721d35
dabe490
9072761
7a5f403
9072761
 
 
 
199825b
4500272
ae55747
4ced569
dabe490
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4500272
dabe490
 
 
 
23eea63
dabe490
4ced569
 
 
 
 
132ce6b
4ced569
 
 
 
 
793fd68
4ced569
 
 
cb360ef
4ced569
 
dabe490
 
 
793fd68
4ced569
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
import requests
import gradio as gr
import pandas as pd
import os

from newsapi import NewsApiClient
from datetime import date, timedelta
from transformers import pipeline

# NewsAPI key, read from the "newsapi" environment variable (raises KeyError when
# it is unset). The HF_TOKEN name is kept so existing references keep working.
HF_TOKEN = os.environ["newsapi"]
# Initialization
newsapi = NewsApiClient(api_key=HF_TOKEN)

classifier = pipeline(model="cardiffnlp/twitter-roberta-base-sentiment")
# Evaluated once at import time, so both queries below cover the start-up date.
today = str(date.today())

# Display names for the classifier's raw labels; any other label counts as Positive.
_SENTIMENT_NAMES = {'LABEL_0': 'Negative', 'LABEL_1': 'Neutral'}


def _label_sentiment(articles):
  """Return one sentiment name per article, in the same order as *articles*.

  The article's ``content`` is classified; when it is missing or empty the
  ``description`` and then the ``title`` are tried instead. An article with no
  text at all is labelled 'Neutral' without calling the classifier, so the
  result always has exactly ``len(articles)`` entries.
  """
  labels = []
  for entry in articles:
    text = entry.get('content') or entry.get('description') or entry.get('title')
    if not text:
      labels.append('Neutral')
      continue
    # Classify once per article and map the raw label afterwards.
    raw_label = classifier(text)[0]['label']
    labels.append(_SENTIMENT_NAMES.get(raw_label, 'Positive'))
  return labels


print("******** Outside Inference function ********")
# Never print the key itself: start-up logs are not a safe place for secrets.
print(f"HF_TOKEN is set - {bool(HF_TOKEN)}")

#top-headlines
all_top_headlines = newsapi.get_top_headlines(country='in')
sentiment_tophead = _label_sentiment(all_top_headlines['articles'])
print(f"sentiment_tophead length is {len(sentiment_tophead)}")
print(f"all_top_headlines length is {len(all_top_headlines['articles'])}")
print("************** sentiment start ****************")
print(sentiment_tophead)
print("************** sentiment end ****************")

#times of india
# NOTE(review): NewsAPI documents `domains` as bare domain names without a scheme;
# confirm whether the 'http://' prefix below is intentional.
all_articles_toi = newsapi.get_everything(sources='the-times-of-india',
                                    domains= 'http://timesofindia.indiatimes.com', #'timesofindia.indiatimes.com',
                                    from_param=today,
                                    to=today,
                                    language='en',
                                    sort_by='relevancy',)
sentiment_toi = _label_sentiment(all_articles_toi['articles'])
print(f"sentiment_toi length is {len(sentiment_toi)}")
print(f"all_articles_toi length is {len(all_articles_toi['articles'])}")


#Drivers - positive / negative / neutral
# Column order of the news table returned by the drivers.
_NEWS_COLUMNS = ['description', 'content', 'url', 'urlToImage', 'sentiment']


def _select_source(newssource):
  """Return ``(all_articles, sentiment)`` for *newssource*, or None if it is unknown."""
  if newssource == "Times Of India":
    return all_articles_toi, sentiment_toi
  if newssource == "Top Headlines":
    return all_top_headlines, sentiment_tophead
  return None


def _news_by_sentiment(newssource, wanted, banner):
  """Build the news table for *newssource* and keep only rows labelled *wanted*.

  Args:
    newssource: outlet name, "Times Of India" or "Top Headlines".
    wanted: sentiment value to keep ('Positive', 'Negative' or 'Neutral').
    banner: header line printed before the diagnostic output.

  Returns:
    A pandas DataFrame with the columns listed in ``_NEWS_COLUMNS``. It is empty
    when *newssource* is None or not a known outlet.
  """
  # Function-scope import so the file's top-level import block stays untouched.
  from html import escape

  print(banner)
  print(f"Newssource is - {newssource}")

  selected = _select_source(newssource)
  if selected is None:
    # Nothing (or something unknown) was chosen: return an empty table rather
    # than failing on unbound locals.
    print("No matching news source - returning an empty table")
    return pd.DataFrame(columns=_NEWS_COLUMNS)

  all_articles, sentiment = selected
  articles = all_articles['articles']

  links = []
  images = []
  for entry in articles:
    link = entry.get('url')
    image = entry.get('urlToImage')
    # Attribute values come from the news feed, so quote and escape them; a
    # missing URL yields an empty cell instead of a TypeError on concatenation.
    links.append('<a href="' + escape(link, quote=True) + '" target="_blank">Click here for the original news article</a>' if link else '')
    images.append('<img src="' + escape(image, quote=True) + '">' if image else '')

  dictnews = {
    'description': [entry.get('description') for entry in articles],
    'content': [entry.get('content') for entry in articles],
    'url': links,
    'urlToImage': images,
    'sentiment': sentiment,
  }
  for column in _NEWS_COLUMNS:
    print(f"{column} length is - {len(dictnews[column])}")

  df = pd.DataFrame.from_dict(dictnews)
  df = df.loc[df['sentiment'] == wanted]
  print(f"dataframe shape is :,{df.shape}")
  return df


#Driver positive
def inference_pos(newssource):
  """Return the articles of *newssource* whose sentiment is 'Positive'."""
  return _news_by_sentiment(newssource, 'Positive', "********************* Positive News **************************")


#Driver - negative
def inference_neg(newssource):
  """Return the articles of *newssource* whose sentiment is 'Negative'."""
  return _news_by_sentiment(newssource, 'Negative', "********************* Negative News ***********************")


#Driver - neutral
def inference_neut(newssource):
  """Return the articles of *newssource* whose sentiment is 'Neutral'."""
  return _news_by_sentiment(newssource, 'Neutral', "********************* Neutral News ***********************")


#Gradio Blocks
# UI layout: an outlet picker, three sentiment buttons and one shared results table.
with gr.Blocks() as demo:
  with gr.Row():
    # Preselect an outlet so a button click never passes None to the handlers,
    # which matches neither of the outlet names they compare against.
    in_newssource = gr.Dropdown(["Times Of India", "Top Headlines"], value="Times Of India", label='Choose a News Outlet')

  with gr.Row():
    b1 = gr.Button("Get Positive News")
    b2 = gr.Button("Get Negative News")
    b3 = gr.Button("Get Neutral News")

  with gr.Row():
    # One datatype per table column: the url and urlToImage columns hold HTML
    # snippets, so they are rendered as markdown; the rest are plain strings.
    out_dataframe = gr.Dataframe(wrap=True, datatype = ["str", "str", "markdown", "markdown", "str"])

  # Every button reads the same dropdown and writes to the same table.
  b1.click(fn=inference_pos, inputs=in_newssource, outputs=out_dataframe)
  b2.click(fn=inference_neg, inputs=in_newssource, outputs=out_dataframe)
  b3.click(fn=inference_neut, inputs=in_newssource, outputs=out_dataframe)

demo.launch(debug=True, show_error=True)