import os
from datetime import datetime

import pandas as pd
import praw
from dotenv import load_dotenv
from GoogleNews import GoogleNews
from langchain_openai import ChatOpenAI
from transformers import pipeline

load_dotenv()
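# Expected .env entries (names taken from this script; values below are placeholders):
#   DAYS_TO_FETCH_NEWS=7d            # assumed GoogleNews period string, e.g. '7d'
#   NO_OF_NEWS_ARTICLES_TO_FETCH=10
#   REDDIT_USER_AGENT=...
#   REDDIT_CLIENT_ID=...
#   REDDIT_CLIENT_SECRET=...
#   OPENAI_API_KEY=...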
def fetch_news(topic):
    """
    Fetches recent Google News articles for a topic.

    Args:
        topic (str): Topic of interest.

    Returns:
        list: A list of dictionaries, each with 'News_Article' and 'URL' keys.
    """
    days_to_fetch_news = os.environ["DAYS_TO_FETCH_NEWS"]
    no_of_news_articles_to_fetch = int(os.environ["NO_OF_NEWS_ARTICLES_TO_FETCH"])

    googlenews = GoogleNews()
    googlenews.set_period(days_to_fetch_news)
    googlenews.get_news(topic)
    headlines = googlenews.get_texts()
    urls = googlenews.get_links()

    news_article_list = []
    for headline, url in zip(headlines, urls):
        if len(news_article_list) >= no_of_news_articles_to_fetch:
            break
        news_article_list.append({
            'News_Article': headline,
            'URL': url
        })
    return news_article_list
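# Example (illustrative output shape, not real data):
#   fetch_news('AAPL') -> [{'News_Article': '<headline text>', 'URL': '<link>'}, ...]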
def fetch_reddit_news(topic):
    """
    Fetches Reddit post headlines for a topic from r/nova, widening the time
    filter until at least 10 headlines are found or the filters run out.

    Args:
        topic (str): Topic of interest.

    Returns:
        set: Headline strings of the form '<title>, Date: <UTC timestamp>, URL: <url>'.
    """
    # Reference: https://medium.com/geekculture/a-complete-guide-to-web-scraping-reddit-with-python-16e292317a52
    reddit = praw.Reddit(
        client_id=os.environ["REDDIT_CLIENT_ID"],
        client_secret=os.environ["REDDIT_CLIENT_SECRET"],
        user_agent=os.environ["REDDIT_USER_AGENT"]
    )

    def format_headline(submission):
        created = datetime.utcfromtimestamp(int(submission.created_utc))
        return (submission.title + ', Date: ' + created.strftime('%Y-%m-%d %H:%M:%S')
                + ', URL: ' + submission.url)

    headlines = set()
    for submission in reddit.subreddit('nova').search(topic, time_filter='week'):
        headlines.add(format_headline(submission))
    if len(headlines) < 10:
        for submission in reddit.subreddit('nova').search(topic, time_filter='year'):
            headlines.add(format_headline(submission))
    if len(headlines) < 10:
        for submission in reddit.subreddit('nova').search(topic):  # no time filter
            headlines.add(format_headline(submission))
    return headlines
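# Example (assumes valid Reddit credentials in .env; output is illustrative):
#   fetch_reddit_news('AAPL') -> {'<title>, Date: 2024-03-01 12:00:00, URL: <link>', ...}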
def analyze_sentiment(article):
    """
    Analyzes the sentiment of a given news article.

    Args:
        article (str): Text of the news article.

    Returns:
        dict: A dictionary with 'News_Article' and 'Sentiment' keys.
    """
    # Analyze sentiment with a specific model; omit the `model` argument to use
    # the pipeline's default sentiment-analysis model instead.
    # Alternative: 'mrm8488/distilroberta-finetuned-financial-news-sentiment-analysis'
    classifier = pipeline(model='tabularisai/robust-sentiment-analysis')
    sentiment_result = classifier(str(article))
    analysis_result = {
        'News_Article': article,
        'Sentiment': sentiment_result
    }
    return analysis_result
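# Note: a transformers sentiment pipeline returns a list of dicts such as
# [{'label': '<label>', 'score': 0.98}] (exact label names depend on the model),
# which is why callers read Sentiment[0]['label'].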
def generate_summary_of_sentiment(sentiment_analysis_results):
    """
    Uses an LLM to generate a summary rationalizing the dominant sentiment
    across the analyzed news articles.
    """
    news_article_sentiment = str(sentiment_analysis_results)
    print("News article sentiment: " + news_article_sentiment)

    # ChatOpenAI reads OPENAI_API_KEY from the environment by default.
    model = ChatOpenAI(
        model="gpt-4o",
        temperature=0,
        max_tokens=None,
        timeout=None,
        max_retries=2,
        # api_key="...",   # pass the API key directly instead of using env vars
        # base_url="...",
        # organization="...",
        # other params...
    )
    messages = [
        {
            "role": "system",
            "content": (
                "You are a helpful assistant that looks at all news articles and "
                "their sentiment, along with the dominant sentiment, and generates "
                "a summary rationalizing the dominant sentiment. At the end of the "
                "summary, add URL links with dates for all the articles in Markdown "
                "format for Streamlit. Example: Check out the links: [link](url), 2024-03-01"
            )
        },
        {"role": "user", "content": f"News articles and their sentiments: {news_article_sentiment}"}
    ]
    response = model.invoke(messages)
    summary = response.content
    print("+++++++++++++++++++++++++++++++++++++++++++++++")
    print(summary)
    print("+++++++++++++++++++++++++++++++++++++++++++++++")
    return summary
def plot_sentiment_graph(sentiment_analysis_results):
    """
    Computes sentiment counts for plotting.

    Args:
        sentiment_analysis_results (list): Dictionaries with 'News_Article' and 'Sentiment' keys.

    Returns:
        pandas.Series: Count of articles per sentiment label.
    """
    df = pd.DataFrame(sentiment_analysis_results)
    print(df)
    sentiment_counts = df['Sentiment'].value_counts()

    # Plotting a pie chart (uncomment to run locally with matplotlib):
    # import matplotlib.pyplot as plt
    # fig = plt.figure(figsize=(5, 3))
    # plt.pie(sentiment_counts, labels=sentiment_counts.index, autopct='%1.1f%%', startangle=140)
    # plt.axis('equal')  # Equal aspect ratio ensures the pie is drawn as a circle.
    # plt.show()
    return sentiment_counts
def get_dominant_sentiment(sentiment_analysis_results):
    """
    Returns the overall sentiment label (e.g. negative, positive, or neutral)
    that occurs most often across the analysis results.

    Args:
        sentiment_analysis_results (list): Dictionaries with 'News_Article' and 'Sentiment' keys.

    Returns:
        str: The most frequent sentiment label.
    """
    df = pd.DataFrame(sentiment_analysis_results)
    print(df)
    print(df['Sentiment'])

    # Count the occurrences of each sentiment label.
    sentiment_counts = df['Sentiment'].value_counts().reset_index()
    sentiment_counts.columns = ['sentiment', 'count']
    print(sentiment_counts)

    # Find the sentiment with the highest count.
    dominant_sentiment = sentiment_counts.loc[sentiment_counts['count'].idxmax()]
    return dominant_sentiment['sentiment']
# Starting point of the program
if __name__ == '__main__':
    # Fetch news
    news_articles = fetch_news('AAPL')

    # Perform sentiment analysis for each news article
    analysis_results = []
    for article in news_articles:
        sentiment_analysis_result = analyze_sentiment(article['News_Article'])

        # Display sentiment analysis results
        print(f'News Article: {sentiment_analysis_result["News_Article"]} : '
              f'Sentiment: {sentiment_analysis_result["Sentiment"]}', '\n')

        analysis_results.append({
            'News_Article': sentiment_analysis_result["News_Article"],
            'Sentiment': sentiment_analysis_result["Sentiment"][0]['label']
        })

    # Determine the dominant sentiment across the analyzed articles
    dominant_sentiment = get_dominant_sentiment(analysis_results)
    print(dominant_sentiment)

    # Plot graph
    plot_sentiment_graph(analysis_results)