yaay committed on
Commit
ecc3a10
·
1 Parent(s): cdf6a21

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +102 -0
app.py ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 1. Install and Import Baseline Dependencies
2
+ from transformers import PegasusTokenizer, PegasusForConditionalGeneration
3
+ from bs4 import BeautifulSoup
4
+ import requests
5
+ import re
6
+ from transformers import pipeline
7
+ import csv
8
+ import streamlit as st
9
+
10
# Page title shown at the top of the Streamlit app.
st.title('Stocks Analysis Machine')

# Demo widget: a slider whose current value is echoed together with its square.
x = st.slider('Select a value')
st.write(
    x,
    'squared is',
    x * x,
)
14
+
15
+
16
# 2. Setup Model
# Tokenizer and model are both loaded from the same pretrained checkpoint name.
# NOTE(review): these loads run at module top level; presumably Streamlit
# re-runs them on each script execution -- confirm whether caching is wanted.
model_name = "human-centered-summarization/financial-summarization-pegasus"
tokenizer = PegasusTokenizer.from_pretrained(model_name)
model = PegasusForConditionalGeneration.from_pretrained(model_name)

# 3. Setup Pipeline
# Every later stage builds a dict keyed by the entries of this list.
monitored_tickers = ['ETH']

# 4.1. Search for Stock News using Google and Yahoo Finance
print('Searching for stock news for', monitored_tickers)
26
def search_for_stock_news_links(ticker, timeout=10):
    """Return the href of every link on a Google News search page for a ticker.

    The query is ``yahoo finance <ticker>`` with ``tbm=nws`` appended to the
    search URL. The result is unfiltered: whatever anchors the returned page
    contains are reported as-is.

    Args:
        ticker: Text interpolated into the search query string.
        timeout: Seconds passed to ``requests.get`` so a stalled connection
            raises instead of blocking the script indefinitely.

    Returns:
        A list of ``href`` attribute values, in document order.

    Raises:
        requests.exceptions.RequestException: propagated from ``requests.get``
            (connection failure, timeout, ...).
    """
    search_url = 'https://www.google.com/search?q=yahoo+finance+{}&tbm=nws'.format(ticker)
    r = requests.get(search_url, timeout=timeout)
    soup = BeautifulSoup(r.text, 'html.parser')
    # href=True restricts the match to anchors that actually carry an href;
    # indexing link['href'] on an anchor without one raises KeyError.
    atags = soup.find_all('a', href=True)
    return [link['href'] for link in atags]
33
+
34
# Unfiltered link lists from the search step, keyed by ticker.
raw_urls = {
    ticker: search_for_stock_news_links(ticker)
    for ticker in monitored_tickers
}

# 4.2. Strip out unwanted URLs
print('Cleaning URLs.')
# A link containing any of these substrings is dropped by strip_unwanted_urls.
exclude_list = ['maps', 'policies', 'preferences', 'accounts', 'support']
39
def strip_unwanted_urls(urls, exclude_list):
    """Extract clean, de-duplicated https URLs from raw href strings.

    An href is kept only if it contains ``https://`` and none of the
    substrings in ``exclude_list``. For kept hrefs, the first ``http(s)://``
    run of non-whitespace characters is taken and cut at the first ``&``,
    which drops any trailing query parameters.

    Args:
        urls: Iterable of raw href strings.
        exclude_list: Substrings that disqualify an href when present.

    Returns:
        A list of unique URLs in first-seen order, so the result is stable
        between runs.
    """
    val = []
    for url in urls:
        if 'https://' not in url or any(exc in url for exc in exclude_list):
            continue
        matches = re.findall(r'(https?://\S+)', url)
        # 'https://' with nothing after it satisfies the substring check but
        # not the regex (\S+ needs at least one character); skip it rather
        # than indexing an empty list.
        if not matches:
            continue
        val.append(matches[0].split('&')[0])
    # dict.fromkeys removes duplicates while preserving insertion order.
    return list(dict.fromkeys(val))
46
+
47
# Filtered, de-duplicated article URLs, keyed by ticker.
cleaned_urls = {
    ticker: strip_unwanted_urls(raw_urls[ticker], exclude_list)
    for ticker in monitored_tickers
}

# 4.3. Search and Scrape Cleaned URLs
print('Scraping news links.')
51
def scrape_and_process(URLs):
    """Download each URL and return the opening text of its paragraphs.

    For every page, the text of all ``<p>`` elements is joined with single
    spaces, split on single spaces, and the first 350 pieces are re-joined.

    Args:
        URLs: Iterable of page URLs to fetch.

    Returns:
        A list with one truncated text string per URL, in input order.
        Any exception raised by ``requests.get`` propagates to the caller.
    """
    def leading_text(url):
        # Fetch the page and keep only what sits inside <p> tags.
        page = requests.get(url)
        paragraphs = BeautifulSoup(page.text, 'html.parser').find_all('p')
        body = ' '.join([p.text for p in paragraphs])
        # Cap the article at 350 space-separated pieces.
        return ' '.join(body.split(' ')[:350])

    return [leading_text(url) for url in URLs]
62
# Truncated article texts, keyed by ticker, in the same order as cleaned_urls.
articles = {
    ticker: scrape_and_process(cleaned_urls[ticker])
    for ticker in monitored_tickers
}

# 4.4. Summarise all Articles
print('Summarizing articles.')
66
def summarize(articles):
    """Generate one summary per article with the module-level model.

    Each article is encoded with the module-level ``tokenizer``, passed to
    ``model.generate`` (``max_length=55``, ``num_beams=5``,
    ``early_stopping=True``), and the first generated sequence is decoded
    with special tokens skipped.

    Args:
        articles: Iterable of article text strings.

    Returns:
        A list of summary strings, in input order.
    """
    def summarize_one(text):
        token_ids = tokenizer.encode(text, return_tensors="pt")
        generated = model.generate(token_ids, max_length=55, num_beams=5, early_stopping=True)
        return tokenizer.decode(generated[0], skip_special_tokens=True)

    return [summarize_one(article) for article in articles]
74
+
75
# Summaries keyed by ticker, aligned index-for-index with articles.
summaries = {
    ticker: summarize(articles[ticker])
    for ticker in monitored_tickers
}

# 5. Adding Sentiment Analysis
print('Calculating sentiment.')
# No model name is passed, so the choice of model is left to transformers.
sentiment = pipeline("sentiment-analysis")
# Each ticker's whole list of summaries is scored in a single call.
scores = {
    ticker: sentiment(summaries[ticker])
    for ticker in monitored_tickers
}

# # 6. Exporting Results
print('Exporting results')
84
def create_output_array(summaries, scores, urls):
    """Flatten the per-ticker results into a list of rows.

    Iterates the module-level ``monitored_tickers`` and, for each summary of
    a ticker, pairs it with the score and URL at the same index.

    Args:
        summaries: Mapping of ticker to a list of summary strings.
        scores: Mapping of ticker to a list of dicts with ``'label'`` and
            ``'score'`` keys, indexed like ``summaries``.
        urls: Mapping of ticker to a list of URLs, indexed like ``summaries``.

    Returns:
        A list of ``[ticker, summary, label, score, url]`` rows.
    """
    rows = []
    for ticker in monitored_tickers:
        for idx, summary in enumerate(summaries[ticker]):
            rows.append([
                ticker,
                summary,
                scores[ticker][idx]['label'],
                scores[ticker][idx]['score'],
                urls[ticker][idx],
            ])
    return rows
97
# Build the data rows, then put the column header row in front of them.
final_output = create_output_array(summaries, scores, cleaned_urls)
final_output.insert(0, ['Ticker', 'Summary', 'Sentiment', 'Sentiment Score', 'URL'])

# encoding is explicit: summaries come from scraped web pages and may contain
# characters the platform's default codec cannot encode, which would raise
# UnicodeEncodeError during the write. newline='' is what the csv module
# expects for files it writes.
with open('ethsummaries.csv', mode='w', newline='', encoding='utf-8') as f:
    csv_writer = csv.writer(f, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
    csv_writer.writerows(final_output)