IoannisTr committed on
Commit
c02a60c
·
1 Parent(s): 5e08cd9

Upload stocks.py

Browse files
Files changed (1) hide show
  1. stocks.py +122 -0
stocks.py ADDED
@@ -0,0 +1,122 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from asyncio.windows_events import NULL
2
+ from configparser import ParsingError
3
+ from logging import raiseExceptions
4
+ from sympy import comp
5
+ import yfinance as yf
6
+ import requests
7
+ import pandas as pd
8
+ from bs4 import BeautifulSoup
9
+
10
class Stock_Data(object):
    '''
    Collects data about a single stock from two sources.

    The MarketWatch quote page is scraped for the market status, the current
    price and the latest article headlines. Daily price history is downloaded
    through the "yfinance" module.
    '''

    # The two fixed parts of a MarketWatch quote URL; the ticker goes between them.
    _QUOTE_URL_PREFIX = 'https://www.marketwatch.com/investing/stock/'
    _QUOTE_URL_SUFFIX = '?mod=search_symbol'
    # Seconds to wait for MarketWatch before giving up; without a timeout
    # requests.get() can block indefinitely.
    _REQUEST_TIMEOUT = 30
    # Column names of the dataframe returned by article_parser().
    _HEADLINE_COLUMNS = ["ticker", "headline"]

    def Ticker(self, tick):
        '''
        Stores the company's ticker in the module-level name "ticker" and returns it.
        '''
        global ticker
        ticker = tick

        return ticker

    @staticmethod
    def _quote_url(symbol):
        '''
        Builds the MarketWatch quote page URL for the given ticker symbol.
        '''
        return Stock_Data._QUOTE_URL_PREFIX + symbol + Stock_Data._QUOTE_URL_SUFFIX

    @staticmethod
    def _status_text(parsed_page):
        '''
        Returns the text of the market status element of a parsed quote page.

        Raises ParsingError when the page has no such element, which is what
        happens when the quote page could not be scraped properly.
        '''
        status_div = parsed_page.find("div", class_="status")
        if status_div is None:
            raise ParsingError("HTML code of MarketWatch website was not scraped and current status can not be found")
        return status_div.text

    @staticmethod
    def _parse_price(text):
        '''
        Converts a price string such as "1,234.50" to a float.
        '''
        return float(text.replace(',', ''))

    @staticmethod
    def _headline_rows(links, symbol, limit):
        '''
        Builds [symbol, headline] rows from the first "limit" links whose text
        is not blank. Each link only needs a "text" attribute.
        '''
        rows = []
        for link in links:
            if len(rows) >= limit:
                break
            title = link.text.strip()
            if not title:
                # A blank headline carries no information, so it is skipped.
                continue
            rows.append([symbol, title])
        return rows

    def _fetch_quote_page(self, Ticker):
        '''
        Downloads and parses the MarketWatch quote page of the given ticker.

        The ticker, the URL and the parsed page are also published in the
        module-level names "company_ticker", "final_link" and "soup", as
        earlier versions of this class did.
        '''
        global company_ticker
        global final_link
        global soup
        company_ticker = Ticker
        final_link = self._quote_url(company_ticker)

        page = requests.get(final_link, timeout=self._REQUEST_TIMEOUT)
        parsed_page = BeautifulSoup(page.text, "lxml")
        soup = parsed_page
        return parsed_page

    def _download_history(self, Ticker, period):
        '''
        Downloads daily price data for the given period through yfinance and
        returns it as a dataframe.
        '''
        data = yf.download(tickers=str(Ticker), period=period, interval="1d")
        return pd.DataFrame(data)

    def status_getter(self, Ticker):
        '''
        This method gets the company ticker the user chooses, creates a www.marketwatch.com
        link, then scrapes the HTML code of the corresponding company page in marketwatch website,
        and returns the current market status shown on that page. Possible values are:
        After Hours, Open, and Market Closed.

        Raises ParsingError when the status can not be found in the page.
        '''
        return self._status_text(self._fetch_quote_page(Ticker))

    def current_price_getter(self, Ticker):
        '''
        This method returns the current price as a float if the market is open,
        otherwise it returns the string "Market Closed".

        Raises ParsingError when the status or the price can not be found in the page.
        '''
        # The page is fetched once and used for both the status and the price.
        parsed_page = self._fetch_quote_page(Ticker)
        if self._status_text(parsed_page) != "Open":
            return "Market Closed"

        quote = parsed_page.find("bg-quote", class_="value")
        if quote is None:
            raise ParsingError("HTML code of MarketWatch website was not scraped and current price can not be found")
        return self._parse_price(quote.text)

    def stock_data_getter(self, Ticker):
        '''
        This method returns a dataframe with the daily stock data of the last 6 months
        from the "yfinance" library if the market is open, otherwise it returns
        the string "Market Closed".
        '''
        if self.status_getter(Ticker) != "Open":
            return "Market Closed"
        return self._download_history(Ticker, "6mo")

    def LSTM_stock_data_getter(self, Ticker):
        '''
        This method returns a dataframe with the daily stock data of the last 2 years
        from the "yfinance" library regardless of whether the market is open or not.
        It is meant to feed the LSTM model.
        '''
        return self._download_history(Ticker, "2y")

    def article_parser(self, ticker, max_articles=17):
        '''
        This method gets as input a stock ticker, creates the www.marketwatch.com link of this stock
        and returns a dataframe with the headlines of its latest articles.

        At most "max_articles" headlines are returned (17 by default) and blank
        headlines are left out. The dataframe has the columns "ticker" and
        "headline", and it is empty when the page has no headlines pane.
        '''
        company_ticker = self.Ticker(tick=ticker)

        page = requests.get(self._quote_url(company_ticker), timeout=self._REQUEST_TIMEOUT)
        parsed_page = BeautifulSoup(page.content, "html.parser")
        pane = parsed_page.find("div", class_="tab__pane is-active j-tabPane")
        # find() returns None when the pane is missing; treat that as "no articles".
        links = pane.find_all("a", class_="link") if pane is not None else []

        rows = self._headline_rows(links, company_ticker, max_articles)
        return pd.DataFrame(rows, columns=self._HEADLINE_COLUMNS)
122
+