"""
Demo is based on https://scikit-learn.org/stable/auto_examples/applications/plot_stock_market.html
"""

import sys

import numpy as np
import pandas as pd

# Ticker symbol -> company display name for every stock used by the demo.
symbol_dict = {
    "TOT": "Total",
    "XOM": "Exxon",
    "CVX": "Chevron",
    "COP": "ConocoPhillips",
    "VLO": "Valero Energy",
    "MSFT": "Microsoft",
    "IBM": "IBM",
    "TWX": "Time Warner",
    "CMCSA": "Comcast",
    "CVC": "Cablevision",
    "YHOO": "Yahoo",
    "DELL": "Dell",
    "HPQ": "HP",
    "AMZN": "Amazon",
    "TM": "Toyota",
    "CAJ": "Canon",
    "SNE": "Sony",
    "F": "Ford",
    "HMC": "Honda",
    "NAV": "Navistar",
    "NOC": "Northrop Grumman",
    "BA": "Boeing",
    "KO": "Coca Cola",
    "MMM": "3M",
    "MCD": "McDonald's",
    "PEP": "Pepsi",
    "K": "Kellogg",
    "UN": "Unilever",
    "MAR": "Marriott",
    "PG": "Procter Gamble",
    "CL": "Colgate-Palmolive",
    "GE": "General Electrics",
    "WFC": "Wells Fargo",
    "JPM": "JPMorgan Chase",
    "AIG": "AIG",
    "AXP": "American express",
    "BAC": "Bank of America",
    "GS": "Goldman Sachs",
    "AAPL": "Apple",
    "SAP": "SAP",
    "CSCO": "Cisco",
    "TXN": "Texas Instruments",
    "XRX": "Xerox",
    "WMT": "Wal-Mart",
    "HD": "Home Depot",
    "GSK": "GlaxoSmithKline",
    "PFE": "Pfizer",
    "SNY": "Sanofi-Aventis",
    "NVS": "Novartis",
    "KMB": "Kimberly-Clark",
    "R": "Ryder",
    "GD": "General Dynamics",
    "RTN": "Raytheon",
    "CVS": "CVS",
    "CAT": "Caterpillar",
    "DD": "DuPont de Nemours",
}

# Sorting the (symbol, name) pairs orders them by ticker; transposing the
# resulting two-column array splits it into two parallel 1-D arrays, so
# names[i] is always the company for symbols[i].
symbols, names = np.array(sorted(symbol_dict.items())).T

# Location of each symbol's CSV in the scikit-learn examples-data repository.
# The template does not change between iterations, so it is built once here
# instead of on every pass through the loop.
url = (
    "https://raw.githubusercontent.com/scikit-learn/examples-data/"
    "master/financial-data/{}.csv"
)

# Download one quote table per symbol, in the same order as `symbols`.
quotes = []
for symbol in symbols:
    # Progress is reported on stderr, keeping stdout free of log noise.
    print("Fetching quote history for %r" % symbol, file=sys.stderr)
    quotes.append(pd.read_csv(url.format(symbol)))

# Stack the "close" / "open" column of every table: one row per symbol.
close_prices = np.vstack([q["close"] for q in quotes])
open_prices = np.vstack([q["open"] for q in quotes])

# Close-minus-open price change for every symbol (rows) and CSV record
# (columns); this is the signal the rest of the script analyses.
variation = close_prices - open_prices

from sklearn import covariance

# Candidate regularisation strengths handed to the cross-validated estimator:
# 10 values spaced logarithmically from 10**-1.5 to 10**1.
alphas = np.logspace(-1.5, 1, num=10)
edge_model = covariance.GraphicalLassoCV(alphas=alphas)

# `variation` holds one row per symbol; transpose a copy so every symbol
# becomes a column, then scale each column to unit standard deviation before
# fitting. The copy keeps `variation` itself untouched by the in-place divide.
X = variation.copy().T
X /= X.std(axis=0)
edge_model.fit(X)

from sklearn import cluster

# Cluster the symbols with affinity propagation, using the covariance
# estimated by `edge_model` as the similarity input. Only the per-symbol
# labels are kept; the first return value is discarded. A fixed random_state
# keeps the grouping reproducible between runs.
_, labels = cluster.affinity_propagation(edge_model.covariance_, random_state=0)

# Largest label value; the UI code iterates cluster ids 0..n_labels inclusive.
n_labels = labels.max()

from sklearn import manifold

# Three output components give each symbol an (x, y, z) position for the
# 3-D plot.
node_position_model = manifold.LocallyLinearEmbedding(
    n_components=3, eigen_solver="dense", n_neighbors=6
)

# X.T has one row per symbol. The result is transposed so that embedding[0],
# embedding[1] and embedding[2] are the per-symbol x, y and z coordinates.
embedding = node_position_model.fit_transform(X.T).T

import matplotlib.pyplot as plt
from matplotlib.collections import LineCollection
import plotly.graph_objs as go


def visualize_stocks():
    """Build the 3-D Plotly figure of the stock graph.

    Reads the module-level ``edge_model``, ``embedding``, ``labels`` and
    ``names``. Takes no arguments so it can be used directly as a Gradio
    click handler.

    Returns:
        A ``plotly.graph_objs.Figure`` whose first trace is a marker cloud
        with one point per stock (coloured by cluster label, hover text is
        the company name), followed by one two-point line trace for every
        pair of stocks whose absolute partial correlation exceeds 0.02.
    """
    # Rescale the precision matrix by d on both rows and columns, where
    # d = 1 / sqrt(diagonal); this gives the matrix a unit diagonal.
    partial_correlations = edge_model.precision_.copy()
    d = 1 / np.sqrt(np.diag(partial_correlations))
    partial_correlations *= d
    partial_correlations *= d[:, np.newaxis]

    # Look at the strict upper triangle only, so each pair is drawn once and
    # the diagonal is ignored; keep pairs above the 0.02 threshold.
    non_zero = np.abs(np.triu(partial_correlations, k=1)) > 0.02

    # One marker per stock, positioned by the embedding.
    node_trace = go.Scatter3d(
        x=embedding[0],
        y=embedding[1],
        z=embedding[2],
        mode="markers",
        marker=dict(size=35 * d**2, color=labels, colorscale="Viridis"),
        hovertext=names,
        hovertemplate="%{hovertext}<br>",
    )

    # Row/column indices of the retained pairs.
    start_idx, end_idx = np.where(non_zero)

    edge_traces = []
    for start, stop in zip(start_idx, end_idx):
        strength = np.abs(partial_correlations[start, stop])
        edge_traces.append(
            go.Scatter3d(
                x=[embedding[0][start], embedding[0][stop]],
                y=[embedding[1][start], embedding[1][stop]],
                z=[embedding[2][start], embedding[2][stop]],
                mode="lines",
                # NOTE(review): `color` is a single number per trace here;
                # confirm Plotly maps a scalar through the colorscale the
                # way this code intends.
                line=dict(
                    color=strength,
                    colorscale="Hot",
                    # Stronger partial correlation -> thicker line.
                    width=10 * strength * 2.5,
                ),
                hoverinfo="none",
                # Edges are decoration only; keep them out of the legend.
                showlegend=False,
            )
        )

    # The node trace stays first so it remains fig.data[0].
    return go.Figure(data=[node_trace, *edge_traces])


import gradio as gr

# Used both as the browser tab title and as the page heading.
title = " π Visualizing the stock market structure π"

with gr.Blocks(title=title) as demo:
    gr.Markdown(f"# {title}")
    gr.Markdown(" Data is of 56 stocks between the period of 2003 - 2008 <br>")
    gr.Markdown(
        " Stocks that move together with each other are grouped together in a cluster <br>"
    )

    gr.Markdown(
        " **[Demo is based on sklearn docs](https://scikit-learn.org/stable/auto_examples/applications/plot_stock_market.html)**"
    )

    # One line per cluster. Label ids run 0..n_labels inclusive and are shown
    # 1-based; the boolean mask picks the company names in that cluster.
    for i in range(n_labels + 1):
        gr.Markdown(f"Cluster {i + 1}: {', '.join(names[labels == i])}")

    # The figure is only built when the user asks for it.
    btn = gr.Button(value="Visualize")
    btn.click(
        visualize_stocks, outputs=gr.Plot(label="Visualizing stock into clusters")
    )

demo.launch()
|