"""
Demo is based on https://scikit-learn.org/stable/auto_examples/applications/plot_stock_market.html
"""
import sys
import numpy as np
import pandas as pd
# Ticker symbol -> company display name for every stock used in the demo.
symbol_dict = {
    "TOT": "Total",
    "XOM": "Exxon",
    "CVX": "Chevron",
    "COP": "ConocoPhillips",
    "VLO": "Valero Energy",
    "MSFT": "Microsoft",
    "IBM": "IBM",
    "TWX": "Time Warner",
    "CMCSA": "Comcast",
    "CVC": "Cablevision",
    "YHOO": "Yahoo",
    "DELL": "Dell",
    "HPQ": "HP",
    "AMZN": "Amazon",
    "TM": "Toyota",
    "CAJ": "Canon",
    "SNE": "Sony",
    "F": "Ford",
    "HMC": "Honda",
    "NAV": "Navistar",
    "NOC": "Northrop Grumman",
    "BA": "Boeing",
    "KO": "Coca Cola",
    "MMM": "3M",
    "MCD": "McDonald's",
    "PEP": "Pepsi",
    "K": "Kellogg",
    "UN": "Unilever",
    "MAR": "Marriott",
    "PG": "Procter Gamble",
    "CL": "Colgate-Palmolive",
    "GE": "General Electrics",
    "WFC": "Wells Fargo",
    "JPM": "JPMorgan Chase",
    "AIG": "AIG",
    "AXP": "American express",
    "BAC": "Bank of America",
    "GS": "Goldman Sachs",
    "AAPL": "Apple",
    "SAP": "SAP",
    "CSCO": "Cisco",
    "TXN": "Texas Instruments",
    "XRX": "Xerox",
    "WMT": "Wal-Mart",
    "HD": "Home Depot",
    "GSK": "GlaxoSmithKline",
    "PFE": "Pfizer",
    "SNY": "Sanofi-Aventis",
    "NVS": "Novartis",
    "KMB": "Kimberly-Clark",
    "R": "Ryder",
    "GD": "General Dynamics",
    "RTN": "Raytheon",
    "CVS": "CVS",
    "CAT": "Caterpillar",
    "DD": "DuPont de Nemours",
}
# Tickers and company names as two parallel arrays, ordered by ticker.
symbols, names = np.array(sorted(symbol_dict.items())).T

# The URL template does not depend on the ticker, so it is built once
# instead of on every loop iteration.
url = (
    "https://raw.githubusercontent.com/scikit-learn/examples-data/"
    "master/financial-data/{}.csv"
)

# Download one CSV of historical quotes per ticker (network access required).
quotes = []
for symbol in symbols:
    print(f"Fetching quote history for {symbol!r}", file=sys.stderr)
    quotes.append(pd.read_csv(url.format(symbol)))

# Stack the per-stock "close" and "open" columns, one row per stock.
close_prices = np.vstack([q["close"] for q in quotes])
open_prices = np.vstack([q["open"] for q in quotes])

# The daily variations of the quotes are what carry the most information
variation = close_prices - open_prices
from sklearn import covariance
# Candidate regularisation values handed to the cross-validated estimator.
alphas = np.logspace(-1.5, 1, num=10)

# Standardize the time series: using correlations rather than covariance
# is more efficient for structure recovery.
X = variation.copy().T
X /= X.std(axis=0)

# fit() returns the estimator itself, so construction and fitting are chained.
edge_model = covariance.GraphicalLassoCV(alphas=alphas).fit(X)
from sklearn import cluster
# Group the stocks with affinity propagation, passing the covariance estimated
# by edge_model as the affinity matrix; only the label array is kept.
_, labels = cluster.affinity_propagation(
    edge_model.covariance_, random_state=0
)
# Highest cluster label that was assigned.
n_labels = np.max(labels)
# Finding a low-dimension embedding for visualization: find the best position of
# the nodes (the stocks) in 3D space
from sklearn import manifold
node_position_model = manifold.LocallyLinearEmbedding(
    n_components=3,  # one coordinate per axis of the 3D plot
    n_neighbors=6,
    eigen_solver="dense",
)
# X.T is passed so that each stock is one sample; the result is transposed
# back so that embedding[k] holds the k-th coordinate of every stock.
embedding = node_position_model.fit_transform(X.T).T
import matplotlib.pyplot as plt
from matplotlib.collections import LineCollection
import plotly.graph_objs as go
def visualize_stocks():
    """Build the interactive 3D figure of the learned stock structure.

    Reads the module-level ``edge_model``, ``embedding``, ``labels`` and
    ``names``. Every stock is drawn as a marker at its embedding
    coordinates, coloured by its cluster label. Every pair of stocks whose
    rescaled precision entry exceeds 0.02 in absolute value is joined by a
    line whose colour value and width are derived from that absolute value.

    Returns:
        plotly.graph_objs.Figure: the marker trace followed by one line
        trace per retained pair, in the order ``np.where`` yields them.
    """
    # Rescale the precision matrix by d_i * d_j with d = 1 / sqrt(diagonal),
    # which turns its diagonal into ones.
    partial_correlations = edge_model.precision_.copy()
    d = 1 / np.sqrt(np.diag(partial_correlations))
    partial_correlations *= d
    partial_correlations *= d[:, np.newaxis]
    # Only the strict upper triangle is inspected, so each pair is kept once.
    non_zero = np.abs(np.triu(partial_correlations, k=1)) > 0.02

    # Plot the nodes using the coordinates of our embedding
    scatter = go.Scatter3d(
        x=embedding[0],
        y=embedding[1],
        z=embedding[2],
        mode="markers",
        marker=dict(size=35 * d**2, color=labels, colorscale="Viridis"),
        hovertext=names,
        hovertemplate="%{hovertext}\n",
    )

    # Plot the edges: one two-point line trace per retained pair.
    edge_traces = []
    for start, stop in zip(*np.where(non_zero)):
        strength = np.abs(partial_correlations[start, stop])
        edge_traces.append(
            go.Scatter3d(
                x=[embedding[0][start], embedding[0][stop]],
                y=[embedding[1][start], embedding[1][stop]],
                z=[embedding[2][start], embedding[2][stop]],
                mode="lines",
                line=dict(
                    color=strength,
                    colorscale="Hot",
                    width=10 * strength * 2.5,
                ),
                hoverinfo="none",  # disable hover for the line segments
                showlegend=False,  # keep the edges out of the legend
            )
        )

    return go.Figure(data=[scatter, *edge_traces])
import gradio as gr
title = " 📈 Visualizing the stock market structure 📈"

# Components must be created inside the Blocks context to be attached to the
# page, so everything up to the button wiring lives in the `with` body.
with gr.Blocks(title=title) as demo:
    gr.Markdown(f"# {title}")
    gr.Markdown(" Data is of 56 stocks between the period of 2003 - 2008\n")
    gr.Markdown(
        " Stocks the move in together with each other are grouped together in a cluster\n"
    )
    gr.Markdown(
        " **[Demo is based on sklearn docs](https://scikit-learn.org/stable/auto_examples/applications/plot_stock_market.html)**"
    )
    # One line per cluster (labels 0..n_labels) listing its member companies.
    for i in range(n_labels + 1):
        gr.Markdown(f"Cluster {i + 1}: {', '.join(names[labels == i])}")
    # The figure is only built when the user clicks the button.
    btn = gr.Button(value="Visualize")
    btn.click(
        visualize_stocks, outputs=gr.Plot(label="Visualizing stock into clusters")
    )

demo.launch()