Spaces:
Configuration error
Configuration error
from io import BytesIO | |
import streamlit as st | |
import pandas as pd | |
from scipy.cluster.hierarchy import linkage | |
from datetime import date | |
from dateutil.relativedelta import relativedelta | |
import plotly.figure_factory as ff | |
from fastdtw import fastdtw | |
from scipy.spatial.distance import euclidean | |
import numpy as np | |
from modules import tables | |
def data_request(country_to_request, start, currency='USD', end=date.today()): | |
data = tables.EquityMaster(country=country_to_request, field='IQ_CLOSEPRICE_ADJ', currency=currency).query(rename=['asset'], | |
start=start, end=str(end)) | |
adtv = tables.EquityMaster(country=country_to_request, field='IQ_VALUE_TRADED', currency=currency).query(rename=['asset'], | |
start=start, end=str(end)).median() | |
marketcap = tables.EquityMaster(country=country_to_request, field='IQ_MARKETCAP', currency=currency).query(rename=['asset'], | |
start=start, end=str(end)).median() | |
return data, adtv, marketcap | |
def data_filter(data, adtv, marketcap, adtv_threshold, mktcap_threshold, p): | |
adtv_filter = (adtv >= adtv_threshold) | |
adtv = adtv.loc[adtv_filter] | |
marketcap_filter = (marketcap >= mktcap_threshold) | |
marketcap = marketcap.loc[marketcap_filter] | |
data = data.loc[:, data.columns.isin(adtv.index)] | |
data = data.loc[:, data.columns.isin(marketcap.index)] | |
file_to_read = 'Data/Company_Base_Definitivo.xlsx' | |
company_base = pd.read_excel(file_to_read, sheet_name='Compilado') | |
id_to_ticker = {str(row['ID_Quant']): str(row['Ticker Bloomberg']).split()[0] for i, row in company_base.iterrows()} | |
data = data.loc[:, data.columns.isin(id_to_ticker.keys())] | |
data.columns = [id_to_ticker[col] for col in data.columns] | |
if isinstance(p, str): | |
returns_final = data.resample(p).last().pct_change().fillna(0) | |
else: | |
returns_final = data.iloc[::p].pct_change().fillna(0) | |
return returns_final | |
def dist(correlation): | |
return ((1-correlation)/2.)**.5 | |
def to_excel(df_to_write): | |
output = BytesIO() | |
writer = pd.ExcelWriter(output, engine='xlsxwriter') | |
df_to_write.to_excel(writer, index=False, sheet_name='Sheet1') | |
workbook = writer.book | |
worksheet = writer.sheets['Sheet1'] | |
format1 = workbook.add_format({'num_format': '0.00'}) | |
worksheet.set_column('A:A', None, format1) | |
writer.save() | |
processed_data = output.getvalue() | |
return processed_data | |
def get_dtw_distance(x, y): | |
distance_dtw = fastdtw(x, y, dist=euclidean)[0] | |
return distance_dtw | |
def clustering_basado_en_correlacion(): | |
form = st.form("Correlation Clustering") | |
posible_countries = ('Todos', 'Argentina', 'Brazil', 'Chile', 'Colombia', 'Mexico', 'Peru') | |
countries = form.multiselect('¿Qué países desea visualizar?', posible_countries) | |
if 'Todos' in countries: | |
countries = ('Argentina', 'Brazil', 'Chile', 'Colombia', 'Mexico', 'Peru') | |
adtv_p = form.number_input('Ingrese el mínimo Average Daily Traded Value que desea considerar', value=1., format="%.2f") | |
mktcap_thresh = form.number_input('Ingrese el mínimo Market Cap que desea considerar', value=200., format="%.2f") | |
start_date = form.selectbox('Ingrese la fecha de inicio que desea considerar', ('3 Meses', '6 Meses', '1 Año')) | |
period = form.number_input('Defina la frecuencia en la que desea las observaciones (en días)', value=1) | |
accept = form.form_submit_button('Aceptar') | |
if accept: | |
start_date = str(date.today() - relativedelta(months=int(start_date[0]))) | |
for country in countries: | |
data_, adtv_, marketcap_ = data_request(country, start_date) | |
# Filtramos para que se cumplan los filtros del usuario en los datos | |
returns = data_filter(data_, adtv_, marketcap_, adtv_p, mktcap_thresh, period) | |
# Normalizamos | |
base = (returns.subtract(returns.mean(0), axis=1)).div(returns.std(axis=0), axis=1) | |
base = base.sort_index(axis=1) | |
# Procedemos a calcular correlación y covarianza | |
corr, covs = base.corr(), base.cov() | |
file = to_excel(corr) | |
# Definimos la matriz de distancia | |
dist_matrix = dist(corr) | |
hierarchy = linkage(dist_matrix) | |
ct = 0.54 * max(hierarchy[:, 2]) | |
fig = ff.create_dendrogram(dist_matrix, orientation='left', labels=list(base.columns), | |
color_threshold=ct, linkagefun=linkage) | |
fig.update_layout(title='{} desde {} hasta {}'.format(country, | |
returns.index[0].date(), | |
returns.index[-1].date())) | |
if country == 'Brazil': | |
fig.update_layout(height=2000) | |
else: | |
fig.update_layout(height=900) | |
st.plotly_chart(fig, use_container_width=True) | |
st.download_button(label='Descargar Matriz de Correlación para {}'.format(country), data=file, | |
file_name='{}_correlacion.xlsx'.format(country)) | |
def clustering_con_dtw(): | |
form = st.form("Dynamic Time Warping Clustering") | |
posible_countries = ('Todos', 'Brazil', 'Argentina', 'Chile', 'Colombia', 'Mexico', 'Peru') | |
countries = form.multiselect('¿Qué países desea visualizar?', posible_countries) | |
if 'Todos' in countries: | |
countries = ('Brazil', 'Argentina', 'Chile', 'Colombia', 'Mexico', 'Peru') | |
adtv_p = form.number_input('Ingrese el mínimo Average Daily Traded Value que desea considerar', value=1., | |
format="%.2f") | |
mktcap_thresh = form.number_input('Ingrese el Mínimo Market Cap que desea considerar', value=200., format="%.2f") | |
start_date = form.selectbox('Ingrese la fecha de inicio que desea considerar', ('3 Meses', '6 Meses', '1 Año')) | |
period = form.number_input('Defina la frecuencia en la que desea las observaciones (en días)', value=1) | |
accept = form.form_submit_button('Aceptar') | |
if accept: | |
start_date = str(date.today() - relativedelta(months=int(start_date[0]))) | |
for country in countries: | |
data_, adtv_, marketcap_ = data_request(country, start_date) | |
# Filtramos para que se cumplan los filtros del usuario en los datos | |
returns = data_filter(data_, adtv_, marketcap_, adtv_p, mktcap_thresh, period) | |
# Normalizamos returns | |
base = (returns.subtract(returns.mean(0), axis=1)).div(returns.std(axis=0), axis=1) | |
base = base.sort_index(axis=1) | |
# Procedemos a calcular correlación y covarianza | |
N = len(base[:base.index[0]].T) | |
# Creamos la Matriz de Distancias para DTW | |
Dist = np.zeros((N, N)) | |
place = st.empty() | |
for i in range(N): | |
place.write("Cargando: " + str(round(i*100/N)) + " %") | |
for j in range(i - 1, N): | |
company_1 = base.columns[i] | |
company_2 = base.columns[j] | |
Dist[i, j] = get_dtw_distance(base[company_1], base[company_2]) | |
# La matriz es simétrica | |
Dist[j, i] = Dist[i, j] | |
# Creamos un DataFrame con la matriz de distancias | |
df = pd.DataFrame(Dist) | |
df.index = base.columns | |
df.columns = base.columns | |
# Pasamos el df a excel para descarga del usuario | |
file = to_excel(df) | |
hierarchy = linkage(Dist) | |
ct = 0.54 * max(hierarchy[:, 2]) | |
fig = ff.create_dendrogram(Dist, orientation='left', labels=list(base.columns), | |
color_threshold=ct, linkagefun=linkage) | |
fig.update_layout( | |
title='{} desde {} hasta {}'.format(country, returns.index[0].date(), returns.index[-1].date())) | |
if country == 'Brazil': | |
fig.update_layout(height=2000) | |
else: | |
fig.update_layout(height=900) | |
st.plotly_chart(fig, use_container_width=True) | |
st.download_button(label='Descargar Matriz de distancias con DTW para {}'.format(country), data=file, | |
file_name='{}_correlacion.xlsx'.format(country)) | |