Spaces:
Configuration error
Configuration error
# Imports | |
import pandas as pd | |
import streamlit as st | |
import pybase64 as base64 | |
# from modules.tables import RealEstateMaste | |
from sqlalchemy import create_engine | |
import plotly.express as px | |
import io | |
import urllib.request | |
import bs4 as bs | |
import scipy.stats as stats | |
import plotly.figure_factory as ff | |
import scipy.stats as stats | |
import plotly.graph_objects as go | |
import numpy as np | |
from datetime import date | |
from datetime import timedelta | |
from Data.credentials import credentials_postgresql as credpost | |
def formatnum(numero): | |
''' | |
Esta función permite dar formato a los montos de saldo y valor cuota en | |
las cartolas. | |
''' | |
return '{:,.0f}'.format(numero).replace(",", "@").replace(".", ",").replace("@", ".") | |
def get_UF(): | |
link = "https://valoruf.cl/" | |
req = urllib.request.Request(link) | |
res = urllib.request.urlopen(req) | |
resData = res.read() | |
soup = bs.BeautifulSoup(resData) | |
uf = soup.find(class_="vpr").contents[0] | |
uf_value = uf.split(' ')[1] | |
uf_value = uf_value.replace('.', '') | |
uf_value = uf_value.replace(',', '.') | |
return uf_value | |
def button_style(): | |
style_button = """ | |
<style> | |
button { | |
display: inline-block; | |
background-color: #e8e8e8; | |
border-radius: 15px; | |
border: 4px #cccccc; | |
color: #4a4a4a; | |
text-align: center; | |
font-size: 18px; | |
padding: 2px; | |
width: 300px; | |
transition: all 0.5s; | |
cursor: pointer; | |
margin: 5px; | |
} | |
button span { | |
cursor: pointer; | |
display: inline-block; | |
position: relative; | |
transition: 0.5s; | |
} | |
button span:after { | |
content: '\00bb'; | |
position: absolute; | |
opacity: 0; | |
top: 0; | |
right: -20px; | |
transition: 0.5s; | |
} | |
button:hover { | |
background-color: #bb1114; | |
color:#e8e8e8; | |
} | |
button:hover span { | |
padding-right: 25px; | |
} | |
button:hover span:after { | |
opacity: 1; | |
right: 0; | |
} | |
</style> | |
""" | |
st.markdown(style_button, unsafe_allow_html=True) | |
def plot_column(data, column, nom_var_inv, placeholder, sigmas=3, limite=None): | |
data = data.rename(columns=nom_var_inv) | |
column = nom_var_inv[column] | |
limit = data[column].mean() + sigmas * data[column].std() | |
data = data.loc[data[column] <= limit] | |
if limite is not None: | |
data = data.loc[data[column] <= limite] | |
fig = px.scatter_mapbox(data_frame=data, lat='latitud', lon='longitud', | |
zoom=10, color=column, | |
hover_data=['codigo', 'Superficie', 'dormitorios', 'banos', 'Precio en CLP'], | |
color_continuous_scale=px.colors.diverging.RdYlBu, | |
width=1000, height=800) | |
placeholder.plotly_chart(fig, use_container_width=True) | |
def plot_column2(data, column, nom_var_inv, placeholder, zoom, sigmas=10, limite=None): | |
data = data.rename(columns=nom_var_inv) | |
column = nom_var_inv[column] | |
limit = data[column].mean() + sigmas * data[column].std() | |
data = data.loc[data[column] <= limit] | |
if limite is not None: | |
data = data.loc[data[column] <= limite] | |
fig = px.scatter_mapbox(data_frame=data, lat='latitud', lon='longitud', | |
zoom=zoom, color=column, | |
hover_data=['codigo', 'Superficie', 'dormitorios', 'banos', 'Precio en CLP', 'Comuna'], | |
color_continuous_scale=px.colors.diverging.RdYlBu, | |
width=1000, height=800,size='Superficie',size_max=25) | |
placeholder.plotly_chart(fig, use_container_width=True) | |
def get_comuna(direction): | |
values = direction.split(',') | |
return values[-2] | |
def unit_separator(string): | |
values = string.split(" ") | |
return values[0] | |
def transform_df(scraping_df): | |
# Extract numbers from string | |
# Change datatype | |
# Add 'Comuna' and 'Región' | |
scraping_df[["Comuna"]] = scraping_df[["direccion"]].applymap(lambda x: get_comuna(x)) | |
# Scrap UF value for date | |
#scraping_df[["Valor UF"]] = get_UF() | |
return scraping_df | |
def get_table_excel_link(df, name): | |
towrite = io.BytesIO() | |
downloaded_file = df.to_excel(towrite, encoding='utf-8', index=False, | |
header=True) | |
towrite.seek(0) # reset pointer | |
file_name = 'Data' + name + '.xlsx' | |
style = 'style="color:black;text-decoration: none; font-size:18px;"' | |
name_mark = "Descargar " + name + ".xlsx" | |
b64 = base64.b64encode(towrite.read()).decode() # some strings | |
linko= f'<center><a href="data:application/vnd.openxmlformats-officedocument.spreadsheetml.sheet;base64,{b64}" '+style+'download="'+file_name+'"><button>'+name_mark+'</button></a></center>' | |
return linko | |
def query_inmob(): | |
url = credpost["POSTGRESQL"] | |
engine = create_engine(url, echo=False) | |
semana_pasada = date.today() - timedelta(7) | |
semana_pasada = semana_pasada.strftime("%Y-%m-%d") | |
data = pd.read_sql_query("""select * from scraping_inmob where Fecha > | |
'{Fecha} 00:00:00' ORDER BY Fecha desc""".format(Fecha=semana_pasada) , con=engine) | |
return data | |
def run_scrapping(): | |
button_style() | |
px.set_mapbox_access_token('pk.eyJ1IjoibW9rc2VuYmVyZyIsImEiOiJja3QwOTc3dHgyNzBhMnFsczJ2Y2w3bWJlIn0.m8c3duvR5hQVjbjEByorWQ') | |
data = query_inmob() | |
precio_uf = get_UF() | |
data["valor_uf"] = data["valor_peso"]/float(precio_uf) | |
data['predicted_venta_por_m2_UF']=data['predicted_venta_por_m2']/float(precio_uf) | |
nom_var = {"Precio en UF": "valor_uf", | |
"Precio en CLP": "valor_peso", | |
"Superficie": "superficie", | |
"Predicted venta por M2 en CLP": "predicted_venta_por_m2", | |
"Predicted venta por M2 en UF": "predicted_venta_por_m2_UF", | |
"Predicted arriendo por M2": "predicted_arriendo_por_m2", | |
"Predicted yield anual": "predicted_yield_anual" | |
} | |
nom_var_inv = {v: k for k, v in nom_var.items()} | |
var = list(nom_var.keys()) | |
with st.form(key='my_form'): | |
col1, col2, col3, col4 = st.columns((9, 1, 9, 1)) | |
cols = st.columns((9, 1, 5, 5)) | |
pre_selection = col1.selectbox("Variable", var) | |
selection = nom_var[pre_selection] | |
mercado = col3.selectbox("Mercado", ["Venta", "Arriendo", "Todos"]) | |
if mercado != "Todos": | |
mercado = mercado.lower() | |
data_mercado = data[data["mercado"] == mercado] | |
else: | |
data_mercado = data | |
cifras = len(str(int(max(data_mercado[selection])))) | |
tipo_prop = cols[0].selectbox("Tipo de propiedad", | |
["Todos", "Casa", "Departamento"]) | |
if tipo_prop != "Todos": | |
tipo_prop2 = tipo_prop.lower() | |
data_mercado = data_mercado[data_mercado["tipo_propiedad"] == tipo_prop2] | |
if cifras > 3: | |
corte = 10 ** (cifras - 3) | |
cota_final = (int(max(data_mercado[selection].dropna()))//corte+1) *corte | |
cota_inf = cols[2].number_input(label="Valor Mínimo", | |
min_value=0, | |
value=0, | |
max_value=cota_final | |
) | |
cota_sup = cols[3].number_input(label="Valor Máximo", | |
min_value=0, | |
value=cota_final, | |
max_value=cota_final | |
) | |
else: | |
cota_final = max(data_mercado[selection].dropna())+1 | |
cota_inf = cols[2].number_input(label="Valor Mínimo", | |
min_value=0.0, | |
value=0.0, | |
step=0.1, | |
max_value=cota_final | |
) | |
cota_sup = cols[3].number_input(label="Valor Máximo", | |
min_value=0.0, | |
step=0.1, | |
value=cota_final, | |
max_value=cota_final | |
) | |
data_final = data_mercado[data_mercado[selection] < cota_sup] | |
data_final = data_final[data_final[selection] > cota_inf] | |
submit_button = st.form_submit_button(label='Actualizar') | |
col1, col2 = st.columns((0.65, 1)) | |
placeholder = st.empty() | |
placeholder2 = st.empty() | |
placeholder2.markdown(get_table_excel_link(data, "Data Completa"), | |
unsafe_allow_html=True) | |
plot_column(data, selection, nom_var_inv, placeholder) | |
if submit_button: | |
plot_column(data_final, selection, nom_var_inv, placeholder) | |
st.markdown(get_table_excel_link(data_final, " Data filtrada"), | |
unsafe_allow_html=True) | |
def scraping_localizado(): | |
button_style() | |
data = query_inmob() | |
data = transform_df(data) | |
precio_uf = get_UF() | |
data["valor_uf"] = data["valor_peso"]/float(precio_uf) | |
data['predicted_venta_por_m2_UF'] = data['predicted_venta_por_m2']/float(precio_uf) | |
nom_var = {"Precio en UF": "valor_uf", | |
"Precio en CLP": "valor_peso", | |
"Superficie": "superficie", | |
"Predicted venta por M2 en CLP": "predicted_venta_por_m2", | |
"Predicted venta por M2 en UF": "predicted_venta_por_m2_UF", | |
"Predicted arriendo por M2": "predicted_arriendo_por_m2", | |
"Predicted yield anual": "predicted_yield_anual" | |
} | |
nom_var_inv = {v : k for k, v in nom_var.items()} | |
var = list(nom_var.keys()) | |
with st.form(key='my_form'): | |
col1, col2, col3, col4 = st.columns((9, 1, 9, 1)) | |
cols = st.columns((9, 1, 5, 5)) | |
pre_selection = col1.selectbox("Variable", var) | |
selection = nom_var[pre_selection] | |
mercado = col3.selectbox("Mercado", ["Venta", "Arriendo", "Todos"]) | |
banos = col1.slider("Baños", value=(1, 5), min_value=0, max_value=10) | |
dormitorios = col3.slider("Dormitorios", value=(1, 5), min_value=0, | |
max_value=10) | |
com = ["Todas"] + sorted(list(dict.fromkeys(data["Comuna"]))) | |
comuna = col1.selectbox("Comuna", com) | |
zoom = col3.number_input("Zoom", value=12) | |
if mercado != "Todos": | |
mercado = mercado.lower() | |
data_mercado = data[data["mercado"] == mercado] | |
else: | |
data_mercado = data | |
if comuna != "Todas": | |
data_mercado = data_mercado[data_mercado["Comuna"] == comuna] | |
if len(data_mercado) > 0: | |
cifras = len(str(int(max(data_mercado[selection].dropna())))) | |
else: | |
cifras = 0 | |
tipo_prop = cols[0].selectbox("Tipo de Propiedad", | |
["Todos", "Casa", "Departamento"]) | |
if tipo_prop != "Todos": | |
tipo_prop2 = tipo_prop.lower() | |
data_mercado = data_mercado[data_mercado["tipo_propiedad"] == tipo_prop2] | |
if cifras > 3: | |
corte = 10 ** (cifras - 3) | |
cota_final = (int(max(data_mercado[selection].dropna()))//corte+1)*corte | |
cota_inf = cols[2].number_input(label="Valor Mínimo", | |
min_value=0, | |
value=0, | |
max_value=cota_final | |
) | |
cota_sup = cols[3].number_input(label="Valor Máximo", | |
min_value=0, | |
value=cota_final, | |
max_value=cota_final | |
) | |
else: | |
cota_final = max(data_mercado[selection].dropna())+1 | |
cota_inf = cols[2].number_input(label="Valor Mínimo", | |
min_value=0.0, | |
value=0.0, | |
step=0.1, | |
max_value=cota_final | |
) | |
cota_sup = cols[3].number_input(label="Valor Máximo", | |
min_value=0.0, | |
step=0.1, | |
value=cota_final, | |
max_value=cota_final | |
) | |
data_final = data_mercado[data_mercado[selection] < cota_sup] | |
data_final = data_final[data_final[selection] > cota_inf] | |
data_final = data_final[data_final["banos"] > banos[0]] | |
data_final = data_final[data_final["banos"] < banos[1]] | |
data_final = data_final[data_final["dormitorios"] > dormitorios[0]] | |
data_final = data_final[data_final["dormitorios"] < dormitorios[1]] | |
st.write('Stats principales del análisis') | |
st.write('Valor promedio en UF por metro cuadrado: {:.2f} UF'.format((data_final['valor_uf']/data_final['superficie']).mean())) | |
st.write('Superficie promedio: {:.0f} m2'.format(data_final['superficie'].mean())) | |
st.write('N° de dormitorios promedio: {:.2f}'.format(data_final['dormitorios'].mean())) | |
st.write('N° de baños promedio: {:.2f}'.format(data_final['banos'].mean())) | |
submit_button = st.form_submit_button(label='Actualizar') | |
col1, col2 = st.columns((0.65, 1)) | |
placeholder = st.empty() | |
placeholder2 = st.empty() | |
placeholder3 = st.empty() | |
placeholder2.markdown(get_table_excel_link(data, "Data Completa"), | |
unsafe_allow_html=True) | |
plot_column2(data, selection, nom_var_inv, placeholder, zoom) | |
dist_gamma = stats.gamma.rvs(1, scale=250000, size=2965) | |
if submit_button: | |
plot_column2(data_final, selection, nom_var_inv, placeholder, zoom) | |
st.markdown(get_table_excel_link(data_final, " Data filtrada"), | |
unsafe_allow_html=True) | |
data_hist = data_final[data_final["mercado"] == "venta"] | |
data_hist = data_hist[data_hist["valor_peso"] > 40000000] | |
data_hist = data_hist[data_hist["valor_peso"] < 1000000000]["valor_peso"] * 0.2 | |
fig2 = px.histogram((data_hist), x='valor_peso') | |
#placeholder3.plotly_chart(fig2) | |
# streamlit run analisis_inmob.py | |
# scraping_localizado() | |