PortalLVAM / apps /analisis_inmob.py
bullm's picture
ldsakjl
025632f
raw
history blame
15 kB
# Imports
import pandas as pd
import streamlit as st
import pybase64 as base64
# from modules.tables import RealEstateMaste
from sqlalchemy import create_engine
import plotly.express as px
import io
import urllib.request
import bs4 as bs
import scipy.stats as stats
import plotly.figure_factory as ff
import scipy.stats as stats
import plotly.graph_objects as go
import numpy as np
from datetime import date
from datetime import timedelta
from Data.credentials import credentials_postgresql as credpost
def formatnum(numero):
'''
Esta función permite dar formato a los montos de saldo y valor cuota en
las cartolas.
'''
return '{:,.0f}'.format(numero).replace(",", "@").replace(".", ",").replace("@", ".")
def get_UF():
link = "https://valoruf.cl/"
req = urllib.request.Request(link)
res = urllib.request.urlopen(req)
resData = res.read()
soup = bs.BeautifulSoup(resData)
uf = soup.find(class_="vpr").contents[0]
uf_value = uf.split(' ')[1]
uf_value = uf_value.replace('.', '')
uf_value = uf_value.replace(',', '.')
return uf_value
def button_style():
style_button = """
<style>
button {
display: inline-block;
background-color: #e8e8e8;
border-radius: 15px;
border: 4px #cccccc;
color: #4a4a4a;
text-align: center;
font-size: 18px;
padding: 2px;
width: 300px;
transition: all 0.5s;
cursor: pointer;
margin: 5px;
}
button span {
cursor: pointer;
display: inline-block;
position: relative;
transition: 0.5s;
}
button span:after {
content: '\00bb';
position: absolute;
opacity: 0;
top: 0;
right: -20px;
transition: 0.5s;
}
button:hover {
background-color: #bb1114;
color:#e8e8e8;
}
button:hover span {
padding-right: 25px;
}
button:hover span:after {
opacity: 1;
right: 0;
}
</style>
"""
st.markdown(style_button, unsafe_allow_html=True)
def plot_column(data, column, nom_var_inv, placeholder, sigmas=3, limite=None):
data = data.rename(columns=nom_var_inv)
column = nom_var_inv[column]
limit = data[column].mean() + sigmas * data[column].std()
data = data.loc[data[column] <= limit]
if limite is not None:
data = data.loc[data[column] <= limite]
fig = px.scatter_mapbox(data_frame=data, lat='latitud', lon='longitud',
zoom=10, color=column,
hover_data=['codigo', 'Superficie', 'dormitorios', 'banos', 'Precio en CLP'],
color_continuous_scale=px.colors.diverging.RdYlBu,
width=1000, height=800)
placeholder.plotly_chart(fig, use_container_width=True)
def plot_column2(data, column, nom_var_inv, placeholder, zoom, sigmas=10, limite=None):
data = data.rename(columns=nom_var_inv)
column = nom_var_inv[column]
limit = data[column].mean() + sigmas * data[column].std()
data = data.loc[data[column] <= limit]
if limite is not None:
data = data.loc[data[column] <= limite]
fig = px.scatter_mapbox(data_frame=data, lat='latitud', lon='longitud',
zoom=zoom, color=column,
hover_data=['codigo', 'Superficie', 'dormitorios', 'banos', 'Precio en CLP', 'Comuna'],
color_continuous_scale=px.colors.diverging.RdYlBu,
width=1000, height=800,size='Superficie',size_max=25)
placeholder.plotly_chart(fig, use_container_width=True)
def get_comuna(direction):
values = direction.split(',')
return values[-2]
def unit_separator(string):
values = string.split(" ")
return values[0]
def transform_df(scraping_df):
# Extract numbers from string
# Change datatype
# Add 'Comuna' and 'Región'
scraping_df[["Comuna"]] = scraping_df[["direccion"]].applymap(lambda x: get_comuna(x))
# Scrap UF value for date
#scraping_df[["Valor UF"]] = get_UF()
return scraping_df
def get_table_excel_link(df, name):
towrite = io.BytesIO()
downloaded_file = df.to_excel(towrite, encoding='utf-8', index=False,
header=True)
towrite.seek(0) # reset pointer
file_name = 'Data' + name + '.xlsx'
style = 'style="color:black;text-decoration: none; font-size:18px;"'
name_mark = "Descargar " + name + ".xlsx"
b64 = base64.b64encode(towrite.read()).decode() # some strings
linko= f'<center><a href="data:application/vnd.openxmlformats-officedocument.spreadsheetml.sheet;base64,{b64}" '+style+'download="'+file_name+'"><button>'+name_mark+'</button></a></center>'
return linko
@st.cache(allow_output_mutation=True)
def query_inmob():
url = credpost["POSTGRESQL"]
engine = create_engine(url, echo=False)
semana_pasada = date.today() - timedelta(7)
semana_pasada = semana_pasada.strftime("%Y-%m-%d")
data = pd.read_sql_query("""select * from scraping_inmob where Fecha >
'{Fecha} 00:00:00' ORDER BY Fecha desc""".format(Fecha=semana_pasada) , con=engine)
return data
def run_scrapping():
button_style()
px.set_mapbox_access_token('pk.eyJ1IjoibW9rc2VuYmVyZyIsImEiOiJja3QwOTc3dHgyNzBhMnFsczJ2Y2w3bWJlIn0.m8c3duvR5hQVjbjEByorWQ')
data = query_inmob()
precio_uf = get_UF()
data["valor_uf"] = data["valor_peso"]/float(precio_uf)
data['predicted_venta_por_m2_UF']=data['predicted_venta_por_m2']/float(precio_uf)
nom_var = {"Precio en UF": "valor_uf",
"Precio en CLP": "valor_peso",
"Superficie": "superficie",
"Predicted venta por M2 en CLP": "predicted_venta_por_m2",
"Predicted venta por M2 en UF": "predicted_venta_por_m2_UF",
"Predicted arriendo por M2": "predicted_arriendo_por_m2",
"Predicted yield anual": "predicted_yield_anual"
}
nom_var_inv = {v: k for k, v in nom_var.items()}
var = list(nom_var.keys())
with st.form(key='my_form'):
col1, col2, col3, col4 = st.columns((9, 1, 9, 1))
cols = st.columns((9, 1, 5, 5))
pre_selection = col1.selectbox("Variable", var)
selection = nom_var[pre_selection]
mercado = col3.selectbox("Mercado", ["Venta", "Arriendo", "Todos"])
if mercado != "Todos":
mercado = mercado.lower()
data_mercado = data[data["mercado"] == mercado]
else:
data_mercado = data
cifras = len(str(int(max(data_mercado[selection]))))
tipo_prop = cols[0].selectbox("Tipo de propiedad",
["Todos", "Casa", "Departamento"])
if tipo_prop != "Todos":
tipo_prop2 = tipo_prop.lower()
data_mercado = data_mercado[data_mercado["tipo_propiedad"] == tipo_prop2]
if cifras > 3:
corte = 10 ** (cifras - 3)
cota_final = (int(max(data_mercado[selection].dropna()))//corte+1) *corte
cota_inf = cols[2].number_input(label="Valor Mínimo",
min_value=0,
value=0,
max_value=cota_final
)
cota_sup = cols[3].number_input(label="Valor Máximo",
min_value=0,
value=cota_final,
max_value=cota_final
)
else:
cota_final = max(data_mercado[selection].dropna())+1
cota_inf = cols[2].number_input(label="Valor Mínimo",
min_value=0.0,
value=0.0,
step=0.1,
max_value=cota_final
)
cota_sup = cols[3].number_input(label="Valor Máximo",
min_value=0.0,
step=0.1,
value=cota_final,
max_value=cota_final
)
data_final = data_mercado[data_mercado[selection] < cota_sup]
data_final = data_final[data_final[selection] > cota_inf]
submit_button = st.form_submit_button(label='Actualizar')
col1, col2 = st.columns((0.65, 1))
placeholder = st.empty()
placeholder2 = st.empty()
placeholder2.markdown(get_table_excel_link(data, "Data Completa"),
unsafe_allow_html=True)
plot_column(data, selection, nom_var_inv, placeholder)
if submit_button:
plot_column(data_final, selection, nom_var_inv, placeholder)
st.markdown(get_table_excel_link(data_final, " Data filtrada"),
unsafe_allow_html=True)
def scraping_localizado():
button_style()
data = query_inmob()
data = transform_df(data)
precio_uf = get_UF()
data["valor_uf"] = data["valor_peso"]/float(precio_uf)
data['predicted_venta_por_m2_UF'] = data['predicted_venta_por_m2']/float(precio_uf)
nom_var = {"Precio en UF": "valor_uf",
"Precio en CLP": "valor_peso",
"Superficie": "superficie",
"Predicted venta por M2 en CLP": "predicted_venta_por_m2",
"Predicted venta por M2 en UF": "predicted_venta_por_m2_UF",
"Predicted arriendo por M2": "predicted_arriendo_por_m2",
"Predicted yield anual": "predicted_yield_anual"
}
nom_var_inv = {v : k for k, v in nom_var.items()}
var = list(nom_var.keys())
with st.form(key='my_form'):
col1, col2, col3, col4 = st.columns((9, 1, 9, 1))
cols = st.columns((9, 1, 5, 5))
pre_selection = col1.selectbox("Variable", var)
selection = nom_var[pre_selection]
mercado = col3.selectbox("Mercado", ["Venta", "Arriendo", "Todos"])
banos = col1.slider("Baños", value=(1, 5), min_value=0, max_value=10)
dormitorios = col3.slider("Dormitorios", value=(1, 5), min_value=0,
max_value=10)
com = ["Todas"] + sorted(list(dict.fromkeys(data["Comuna"])))
comuna = col1.selectbox("Comuna", com)
zoom = col3.number_input("Zoom", value=12)
if mercado != "Todos":
mercado = mercado.lower()
data_mercado = data[data["mercado"] == mercado]
else:
data_mercado = data
if comuna != "Todas":
data_mercado = data_mercado[data_mercado["Comuna"] == comuna]
if len(data_mercado) > 0:
cifras = len(str(int(max(data_mercado[selection].dropna()))))
else:
cifras = 0
tipo_prop = cols[0].selectbox("Tipo de Propiedad",
["Todos", "Casa", "Departamento"])
if tipo_prop != "Todos":
tipo_prop2 = tipo_prop.lower()
data_mercado = data_mercado[data_mercado["tipo_propiedad"] == tipo_prop2]
if cifras > 3:
corte = 10 ** (cifras - 3)
cota_final = (int(max(data_mercado[selection].dropna()))//corte+1)*corte
cota_inf = cols[2].number_input(label="Valor Mínimo",
min_value=0,
value=0,
max_value=cota_final
)
cota_sup = cols[3].number_input(label="Valor Máximo",
min_value=0,
value=cota_final,
max_value=cota_final
)
else:
cota_final = max(data_mercado[selection].dropna())+1
cota_inf = cols[2].number_input(label="Valor Mínimo",
min_value=0.0,
value=0.0,
step=0.1,
max_value=cota_final
)
cota_sup = cols[3].number_input(label="Valor Máximo",
min_value=0.0,
step=0.1,
value=cota_final,
max_value=cota_final
)
data_final = data_mercado[data_mercado[selection] < cota_sup]
data_final = data_final[data_final[selection] > cota_inf]
data_final = data_final[data_final["banos"] > banos[0]]
data_final = data_final[data_final["banos"] < banos[1]]
data_final = data_final[data_final["dormitorios"] > dormitorios[0]]
data_final = data_final[data_final["dormitorios"] < dormitorios[1]]
st.write('Stats principales del análisis')
st.write('Valor promedio en UF por metro cuadrado: {:.2f} UF'.format((data_final['valor_uf']/data_final['superficie']).mean()))
st.write('Superficie promedio: {:.0f} m2'.format(data_final['superficie'].mean()))
st.write('N° de dormitorios promedio: {:.2f}'.format(data_final['dormitorios'].mean()))
st.write('N° de baños promedio: {:.2f}'.format(data_final['banos'].mean()))
submit_button = st.form_submit_button(label='Actualizar')
col1, col2 = st.columns((0.65, 1))
placeholder = st.empty()
placeholder2 = st.empty()
placeholder3 = st.empty()
placeholder2.markdown(get_table_excel_link(data, "Data Completa"),
unsafe_allow_html=True)
plot_column2(data, selection, nom_var_inv, placeholder, zoom)
dist_gamma = stats.gamma.rvs(1, scale=250000, size=2965)
if submit_button:
plot_column2(data_final, selection, nom_var_inv, placeholder, zoom)
st.markdown(get_table_excel_link(data_final, " Data filtrada"),
unsafe_allow_html=True)
data_hist = data_final[data_final["mercado"] == "venta"]
data_hist = data_hist[data_hist["valor_peso"] > 40000000]
data_hist = data_hist[data_hist["valor_peso"] < 1000000000]["valor_peso"] * 0.2
fig2 = px.histogram((data_hist), x='valor_peso')
#placeholder3.plotly_chart(fig2)
# streamlit run analisis_inmob.py
# scraping_localizado()