Added weekly data
apps/housing.py  CHANGED  (+140 -66)
@@ -1,3 +1,4 @@
+import datetime
 import os
 import pathlib
 import requests
@@ -51,19 +52,66 @@ data_links = {
 }
 
 
+def get_data_columns(df, category, frequency="monthly"):
+    if frequency == "monthly":
+        if category.lower() == "county":
+            del_cols = ["month_date_yyyymm", "county_fips", "county_name"]
+        elif category.lower() == "state":
+            del_cols = ["month_date_yyyymm", "state", "state_id"]
+        elif category.lower() == "national":
+            del_cols = ["month_date_yyyymm", "country"]
+        elif category.lower() == "metro":
+            del_cols = ["month_date_yyyymm", "cbsa_code", "cbsa_title", "HouseholdRank"]
+        elif category.lower() == "zip":
+            del_cols = ["month_date_yyyymm", "postal_code", "zip_name", "flag"]
+    elif frequency == "weekly":
+        if category.lower() == "national":
+            del_cols = ["week_end_date", "geo_country"]
+        elif category.lower() == "metro":
+            del_cols = ["week_end_date", "cbsa_code", "cbsa_title", "hh_rank"]
+
+    cols = df.columns.values.tolist()
+
+    for col in cols:
+        if col.strip() in del_cols:
+            cols.remove(col)
+    if category.lower() == "metro":
+        return cols[2:]
+    else:
+        return cols[1:]
+
+
 @st.cache
 def get_inventory_data(url):
     df = pd.read_csv(url)
+    url = url.lower()
+    if "county" in url:
         df["county_fips"] = df["county_fips"].map(str)
         df["county_fips"] = df["county_fips"].str.zfill(5)
-    elif "
+    elif "state" in url:
         df["STUSPS"] = df["state_id"].str.upper()
-    elif "
+    elif "metro" in url:
         df["cbsa_code"] = df["cbsa_code"].map(str)
-    elif "
+    elif "zip" in url:
         df["postal_code"] = df["postal_code"].map(str)
         df["postal_code"] = df["postal_code"].str.zfill(5)
+
+    if "listing_weekly_core_aggregate_by_country" in url:
+        columns = get_data_columns(df, "national", "weekly")
+        for column in columns:
+            if column != "median_days_on_market_by_day_yy":
+                df[column] = df[column].str.rstrip("%").astype(float) / 100
+    if "listing_weekly_core_aggregate_by_metro" in url:
+        columns = get_data_columns(df, "metro", "weekly")
+        for column in columns:
+            if column != "median_days_on_market_by_day_yy":
+                df[column] = df[column].str.rstrip("%").astype(float) / 100
+
+    return df
+
+
+def filter_weekly_inventory(df, week):
+    df = df[df["week_end_date"] == week]
     return df
 
 
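Note on the weekly branch of get_inventory_data above: the weekly realtor.com CSVs appear to store most year-over-year columns as percent strings (e.g. "1.2%"), so every weekly column except median_days_on_market_by_day_yy is stripped of the trailing "%" and rescaled to a fraction. A minimal sketch of that conversion on a toy frame (the column name here is illustrative, not taken from the data dictionary):

import pandas as pd

# Stand-in for one weekly column delivered as percent strings.
df = pd.DataFrame({"total_listing_count_yy": ["1.2%", "-0.5%"]})

# Same idea as the loop in get_inventory_data: strip "%", cast to float, divide by 100.
df["total_listing_count_yy"] = (
    df["total_listing_count_yy"].str.rstrip("%").astype(float) / 100
)

print(df["total_listing_count_yy"].tolist())  # ~[0.012, -0.005]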
@@ -77,28 +125,6 @@ def get_periods(df):
     return [str(d) for d in list(set(df["month_date_yyyymm"].tolist()))]
 
 
-def get_data_columns(df, category):
-    if category.lower() == "county":
-        del_cols = ["month_date_yyyymm", "county_fips", "county_name"]
-    elif category.lower() == "state":
-        del_cols = ["month_date_yyyymm", "state", "state_id"]
-    elif category.lower() == "national":
-        del_cols = ["month_date_yyyymm", "country"]
-    elif category.lower() == "metro":
-        del_cols = ["month_date_yyyymm", "cbsa_code", "cbsa_title", "HouseholdRank"]
-    elif category.lower() == "zip":
-        del_cols = ["month_date_yyyymm", "postal_code", "zip_name", "flag"]
-    cols = df.columns.values.tolist()
-
-    for col in cols:
-        if col.strip() in del_cols:
-            cols.remove(col)
-    if category.lower() == "metro":
-        return cols[2:]
-    else:
-        return cols[1:]
-
-
 @st.cache
 def get_geom_data(category):
 
@@ -134,6 +160,8 @@ def join_attributes(gdf, df, category):
     elif category == "state":
         new_gdf = gdf.merge(df, left_on="STUSPS", right_on="STUSPS", how="outer")
     elif category == "national":
+        if "geo_country" in df.columns.values.tolist():
+            df["country"] = "United States"
         new_gdf = gdf.merge(df, left_on="NAME", right_on="country", how="outer")
     elif category == "metro":
         new_gdf = gdf.merge(df, left_on="CBSAFP", right_on="cbsa_code", how="outer")
@@ -160,6 +188,21 @@ def get_data_dict(name):
     return label, desc
 
 
+def get_weeks(df):
+    weeks = [
+        datetime.date(int(d.split("/")[2]), int(d.split("/")[0]), int(d.split("/")[1]))
+        for d in list(set(df["week_end_date"].tolist()))
+    ]
+    weeks.sort()
+    return weeks
+
+
+def get_saturday(in_date):
+    idx = (in_date.weekday() + 1) % 7
+    sat = in_date + datetime.timedelta(6 - idx)
+    return sat
+
+
 def app():
 
     st.title("Real Estate Data and Market Trends")
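For reference, the weekday arithmetic in the new get_saturday helper maps any date to the Saturday that closes its Sunday-to-Saturday week, which matches how the app interprets week_end_date. Python's date.weekday() returns Monday=0 through Sunday=6, so (weekday() + 1) % 7 re-bases the week to Sunday=0 through Saturday=6, and 6 - idx is the number of days left until that Saturday. A quick check with arbitrary dates:

import datetime

def get_saturday(in_date):
    # Monday=0 ... Sunday=6, re-based so that Sunday=0 ... Saturday=6.
    idx = (in_date.weekday() + 1) % 7
    return in_date + datetime.timedelta(6 - idx)

print(get_saturday(datetime.date(2021, 11, 24)))  # a Wednesday -> 2021-11-27 (Saturday)
print(get_saturday(datetime.date(2021, 11, 27)))  # already Saturday -> 2021-11-27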
@@ -173,62 +216,93 @@ def app():
         [0.6, 0.8, 0.6, 1.4, 2]
     )
     with row1_col1:
-        frequency = st.selectbox("Monthly/weekly data", ["Monthly"])
-        # frequency = st.selectbox("Monthly/weekly data", ["Monthly", "Weekly"])
+        frequency = st.selectbox("Monthly/weekly data", ["Monthly", "Weekly"])
     with row1_col2:
+        types = ["Current month data", "Historical data"]
+        if frequency == "Weekly":
+            types.remove("Current month data")
         cur_hist = st.selectbox(
             "Current/historical data",
+            types,
         )
     with row1_col3:
+        if frequency == "Monthly":
+            scale = st.selectbox(
+                "Scale", ["National", "State", "Metro", "County"], index=3
+            )
+        else:
+            scale = st.selectbox("Scale", ["National", "Metro"], index=1)
 
     gdf = get_geom_data(scale.lower())
+
+    if frequency == "Weekly":
+        inventory_df = get_inventory_data(data_links["weekly"][scale.lower()])
+        weeks = get_weeks(inventory_df)
+        with row1_col1:
+            selected_date = st.date_input("Select a date", value=weeks[-1])
+            saturday = get_saturday(selected_date)
+            selected_period = saturday.strftime("%-m/%-d/%Y")
+            if saturday not in weeks:
+                st.error(
+                    "The selected date is not available in the data. Please select a date between {} and {}".format(
+                        weeks[0], weeks[-1]
+                    )
+                )
+                selected_period = weeks[-1].strftime("%-m/%-d/%Y")
+        inventory_df = get_inventory_data(data_links["weekly"][scale.lower()])
+        inventory_df = filter_weekly_inventory(inventory_df, selected_period)
+
+    if frequency == "Monthly":
+        if cur_hist == "Current month data":
             inventory_df = get_inventory_data(
-                data_links["
+                data_links["monthly_current"][scale.lower()]
             )
+            selected_period = get_periods(inventory_df)[0]
+        else:
+            with row1_col2:
+                inventory_df = get_inventory_data(
+                    data_links["monthly_historical"][scale.lower()]
-                    start_year,
-                    end_year,
-                    value=start_year,
-                    step=1,
                 )
+                start_year, end_year = get_start_end_year(inventory_df)
+                periods = get_periods(inventory_df)
+                with st.expander("Select year and month", True):
+                    selected_year = st.slider(
+                        "Year",
+                        start_year,
+                        end_year,
+                        value=start_year,
+                        step=1,
+                    )
+                    selected_month = st.slider(
+                        "Month",
+                        min_value=1,
+                        max_value=12,
+                        value=int(periods[0][-2:]),
+                        step=1,
+                    )
+                selected_period = str(selected_year) + str(selected_month).zfill(2)
+                if selected_period not in periods:
+                    st.error("Data not available for selected year and month")
+                    selected_period = periods[0]
+                inventory_df = inventory_df[
+                    inventory_df["month_date_yyyymm"] == int(selected_period)
+                ]
+
+    data_cols = get_data_columns(inventory_df, scale.lower(), frequency.lower())
 
     with row1_col4:
         selected_col = st.selectbox("Attribute", data_cols)
     with row1_col5:
         show_desc = st.checkbox("Show attribute description")
         if show_desc:
+            try:
+                label, desc = get_data_dict(selected_col.strip())
+                markdown = f"""
+                **{label}**: {desc}
+                """
+                st.markdown(markdown)
+            except:
+                st.warning("No description available for selected attribute")
 
     row2_col1, row2_col2, row2_col3, row2_col4 = st.columns([1, 1, 2, 2])
 
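One portability caveat with the weekly branch in app(): saturday.strftime("%-m/%-d/%Y") relies on the glibc-style "%-" flag to drop leading zeros, which the Windows C runtime does not support (it uses "%#m"/"%#d" instead). If the period string ever needs to be built on Windows, formatting the fields directly avoids the flag; a sketch under that assumption, not part of this commit:

import datetime

saturday = datetime.date(2021, 7, 3)

# Portable equivalent of strftime("%-m/%-d/%Y"): no zero-padding on month or day.
selected_period = f"{saturday.month}/{saturday.day}/{saturday.year}"

print(selected_period)  # 7/3/2021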