giswqs committed on
Commit
3768e5d
·
1 Parent(s): 3fbbe2d

Added weekly data

Browse files
Files changed (1) hide show
  1. apps/housing.py +140 -66
apps/housing.py CHANGED
@@ -1,3 +1,4 @@
 
1
  import os
2
  import pathlib
3
  import requests
@@ -51,19 +52,66 @@ data_links = {
51
  }
52
 
53
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
  @st.cache
55
  def get_inventory_data(url):
56
  df = pd.read_csv(url)
57
- if "County" in url:
 
58
  df["county_fips"] = df["county_fips"].map(str)
59
  df["county_fips"] = df["county_fips"].str.zfill(5)
60
- elif "State" in url:
61
  df["STUSPS"] = df["state_id"].str.upper()
62
- elif "Metro" in url:
63
  df["cbsa_code"] = df["cbsa_code"].map(str)
64
- elif "Zip" in url:
65
  df["postal_code"] = df["postal_code"].map(str)
66
  df["postal_code"] = df["postal_code"].str.zfill(5)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
  return df
68
 
69
 
@@ -77,28 +125,6 @@ def get_periods(df):
77
  return [str(d) for d in list(set(df["month_date_yyyymm"].tolist()))]
78
 
79
 
80
def get_data_columns(df, category):
    """Return the names of the data (non-identifier) columns of a monthly table.

    Args:
        df: Table whose ``columns`` hold string column names (e.g. a pandas
            DataFrame).
        category: Geographic scale, case-insensitive: "county", "state",
            "national", "metro" or "zip".

    Returns:
        list: Column names in their original order, excluding the period and
        identifier columns listed for the category.

    Raises:
        ValueError: If *category* is not one of the supported scales.
    """
    id_columns = {
        "county": ["month_date_yyyymm", "county_fips", "county_name"],
        "state": ["month_date_yyyymm", "state", "state_id"],
        "national": ["month_date_yyyymm", "country"],
        "metro": ["month_date_yyyymm", "cbsa_code", "cbsa_title", "HouseholdRank"],
        "zip": ["month_date_yyyymm", "postal_code", "zip_name", "flag"],
    }
    scale = category.lower()
    if scale not in id_columns:
        raise ValueError(f"Unsupported category: {category!r}")
    excluded = id_columns[scale]

    # Filter by name into a new list. Removing items from a list while
    # iterating over it skips the element after each removal, which previously
    # had to be patched up with a positional slice that only worked when the
    # identifier columns came first.
    return [col for col in df.columns.values.tolist() if col.strip() not in excluded]
100
-
101
-
102
  @st.cache
103
  def get_geom_data(category):
104
 
@@ -134,6 +160,8 @@ def join_attributes(gdf, df, category):
134
  elif category == "state":
135
  new_gdf = gdf.merge(df, left_on="STUSPS", right_on="STUSPS", how="outer")
136
  elif category == "national":
 
 
137
  new_gdf = gdf.merge(df, left_on="NAME", right_on="country", how="outer")
138
  elif category == "metro":
139
  new_gdf = gdf.merge(df, left_on="CBSAFP", right_on="cbsa_code", how="outer")
@@ -160,6 +188,21 @@ def get_data_dict(name):
160
  return label, desc
161
 
162
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
163
  def app():
164
 
165
  st.title("Real Estate Data and Market Trends")
@@ -173,62 +216,93 @@ def app():
173
  [0.6, 0.8, 0.6, 1.4, 2]
174
  )
175
  with row1_col1:
176
- frequency = st.selectbox("Monthly/weekly data", ["Monthly"])
177
- # frequency = st.selectbox("Monthly/weekly data", ["Monthly", "Weekly"])
178
  with row1_col2:
 
 
 
179
  cur_hist = st.selectbox(
180
  "Current/historical data",
181
- ["Current month data", "Historical data"],
182
  )
183
  with row1_col3:
184
- scale = st.selectbox("Scale", ["National", "State", "Metro", "County"], index=3)
 
 
 
 
 
185
 
186
  gdf = get_geom_data(scale.lower())
187
- if cur_hist == "Current month data":
188
- inventory_df = get_inventory_data(data_links["monthly_current"][scale.lower()])
189
- selected_period = get_periods(inventory_df)[0]
190
- else:
191
- with row1_col2:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
192
  inventory_df = get_inventory_data(
193
- data_links["monthly_historical"][scale.lower()]
194
  )
195
- start_year, end_year = get_start_end_year(inventory_df)
196
- periods = get_periods(inventory_df)
197
- with st.expander("Select year and month", True):
198
- selected_year = st.slider(
199
- "Year",
200
- start_year,
201
- end_year,
202
- value=start_year,
203
- step=1,
204
  )
205
- selected_month = st.slider(
206
- "Month",
207
- min_value=1,
208
- max_value=12,
209
- value=int(periods[0][-2:]),
210
- step=1,
211
- )
212
- selected_period = str(selected_year) + str(selected_month).zfill(2)
213
- if selected_period not in periods:
214
- st.error("Data not available for selected year and month")
215
- selected_period = periods[0]
216
- inventory_df = inventory_df[
217
- inventory_df["month_date_yyyymm"] == int(selected_period)
218
- ]
219
-
220
- data_cols = get_data_columns(inventory_df, scale.lower())
 
 
 
 
 
 
 
 
 
 
221
 
222
  with row1_col4:
223
  selected_col = st.selectbox("Attribute", data_cols)
224
  with row1_col5:
225
  show_desc = st.checkbox("Show attribute description")
226
  if show_desc:
227
- label, desc = get_data_dict(selected_col.strip())
228
- markdown = f"""
229
- **{label}**: {desc}
230
- """
231
- st.markdown(markdown)
 
 
 
232
 
233
  row2_col1, row2_col2, row2_col3, row2_col4 = st.columns([1, 1, 2, 2])
234
 
 
1
+ import datetime
2
  import os
3
  import pathlib
4
  import requests
 
52
  }
53
 
54
 
55
def get_data_columns(df, category, frequency="monthly"):
    """Return the names of the data (non-identifier) columns of an inventory table.

    Args:
        df: Table whose ``columns`` hold string column names (e.g. a pandas
            DataFrame).
        category: Geographic scale, case-insensitive. Monthly data supports
            "county", "state", "national", "metro" and "zip"; weekly data
            supports "national" and "metro".
        frequency: "monthly" (default) or "weekly", case-insensitive.

    Returns:
        list: Column names in their original order, excluding the period and
        identifier columns listed for the category/frequency pair.

    Raises:
        ValueError: If the category/frequency combination is not supported.
    """
    id_columns = {
        ("monthly", "county"): ["month_date_yyyymm", "county_fips", "county_name"],
        ("monthly", "state"): ["month_date_yyyymm", "state", "state_id"],
        ("monthly", "national"): ["month_date_yyyymm", "country"],
        ("monthly", "metro"): [
            "month_date_yyyymm",
            "cbsa_code",
            "cbsa_title",
            "HouseholdRank",
        ],
        ("monthly", "zip"): ["month_date_yyyymm", "postal_code", "zip_name", "flag"],
        ("weekly", "national"): ["week_end_date", "geo_country"],
        ("weekly", "metro"): ["week_end_date", "cbsa_code", "cbsa_title", "hh_rank"],
    }
    key = (frequency.lower(), category.lower())
    if key not in id_columns:
        raise ValueError(
            f"Unsupported category/frequency combination: {category!r}/{frequency!r}"
        )
    excluded = id_columns[key]

    # Filter by name into a new list. Removing items from a list while
    # iterating over it skips the element after each removal, which previously
    # had to be patched up with a positional slice that only worked when the
    # identifier columns came first.
    return [col for col in df.columns.values.tolist() if col.strip() not in excluded]
82
+
83
+
84
  @st.cache
85
  def get_inventory_data(url):
86
  df = pd.read_csv(url)
87
+ url = url.lower()
88
+ if "county" in url:
89
  df["county_fips"] = df["county_fips"].map(str)
90
  df["county_fips"] = df["county_fips"].str.zfill(5)
91
+ elif "state" in url:
92
  df["STUSPS"] = df["state_id"].str.upper()
93
+ elif "metro" in url:
94
  df["cbsa_code"] = df["cbsa_code"].map(str)
95
+ elif "zip" in url:
96
  df["postal_code"] = df["postal_code"].map(str)
97
  df["postal_code"] = df["postal_code"].str.zfill(5)
98
+
99
+ if "listing_weekly_core_aggregate_by_country" in url:
100
+ columns = get_data_columns(df, "national", "weekly")
101
+ for column in columns:
102
+ if column != "median_days_on_market_by_day_yy":
103
+ df[column] = df[column].str.rstrip("%").astype(float) / 100
104
+ if "listing_weekly_core_aggregate_by_metro" in url:
105
+ columns = get_data_columns(df, "metro", "weekly")
106
+ for column in columns:
107
+ if column != "median_days_on_market_by_day_yy":
108
+ df[column] = df[column].str.rstrip("%").astype(float) / 100
109
+
110
+ return df
111
+
112
+
113
def filter_weekly_inventory(df, week):
    """Return the rows of *df* whose ``week_end_date`` value equals *week*."""
    matches_week = df["week_end_date"] == week
    return df[matches_week]
116
 
117
 
 
125
  return [str(d) for d in list(set(df["month_date_yyyymm"].tolist()))]
126
 
127
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
128
  @st.cache
129
  def get_geom_data(category):
130
 
 
160
  elif category == "state":
161
  new_gdf = gdf.merge(df, left_on="STUSPS", right_on="STUSPS", how="outer")
162
  elif category == "national":
163
+ if "geo_country" in df.columns.values.tolist():
164
+ df["country"] = "United States"
165
  new_gdf = gdf.merge(df, left_on="NAME", right_on="country", how="outer")
166
  elif category == "metro":
167
  new_gdf = gdf.merge(df, left_on="CBSAFP", right_on="cbsa_code", how="outer")
 
188
  return label, desc
189
 
190
 
191
def get_weeks(df):
    """Return the distinct ``week_end_date`` values of *df* as sorted dates.

    Each value is split on "/" and read as month/day/year (e.g. "1/8/2022"),
    then converted to a ``datetime.date``. The result is in ascending order.
    """

    def _to_date(text):
        parts = text.split("/")
        return datetime.date(int(parts[2]), int(parts[0]), int(parts[1]))

    unique_values = set(df["week_end_date"].tolist())
    return sorted(_to_date(value) for value in unique_values)
198
+
199
+
200
def get_saturday(in_date):
    """Return the Saturday on or after *in_date*.

    A Saturday is returned unchanged; a Sunday maps to the Saturday six days
    later.
    """
    # weekday() numbers Monday as 0, so Saturday is 5; the modulo wraps Sunday
    # (6) forward to the following Saturday.
    days_until_saturday = (5 - in_date.weekday()) % 7
    return in_date + datetime.timedelta(days=days_until_saturday)
204
+
205
+
206
  def app():
207
 
208
  st.title("Real Estate Data and Market Trends")
 
216
  [0.6, 0.8, 0.6, 1.4, 2]
217
  )
218
  with row1_col1:
219
+ frequency = st.selectbox("Monthly/weekly data", ["Monthly", "Weekly"])
 
220
  with row1_col2:
221
+ types = ["Current month data", "Historical data"]
222
+ if frequency == "Weekly":
223
+ types.remove("Current month data")
224
  cur_hist = st.selectbox(
225
  "Current/historical data",
226
+ types,
227
  )
228
  with row1_col3:
229
+ if frequency == "Monthly":
230
+ scale = st.selectbox(
231
+ "Scale", ["National", "State", "Metro", "County"], index=3
232
+ )
233
+ else:
234
+ scale = st.selectbox("Scale", ["National", "Metro"], index=1)
235
 
236
  gdf = get_geom_data(scale.lower())
237
+
238
+ if frequency == "Weekly":
239
+ inventory_df = get_inventory_data(data_links["weekly"][scale.lower()])
240
+ weeks = get_weeks(inventory_df)
241
+ with row1_col1:
242
+ selected_date = st.date_input("Select a date", value=weeks[-1])
243
+ saturday = get_saturday(selected_date)
244
+ selected_period = saturday.strftime("%-m/%-d/%Y")
245
+ if saturday not in weeks:
246
+ st.error(
247
+ "The selected date is not available in the data. Please select a date between {} and {}".format(
248
+ weeks[0], weeks[-1]
249
+ )
250
+ )
251
+ selected_period = weeks[-1].strftime("%-m/%-d/%Y")
252
+ inventory_df = get_inventory_data(data_links["weekly"][scale.lower()])
253
+ inventory_df = filter_weekly_inventory(inventory_df, selected_period)
254
+
255
+ if frequency == "Monthly":
256
+ if cur_hist == "Current month data":
257
  inventory_df = get_inventory_data(
258
+ data_links["monthly_current"][scale.lower()]
259
  )
260
+ selected_period = get_periods(inventory_df)[0]
261
+ else:
262
+ with row1_col2:
263
+ inventory_df = get_inventory_data(
264
+ data_links["monthly_historical"][scale.lower()]
 
 
 
 
265
  )
266
+ start_year, end_year = get_start_end_year(inventory_df)
267
+ periods = get_periods(inventory_df)
268
+ with st.expander("Select year and month", True):
269
+ selected_year = st.slider(
270
+ "Year",
271
+ start_year,
272
+ end_year,
273
+ value=start_year,
274
+ step=1,
275
+ )
276
+ selected_month = st.slider(
277
+ "Month",
278
+ min_value=1,
279
+ max_value=12,
280
+ value=int(periods[0][-2:]),
281
+ step=1,
282
+ )
283
+ selected_period = str(selected_year) + str(selected_month).zfill(2)
284
+ if selected_period not in periods:
285
+ st.error("Data not available for selected year and month")
286
+ selected_period = periods[0]
287
+ inventory_df = inventory_df[
288
+ inventory_df["month_date_yyyymm"] == int(selected_period)
289
+ ]
290
+
291
+ data_cols = get_data_columns(inventory_df, scale.lower(), frequency.lower())
292
 
293
  with row1_col4:
294
  selected_col = st.selectbox("Attribute", data_cols)
295
  with row1_col5:
296
  show_desc = st.checkbox("Show attribute description")
297
  if show_desc:
298
+ try:
299
+ label, desc = get_data_dict(selected_col.strip())
300
+ markdown = f"""
301
+ **{label}**: {desc}
302
+ """
303
+ st.markdown(markdown)
304
+ except:
305
+ st.warning("No description available for selected attribute")
306
 
307
  row2_col1, row2_col2, row2_col3, row2_col4 = st.columns([1, 1, 2, 2])
308