"""Streamlit app showing predicted categories for the latest SF incidents."""
import os

import hopsworks
import joblib
import pandas as pd
import streamlit as st
# NOTE(review): xgboost is not referenced by name in this script; presumably
# it must be installed so the pickled model can be loaded - confirm before
# removing the import.
import xgboost as xgb
from sodapy import Socrata

from preprocessor_pipeline import preprocessing_incident


def _first_hot_label(flags, labels, fallback):
    """Return the label paired with the first flag equal to 1.0, else *fallback*."""
    for flag, label in zip(flags, labels):
        if flag == 1.0:
            return label
    return fallback


def unencode_weekday(fri, mon, sat, sun, thu, tue, wed):
    """Turn one-hot weekday flags back into a weekday name.

    Flags are checked in parameter order and the first one equal to 1.0
    wins. Returns "Invalid Weekday" when no flag equals 1.0.
    """
    return _first_hot_label(
        (fri, mon, sat, sun, thu, tue, wed),
        (
            "Friday",
            "Monday",
            "Saturday",
            "Sunday",
            "Thursday",
            "Tuesday",
            "Wednesday",
        ),
        "Invalid Weekday",
    )


def unencode_report_type_code(ii, iss, vi, vs):
    """Turn one-hot report-type flags back into the report type code.

    Flags are checked in parameter order and the first one equal to 1.0
    wins. Returns "Invalid Report Type Code" when no flag equals 1.0.
    """
    return _first_hot_label(
        (ii, iss, vi, vs),
        ("II", "IS", "VI", "VS"),
        "Invalid Report Type Code",
    )


def unencode_police_district(bay, cen, ing, mis, nor, out, par, ric, sou, tar, ten):
    """Turn one-hot police-district flags back into the district name.

    Flags are checked in parameter order and the first one equal to 1.0
    wins. Returns "Invalid Police District" when no flag equals 1.0.
    """
    return _first_hot_label(
        (bay, cen, ing, mis, nor, out, par, ric, sou, tar, ten),
        (
            "Bayview",
            "Central",
            "Ingleside",
            "Mission",
            "Northern",
            "OutOfSF",
            "Park",
            "Richmond",
            "Southern",
            "Taraval",
            "Tenderloin",
        ),
        "Invalid Police District",
    )


st.set_page_config(layout="wide")
st.title('Latest SF Incident Category Prediction')

# Credentials are read from the environment instead of being committed to the
# repository. Unset variables yield None, which makes the client connect
# without an app token or basic auth.
# NOTE(review): the token/password previously hard-coded here should be
# rotated, since they remain in version-control history.
with Socrata(
    "data.sfgov.org",
    os.environ.get("SOCRATA_APP_TOKEN"),
    username=os.environ.get("SOCRATA_USERNAME"),
    password=os.environ.get("SOCRATA_PASSWORD"),
) as client:
    # NOTE(review): this requests up to 800000 records on every script run
    # even though only the 100 newest rows are kept below. Ordering/limiting
    # server-side or caching would be cheaper, but is not done here because
    # preprocessing_incident may filter rows - confirm before changing.
    results = client.get("wg3w-h783", limit=800000)
results_df = pd.DataFrame.from_records(results)

results_df_preprocessed = preprocessing_incident(results_df)
results_df_preprocessed["incident_datetime"] = pd.to_datetime(
    results_df_preprocessed["incident_datetime"]
)
# Keep the 100 most recent incidents. The explicit copy makes the result an
# independent frame, so the column assignments and in-place drops performed
# later in the script do not operate on a slice of the full frame.
results_df_preprocessed = (
    results_df_preprocessed
    .sort_values(by='incident_datetime', ascending=False)
    .head(100)
    .copy()
)

project = hopsworks.login()
# NOTE(review): `fs` is not used anywhere in this script as shown; the call is
# kept so the script's interaction with Hopsworks is unchanged.
fs = project.get_feature_store()
mr = project.get_model_registry()
# NOTE(review): the registry name is "incident_modal" while the artifact file
# is "incident_model.pkl" - confirm the spelling matches the registered model.
registered_model = mr.get_model("incident_modal", version=1)
model_dir = registered_model.download()
# joblib.load unpickles the file, which can execute arbitrary code; this
# trusts the contents of the model registry.
model = joblib.load(os.path.join(model_dir, "incident_model.pkl"))
# Feature matrix for the model: every column except the timestamp and the
# true label. A non-inplace drop returns a new frame, so the display frame is
# left untouched.
batch_data = results_df_preprocessed.drop(
    columns=['incident_datetime', 'incident_category']
)
y_pred = model.predict(batch_data)

# Each entry is (decoded column name, decoder function, one-hot columns in the
# order the decoder expects its arguments).
one_hot_groups = (
    (
        "incident_day_of_week",
        unencode_weekday,
        [
            "incident_day_of_week_Friday",
            "incident_day_of_week_Monday",
            "incident_day_of_week_Saturday",
            "incident_day_of_week_Sunday",
            "incident_day_of_week_Thursday",
            "incident_day_of_week_Tuesday",
            "incident_day_of_week_Wednesday",
        ],
    ),
    (
        "report_type_code",
        unencode_report_type_code,
        [
            "report_type_code_II",
            "report_type_code_IS",
            "report_type_code_VI",
            "report_type_code_VS",
        ],
    ),
    (
        "police_district",
        unencode_police_district,
        [
            "police_district_Bayview",
            "police_district_Central",
            "police_district_Ingleside",
            "police_district_Mission",
            "police_district_Northern",
            "police_district_OutOfSF",
            "police_district_Park",
            "police_district_Richmond",
            "police_district_Southern",
            "police_district_Taraval",
            "police_district_Tenderloin",
        ],
    ),
)

# Collapse each one-hot group into a single readable column, then discard the
# encoded columns it was built from.
for decoded_name, decoder, encoded_columns in one_hot_groups:
    results_df_preprocessed[decoded_name] = results_df_preprocessed.apply(
        lambda row: decoder(*(row[column] for column in encoded_columns)),
        axis=1,
    )
    results_df_preprocessed.drop(columns=encoded_columns, inplace=True)

# Renumber the rows from zero and record each row's position in a temporary
# column; the following code uses it to look up the matching prediction.
results_df_preprocessed.reset_index(inplace=True)
results_df_preprocessed["idx_temp"] = results_df_preprocessed.index
# Look up the prediction for every row through the temporary position column,
# then remove that helper column again.
row_positions = results_df_preprocessed["idx_temp"]
results_df_preprocessed["incident_category_pred"] = y_pred[row_positions]
del results_df_preprocessed["idx_temp"]

# Columns shown in the app, in display order: the decoded inputs followed by
# the recorded category and the predicted one.
display_columns = [
    'incident_datetime',
    'latitude',
    'longitude',
    'incident_day_of_week',
    'report_type_code',
    'police_district',
    'incident_category',
    'incident_category_pred',
]
df = results_df_preprocessed[display_columns]
st.write(df)

# The button's return value is ignored; presumably it exists only so that a
# click makes Streamlit execute the script again.
st.button("Re-run")