File size: 10,404 Bytes
064a25d
e4fbfab
 
 
 
 
 
 
2bd6eac
064a25d
effa819
 
e4fbfab
 
 
 
 
 
 
 
6f35e8c
8aa409a
7030e08
e4fbfab
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
effa819
 
 
 
 
 
 
 
e4fbfab
39a6a86
 
7762bc9
 
 
 
 
 
39a6a86
7762bc9
 
39a6a86
 
 
7762bc9
 
 
 
 
39a6a86
 
7762bc9
 
 
 
39a6a86
 
 
 
e4fbfab
7762bc9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e4fbfab
 
7762bc9
e4fbfab
8dd2873
e4fbfab
 
 
8dd2873
e4fbfab
 
618cd91
 
 
 
 
e4fbfab
8dd2873
e4fbfab
618cd91
e4fbfab
 
 
 
 
 
 
 
7edb958
effa819
618cd91
e4fbfab
 
 
effa819
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39a6a86
7762bc9
 
 
 
 
 
 
 
 
 
 
 
 
39a6a86
 
 
 
 
 
 
 
 
effa819
39a6a86
effa819
39a6a86
 
 
effa819
39a6a86
 
 
 
 
 
 
 
effa819
39a6a86
7762bc9
 
39a6a86
 
 
 
 
 
 
7762bc9
 
 
39a6a86
 
 
 
7762bc9
2bd6eac
effa819
 
 
 
 
 
 
e4fbfab
 
39a6a86
 
e4fbfab
 
d5c7130
 
 
 
 
 
 
 
 
8dd2873
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
import streamlit as st
import hopsworks
import pandas as pd
import os
import time
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
import altair as alt

import api

# Constants
# Directory (relative to the working directory) that holds the cached CSV and the timestamp file.
DATA_DIR = "data"
# File inside DATA_DIR recording when the data was last downloaded.
TIMESTAMP_FILE = "last_download_time.txt"

# Initialize Hopsworks connection
def connect_to_hopsworks():
    """Log in to the Hopsworks project and return the project handle.

    The API key is read from the ``HOPSWORKS_API_KEY`` environment variable.
    It is deliberately never printed or shown in the UI: anything written to
    stdout can end up in application logs.

    Returns:
        The object returned by ``hopsworks.login``.
    """
    st.write("Connecting to Hopsworks...")
    project_name = "id2223AirQuality"
    # May be None when the variable is unset; it is passed through unchanged,
    # so hopsworks.login decides how to handle a missing key.
    api_key = os.getenv("HOPSWORKS_API_KEY")
    return hopsworks.login(project=project_name, api_key_value=api_key)

# Fetch data from Hopsworks feature group
def fetch_data_from_feature_group(project, feature_group_name, version):
    """Read the full contents of one feature group.

    Args:
        project: Object exposing ``get_feature_store()`` (the Hopsworks project).
        feature_group_name: Name of the feature group to look up.
        version: Version of the feature group to look up.

    Returns:
        Whatever the feature group's ``read()`` returns.
    """
    return (
        project.get_feature_store()
        .get_feature_group(name=feature_group_name, version=version)
        .read()
    )

# Save data locally
def save_data_locally(data, filename):
    """Write ``data`` as CSV into DATA_DIR and stamp the download time.

    Args:
        data: Object with a ``to_csv`` method (a DataFrame in this file).
        filename: Name of the CSV file to create inside DATA_DIR.

    Returns:
        Path of the CSV file that was written.
    """
    os.makedirs(DATA_DIR, exist_ok=True)
    csv_path = os.path.join(DATA_DIR, filename)
    data.to_csv(csv_path, index=False)

    # Remember when this download happened so the cache can be aged later.
    stamp_path = os.path.join(DATA_DIR, TIMESTAMP_FILE)
    with open(stamp_path, "w") as stamp:
        stamp.write(str(datetime.now()))

    return csv_path

# Load local data
def load_local_data(filename):
    """Load a cached CSV from DATA_DIR.

    Args:
        filename: Name of the CSV file inside DATA_DIR.

    Returns:
        The parsed DataFrame, or ``None`` when the file does not exist.
    """
    csv_path = os.path.join(DATA_DIR, filename)
    if not os.path.exists(csv_path):
        return None
    return pd.read_csv(csv_path)

# Check if local data is valid
def is_local_data_valid():
    """Tell whether the local cache was downloaded within the last day.

    Returns:
        ``True`` when the timestamp file exists, parses as an ISO datetime and
        is at most one day old; ``False`` otherwise. A Streamlit warning is
        shown when the timestamp cannot be read or evaluated.
    """
    stamp_path = os.path.join(DATA_DIR, TIMESTAMP_FILE)
    if not os.path.exists(stamp_path):
        return False

    try:
        with open(stamp_path, "r") as stamp:
            downloaded_at = datetime.fromisoformat(stamp.read().strip())
        # The cache counts as fresh for up to (and including) one day.
        age = datetime.now() - downloaded_at
        return age <= timedelta(days=1)
    except Exception as e:
        st.warning(f"Error reading timestamp: {e}")
        return False
    
def get_buses():
    """Derive the bus lookup tables from ``st.session_state.data``.

    Returns:
        A 3-tuple of:
        - the distinct (trip_id, route_long_name, route_short_name) rows,
        - the distinct (route_long_name, route_short_name) rows,
        - a list of the unique route_short_name values.
    """
    trip_routes = st.session_state.data[
        ["trip_id", "route_long_name", "route_short_name"]
    ].drop_duplicates()
    routes = trip_routes[["route_long_name", "route_short_name"]].drop_duplicates()
    short_names = list(pd.unique(trip_routes["route_short_name"]))
    return trip_routes, routes, short_names

def plot_graph(plot_df):
    """Draw the occupancy status per stop as a stepped Altair line chart.

    The rows are ordered by ``datetime`` and the x axis keeps that order, so
    stops appear in the order they occur in the data rather than
    alphabetically.

    Args:
        plot_df: DataFrame with at least the columns ``datetime``,
            ``vehicle_occupancystatus`` and ``stop_name``. The callers in this
            file pass the rows of a single trip_id. The input is not modified.
    """
    # Time to plot!
    categories = {
        0: 'Empty',
        1: 'Many seats available',
        2: 'Few seats available',
        3: 'Standing room only',
        4: 'Crushed standing room',
        5: 'Full',
    }

    plot_df = plot_df[["datetime", "vehicle_occupancystatus", "stop_name"]]
    plot_df = plot_df.sort_values("datetime")
    st.write(plot_df.head())
    st.write(plot_df.tail())

    # assign() returns a new frame, so no column is written into a derived
    # slice. Status codes missing from `categories` map to NaN.
    plot_df = plot_df.assign(
        Occupancy=plot_df["vehicle_occupancystatus"].map(categories)
    )

    # Explicit Y order, fullest category first:
    # ['Full', 'Crushed standing room', ..., 'Empty']
    category_order = list(categories.values())[::-1]

    # Create the Altair chart
    chart = alt.Chart(plot_df).mark_line(point=True, interpolate="step-after").encode(
        # sort=None keeps the data (datetime) order; the default for a nominal
        # field is alphabetical, which would scramble the stop sequence.
        x=alt.X('stop_name:N', title="Stop name", sort=None),
        # Treat Y as categorical with a fixed category order
        y=alt.Y(
            'Occupancy:N',
            title="Vehicle Occupancy Status (Categories)",
            sort=category_order,
            scale=alt.Scale(domain=category_order),
        ),
        # Tooltips for interactivity
        tooltip=["datetime", 'stop_name', 'Occupancy'],
    ).properties(
        title="Vehicle Occupancy Status Over Time"
    )
    st.altair_chart(chart, use_container_width=True)

def visualize(filtered_data):
    """Show the stops in ``filtered_data`` as markers on a folium map.

    The map starts centred on the first row and is then fitted to the bounding
    box of all stop coordinates. Each marker's popup shows the stop name and
    the occupancy label; status codes without a known label (including missing
    values) are shown as "Unknown" instead of raising ``KeyError``.

    Args:
        filtered_data: DataFrame with the columns ``stop_lat``, ``stop_lon``,
            ``stop_name`` and ``vehicle_occupancystatus``. When it has no rows
            an info message is shown and no map is drawn.
    """
    import folium
    from streamlit_folium import st_folium

    categories = {
        0: 'Empty',
        1: 'Many seats available',
        2: 'Few seats available',
        3: 'Standing room only',
        4: 'Crushed standing room',
        5: 'Full',
    }

    # Without rows there is nothing to centre the map on (iloc[0] would raise).
    if filtered_data.empty:
        st.info("No stops to show on the map.")
        return

    # Create a folium map centred on the first stop
    first_stop = filtered_data.iloc[0]
    m = folium.Map(
        location=[first_stop["stop_lat"], first_stop["stop_lon"]],
        zoom_start=12,
    )

    # Zoom so that every stop is visible: south-west and north-east corners
    coords = filtered_data[['stop_lat', 'stop_lon']]
    sw = coords.min().values.tolist()
    ne = coords.max().values.tolist()
    m.fit_bounds([sw, ne])

    # Add one bus stop marker per row
    for _, row in filtered_data.iterrows():
        occupancy = categories.get(row['vehicle_occupancystatus'], "Unknown")
        folium.Marker(
            [row['stop_lat'], row['stop_lon']],
            popup=f"Bus stop: {row['stop_name']} Bus occupancy: {occupancy}",
            icon=folium.Icon(icon="bus-simple", prefix="fa"),
        ).add_to(m)

    # Display the map
    st_folium(m, width=700, height=500)

# Streamlit UI
def main():
    """Build the Streamlit page: load data, pick a bus, show occupancy per trip.

    Flow:
      1. Make sure ``st.session_state.data`` holds the predictions, from the
         local cache or from Hopsworks.
      2. Let the user pick a bus number (and a route when the number is
         ambiguous), a direction, a day and a time window in the sidebar.
      3. For every trip of that route and direction with at least one row in
         the window, draw the occupancy chart and the stop map.
    """
    st.title("Wheely Fun Times - Bus Occupancy Explorer")

    # Initialize session state
    if "hopsworks_project" not in st.session_state:
        st.session_state.hopsworks_project = None
    if "data" not in st.session_state:
        st.session_state.data = None

    _load_occupancy_data()

    _, bus_list, short_bus = get_buses()

    # Sidebar section for searching buses
    st.sidebar.title("Search for your desired bus")

    # Single-choice dropdown with all bus numbers
    search = st.sidebar.selectbox(
        "Search for your bus number:",
        options=short_bus,
        help="Select one bus to view details."
    )

    if search:
        route = bus_list[bus_list["route_short_name"] == search]
        long_names = list(pd.unique(route["route_long_name"]))
        if len(long_names) == 1:
            bus = long_names[0]
        else:
            # Same number used by several routes: let the user disambiguate.
            bus = st.sidebar.selectbox(
                "Pick bus route:",
                options=long_names,
                help="Select one bus to view details."
            )
        st.write("### Selected Bus")
        st.write(f"{search}: {bus}")

        # Direction is a boolean toggle kept across reruns; it is compared
        # against the `direction_id` column below.
        if "direction" not in st.session_state:
            st.session_state.direction = False
        if st.sidebar.button('Change Direction'):
            st.session_state.direction = not st.session_state.direction

        today = datetime.now().date()
        tomorrow = today + timedelta(days=1)
        date_options = {
            today.strftime("%d %B %Y"): today,
            tomorrow.strftime("%d %B %Y"): tomorrow,
        }
        day_choice = st.sidebar.radio("Select the day:", options=list(date_options.keys()))

        # Time inputs start empty; nothing is plotted until both are set.
        start_time = st.sidebar.time_input("Select a start time", value=None)
        end_time = st.sidebar.time_input("Select an end time", value=None)

        data = st.session_state.data
        # Work on a copy so the datetime conversion never writes into (a view
        # of) the frame kept in session state.
        bus_trips = data[data["route_long_name"] == bus].copy()
        # utc=True makes the parse succeed for tz-aware and tz-naive input
        # alike; tz_convert(None) then yields naive UTC timestamps that can be
        # compared with the naive datetimes built below.
        # NOTE(review): the window below is built from the server's local
        # date/time while the data is treated as UTC - confirm the intended
        # timezone.
        bus_trips["datetime"] = pd.to_datetime(
            bus_trips["datetime"], utc=True
        ).dt.tz_convert(None)

        if start_time is not None and end_time is not None:
            st.write(f"Displaying buses between {start_time.strftime('%H:%M')} and {end_time.strftime('%H:%M')} the {day_choice}")
            day = date_options[day_choice]
            window_start = datetime.combine(day, start_time)
            window_end = datetime.combine(day, end_time)
            selected_trips = bus_trips[
                (bus_trips["datetime"] >= window_start)
                & (bus_trips["datetime"] <= window_end)
                & (bus_trips["direction_id"] == st.session_state.direction)
            ]
            trip_ids = list(pd.unique(selected_trips["trip_id"]))
            st.write(f"Length {len(trip_ids)}")
            for trip_id in trip_ids:
                # Show the whole trip, not only the rows inside the window.
                trip_rows = data[data["trip_id"] == trip_id]
                plot_graph(trip_rows)
                visualize(trip_rows)
    else:
        st.write("No buses selected. Please search in the sidebar.")

    # Display data and graphs
    if st.session_state.data is not None:
        st.write("Hi")


def _load_occupancy_data():
    """Fill ``st.session_state.data`` from the local cache or from Hopsworks.

    The cached ``data.csv`` is used when the cache timestamp is valid and the
    file can be loaded. Otherwise (including a valid timestamp with a missing
    CSV) the "predictions" feature group, version 1, is fetched and cached.
    """
    if is_local_data_valid():
        cached = load_local_data("data.csv")
        if cached is not None:
            st.write("Using cached local data.")
            st.session_state.data = cached
            return
        # Timestamp is fresh but the CSV is gone: fall through and re-fetch.

    if st.session_state.hopsworks_project is None:
        st.write("Initializing Hopsworks connection...")
        st.session_state.hopsworks_project = connect_to_hopsworks()
        st.success("Connected to Hopsworks!")

    project = st.session_state.hopsworks_project
    data = fetch_data_from_feature_group(project, "predictions", 1)
    filepath = save_data_locally(data, "data.csv")
    st.session_state.data = data
    st.success(f"Data fetched and saved locally at {filepath}")

# Entry point: build the page whenever this file is executed. There is no
# __main__ guard, so this also runs if the module is imported.
main()

# Show all bus lines? Search?
    # How to show the direction?
# Filter on bus line and direction
# Filter on time
    # Should the user enter a time?
# See all unique trip ids
# Map position to stop
# Show some kind of graph for all buses within that time
    # Should it cover all stops, or only the ones the user has said they will travel between?