"""Streamlit app that previews a Hopsworks feature group.

The feature group is downloaded once, stored as a CSV file under ``DATA_DIR``
together with a download timestamp, and re-used until the cached copy is more
than ``MAX_CACHE_AGE`` old.
"""

import os
import time
from datetime import datetime, timedelta
from typing import Optional

import hopsworks
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import streamlit as st

# Constants
DATA_DIR = "data"
TIMESTAMP_FILE = "last_download_time.txt"
# Name of the cached CSV inside DATA_DIR. Used for both saving and loading so
# the two code paths can never disagree about where the cache lives.
DATA_FILE = "data.csv"
# Feature group shown by the app (previously hard-coded at the call site).
FEATURE_GROUP_NAME = "predictions"
FEATURE_GROUP_VERSION = 1
# Cached data older than this is considered stale and is downloaded again.
MAX_CACHE_AGE = timedelta(days=1)


def connect_to_hopsworks():
    """Open a Hopsworks connection and return the project handle.

    The API key is read from the ``HOPSWORKS_API_KEY`` environment variable;
    it is passed through unchanged (``None`` when the variable is unset).

    Returns:
        The project object returned by ``conn.get_project``.
    """
    st.write("Connecting to Hopsworks...")
    project_name = "id2223AirQuality"
    api_key = os.getenv("HOPSWORKS_API_KEY")
    conn = hopsworks.connection(api_key_value=api_key)
    return conn.get_project(project_name)


def fetch_data_from_feature_group(project, feature_group_name, version):
    """Read a whole feature group from the project's feature store.

    Args:
        project: Project handle as returned by ``connect_to_hopsworks``.
        feature_group_name: Name of the feature group to read.
        version: Version of the feature group to read.

    Returns:
        Whatever ``feature_group.read()`` returns (the callers in this file
        treat it as a pandas DataFrame).
    """
    feature_store = project.get_feature_store()
    feature_group = feature_store.get_feature_group(
        name=feature_group_name, version=version
    )
    return feature_group.read()


def save_data_locally(data, filename):
    """Write ``data`` as CSV into ``DATA_DIR`` and record the download time.

    Args:
        data: DataFrame to persist.
        filename: File name (relative to ``DATA_DIR``) for the CSV.

    Returns:
        The path of the CSV file that was written.
    """
    os.makedirs(DATA_DIR, exist_ok=True)
    filepath = os.path.join(DATA_DIR, filename)
    data.to_csv(filepath, index=False)

    # Save the timestamp in ISO format; is_local_data_valid() parses it with
    # datetime.fromisoformat().
    timestamp_path = os.path.join(DATA_DIR, TIMESTAMP_FILE)
    with open(timestamp_path, "w", encoding="utf-8") as f:
        f.write(datetime.now().isoformat())

    return filepath


def load_local_data(filename) -> Optional[pd.DataFrame]:
    """Load a cached CSV from ``DATA_DIR``.

    Args:
        filename: File name (relative to ``DATA_DIR``) of the CSV.

    Returns:
        The loaded DataFrame, or ``None`` when the file does not exist.
    """
    filepath = os.path.join(DATA_DIR, filename)
    if not os.path.exists(filepath):
        return None
    return pd.read_csv(filepath)


def is_local_data_valid() -> bool:
    """Tell whether the recorded download time is recent enough.

    Only the timestamp file is inspected; the existence of the data file
    itself is not checked here.

    Returns:
        ``True`` when a timestamp exists and is at most ``MAX_CACHE_AGE`` old,
        ``False`` when it is missing, stale, or unreadable (in the last case a
        Streamlit warning is shown as well).
    """
    timestamp_path = os.path.join(DATA_DIR, TIMESTAMP_FILE)
    if not os.path.exists(timestamp_path):
        return False

    try:
        with open(timestamp_path, "r", encoding="utf-8") as f:
            last_download_time = datetime.fromisoformat(f.read().strip())
        age = datetime.now() - last_download_time
    except (OSError, ValueError, TypeError) as e:
        # OSError: file unreadable; ValueError: not an ISO timestamp;
        # TypeError: timezone-aware stamp compared with naive now().
        st.warning(f"Error reading timestamp: {e}")
        return False

    return age <= MAX_CACHE_AGE


def plot_graphs(data):
    """Render a preview of ``data`` in the Streamlit page.

    Args:
        data: DataFrame to display; only its first rows are shown.
    """
    st.write("### Data Preview")
    st.dataframe(data.head())

    # Disabled plots, kept for later use:
    # st.write("### Histogram")
    # column = st.selectbox("Select column for histogram", data.columns)
    # fig, ax = plt.subplots()
    # sns.histplot(data[column], kde=True, ax=ax)
    # st.pyplot(fig)

    # st.write("### Correlation Matrix")
    # fig, ax = plt.subplots()
    # sns.heatmap(data.corr(), annot=True, cmap="coolwarm", ax=ax)
    # st.pyplot(fig)


def main():
    """Build the Streamlit page: load cached data or fetch it, then show it."""
    st.title("Hopsworks Feature Group Explorer")

    # Initialize session state
    if "hopsworks_project" not in st.session_state:
        st.session_state.hopsworks_project = None
    if "data" not in st.session_state:
        st.session_state.data = None

    # Disabled user inputs for feature group and version. While they are
    # disabled the module-level constants are used instead.
    # st.sidebar.title("Data Settings")
    # feature_group_name = st.sidebar.text_input("Feature Group Name", value="predictions")
    # version = st.sidebar.number_input("Feature Group Version", value=1, min_value=1)
    # filename = st.sidebar.text_input("Local Filename", value="data.csv")

    # A fresh timestamp is not enough: the CSV itself may be missing, in which
    # case load_local_data() returns None and the data is fetched again.
    cached = load_local_data(DATA_FILE) if is_local_data_valid() else None

    if cached is not None:
        st.write("Using cached local data.")
        st.session_state.data = cached
    else:
        # Fetch data if local data is invalid
        if st.session_state.hopsworks_project is None:
            st.write("Initializing Hopsworks connection...")
            st.session_state.hopsworks_project = connect_to_hopsworks()
            st.success("Connected to Hopsworks!")

        project = st.session_state.hopsworks_project
        data = fetch_data_from_feature_group(
            project, FEATURE_GROUP_NAME, FEATURE_GROUP_VERSION
        )
        filepath = save_data_locally(data, DATA_FILE)
        st.session_state.data = data
        st.success(f"Data fetched and saved locally at {filepath}")

    # Display data and graphs
    if st.session_state.data is not None:
        plot_graphs(st.session_state.data)


if __name__ == "__main__":
    main()

# Open questions / ideas:
# Show all bus lines? Search?
# How to see the direction?
# Filter by bus line and direction
# Filter by time
# Should the user specify a time?
# See all unique trip ids
# Map position to stop
# Show some kind of graph for all buses within that time
# Should it cover all stops, or only the ones the user said they will travel between?