|
import streamlit as st |
|
import hopsworks |
|
import pandas as pd |
|
import os |
|
import time |
|
import matplotlib.pyplot as plt |
|
import seaborn as sns |
|
from datetime import datetime, timedelta |
|
|
|
|
|
# Directory (a relative path) that holds the cached CSV and the timestamp file.
DATA_DIR = "data"

# Name of the file inside DATA_DIR that records when data was last downloaded.
TIMESTAMP_FILE = "last_download_time.txt"
|
|
|
|
|
def connect_to_hopsworks():
    """Open a Hopsworks connection and return the "id2223AirQuality" project.

    The API key is read from the ``HOPSWORKS_API_KEY`` environment variable
    and passed on as-is (``None`` when the variable is unset). A progress
    message is written to the Streamlit page before connecting.

    Returns:
        The project object returned by ``get_project`` on the connection.
    """
    st.write("Connecting to Hopsworks...")
    connection = hopsworks.connection(
        api_key_value=os.getenv("HOPSWORKS_API_KEY"),
    )
    return connection.get_project("id2223AirQuality")
|
|
|
|
|
def fetch_data_from_feature_group(project, feature_group_name, version):
    """Read the full contents of one feature group from a project.

    Args:
        project: Object exposing ``get_feature_store()`` (as returned by
            ``connect_to_hopsworks``).
        feature_group_name: Name of the feature group to look up.
        version: Version of the feature group to look up.

    Returns:
        Whatever the feature group's ``read()`` returns (presumably a pandas
        DataFrame, since callers use ``.head()`` and ``.to_csv()`` on it;
        confirm against the Hopsworks API).
    """
    return (
        project.get_feature_store()
        .get_feature_group(name=feature_group_name, version=version)
        .read()
    )
|
|
|
|
|
def save_data_locally(data, filename):
    """Write *data* as a CSV under ``DATA_DIR`` and record the download time.

    ``DATA_DIR`` is created if it does not exist yet. After the CSV is
    written, the current local time is stored as text in ``TIMESTAMP_FILE``
    inside the same directory, which ``is_local_data_valid`` reads back.

    Args:
        data: Object with a ``to_csv(path, index=False)`` method.
        filename: Name of the CSV file, joined onto ``DATA_DIR``.

    Returns:
        The path the CSV was written to.
    """
    os.makedirs(DATA_DIR, exist_ok=True)

    csv_path = os.path.join(DATA_DIR, filename)
    data.to_csv(csv_path, index=False)

    # Stamp the download only once the CSV itself has been written.
    with open(os.path.join(DATA_DIR, TIMESTAMP_FILE), "w") as marker:
        marker.write(str(datetime.now()))

    return csv_path
|
|
|
|
|
def load_local_data(filename):
    """Load a cached CSV from ``DATA_DIR``.

    Args:
        filename: Name of the CSV file, joined onto ``DATA_DIR``.

    Returns:
        The result of ``pd.read_csv`` for that path, or ``None`` when no
        file exists there.
    """
    candidate = os.path.join(DATA_DIR, filename)
    if not os.path.exists(candidate):
        return None
    return pd.read_csv(candidate)
|
|
|
|
|
def is_local_data_valid():
    """Report whether the last recorded download is at most one day old.

    Reads the ISO-formatted timestamp stored in ``TIMESTAMP_FILE`` under
    ``DATA_DIR``. Only the timestamp is inspected; the cached data file
    itself is not checked here.

    Returns:
        ``True`` when the timestamp exists, parses, and is no more than one
        day in the past; ``False`` otherwise. A read or parse failure is
        reported with a Streamlit warning and also yields ``False``.
    """
    marker_path = os.path.join(DATA_DIR, TIMESTAMP_FILE)
    if not os.path.exists(marker_path):
        return False

    try:
        with open(marker_path, "r") as marker:
            downloaded_at = datetime.fromisoformat(marker.read().strip())

        # Valid as long as the cache has not aged past one full day.
        return datetime.now() - downloaded_at <= timedelta(days=1)
    except Exception as e:
        st.warning(f"Error reading timestamp: {e}")
        return False
|
|
|
|
|
def plot_graphs(data):
    """Show a preview of *data* on the Streamlit page.

    Despite the name, no charts are drawn here: the function writes a
    "Data Preview" heading and renders ``data.head()`` as a table.

    Args:
        data: Object with a ``head()`` method whose result ``st.dataframe``
            can display.
    """
    st.write("### Data Preview")
    preview = data.head()
    st.dataframe(preview)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def main():
    """Render the page: show cached data, or fetch it from Hopsworks.

    Flow:
      1. Initialise the ``hopsworks_project`` and ``data`` session-state slots.
      2. If the local timestamp is still valid and the cached CSV can be
         loaded, use the cached data.
      3. Otherwise connect to Hopsworks (once per session), read the feature
         group, save it locally, and keep it in session state.
      4. Display a preview of whatever data is available.

    Fixes over the previous version:
      * ``filename`` was only assigned inside a disabled (string-quoted)
        sidebar section, so the cached-data branch raised ``NameError``.
      * Data was saved under the name ``"./data"``, which never matched the
        file the cached-data branch tried to load.
      * A valid timestamp with a missing CSV left the page empty; it now
        falls back to a fresh download.
      * A leftover debug ``print`` of the fetched frame was removed.
    """
    st.title("Hopsworks Feature Group Explorer")

    # Fixed settings. These mirror the defaults of the sidebar inputs that
    # were disabled; the same filename is used for both saving and loading.
    feature_group_name = "predictions"
    version = 1
    filename = "data.csv"

    if "hopsworks_project" not in st.session_state:
        st.session_state.hopsworks_project = None
    if "data" not in st.session_state:
        st.session_state.data = None

    # Use the cache only when the timestamp is fresh AND the CSV is present.
    cached = load_local_data(filename) if is_local_data_valid() else None

    if cached is not None:
        st.write("Using cached local data.")
        st.session_state.data = cached
    else:
        # Reuse the connection across reruns within the same session.
        if st.session_state.hopsworks_project is None:
            st.write("Initializing Hopsworks connection...")
            st.session_state.hopsworks_project = connect_to_hopsworks()
            st.success("Connected to Hopsworks!")

        project = st.session_state.hopsworks_project
        data = fetch_data_from_feature_group(project, feature_group_name, version)
        filepath = save_data_locally(data, filename)
        st.session_state.data = data
        st.success(f"Data fetched and saved locally at {filepath}")

    if st.session_state.data is not None:
        plot_graphs(st.session_state.data)
|
|
|
# Run the app only when this file is executed as a script, so importing the
# module (e.g. to reuse its helpers) does not trigger a Hopsworks download.
if __name__ == "__main__":
    main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|