"""Streamlit dashboard for real-time smart lamppost weather readings.

Fetches a GeoJSON-style feed of temperature and humidity readings, clusters
the lampposts by position with DBSCAN, fits a per-cluster linear regression
of humidity on temperature, and renders a regression chart, a Folium map and
a data table side by side.
"""

import json
import time
from datetime import datetime

import folium
import matplotlib.cm as cm
import matplotlib.colors as mcolors
import pandas as pd
import plotly.express as px
import plotly.graph_objs as go
import pytz
import requests
import streamlit as st
from folium import DivIcon
from sklearn.cluster import DBSCAN
from sklearn.linear_model import LinearRegression
from streamlit_folium import st_folium

# Seconds to wait for the data feed before giving up instead of hanging.
REQUEST_TIMEOUT_SECONDS = 30

# Property names used by the feed for the two readings we consume.
TEMPERATURE_KEY = 'Air temperature (°C) / 氣溫 (°C) / 气温 (°C)'
HUMIDITY_KEY = 'Relative humidity (%) / 相對濕度 (%) / 相对湿度 (%)'

MAP_CENTER = [22.320394086610452, 114.21626912476121]
MAP_ATTRIBUTION = "Map information from Lands Department"
# SECURITY NOTE(review): this URL embeds an API key in source control.
# Consider moving the key to st.secrets and rotating it.
BASEMAP_TILE_URL = 'https://landsd.azure-api.net/dev/osm/xyz/basemap/gs/WGS84/tile/{z}/{x}/{y}.png?key=f4d3e21d4fc14954a1d5930d4dde3809'
LABEL_TILE_URL = 'https://mapapi.geodata.gov.hk/gs/api/v1.0.0/xyz/label/hk/en/wgs84/{z}/{x}/{y}.png'

# Set page layout to wide
st.set_page_config(layout="wide", page_title="Real-Time Smart Lamppost Data Dashboard")


@st.cache_data(ttl=600)
def fetch_data(url):
    """Download and decode the JSON document at *url*.

    The result is cached by Streamlit for 600 seconds.

    Args:
        url: Address of the JSON feed.

    Returns:
        A ``(payload, fetch_time)`` tuple: the decoded JSON and the
        Hong Kong local time of the fetch as ``YYYY-MM-DDTHH:MM:SS``.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            non-success HTTP status.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
    # Fail loudly on HTTP errors so an error page is never parsed and cached.
    response.raise_for_status()
    hk_tz = pytz.timezone('Asia/Hong_Kong')
    fetch_time = datetime.now(hk_tz).strftime('%Y-%m-%dT%H:%M:%S')
    return json.loads(response.text), fetch_time


def feels_like_temperature(temp_celsius, humidity_percent):
    """Return the "feels like" temperature for a temperature/humidity pair.

    Computes ``T - (0.55 - 0.0055 * RH) * (T - 14.5)``. The arithmetic is
    element-wise, so scalars and pandas Series are both accepted.

    Args:
        temp_celsius: Air temperature in degrees Celsius.
        humidity_percent: Relative humidity in percent.
    """
    return temp_celsius - (0.55 - 0.0055 * humidity_percent) * (temp_celsius - 14.5)


def process_data(data):
    """Convert the raw feed into a DataFrame of usable readings.

    Args:
        data: Decoded feed; a mapping with a ``features`` list whose items
            carry ``geometry.coordinates`` (longitude first, then latitude)
            and ``properties``.

    Returns:
        A DataFrame with ``latitude``, ``longitude``, ``temperature``,
        ``humidity`` and ``feels_like`` columns. Features without point
        coordinates are skipped, and rows whose temperature or humidity is
        missing or non-numeric are dropped. The frame may be empty.
    """
    columns = ['latitude', 'longitude', 'temperature', 'humidity']
    records = []
    for feature in data.get('features') or []:
        coordinates = (feature.get('geometry') or {}).get('coordinates') or []
        if len(coordinates) < 2:
            # Nothing to place on the map for this feature.
            continue
        properties = feature.get('properties') or {}
        records.append({
            'latitude': coordinates[1],
            'longitude': coordinates[0],
            'temperature': properties.get(TEMPERATURE_KEY),
            'humidity': properties.get(HUMIDITY_KEY),
        })

    # Explicit columns keep the schema stable even when no record survived.
    df = pd.DataFrame(records, columns=columns)

    # Convert temperature and humidity to numeric, forcing errors to NaN
    for column in ('temperature', 'humidity'):
        df[column] = pd.to_numeric(df[column], errors='coerce')

    # Drop incomplete rows; copy so the column added below writes to an
    # independent frame rather than a view.
    df = df.dropna(subset=['temperature', 'humidity']).copy()

    # Vectorised: the formula is plain element-wise arithmetic.
    df['feels_like'] = feels_like_temperature(df['temperature'], df['humidity'])
    return df


# Fetch and process data
url = "https://csdi.vercel.app/weather/smls"
data, fetch_time = fetch_data(url)
df = process_data(data)

if df.empty:
    # DBSCAN cannot be fitted on an empty array; tell the user and stop.
    st.warning("No valid temperature/humidity readings are available right now.")
    st.stop()

# Perform clustering using DBSCAN. eps is expressed in the same units as the
# coordinates (degrees), and DBSCAN labels noise points as -1.
coords = df[['latitude', 'longitude']].values
db = DBSCAN(eps=0.01, min_samples=5).fit(coords)
df['cluster'] = db.labels_

# Start with float NaN so the column keeps a numeric dtype.
df['predicted_humidity'] = float('nan')

# Perform linear regression of humidity on temperature for each cluster
for cluster, cluster_data in df.groupby('cluster'):
    if len(cluster_data) > 1:  # Only perform regression if there are enough points
        X = cluster_data[['temperature']].values
        y = cluster_data['humidity'].values
        reg = LinearRegression().fit(X, y)
        df.loc[cluster_data.index, 'predicted_humidity'] = reg.predict(X)

# Calculate temperature statistics
temp_stats = df['temperature'].describe()
avg_temp = temp_stats['mean']
min_temp = temp_stats['min']
max_temp = temp_stats['max']
std_temp = temp_stats['std']

# Create regression plot using Plotly
fig = px.scatter(
    df,
    x='temperature',
    y='humidity',
    color='cluster',
    title='Temperature vs. Relative Humidity with Regression by Cluster',
)

# Add regression lines to the plot
for cluster, cluster_data in df.groupby('cluster'):
    # Sort by x so each line is drawn once from left to right.
    fitted = cluster_data.dropna(subset=['predicted_humidity']).sort_values('temperature')
    if fitted.empty:
        continue
    fig.add_trace(go.Scatter(
        x=fitted['temperature'],
        y=fitted['predicted_humidity'],
        mode='lines',
        name=f'Cluster {cluster}',
    ))

col1, col2, col3 = st.columns([1.65, 2, 1.15])

# Column 1: Regression Plot, Data, and Statistics
with col1:
    st.plotly_chart(fig, use_container_width=True, height=300)
    st.caption(f"Data fetched at: {fetch_time}")

    # Display temperature statistics
    col_1, col_2 = st.columns([1, 1])
    with col_1:
        st.metric(label="Average Temperature (°C)", value=f"{avg_temp:.2f}")
        st.metric(label="Minimum Temperature (°C)", value=f"{min_temp:.2f}")
    with col_2:
        st.metric(label="Maximum Temperature (°C)", value=f"{max_temp:.2f}")
        st.metric(label="Std. Dev (°C)", value=f"{std_temp:.2f}")

# Column 2: Map
with col2:
    # Initialize the Folium map
    m = folium.Map(
        location=MAP_CENTER,
        zoom_start=14,
        tiles=BASEMAP_TILE_URL,
        attr=MAP_ATTRIBUTION,
    )
    folium.TileLayer(tiles=LABEL_TILE_URL, attr=MAP_ATTRIBUTION).add_to(m)

    # One colour per cluster from the ten "tab" colours, cycling if there
    # are more clusters than colours.
    palette = list(mcolors.TABLEAU_COLORS.values())
    cluster_colors = {
        cluster: palette[i % len(palette)]
        for i, cluster in enumerate(sorted(df['cluster'].unique()))
    }

    # Plot original data points. itertuples keeps each column's dtype, so
    # cluster ids stay integers in the popup text.
    # NOTE(review): the popup separators are written as <br>; confirm this
    # matches the intended markup.
    for point in df.itertuples(index=False):
        color = cluster_colors[point.cluster]
        folium.CircleMarker(
            location=[point.latitude, point.longitude],
            radius=5,
            color=color,
            fill=True,
            fill_color=color,
            fill_opacity=0.7,
            popup=(
                f"Temp: {point.temperature} °C<br>"
                f"Humidity: {point.humidity} %<br>"
                f"Feels Like: {point.feels_like:.2f} °C<br>"
                f"Cluster: {point.cluster}"
            ),
        ).add_to(m)

    # Calculate the mean position and temperature for each cluster
    cluster_centers = df.groupby('cluster').agg({
        'latitude': 'mean',
        'longitude': 'mean',
        'temperature': 'mean',
    }).reset_index()

    # Plot cluster centers as text labels showing the average temperature
    # NOTE(review): the label is wrapped in a plain <div>; add inline
    # styling here if a specific look is required.
    for center in cluster_centers.itertuples(index=False):
        folium.Marker(
            location=[center.latitude, center.longitude],
            icon=DivIcon(
                icon_size=(150, 36),
                icon_anchor=(85, 20),  # Offset so the text sits away from the point
                html=f'<div>{center.temperature:.2f} °C</div>',
            ),
            popup=f"Cluster: {center.cluster}<br>Avg Temp: {center.temperature:.2f} °C",
        ).add_to(m)

    # Display the map in Streamlit
    st_folium(m, use_container_width=True, height=650)

# Column 3: Data Table
with col3:
    # Hook for custom HTML/CSS; the injected markup is currently blank.
    st.markdown(
        """ """,
        unsafe_allow_html=True
    )

    # Display the DataFrame
    st.dataframe(
        df[['latitude', 'longitude', 'temperature', 'humidity', 'feels_like', 'cluster']],
        height=600,
    )