import streamlit as st
import pandas as pd
import requests
import plotly.express as px
import plotly.graph_objs as go
from folium import DivIcon
import folium
from streamlit_folium import st_folium
from sklearn.linear_model import LinearRegression
from sklearn.cluster import DBSCAN
import matplotlib.cm as cm
import matplotlib.colors as mcolors
import time
import json
import pytz
from datetime import datetime
# Configure the page before any other Streamlit call: wide layout plus the
# browser tab title.
st.set_page_config(
    layout="wide",
    page_title="Real-Time Smart Lamppost Data Dashboard",
)
# Function to fetch JSON data with caching and expiration
@st.cache_data(ttl=600)
def fetch_data(url):
    """Download and decode the JSON document at *url*.

    The result is cached by Streamlit for 600 seconds (see the decorator), so
    the returned timestamp is the time of the actual download, not of the
    current script run.

    Args:
        url: Address of the JSON endpoint to query.

    Returns:
        A ``(payload, fetch_time)`` tuple: the decoded JSON document and the
        Hong Kong local time of the download formatted as
        ``YYYY-MM-DDTHH:MM:SS``.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            4xx/5xx HTTP status.
        ValueError: If the response body is not valid JSON.
    """
    # Without a timeout a stalled connection would block the script run
    # indefinitely.
    response = requests.get(url, timeout=30)
    # Raise on HTTP errors so that an error payload is never stored in the
    # cache and served for the next ten minutes.
    response.raise_for_status()
    hk_tz = pytz.timezone('Asia/Hong_Kong')
    fetch_time = datetime.now(hk_tz).strftime('%Y-%m-%dT%H:%M:%S')
    # Response.json() decodes from the raw bytes, which avoids depending on
    # the text-encoding guess behind ``response.text``.
    return response.json(), fetch_time
# Function to calculate "feels like" temperature
def feels_like_temperature(temp_celsius, humidity_percent):
    """Return the humidity-adjusted "feels like" temperature.

    Computes ``T - (0.55 - 0.0055 * RH) * (T - 14.5)``. The correction term
    vanishes when the temperature is 14.5 or the humidity is 100, in which
    case the input temperature is returned essentially unchanged.

    Args:
        temp_celsius: Air temperature.
        humidity_percent: Relative humidity on a 0-100 scale.

    Returns:
        The adjusted temperature, in the same unit as ``temp_celsius``.
    """
    dryness_weight = 0.55 - 0.0055 * humidity_percent
    return temp_celsius - dryness_weight * (temp_celsius - 14.5)
# Function to process the raw data into a DataFrame
def process_data(data):
    """Flatten the fetched feature collection into a tidy DataFrame.

    Args:
        data: Decoded JSON document with a ``'features'`` list. Each feature
            must provide ``geometry.coordinates`` (index 0 is used as the
            longitude, index 1 as the latitude) and a ``properties`` mapping.

    Returns:
        A DataFrame with the columns ``latitude``, ``longitude``,
        ``temperature``, ``humidity`` and ``feels_like``. Rows whose
        temperature or humidity is missing or non-numeric are dropped. An
        empty ``'features'`` list yields an empty frame with these columns.

    Raises:
        KeyError: If ``'features'``, ``'geometry'``, ``'coordinates'`` or
            ``'properties'`` is absent from the payload.
    """
    temperature_key = 'Air temperature (°C) / 氣溫 (°C) / 气温 (°C)'
    humidity_key = 'Relative humidity (%) / 相對濕度 (%) / 相对湿度 (%)'
    records = [
        {
            'latitude': feature['geometry']['coordinates'][1],
            'longitude': feature['geometry']['coordinates'][0],
            'temperature': feature['properties'].get(temperature_key),
            'humidity': feature['properties'].get(humidity_key),
        }
        for feature in data['features']
    ]
    # Naming the columns explicitly keeps them present when there are no
    # records; otherwise the column lookups below would raise KeyError.
    df = pd.DataFrame(
        records,
        columns=['latitude', 'longitude', 'temperature', 'humidity'],
    )
    # Convert temperature and humidity to numeric, forcing errors to NaN
    df['temperature'] = pd.to_numeric(df['temperature'], errors='coerce')
    df['humidity'] = pd.to_numeric(df['humidity'], errors='coerce')
    # Drop rows with NaN values. The explicit copy makes the frame
    # independent, so the column assignment below cannot trigger pandas'
    # SettingWithCopyWarning.
    df = df.dropna(subset=['temperature', 'humidity']).copy()
    # Calculate "feels like" temperature for the whole column at once; the
    # helper is plain arithmetic, so it works element-wise on Series and also
    # behaves correctly on an empty frame.
    df['feels_like'] = feels_like_temperature(df['temperature'], df['humidity'])
    return df
# Fetch and process data
url = "https://csdi.vercel.app/weather/smls"
data, fetch_time = fetch_data(url)
df = process_data(data)

# DBSCAN refuses to fit an array with zero samples, so stop the script run
# with a readable message instead of a traceback when no usable reading
# was returned.
if df.empty:
    st.warning("No valid temperature/humidity readings are currently available.")
    st.stop()

# Perform clustering using DBSCAN
# NOTE: the points are raw latitude/longitude pairs and DBSCAN's default
# metric is Euclidean, so eps=0.01 is a distance in decimal degrees. Points
# that belong to no cluster receive the label -1.
coords = df[['latitude', 'longitude']].values
db = DBSCAN(eps=0.01, min_samples=5).fit(coords)
df['cluster'] = db.labels_
# Start with an all-missing 'predicted_humidity' column; labels that are not
# fitted below keep the missing marker.
df['predicted_humidity'] = pd.NA
# Fit one temperature -> humidity line per cluster label
for label in df['cluster'].unique():
    in_cluster = df['cluster'] == label
    members = df[in_cluster]
    # A single observation cannot define a regression line
    if len(members) <= 1:
        continue
    temperatures = members['temperature'].values.reshape(-1, 1)
    humidities = members['humidity'].values
    model = LinearRegression().fit(temperatures, humidities)
    # Store the fitted humidity for exactly the rows used in the fit
    df.loc[in_cluster, 'predicted_humidity'] = model.predict(temperatures)
# Summarise the temperature column once and pick out the four figures that
# are shown as metrics.
temp_stats = df['temperature'].describe()
avg_temp, min_temp, max_temp, std_temp = (
    temp_stats[key] for key in ('mean', 'min', 'max', 'std')
)
# Scatter plot of humidity against temperature, coloured by cluster label
fig = px.scatter(
    df,
    x='temperature',
    y='humidity',
    color='cluster',
    title='Temperature vs. Relative Humidity with Regression by Cluster',
)
# Overlay one fitted line per cluster label that has predictions
for label in df['cluster'].unique():
    members = df[df['cluster'] == label]
    if 'predicted_humidity' not in members.columns:
        continue
    # Labels that were never fitted hold only missing values; no line to draw
    if members['predicted_humidity'].isna().all():
        continue
    fitted_line = go.Scatter(
        x=members['temperature'],
        y=members['predicted_humidity'],
        mode='lines',
        name=f'Cluster {label}',
    )
    fig.add_trace(fitted_line)
# Three-column page layout. Column 1 holds the regression plot, the fetch
# timestamp and the temperature statistics.
col1, col2, col3 = st.columns([1.65, 2, 1.15])
with col1:
    st.plotly_chart(fig, use_container_width=True, height=300)
    st.caption(f"Data fetched at: {fetch_time}")
    # Temperature statistics, two metrics per sub-column
    col_1, col_2 = st.columns([1, 1])
    metric_layout = (
        (col_1, (
            ("Average Temperature (°C)", avg_temp),
            ("Minimum Temperature (°C)", min_temp),
        )),
        (col_2, (
            ("Maximum Temperature (°C)", max_temp),
            ("Std. Dev (°C)", std_temp),
        )),
    )
    for container, entries in metric_layout:
        with container:
            for caption, figure in entries:
                st.metric(label=caption, value=f"{figure:.2f}")
# Column 2: Map
with col2:
    # NOTE(security): the basemap URL embeds an API key in source control.
    # Consider loading it from st.secrets or an environment variable and
    # rotating the key that has been committed here.
    basemap_tiles = 'https://landsd.azure-api.net/dev/osm/xyz/basemap/gs/WGS84/tile/{z}/{x}/{y}.png?key=f4d3e21d4fc14954a1d5930d4dde3809'
    label_tiles = 'https://mapapi.geodata.gov.hk/gs/api/v1.0.0/xyz/label/hk/en/wgs84/{z}/{x}/{y}.png'
    attribution = "Map information from Lands Department"

    # Initialize the Folium map with the basemap, then add the label overlay
    m = folium.Map(
        location=[22.320394086610452, 114.21626912476121],
        zoom_start=14,
        tiles=basemap_tiles,
        attr=attribution,
    )
    folium.TileLayer(tiles=label_tiles, attr=attribution).add_to(m)

    # Define a color map for clusters.
    # cm.get_cmap() was deprecated in Matplotlib 3.7 and removed in 3.9, so
    # the 'tab10' palette is indexed directly; the modulo reuses colours when
    # there are more labels than palette entries.
    unique_clusters = df['cluster'].unique()
    colors = cm.tab10
    cluster_colors = {
        int(label): mcolors.to_hex(colors(i % colors.N))
        for i, label in enumerate(unique_clusters)
    }

    # Plot original data points.
    # itertuples() keeps each column's own dtype, so the cluster label stays
    # an integer instead of being upcast to float as iterrows() can do.
    for point in df.itertuples(index=False):
        label = int(point.cluster)
        folium.CircleMarker(
            location=[point.latitude, point.longitude],
            radius=5,
            color=cluster_colors[label],
            fill=True,
            fill_color=cluster_colors[label],
            fill_opacity=0.7,
            popup=f"Temp: {point.temperature} °C Humidity: {point.humidity} % Feels Like: {point.feels_like:.2f} °C Cluster: {label}"
        ).add_to(m)

    # Calculate the mean position and mean temperature for each cluster label
    cluster_centers = df.groupby('cluster').agg({
        'latitude': 'mean',
        'longitude': 'mean',
        'temperature': 'mean'
    }).reset_index()

    # Plot cluster centers as text labels showing the mean temperature
    for center in cluster_centers.itertuples(index=False):
        folium.Marker(
            location=[center.latitude, center.longitude],
            icon=DivIcon(
                icon_size=(150, 36),
                icon_anchor=(85, 20),  # Adjusted anchor position to move text away from the point
                # The label was previously a single-quoted f-string broken
                # across several lines, which is a syntax error.
                # NOTE(review): any HTML wrapper/styling around this text
                # seems to have been lost - restore it here if required.
                html=f'{center.temperature:.2f} °C'
            ),
            popup=f"Cluster: {int(center.cluster)} Avg Temp: {center.temperature:.2f} °C"
        ).add_to(m)

    # Display the map in Streamlit
    st_folium(m, use_container_width=True, height=650)
# Column 3: tabular view of the processed readings
with col3:
    # NOTE(review): the markup injected here is only a newline, so it renders
    # nothing visible - confirm whether custom HTML/CSS was intended.
    st.markdown(
        """
""",
        unsafe_allow_html=True,
    )
    # Show the per-point readings together with their cluster label
    table_columns = ['latitude', 'longitude', 'temperature', 'humidity', 'feels_like', 'cluster']
    st.dataframe(df[table_columns], height=600)