# CSDI-Weather / pages / 5_Past Records.py
# OttoYu's picture
# Upload 9 files
# d4bea00 verified
import streamlit as st
import pandas as pd
import plotly.express as px
import json
import os
import glob
# Directory paths
# Relative directory that is searched for '*.csv' files and that also holds
# the GeoJSON file below; being relative, it is resolved against the
# process working directory.
data_dir = 'past_temp'
# GeoJSON file whose features carry the 'WeatherStationShortName' and
# 'WeatherStationName_en' properties used to label stations.
geojson_file = os.path.join(data_dir, 'FavgTS.geojson')
def load_geojson(path=None):
    """Read the station GeoJSON file and return its parsed content.

    Args:
        path: Optional path of the GeoJSON file to read. When omitted, the
            module-level ``geojson_file`` is used.

    Returns:
        The object produced by ``json.load`` for the file content.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file content is not valid JSON.
    """
    if path is None:
        path = geojson_file
    # GeoJSON is UTF-8 by specification (RFC 7946). Without an explicit
    # encoding, open() falls back to the platform default, which can fail
    # or garble non-ASCII text on systems whose default is not UTF-8.
    with open(path, encoding='utf-8') as f:
        return json.load(f)
def create_station_map(geojson):
    """Build a lookup from station short names to English station names.

    Args:
        geojson: Parsed GeoJSON mapping with a ``'features'`` list. Each
            feature's ``'properties'`` must contain the keys
            ``'WeatherStationShortName'`` and ``'WeatherStationName_en'``.

    Returns:
        A dict mapping every short name to its long name. When a short name
        appears in several features, the last one wins.

    Raises:
        KeyError: If a feature lacks ``'properties'`` or one of the two
            name keys.
    """
    all_properties = (entry['properties'] for entry in geojson['features'])
    return {
        props['WeatherStationShortName']: props['WeatherStationName_en']
        for props in all_properties
    }
def load_csv_files(directory=None):
    """Return the paths of all ``*.csv`` files in a directory, sorted.

    Args:
        directory: Optional directory to search. When omitted, the
            module-level ``data_dir`` is used.

    Returns:
        A list of matching paths (each prefixed with ``directory``) in
        ascending lexicographic order; empty when nothing matches.
    """
    if directory is None:
        directory = data_dir
    # glob yields matches in arbitrary, filesystem-dependent order. Sorting
    # keeps the order in which files are processed (and therefore the order
    # of anything derived from it) stable across runs and machines.
    return sorted(glob.glob(os.path.join(directory, '*.csv')))
def plot_time_series(df, station_name):
    """Build a line chart of the daily values, one coloured line per year.

    Args:
        df: DataFrame with a ``'Date'`` column (parsed with the ``%Y%m%d``
            format; unparseable entries become ``NaT``) and a ``'Value'``
            column. The frame is not modified.
        station_name: Station label interpolated into the chart title.

    Returns:
        The Plotly figure created by ``px.line``.
    """
    # Work on a copy: adding columns to the caller's frame in place leaks
    # helper columns into it and raises SettingWithCopyWarning when the
    # caller hands over a filtered slice of a larger DataFrame.
    data = df.copy()
    data['Date'] = pd.to_datetime(data['Date'], format='%Y%m%d', errors='coerce')
    data['Year'] = data['Date'].dt.year

    fig_all_years = px.line(
        data,
        x='Date',
        y='Value',
        color='Year',
        title=f'All-Year Temperature Time Series for {station_name}',
        labels={'Date': 'Date', 'Value': 'Temperature (°C)', 'Year': 'Year'},
        line_shape='linear',
    )
    fig_all_years.update_layout(xaxis_title='Date', yaxis_title='Temperature (°C)')
    return fig_all_years
def plot_monthly_averages(df, station_name):
    """Build a line chart of the mean value per month, one line per year.

    Args:
        df: DataFrame with a ``'Date'`` column (parsed with the ``%Y%m%d``
            format; unparseable entries become ``NaT``) and a ``'Value'``
            column. The frame is not modified.
        station_name: Station label interpolated into the chart title.

    Returns:
        The Plotly figure created by ``px.line`` with the month number
        (1-12) on the x axis, labelled with month abbreviations.
    """
    # Work on a copy: adding columns to the caller's frame in place leaks
    # helper columns into it and raises SettingWithCopyWarning when the
    # caller hands over a filtered slice of a larger DataFrame.
    data = df.copy()
    data['Date'] = pd.to_datetime(data['Date'], format='%Y%m%d', errors='coerce')
    data['Year'] = data['Date'].dt.year
    data['Month'] = data['Date'].dt.month

    monthly_avg = data.groupby(['Year', 'Month'])['Value'].mean().reset_index()

    fig_monthly_avg = px.line(
        monthly_avg,
        x='Month',
        y='Value',
        color='Year',
        title=f'Monthly Average Temperature Time Series for {station_name}',
        labels={'Month': 'Month', 'Value': 'Average Temperature (°C)', 'Year': 'Year'},
        line_shape='linear',
    )
    fig_monthly_avg.update_layout(xaxis_title='Month', yaxis_title='Average Temperature (°C)')

    # 'Month' is a plain number (1-12), so a date tick format such as '%b'
    # is not applied to this axis. Map each month number to its
    # abbreviation explicitly instead.
    month_labels = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                    'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
    fig_monthly_avg.update_xaxes(
        tickmode='array',
        tickvals=list(range(1, 13)),
        ticktext=month_labels,
    )
    return fig_monthly_avg
def plot_annual_average(df, station_name):
    """Build a line chart of the mean value per year.

    Args:
        df: DataFrame with a ``'Value'`` column and either a ``'Year'``
            column or a ``'Date'`` column from which the year is derived
            (parsed with the ``%Y%m%d`` format; unparseable entries become
            ``NaT`` and are left out of the averages). The frame is not
            modified.
        station_name: Station label interpolated into the chart title.

    Returns:
        The Plotly figure created by ``px.line``.
    """
    if 'Year' in df.columns:
        grouped = df.groupby('Year')
    else:
        # The sibling plot helpers derive the year from 'Date' themselves;
        # do the same here so this function also works on a frame that has
        # not been pre-processed, without adding a column to the input.
        years = pd.to_datetime(df['Date'], format='%Y%m%d', errors='coerce').dt.year
        grouped = df.groupby(years.rename('Year'))

    annual_avg = grouped['Value'].mean().reset_index()

    fig_annual_avg = px.line(
        annual_avg,
        x='Year',
        y='Value',
        title=f'Annual Average Temperature Trend for {station_name}',
        labels={'Year': 'Year', 'Value': 'Average Temperature (°C)'},
        line_shape='linear',
    )
    fig_annual_avg.update_layout(xaxis_title='Year', yaxis_title='Average Temperature (°C)')
    return fig_annual_avg
# Streamlit app layout
st.set_page_config(layout="wide", page_title="Temperature Time Series")

# Load GeoJSON and create the short-name -> long-name station mapping
geojson = load_geojson()
station_map = create_station_map(geojson)

# Locate all CSV files
csv_files = load_csv_files()

# One DataFrame per successfully loaded CSV file
all_data = []

# Process each CSV file; a failure in one file is reported and the file is
# skipped, so the remaining files are still shown.
for file in csv_files:
    try:
        file_name = os.path.basename(file)
        short_form = file_name.split('.')[0]  # File name up to the first dot
        df = pd.read_csv(file)

        if df.shape[1] < 2:
            st.error(f"File {file} does not have the expected number of columns. Skipping.")
            continue

        if {'Date', 'Value'}.issubset(df.columns):
            # Correctly named columns are taken by name, wherever they sit.
            df = df[['Date', 'Value']].copy()
        else:
            # Otherwise treat the first two columns as date and value.
            # Slicing first keeps the rename from failing on files that
            # carry additional columns.
            df = df.iloc[:, :2].copy()
            df.columns = ['Date', 'Value']

        # Keep the short name in the fallback label: a single shared
        # "Unknown Station" label would merge the records of every
        # unmapped file into one series.
        long_form = station_map.get(short_form, f"Unknown Station ({short_form})")
        df['Station'] = long_form
        all_data.append(df)
    except Exception as e:
        # Deliberately broad: any problem with one file is surfaced in the
        # UI instead of aborting the whole page.
        st.error(f"Error loading or processing file {file}: {e}")

# Combine all data into a single DataFrame
if all_data:
    combined_df = pd.concat(all_data, ignore_index=True)
    combined_df['Date'] = pd.to_datetime(combined_df['Date'], format='%Y%m%d', errors='coerce')
    # Rows whose date could not be parsed are dropped
    combined_df = combined_df.dropna(subset=['Date'])
    combined_df['Year'] = combined_df['Date'].dt.year
    combined_df['Month'] = combined_df['Date'].dt.month

    stations = combined_df['Station'].unique()

    if not stations.size:
        st.write("No stations available in the data.")
    else:
        st.subheader('Past Daily Average Temperature Time Series')
        selected_station = st.selectbox("Select a Station", options=stations, index=0)

        # Take an independent copy so the plot helpers never operate on a
        # slice of combined_df (avoids SettingWithCopyWarning).
        station_data = combined_df[combined_df['Station'] == selected_station].copy()

        if not station_data.empty:
            # Two side-by-side columns for the plots
            col1, col2 = st.columns([2, 1.5])

            # Left column: all-year daily time series
            with col1:
                fig_all_years = plot_time_series(station_data, selected_station)
                st.plotly_chart(fig_all_years, use_container_width=True)

            # Right column: monthly average temperatures
            with col2:
                fig_monthly_avg = plot_monthly_averages(station_data, selected_station)
                st.plotly_chart(fig_monthly_avg, use_container_width=True)
        else:
            st.write(f"No data available for the selected station '{selected_station}'.")
else:
    st.write("No data to display.")