File size: 4,692 Bytes
35e0c07
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
import streamlit as st
import pandas as pd
import plotly.express as px
import json
import os
import glob

# Directory paths
data_dir = 'past_temp'
geojson_file = os.path.join(data_dir, 'FavgTS.geojson')

# Load GeoJSON data
def load_geojson(path=None):
    """Read and parse the station GeoJSON file.

    Args:
        path: Optional path of the GeoJSON file to read. When omitted, the
            module-level ``geojson_file`` path is used.

    Returns:
        The parsed JSON document exactly as produced by ``json.load``.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file content is not valid JSON.
    """
    source = geojson_file if path is None else path
    # GeoJSON is UTF-8 by specification; do not depend on the platform's
    # locale encoding, which would garble non-ASCII station names.
    with open(source, encoding='utf-8') as f:
        return json.load(f)

# Create a dictionary to map short forms to long forms
def create_station_map(geojson):
    """Build a lookup from station short codes to English station names.

    Args:
        geojson: Mapping with a 'features' list; every feature carries a
            'properties' mapping holding 'WeatherStationShortName' and
            'WeatherStationName_en'.

    Returns:
        dict mapping each short name to its long name. When a short name
        occurs more than once, the last feature wins.

    Raises:
        KeyError: If any of the keys named above is missing.
    """
    all_properties = (entry['properties'] for entry in geojson['features'])
    return {
        props['WeatherStationShortName']: props['WeatherStationName_en']
        for props in all_properties
    }

# Load CSV files
def load_csv_files(directory=None):
    """List the CSV files found directly inside a data directory.

    Args:
        directory: Optional directory to search. When omitted, the
            module-level ``data_dir`` is used.

    Returns:
        A list of paths matching ``*.csv`` in that directory, sorted so the
        result is the same on every run and platform. The list is empty when
        nothing matches.
    """
    folder = data_dir if directory is None else directory
    # glob.glob returns matches in arbitrary, OS-dependent order; sorting
    # keeps everything derived from this list in a stable order.
    return sorted(glob.glob(os.path.join(folder, '*.csv')))

# Plot time series
def plot_time_series(df, station_name):
    """Build a line chart of a station's values over time, one line per year.

    Args:
        df: DataFrame with a 'Date' column and a 'Value' column. 'Date' is
            parsed with the ``%Y%m%d`` format; entries that fail to parse are
            coerced to NaT. The frame passed in is not modified.
        station_name: Station label inserted into the chart title.

    Returns:
        The Plotly figure produced by ``px.line``.
    """
    dates = pd.to_datetime(df['Date'], format='%Y%m%d', errors='coerce')
    # assign() builds a new frame, so the caller's DataFrame (often a filtered
    # slice of a larger one) is never written to.
    plot_df = df.assign(Date=dates, Year=dates.dt.year, Month=dates.dt.month)
    # Lines connect points in row order; sort chronologically so rows that
    # arrive out of order do not make the line double back on itself.
    plot_df = plot_df.sort_values('Date', kind='stable')

    fig_all_years = px.line(plot_df, x='Date', y='Value', color='Year',
                            title=f'All-Year Temperature Time Series for {station_name}',
                            labels={'Date': 'Date', 'Value': 'Temperature (°C)', 'Year': 'Year'},
                            line_shape='linear')
    fig_all_years.update_layout(xaxis_title='Date', yaxis_title='Temperature (°C)')

    return fig_all_years

# Plot monthly averages
def plot_monthly_averages(df, station_name):
    """Build a line chart of monthly mean values, one line per year.

    Args:
        df: DataFrame with a 'Date' column and a 'Value' column. 'Date' is
            parsed with the ``%Y%m%d`` format; entries that fail to parse are
            coerced to NaT. The frame passed in is not modified.
        station_name: Station label inserted into the chart title.

    Returns:
        The Plotly figure produced by ``px.line``, with month number (1-12)
        on the x axis labelled by month abbreviation.
    """
    dates = pd.to_datetime(df['Date'], format='%Y%m%d', errors='coerce')
    # assign() builds a new frame, so the caller's DataFrame (often a filtered
    # slice of a larger one) is never written to.
    dated = df.assign(Date=dates, Year=dates.dt.year, Month=dates.dt.month)

    monthly_avg = dated.groupby(['Year', 'Month'])['Value'].mean().reset_index()

    fig_monthly_avg = px.line(monthly_avg, x='Month', y='Value', color='Year',
                              title=f'Monthly Average Temperature Time Series for {station_name}',
                              labels={'Month': 'Month', 'Value': 'Average Temperature (°C)', 'Year': 'Year'},
                              line_shape='linear')
    # 'Month' is a plain number, so a date tick format such as '%b' has no
    # effect on this axis; map each month number to its label explicitly.
    month_labels = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                    'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
    fig_monthly_avg.update_layout(xaxis_title='Month', yaxis_title='Average Temperature (°C)',
                                  xaxis_tickmode='array',
                                  xaxis_tickvals=list(range(1, 13)),
                                  xaxis_ticktext=month_labels)

    return fig_monthly_avg

# Streamlit app layout
st.set_page_config(layout="wide", page_title="Temperature Time Series")

# Load GeoJSON and create mapping
try:
    geojson = load_geojson()
    station_map = create_station_map(geojson)
except (OSError, ValueError, KeyError, TypeError) as e:
    # A missing, unreadable, malformed or unexpectedly structured metadata
    # file is reported in the page (as CSV problems are) and the run halts.
    st.error(f"Error loading station metadata from {geojson_file}: {e}")
    st.stop()

# Load all CSV files
csv_files = load_csv_files()

# Initialize data storage for all CSV files
all_data = []

# Process each CSV file: one DataFrame per station file, tagged with the
# station's display name.
for file in csv_files:
    try:
        file_name = os.path.basename(file)
        # Station code is the part of the file name before the first dot.
        short_form = file_name.split('.')[0]

        df = pd.read_csv(file)

        if df.shape[1] < 2:
            st.error(f"File {file} does not have the expected number of columns. Skipping.")
            continue

        if df.columns[0] != 'Date':
            # Header is missing or named differently: treat the first two
            # columns as date and value. Extra columns are dropped first so
            # the rename cannot fail with a length mismatch.
            df = df.iloc[:, :2].copy()
            df.columns = ['Date', 'Value']

        # Keep unmapped stations distinguishable; a single shared fallback
        # label would merge the data of unrelated files into one series.
        long_form = station_map.get(short_form, f"Unknown Station ({short_form})")
        df['Station'] = long_form
        all_data.append(df)

    except Exception as e:
        # Deliberately broad: one unreadable file is reported in the page and
        # skipped so the remaining stations can still be shown.
        st.error(f"Error loading or processing file {file}: {e}")

# Combine all data into a single DataFrame
if all_data:
    combined_df = pd.concat(all_data, ignore_index=True)
    combined_df['Date'] = pd.to_datetime(combined_df['Date'], format='%Y%m%d', errors='coerce')
    # Dates that failed to parse were coerced to NaT above; drop those rows.
    combined_df = combined_df.dropna(subset=['Date'])
    combined_df['Year'] = combined_df['Date'].dt.year
    combined_df['Month'] = combined_df['Date'].dt.month

    stations = combined_df['Station'].unique()

    # Every row may have been dropped above, leaving no station to offer.
    if len(stations) == 0:
        st.write("No stations available in the data.")
    else:
        st.subheader('Past Daily Average Temperature Time Series')
        selected_station = st.selectbox("Select a Station", options=stations, index=0)

        # Copy so the plotting helpers receive an independent frame rather
        # than a filtered slice of combined_df.
        station_data = combined_df[combined_df['Station'] == selected_station].copy()

        if not station_data.empty:
            # Two side-by-side columns; the left one is slightly wider.
            col1, col2 = st.columns([2,1.5])

            # Left plot: all-year time series
            with col1:
                fig_all_years = plot_time_series(station_data, selected_station)
                st.plotly_chart(fig_all_years, use_container_width=True)

            # Right plot: monthly average temperatures
            with col2:
                fig_monthly_avg = plot_monthly_averages(station_data, selected_station)
                st.plotly_chart(fig_monthly_avg, use_container_width=True)
        else:
            st.write(f"No data available for the selected station '{selected_station}'.")
else:
    st.write("No data to display.")