# nakas's picture
# Update app.py
# 7f2f72d verified
import html
import os
import re
import subprocess
import sys
import time
from datetime import datetime
from urllib.parse import quote

import gradio as gr
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib.gridspec import GridSpec
from playwright.sync_api import sync_playwright
from windrose import WindroseAxes
# Install Playwright browsers on startup
def install_playwright_browsers():
    """Install Playwright's Chromium build if its cache directory is missing.

    The check is a simple existence test on the hard-coded cache path; when
    that directory is absent, ``python -m playwright install chromium`` is
    run with the current interpreter.

    This is best-effort: every failure is printed and swallowed so that the
    caller is never interrupted by an installation problem.
    """
    cache_dir = '/home/user/.cache/ms-playwright'
    try:
        if os.path.exists(cache_dir):
            return
        print("Installing Playwright browsers...")
        subprocess.run(
            [sys.executable, "-m", "playwright", "install", "chromium"],
            check=True,
            capture_output=True,
            text=True
        )
        print("Playwright browsers installed successfully")
    except subprocess.CalledProcessError as e:
        print(f"Error installing browsers: {e}")
        # The installer's output was captured, so it would otherwise be lost;
        # print it because it carries the actual reason for the failure.
        details = (e.stderr or e.stdout or "").strip()
        if details:
            print(details)
    except Exception as e:
        print(f"Error installing browsers: {e}")
# Install browsers when the module loads.
# This runs at import time, i.e. before the Gradio interface below is built.
install_playwright_browsers()
def scrape_weather_data(site_id, hours=720):
    """Scrape the tabular observations from the weather.gov timeseries page.

    A headless Chromium session loads the page, waits for a ``<table>`` to
    appear and then extracts the first table whose text contains
    ``Date/Time``.

    Args:
        site_id: Station identifier; it is stripped and percent-encoded
            before being placed in the query string.
        hours: Number of hours of history to request; converted with
            ``int()`` so a float such as ``720.0`` is sent as ``720``.

    Returns:
        ``{'headers': [...], 'rows': [[...], ...]}`` with every cell as
        trimmed text, or ``None`` when no matching table is on the page.

    Raises:
        Exception: Any error raised while building the URL or driving the
            browser is logged (browser errors only) and re-raised.
    """
    url = (
        "https://www.weather.gov/wrh/timeseries"
        f"?site={quote(str(site_id).strip(), safe='')}&hours={int(hours)}"
        "&units=english&chart=on&headers=on&obs=tabular&hourly=false&pview=full&font=12&plot="
    )
    try:
        with sync_playwright() as p:
            browser = p.chromium.launch(
                headless=True,
                args=['--no-sandbox', '--disable-dev-shm-usage']
            )
            try:
                context = browser.new_context(
                    user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
                )
                page = context.new_page()
                response = page.goto(url)
                # goto() is documented to return None for some navigations,
                # so do not assume a response object is available.
                status = response.status if response is not None else "unknown"
                print(f"Response status: {status}")
                page.wait_for_selector('table', timeout=30000)
                # Fixed pause after the first table appears, before reading it.
                time.sleep(5)
                print("Extracting data...")
                content = page.evaluate('''() => {
const getTextContent = () => {
const rows = [];
const tables = document.getElementsByTagName('table');
for (const table of tables) {
if (table.textContent.includes('Date/Time')) {
const headerRow = Array.from(table.querySelectorAll('th'))
.map(th => th.textContent.trim());
const dataRows = Array.from(table.querySelectorAll('tbody tr'))
.map(row => Array.from(row.querySelectorAll('td'))
.map(td => td.textContent.trim()));
return {headers: headerRow, rows: dataRows};
}
}
return null;
};
return getTextContent();
}''')
                print(f"Found {len(content['rows'] if content else [])} rows of data")
                return content
            finally:
                # Close the browser on the error path as well as on success.
                browser.close()
    except Exception as e:
        print(f"Error scraping data: {str(e)}")
        raise
def parse_date(date_str):
    """Parse a year-less timestamp such as ``"Jan 05, 3:15 PM"``.

    The input carries no year, so one has to be chosen. The current year is
    tried first; if that is not a valid date or puts the reading more than a
    day into the future (e.g. a December reading parsed in January), the
    previous year is used instead.

    Args:
        date_str: Text in ``%b %d, %I:%M %p`` form.

    Returns:
        A ``pandas.Timestamp``, or ``pd.NaT`` when the text cannot be parsed.
    """
    now = datetime.now()
    # One day of slack so small clock or timezone differences between this
    # machine and the data source do not push fresh readings back a year.
    latest_plausible = pd.Timestamp(now) + pd.Timedelta(days=1)
    for year in (now.year, now.year - 1):
        try:
            parsed = pd.to_datetime(
                f"{date_str}, {year}", format="%b %d, %I:%M %p, %Y"
            )
        except (ValueError, TypeError, OverflowError):
            # Unparseable text, or a day that does not exist in this year
            # (Feb 29); the other candidate year may still work.
            continue
        if parsed <= latest_plausible:
            return parsed
    return pd.NaT
def parse_weather_data(data):
    """Turn the scraped table into a typed pandas DataFrame.

    Only the first twelve cells of each row are used, and they are assigned
    to fixed column names by position (the scraped headers are not read).

    Args:
        data: Mapping with a ``'rows'`` entry holding a list of rows, each a
            list of cell strings.

    Returns:
        DataFrame with the twelve named columns plus ``wind_gust``,
        ``wind_dir_deg`` and ``date``. Rows whose timestamp cannot be parsed
        are dropped; the remaining rows keep their original order.

    Raises:
        ValueError: If there are no rows, or none has a parseable timestamp.
    """
    if not data or 'rows' not in data or not data['rows']:
        raise ValueError("No valid weather data found")

    columns = ['datetime', 'temp', 'dew_point', 'humidity', 'wind_chill',
               'wind_dir', 'wind_speed', 'snow_depth', 'snowfall_3hr',
               'snowfall_6hr', 'snowfall_24hr', 'swe']
    numeric_cols = ['temp', 'dew_point', 'humidity', 'wind_chill', 'snow_depth',
                    'snowfall_3hr', 'snowfall_6hr', 'snowfall_24hr', 'swe']
    direction_map = {
        'N': 0, 'NNE': 22.5, 'NE': 45, 'ENE': 67.5,
        'E': 90, 'ESE': 112.5, 'SE': 135, 'SSE': 157.5,
        'S': 180, 'SSW': 202.5, 'SW': 225, 'WSW': 247.5,
        'W': 270, 'WNW': 292.5, 'NW': 315, 'NNW': 337.5
    }

    def parse_wind(x):
        """Split a wind cell into (speed, gust); '12G25' -> (12.0, 25.0)."""
        if pd.isna(x):
            return np.nan, np.nan
        match = re.search(r'(\d+)G(\d+)', str(x))
        if match:
            return float(match.group(1)), float(match.group(2))
        try:
            return float(x), np.nan
        except (ValueError, TypeError):
            return np.nan, np.nan

    def parse_direction(direction):
        """Map a compass label to degrees; unknown labels become NaN."""
        return direction_map.get(direction, np.nan)

    df = pd.DataFrame(data['rows'])
    # Keep at most twelve cells per row and pad narrower tables with NaN
    # columns so the positional column names below always fit.
    df = df.iloc[:, :len(columns)].reindex(columns=range(len(columns)))
    df.columns = columns

    for col in numeric_cols:
        df[col] = pd.to_numeric(df[col], errors='coerce')

    wind_data = df['wind_speed'].apply(parse_wind)
    df['wind_speed'] = wind_data.apply(lambda pair: pair[0])
    df['wind_gust'] = wind_data.apply(lambda pair: pair[1])
    df['wind_dir_deg'] = df['wind_dir'].apply(parse_direction)

    df['datetime'] = pd.to_datetime(df['datetime'].apply(parse_date))
    # Rows without a usable timestamp cannot be plotted or assigned to a day,
    # and the hour/date arithmetic used for day grouping fails on NaT.
    df = df.dropna(subset=['datetime']).reset_index(drop=True)
    if df.empty:
        raise ValueError("No valid weather data found")
    df['date'] = df['datetime'].dt.date
    return df
def process_daily_snow(group):
    """Return the summed 3-hour snowfall for one day's rows.

    Missing amounts count as zero. This lives at module level because
    create_plots passes it to ``groupby(...).apply``.

    Args:
        group: DataFrame with a ``snowfall_3hr`` column.
    """
    return group['snowfall_3hr'].fillna(0).sum()


def calculate_total_new_snow(df):
    """Total new snow over the whole frame, summed from 3-hour amounts.

    Rows are bucketed into "snow days" that run from 9 AM to 9 AM, each
    bucket is summed with process_daily_snow, and the daily totals are added
    up. A per-day breakdown is printed for debugging.

    No anomaly filtering is applied: every non-missing ``snowfall_3hr`` value
    is counted as-is. Rows without a timestamp are ignored because they
    cannot be assigned to a day.

    Args:
        df: DataFrame with ``datetime`` and ``snowfall_3hr`` columns.

    Returns:
        The summed snowfall as a float (0.0 when there are no usable rows).
    """
    snow_df = (
        df[['datetime', 'snowfall_3hr']]
        .dropna(subset=['datetime'])
        .sort_values('datetime')
    )
    timestamps = pd.to_datetime(snow_df['datetime'])
    # Shifting back nine hours makes the calendar date roll over at 9 AM:
    # readings before 9 AM fall on the previous day's bucket.
    day_group = (timestamps - pd.Timedelta(hours=9)).dt.date

    total = 0.0
    for day, group in snow_df.groupby(day_group):
        print(f"\nSnowfall amounts for {day}:")
        for when, amount in zip(group['datetime'], group['snowfall_3hr']):
            if pd.notna(amount):
                print(f"{when.strftime('%Y-%m-%d %H:%M')}: {amount} inches")
        daily_total = process_daily_snow(group)
        print(f"Daily total: {daily_total} inches")
        total += float(daily_total)
    return total
def create_wind_rose(df, ax):
    """Draw a wind rose of speed by direction onto ``ax``.

    Args:
        df: DataFrame with ``wind_dir_deg`` and ``wind_speed`` columns.
        ax: Target axes; anything that is not a ``WindroseAxes`` is converted
            with ``WindroseAxes.from_ax``.
    """
    if not isinstance(ax, WindroseAxes):
        ax = WindroseAxes.from_ax(ax=ax)
    # Drop incomplete observations as whole rows. Filtering each column on
    # its own would pair directions with speeds from different rows (or give
    # arrays of different lengths) whenever only one of the two is missing.
    wind = df[['wind_dir_deg', 'wind_speed']].dropna()
    if not wind.empty:
        ax.bar(wind['wind_dir_deg'], wind['wind_speed'],
               bins=np.arange(0, 40, 5), normed=True, opening=0.8, edgecolor='white')
        ax.set_legend(title='Wind Speed (mph)')
    ax.set_title('Wind Rose')
def create_plots(df):
    """Build the stacked weather figure and a separate wind-rose figure.

    The main figure has five equally tall panels: temperature and wind
    chill, wind speed and gusts, snow depth, daily new snow (3-hour amounts
    summed over 9 AM-to-9 AM days) and daily mean snow/water equivalent.

    Args:
        df: DataFrame with ``datetime``, ``date``, ``temp``, ``wind_chill``,
            ``wind_speed``, ``wind_gust``, ``snow_depth``, ``snowfall_3hr``,
            ``swe`` and ``wind_dir_deg`` columns.

    Returns:
        ``(fig, fig_rose)`` - the five-panel figure and the wind-rose figure.
    """
    # NOTE(review): both figures are created through pyplot and never closed
    # here; confirm the caller releases them if this runs many times.
    fig = plt.figure(figsize=(20, 24))
    gs = GridSpec(5, 1, figure=fig, height_ratios=[1, 1, 1, 1, 1])
    gs.update(hspace=0.4)  # extra vertical room for the rotated tick labels

    def time_series_panel(slot, series, title, ylabel, legend=True):
        """Plot one or more columns against time in grid row ``slot``."""
        ax = fig.add_subplot(gs[slot])
        for column, label, color in series:
            ax.plot(df['datetime'], df[column], label=label, color=color)
        ax.set_title(title, pad=20)
        ax.set_xlabel('Date')
        ax.set_ylabel(ylabel)
        if legend:
            ax.legend()
        ax.grid(True)
        ax.tick_params(axis='x', rotation=45)
        return ax

    time_series_panel(
        0,
        [('temp', 'Temperature', 'red'), ('wind_chill', 'Wind Chill', 'blue')],
        'Temperature and Wind Chill Over Time', 'Temperature (°F)')
    time_series_panel(
        1,
        [('wind_speed', 'Wind Speed', 'blue'), ('wind_gust', 'Wind Gust', 'orange')],
        'Wind Speed and Gusts Over Time', 'Wind Speed (mph)')
    time_series_panel(
        2,
        [('snow_depth', 'Snow Depth', 'blue')],
        'Snow Depth Over Time', 'Snow Depth (inches)', legend=False)

    # Daily new snow bar plot
    ax4 = fig.add_subplot(gs[3])
    snow = df[['datetime', 'snowfall_3hr']].dropna(subset=['datetime'])
    # Shifting back nine hours makes the date roll over at 9 AM.
    day_group = (snow['datetime'] - pd.Timedelta(hours=9)).dt.date
    daily_snow = snow['snowfall_3hr'].fillna(0).groupby(day_group).sum()
    ax4.bar(list(daily_snow.index), daily_snow.values, color='blue')
    ax4.set_title('Daily New Snow (Sum of 3-hour amounts, 9 AM Reset)', pad=20)
    ax4.set_xlabel('Date')
    ax4.set_ylabel('New Snow (inches)')
    ax4.tick_params(axis='x', rotation=45)
    ax4.grid(True, axis='y', linestyle='--', alpha=0.7)
    # Label each non-zero bar. The label is anchored at the bar's own date:
    # the bars sit at date positions, so a 0, 1, 2... counter would put the
    # text nowhere near them.
    for day, amount in daily_snow.items():
        if amount > 0:
            ax4.text(day, amount, f'{amount:.1f}"', ha='center', va='bottom')

    # SWE bar plot
    ax5 = fig.add_subplot(gs[4])
    daily_swe = df.groupby('date')['swe'].mean()
    ax5.bar(daily_swe.index, daily_swe.values, color='lightblue')
    ax5.set_title('Snow/Water Equivalent', pad=20)
    ax5.set_xlabel('Date')
    ax5.set_ylabel('SWE (inches)')
    ax5.tick_params(axis='x', rotation=45)

    # Adjust this figure explicitly rather than whichever one pyplot
    # currently considers active.
    fig.subplots_adjust(top=0.95, bottom=0.05, left=0.1, right=0.95)

    # Create separate wind rose figure
    fig_rose = plt.figure(figsize=(10, 10))
    ax_rose = WindroseAxes.from_ax(fig=fig_rose)
    create_wind_rose(df, ax_rose)
    fig_rose.subplots_adjust(top=0.95, bottom=0.05, left=0.1, right=0.95)
    return fig, fig_rose
def analyze_weather_data(site_id, hours):
    """Scrape, parse and summarise one station's data for the Gradio UI.

    Args:
        site_id: Station identifier as typed by the user.
        hours: Number of hours of history to request.

    Returns:
        ``(html, main_figure, wind_rose_figure)``. On any failure the first
        element is an error message and both figures are ``None``; this
        function does not raise.
    """
    try:
        print(f"Scraping data for {site_id}...")
        raw_data = scrape_weather_data(site_id, hours)
        if not raw_data:
            return "Error: Could not retrieve weather data.", None, None

        print("Parsing data...")
        df = parse_weather_data(raw_data)

        def latest(column):
            """Most recent non-missing reading of ``column`` (NaN if none)."""
            valid = df.dropna(subset=['datetime', column]).sort_values('datetime')
            return valid[column].iloc[-1] if not valid.empty else float('nan')

        # Calculate total new snow using the new method
        total_new_snow = calculate_total_new_snow(df)
        # Use the newest row that actually has a value, so a gap in the most
        # recent observation does not show up as "nan".
        current_swe = latest('swe')
        current_depth = latest('snow_depth')

        print("Calculating statistics...")
        stats = {
            'Temperature Range': f"{df['temp'].min():.1f}°F to {df['temp'].max():.1f}°F",
            'Average Temperature': f"{df['temp'].mean():.1f}°F",
            'Max Wind Speed': f"{df['wind_speed'].max():.1f} mph",
            'Max Wind Gust': f"{df['wind_gust'].max():.1f} mph",
            'Average Humidity': f"{df['humidity'].mean():.1f}%",
            'Current Snow Depth': f"{current_depth:.1f} inches",
            'Total New Snow': f"{total_new_snow:.1f} inches",
            'Current Snow/Water Equivalent': f"{current_swe:.2f} inches"
        }

        first_seen = df['datetime'].min().strftime('%Y-%m-%d %H:%M')
        last_seen = df['datetime'].max().strftime('%Y-%m-%d %H:%M')
        parts = ["<div style='font-size: 16px; line-height: 1.5;'>"]
        # site_id is free text from the user and is rendered as HTML.
        parts.append(f"<p><strong>Weather Station:</strong> {html.escape(str(site_id))}</p>")
        parts.append(f"<p><strong>Data Range:</strong> {first_seen} to {last_seen}</p>")
        parts.extend(
            f"<p><strong>{key}:</strong> {value}</p>" for key, value in stats.items()
        )
        parts.append("</div>")
        html_output = "".join(parts)

        print("Creating plots...")
        main_plots, wind_rose = create_plots(df)
        return html_output, main_plots, wind_rose
    except Exception as e:
        print(f"Error in analysis: {str(e)}")
        # The message is shown in an HTML component, so escape it as well.
        return f"Error analyzing data: {html.escape(str(e))}", None, None
# Create Gradio interface
# Layout: title and help text, one row of inputs, the trigger button, then a
# row for the statistics HTML and a row holding the two plot components.
with gr.Blocks(title="Weather Station Data Analyzer") as demo:
    gr.Markdown("# Weather Station Data Analyzer")
    gr.Markdown("""
Enter a weather station ID and number of hours to analyze.
Example station IDs:
- YCTIM (Yellowstone Club - Timber)
- KBZN (Bozeman Airport)
- KSLC (Salt Lake City)
""")
    with gr.Row():
        # Inputs passed to analyze_weather_data, in this order.
        site_id = gr.Textbox(
            label="Weather Station ID",
            value="YCTIM",
            placeholder="Enter station ID (e.g., YCTIM)"
        )
        # The widget is configured with a 1-1440 range and a default of 720.
        hours = gr.Number(
            label="Hours of Data",
            value=720,
            minimum=1,
            maximum=1440
        )
    analyze_btn = gr.Button("Fetch and Analyze Weather Data")
    with gr.Row():
        stats_output = gr.HTML(label="Statistics")
    with gr.Row():
        weather_plots = gr.Plot(label="Weather Plots")
        wind_rose = gr.Plot(label="Wind Rose")
    # The three outputs line up with the (html, main figure, wind rose) tuple
    # returned by analyze_weather_data.
    analyze_btn.click(
        fn=analyze_weather_data,
        inputs=[site_id, hours],
        outputs=[stats_output, weather_plots, wind_rose]
    )
# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()