import requests
import pandas as pd
import streamlit as st
from io import BytesIO
from bs4 import BeautifulSoup


def _find_file_link(soup, page_url, link_text):
    """Return the absolute URL of the first usable anchor containing *link_text*.

    Anchors whose visible text matches but that carry no ``href`` (missing or
    empty) are skipped instead of crashing on ``None.startswith``. Relative
    hrefs are resolved against *page_url*. Returns ``None`` if nothing matches.
    """
    for link in soup.find_all("a"):
        if link_text not in link.get_text():
            continue
        href = link.get("href")
        if not href:
            # Matching text but nothing to download; keep looking.
            continue
        if not href.startswith("http"):
            href = requests.compat.urljoin(page_url, href)
        return href
    return None


@st.cache_data(ttl=3600)
def fetch_data():
    """Download and parse the visa decisions spreadsheet.

    Fetches the processing-times page, locates the link whose text contains
    "Visa decisions made from 1 January 2025", downloads the linked ODS file
    and reduces it to a two-column table.

    Progress and error messages are written to the Streamlit page.

    Returns:
        ``(df, "Visa Decisions Report")`` on success, where ``df`` has the
        columns "Application Number" (stripped strings) and "Decision";
        ``(None, None)`` if any step fails.

    NOTE(review): the function is wrapped in ``st.cache_data(ttl=3600)``, so a
    ``(None, None)`` failure result is presumably served from cache until the
    TTL expires as well -- confirm whether that is acceptable.
    """
    url = "https://www.ireland.ie/en/india/newdelhi/services/visas/processing-times-and-decisions/"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36"
    }

    st.write("🔄 Fetching webpage...")
    try:
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        st.error(f"❌ Failed to fetch webpage: {e}")
        return None, None

    # Parse the HTML and look for the download link
    st.write("🔄 Parsing webpage...")
    soup = BeautifulSoup(response.content, "html.parser")
    file_url = _find_file_link(soup, url, "Visa decisions made from 1 January 2025")
    if not file_url:
        st.error("❌ Could not find the visa decisions file link.")
        return None, None

    st.write(f"📥 Found file link: {file_url}")

    # Download the .ods file
    try:
        st.write("🔄 Downloading file...")
        ods_response = requests.get(file_url, headers=headers, timeout=15)
        ods_response.raise_for_status()
    except requests.exceptions.RequestException as e:
        st.error(f"❌ Failed to download the file: {e}")
        return None, None

    st.write("📂 Processing file...")
    ods_file = BytesIO(ods_response.content)

    # header=None keeps the first sheet row as data, so the header search
    # below still works when "Application Number" is on the very first row.
    # Broad except is deliberate: any parsing failure is reported to the user.
    try:
        df = pd.read_excel(ods_file, engine="odf", header=None)
    except Exception as e:
        st.error(f"❌ Error reading ODS file: {e}")
        return None, None

    # Drop empty rows
    df = df.dropna(how="all").reset_index(drop=True)

    if df.shape[1] < 2:
        # Without two columns there is no application-number/decision pair.
        st.error("❌ Unexpected file layout: expected at least two columns.")
        return None, None

    # Find header row
    is_header = df.iloc[:, 0].astype(str).str.contains("Application Number", na=False)
    header_rows = df[is_header].index
    if len(header_rows) == 0:
        st.error("❌ Could not find the header row. Check the file format.")
        return None, None

    header_index = header_rows[0]
    # Keep only the first two columns so stray extra columns cannot break the
    # rename below. Assumes the decision sits right after the application
    # number -- TODO confirm against the published file.
    df = df.iloc[header_index + 1:, :2].reset_index(drop=True).copy()

    # Rename columns
    df.columns = ["Application Number", "Decision"]
    df = df.dropna()
    df["Application Number"] = df["Application Number"].astype(str).str.strip()

    st.write("✅ Data loaded successfully!")
    return df, "Visa Decisions Report"


precomputed_df, file_name = fetch_data()