import requests
import pandas as pd
import streamlit as st
from io import BytesIO
from bs4 import BeautifulSoup
from fpdf import FPDF


# Function to fetch data
@st.cache_data(ttl=3600)
def fetch_data():
    """Download and parse the visa-decisions .ods file from the Irish embassy site.

    Scrapes the processing-times page for the "Visa decisions made from
    1 January 2025" link, downloads the linked .ods file, locates the real
    header row inside it, and returns the two columns the app needs.

    Returns:
        tuple: (DataFrame with string 'Application Number' and 'Decision'
        columns, link text used as the file name), or (None, None) on any
        failure — an ``st.error`` is shown in that case.
    """
    url = "https://www.ireland.ie/en/india/newdelhi/services/visas/processing-times-and-decisions/"
    headers = {
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
            "(KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36"
        )
    }

    # Fetch the webpage.  A timeout keeps a hung server from blocking the
    # Streamlit app indefinitely (requests has no default timeout).
    response = requests.get(url, headers=headers, timeout=30)
    if response.status_code != 200:
        st.error("Failed to fetch the webpage. Please try again later.")
        return None, None

    # Parse the HTML to find the .ods link
    soup = BeautifulSoup(response.content, "html.parser")
    file_url = None
    file_name = None
    for link in soup.find_all("a"):
        if "Visa decisions made from 1 January 2025" in link.get_text():
            href = link.get("href")
            if not href:
                # Anchor matched the text but has no href — keep looking
                # (calling .startswith on None would raise AttributeError).
                continue
            file_name = link.get_text().strip()
            file_url = href if href.startswith("http") else requests.compat.urljoin(url, href)
            break

    if not file_url or not file_name:
        st.error("Could not find the visa decisions file link on the website.")
        return None, None

    # Fetch the .ods file (longer timeout: the spreadsheet can be large)
    ods_response = requests.get(file_url, headers=headers, timeout=60)
    if ods_response.status_code != 200:
        st.error("Failed to download the visa decisions file.")
        return None, None

    # Read the .ods file without headers; the real header row is detected below
    ods_file = BytesIO(ods_response.content)
    df = pd.read_excel(ods_file, engine="odf", header=None)

    # Detect header row (find where 'Application Number' is located)
    header_index = df[
        df.astype(str).apply(lambda x: x.str.contains("Application Number", na=False)).any(axis=1)
    ].index
    if len(header_index) == 0:
        st.error("Could not find the header row containing 'Application Number'. Check the file format.")
        return None, None
    header_index = header_index[0]  # first matching row index

    # Trim rows above the header and promote the header row to column names
    df = df.iloc[header_index:].reset_index(drop=True)
    df.columns = df.iloc[0]          # first remaining row holds the labels
    df = df[1:].reset_index(drop=True)  # drop the header row from the data

    # Keep only relevant columns
    if "Application Number" not in df.columns or "Decision" not in df.columns:
        st.error("Required columns not found in the file.")
        return None, None
    df = df[["Application Number", "Decision"]]

    # Ensure "Application Number" is a string so it matches user input exactly
    df["Application Number"] = df["Application Number"].astype(str).str.strip()

    print("Data fetched successfully.")
    return df, file_name


# Fetch data once and store it globally
precomputed_df, file_name = fetch_data()


def _numeric_key(value):
    """Best-effort integer view of an application number; None if not numeric."""
    try:
        return int(value)
    except (TypeError, ValueError):
        return None


# Function to determine before/after status
def check_application_status(application_number):
    """Look up *application_number* in the fetched data and describe its status.

    Returns a human-readable string: the recorded decision, a past/future
    hint when the number falls outside the known range, or an error note
    when the data could not be fetched.
    """
    if precomputed_df is None:
        return "Error fetching data"

    application_number = str(application_number).strip()  # match the string column

    if application_number in precomputed_df["Application Number"].values:
        decision = precomputed_df.loc[
            precomputed_df["Application Number"] == application_number, "Decision"
        ].values[0]
        return f"Decision: {decision}"

    # BUG FIX: lexicographic string comparison misorders numeric IDs of
    # different lengths ("9" > "10"), so compare numerically when possible.
    app_key = _numeric_key(application_number)
    known = [k for k in map(_numeric_key, precomputed_df["Application Number"]) if k is not None]
    if app_key is not None and known:
        if app_key < min(known):
            return f"Application number {application_number} is from the past. Decision might not be recorded."
        if app_key > max(known):
            return f"Application number {application_number} is in the future. Decision is pending."
        return "No data found."

    # Fallback for non-numeric application numbers: original string ordering.
    min_app_number = precomputed_df["Application Number"].min()
    max_app_number = precomputed_df["Application Number"].max()
    if application_number < min_app_number:
        return f"Application number {application_number} is from the past. Decision might not be recorded."
    elif application_number > max_app_number:
        return f"Application number {application_number} is in the future. Decision is pending."
    else:
        return "No data found."
# Function to generate a PDF
def generate_pdf(df, title="Visa Decisions"):
    """Render the Application Number / Decision rows of *df* into a PDF.

    Args:
        df: DataFrame with 'Application Number' and 'Decision' columns.
        title: Heading printed centered at the top of the document.

    Returns:
        BytesIO positioned at offset 0, containing the finished PDF bytes
        (ready to hand to a download button or write to disk).
    """
    pdf = FPDF()
    pdf.set_auto_page_break(auto=True, margin=15)
    pdf.add_page()
    pdf.set_font("Arial", size=12)

    pdf.cell(200, 10, txt=title, ln=True, align="C")
    pdf.ln(10)

    # Two label/value cell pairs per record, with a small gap between records.
    for _, row in df.iterrows():
        pdf.cell(50, 10, txt="Application Number:", ln=False)
        pdf.cell(100, 10, txt=str(row["Application Number"]), ln=True)
        pdf.cell(50, 10, txt="Decision:", ln=False)
        pdf.cell(100, 10, txt=str(row["Decision"]), ln=True)
        pdf.ln(5)

    # BUG FIX: pdf.output(pdf_output, "F") treats the first argument as a
    # file *path* when dest="F", so passing a BytesIO failed at runtime.
    # Request the document in memory instead and wrap the bytes ourselves.
    try:
        raw = pdf.output(dest="S")  # PyFPDF 1.x returns str; fpdf2 a bytearray
    except TypeError:
        raw = pdf.output()  # newer fpdf2 releases dropped the dest parameter
    data = raw.encode("latin-1") if isinstance(raw, str) else bytes(raw)
    pdf_output = BytesIO(data)
    pdf_output.seek(0)
    return pdf_output


print("Loading File Module: generate_pdf is defined.")