# Source: Hugging Face Space "SR05/Delhi_Irish_visa_decisions" (status: Running)
# File size: 9,456 bytes

import streamlit as st
import pandas as pd
import bisect
import requests
from io import BytesIO
from bs4 import BeautifulSoup
from fpdf import FPDF

# ------------------------------------------------------------------------------------
# Step 1: Load Data (Fetch and Prepare the DataFrame)
# ------------------------------------------------------------------------------------

@st.cache_data(ttl=3600)
def fetch_ods_file():
    """
    Fetches the .ods file from the visa decisions website and returns its binary content.

    The decisions page is downloaded and scanned for the first link that has
    a target and whose text contains "Visa decisions made from 1 January 2025 to".
    That link's target is then downloaded. The return value is cached by
    Streamlit with ttl=3600.

    Returns:
        - A BytesIO object containing the file content if successful.
        - The file name for naming convention (built from the link text).
        - None, None if the page or the file could not be fetched, or if no
          matching link was found.
    """
    url = "https://www.ireland.ie/en/india/newdelhi/services/visas/processing-times-and-decisions/"
    # Browser-like User-Agent; presumably the site rejects default client
    # identifiers -- TODO confirm.
    headers = {
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
            "(KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36"
        )
    }
    # Seconds. Without a timeout a stalled server would block the app forever.
    timeout = 30

    try:
        response = requests.get(url, headers=headers, timeout=timeout)
    except requests.RequestException:
        # Network failures are reported the same way as HTTP failures.
        return None, None
    if response.status_code != 200:
        return None, None

    soup = BeautifulSoup(response.content, 'html.parser')

    # Find the first link that carries the specific text AND has a target.
    file_url = None
    file_name = None
    for link in soup.find_all('a'):
        link_text = link.get_text(strip=True)
        href = link.get('href')
        if href and "Visa decisions made from 1 January 2025 to" in link_text:
            # urljoin leaves absolute URLs untouched and resolves relative ones.
            file_url = requests.compat.urljoin(url, href)
            file_name = link_text.replace(" ", "_").replace("/", "-") + ".ods"
            break

    if file_url is None:
        return None, None

    try:
        file_response = requests.get(file_url, headers=headers, timeout=timeout)
    except requests.RequestException:
        return None, None
    if file_response.status_code != 200:
        return None, None

    return BytesIO(file_response.content), file_name

@st.cache_data
def prepare_dataframe(file):
    """
    Prepares and cleans the DataFrame from the fetched .ods file.

    The sheet is read with the 'odf' engine, the first two unnamed columns and
    all fully-empty rows are dropped, and the row holding the literal headers
    "Application Number" / "Decision" is located. Everything after that row
    becomes the data. Rows whose application number is not numeric are
    discarded rather than aborting the whole load.

    Args:
        file: The .ods file content as BytesIO.

    Returns:
        A cleaned DataFrame with columns "Application Number" (int64) and
        "Decision", sorted ascending by application number, ready for
        searching. None if the expected header row cannot be found.
    """
    df = pd.read_excel(file, engine='odf')
    df.drop(columns=["Unnamed: 0", "Unnamed: 1"], inplace=True, errors="ignore")
    df.dropna(how="all", inplace=True)
    df.reset_index(drop=True, inplace=True)

    # Both source columns must exist before the header row can be searched for.
    if "Unnamed: 2" not in df.columns or "Unnamed: 3" not in df.columns:
        return None

    # Identify the header row
    header_mask = (df["Unnamed: 2"] == "Application Number") & (df["Unnamed: 3"] == "Decision")
    if not header_mask.any():
        return None
    # idxmax on a boolean mask yields the first True label; after reset_index
    # the label equals the row position.
    header_idx = header_mask.idxmax()

    # Skip the header row, keep only the two relevant columns, and copy so the
    # assignments below act on an independent frame rather than a slice view.
    df = df.iloc[header_idx + 1:][["Unnamed: 2", "Unnamed: 3"]].copy()
    df.columns = ["Application Number", "Decision"]

    # Process application numbers: coerce unparsable cells (blank, footnotes)
    # to NaN and drop them instead of raising.
    numbers = pd.to_numeric(
        df["Application Number"].astype(str).str.strip(), errors="coerce"
    )
    valid = numbers.notna()
    df = df[valid].copy()
    df["Application Number"] = numbers[valid].astype("int64")

    # Sort the DataFrame; the nearest-number lookup relies on this order.
    df = df.sort_values(by="Application Number").reset_index(drop=True)

    return df

# ------------------------------------------------------------------------------------
# Step 2: Binary Search Utility for Finding Nearest Application Numbers
# ------------------------------------------------------------------------------------

def binary_search_nearest(df, target):
    """
    Locates the neighbours of a target application number via bisection.

    The "Application Number" column is converted to a list and bisected with
    bisect_left, so the column is assumed to already be in ascending order
    (this is not checked here).

    Args:
        df: The DataFrame containing the application numbers.
        target: The target application number to search for.

    Returns:
        A (before, after) tuple: the value just left of the insertion point
        and the value at the insertion point. Either is None when no such
        element exists. If target itself is in the column, "after" is target.
    """
    sorted_numbers = df["Application Number"].tolist()
    insert_at = bisect.bisect_left(sorted_numbers, target)

    # Element immediately to the left of the insertion point, if any.
    before = None
    if insert_at > 0:
        before = sorted_numbers[insert_at - 1]

    # Element sitting at the insertion point, if any.
    after = None
    if insert_at < len(sorted_numbers):
        after = sorted_numbers[insert_at]

    return before, after

# ------------------------------------------------------------------------------------
# Step 3: Export DataFrame to PDF
# ------------------------------------------------------------------------------------

def export_to_pdf(df, file_name):
    """
    Renders the DataFrame as a bordered table in a PDF document.

    The document gets a centred bold title, one bold header row with the
    column labels, and one row per DataFrame record with every value
    converted via str().

    Args:
        df: The DataFrame to be converted to PDF.
        file_name: The name of the PDF file to create. Accepted for the
            caller's convenience; this function does not read it.

    Returns:
        A BytesIO object containing the PDF file, rewound to position 0.
    """
    cell_width = 50
    cell_height = 10

    def emit_row(document, texts):
        # One bordered cell per value, then move to the next line.
        for text in texts:
            document.cell(cell_width, cell_height, text, border=1)
        document.ln()

    document = FPDF()
    document.set_auto_page_break(auto=True, margin=15)
    document.add_page()
    document.set_font("Arial", size=12)

    # Title
    document.set_font("Arial", style="B", size=16)
    document.cell(0, 10, "Visa Decisions Data", ln=True, align="C")
    document.ln(10)

    # Table Header
    document.set_font("Arial", style="B", size=12)
    emit_row(document, df.columns)

    # Table Data
    document.set_font("Arial", size=12)
    for _, record in df.iterrows():
        emit_row(document, map(str, record))

    # NOTE(review): assumes the installed FPDF's output() accepts a file-like
    # object as its first argument -- confirm against the pinned package.
    buffer = BytesIO()
    document.output(buffer)
    buffer.seek(0)

    return buffer

# ------------------------------------------------------------------------------------
# Step 4: Allow User to Download Cleaned DataFrame
# ------------------------------------------------------------------------------------

def download_dataframe(df, file_name):
    """
    Adds two sidebar download buttons for the cleaned DataFrame: CSV and PDF.

    The download names are derived from file_name by replacing ".ods" with
    ".csv" and ".pdf" respectively.

    Args:
        df: The cleaned DataFrame to download.
        file_name: The name of the original file for naming convention.
    """
    # CSV variant
    csv_name = file_name.replace('.ods', '.csv')
    st.sidebar.download_button(
        label="Download Cleaned Data as CSV",
        data=df.to_csv(index=False),
        file_name=csv_name,
        mime="text/csv"
    )

    # PDF variant (rendered only after the CSV button has been emitted)
    pdf_name = file_name.replace('.ods', '.pdf')
    st.sidebar.download_button(
        label="Download Cleaned Data as PDF",
        data=export_to_pdf(df, pdf_name),
        file_name=pdf_name,
        mime="application/pdf"
    )

# ------------------------------------------------------------------------------------
# Step 5: Search Application Status
# ------------------------------------------------------------------------------------

def search_application(df):
    """
    Handles the user input and searches for the application number in the DataFrame.

    The input may carry surrounding whitespace and an optional "IRL" prefix
    (any letter case), as the prompt suggests; both are removed before
    validation. An exact match shows the decision. Otherwise the closest
    application numbers below and above the entered one are listed with their
    decisions and numeric distance.

    Args:
        df: The DataFrame containing application numbers and decisions.
            Expected to have the columns "Application Number" and "Decision".
    """
    user_input = st.text_input("Enter your Application Number (including IRL if applicable):")

    if not user_input:
        return

    # The prompt invites an "IRL" prefix, so strip it instead of rejecting it.
    digits = user_input.strip()
    if digits.upper().startswith("IRL"):
        digits = digits[3:].strip()

    # Validate user input. isdecimal() only accepts characters int() can
    # parse, unlike isdigit(), which also accepts e.g. superscript digits.
    if not digits.isdecimal() or len(digits) < 8:
        st.warning("Please enter at least 8 digits for your VISA application number.")
        return

    application_number = int(digits)

    # Search for the application number in the DataFrame
    result = df[df["Application Number"] == application_number]

    if not result.empty:
        decision = result.iloc[0]["Decision"]
        # str() guards against non-string cells (e.g. NaN) in the Decision column.
        verdict = str(decision).strip().lower()
        if verdict == "refused":
            st.error(f"Application Number: {application_number}\n\nDecision: **Refused**")
        elif verdict == "approved":
            st.success(f"Application Number: {application_number}\n\nDecision: **Approved**")
        else:
            st.info(f"Application Number: {application_number}\n\nDecision: **{decision}**")
        return

    st.warning(f"No record found for Application Number: {application_number}.")
    before, after = binary_search_nearest(df, application_number)

    # Build one row per neighbour that actually exists. Comparing against
    # None (not truthiness) keeps a legitimate value of 0 from being skipped.
    rows = []
    for label, neighbour in (("Before", before), ("After", after)):
        if neighbour is None:
            continue
        neighbour_decision = df.loc[
            df["Application Number"] == neighbour, "Decision"
        ].iloc[0]
        rows.append({
            "Nearest Application": label,
            "Application Number": neighbour,
            "Decision": neighbour_decision,
            "Difference": abs(application_number - neighbour),
        })

    # dropna() removes neighbours whose decision cell is missing.
    nearest_records = pd.DataFrame(
        rows,
        columns=["Nearest Application", "Application Number", "Decision", "Difference"],
    ).dropna()

    if not nearest_records.empty:
        st.subheader("Nearest Application Numbers")
        st.table(nearest_records.reset_index(drop=True))
    else:
        st.info("No nearest application numbers found.")

# ------------------------------------------------------------------------------------
# Main Streamlit Application Logic
# ------------------------------------------------------------------------------------

def main():
    """
    Entry point of the Streamlit page.

    Fetches the decisions file, turns it into a DataFrame, then renders the
    sidebar download buttons followed by the application search box. An error
    message is shown instead if either the fetch or the preparation step
    yields nothing.
    """
    st.title("Visa Application Status Checker")

    # Fetch the data; bail out early when nothing usable came back.
    ods_file, original_file_name = fetch_ods_file()
    if not (ods_file and original_file_name):
        st.error("Failed to fetch the .ods file.")
        return

    # Prepare the data
    df = prepare_dataframe(ods_file)
    if df is None:
        st.error("Failed to prepare the data.")
        return

    # Provide download option for cleaned DataFrame
    st.sidebar.header("Download Options")
    download_dataframe(df, original_file_name)

    # Search application
    search_application(df)

# Launch the app only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()