Delhi_Irish_visa_decisions / visa_checker_with_download.py
SR05's picture
Update visa_checker_with_download.py
dd68b30 verified
import streamlit as st
import pandas as pd
import bisect
import requests
from io import BytesIO
from bs4 import BeautifulSoup
from fpdf import FPDF
# ------------------------------------------------------------------------------------
# Step 1: Load Data (Fetch and Prepare the DataFrame)
# ------------------------------------------------------------------------------------
@st.cache_data(ttl=3600)
def fetch_ods_file():
    """
    Fetches the .ods file from the visa decisions website and returns its binary content.

    The listing page is scraped for the first anchor whose text contains
    "Visa decisions made from 1 January 2025 to"; that anchor's href is then
    downloaded. The result (including a failure result) is cached by Streamlit
    for one hour.

    Returns:
    - A BytesIO object containing the file content if successful.
    - The file name for naming convention (link text with spaces replaced by
      "_" and "/" replaced by "-", plus ".ods").
    - None, None if the file could not be fetched (non-200 response, no
      matching link, timeout, or any other network error).
    """
    url = "https://www.ireland.ie/en/india/newdelhi/services/visas/processing-times-and-decisions/"
    headers = {
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
            "(KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36"
        )
    }
    # Without a timeout requests can block forever and hang the whole app.
    timeout_seconds = 30

    try:
        response = requests.get(url, headers=headers, timeout=timeout_seconds)
    except requests.RequestException:
        # Honour the documented contract instead of crashing on network errors.
        return None, None
    if response.status_code != 200:
        return None, None

    soup = BeautifulSoup(response.content, 'html.parser')

    # Find the link containing the specific text
    file_url = None
    file_name = None
    for link in soup.find_all('a'):
        link_text = link.get_text(strip=True)
        if "Visa decisions made from 1 January 2025 to" in link_text:
            file_url = link.get('href')
            file_name = link_text.replace(" ", "_").replace("/", "-") + ".ods"
            break
    if not file_url:
        return None, None

    # urljoin leaves absolute URLs untouched and resolves relative ones
    # against the listing page.
    file_url = requests.compat.urljoin(url, file_url)

    try:
        file_response = requests.get(file_url, headers=headers, timeout=timeout_seconds)
    except requests.RequestException:
        return None, None
    if file_response.status_code == 200:
        return BytesIO(file_response.content), file_name
    return None, None
@st.cache_data
def prepare_dataframe(file):
    """
    Prepares and cleans the DataFrame from the fetched .ods file.

    The sheet is expected to carry its real header ("Application Number",
    "Decision") as a data row in the columns pandas names "Unnamed: 2" and
    "Unnamed: 3"; everything up to and including that row is discarded.
    Rows whose application number is not numeric are dropped rather than
    aborting the whole load.

    Args:
    file: The .ods file content as BytesIO.
    Returns:
    A cleaned DataFrame sorted by "Application Number" (as integers) ready
    for searching, or None if the header row could not be located.
    """
    df = pd.read_excel(file, engine='odf')
    df = df.drop(columns=["Unnamed: 0", "Unnamed: 1"], errors="ignore")
    df = df.dropna(how="all").reset_index(drop=True)

    # Identify the header row; bail out with None (which the caller reports
    # as a preparation failure) instead of raising KeyError later on.
    if not {"Unnamed: 2", "Unnamed: 3"}.issubset(df.columns):
        return None
    is_header = (df["Unnamed: 2"] == "Application Number") & (df["Unnamed: 3"] == "Decision")
    if not is_header.any():
        return None
    header_pos = int(is_header.to_numpy().argmax())  # first matching row

    # Keep only the two data columns below the header; copy so the
    # assignments below do not write into a view of the original frame.
    df = df.iloc[header_pos + 1:][["Unnamed: 2", "Unnamed: 3"]].copy()
    df.columns = ["Application Number", "Decision"]

    # Process application numbers: coerce to numbers and discard rows that
    # are not numeric (blank cells, footers, notes).
    numbers = pd.to_numeric(
        df["Application Number"].astype(str).str.strip(), errors="coerce"
    )
    valid = numbers.notna()
    df = df.loc[valid].copy()
    df["Application Number"] = numbers.loc[valid].astype("int64")

    # Sort so that binary search over the application numbers is valid.
    df = df.sort_values(by="Application Number").reset_index(drop=True)
    return df
# ------------------------------------------------------------------------------------
# Step 2: Binary Search Utility for Finding Nearest Application Numbers
# ------------------------------------------------------------------------------------
def binary_search_nearest(df, target):
    """
    Locates the application numbers adjacent to a target via bisection.

    The "Application Number" column must already be sorted ascending.

    Args:
    df: The DataFrame containing the application numbers.
    target: The target application number to search for.
    Returns:
    A (before, after) tuple: the largest number strictly below the target
    and the smallest number at or above it. Either element is None when no
    such number exists.
    """
    numbers = df["Application Number"].tolist()
    insert_at = bisect.bisect_left(numbers, target)

    lower = None if insert_at == 0 else numbers[insert_at - 1]
    upper = None if insert_at >= len(numbers) else numbers[insert_at]
    return lower, upper
# ------------------------------------------------------------------------------------
# Step 3: Export DataFrame to PDF
# ------------------------------------------------------------------------------------
def export_to_pdf(df, file_name):
    """
    Renders the cleaned DataFrame as a simple bordered table in a PDF.

    Args:
    df: The DataFrame to be converted to PDF.
    file_name: The name of the PDF file to create (not used by this
    function; the PDF is only written to memory).
    Returns:
    A BytesIO object containing the PDF file, positioned at the start.
    """
    pdf = FPDF()

    def emit_row(texts):
        # One fixed-width bordered cell per value, then move to the next line.
        for text in texts:
            pdf.cell(50, 10, text, border=1)
        pdf.ln()

    pdf.set_auto_page_break(auto=True, margin=15)
    pdf.add_page()
    pdf.set_font("Arial", size=12)

    # Title
    pdf.set_font("Arial", style="B", size=16)
    pdf.cell(0, 10, "Visa Decisions Data", ln=True, align="C")
    pdf.ln(10)

    # Table Header
    pdf.set_font("Arial", style="B", size=12)
    emit_row(df.columns)

    # Table Data
    pdf.set_font("Arial", size=12)
    for _, record in df.iterrows():
        emit_row(str(value) for value in record)

    buffer = BytesIO()
    pdf.output(buffer)
    buffer.seek(0)
    return buffer
# ------------------------------------------------------------------------------------
# Step 4: Allow User to Download Cleaned DataFrame
# ------------------------------------------------------------------------------------
def download_dataframe(df, file_name):
    """
    Adds two sidebar download buttons for the cleaned DataFrame: CSV and PDF.

    Args:
    df: The cleaned DataFrame to download.
    file_name: The name of the original file for naming convention; its
    ".ods" part is swapped for ".csv" / ".pdf" in the download names.
    """
    # CSV download
    csv_name = file_name.replace('.ods', '.csv')
    csv_payload = df.to_csv(index=False)
    st.sidebar.download_button(
        label="Download Cleaned Data as CSV",
        data=csv_payload,
        file_name=csv_name,
        mime="text/csv"
    )

    # PDF download
    pdf_name = file_name.replace('.ods', '.pdf')
    pdf_payload = export_to_pdf(df, pdf_name)
    st.sidebar.download_button(
        label="Download Cleaned Data as PDF",
        data=pdf_payload,
        file_name=pdf_name,
        mime="application/pdf"
    )
# ------------------------------------------------------------------------------------
# Step 5: Search Application Status
# ------------------------------------------------------------------------------------
def _parse_application_number(raw):
    """Return the integer application number in *raw*, or None if it is invalid.

    Surrounding whitespace and an optional leading "IRL" (any case) are
    ignored; what remains must be at least 8 decimal digits.
    """
    cleaned = raw.strip()
    if cleaned[:3].upper() == "IRL":
        cleaned = cleaned[3:].strip()
    # isdecimal() (unlike isdigit()) only accepts characters int() can parse.
    if len(cleaned) < 8 or not cleaned.isdecimal():
        return None
    return int(cleaned)


def search_application(df):
    """
    Handles the user input and searches for the application number in the DataFrame.

    Shows the decision when the number is found; otherwise shows the closest
    application numbers below and above it together with their decisions and
    distance from the entered number.

    Args:
    df: The DataFrame containing application numbers and decisions, sorted
    ascending by "Application Number".
    """
    user_input = st.text_input("Enter your Application Number (including IRL if applicable):")
    if not user_input:
        return

    # Validate user input. The prompt invites an "IRL" prefix, so accept it.
    application_number = _parse_application_number(user_input)
    if application_number is None:
        st.warning("Please enter at least 8 digits for your VISA application number.")
        return

    # Search for the application number in the DataFrame
    result = df[df["Application Number"] == application_number]
    if not result.empty:
        decision = result.iloc[0]["Decision"]
        # str() guards against a missing (NaN) decision cell; strip() against
        # stray whitespace in the spreadsheet.
        normalized = str(decision).strip().lower()
        if normalized == "refused":
            st.error(f"Application Number: {application_number}\n\nDecision: **Refused**")
        elif normalized == "approved":
            st.success(f"Application Number: {application_number}\n\nDecision: **Approved**")
        else:
            st.info(f"Application Number: {application_number}\n\nDecision: **{decision}**")
        return

    st.warning(f"No record found for Application Number: {application_number}.")
    before, after = binary_search_nearest(df, application_number)

    # Build rows only for neighbours that exist, so the number columns stay
    # integers instead of being upcast to float by a None placeholder.
    rows = []
    for label, neighbour in (("Before", before), ("After", after)):
        if neighbour is None:
            continue
        neighbour_decision = df.loc[
            df["Application Number"] == neighbour, "Decision"
        ].iloc[0]
        rows.append({
            "Nearest Application": label,
            "Application Number": neighbour,
            "Decision": neighbour_decision,
            "Difference": abs(neighbour - application_number),
        })
    nearest_records = pd.DataFrame(
        rows,
        columns=["Nearest Application", "Application Number", "Decision", "Difference"],
    ).dropna()  # still hide neighbours whose decision cell is empty

    if not nearest_records.empty:
        st.subheader("Nearest Application Numbers")
        st.table(nearest_records.reset_index(drop=True))
    else:
        st.info("No nearest application numbers found.")
# ------------------------------------------------------------------------------------
# Main Streamlit Application Logic
# ------------------------------------------------------------------------------------
def main():
    """
    Entry point of the Streamlit page: fetch the data, prepare it, then
    offer the downloads and the application search.
    """
    st.title("Visa Application Status Checker")

    # Fetch the data
    ods_file, original_file_name = fetch_ods_file()
    if not (ods_file and original_file_name):
        st.error("Failed to fetch the .ods file.")
        return

    # Prepare the data
    df = prepare_dataframe(ods_file)
    if df is None:
        st.error("Failed to prepare the data.")
        return

    # Provide download option for cleaned DataFrame
    st.sidebar.header("Download Options")
    download_dataframe(df, original_file_name)

    # Search application
    search_application(df)
# Run the app only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()