Update loading_file.py
Browse files

loading_file.py  (+42 -109)  CHANGED
@@ -3,10 +3,8 @@ import pandas as pd
 import streamlit as st
 from io import BytesIO
 from bs4 import BeautifulSoup
-from fpdf import FPDF

-
-@st.cache_data(ttl=3600)
+@st.cache_data(ttl=3600, max_entries=1)
 def fetch_data():
     url = "https://www.ireland.ie/en/india/newdelhi/services/visas/processing-times-and-decisions/"
     headers = {
@@ -15,112 +13,47 @@ def fetch_data():
             "(KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36"
         )
     }
-
-    # Fetch the webpage
     response = requests.get(url, headers=headers)
-    if response.status_code != 200:
-        st.error("Failed to fetch the webpage. Please try again later.")
-        return None, None
-
-    # Parse the HTML to find the .ods link
-    soup = BeautifulSoup(response.content, "html.parser")
-    file_url = None
-    file_name = None
-    for link in soup.find_all("a"):
-        if "Visa decisions made from 1 January 2025" in link.get_text():
-            file_url = link.get("href")
-            file_name = link.get_text().strip()
-            if not file_url.startswith("http"):
-                file_url = requests.compat.urljoin(url, file_url)
-            break
-
-    if not file_url or not file_name:
-        st.error("Could not find the visa decisions file link on the website.")
-        return None, None
-
-    # Fetch the .ods file
-    ods_response = requests.get(file_url, headers=headers)
-    if ods_response.status_code != 200:
-        st.error("Failed to download the visa decisions file.")
-        return None, None
-
-    # Read .ods file
-    ods_file = BytesIO(ods_response.content)
-    df = pd.read_excel(ods_file, engine="odf", header=None)  # Read without headers
-
-    # Detect header row (find where 'Application Number' is located)
-    header_index = df[df.astype(str).apply(lambda x: x.str.contains("Application Number", na=False)).any(axis=1)].index
-    if len(header_index) == 0:
-        st.error("Could not find the header row containing 'Application Number'. Check the file format.")
-        return None, None
-
-    header_index = header_index[0]  # Get the first matching row index
-
-    # Trim unnecessary rows and set correct header
-    df = df.iloc[header_index:].reset_index(drop=True)
-    df.columns = df.iloc[0]  # Set the first row as column headers
-    df = df[1:].reset_index(drop=True)  # Remove the header row from data
-
-    # Keep only relevant columns
-    if "Application Number" not in df.columns or "Decision" not in df.columns:
-        st.error("Required columns not found in the file.")
-        return None, None
-
-    df = df[["Application Number", "Decision"]]
-
-    # Ensure "Application Number" is treated as a string to match user input correctly
-    df["Application Number"] = df["Application Number"].astype(str).str.strip()
-
-    print("Data fetched successfully.")

-
-
-    #
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    if response.status_code == 200:
+        soup = BeautifulSoup(response.content, "html.parser")
+        # Find the link to download the file
+        file_url = None
+        links = soup.find_all('a')
+        for link in links:
+            link_text = link.get_text(strip=True)
+            if "Visa decisions made from 1 January 2025 to" in link_text:
+                file_url = link.get('href')
+                break
+
+        if file_url:
+            # Make the link absolute if it's relative
+            if not file_url.startswith('http'):
+                file_url = requests.compat.urljoin(url, file_url)
+
+            file_response = requests.get(file_url, headers=headers)
+
+            if file_response.status_code == 200:
+                file_data = BytesIO(file_response.content)
+                df = pd.read_excel(file_data, engine='odf')
+
+                # Clean up and process the DataFrame
+                df.drop(columns=["Unnamed: 0", "Unnamed: 1"], inplace=True, errors='ignore')
+                df.dropna(how='all', inplace=True)
+                df.reset_index(drop=True, inplace=True)
+
+                # Assuming the header row is correct
+                df.columns = ['Application Number', 'Decision']
+
+                # Debugging: Display the first few rows of the dataframe
+                #st.write("First few rows of the data:")
+                #st.write(df.head())
+
+                return df
+            else:
+                st.error("Failed to download the file.")
+        else:
+            st.error("The file link was not found on the webpage.")
     else:
-
-
-# Function to generate a PDF
-def generate_pdf(df, title="Visa Decisions"):
-    pdf = FPDF()
-    pdf.set_auto_page_break(auto=True, margin=15)
-    pdf.add_page()
-    pdf.set_font("Arial", size=12)
-
-    pdf.cell(200, 10, txt=title, ln=True, align="C")
-    pdf.ln(10)
-
-    for index, row in df.iterrows():
-        pdf.cell(50, 10, txt="Application Number:", ln=False)
-        pdf.cell(100, 10, txt=str(row["Application Number"]), ln=True)
-
-        pdf.cell(50, 10, txt="Decision:", ln=False)
-        pdf.cell(100, 10, txt=str(row["Decision"]), ln=True)
-
-        pdf.ln(5)
-
-    pdf_output = BytesIO()
-    pdf.output(pdf_output, "F")
-    pdf_output.seek(0)
-
-    return pdf_output
-
-print("Loading File Module: generate_pdf is defined.")
+        st.error("Failed to retrieve the webpage.")
+    return None
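A note on the revised decorator: @st.cache_data(ttl=3600, max_entries=1) keeps at most one cached result and expires it after an hour, which fits a single scraped file that changes at most a few times a day. Streamlit also attaches a .clear() method to every cached function, so a manual refresh control is cheap to add if stale data ever becomes a problem. A minimal sketch, assuming the Space has an app.py that imports this module (the button label and wiring are illustrative, not part of this commit):

import streamlit as st
from loading_file import fetch_data  # module and function names from this commit

# Optional manual refresh: .clear() drops the cached entry so the next
# fetch_data() call re-scrapes the page and re-downloads the .ods file.
if st.button("Refresh visa decisions"):
    fetch_data.clear()

df = fetch_data()
if df is not None:
    st.write(f"Loaded {len(df)} visa decisions.")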
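One deployment detail worth checking: pd.read_excel(file_data, engine='odf') needs the odfpy package at runtime, and with "from fpdf import FPDF" removed, fpdf can come out of the dependency list. A plausible requirements.txt for this Space after the change (an assumption; the commit itself does not touch dependencies):

streamlit
pandas
odfpy           # required by pd.read_excel(..., engine="odf")
requests
beautifulsoup4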
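Callers also need to track the changed contract: the old function returned a pair (it used "return None, None" on failure), while the new one returns a single DataFrame or None. And since the new version no longer casts Application Number to string, that normalization now falls to the caller. A sketch of the lookup side under those assumptions (the widget labels and app.py placement are hypothetical):

import streamlit as st
from loading_file import fetch_data  # assumed import path

df = fetch_data()
if df is None:
    st.stop()  # fetch_data() has already shown an st.error message

# Normalize both sides to stripped strings before comparing,
# since the new fetch_data() no longer does this itself.
query = st.text_input("Application number").strip()
if query:
    hits = df[df["Application Number"].astype(str).str.strip() == query]
    if hits.empty:
        st.warning("No decision found for that application number.")
    else:
        st.success(f"Decision: {hits.iloc[0]['Decision']}")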