SR05 committed on
Commit
065e3e9
·
verified ·
1 Parent(s): aadefa7

Update loading_file.py

Browse files
Files changed (1) hide show
  1. loading_file.py +42 -26
loading_file.py CHANGED
@@ -1,29 +1,45 @@
 
 
 
1
  import streamlit as st
2
- import pandas as pd
3
 
4
- # Store cleaned dataset globally for access in other steps
5
- cleaned_data = None
6
 
7
- def load_and_clean_data(ods_file, file_name):
8
- global cleaned_data # To make it accessible in other files
9
- # Load the dataset and clean it as done before
10
- df = pd.read_excel(ods_file, engine='odf')
11
- df.drop(columns=["Unnamed: 0", "Unnamed: 1"], inplace=True, errors='ignore')
12
- df.dropna(how='all', inplace=True)
13
- df.reset_index(drop=True, inplace=True)
14
-
15
- # Clean column names
16
- for idx, row in df.iterrows():
17
- if row['Unnamed: 2'] == 'Application Number' and row['Unnamed: 3'] == 'Decision':
18
- df.columns = ['Application Number', 'Decision']
19
- df = df.iloc[idx + 1:]
20
- break
21
- df.reset_index(drop=True, inplace=True)
22
- df['Application Number'] = df['Application Number'].astype(str)
23
-
24
- # Save the cleaned data globally
25
- cleaned_data = df
26
-
27
- # Display success
28
- st.success(f"Data successfully loaded and cleaned: {file_name}")
29
- return df
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import requests
from bs4 import BeautifulSoup
from io import BytesIO
import streamlit as st

# URL of the website to scrape: the Irish embassy (New Delhi) page that
# lists visa processing times and links to the decisions spreadsheet.
url = "https://www.ireland.ie/en/india/newdelhi/services/visas/processing-times-and-decisions/"

# Headers for the HTTP request — a browser-like User-Agent, presumably so
# the site does not reject the scraper as a bot (TODO confirm the site
# actually requires this).
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36"
    )
}
16
+
17
@st.cache_data(ttl=3600, max_entries=1)
def load_data_file():
    """Download the latest visa-decisions .ods file from the embassy site.

    Scrapes the processing-times page for the first anchor whose text
    contains "Visa decisions made from 1 January 2024 to", resolves its
    (possibly relative) href against the page URL, and downloads the file.
    Cached by Streamlit for one hour (single entry).

    Returns:
        tuple: (BytesIO of the file contents, link text used as file name),
        or (None, None) on any failure — an st.error describing the failure
        is shown in every error path.
    """
    # Timeouts added: without them a stalled connection hangs the app forever.
    try:
        response = requests.get(url, headers=headers, timeout=30)
    except requests.RequestException as exc:
        # Network-level failures (DNS, refused connection) previously
        # crashed the app instead of surfacing an error message.
        st.error(f"Failed to retrieve the webpage. Error: {exc}")
        return None, None

    if response.status_code != 200:
        st.error(f"Failed to retrieve the webpage. Status code: {response.status_code}")
        return None, None

    soup = BeautifulSoup(response.content, 'html.parser')

    # Look for the link to the .ods file by its visible anchor text.
    file_url = None
    file_name = None
    for link in soup.find_all('a'):
        link_text = link.get_text(strip=True)
        if "Visa decisions made from 1 January 2024 to" in link_text:
            file_url = link.get('href')
            file_name = link_text
            break

    if not file_url:
        # Fix: the original silently returned (None, None) here with no
        # message, leaving the user with no clue why loading failed.
        st.error("Could not find the visa decisions file link on the page.")
        return None, None

    # Resolve a relative href against the page URL.
    if not file_url.startswith('http'):
        file_url = requests.compat.urljoin(url, file_url)

    try:
        file_response = requests.get(file_url, headers=headers, timeout=60)
    except requests.RequestException as exc:
        st.error(f"Failed to download the file. Error: {exc}")
        return None, None

    if file_response.status_code != 200:
        st.error(f"Failed to download the file. Status code: {file_response.status_code}")
        return None, None

    return BytesIO(file_response.content), file_name