SR05 committed on
Commit
065e3e9
·
verified ·
1 Parent(s): aadefa7

Update loading_file.py

Browse files
Files changed (1) hide show
  1. loading_file.py +42 -26
loading_file.py CHANGED
@@ -1,29 +1,45 @@
 
 
 
1
  import streamlit as st
2
- import pandas as pd
3
 
4
- # Store cleaned dataset globally for access in other steps
5
- cleaned_data = None
6
 
7
- def load_and_clean_data(ods_file, file_name):
8
- global cleaned_data # To make it accessible in other files
9
- # Load the dataset and clean it as done before
10
- df = pd.read_excel(ods_file, engine='odf')
11
- df.drop(columns=["Unnamed: 0", "Unnamed: 1"], inplace=True, errors='ignore')
12
- df.dropna(how='all', inplace=True)
13
- df.reset_index(drop=True, inplace=True)
14
-
15
- # Clean column names
16
- for idx, row in df.iterrows():
17
- if row['Unnamed: 2'] == 'Application Number' and row['Unnamed: 3'] == 'Decision':
18
- df.columns = ['Application Number', 'Decision']
19
- df = df.iloc[idx + 1:]
20
- break
21
- df.reset_index(drop=True, inplace=True)
22
- df['Application Number'] = df['Application Number'].astype(str)
23
-
24
- # Save the cleaned data globally
25
- cleaned_data = df
26
-
27
- # Display success
28
- st.success(f"Data successfully loaded and cleaned: {file_name}")
29
- return df
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import requests
from bs4 import BeautifulSoup
from io import BytesIO
import streamlit as st

# URL of the website to scrape: the Irish embassy (New Delhi) page that
# lists visa processing times and links to the decisions spreadsheet.
url = "https://www.ireland.ie/en/india/newdelhi/services/visas/processing-times-and-decisions/"

# Headers for the HTTP request — a browser-like User-Agent, presumably so
# the site does not reject the scraper as a bot (TODO confirm the site
# actually requires this).
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36"
    )
}
16
+
17
@st.cache_data(ttl=3600, max_entries=1)
def load_data_file():
    """Download the latest visa-decisions .ods file from the embassy site.

    Scrapes the processing-times page for the first anchor whose text
    contains "Visa decisions made from 1 January 2024 to", resolves its
    (possibly relative) href against the page URL, and downloads the file.
    Cached by Streamlit for one hour (single entry).

    Returns:
        tuple: (BytesIO of the file contents, link text used as file name),
        or (None, None) on any failure — an st.error describing the failure
        is shown in every error path.
    """
    # Timeouts added: without them a stalled connection hangs the app forever.
    try:
        response = requests.get(url, headers=headers, timeout=30)
    except requests.RequestException as exc:
        # Network-level failures (DNS, refused connection) previously
        # crashed the app instead of surfacing an error message.
        st.error(f"Failed to retrieve the webpage. Error: {exc}")
        return None, None

    if response.status_code != 200:
        st.error(f"Failed to retrieve the webpage. Status code: {response.status_code}")
        return None, None

    soup = BeautifulSoup(response.content, 'html.parser')

    # Look for the link to the .ods file by its visible anchor text.
    file_url = None
    file_name = None
    for link in soup.find_all('a'):
        link_text = link.get_text(strip=True)
        if "Visa decisions made from 1 January 2024 to" in link_text:
            file_url = link.get('href')
            file_name = link_text
            break

    if not file_url:
        # Fix: the original silently returned (None, None) here with no
        # message, leaving the user with no clue why loading failed.
        st.error("Could not find the visa decisions file link on the page.")
        return None, None

    # Resolve a relative href against the page URL.
    if not file_url.startswith('http'):
        file_url = requests.compat.urljoin(url, file_url)

    try:
        file_response = requests.get(file_url, headers=headers, timeout=60)
    except requests.RequestException as exc:
        st.error(f"Failed to download the file. Error: {exc}")
        return None, None

    if file_response.status_code != 200:
        st.error(f"Failed to download the file. Status code: {file_response.status_code}")
        return None, None

    return BytesIO(file_response.content), file_name