SR05 committed on
Commit 2298f31 · verified · 1 parent: 80937eb

Update dataframe.py

Files changed (1)
  1. dataframe.py +70 -11
dataframe.py CHANGED
@@ -1,14 +1,73 @@
  import pandas as pd
- from loading_file import df
+ import streamlit as st  # <-- Make sure this import is present
+ import requests  # used for the HTTP calls below
+ from bs4 import BeautifulSoup  # used to parse the fetched HTML
+ from io import BytesIO  # used to wrap the downloaded .ods bytes

- # Precompute for faster access during searches
+ # Cache the data loading process for efficiency
  @st.cache_data(ttl=3600)
- def precompute_dataframe():
-     # Sort by application number for better nearest neighbor calculation
-     if df is not None:
-         df["Application Number"] = df["Application Number"].astype(int)
-         return df.sort_values("Application Number").reset_index(drop=True)
-     return None
-
- # Precomputed DataFrame
- precomputed_df = precompute_dataframe()
+ def fetch_data():
+     # URL of the website to scrape
+     url = "https://www.ireland.ie/en/india/newdelhi/services/visas/processing-times-and-decisions/"
+     headers = {
+         "User-Agent": (
+             "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
+             "(KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36"
+         )
+     }
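+     # A browser-like User-Agent is sent because some servers reject bare scripted requests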
+
+     # Fetch the webpage
+     response = requests.get(url, headers=headers)
+     if response.status_code != 200:
+         st.error("Failed to fetch the webpage. Please try again later.")
+         return None
+
+     # Parse the HTML to find the .ods link
+     soup = BeautifulSoup(response.content, "html.parser")
+     file_url = None
+     for link in soup.find_all("a"):
+         if "Visa decisions made from" in link.get_text():
+             file_url = link.get("href")
+             if not file_url.startswith("http"):
+                 file_url = requests.compat.urljoin(url, file_url)
+             break
+
+     if not file_url:
+         st.error("Could not find the visa decisions file link on the website.")
+         return None
+
+     # Fetch the .ods file
+     ods_response = requests.get(file_url, headers=headers)
+     if ods_response.status_code != 200:
+         st.error("Failed to download the visa decisions file.")
+         return None
+
+     # Process the .ods file
+     ods_file = BytesIO(ods_response.content)
+     df = pd.read_excel(ods_file, engine="odf")
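+     # Reading .ods files with engine="odf" requires the odfpy package to be installed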
+
+     # Print the columns to inspect what they look like (for debugging purposes)
+     print("Columns before cleaning:", df.columns.tolist())
+
+     # Drop rows in which every value is NaN
+     df.dropna(how="all", inplace=True)
+     df.reset_index(drop=True, inplace=True)
+
+     # Print the columns again after cleaning (for debugging purposes)
+     print("Columns after cleaning:", df.columns.tolist())
+
+     # If there are extra columns, drop them
+     if len(df.columns) > 2:
+         df = df.iloc[:, :2]  # Keep only the first two columns
+
+     # Rename the columns if they match the expected ones
+     if len(df.columns) == 2:
+         df.columns = ["Application Number", "Decision"]
+     else:
+         st.error("Insufficient data columns detected.")
+         return None
+
+     df["Application Number"] = df["Application Number"].astype(str)
+     return df
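
For context (not part of this commit): a minimal sketch of how a page in the app could consume the cached loader. The module name dataframe matches the file above; the file name app.py and the lookup UI are illustrative assumptions, not code from the repository.

# app.py -- illustrative sketch only; assumes dataframe.py as committed above
import streamlit as st
from dataframe import fetch_data

df = fetch_data()  # served from the st.cache_data cache while the TTL lasts
if df is not None:
    st.write(f"{len(df)} visa decisions loaded")
    app_number = st.text_input("Application number")
    if app_number:
        match = df[df["Application Number"] == app_number.strip()]
        if match.empty:
            st.warning("No decision found for that application number.")
        else:
            st.table(match)

Because fetch_data is wrapped in @st.cache_data(ttl=3600), Streamlit reruns reuse the parsed DataFrame and the website is re-scraped at most once an hour.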