SR05 committed on
Commit
f6064cc
·
verified ·
1 Parent(s): 80b1886

Update loading_file.py

Browse files
Files changed (1) hide show
  1. loading_file.py +42 -109
loading_file.py CHANGED
@@ -3,10 +3,8 @@ import pandas as pd
3
  import streamlit as st
4
  from io import BytesIO
5
  from bs4 import BeautifulSoup
6
- from fpdf import FPDF
7
 
8
# Function to fetch data
@st.cache_data(ttl=3600)
def fetch_data():
    """Scrape the embassy visa-decisions page and load the linked .ods file.

    Returns:
        tuple: ``(df, file_name)`` where ``df`` is a DataFrame with the
        columns ``Application Number`` (stripped strings) and ``Decision``,
        and ``file_name`` is the link text of the downloaded file.
        Returns ``(None, None)`` on any failure; an ``st.error`` message is
        shown to the user in that case.
    """
    url = "https://www.ireland.ie/en/india/newdelhi/services/visas/processing-times-and-decisions/"
    headers = {
        # NOTE(review): the User-Agent key/value was partially hidden by the
        # diff hunk; reconstructed from the visible fragment — confirm.
        # A browser-like User-Agent avoids trivial bot blocking on the site.
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
            "(KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36"
        )
    }

    # Fetch the webpage
    response = requests.get(url, headers=headers)
    if response.status_code != 200:
        st.error("Failed to fetch the webpage. Please try again later.")
        return None, None

    # Parse the HTML to find the .ods link
    soup = BeautifulSoup(response.content, "html.parser")
    file_url = None
    file_name = None
    for link in soup.find_all("a"):
        if "Visa decisions made from 1 January 2025" in link.get_text():
            file_url = link.get("href")
            file_name = link.get_text().strip()
            # BUGFIX: link.get("href") returns None for anchors without an
            # href attribute; the original called file_url.startswith(...)
            # unconditionally, which would raise AttributeError.
            if file_url and not file_url.startswith("http"):
                file_url = requests.compat.urljoin(url, file_url)
            break

    if not file_url or not file_name:
        st.error("Could not find the visa decisions file link on the website.")
        return None, None

    # Fetch the .ods file
    ods_response = requests.get(file_url, headers=headers)
    if ods_response.status_code != 200:
        st.error("Failed to download the visa decisions file.")
        return None, None

    # Read .ods file without assuming a header row location.
    ods_file = BytesIO(ods_response.content)
    df = pd.read_excel(ods_file, engine="odf", header=None)

    # Detect header row (find where 'Application Number' is located)
    header_index = df[
        df.astype(str).apply(lambda x: x.str.contains("Application Number", na=False)).any(axis=1)
    ].index
    if len(header_index) == 0:
        st.error("Could not find the header row containing 'Application Number'. Check the file format.")
        return None, None

    header_index = header_index[0]  # Get the first matching row index

    # Trim unnecessary rows and set correct header
    df = df.iloc[header_index:].reset_index(drop=True)
    df.columns = df.iloc[0]  # Set the first row as column headers
    df = df[1:].reset_index(drop=True)  # Remove the header row from data

    # Keep only relevant columns
    if "Application Number" not in df.columns or "Decision" not in df.columns:
        st.error("Required columns not found in the file.")
        return None, None

    df = df[["Application Number", "Decision"]]

    # Ensure "Application Number" is treated as a string to match user input correctly
    df["Application Number"] = df["Application Number"].astype(str).str.strip()

    print("Data fetched successfully.")

    return df, file_name
77
-
78
# Fetch the decisions table a single time at import; every later lookup in
# this module reads from these shared globals instead of re-downloading.
precomputed_df, file_name = fetch_data()
80
-
81
# Function to determine before/after status
def check_application_status(application_number):
    """Look up an application number in the precomputed decisions table.

    Args:
        application_number: the user-supplied application number; coerced to
            a stripped string before matching.

    Returns:
        str: the decision text if the number is present, a past/future hint
        if it falls outside the table's range, "No data found." otherwise,
        or "Error fetching data" when the table failed to load.
    """
    if precomputed_df is None:
        return "Error fetching data"

    application_number = str(application_number).strip()  # Ensure it's a string

    if application_number in precomputed_df["Application Number"].values:
        decision = precomputed_df.loc[
            precomputed_df["Application Number"] == application_number, "Decision"
        ].values[0]
        return f"Decision: {decision}"

    # BUGFIX: the original compared application numbers as plain strings, so
    # lexicographic ordering misclassified numbers of different lengths
    # (e.g. "9" > "10"). Compare numerically when the values are numeric,
    # falling back to the original string comparison otherwise.
    def _numeric(value):
        try:
            return int(str(value).strip())
        except (TypeError, ValueError):
            return None

    app_key = _numeric(application_number)
    numeric_keys = [
        k for k in (_numeric(v) for v in precomputed_df["Application Number"]) if k is not None
    ]

    if app_key is not None and numeric_keys:
        min_app_number, max_app_number = min(numeric_keys), max(numeric_keys)
        candidate = app_key
    else:
        # Non-numeric data: preserve the original string-ordering behaviour.
        min_app_number = precomputed_df["Application Number"].min()
        max_app_number = precomputed_df["Application Number"].max()
        candidate = application_number

    if candidate < min_app_number:
        return f"Application number {application_number} is from the past. Decision might not be recorded."
    elif candidate > max_app_number:
        return f"Application number {application_number} is in the future. Decision is pending."
    else:
        return "No data found."
100
-
101
# Function to generate a PDF
def generate_pdf(df, title="Visa Decisions"):
    """Render the decisions DataFrame into an in-memory PDF.

    Args:
        df: DataFrame with 'Application Number' and 'Decision' columns.
        title: heading printed at the top of the document.

    Returns:
        BytesIO: the PDF bytes, positioned at offset 0 ready for download.
    """
    pdf = FPDF()
    pdf.set_auto_page_break(auto=True, margin=15)
    # NOTE(review): the Arial core font is latin-1 only; non-latin decision
    # text would raise on output — confirm the data is latin-1 safe.
    pdf.add_page()
    pdf.set_font("Arial", size=12)

    pdf.cell(200, 10, txt=title, ln=True, align="C")
    pdf.ln(10)

    for _, row in df.iterrows():
        pdf.cell(50, 10, txt="Application Number:", ln=False)
        pdf.cell(100, 10, txt=str(row["Application Number"]), ln=True)

        pdf.cell(50, 10, txt="Decision:", ln=False)
        pdf.cell(100, 10, txt=str(row["Decision"]), ln=True)

        pdf.ln(5)

    # BUGFIX: FPDF.output(dest="F") expects a file *path*, so passing a
    # BytesIO object here produced no usable in-memory PDF. Ask FPDF for the
    # document as a string/bytes (dest="S") and wrap it ourselves.
    raw = pdf.output(dest="S")
    if isinstance(raw, str):  # PyFPDF returns str; fpdf2 returns bytes/bytearray
        raw = raw.encode("latin-1")
    pdf_output = BytesIO(bytes(raw))
    pdf_output.seek(0)

    return pdf_output

print("Loading File Module: generate_pdf is defined.")
 
3
  import streamlit as st
4
  from io import BytesIO
5
  from bs4 import BeautifulSoup
 
6
 
7
@st.cache_data(ttl=3600, max_entries=1)
def fetch_data():
    """Download the latest visa-decisions .ods file and return it as a DataFrame.

    Returns:
        pd.DataFrame with columns ['Application Number', 'Decision'] (the
        application numbers normalised to stripped strings), or None on any
        failure; an st.error message is displayed in that case.
    """
    url = "https://www.ireland.ie/en/india/newdelhi/services/visas/processing-times-and-decisions/"
    headers = {
        # NOTE(review): the User-Agent key/value was partially hidden by the
        # diff hunk; reconstructed from the visible fragment — confirm.
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
            "(KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36"
        )
    }

    response = requests.get(url, headers=headers)
    if response.status_code != 200:
        st.error("Failed to retrieve the webpage.")
        return None

    soup = BeautifulSoup(response.content, "html.parser")

    # Find the link to download the file
    file_url = None
    for link in soup.find_all("a"):
        if "Visa decisions made from 1 January 2025 to" in link.get_text(strip=True):
            file_url = link.get("href")
            break

    if not file_url:
        st.error("The file link was not found on the webpage.")
        return None

    # Make the link absolute if it's relative
    if not file_url.startswith("http"):
        file_url = requests.compat.urljoin(url, file_url)

    file_response = requests.get(file_url, headers=headers)
    if file_response.status_code != 200:
        st.error("Failed to download the file.")
        return None

    file_data = BytesIO(file_response.content)
    df = pd.read_excel(file_data, engine="odf")

    # Clean up and process the DataFrame
    df.drop(columns=["Unnamed: 0", "Unnamed: 1"], inplace=True, errors="ignore")
    df.dropna(how="all", inplace=True)
    df.reset_index(drop=True, inplace=True)

    # BUGFIX: blindly assigning two column names raises ValueError whenever
    # the sheet layout yields a different column count; keep exactly the
    # first two columns and fail with a clear message otherwise.
    if df.shape[1] < 2:
        st.error("Failed to download the file.")
        return None
    df = df.iloc[:, :2]
    df.columns = ["Application Number", "Decision"]

    # Normalise application numbers to stripped strings so lookups against
    # user-typed input match reliably.
    df["Application Number"] = df["Application Number"].astype(str).str.strip()

    return df