File size: 3,945 Bytes
98dd89a aa0cd67 42114b1 e762ee4 98dd89a 10c3ba5 e762ee4 10c3ba5 f609061 2298f31 10c3ba5 2298f31 10c3ba5 2298f31 c5cba5d 2298f31 10c3ba5 2298f31 10c3ba5 2298f31 10c3ba5 2298f31 10c3ba5 2298f31 10c3ba5 2298f31 42114b1 2298f31 42114b1 2298f31 10c3ba5 2298f31 42114b1 10c3ba5 42114b1 10c3ba5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 |
import requests # For making HTTP requests
import pandas as pd
import streamlit as st
from io import BytesIO
from bs4 import BeautifulSoup # Add this import for BeautifulSoup
from fpdf import FPDF # For generating PDFs
# Function to fetch data
def fetch_data():
    """Scrape the visa-decisions page and load the linked .ods spreadsheet.

    The page at ``url`` is searched for ``<a>`` elements whose text contains
    "Visa decisions made from 1 January 2025". The file behind that link is
    downloaded and parsed with pandas' ``odf`` engine, fully empty rows are
    dropped and only the first two columns are kept.

    Returns:
        tuple: ``(df, file_name)`` on success, where ``df`` has the columns
        "Application Number" (converted to ``str``) and "Decision", and
        ``file_name`` is the stripped text of the link. ``(None, None)`` on
        any failure, after the problem has been reported via ``st.error``.
    """
    # URL of the website to scrape
    # NOTE(review): the URL is empty in this copy of the file; it has to point
    # at the page that carries the download link for this function to succeed.
    url = ""
    headers = {
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
            "(KHTML, like Gecko) Chrome/ Safari/537.36"
        )
    }
    link_marker = "Visa decisions made from 1 January 2025"
    # Without a timeout a stalled server would hang the app indefinitely.
    timeout_seconds = 30

    # Fetch the webpage
    try:
        response = requests.get(url, headers=headers, timeout=timeout_seconds)
    except requests.RequestException:
        # Connection errors, timeouts and malformed URLs are reported the same
        # way as a non-200 answer instead of crashing the app.
        st.error("Failed to fetch the webpage. Please try again later.")
        return None, None
    if response.status_code != 200:
        st.error("Failed to fetch the webpage. Please try again later.")
        return None, None

    # Parse the HTML to find the .ods link
    soup = BeautifulSoup(response.content, "html.parser")
    file_url = None
    file_name = None
    for link in soup.find_all("a"):
        link_text = link.get_text()
        if link_marker not in link_text:
            continue
        href = link.get("href")
        if not href:
            # An anchor without href cannot be downloaded; skip it rather than
            # failing on None.startswith().
            continue
        if not href.startswith("http"):
            href = requests.compat.urljoin(url, href)
        # If several links match, the last usable one is kept.
        file_url = href
        file_name = link_text.strip()  # Extract filename from link text

    if not file_url or not file_name:
        st.error("Could not find the visa decisions file link on the website.")
        return None, None

    # Fetch the .ods file
    try:
        ods_response = requests.get(
            file_url, headers=headers, timeout=timeout_seconds
        )
    except requests.RequestException:
        st.error("Failed to download the visa decisions file.")
        return None, None
    if ods_response.status_code != 200:
        st.error("Failed to download the visa decisions file.")
        return None, None

    # Process the .ods file
    df = pd.read_excel(BytesIO(ods_response.content), engine="odf")

    # Drop rows with all NaN values and renumber the remaining ones
    df = df.dropna(how="all").reset_index(drop=True)

    # Exactly two columns are needed; fewer means the sheet is unusable.
    if len(df.columns) < 2:
        st.error("Insufficient data columns detected.")
        return None, None

    # Keep only the first two columns. The copy makes the result independent
    # of the parsed frame so the column assignment below does not trigger
    # pandas' chained-assignment warning.
    df = df.iloc[:, :2].copy()
    df.columns = ["Application Number", "Decision"]
    df["Application Number"] = df["Application Number"].astype(str)
    return df, file_name
# Fetch the data at module level (this runs every time the script executes).
# Both names are None when fetch_data() reported a failure.
precomputed_df, file_name = fetch_data()
# Function to generate PDF from DataFrame
def generate_pdf(dataframe, filename):
    """Render the decisions table into a PDF held in memory.

    Args:
        dataframe: DataFrame providing the columns "Application Number" and
            "Decision"; one table row is written per DataFrame row.
        filename: Text printed centred as the title line of the document.

    Returns:
        BytesIO: Buffer containing the PDF bytes, positioned at offset 0.
    """

    def pdf_text(value):
        # Missing values become empty cells instead of the literal "nan".
        if pd.isna(value):
            return ""
        # The core "Arial" font only covers Latin-1; characters outside it are
        # replaced with "?" so they cannot abort the whole export.
        return str(value).encode("latin-1", "replace").decode("latin-1")

    pdf = FPDF()
    pdf.set_auto_page_break(auto=True, margin=10)
    # A page must exist before anything can be drawn.
    pdf.add_page()

    pdf.set_font("Arial", size=12)
    pdf.cell(200, 10, pdf_text(filename), align="C")
    pdf.ln()

    # Table Header
    pdf.set_font("Arial", "B", 10)
    pdf.cell(50, 10, "Application Number", border=1)
    pdf.cell(50, 10, "Decision", border=1)
    pdf.ln()  # start the first data row on its own line

    # Table Data
    pdf.set_font("Arial", size=10)
    for number, decision in zip(
        dataframe["Application Number"], dataframe["Decision"]
    ):
        pdf.cell(50, 10, pdf_text(number), border=1)
        pdf.cell(50, 10, pdf_text(decision), border=1)
        pdf.ln()  # without this every row would continue on the same line

    # dest="S" returns the document instead of writing to a path: a str in
    # classic PyFPDF and a bytearray in fpdf2. Normalise both to bytes.
    rendered = pdf.output(dest="S")
    if isinstance(rendered, str):
        rendered = rendered.encode("latin-1")
    return BytesIO(bytes(rendered))
# Provide Download Options
# Only shown when fetch_data() succeeded; on failure it has already reported
# the problem via st.error and both names are None.
if precomputed_df is not None and file_name:
    st.success("Data fetched successfully!")

    # Excel Download
    excel_buffer = BytesIO()
    precomputed_df.to_excel(excel_buffer, index=False, engine="openpyxl")
    st.download_button(
        label="Download Excel File",
        # getvalue() returns the whole workbook regardless of the buffer's
        # current write position.
        data=excel_buffer.getvalue(),
        file_name=f"{file_name}.xlsx",
        mime=(
            "application/vnd.openxmlformats-officedocument"
            ".spreadsheetml.sheet"
        ),
    )

    # PDF Download
    pdf_buffer = generate_pdf(precomputed_df, file_name)
    st.download_button(
        label="Download PDF File",
        data=pdf_buffer.getvalue(),
        file_name=f"{file_name}.pdf",
        mime="application/pdf",
    )