File size: 3,945 Bytes
98dd89a
aa0cd67
42114b1
e762ee4
98dd89a
10c3ba5
e762ee4
10c3ba5
f609061
2298f31
 
 
 
 
 
 
 
 
 
 
 
 
 
10c3ba5
2298f31
 
 
 
10c3ba5
2298f31
c5cba5d
2298f31
10c3ba5
2298f31
 
 
 
10c3ba5
2298f31
10c3ba5
2298f31
 
 
 
 
10c3ba5
2298f31
 
 
 
10c3ba5
2298f31
 
 
 
42114b1
2298f31
 
 
42114b1
2298f31
 
 
 
10c3ba5
2298f31
 
42114b1
10c3ba5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42114b1
10c3ba5
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
import requests  # For making HTTP requests
import pandas as pd
import streamlit as st
from io import BytesIO
from bs4 import BeautifulSoup  # Add this import for BeautifulSoup
from fpdf import FPDF  # For generating PDFs

# Function to fetch data
@st.cache_data(ttl=3600)
def fetch_data():
    """Scrape the embassy page, download the visa-decisions .ods file,
    and return ``(DataFrame, file_name)``.

    Returns:
        tuple: (df, file_name) where ``df`` has two string-labelled columns
        ("Application Number", "Decision"), or ``(None, None)`` on any
        failure (an ``st.error`` is shown in that case).
    """
    # URL of the website to scrape
    url = "https://www.ireland.ie/en/india/newdelhi/services/visas/processing-times-and-decisions/"
    headers = {
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
            "(KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36"
        )
    }

    # Fetch the webpage. A timeout is essential: without one a stalled
    # connection hangs the Streamlit script forever; a network exception
    # would otherwise crash the app instead of showing the error banner.
    try:
        response = requests.get(url, headers=headers, timeout=30)
    except requests.RequestException:
        st.error("Failed to fetch the webpage. Please try again later.")
        return None, None
    if response.status_code != 200:
        st.error("Failed to fetch the webpage. Please try again later.")
        return None, None

    # Parse the HTML to find the .ods link
    soup = BeautifulSoup(response.content, "html.parser")
    file_url = None
    file_name = None
    for link in soup.find_all("a"):
        if "Visa decisions made from 1 January 2025" in link.get_text():
            href = link.get("href")
            if not href:  # anchor without href — keep scanning
                continue
            file_url = href
            file_name = link.get_text().strip()  # Extract filename from link text
            if not file_url.startswith("http"):
                # Resolve a relative link against the page URL.
                file_url = requests.compat.urljoin(url, file_url)
            break

    if not file_url or not file_name:
        st.error("Could not find the visa decisions file link on the website.")
        return None, None

    # Fetch the .ods file (same timeout/exception handling as above).
    try:
        ods_response = requests.get(file_url, headers=headers, timeout=60)
    except requests.RequestException:
        st.error("Failed to download the visa decisions file.")
        return None, None
    if ods_response.status_code != 200:
        st.error("Failed to download the visa decisions file.")
        return None, None

    # Process the .ods file (pandas needs the "odf" engine for OpenDocument).
    ods_file = BytesIO(ods_response.content)
    df = pd.read_excel(ods_file, engine="odf")

    # Drop unnecessary columns
    df.dropna(how="all", inplace=True)  # Drop rows with all NaN values
    df.reset_index(drop=True, inplace=True)

    # Keep only the first two columns
    if len(df.columns) > 2:
        df = df.iloc[:, :2]  # Keep only the first two columns

    # Rename columns
    if len(df.columns) == 2:
        df.columns = ["Application Number", "Decision"]
    else:
        st.error("Insufficient data columns detected.")
        return None, None

    # Normalize application numbers to strings for display/matching.
    df["Application Number"] = df["Application Number"].astype(str)

    return df, file_name

# Fetch the data once at script run; fetch_data is cached for 1h (ttl=3600),
# and returns (None, None) on any failure, which the download section checks.
precomputed_df, file_name = fetch_data()

# Function to generate PDF from DataFrame
def generate_pdf(dataframe, filename):
    """Render *dataframe* as a simple two-column PDF table.

    Args:
        dataframe: DataFrame with "Application Number" and "Decision" columns.
        filename: Title text printed centred at the top of the document.

    Returns:
        BytesIO: the finished PDF, positioned at offset 0.
    """
    pdf = FPDF()
    pdf.set_auto_page_break(auto=True, margin=10)
    pdf.add_page()
    pdf.set_font("Arial", size=12)

    # Title line, then a gap before the table.
    pdf.cell(200, 10, txt=filename, ln=True, align="C")
    pdf.ln(10)

    # Table Header
    pdf.set_font("Arial", "B", 10)
    pdf.cell(50, 10, "Application Number", border=1)
    pdf.cell(50, 10, "Decision", border=1)
    pdf.ln()

    # Table Data — cast cells to str defensively: pdf.cell requires text,
    # and the "Decision" column may contain non-string values.
    pdf.set_font("Arial", size=10)
    for _, row in dataframe.iterrows():
        pdf.cell(50, 10, str(row["Application Number"]), border=1)
        pdf.cell(50, 10, str(row["Decision"]), border=1)
        pdf.ln()

    # BUG FIX: pdf.output(buffer, "F") is wrong — dest "F" expects a file
    # *path* string, not a file object, and raises at runtime. Serialize
    # in memory instead. PyFPDF returns a latin-1 str for dest="S";
    # fpdf2 returns a bytearray — handle both.
    raw = pdf.output(dest="S")
    pdf_bytes = raw.encode("latin-1") if isinstance(raw, str) else bytes(raw)
    return BytesIO(pdf_bytes)

# Provide Download Options
# Only render the download UI when fetch_data() succeeded (it returns
# (None, None) on failure and has already displayed an st.error).
if precomputed_df is not None and file_name:
    st.success("Data fetched successfully!")

    # Excel Download: write the DataFrame to an in-memory .xlsx buffer
    # and rewind it so Streamlit streams from the start.
    excel_buffer = BytesIO()
    precomputed_df.to_excel(excel_buffer, index=False, engine="openpyxl")
    excel_buffer.seek(0)
    st.download_button(
        label="Download Excel File",
        data=excel_buffer,
        file_name=f"{file_name}.xlsx",
        mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    )

    # PDF Download: same data rendered as a two-column PDF table.
    pdf_buffer = generate_pdf(precomputed_df, file_name)
    st.download_button(
        label="Download PDF File",
        data=pdf_buffer,
        file_name=f"{file_name}.pdf",
        mime="application/pdf",
    )