prithivMLmods committed
Commit db48f0c
1 Parent(s): a92276e

Delete standard.txt

Files changed (1)
  1. standard.txt +0 -101
standard.txt DELETED
@@ -1,101 +0,0 @@
- import os
- import requests
- from bs4 import BeautifulSoup
- from urllib.parse import urljoin, urlparse
- from zipfile import ZipFile
- from io import BytesIO
- import gradio as gr
-
- def download_file(url, session):
-     """Download a file and return its content."""
-     try:
-         response = session.get(url)
-         response.raise_for_status()
-         return response.content
-     except requests.exceptions.RequestException as e:
-         print(f"Error downloading {url}: {e}")
-         return None
-
- def save_webpage_as_zip(url):
-     """Save a webpage and its assets as a ZIP file."""
-     session = requests.Session()
-     response = session.get(url)
-     response.raise_for_status()
-
-     soup = BeautifulSoup(response.content, 'html.parser')
-     temp_dir = 'temp_webpage'
-     if not os.path.exists(temp_dir):
-         os.makedirs(temp_dir)
-
-     main_html_path = os.path.join(temp_dir, 'index.html')
-     with open(main_html_path, 'wb') as f:
-         f.write(response.content)
-     assets = []
-     for tag in soup.find_all(['img', 'link', 'script']):
-         if tag.name == 'img' and tag.get('src'):
-             assets.append(tag['src'])
-         elif tag.name == 'link' and tag.get('href'):
-             assets.append(tag['href'])
-         elif tag.name == 'script' and tag.get('src'):
-             assets.append(tag['src'])
-
-     # Download and save all assets
-     for asset in assets:
-         asset_url = urljoin(url, asset)
-         asset_path = urlparse(asset_url).path.lstrip('/')
-         asset_full_path = os.path.join(temp_dir, asset_path)
-
-         if asset_path.endswith('/'):
-             print(f"Skipping directory {asset_full_path}")
-             continue
-
-
-         os.makedirs(os.path.dirname(asset_full_path), exist_ok=True)
-
-
-         content = download_file(asset_url, session)
-         if content:
-             if os.path.isdir(asset_full_path):
-                 print(f"Skipping directory {asset_full_path}")
-                 continue
-             with open(asset_full_path, 'wb') as f:
-                 f.write(content)
-
-     zip_buffer = BytesIO()
-     with ZipFile(zip_buffer, 'w') as zipf:
-         for root, _, files in os.walk(temp_dir):
-             for file in files:
-                 file_path = os.path.join(root, file)
-                 zipf.write(file_path, os.path.relpath(file_path, temp_dir))
-
-     for root, _, files in os.walk(temp_dir, topdown=False):
-         for file in files:
-             os.remove(os.path.join(root, file))
-         os.rmdir(root)
-
-     zip_buffer.seek(0)
-     return zip_buffer
-
- def generate_zip_file(url):
-     """Generate ZIP file from a webpage URL."""
-     zip_buffer = save_webpage_as_zip(url)
-     temp_zip_path = "webpage.zip"
-     with open(temp_zip_path, 'wb') as f:
-         f.write(zip_buffer.read())
-     return temp_zip_path
-
- with gr.Blocks(theme="bethecloud/storj_theme") as demo:
-     gr.Markdown("## Webpage to ZIP Downloader 🔗")
-     gr.Markdown("Enter a URL to download the webpage and its assets as a ZIP file.")
-
-     url_input = gr.Textbox(label="Website URL", placeholder="Enter a URL (e.g., https://www.example.com)")
-
-     download_button = gr.Button("Download as ZIP")
-     output_file = gr.File(label="Download")
-
-     def set_example_url(url):
-         url_input.value = url
-
-     download_button.click(fn=generate_zip_file, inputs=url_input, outputs=output_file)
-
- demo.launch()
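
For reference, the download-and-zip logic in the deleted file can also be exercised without the Gradio UI. A minimal sketch, assuming save_webpage_as_zip (together with its imports) has been copied into a standalone module named webpage_zip.py (a hypothetical name, not part of this repository):

    # Hypothetical usage: webpage_zip.py is assumed to hold save_webpage_as_zip
    # copied from the deleted standard.txt.
    from webpage_zip import save_webpage_as_zip

    zip_buffer = save_webpage_as_zip("https://www.example.com")  # BytesIO, rewound to position 0
    with open("example_com.zip", "wb") as out:
        out.write(zip_buffer.getvalue())  # persist the in-memory ZIP to disk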