prithivMLmods committed on
Commit
dd55678
1 Parent(s): 2f0546d

Update standard.txt

Files changed (1)
  1. standard.txt +101 -0
standard.txt CHANGED
@@ -0,0 +1,101 @@
+ import os
+ import requests
+ from bs4 import BeautifulSoup
+ from urllib.parse import urljoin, urlparse
+ from zipfile import ZipFile
+ from io import BytesIO
+ import gradio as gr
+
+ def download_file(url, session):
+     """Download a file and return its content."""
+     try:
+         response = session.get(url)
+         response.raise_for_status()
+         return response.content
+     except requests.exceptions.RequestException as e:
+         print(f"Error downloading {url}: {e}")
+         return None
+
+ def save_webpage_as_zip(url):
+     """Save a webpage and its assets as a ZIP file."""
+     session = requests.Session()
+     response = session.get(url)
+     response.raise_for_status()
+
+     soup = BeautifulSoup(response.content, 'html.parser')
+     temp_dir = 'temp_webpage'
+     if not os.path.exists(temp_dir):
+         os.makedirs(temp_dir)
+
+     main_html_path = os.path.join(temp_dir, 'index.html')
+     with open(main_html_path, 'wb') as f:
+         f.write(response.content)
+     assets = []
+     for tag in soup.find_all(['img', 'link', 'script']):
+         if tag.name == 'img' and tag.get('src'):
+             assets.append(tag['src'])
+         elif tag.name == 'link' and tag.get('href'):
+             assets.append(tag['href'])
+         elif tag.name == 'script' and tag.get('src'):
+             assets.append(tag['src'])
+
+     # Download and save all assets
+     for asset in assets:
+         asset_url = urljoin(url, asset)
+         asset_path = urlparse(asset_url).path.lstrip('/')
+         asset_full_path = os.path.join(temp_dir, asset_path)
+
+         if not asset_path or asset_path.endswith('/'):
+             print(f"Skipping directory {asset_full_path}")
+             continue
+
+
+         os.makedirs(os.path.dirname(asset_full_path), exist_ok=True)
+
+
+         content = download_file(asset_url, session)
+         if content:
+             if os.path.isdir(asset_full_path):
+                 print(f"Skipping directory {asset_full_path}")
+                 continue
+             with open(asset_full_path, 'wb') as f:
+                 f.write(content)
+
+     zip_buffer = BytesIO()
+     with ZipFile(zip_buffer, 'w') as zipf:
+         for root, _, files in os.walk(temp_dir):
+             for file in files:
+                 file_path = os.path.join(root, file)
+                 zipf.write(file_path, os.path.relpath(file_path, temp_dir))
+
+     for root, _, files in os.walk(temp_dir, topdown=False):
+         for file in files:
+             os.remove(os.path.join(root, file))
+         os.rmdir(root)
+
+     zip_buffer.seek(0)
+     return zip_buffer
+
+ def generate_zip_file(url):
+     """Generate ZIP file from a webpage URL."""
+     zip_buffer = save_webpage_as_zip(url)
+     temp_zip_path = "webpage.zip"
+     with open(temp_zip_path, 'wb') as f:
+         f.write(zip_buffer.read())
+     return temp_zip_path
+
+ with gr.Blocks(theme="bethecloud/storj_theme") as demo:
+     gr.Markdown("## Webpage to ZIP Downloader 🔗")
+     gr.Markdown("Enter a URL to download the webpage and its assets as a ZIP file.")
+
+     url_input = gr.Textbox(label="Website URL", placeholder="Enter a URL (e.g., https://www.example.com)")
+
+     download_button = gr.Button("Download as ZIP")
+     output_file = gr.File(label="Download")
+
+     def set_example_url(url):
+         url_input.value = url
+
+     download_button.click(fn=generate_zip_file, inputs=url_input, outputs=output_file)
+
+ demo.launch()
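
For quick local testing, the download logic can also be exercised without the Gradio UI. A minimal sketch, assuming the file above is saved as webpage_zip.py (a hypothetical module name) and the trailing demo.launch() call is moved under an if __name__ == "__main__": guard so importing the module does not start the web app:

# Call save_webpage_as_zip directly and write the resulting ZIP to disk
from webpage_zip import save_webpage_as_zip

buffer = save_webpage_as_zip("https://www.example.com")
with open("example_page.zip", "wb") as f:
    f.write(buffer.getvalue())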