Spaces:
Sleeping
Sleeping
Update scrape_3gpp.py
Browse files- scrape_3gpp.py +8 -4
scrape_3gpp.py
CHANGED
@@ -76,7 +76,8 @@ def scrape(url, excel_file, folder_name, status_list, progress=gr.Progress()):
|
|
76 |
download_directory = folder_name
|
77 |
if not os.path.exists(download_directory):
|
78 |
os.makedirs(download_directory)
|
79 |
-
|
|
|
80 |
print(f'filenames: {status_filenames}')
|
81 |
if not filenames and not status_filenames:
|
82 |
print("No Excel file provided, or no valid URLs found in the file.")
|
@@ -91,8 +92,7 @@ def scrape(url, excel_file, folder_name, status_list, progress=gr.Progress()):
|
|
91 |
|
92 |
# Filtrer les liens se terminant par ".zip"
|
93 |
zip_links = [link['href'] for link in links if link['href'].endswith('.zip')]
|
94 |
-
|
95 |
-
pourcentss = 0.1
|
96 |
# Télécharger chaque fichier zip
|
97 |
for zip_link in zip_links:
|
98 |
if download_num%10 == 0:
|
@@ -121,7 +121,11 @@ def scrape(url, excel_file, folder_name, status_list, progress=gr.Progress()):
|
|
121 |
for file_url in status_filenames:
|
122 |
filename = os.path.basename(file_url)
|
123 |
save_path = os.path.join(download_directory, filename)
|
124 |
-
|
|
|
|
|
|
|
|
|
125 |
try:
|
126 |
with requests.get(file_url, stream=True) as r:
|
127 |
r.raise_for_status()
|
|
|
76 |
download_directory = folder_name
|
77 |
if not os.path.exists(download_directory):
|
78 |
os.makedirs(download_directory)
|
79 |
+
download_num = 0
|
80 |
+
pourcentss = 0.1
|
81 |
print(f'filenames: {status_filenames}')
|
82 |
if not filenames and not status_filenames:
|
83 |
print("No Excel file provided, or no valid URLs found in the file.")
|
|
|
92 |
|
93 |
# Filtrer les liens se terminant par ".zip"
|
94 |
zip_links = [link['href'] for link in links if link['href'].endswith('.zip')]
|
95 |
+
|
|
|
96 |
# Télécharger chaque fichier zip
|
97 |
for zip_link in zip_links:
|
98 |
if download_num%10 == 0:
|
|
|
121 |
for file_url in status_filenames:
|
122 |
filename = os.path.basename(file_url)
|
123 |
save_path = os.path.join(download_directory, filename)
|
124 |
+
if download_num%10 == 0:
|
125 |
+
pourcentss = pourcentss + download_num/500
|
126 |
+
progress(pourcentss,desc='Telechargement')
|
127 |
+
download_num = 0
|
128 |
+
download_num+=1
|
129 |
try:
|
130 |
with requests.get(file_url, stream=True) as r:
|
131 |
r.raise_for_status()
|