|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import os |
|
import requests |
|
import tarfile |
|
import zipfile |
|
|
|
def _download_file(url, filepath):
    """Stream the body of *url* into *filepath*, raising on HTTP or I/O errors."""
    # (connect, read) timeouts in seconds. requests never times out by default,
    # so without this a stalled server would hang the whole run.
    with requests.get(url, stream=True, timeout=(10, 60)) as r:
        r.raise_for_status()
        with open(filepath, 'wb') as f:
            for chunk in r.iter_content(chunk_size=8192):
                f.write(chunk)


def _extract_archive(filepath, dest_path):
    """Unpack a .tar.gz/.tgz/.zip archive into *dest_path*.

    Returns the list of member names, or None when the file extension is not
    a supported archive type (nothing is extracted in that case).
    """
    if filepath.endswith(('.tar.gz', '.tgz')):
        with tarfile.open(filepath, 'r:gz') as tar:
            names = tar.getnames()
            if hasattr(tarfile, 'data_filter'):
                # The archive comes from the network: the 'data' filter refuses
                # members that would be written outside dest_path.
                tar.extractall(path=dest_path, filter='data')
            else:
                # Older Python without extraction filters: unfiltered fallback.
                tar.extractall(path=dest_path)
        return names
    if filepath.endswith('.zip'):
        with zipfile.ZipFile(filepath, 'r') as zip_ref:
            names = zip_ref.namelist()
            zip_ref.extractall(dest_path)
        return names
    return None


def _top_level_name(names):
    """Return the first real leading path component among *names*, or None."""
    for name in names:
        for part in name.split('/'):
            # Skip the empty / '.' / '..' components produced by members such
            # as './data/x' or '/data/x' so the real folder name is returned.
            if part not in ('', '.', '..'):
                return part
    return None


def download_and_extract(url, dest_path, rename_to=None):
    """Download an archive into *dest_path*, unpack it and delete the archive.

    Args:
        url: Location of the file to download. The last path segment (without
            query string or fragment) is used as the local file name.
        dest_path: Directory to download and extract into; created if missing.
        rename_to: Optional new name for the archive's top-level folder,
            relative to *dest_path*.

    Returns:
        None. Progress is reported with ``print``.

    Failures while downloading, extracting or renaming are caught, reported
    with ``print`` and not re-raised, so a caller processing several datasets
    can carry on with the next one. The downloaded archive is removed even
    when extraction fails.
    """
    os.makedirs(dest_path, exist_ok=True)

    # Strip fragment and query string so e.g. 'data.zip?raw=true' is still
    # recognised as a zip file and gets a clean local name.
    filename = url.split('#', 1)[0].split('?', 1)[0].split('/')[-1]
    filepath = os.path.join(dest_path, filename)

    try:
        extracted_folder = None
        try:
            _download_file(url, filepath)
            print(f"Downloaded {filename} successfully.")

            names = _extract_archive(filepath, dest_path)
            if names is not None:
                extracted_folder = _top_level_name(names)
                print(f"Extracted {filename} successfully.")
        finally:
            # Clean up on success *and* on failure so no partial or corrupt
            # archive is left in dest_path.
            if os.path.isfile(filepath):
                os.remove(filepath)
                print(f"Removed {filename} successfully.")

        if rename_to and extracted_folder:
            extracted_folder_path = os.path.join(dest_path, extracted_folder)
            new_folder = os.path.join(dest_path, rename_to)
            if os.path.exists(extracted_folder_path):
                os.rename(extracted_folder_path, new_folder)
                print(f"Renamed {extracted_folder_path} to {new_folder} successfully.")
            else:
                print(f"Extracted folder {extracted_folder_path} does not exist.")
    except Exception as e:
        # Deliberate best-effort boundary: report the problem and return.
        print(f"An error occurred: {e}")
|
|
|
# Archives to fetch, keyed by use-case name; the key also names the
# destination sub-directory.
datasets = {
    'image_classification': 'http://download.tensorflow.org/example_images/flower_photos.tgz',
    'human_activity_recognition': 'https://www.cis.fordham.edu/wisdm/includes/datasets/latest/WISDM_ar_latest.tar.gz',
    'hand_posture': 'https://raw.githubusercontent.com/STMicroelectronics/stm32ai-modelzoo-services/main/hand_posture/datasets/ST_VL53L8CX_handposture_dataset.zip',
    'audio_event_detection': 'https://github.com/karolpiczak/ESC-50/archive/master.zip',
}

# Download and unpack every dataset in turn.
for dataset, url in datasets.items():
    print(f"Processing {dataset}...")
    # Only the audio dataset's extracted folder is renamed (to 'ESC-50');
    # passing None for the others is the same as omitting the argument.
    download_and_extract(
        url,
        f'/home/appuser/datasets/{dataset}',
        rename_to='ESC-50' if dataset == 'audio_event_detection' else None,
    )
    print(f"Finished processing {dataset}.")