Spaces:
Runtime error
Runtime error
# -*- coding: utf-8 -*-
"""
download_images
Script to retrieve images for the 2023 FathomNet out-of-sample challenge as part of FGVC 10.
Assumes the COCO-formatted annotation file has been downloaded from http://www.kaggle.com/competitions/fathomnet-out-of-sample-detection
"""
# Author: Eric Orenstein (eorenstein@mbari.org) | |
import os | |
import sys | |
import glob | |
import json | |
import requests | |
import logging | |
import argparse | |
import progressbar | |
import pandas as pd | |
from shutil import copyfileobj | |
def download_imgs(imgs, outdir=None, timeout=30):
    """
    Download images to an output dir

    Images whose target file already exists in the output directory are
    skipped, so an interrupted run can be resumed by calling this again.
    A download that fails with a requests error (connection problem, timeout,
    non-2xx HTTP status) is logged and skipped instead of aborting the run.

    :param imgs: iterable of (file_name, url) pairs
    :param outdir: desired directory [default to 'images' in the working directory]
    :param timeout: timeout in seconds handed to requests.get for each image
    :return : None
    """
    # set the out directory to default if not specified
    if not outdir:
        outdir = os.path.join(os.getcwd(), 'images')

    # make the directory (including missing parents) if it does not exist
    if not os.path.exists(outdir):
        os.makedirs(outdir, exist_ok=True)
        logging.info(f"Created directory {outdir}")

    flag = 0  # keep track of how many image downloaded
    failed = 0  # keep track of how many downloads were skipped due to errors

    for name, url in progressbar.progressbar(imgs):
        file_name = os.path.join(outdir, name)

        # only download if the image does not exist in the outdir
        if os.path.exists(file_name):
            continue

        # Write to a temporary name first and rename on success, so a broken
        # transfer never leaves a truncated file that a later run would
        # mistake for a finished download.
        part_name = file_name + '.part'
        try:
            # the context manager releases the connection even on errors
            with requests.get(url, stream=True, timeout=timeout) as resp:
                # refuse to store an HTTP error page as if it were an image
                resp.raise_for_status()
                resp.raw.decode_content = True
                with open(part_name, 'wb') as f:
                    copyfileobj(resp.raw, f)
            os.replace(part_name, file_name)
        except requests.RequestException as err:
            failed += 1
            logging.warning(f"Could not download {url}: {err}")
            continue
        finally:
            # after a successful rename the temporary file is gone already;
            # this only cleans up after a failed or interrupted transfer
            if os.path.exists(part_name):
                os.remove(part_name)

        flag += 1

    logging.info(f"Downloaded {flag} new images to {outdir}")
    if failed:
        logging.warning(f"Failed to download {failed} images")
# Command-line entry point: read a COCO annotation file and download every
# image listed under its 'images' key.
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Download images from a COCO annotation file")
    parser.add_argument('dataset', type=str, help='Path to json COCO annotation file')
    parser.add_argument('--outpath', type=str, default=None, help='Path to desired output folder')
    args = parser.parse_args()

    logging.basicConfig(level=logging.INFO)
    logging.info(f'opening {args.dataset}')

    # JSON is UTF-8 by specification; do not depend on the platform locale
    with open(args.dataset, 'r', encoding='utf-8') as ff:
        dataset = json.load(ff)

    ims = pd.DataFrame(dataset['images'])
    logging.info(f'retrieving {ims.shape[0]} images')

    # Build a real list of (file_name, url) pairs rather than a one-shot zip
    # iterator: a list has a length, which lets the progress bar in
    # download_imgs report a total instead of an unknown-length counter.
    ims = list(zip(ims['file_name'].to_list(), ims['coco_url'].to_list()))

    # download images
    download_imgs(ims, outdir=args.outpath)