MLR-Copilot / benchmarks /fathomnet /env /download_images.py
Lim0011's picture
Upload 251 files
85e3d20 verified
raw
history blame
2.28 kB
# -*- coding: utf-8 -*-
"""
download_images
Script to retrieve images for the 2023 FathomNet out-of-sample challenge as part of FGVC 10.
Assumes the COCO-formatted annotation file has been downloaded from http://www.kaggle.com/competitions/fathomnet-out-of-sample-detection
"""
# Author: Eric Orenstein (eorenstein@mbari.org)
import os
import sys
import glob
import json
import requests
import logging
import argparse
import progressbar
import pandas as pd
from shutil import copyfileobj
def download_imgs(imgs, outdir=None, timeout=30):
    """
    Download images to an output dir
    :param imgs: iterable of (file name, url) pairs
    :param outdir: desired directory [default to an 'images' folder in the working directory]
    :param timeout: seconds to wait on the server before a request is abandoned
    :return : None
    """
    # set the out directory to default if not specified
    if not outdir:
        outdir = os.path.join(os.getcwd(), 'images')

    # make the directory (and any missing parents) if it does not exist
    if not os.path.exists(outdir):
        os.makedirs(outdir, exist_ok=True)
        logging.info(f"Created directory {outdir}")

    flag = 0  # keep track of how many image downloaded
    failed = 0  # keep track of how many urls answered with an HTTP error

    for name, url in progressbar.progressbar(imgs):
        file_name = os.path.join(outdir, name)

        # only download if the image does not exist in the outdir
        if os.path.exists(file_name):
            continue

        # Stream into a side file and rename once complete, so an interrupted
        # transfer never leaves a truncated image that a later run would skip.
        part_name = file_name + '.part'
        try:
            # the context manager returns the connection to the pool when done
            with requests.get(url, stream=True, timeout=timeout) as resp:
                if not resp.ok:
                    # do not save an HTML/JSON error body under an image name
                    logging.warning(f"Skipping {url}: HTTP status {resp.status_code}")
                    failed += 1
                    continue
                # let urllib3 undo gzip/deflate transfer encoding while reading
                resp.raw.decode_content = True
                with open(part_name, 'wb') as f:
                    copyfileobj(resp.raw, f)
            os.replace(part_name, file_name)
        finally:
            # only still present when the transfer did not finish
            if os.path.exists(part_name):
                os.remove(part_name)
        flag += 1

    logging.info(f"Downloaded {flag} new images to {outdir}")
    if failed:
        logging.warning(f"{failed} images could not be downloaded")
if __name__ == "__main__":
    # Command line: a required COCO annotation file and an optional output folder.
    parser = argparse.ArgumentParser(description="Download images from a COCO annotation file")
    parser.add_argument('dataset', type=str, help='Path to json COCO annotation file')
    parser.add_argument('--outpath', type=str, default=None, help='Path to desired output folder')
    args = parser.parse_args()

    logging.basicConfig(level=logging.INFO)
    logging.info(f'opening {args.dataset}')

    # JSON is UTF-8 by specification; do not depend on the platform default encoding
    with open(args.dataset, 'r', encoding='utf-8') as ff:
        dataset = json.load(ff)

    ims = pd.DataFrame(dataset['images'])
    logging.info(f'retrieving {ims.shape[0]} images')

    # Build a real list of (file name, url) pairs: a bare zip iterator has no
    # len(), which leaves the progress bar without a total to report against.
    ims = list(zip(ims['file_name'].to_list(), ims['coco_url'].to_list()))

    # download images
    download_imgs(ims, outdir=args.outpath)