""" This code is to get a json of the images of smaller size. The record of all the images should be there in a dataframe. The dataframe either : 1. should have a column named `small_size` which contains the flag 1 if the size is less than 100*100 else 0. Or 2. should have a column named `dimensions` containing the size tuple. """ import json import os import pandas as pd import argparse def main(args): # load json with open(args.json_path, "r") as fp: data_from_json = json.load(fp) # load the df data_from_df = pd.read_csv(args.dataframe_path) # get the names of those files with size < 100*100 if args.column: col_to_check = args.column files_small_size = data_from_df.loc[data_from_df[col_to_check] == 1]["fpath"] else: assert "dimensions" in data_from_df.columns, "Either column should be given or a column named 'dimension' should be there in the df." # Get image names where any dimension is less than 100 files_small_size = data_from_df[data_from_df['dimensions'].apply(lambda x: any(dimension < 100 for dimension in x))]["fpath"] # creating a list of just the basenames of the files files_small_size_basename = [os.path.basename(filename) for filename in files_small_size] # got the names of the files with small sizes # to create a json for them now print("Creating json records with the images with small size.") data_small_imgs = [record for record in data_from_json if os.path.basename(record["image"]) in files_small_size_basename] print(f"Saving the json at {args.save_json}.") with open(args.save_json, "w") as fp: json.dump(data_small_imgs, fp, indent=4) if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("--json-path", type=str, help="Path of the json containing images data.") parser.add_argument("--dataframe-path", type=str, help="Path of the dataframe of all the images containing info about dimension.") parser.add_argument("--column", type=str, help="Name of the column if any, to consider instead of `dimensions` column. This column should contain flag 1 or 0 when the dimension is < 100*100 respectively.") parser.add_argument("--save-json", type=str, help="Path of the json to be saved with all the files with size < 100*100.") args = parser.parse_args() main(args)