"""Build, and optionally launch, a Jean Zay SLURM array job for DINOv2 embeddings.

The script configures a ``JeanZayExperiment`` that runs
``data/extract_embeddings/dino_v2.py`` as a SLURM job array with one task per
split, forwarding the input/output paths and the split count to that command.
The command is always built when the file is run as a script; it is submitted
only when ``--launch`` is given.
"""

import argparse
import os
import sys
from pathlib import Path

# Three ``.parent`` hops up from this file must be on ``sys.path`` before the
# ``jean_zay`` import below (presumably the repository root -- confirm).
sys.path.append(str(Path(__file__).resolve().parent.parent.parent))

from jean_zay.launch import JeanZayExperiment  # noqa: E402


def parse_mode() -> argparse.Namespace:
    """Parse the command-line options of the launcher.

    Returns:
        The parsed namespace with ``launch`` (bool), ``number_of_splits``
        (int, default 1), ``input_path`` and ``output_path`` (str, or ``None``
        when omitted).
    """
    parser = argparse.ArgumentParser(
        description="Extract embeddings from a dataset using DINOv2"
    )
    parser.add_argument(
        "--launch",
        action="store_true",
        help="Launch the experiment",
    )
    parser.add_argument(
        "--number_of_splits",
        type=int,
        help="Number of splits to process",
        default=1,
    )
    # NOTE(review): the two paths are optional and default to None, so None can
    # be forwarded to the built command -- confirm whether they should be
    # required.
    parser.add_argument(
        "--input_path",
        type=str,
        help="Path to the input dataset",
    )
    parser.add_argument(
        "--output_path",
        type=str,
        help="Path to the output dataset",
    )
    return parser.parse_args()


# Arguments are parsed at module level because the experiment definition below
# depends on them.
args = parse_mode()

cmd_modifiers = []
exps = []

exp_name = "preprocess_data"
job_name = "preprocess_data"

# One SLURM array task is requested per split.
jz_exp = JeanZayExperiment(
    exp_name,
    job_name,
    slurm_array_nb_jobs=args.number_of_splits,
    cmd_path="data/extract_embeddings/dino_v2.py",
    num_nodes=1,
    num_gpus_per_node=1,
    qos="t3",
    account="mya",
    gpu_type="h100",
    time="02:00:00",
)
exps.append(jz_exp)

trainer_modifiers = {}
exp_modifier = {
    "--input_path": args.input_path,
    "--output_path": args.output_path,
    "--number_of_splits": args.number_of_splits,
    # Passed as a literal shell variable reference, not expanded here.
    "--split_index": "${SLURM_ARRAY_TASK_ID}",
}
# Keys from exp_modifier take precedence over trainer_modifiers on collision.
cmd_modifiers.append({**trainer_modifiers, **exp_modifier})


if __name__ == "__main__":
    for exp, cmd_modifier in zip(exps, cmd_modifiers):
        exp.build_cmd(cmd_modifier)
        # Without --launch the command is only built, never submitted.
        if args.launch:
            exp.launch()