multimodal/open_flamingo/tools/make_soft_link_laion.py
Li
init
5282eae
raw
history blame
No virus
797 Bytes
import os
import shutil
import glob
import random
from pprint import pprint
# Shared raw-data root. Not referenced anywhere in the visible part of this script.
DIR_COCO_VG = "/gpfs/u/home/LMCG/LMCGljnn/scratch-shared/junyan/raw"
# Root under which shards are globbed as
# <DIR>/laion_synthetic_filtered_large/part<i>/*.tar
DIR = "/gpfs/u/home/LMCG/LMCGljnn/scratch-shared/junyan/raw/blip2_pretraining/"
# Flat directory that receives one numbered symlink per shard.
OUT_DIR = "/gpfs/u/home/LMCG/LMCGljnn/scratch-shared/junyan/raw/blip2_pretraining/laion_synthetic_filtered_large/all"


def collect_tars(root=DIR, num_parts=10):
    """Return every ``*.tar`` found under the ``part<i>`` directories of *root*.

    Args:
        root: Directory containing ``laion_synthetic_filtered_large/part<i>/``.
        num_parts: Number of ``part<i>`` directories to scan (``part0`` ..
            ``part<num_parts - 1>``). Missing directories simply contribute
            no entries.

    Returns:
        A list of tar paths ordered by part index, then by path within the
        part.
    """
    tars = []
    for part in range(num_parts):
        pattern = os.path.join(
            root, "laion_synthetic_filtered_large", f"part{part}", "*.tar"
        )
        # glob.glob() yields matches in arbitrary order; sort so that the
        # numbering assigned by link_tars() is reproducible across runs.
        tars.extend(sorted(glob.glob(pattern)))
    return tars


def link_tars(tars, out_dir=OUT_DIR):
    """Create ``<out_dir>/<index:06d>.tar`` symlinks pointing at *tars*.

    The output directory is created if needed. A link that already exists and
    already points at the intended tar is left alone, so the script can be
    re-run safely.

    Args:
        tars: Sequence of tar paths; position in the sequence determines the
            zero-padded link name.
        out_dir: Directory in which the symlinks are created.

    Returns:
        The number of symlinks newly created by this call.

    Raises:
        FileExistsError: If a destination exists but is not a symlink to the
            intended tar (nothing is overwritten).
    """
    os.makedirs(out_dir, exist_ok=True)
    created = 0
    for index, tar in enumerate(tars):
        dst = os.path.join(out_dir, f"{index:06d}.tar")
        # Skip links left by a previous run that are already correct; any
        # other pre-existing path still makes os.symlink() raise.
        if os.path.islink(dst) and os.readlink(dst) == tar:
            continue
        os.symlink(tar, dst)
        created += 1
    return created


def main():
    """Gather the LAION shards, print a short summary, and link them."""
    tars = collect_tars()
    print(len(tars))
    pprint(tars[:20])
    link_tars(tars)


if __name__ == "__main__":
    main()