File size: 533 Bytes
9c88e2b
 
f8a2041
9c88e2b
 
 
f8a2041
9c88e2b
f8a2041
 
9c88e2b
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
# import datasets
# import logging
import os
import json
# from tqdm import tqdm
# dataset_subs = os.listdir(PATH)

# print(dataset_subs)


# for ds in tqdm(dataset_subs):
#     try:
#         print(ds)
#         dataset = datasets.load_dataset("CarperAI/pile-v2-small-filtered",data_files=f"data/{ds}/data.json", split="train")
#         dataset.save_to_disk(f"cache_ds/{ds}")
#     except:
#         print(f"Error at {ds}")

# Collect the entry names found directly under the local "cache_ds" directory
# (presumably the per-subset caches written by the disabled loop above --
# confirm). os.listdir() returns names in arbitrary order, so the list is
# sorted to keep the generated JSON stable from run to run.
ds_subsets = sorted(os.listdir("cache_ds"))

# Dump the names as a JSON array into "documentation.json" in the current
# working directory; the encoding is explicit so the result does not depend
# on the platform's locale default.
with open("documentation.json", "w", encoding="utf-8") as f:
    json.dump(ds_subsets, f)