enhance prepare datalist file
Changed files:
- configs/metadata.json +2 -1
- scripts/prepare_datalist.py +7 -7
configs/metadata.json
@@ -1,7 +1,8 @@
 {
     "schema": "https://github.com/Project-MONAI/MONAI-extra-test-data/releases/download/0.8.1/meta_schema_20220324.json",
-    "version": "0.4.6",
+    "version": "0.4.7",
     "changelog": {
+        "0.4.7": "enhance prepare datalist file",
         "0.4.6": "add dataset dir example",
         "0.4.5": "update ONNX-TensorRT descriptions",
         "0.4.4": "update error links",
scripts/prepare_datalist.py
@@ -26,18 +26,18 @@ def produce_sample_dict(line: str):
     return {"label": seg[0], "image": t1ce + t1 + t2 + flair}


-def produce_datalist(dataset_dir: str):
+def produce_datalist(dataset_dir: str, train_size: int = 200):
     """
     This function is used to split the dataset.
-    It will produce 200 samples for training, and the other samples are divided equally
-    into val and test sets.
+    It will produce "train_size" number of samples for training, and the other samples
+    are divided equally into val and test sets.
     """

     samples = sorted(glob.glob(os.path.join(dataset_dir, "*", "*"), recursive=True))
     datalist = []
     for line in samples:
         datalist.append(produce_sample_dict(line))
-    train_list, other_list = train_test_split(datalist, train_size=200)
+    train_list, other_list = train_test_split(datalist, train_size=train_size)
     val_list, test_list = train_test_split(other_list, train_size=0.5)

     return {"training": train_list, "validation": val_list, "testing": test_list}
@@ -48,11 +48,10 @@ def main(args):
     split the dataset and output the data list into a json file.
     """
     data_file_base_dir = os.path.join(os.path.abspath(args.path), "training")
-    output_json = args.output
     # produce deterministic data splits
     monai.utils.set_determinism(seed=123)
-    datalist = produce_datalist(dataset_dir=data_file_base_dir)
-    with open(output_json, "w") as f:
+    datalist = produce_datalist(dataset_dir=data_file_base_dir, train_size=args.train_size)
+    with open(args.output, "w") as f:
         json.dump(datalist, f)


@@ -67,6 +66,7 @@ if __name__ == "__main__":
     parser.add_argument(
         "--output", type=str, default="configs/datalist.json", help="relative path of output datalist json file."
     )
+    parser.add_argument("--train_size", type=int, default=200, help="number of training samples.")
     args = parser.parse_args()

     main(args)
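With the new --train_size flag, the previously hard-coded 200-sample training split becomes configurable at the command line. Assuming the dataset directory is still supplied through the script's existing path argument (only args.path is visible in this diff, so the flag name is an assumption) and using placeholder values, an invocation could look like:

python scripts/prepare_datalist.py --path /workspace/data/brats --output configs/datalist.json --train_size 300

The path and sample count above are illustrative, not values from this commit.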