monai / medical
katielink committed
Commit 9feef16
1 Parent(s): 0e72621

enhance prepare datalist file

configs/metadata.json CHANGED
@@ -1,7 +1,8 @@
 {
     "schema": "https://github.com/Project-MONAI/MONAI-extra-test-data/releases/download/0.8.1/meta_schema_20220324.json",
-    "version": "0.4.6",
+    "version": "0.4.7",
     "changelog": {
+        "0.4.7": "enhance prepare datalist file",
         "0.4.6": "add dataset dir example",
         "0.4.5": "update ONNX-TensorRT descriptions",
         "0.4.4": "update error links",
scripts/prepare_datalist.py CHANGED
@@ -26,18 +26,18 @@ def produce_sample_dict(line: str):
     return {"label": seg[0], "image": t1ce + t1 + t2 + flair}
 
 
-def produce_datalist(dataset_dir: str):
+def produce_datalist(dataset_dir: str, train_size: int = 200):
     """
     This function is used to split the dataset.
-    It will produce 200 samples for training, and the other samples are divided equally
-    into val and test sets.
+    It will produce "train_size" number of samples for training, and the other samples
+    are divided equally into val and test sets.
     """
 
     samples = sorted(glob.glob(os.path.join(dataset_dir, "*", "*"), recursive=True))
     datalist = []
     for line in samples:
         datalist.append(produce_sample_dict(line))
-    train_list, other_list = train_test_split(datalist, train_size=200)
+    train_list, other_list = train_test_split(datalist, train_size=train_size)
     val_list, test_list = train_test_split(other_list, train_size=0.5)
 
     return {"training": train_list, "validation": val_list, "testing": test_list}
@@ -48,11 +48,10 @@ def main(args):
     split the dataset and output the data list into a json file.
     """
     data_file_base_dir = os.path.join(os.path.abspath(args.path), "training")
-    output_json = args.output
     # produce deterministic data splits
     monai.utils.set_determinism(seed=123)
-    datalist = produce_datalist(dataset_dir=data_file_base_dir)
-    with open(output_json, "w") as f:
+    datalist = produce_datalist(dataset_dir=data_file_base_dir, train_size=args.train_size)
+    with open(args.output, "w") as f:
         json.dump(datalist, f)
 
 
@@ -67,6 +66,7 @@ if __name__ == "__main__":
     parser.add_argument(
         "--output", type=str, default="configs/datalist.json", help="relative path of output datalist json file."
     )
+    parser.add_argument("--train_size", type=int, default=200, help="number of training samples.")
     args = parser.parse_args()
 
     main(args)
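
For orientation, a minimal sketch of what the reworked split does. The sample list below is a placeholder (real entries come from produce_sample_dict), and the snippet only relies on the scikit-learn train_test_split and the MONAI determinism call the script already uses:

# Sketch with stand-in data, not part of the bundle.
import monai
from sklearn.model_selection import train_test_split

monai.utils.set_determinism(seed=123)  # same seed as the script, keeps the splits reproducible

# Stand-in for the datalist built from produce_sample_dict(); 300 dummy samples.
samples = [{"image": f"img_{i}.nii.gz", "label": f"seg_{i}.nii.gz"} for i in range(300)]

# train_size is now configurable (default 200); whatever remains is split 50/50 into val/test.
train_list, other_list = train_test_split(samples, train_size=250)
val_list, test_list = train_test_split(other_list, train_size=0.5)
print(len(train_list), len(val_list), len(test_list))  # 250 25 25

With the default of 200 the output is unchanged from version 0.4.6; passing --train_size simply forwards a different integer into the first train_test_split call.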