|
### Best practice to generate data list |
|
User can use monai to generate the 5-fold data lists. Full exampls can be found in VISTA3D open source [codebase](https://github.com/Project-MONAI/VISTA/blob/main/vista3d/data/make_datalists.py) |
|
```python |
|
from monai.data.utils import partition_dataset |
|
from monai.bundle import ConfigParser |
|
base_url = "/path_to_your_folder/" |
|
json_name = "./your_5_folds.json" |
|
# create matching image and label lists. |
|
# The code to generate the lists is based on your local data structure. |
|
# You can use glob.glob("**.nii.gz") e.t.c. |
|
image_list = ['images/1.nii.gz', 'images/2.nii.gz', ...] |
|
label_list = ['labels/1.nii.gz', 'labels/2.nii.gz', ...] |
|
items = [{"image": img, "label": lab} for img, lab in zip(image_list, label_list)] |
|
# 80% for training 20% for testing. |
|
train_test = partition_dataset(items, ratios=[0.8, 0.2], shuffle=True, seed=0) |
|
print(f"training: {len(train_test[0])}, testing: {len(train_test[1])}") |
|
# num_partitions-fold split for the training set. |
|
train_val = partition_dataset(train_test[0], num_partitions=5, shuffle=True, seed=0) |
|
print(f"training validation folds sizes: {[len(x) for x in train_val]}") |
|
# add the fold index to each training data. |
|
training = [] |
|
for f, x in enumerate(train_val): |
|
for item in x: |
|
item["fold"] = f |
|
training.append(item) |
|
# save json file |
|
parser = ConfigParser({}) |
|
parser["training"] = training |
|
parser["testing"] = train_test[1] |
|
print(f"writing {json_name}\n\n") |
|
if os.path.exists(json_name): |
|
logger.warning(f"rewrite existing datalist file: {json_name}") |
|
ConfigParser.export_config_file(parser.config, json_name, indent=4) |
|
``` |
|
|