| data: | |
| format: zarr | |
| resolution: n320 | |
| frequency: 6h | |
| timestep: 6h | |
| forcing: | |
| - cos_latitude | |
| - cos_longitude | |
| - sin_latitude | |
| - sin_longitude | |
| - cos_julian_day | |
| - cos_local_time | |
| - sin_julian_day | |
| - sin_local_time | |
| - insolation | |
| - lsm | |
| - sdor | |
| - slor | |
| - z | |
| diagnostic: | |
| - tp | |
| - cp | |
| - sf | |
| - tcc | |
| - hcc | |
| - lcc | |
| - mcc | |
| - ro | |
| - ssrd | |
| - strd | |
| - 100u | |
| - 100v | |
| remapped: null | |
| normalizer: | |
| default: mean-std | |
| remap: | |
| cp: tp | |
| sf: tp | |
| std: | |
| - tp | |
| - cp | |
| - sf | |
| - ro | |
| - tcw | |
| - ssrd | |
| - q_50 | |
| - q_100 | |
| - q_150 | |
| - q_200 | |
| - q_250 | |
| - q_300 | |
| - q_400 | |
| - q_500 | |
| - q_600 | |
| - q_700 | |
| - q_850 | |
| - q_925 | |
| - q_1000 | |
| min-max: null | |
| max: | |
| - sdor | |
| - slor | |
| - z | |
| none: | |
| - cos_latitude | |
| - cos_longitude | |
| - sin_latitude | |
| - sin_longitude | |
| - cos_julian_day | |
| - cos_local_time | |
| - sin_julian_day | |
| - sin_local_time | |
| - insolation | |
| - lsm | |
| - tcc | |
| - mcc | |
| - hcc | |
| - lcc | |
| - swvl1 | |
| - swvl2 | |
| imputer: | |
| default: none | |
| minimum: | |
| - swvl1 | |
| - swvl2 | |
| - ro | |
| mean: | |
| - stl1 | |
| - stl2 | |
| remapper: | |
| default: none | |
| processors: | |
| imputer: | |
| _target_: anemoi.models.preprocessing.imputer.InputImputer | |
| _convert_: all | |
| config: ${data.imputer} | |
| normalizer: | |
| _target_: anemoi.models.preprocessing.normalizer.InputNormalizer | |
| config: ${data.normalizer} | |
| num_features: null | |
| dataloader: | |
| prefetch_factor: 2 | |
| pin_memory: true | |
| read_group_size: ${hardware.num_gpus_per_model} | |
| num_workers: | |
| training: 8 | |
| validation: 8 | |
| test: 1 | |
| predict: 1 | |
| batch_size: | |
| training: 1 | |
| validation: 1 | |
| test: 4 | |
| predict: 4 | |
| limit_batches: | |
| training: null | |
| validation: null | |
| test: 20 | |
| predict: 20 | |
| grid_indices: | |
| _target_: anemoi.training.data.grid_indices.FullGrid | |
| nodes_name: ${graph.data} | |
| dataset: ${hardware.paths.data}/${hardware.files.dataset} | |
| training: | |
| dataset: | |
| - dataset: ${hardware.paths.data}/${hardware.files.dataset} | |
| start: null | |
| end: 2022 | |
| frequency: ${data.frequency} | |
| start: null | |
| end: 2022 | |
| drop: [] | |
| validation: | |
| dataset: | |
| - dataset: ${hardware.paths.data}/${hardware.files.dataset} | |
| start: 2022 | |
| end: 2024 | |
| frequency: ${data.frequency} | |
| start: 2022 | |
| end: 2024 | |
| drop: [] | |
| test: | |
| dataset: | |
| - dataset: ${hardware.paths.data}/${hardware.files.dataset} | |
| start: 2022 | |
| end: null | |
| frequency: ${data.frequency} | |
| start: 2022 | |
| end: null | |
| drop: [] | |
| diagnostics: | |
| plot: | |
| asynchronous: true | |
| datashader: true | |
| frequency: | |
| batch: 750 | |
| epoch: 5 | |
| parameters: | |
| - z_500 | |
| - t_850 | |
| - u_850 | |
| - v_850 | |
| - 2t | |
| - 10u | |
| - 10v | |
| - sp | |
| - tp | |
| - cp | |
| sample_idx: 0 | |
| precip_and_related_fields: | |
| - tp | |
| - cp | |
| colormaps: | |
| default: | |
| _target_: anemoi.training.utils.custom_colormaps.MatplotlibColormap | |
| name: viridis | |
| error: | |
| _target_: anemoi.training.utils.custom_colormaps.MatplotlibColormap | |
| name: bwr | |
| precip: | |
| _target_: anemoi.training.utils.custom_colormaps.MatplotlibColormapClevels | |
| clevels: | |
| - '#ffffff' | |
| - '#04e9e7' | |
| - '#019ff4' | |
| - '#0300f4' | |
| - '#02fd02' | |
| - '#01c501' | |
| - '#008e00' | |
| - '#fdf802' | |
| - '#e5bc00' | |
| - '#fd9500' | |
| - '#fd0000' | |
| - '#d40000' | |
| - '#bc0000' | |
| - '#f800fd' | |
| variables: ${diagnostics.plot.precip_and_related_fields} | |
| callbacks: [] | |
| callbacks: [] | |
| benchmark_profiler: | |
| memory: | |
| enabled: true | |
| steps: 5 | |
| warmup: 2 | |
| extra_plots: false | |
| trace_rank0_only: false | |
| time: | |
| enabled: true | |
| verbose: false | |
| speed: | |
| enabled: true | |
| system: | |
| enabled: true | |
| model_summary: | |
| enabled: true | |
| snapshot: | |
| enabled: true | |
| steps: 4 | |
| warmup: 0 | |
| debug: | |
| anomaly_detection: false | |
| profiler: false | |
| enable_checkpointing: true | |
| checkpoint: | |
| every_n_minutes: | |
| save_frequency: 30 | |
| num_models_saved: 3 | |
| every_n_epochs: | |
| save_frequency: 1 | |
| num_models_saved: -1 | |
| every_n_train_steps: | |
| save_frequency: null | |
| num_models_saved: 0 | |
| log: | |
| wandb: | |
| enabled: false | |
| offline: false | |
| log_model: false | |
| project: Anemoi | |
| entity: ??? | |
| gradients: false | |
| parameters: false | |
| tensorboard: | |
| enabled: false | |
| mlflow: | |
| enabled: false | |
| offline: false | |
| authentication: false | |
| log_model: false | |
| tracking_uri: ??? | |
| experiment_name: ??? | |
| project_name: ??? | |
| system: true | |
| terminal: true | |
| run_name: null | |
| on_resume_create_child: true | |
| expand_hyperparams: | |
| - config | |
| http_max_retries: 35 | |
| interval: 100 | |
| enable_progress_bar: true | |
| print_memory_summary: false | |
| hardware: | |
| paths: | |
| data: ${oc.decode:${oc.env:DATASETS_PATH}} | |
| output: ${oc.decode:${oc.env:OUTPUT_PATH}} | |
| logs: | |
| base: ${hardware.paths.output}logs/ | |
| wandb: ${hardware.paths.logs.base} | |
| mlflow: ${hardware.paths.logs.base}mlflow/ | |
| tensorboard: ${hardware.paths.logs.base}tensorboard/ | |
| checkpoints: ${hardware.paths.output}checkpoint/ | |
| plots: ${hardware.paths.output}plots/ | |
| profiler: ${hardware.paths.output}profiler/ | |
| graph: ${hardware.paths.output}graphs/ | |
| files: | |
| dataset: aifs-ea-an-oper-0001-mars-${data.resolution}-1979-2024-6h-v1-aifs-single-v1.zarr | |
| graph: graph_enc_proc_dec_${data.resolution}.pt | |
| truncation: null | |
| truncation_inv: null | |
| checkpoint: | |
| every_n_epochs: aifs-by_epoch-epoch_{epoch:03d}-val_wmse_{val_wmse:.3e} | |
| every_n_train_steps: aifs-by_step-epoch_{epoch:03d}-step_{step:06d} | |
| every_n_minutes: aifs-by_time-epoch_{epoch:03d}-step_{step:06d} | |
| warm_start: null | |
| accelerator: auto | |
| num_gpus_per_node: 4 | |
| num_nodes: 16 | |
| num_gpus_per_model: 4 | |
| graph: | |
| overwrite: true | |
| data: data | |
| hidden: hidden | |
| nodes: | |
| data: | |
| node_builder: | |
| _target_: anemoi.graphs.nodes.ZarrDatasetNodes | |
| dataset: ${dataloader.dataset} | |
| attributes: ${graph.attributes.nodes} | |
| hidden: | |
| node_builder: | |
| _target_: anemoi.graphs.nodes.ReducedGaussianGridNodes | |
| grid: o96 | |
| edges: | |
| - source_name: ${graph.data} | |
| target_name: ${graph.hidden} | |
| edge_builders: | |
| - _target_: anemoi.graphs.edges.CutOffEdges | |
| cutoff_factor: 0.6 | |
| source_mask_attr_name: null | |
| target_mask_attr_name: null | |
| attributes: ${graph.attributes.edges} | |
| - source_name: ${graph.hidden} | |
| target_name: ${graph.data} | |
| edge_builders: | |
| - _target_: anemoi.graphs.edges.KNNEdges | |
| num_nearest_neighbours: 3 | |
| source_mask_attr_name: null | |
| target_mask_attr_name: null | |
| attributes: ${graph.attributes.edges} | |
| attributes: | |
| nodes: | |
| area_weight: | |
| _target_: anemoi.graphs.nodes.attributes.SphericalAreaWeights | |
| norm: unit-max | |
| fill_value: 0 | |
| edges: | |
| edge_length: | |
| _target_: anemoi.graphs.edges.attributes.EdgeLength | |
| norm: unit-std | |
| edge_dirs: | |
| _target_: anemoi.graphs.edges.attributes.EdgeDirection | |
| norm: unit-std | |
| post_processors: [] | |
| model: | |
| activation: GELU | |
| num_channels: 1024 | |
| cpu_offload: false | |
| output_mask: null | |
| model: | |
| _target_: anemoi.models.models.encoder_processor_decoder.AnemoiModelEncProcDec | |
| layer_kernels: | |
| processor: | |
| LayerNorm: | |
| _target_: torch.nn.LayerNorm | |
| _partial_: true | |
| Linear: | |
| _target_: torch.nn.Linear | |
| _partial_: true | |
| QueryNorm: | |
| _target_: anemoi.models.layers.normalization.AutocastLayerNorm | |
| _partial_: true | |
| bias: false | |
| KeyNorm: | |
| _target_: anemoi.models.layers.normalization.AutocastLayerNorm | |
| _partial_: true | |
| bias: false | |
| encoder: | |
| LayerNorm: | |
| _target_: torch.nn.LayerNorm | |
| _partial_: true | |
| Linear: | |
| _target_: torch.nn.Linear | |
| _partial_: true | |
| decoder: | |
| LayerNorm: | |
| _target_: torch.nn.LayerNorm | |
| _partial_: true | |
| Linear: | |
| _target_: torch.nn.Linear | |
| _partial_: true | |
| processor: | |
| _target_: anemoi.models.layers.processor.TransformerProcessor | |
| activation: ${model.activation} | |
| num_layers: 16 | |
| num_chunks: 2 | |
| mlp_hidden_ratio: 4 | |
| num_heads: 16 | |
| window_size: 1120 | |
| dropout_p: 0.0 | |
| attention_implementation: flash_attention | |
| qk_norm: false | |
| softcap: 0.0 | |
| use_alibi_slopes: false | |
| cpu_offload: ${model.cpu_offload} | |
| encoder: | |
| _target_: anemoi.models.layers.mapper.GraphTransformerForwardMapper | |
| trainable_size: ${model.trainable_parameters.data2hidden} | |
| sub_graph_edge_attributes: ${model.attributes.edges} | |
| activation: ${model.activation} | |
| num_chunks: 1 | |
| mlp_hidden_ratio: 4 | |
| num_heads: 16 | |
| qk_norm: false | |
| cpu_offload: ${model.cpu_offload} | |
| decoder: | |
| _target_: anemoi.models.layers.mapper.GraphTransformerBackwardMapper | |
| trainable_size: ${model.trainable_parameters.hidden2data} | |
| sub_graph_edge_attributes: ${model.attributes.edges} | |
| activation: ${model.activation} | |
| num_chunks: 1 | |
| mlp_hidden_ratio: 4 | |
| num_heads: 16 | |
| initialise_data_extractor_zero: false | |
| qk_norm: false | |
| cpu_offload: ${model.cpu_offload} | |
| trainable_parameters: | |
| data: 8 | |
| hidden: 8 | |
| data2hidden: 8 | |
| hidden2data: 8 | |
| attributes: | |
| edges: | |
| - edge_length | |
| - edge_dirs | |
| nodes: [] | |
| bounding: | |
| - _target_: anemoi.models.layers.bounding.ReluBounding | |
| variables: | |
| - tp | |
| - ro | |
| - tcw | |
| - ssrd | |
| - q_50 | |
| - q_100 | |
| - q_150 | |
| - q_200 | |
| - q_250 | |
| - q_300 | |
| - q_400 | |
| - q_500 | |
| - q_600 | |
| - q_700 | |
| - q_850 | |
| - q_925 | |
| - q_1000 | |
| - _target_: anemoi.models.layers.bounding.HardtanhBounding | |
| variables: | |
| - tcc | |
| - swvl1 | |
| - swvl2 | |
| min_val: 0 | |
| max_val: 1 | |
| - _target_: anemoi.models.layers.bounding.FractionBounding | |
| variables: | |
| - cp | |
| - sf | |
| min_val: 0 | |
| max_val: 1 | |
| total_var: tp | |
| - _target_: anemoi.models.layers.bounding.FractionBounding | |
| variables: | |
| - lcc | |
| - mcc | |
| - hcc | |
| min_val: 0 | |
| max_val: 1 | |
| total_var: tcc | |
| training: | |
| run_id: null | |
| fork_run_id: null | |
| transfer_learning: false | |
| load_weights_only: false | |
| deterministic: false | |
| precision: 16-mixed | |
| multistep_input: 2 | |
| accum_grad_batches: 1 | |
| num_sanity_val_steps: 6 | |
| gradient_clip: | |
| val: 32.0 | |
| algorithm: value | |
| swa: | |
| enabled: false | |
| lr: 0.0001 | |
| optimizer: | |
| zero: false | |
| kwargs: | |
| betas: | |
| - 0.9 | |
| - 0.95 | |
| model_task: anemoi.training.train.forecaster.GraphForecaster | |
| strategy: | |
| _target_: anemoi.training.distributed.strategy.DDPGroupStrategy | |
| num_gpus_per_model: ${hardware.num_gpus_per_model} | |
| read_group_size: ${dataloader.read_group_size} | |
| loss_gradient_scaling: false | |
| training_loss: | |
| _target_: anemoi.training.losses.mse.WeightedMSELoss | |
| scalars: | |
| - variable | |
| - loss_weights_mask | |
| ignore_nans: false | |
| validation_metrics: | |
| - _target_: anemoi.training.losses.mse.WeightedMSELoss | |
| scalars: [] | |
| ignore_nans: true | |
| scale_validation_metrics: | |
| scalars_to_apply: | |
| - variable | |
| metrics: | |
| - all | |
| rollout: | |
| start: 1 | |
| epoch_increment: 0 | |
| max: 1 | |
| max_epochs: null | |
| max_steps: 260000 | |
| lr: | |
| warmup: 1000 | |
| rate: 3.125e-05 | |
| iterations: 260000 | |
| min: 3.0e-07 | |
| variable_loss_scaling: | |
| default: 1 | |
| pl: | |
| q: 0.6 | |
| t: 6 | |
| u: 0.8 | |
| v: 0.5 | |
| w: 0.001 | |
| z: 12 | |
| sfc: | |
| sp: 10 | |
| 10u: 0.5 | |
| 10v: 0.5 | |
| 100u: 0.1 | |
| 100v: 0.1 | |
| 2d: 0.5 | |
| tp: 0.025 | |
| cp: 0.0025 | |
| ro: 0.0025 | |
| sf: 0.025 | |
| tcc: 0.1 | |
| mcc: 0.1 | |
| lcc: 0.1 | |
| hcc: 0.1 | |
| swvl2: 2 | |
| swvl1: 1 | |
| stl2: 10 | |
| stl1: 1 | |
| ssrd: 0.05 | |
| strd: 0.1 | |
| metrics: | |
| - z_500 | |
| - t_850 | |
| - u_850 | |
| - v_850 | |
| pressure_level_scaler: | |
| _target_: anemoi.training.data.scaling.ReluPressureLevelScaler | |
| minimum: 0.2 | |
| slope: 0.001 | |
| node_loss_weights: | |
| _target_: anemoi.training.losses.nodeweights.GraphNodeAttribute | |
| target_nodes: ${graph.data} | |
| node_attribute: area_weight | |
| submodules_to_freeze: [] | |