# Hydra/OmegaConf configuration. `${...}` values are interpolations resolved
# against other keys in this file; `???` marks a value that must be supplied.

# Dataset description and the variable groupings used by the pre-processors.
data:
  format: zarr
  resolution: n320
  frequency: 6h
  timestep: 6h
  forcing:
    - cos_latitude
    - cos_longitude
    - sin_latitude
    - sin_longitude
    - cos_julian_day
    - cos_local_time
    - sin_julian_day
    - sin_local_time
    - insolation
    - lsm
    - sdor
    - slor
    - z
  diagnostic:
    - tp
    - cp
    - sf
    - tcc
    - hcc
    - lcc
    - mcc
    - ro
    - ssrd
    - strd
    - 100u
    - 100v
  remapped: null
  # Handed to the normalizer processor below through ${data.normalizer}.
  normalizer:
    default: mean-std
    remap:
      cp: tp
      sf: tp
    std:
      - tp
      - cp
      - sf
      - ro
      - tcw
      - ssrd
      - q_50
      - q_100
      - q_150
      - q_200
      - q_250
      - q_300
      - q_400
      - q_500
      - q_600
      - q_700
      - q_850
      - q_925
      - q_1000
    min-max: null
    max:
      - sdor
      - slor
      - z
    # `none` is a plain string key here, not a YAML null.
    none:
      - cos_latitude
      - cos_longitude
      - sin_latitude
      - sin_longitude
      - cos_julian_day
      - cos_local_time
      - sin_julian_day
      - sin_local_time
      - insolation
      - lsm
      - tcc
      - mcc
      - hcc
      - lcc
      - swvl1
      - swvl2
  # Handed to the imputer processor below through ${data.imputer}.
  imputer:
    default: none
    minimum:
      - swvl1
      - swvl2
      - ro
    mean:
      - stl1
      - stl2
  remapper:
    default: none
  processors:
    imputer:
      _target_: anemoi.models.preprocessing.imputer.InputImputer
      _convert_: all
      config: ${data.imputer}
    normalizer:
      _target_: anemoi.models.preprocessing.normalizer.InputNormalizer
      config: ${data.normalizer}
  num_features: null

# Data loading: worker/batch settings and the training/validation/test splits.
dataloader:
  prefetch_factor: 2
  pin_memory: true
  read_group_size: ${hardware.num_gpus_per_model}
  num_workers:
    training: 8
    validation: 8
    test: 1
    predict: 1
  batch_size:
    training: 1
    validation: 1
    test: 4
    predict: 4
  limit_batches:
    training: null
    validation: null
    test: 20
    predict: 20
  grid_indices:
    _target_: anemoi.training.data.grid_indices.FullGrid
    nodes_name: ${graph.data}
  dataset: ${hardware.paths.data}/${hardware.files.dataset}
  # NOTE(review): training ends at 2022, validation starts at 2022 and runs to
  # 2024, and test starts at 2022 with no end. Confirm whether these splits
  # are meant to share data.
  training:
    dataset:
      - dataset: ${hardware.paths.data}/${hardware.files.dataset}
        start: null
        end: 2022
        frequency: ${data.frequency}
    start: null
    end: 2022
    drop: []
  validation:
    dataset:
      - dataset: ${hardware.paths.data}/${hardware.files.dataset}
        start: 2022
        end: 2024
        frequency: ${data.frequency}
    start: 2022
    end: 2024
    drop: []
  test:
    dataset:
      - dataset: ${hardware.paths.data}/${hardware.files.dataset}
        start: 2022
        end: null
        frequency: ${data.frequency}
    start: 2022
    end: null
    drop: []

# Plotting, profiling, checkpointing and experiment logging.
diagnostics:
  plot:
    asynchronous: true
    datashader: true
    frequency:
      batch: 750
      epoch: 5
    parameters:
      - z_500
      - t_850
      - u_850
      - v_850
      - 2t
      - 10u
      - 10v
      - sp
      - tp
      - cp
    sample_idx: 0
    precip_and_related_fields:
      - tp
      - cp
    colormaps:
      default:
        _target_: anemoi.training.utils.custom_colormaps.MatplotlibColormap
        name: viridis
      error:
        _target_: anemoi.training.utils.custom_colormaps.MatplotlibColormap
        name: bwr
      precip:
        _target_: anemoi.training.utils.custom_colormaps.MatplotlibColormapClevels
        # Colour codes are quoted so that `#` is not read as a comment marker.
        clevels:
          - '#ffffff'
          - '#04e9e7'
          - '#019ff4'
          - '#0300f4'
          - '#02fd02'
          - '#01c501'
          - '#008e00'
          - '#fdf802'
          - '#e5bc00'
          - '#fd9500'
          - '#fd0000'
          - '#d40000'
          - '#bc0000'
          - '#f800fd'
        variables: ${diagnostics.plot.precip_and_related_fields}
    callbacks: []
  callbacks: []
  benchmark_profiler:
    memory:
      enabled: true
      steps: 5
      warmup: 2
      extra_plots: false
      trace_rank0_only: false
    time:
      enabled: true
      verbose: false
    speed:
      enabled: true
    system:
      enabled: true
    model_summary:
      enabled: true
    snapshot:
      enabled: true
      steps: 4
      warmup: 0
  debug:
    anomaly_detection: false
  profiler: false
  enable_checkpointing: true
  checkpoint:
    every_n_minutes:
      save_frequency: 30
      num_models_saved: 3
    every_n_epochs:
      save_frequency: 1
      num_models_saved: -1
    every_n_train_steps:
      save_frequency: null
      num_models_saved: 0
  log:
    wandb:
      enabled: false
      offline: false
      log_model: false
      project: Anemoi
      entity: ???
      gradients: false
      parameters: false
    tensorboard:
      enabled: false
    mlflow:
      enabled: false
      offline: false
      authentication: false
      log_model: false
      tracking_uri: ???
      experiment_name: ???
      project_name: ???
      system: true
      terminal: true
      run_name: null
      on_resume_create_child: true
      expand_hyperparams:
        - config
      http_max_retries: 35
    interval: 100
  enable_progress_bar: true
  print_memory_summary: false

# Filesystem layout, file-name templates and compute topology.
hardware:
  paths:
    # Both roots are read from environment variables.
    data: ${oc.decode:${oc.env:DATASETS_PATH}}
    output: ${oc.decode:${oc.env:OUTPUT_PATH}}
    logs:
      base: ${hardware.paths.output}logs/
      wandb: ${hardware.paths.logs.base}
      mlflow: ${hardware.paths.logs.base}mlflow/
      tensorboard: ${hardware.paths.logs.base}tensorboard/
    checkpoints: ${hardware.paths.output}checkpoint/
    plots: ${hardware.paths.output}plots/
    profiler: ${hardware.paths.output}profiler/
    graph: ${hardware.paths.output}graphs/
  files:
    dataset: aifs-ea-an-oper-0001-mars-${data.resolution}-1979-2024-6h-v1-aifs-single-v1.zarr
    graph: graph_enc_proc_dec_${data.resolution}.pt
    truncation: null
    truncation_inv: null
    checkpoint:
      every_n_epochs: aifs-by_epoch-epoch_{epoch:03d}-val_wmse_{val_wmse:.3e}
      every_n_train_steps: aifs-by_step-epoch_{epoch:03d}-step_{step:06d}
      every_n_minutes: aifs-by_time-epoch_{epoch:03d}-step_{step:06d}
    warm_start: null
  accelerator: auto
  num_gpus_per_node: 4
  num_nodes: 16
  num_gpus_per_model: 4

# Graph definition: node sets, the edges between them and their attributes.
graph:
  overwrite: true
  # Node-set names referenced elsewhere as ${graph.data} and ${graph.hidden}.
  data: data
  hidden: hidden
  nodes:
    data:
      node_builder:
        _target_: anemoi.graphs.nodes.ZarrDatasetNodes
        dataset: ${dataloader.dataset}
      attributes: ${graph.attributes.nodes}
    hidden:
      node_builder:
        _target_: anemoi.graphs.nodes.ReducedGaussianGridNodes
        grid: o96
  edges:
    # data -> hidden
    - source_name: ${graph.data}
      target_name: ${graph.hidden}
      edge_builders:
        - _target_: anemoi.graphs.edges.CutOffEdges
          cutoff_factor: 0.6
          source_mask_attr_name: null
          target_mask_attr_name: null
      attributes: ${graph.attributes.edges}
    # hidden -> data
    - source_name: ${graph.hidden}
      target_name: ${graph.data}
      edge_builders:
        - _target_: anemoi.graphs.edges.KNNEdges
          num_nearest_neighbours: 3
          source_mask_attr_name: null
          target_mask_attr_name: null
      attributes: ${graph.attributes.edges}
  attributes:
    nodes:
      area_weight:
        _target_: anemoi.graphs.nodes.attributes.SphericalAreaWeights
        norm: unit-max
        fill_value: 0
    edges:
      edge_length:
        _target_: anemoi.graphs.edges.attributes.EdgeLength
        norm: unit-std
      edge_dirs:
        _target_: anemoi.graphs.edges.attributes.EdgeDirection
        norm: unit-std
  post_processors: []

# Model architecture: encoder / processor / decoder and output bounding.
model:
  activation: GELU
  num_channels: 1024
  cpu_offload: false
  output_mask: null
  model:
    _target_: anemoi.models.models.encoder_processor_decoder.AnemoiModelEncProcDec
  layer_kernels:
    processor:
      LayerNorm:
        _target_: torch.nn.LayerNorm
        _partial_: true
      Linear:
        _target_: torch.nn.Linear
        _partial_: true
      QueryNorm:
        _target_: anemoi.models.layers.normalization.AutocastLayerNorm
        _partial_: true
        bias: false
      KeyNorm:
        _target_: anemoi.models.layers.normalization.AutocastLayerNorm
        _partial_: true
        bias: false
    encoder:
      LayerNorm:
        _target_: torch.nn.LayerNorm
        _partial_: true
      Linear:
        _target_: torch.nn.Linear
        _partial_: true
    decoder:
      LayerNorm:
        _target_: torch.nn.LayerNorm
        _partial_: true
      Linear:
        _target_: torch.nn.Linear
        _partial_: true
  processor:
    _target_: anemoi.models.layers.processor.TransformerProcessor
    activation: ${model.activation}
    num_layers: 16
    num_chunks: 2
    mlp_hidden_ratio: 4
    num_heads: 16
    window_size: 1120
    dropout_p: 0.0
    attention_implementation: flash_attention
    qk_norm: false
    softcap: 0.0
    use_alibi_slopes: false
    cpu_offload: ${model.cpu_offload}
  encoder:
    _target_: anemoi.models.layers.mapper.GraphTransformerForwardMapper
    trainable_size: ${model.trainable_parameters.data2hidden}
    sub_graph_edge_attributes: ${model.attributes.edges}
    activation: ${model.activation}
    num_chunks: 1
    mlp_hidden_ratio: 4
    num_heads: 16
    qk_norm: false
    cpu_offload: ${model.cpu_offload}
  decoder:
    _target_: anemoi.models.layers.mapper.GraphTransformerBackwardMapper
    trainable_size: ${model.trainable_parameters.hidden2data}
    sub_graph_edge_attributes: ${model.attributes.edges}
    activation: ${model.activation}
    num_chunks: 1
    mlp_hidden_ratio: 4
    num_heads: 16
    initialise_data_extractor_zero: false
    qk_norm: false
    cpu_offload: ${model.cpu_offload}
  trainable_parameters:
    data: 8
    hidden: 8
    data2hidden: 8
    hidden2data: 8
  attributes:
    edges:
      - edge_length
      - edge_dirs
    nodes: []
  bounding:
    - _target_: anemoi.models.layers.bounding.ReluBounding
      variables:
        - tp
        - ro
        - tcw
        - ssrd
        - q_50
        - q_100
        - q_150
        - q_200
        - q_250
        - q_300
        - q_400
        - q_500
        - q_600
        - q_700
        - q_850
        - q_925
        - q_1000
    - _target_: anemoi.models.layers.bounding.HardtanhBounding
      variables:
        - tcc
        - swvl1
        - swvl2
      min_val: 0
      max_val: 1
    - _target_: anemoi.models.layers.bounding.FractionBounding
      variables:
        - cp
        - sf
      min_val: 0
      max_val: 1
      total_var: tp
    - _target_: anemoi.models.layers.bounding.FractionBounding
      variables:
        - lcc
        - mcc
        - hcc
      min_val: 0
      max_val: 1
      total_var: tcc

# Optimisation schedule, loss definition and per-variable loss weights.
training:
  run_id: null
  fork_run_id: null
  transfer_learning: false
  load_weights_only: false
  deterministic: false
  precision: 16-mixed
  multistep_input: 2
  accum_grad_batches: 1
  num_sanity_val_steps: 6
  gradient_clip:
    val: 32.0
    algorithm: value
  swa:
    enabled: false
    lr: 0.0001
  optimizer:
    zero: false
    kwargs:
      betas:
        - 0.9
        - 0.95
  model_task: anemoi.training.train.forecaster.GraphForecaster
  strategy:
    _target_: anemoi.training.distributed.strategy.DDPGroupStrategy
    num_gpus_per_model: ${hardware.num_gpus_per_model}
    read_group_size: ${dataloader.read_group_size}
  loss_gradient_scaling: false
  training_loss:
    _target_: anemoi.training.losses.mse.WeightedMSELoss
    scalars:
      - variable
      - loss_weights_mask
    ignore_nans: false
  validation_metrics:
    - _target_: anemoi.training.losses.mse.WeightedMSELoss
      scalars: []
      ignore_nans: true
  scale_validation_metrics:
    scalars_to_apply:
      - variable
    metrics:
      - all
  rollout:
    start: 1
    epoch_increment: 0
    max: 1
  max_epochs: null
  max_steps: 260000
  lr:
    warmup: 1000
    rate: 3.125e-05
    iterations: 260000
    min: 3.0e-07
  variable_loss_scaling:
    default: 1
    pl:
      q: 0.6
      t: 6
      u: 0.8
      v: 0.5
      w: 0.001
      z: 12
    sfc:
      sp: 10
      10u: 0.5
      10v: 0.5
      100u: 0.1
      100v: 0.1
      2d: 0.5
      tp: 0.025
      cp: 0.0025
      ro: 0.0025
      sf: 0.025
      tcc: 0.1
      mcc: 0.1
      lcc: 0.1
      hcc: 0.1
      swvl2: 2
      swvl1: 1
      stl2: 10
      stl1: 1
      ssrd: 0.05
      strd: 0.1
  metrics:
    - z_500
    - t_850
    - u_850
    - v_850
  pressure_level_scaler:
    _target_: anemoi.training.data.scaling.ReluPressureLevelScaler
    minimum: 0.2
    slope: 0.001
  node_loss_weights:
    _target_: anemoi.training.losses.nodeweights.GraphNodeAttribute
    target_nodes: ${graph.data}
    node_attribute: area_weight
  submodules_to_freeze: []