ecmwf
/

aifs-single-1.1

Graph Machine Learning

Anemoi

English

Model card Files Files and versions

xet

Community

anaprietonem committed about 1 month ago

Commit

598f35c

verified ·

1 Parent(s): 7976552

Upload 2 files

Browse files

Files changed (2) hide show

config_finetuning.yaml +350 -277
config_pretraining.yaml +369 -304

config_finetuning.yaml CHANGED Viewed

@@ -4,8 +4,66 @@ data:
   frequency: 6h
   timestep: 6h
   forcing:
     - cos_latitude
-    - cos_longitude
     - sin_latitude
     - sin_longitude
     - cos_julian_day
@@ -14,136 +72,41 @@ data:
     - sin_local_time
     - insolation
     - lsm
-    - sdor
-    - slor
-    - z
-  diagnostic:
-    - tp
-    - cp
-    - sf
     - tcc
     - hcc
     - lcc
-    - mcc
-    - ro
-    - ssrd
-    - strd
-    - 100u
-    - 100v
-  remapped: null
-  normalizer:
-    default: mean-std
-    remap:
-      cp: tp
-      sf: tp
-    std:
-      - tp
-      - cp
-      - sf
-      - ro
-      - tcw
-      - ssrd
-      - q_50
-      - q_100
-      - q_150
-      - q_200
-      - q_250
-      - q_300
-      - q_400
-      - q_500
-      - q_600
-      - q_700
-      - q_850
-      - q_925
-      - q_1000
-    min-max: null
-    max:
-      - sdor
-      - slor
-      - z
-    none:
-      - cos_latitude
-      - cos_longitude
-      - sin_latitude
-      - sin_longitude
-      - cos_julian_day
-      - cos_local_time
-      - sin_julian_day
-      - sin_local_time
-      - insolation
-      - lsm
-      - tcc
-      - mcc
-      - hcc
-      - lcc
-      - swvl1
-      - swvl2
   imputer:
     default: none
   remapper:
     default: none
   processors:
     normalizer:
       _target_: anemoi.models.preprocessing.normalizer.InputNormalizer
-      _convert_: all
-      config:
-        default: mean-std
-        remap:
-          cp: tp
-          sf: tp
-        std:
-          - tp
-          - cp
-          - sf
-          - ro
-          - tcw
-          - ssrd
-          - q_50
-          - q_100
-          - q_150
-          - q_200
-          - q_250
-          - q_300
-          - q_400
-          - q_500
-          - q_600
-          - q_700
-          - q_850
-          - q_925
-          - q_1000
-        min-max: null
-        max:
-          - sdor
-          - slor
-          - z
-        none:
-          - cos_latitude
-          - cos_longitude
-          - sin_latitude
-          - sin_longitude
-          - cos_julian_day
-          - cos_local_time
-          - sin_julian_day
-          - sin_local_time
-          - insolation
-          - lsm
-          - tcc
-          - mcc
-          - hcc
-          - lcc
-          - swvl1
-          - swvl2
-  num_features: 115
 dataloader:
   prefetch_factor: 2
-  pin_memory: True
-  read_group_size: 4
   num_workers:
     training: 8
     validation: 8
-    test: 8
-    predict: 8
   batch_size:
     training: 1
     validation: 1
@@ -154,118 +117,165 @@ dataloader:
     validation: 10
     test: 20
     predict: 20
   dataset: ${hardware.paths.data}/${hardware.files.dataset}
-  land_dataset: ${hardware.paths.data}/${hardware.files.dataset_land}
-  land_variables: [100u, 100v, swvl1, swvl2, stl1, stl2, tcc, lcc, mcc, hcc, sf, ro, strd, ssrd]
   training:
     dataset:
-      - dataset: ${dataloader.dataset}
-        start: null
-        end: 2022
-        frequency: ${data.frequency}
-        drop: []
-      - dataset: ${dataloader.land_dataset}
-        start: null
-        end: 2022
-        frequency: ${data.frequency}
-        select: ${dataloader.land_variables}
     start: null
     end: 2022
     drop: []
   validation:
     dataset:
-      - dataset: ${dataloader.dataset}
-        start: 2022
-        end: 2022
-        frequency: ${data.frequency}
-        drop: []
-      - dataset: ${dataloader.land_dataset}
-        start: 2022
-        end: 2022
-        frequency: ${data.frequency}
-        select: ${dataloader.land_variables}
     start: 2022
-    end: 2022
     drop: []
-  validation_rollout: 1
 diagnostics:
   plot:
-    asynchronous: False
-    datashader: True
     frequency:
       batch: 750
-      epoch: 10
-    parameters: [tp]
     sample_idx: 0
-    precip_and_related_fields: [tp, cp]
     callbacks: []
-    enabled: True
-    scatter: False
-    mode: asyncio
-  callbacks: {}
   benchmark_profiler:
     memory:
-      enabled: True
       steps: 5
       warmup: 2
-      extra_plots: False
-      trace_rank0_only: False
     time:
-      enabled: True
-      verbose: False
     speed:
-      enabled: True
     system:
-      enabled: True
     model_summary:
-      enabled: True
     snapshot:
-      enabled: True
       steps: 4
       warmup: 0
   debug:
-    anomaly_detection: False
-  profiler: False
-  enable_checkpointing: True
   checkpoint:
     every_n_minutes:
       save_frequency: 30
       num_models_saved: 3
     every_n_epochs:
       save_frequency: 1
-      num_models_saved: 3
     every_n_train_steps:
       save_frequency: null
       num_models_saved: 0
   log:
     wandb:
-      enabled: False
     tensorboard:
-      enabled: False
     mlflow:
-      enabled: False
     interval: 100
-  enable_progress_bar: True
-  print_memory_summary: False
 hardware:
   paths:
     data: ${oc.decode:${oc.env:DATASETS_PATH}}
-    output: ${oc.decode:${oc.env:OUTPUT_DIR}}
     logs:
-      base: ${hardware.paths.output}/logs
-      wandb: ${hardware.paths.output}/logs/wandb
-      mlflow: ${hardware.paths.output}/logs/mlflow
-      tensorboard: ${hardware.paths.output}/logs/tensorboard
-    checkpoints: ${hardware.paths.output}/checkpoint/
-    plots: ${hardware.paths.output}/plots/
-    profiler: ${hardware.paths.output}/profiler/
-    graph: ${hardware.paths.output}/graphs/
   files:
-    dataset: aifs-od-an-oper-0001-mars-n320-2016-2023-6h-v6.zarr
-    dataset_land: aifs-od-an-oper-0001-mars-n320-2016-2023-6h-v1-land.zarr
-    graph: graph_enc_proc_dec_n320.pt
     checkpoint:
       every_n_epochs: aifs-by_epoch-epoch_{epoch:03d}-val_wmse_{val_wmse:.3e}
       every_n_train_steps: aifs-by_step-epoch_{epoch:03d}-step_{step:06d}
@@ -275,9 +285,8 @@ hardware:
   num_gpus_per_node: 4
   num_nodes: 16
   num_gpus_per_model: 4
 graph:
-  overwrite: True
   data: data
   hidden: hidden
   nodes:
@@ -285,44 +294,34 @@ graph:
       node_builder:
         _target_: anemoi.graphs.nodes.ZarrDatasetNodes
         dataset: ${dataloader.dataset}
-      attributes:
-        area_weight:
-          _target_: anemoi.graphs.nodes.attributes.AreaWeights
-          norm: unit-max
     hidden:
       node_builder:
         _target_: anemoi.graphs.nodes.ReducedGaussianGridNodes
         grid: o96
   edges:
-    - source_name: data
-      target_name: hidden
-      edge_builder:
-        _target_: anemoi.graphs.edges.CutOffEdges
-        cutoff_factor: 0.6
-      attributes:
-        edge_length:
-          _target_: anemoi.graphs.edges.attributes.EdgeLength
-          norm: unit-std
-        edge_dirs:
-          _target_: anemoi.graphs.edges.attributes.EdgeDirection
-          norm: unit-std
-    - source_name: hidden
-      target_name: data
-      edge_builder:
-        _target_: anemoi.graphs.edges.KNNEdges
-        num_nearest_neighbours: 3
-      attributes:
-        edge_length:
-          _target_: anemoi.graphs.edges.attributes.EdgeLength
-          norm: unit-std
-        edge_dirs:
-          _target_: anemoi.graphs.edges.attributes.EdgeDirection
-          norm: unit-std
   attributes:
     nodes:
       area_weight:
-        _target_: anemoi.graphs.nodes.attributes.AreaWeights
         norm: unit-max
     edges:
       edge_length:
         _target_: anemoi.graphs.edges.attributes.EdgeLength
@@ -330,89 +329,138 @@ graph:
       edge_dirs:
         _target_: anemoi.graphs.edges.attributes.EdgeDirection
         norm: unit-std
 model:
   activation: GELU
   num_channels: 1024
   model:
     _target_: anemoi.models.models.encoder_processor_decoder.AnemoiModelEncProcDec
   processor:
     _target_: anemoi.models.layers.processor.TransformerProcessor
-    _convert_: all
-    activation: GELU
     num_layers: 16
     num_chunks: 2
     mlp_hidden_ratio: 4
     num_heads: 16
     window_size: 1120
     dropout_p: 0.0
   encoder:
     _target_: anemoi.models.layers.mapper.GraphTransformerForwardMapper
-    _convert_: all
-    trainable_size: 8
-    sub_graph_edge_attributes: [edge_length, edge_dirs]
-    activation: GELU
     num_chunks: 1
     mlp_hidden_ratio: 4
     num_heads: 16
   decoder:
     _target_: anemoi.models.layers.mapper.GraphTransformerBackwardMapper
-    _convert_: all
-    trainable_size: 8
-    sub_graph_edge_attributes: [edge_length, edge_dirs]
-    activation: GELU
     num_chunks: 1
     mlp_hidden_ratio: 4
     num_heads: 16
   trainable_parameters:
     data: 8
     hidden: 8
     data2hidden: 8
     hidden2data: 8
   attributes:
-    edges: [edge_length, edge_dirs]
     nodes: []
-  node_loss_weight: area_weight
   bounding:
-    - _target_: anemoi.models.layers.bounding.ReluBounding
-      variables:
-        - tp
-        - ro
-        - tcw
-        - ssrd
-        - q_50
-        - q_100
-        - q_150
-        - q_200
-        - q_250
-        - q_300
-        - q_400
-        - q_500
-        - q_600
-        - q_700
-        - q_850
-        - q_925
-        - q_1000
-    - _target_: anemoi.models.layers.bounding.HardtanhBounding
-      variables: [tcc, swvl1, swvl2]
-      min_val: 0
-      max_val: 1
-    - _target_: anemoi.models.layers.bounding.FractionBounding
-      variables: [cp, sf]
-      min_val: 0
-      max_val: 1
-      total_var: tp
-    - _target_: anemoi.models.layers.bounding.FractionBounding
-      variables: [lcc, mcc, hcc]
-      min_val: 0
-      max_val: 1
-      total_var: tcc
 training:
   run_id: null
   fork_run_id: ${oc.decode:${oc.env:PRETRAINING_RUN_ID}}
-  load_weights_only: True
-  deterministic: False
   precision: 16-mixed
   multistep_input: 2
   accum_grad_batches: 1
@@ -421,20 +469,35 @@ training:
     val: 32.0
     algorithm: value
   swa:
-    enabled: False
     lr: 0.0001
-  zero_optimizer: False
   training_loss:
     _target_: anemoi.training.losses.mse.WeightedMSELoss
     scalars:
-      - variable
-      - loss_weights_mask
-    ignore_nans: False
-  loss_gradient_scaling: False
   validation_metrics:
-    - _target_: anemoi.training.losses.mse.WeightedMSELoss
-      scalars: []
-      ignore_nans: True
   rollout:
     start: 1
     epoch_increment: 1
@@ -442,9 +505,10 @@ training:
   max_epochs: 13
   max_steps: 150000
   lr:
-    rate: 8.0e-7
     iterations: 7900
-    min: 3.0e-7
     warmup_t: 100
   variable_loss_scaling:
     default: 1
@@ -464,20 +528,29 @@ training:
       2d: 0.5
       tp: 0.025
       cp: 0.0025
-      ro: 0.005
       sf: 0.025
       tcc: 0.1
       mcc: 0.1
       lcc: 0.1
       hcc: 0.1
-      swvl2: 2.0
-      swvl1: 1.0
       stl2: 10
       stl1: 1
       ssrd: 0.05
       strd: 0.1
-  metrics: [z_500, t_850, u_850, v_850]
   pressure_level_scaler:
     _target_: anemoi.training.data.scaling.ReluPressureLevelScaler
     minimum: 0.2
-    slope: 0.001

   frequency: 6h
   timestep: 6h
   forcing:
+  - cos_latitude
+  - cos_longitude
+  - sin_latitude
+  - sin_longitude
+  - cos_julian_day
+  - cos_local_time
+  - sin_julian_day
+  - sin_local_time
+  - insolation
+  - lsm
+  - sdor
+  - slor
+  - z
+  diagnostic:
+  - tp
+  - cp
+  - sf
+  - tcc
+  - hcc
+  - lcc
+  - mcc
+  - ro
+  - ssrd
+  - strd
+  - 100u
+  - 100v
+  remapped: null
+  normalizer:
+    default: mean-std
+    remap:
+      cp: tp
+      sf: tp
+    std:
+    - tp
+    - cp
+    - sf
+    - ro
+    - tcw
+    - ssrd
+    - q_50
+    - q_100
+    - q_150
+    - q_200
+    - q_250
+    - q_300
+    - q_400
+    - q_500
+    - q_600
+    - q_700
+    - q_850
+    - q_925
+    - q_1000
+    min-max: null
+    max:
+    - sdor
+    - slor
+    - z
+    none:
     - cos_latitude
+    - cos_longitude
     - sin_latitude
     - sin_longitude
     - cos_julian_day
     - sin_local_time
     - insolation
     - lsm
     - tcc
+    - mcc
     - hcc
     - lcc
+    - swvl1
+    - swvl2
   imputer:
     default: none
+    minimum:
+    - swvl1
+    - swvl2
+    - ro
+    mean:
+    - stl1
+    - stl2
   remapper:
     default: none
   processors:
+    imputer:
+      _target_: anemoi.models.preprocessing.imputer.InputImputer
+      _convert_: all
+      config: ${data.imputer}
     normalizer:
       _target_: anemoi.models.preprocessing.normalizer.InputNormalizer
+      config: ${data.normalizer}
+  num_features: null
 dataloader:
   prefetch_factor: 2
+  pin_memory: true
+  read_group_size: ${hardware.num_gpus_per_model}
   num_workers:
     training: 8
     validation: 8
+    test: 1
+    predict: 1
   batch_size:
     training: 1
     validation: 1
     validation: 10
     test: 20
     predict: 20
+  grid_indices:
+    _target_: anemoi.training.data.grid_indices.FullGrid
+    nodes_name: ${graph.data}
   dataset: ${hardware.paths.data}/${hardware.files.dataset}
   training:
     dataset:
+    - dataset: ${hardware.paths.data}/${hardware.files.dataset}
+      start: null
+      end: 2022
+      frequency: ${data.frequency}
     start: null
     end: 2022
     drop: []
   validation:
     dataset:
+    - dataset: ${hardware.paths.data}/${hardware.files.dataset}
+      start: 2022
+      end: 2024
+      frequency: ${data.frequency}
     start: 2022
+    end: 2024
+    drop: []
+  test:
+    dataset:
+    - dataset: ${hardware.paths.data}/${hardware.files.dataset}
+      start: 2022
+      end: null
+      frequency: ${data.frequency}
+    start: 2022
+    end: null
     drop: []
 diagnostics:
   plot:
+    asynchronous: true
+    datashader: true
     frequency:
       batch: 750
+      epoch: 5
+    parameters:
+    - z_500
+    - t_850
+    - u_850
+    - v_850
+    - 2t
+    - 10u
+    - 10v
+    - sp
+    - tp
+    - cp
     sample_idx: 0
+    precip_and_related_fields:
+    - tp
+    - cp
+    colormaps:
+      default:
+        _target_: anemoi.training.utils.custom_colormaps.MatplotlibColormap
+        name: viridis
+      error:
+        _target_: anemoi.training.utils.custom_colormaps.MatplotlibColormap
+        name: bwr
+      precip:
+        _target_: anemoi.training.utils.custom_colormaps.MatplotlibColormapClevels
+        clevels:
+        - '#ffffff'
+        - '#04e9e7'
+        - '#019ff4'
+        - '#0300f4'
+        - '#02fd02'
+        - '#01c501'
+        - '#008e00'
+        - '#fdf802'
+        - '#e5bc00'
+        - '#fd9500'
+        - '#fd0000'
+        - '#d40000'
+        - '#bc0000'
+        - '#f800fd'
+        variables: ${diagnostics.plot.precip_and_related_fields}
     callbacks: []
+  callbacks: []
   benchmark_profiler:
     memory:
+      enabled: true
       steps: 5
       warmup: 2
+      extra_plots: false
+      trace_rank0_only: false
     time:
+      enabled: true
+      verbose: false
     speed:
+      enabled: true
     system:
+      enabled: true
     model_summary:
+      enabled: true
     snapshot:
+      enabled: true
       steps: 4
       warmup: 0
   debug:
+    anomaly_detection: false
+  profiler: false
+  enable_checkpointing: true
   checkpoint:
     every_n_minutes:
       save_frequency: 30
       num_models_saved: 3
     every_n_epochs:
       save_frequency: 1
+      num_models_saved: -1
     every_n_train_steps:
       save_frequency: null
       num_models_saved: 0
   log:
     wandb:
+      enabled: false
+      offline: false
+      log_model: false
+      project: Anemoi
+      entity: ???
+      gradients: false
+      parameters: false
     tensorboard:
+      enabled: false
     mlflow:
+      enabled: false
+      offline: false
+      authentication: false
+      log_model: false
+      tracking_uri: ???
+      experiment_name: ???
+      project_name: ???
+      system: true
+      terminal: true
+      run_name: null
+      on_resume_create_child: true
+      expand_hyperparams:
+      - config
+      http_max_retries: 35
     interval: 100
+  enable_progress_bar: true
+  print_memory_summary: false
 hardware:
   paths:
     data: ${oc.decode:${oc.env:DATASETS_PATH}}
+    output: ${oc.decode:${oc.env:OUTPUT_PATH}}
     logs:
+      base: ${hardware.paths.output}logs/
+      wandb: ${hardware.paths.logs.base}
+      mlflow: ${hardware.paths.logs.base}mlflow/
+      tensorboard: ${hardware.paths.logs.base}tensorboard/
+    checkpoints: ${hardware.paths.output}checkpoint/
+    plots: ${hardware.paths.output}plots/
+    profiler: ${hardware.paths.output}profiler/
+    graph: ${hardware.paths.output}graphs/
   files:
+    dataset: aifs-ea-an-oper-0001-mars-${data.resolution}-1979-2024-6h-v1-aifs-single-v1.zarr
+    graph: graph_enc_proc_dec_${data.resolution}.pt
     checkpoint:
       every_n_epochs: aifs-by_epoch-epoch_{epoch:03d}-val_wmse_{val_wmse:.3e}
       every_n_train_steps: aifs-by_step-epoch_{epoch:03d}-step_{step:06d}
   num_gpus_per_node: 4
   num_nodes: 16
   num_gpus_per_model: 4
 graph:
+  overwrite: true
   data: data
   hidden: hidden
   nodes:
       node_builder:
         _target_: anemoi.graphs.nodes.ZarrDatasetNodes
         dataset: ${dataloader.dataset}
+      attributes: ${graph.attributes.nodes}
     hidden:
       node_builder:
         _target_: anemoi.graphs.nodes.ReducedGaussianGridNodes
         grid: o96
   edges:
+  - source_name: ${graph.data}
+    target_name: ${graph.hidden}
+    edge_builders:
+    - _target_: anemoi.graphs.edges.CutOffEdges
+      cutoff_factor: 0.6
+      source_mask_attr_name: null
+      target_mask_attr_name: null
+    attributes: ${graph.attributes.edges}
+  - source_name: ${graph.hidden}
+    target_name: ${graph.data}
+    edge_builders:
+    - _target_: anemoi.graphs.edges.KNNEdges
+      num_nearest_neighbours: 3
+      source_mask_attr_name: null
+      target_mask_attr_name: null
+    attributes: ${graph.attributes.edges}
   attributes:
     nodes:
       area_weight:
+        _target_: anemoi.graphs.nodes.attributes.SphericalAreaWeights
         norm: unit-max
+        fill_value: 0
     edges:
       edge_length:
         _target_: anemoi.graphs.edges.attributes.EdgeLength
       edge_dirs:
         _target_: anemoi.graphs.edges.attributes.EdgeDirection
         norm: unit-std
+  post_processors: []
 model:
   activation: GELU
   num_channels: 1024
+  cpu_offload: false
+  output_mask: null
   model:
     _target_: anemoi.models.models.encoder_processor_decoder.AnemoiModelEncProcDec
+  layer_kernels:
+    processor:
+      LayerNorm:
+        _target_: torch.nn.LayerNorm
+        _partial_: true
+      Linear:
+        _target_: torch.nn.Linear
+        _partial_: true
+      QueryNorm:
+        _target_: anemoi.models.layers.normalization.AutocastLayerNorm
+        _partial_: true
+        bias: false
+      KeyNorm:
+        _target_: anemoi.models.layers.normalization.AutocastLayerNorm
+        _partial_: true
+        bias: false
+    encoder:
+      LayerNorm:
+        _target_: torch.nn.LayerNorm
+        _partial_: true
+      Linear:
+        _target_: torch.nn.Linear
+        _partial_: true
+    decoder:
+      LayerNorm:
+        _target_: torch.nn.LayerNorm
+        _partial_: true
+      Linear:
+        _target_: torch.nn.Linear
+        _partial_: true
   processor:
     _target_: anemoi.models.layers.processor.TransformerProcessor
+    activation: ${model.activation}
     num_layers: 16
     num_chunks: 2
     mlp_hidden_ratio: 4
     num_heads: 16
     window_size: 1120
     dropout_p: 0.0
+    attention_implementation: flash_attention
+    qk_norm: false
+    softcap: 0.0
+    use_alibi_slopes: false
+    cpu_offload: ${model.cpu_offload}
   encoder:
     _target_: anemoi.models.layers.mapper.GraphTransformerForwardMapper
+    trainable_size: ${model.trainable_parameters.data2hidden}
+    sub_graph_edge_attributes: ${model.attributes.edges}
+    activation: ${model.activation}
     num_chunks: 1
     mlp_hidden_ratio: 4
     num_heads: 16
+    qk_norm: false
+    cpu_offload: ${model.cpu_offload}
   decoder:
     _target_: anemoi.models.layers.mapper.GraphTransformerBackwardMapper
+    trainable_size: ${model.trainable_parameters.hidden2data}
+    sub_graph_edge_attributes: ${model.attributes.edges}
+    activation: ${model.activation}
     num_chunks: 1
     mlp_hidden_ratio: 4
     num_heads: 16
+    initialise_data_extractor_zero: false
+    qk_norm: false
+    cpu_offload: ${model.cpu_offload}
   trainable_parameters:
     data: 8
     hidden: 8
     data2hidden: 8
     hidden2data: 8
   attributes:
+    edges:
+    - edge_length
+    - edge_dirs
     nodes: []
   bounding:
+  - _target_: anemoi.models.layers.bounding.ReluBounding
+    variables:
+    - tp
+    - ro
+    - tcw
+    - ssrd
+    - ro
+    - q_50
+    - q_100
+    - q_150
+    - q_200
+    - q_250
+    - q_300
+    - q_400
+    - q_500
+    - q_600
+    - q_700
+    - q_850
+    - q_925
+    - q_1000
+  - _target_: anemoi.models.layers.bounding.HardtanhBounding
+    variables:
+    - tcc
+    - swvl1
+    - swvl2
+    min_val: 0
+    max_val: 1
+  - _target_: anemoi.models.layers.bounding.FractionBounding
+    variables:
+    - cp
+    - sf
+    min_val: 0
+    max_val: 1
+    total_var: tp
+  - _target_: anemoi.models.layers.bounding.FractionBounding
+    variables:
+    - lcc
+    - mcc
+    - hcc
+    min_val: 0
+    max_val: 1
+    total_var: tcc
 training:
   run_id: null
   fork_run_id: ${oc.decode:${oc.env:PRETRAINING_RUN_ID}}
+  transfer_learning: false
+  load_weights_only: true
+  deterministic: false
   precision: 16-mixed
   multistep_input: 2
   accum_grad_batches: 1
     val: 32.0
     algorithm: value
   swa:
+    enabled: false
     lr: 0.0001
+  optimizer:
+    zero: false
+    kwargs:
+      betas:
+      - 0.9
+      - 0.95
+  model_task: anemoi.training.train.forecaster.GraphForecaster
+  strategy:
+    _target_: anemoi.training.distributed.strategy.DDPGroupStrategy
+    num_gpus_per_model: ${hardware.num_gpus_per_model}
+    read_group_size: ${dataloader.read_group_size}
+  loss_gradient_scaling: false
   training_loss:
     _target_: anemoi.training.losses.mse.WeightedMSELoss
     scalars:
+    - variable
+    - loss_weights_mask
+    ignore_nans: false
   validation_metrics:
+  - _target_: anemoi.training.losses.mse.WeightedMSELoss
+    scalars: []
+    ignore_nans: true
+  scale_validation_metrics:
+    scalars_to_apply:
+    - variable
+    metrics:
+    - all
   rollout:
     start: 1
     epoch_increment: 1
   max_epochs: 13
   max_steps: 150000
   lr:
+    warmup: 1000
+    rate: 8.0e-07
     iterations: 7900
+    min: 3.0e-07
     warmup_t: 100
   variable_loss_scaling:
     default: 1
       2d: 0.5
       tp: 0.025
       cp: 0.0025
+      ro: 0.0025
       sf: 0.025
       tcc: 0.1
       mcc: 0.1
       lcc: 0.1
       hcc: 0.1
+      swvl2: 2
+      swvl1: 1
       stl2: 10
       stl1: 1
       ssrd: 0.05
       strd: 0.1
+  metrics:
+  - z_500
+  - t_850
+  - u_850
+  - v_850
   pressure_level_scaler:
     _target_: anemoi.training.data.scaling.ReluPressureLevelScaler
     minimum: 0.2
+    slope: 0.001
+  node_loss_weights:
+    _target_: anemoi.training.losses.nodeweights.GraphNodeAttribute
+    target_nodes: ${graph.data}
+    node_attribute: area_weight
+  submodules_to_freeze: []

config_pretraining.yaml CHANGED Viewed

@@ -4,6 +4,64 @@ data:
   frequency: 6h
   timestep: 6h
   forcing:
     - cos_latitude
     - cos_longitude
     - sin_latitude
@@ -14,136 +72,41 @@ data:
     - sin_local_time
     - insolation
     - lsm
-    - sdor
-    - slor
-    - z
-  diagnostic:
-    - tp
-    - cp
-    - sf
     - tcc
     - hcc
     - lcc
-    - mcc
-    - ro
-    - ssrd
-    - strd
-    - 100u
-    - 100v
-  remapped: null
-  normalizer:
-    default: mean-std
-    remap:
-      cp: tp
-      sf: tp
-    std:
-      - tp
-      - cp
-      - sf
-      - ro
-      - tcw
-      - ssrd
-      - q_50
-      - q_100
-      - q_150
-      - q_200
-      - q_250
-      - q_300
-      - q_400
-      - q_500
-      - q_600
-      - q_700
-      - q_850
-      - q_925
-      - q_1000
-    min-max: null
-    max:
-      - sdor
-      - slor
-      - z
-    none:
-      - cos_latitude
-      - cos_longitude
-      - sin_latitude
-      - sin_longitude
-      - cos_julian_day
-      - cos_local_time
-      - sin_julian_day
-      - sin_local_time
-      - insolation
-      - lsm
-      - tcc
-      - mcc
-      - hcc
-      - lcc
-      - swvl1
-      - swvl2
   imputer:
     default: none
   remapper:
     default: none
   processors:
     normalizer:
       _target_: anemoi.models.preprocessing.normalizer.InputNormalizer
-      _convert_: all
-      config:
-        default: mean-std
-        remap:
-          cp: tp
-          sf: tp
-        std:
-          - tp
-          - cp
-          - sf
-          - ro
-          - tcw
-          - ssrd
-          - q_50
-          - q_100
-          - q_150
-          - q_200
-          - q_250
-          - q_300
-          - q_400
-          - q_500
-          - q_600
-          - q_700
-          - q_850
-          - q_925
-          - q_1000
-        min-max: null
-        max:
-          - sdor
-          - slor
-          - z
-        none:
-          - cos_latitude
-          - cos_longitude
-          - sin_latitude
-          - sin_longitude
-          - cos_julian_day
-          - cos_local_time
-          - sin_julian_day
-          - sin_local_time
-          - insolation
-          - lsm
-          - tcc
-          - mcc
-          - hcc
-          - lcc
-          - swvl1
-          - swvl2
-  num_features: 115
 dataloader:
   prefetch_factor: 2
-  pin_memory: True
-  read_group_size: 4
   num_workers:
-    training: 4
-    validation: 4
-    test: 8
-    predict: 8
   batch_size:
     training: 1
     validation: 1
@@ -151,145 +114,170 @@ dataloader:
     predict: 4
   limit_batches:
     training: null
-    validation: 10
     test: 20
     predict: 20
   dataset: ${hardware.paths.data}/${hardware.files.dataset}
-  land_dataset: ${hardware.paths.data}/${hardware.files.dataset_land}
-  land_variables: [100u, 100v, swvl1, swvl2, stl1, stl2, tcc, lcc, mcc, hcc, sf, ro, strd, ssrd]
   training:
     dataset:
-      - dataset: ${dataloader.dataset}
-        start: null
-        end: 2022
-        frequency: ${data.frequency}
-        drop: []
-      - dataset: ${dataloader.land_dataset}
-        start: null
-        end: 2022
-        frequency: ${data.frequency}
-        select: ${dataloader.land_variables}
     start: null
     end: 2022
     drop: []
   validation:
     dataset:
-      - dataset: ${dataloader.dataset}
-        start: 2022
-        end: 2022
-        frequency: ${data.frequency}
-        drop: []
-      - dataset: ${dataloader.land_dataset}
-        start: 2022
-        end: 2022
-        frequency: ${data.frequency}
-        select: ${dataloader.land_variables}
     start: 2022
-    end: 2022
     drop: []
-  validation_rollout: 1
 diagnostics:
   plot:
-    asynchronous: False
-    datashader: True
     frequency:
       batch: 750
-      epoch: 10
-    parameters: [tp]
     sample_idx: 0
-    callbacks:
-      - _target_: anemoi.training.diagnostics.callbacks.plot.PlotLoss
-        parameter_groups:
-          moisture: [tp, cp, tcw]
-          sfc_wind: [10u, 10v]
-      - _target_: anemoi.training.diagnostics.callbacks.plot.PlotSample
-        sample_idx: 0
-        per_sample: 6
-        parameters: [tp]
-        accumulation_levels_plot: [0, 0.05, 0.1, 0.25, 0.5, 1, 1.5, 2, 3, 4, 5, 6, 7, 100]
-        cmap_accumulation:
-          - "#ffffff"
-          - "#04e9e7"
-          - "#019ff4"
-          - "#0300f4"
-          - "#02fd02"
-          - "#01c501"
-          - "#008e00"
-          - "#fdf802"
-          - "#e5bc00"
-          - "#fd9500"
-          - "#fd0000"
-          - "#d40000"
-          - "#bc0000"
-          - "#f800fd"
-        precip_and_related_fields: [tp, cp]
-    enabled: True
-    scatter: False
-    mode: asyncio
-  callbacks: {}
   benchmark_profiler:
     memory:
-      enabled: True
       steps: 5
       warmup: 2
-      extra_plots: False
-      trace_rank0_only: False
     time:
-      enabled: True
-      verbose: False
     speed:
-      enabled: True
     system:
-      enabled: True
     model_summary:
-      enabled: True
     snapshot:
-      enabled: True
       steps: 4
       warmup: 0
   debug:
-    anomaly_detection: False
-  profiler: False
-  enable_checkpointing: True
   checkpoint:
     every_n_minutes:
       save_frequency: 30
       num_models_saved: 3
     every_n_epochs:
       save_frequency: 1
-      num_models_saved: 3
     every_n_train_steps:
       save_frequency: null
       num_models_saved: 0
   log:
     wandb:
-      enabled: False
     tensorboard:
-      enabled: False
     mlflow:
-      enabled: False
     interval: 100
-  enable_progress_bar: True
-  print_memory_summary: False
 hardware:
   paths:
     data: ${oc.decode:${oc.env:DATASETS_PATH}}
-    output: ${oc.decode:${oc.env:OUTPUT_DIR}}
     logs:
-      base: ${hardware.paths.output}/logs
-      wandb: ${hardware.paths.output}/logs/wandb
-      mlflow: ${hardware.paths.output}/logs/mlflow
-      tensorboard: ${hardware.paths.output}/logs/tensorboard
-    checkpoints: ${hardware.paths.output}/checkpoint
-    plots: ${hardware.paths.output}/plots
-    profiler: ${hardware.paths.output}/profiler
-    graph: ${hardware.paths.output}/graphs
   files:
-    dataset: aifs-ea-an-oper-0001-mars-n320-1979-2022-6h-v6.zarr
-    dataset_land: aifs-ea-an-oper-0001-mars-n320-1979-2023-6h-v1-land.zarr
-    graph: graph_enc_proc_dec_n320.pt
     checkpoint:
       every_n_epochs: aifs-by_epoch-epoch_{epoch:03d}-val_wmse_{val_wmse:.3e}
       every_n_train_steps: aifs-by_step-epoch_{epoch:03d}-step_{step:06d}
@@ -299,9 +287,8 @@ hardware:
   num_gpus_per_node: 4
   num_nodes: 16
   num_gpus_per_model: 4
 graph:
-  overwrite: True
   data: data
   hidden: hidden
   nodes:
@@ -309,142 +296,210 @@ graph:
       node_builder:
         _target_: anemoi.graphs.nodes.ZarrDatasetNodes
         dataset: ${dataloader.dataset}
-      attributes:
-        area_weight:
-          _target_: anemoi.graphs.nodes.attributes.AreaWeights
-          norm: unit-max
     hidden:
       node_builder:
         _target_: anemoi.graphs.nodes.ReducedGaussianGridNodes
         grid: o96
   edges:
-    - source_name: data
-      target_name: hidden
-      edge_builder:
-        _target_: anemoi.graphs.edges.CutOffEdges
-        cutoff_factor: 0.6
-      attributes:
-        edge_length:
-          _target_: anemoi.graphs.edges.attributes.EdgeLength
-          norm: unit-std
-        edge_dirs:
-          _target_: anemoi.graphs.edges.attributes.EdgeDirection
-          norm: unit-std
-    - source_name: hidden
-      target_name: data
-      edge_builder:
-        _target_: anemoi.graphs.edges.KNNEdges
-        num_nearest_neighbours: 3
-      attributes:
-        edge_length:
-          _target_: anemoi.graphs.edges.attributes.EdgeLength
-          norm: unit-std
-        edge_dirs:
-          _target_: anemoi.graphs.edges.attributes.EdgeDirection
-          norm: unit-std
 model:
   activation: GELU
   num_channels: 1024
   model:
     _target_: anemoi.models.models.encoder_processor_decoder.AnemoiModelEncProcDec
   processor:
     _target_: anemoi.models.layers.processor.TransformerProcessor
-    _convert_: all
-    activation: GELU
     num_layers: 16
     num_chunks: 2
     mlp_hidden_ratio: 4
     num_heads: 16
     window_size: 1120
-    dropout_p: 0
   encoder:
     _target_: anemoi.models.layers.mapper.GraphTransformerForwardMapper
-    _convert_: all
-    trainable_size: 8
-    sub_graph_edge_attributes: [edge_length, edge_dirs]
-    activation: GELU
     num_chunks: 1
     mlp_hidden_ratio: 4
     num_heads: 16
   decoder:
     _target_: anemoi.models.layers.mapper.GraphTransformerBackwardMapper
-    _convert_: all
-    trainable_size: 8
-    sub_graph_edge_attributes: [edge_length, edge_dirs]
-    activation: GELU
     num_chunks: 1
     mlp_hidden_ratio: 4
     num_heads: 16
   trainable_parameters:
     data: 8
     hidden: 8
     data2hidden: 8
     hidden2data: 8
   attributes:
-    edges: [edge_length, edge_dirs]
     nodes: []
-  node_loss_weight: area_weight
   bounding:
-    - _target_: anemoi.models.layers.bounding.ReluBounding
-      variables:
-        - tp
-        - ro
-        - tcw
-        - ssrd
-        - q_50
-        - q_100
-        - q_150
-        - q_200
-        - q_250
-        - q_300
-        - q_400
-        - q_500
-        - q_600
-        - q_700
-        - q_850
-        - q_925
-        - q_1000
-    - _target_: anemoi.models.layers.bounding.HardtanhBounding
-      variables: [tcc, swvl1, swvl2]
-      min_val: 0
-      max_val: 1
-    - _target_: anemoi.models.layers.bounding.FractionBounding
-      variables: [cp, sf]
-      min_val: 0
-      max_val: 1
-      total_var: tp
-    - _target_: anemoi.models.layers.bounding.FractionBounding
-      variables: [lcc, mcc, hcc]
-      min_val: 0
-      max_val: 1
-      total_var: tcc
 training:
   run_id: null
   fork_run_id: null
-  load_weights_only: null
-  deterministic: False
   precision: 16-mixed
   multistep_input: 2
   accum_grad_batches: 1
   num_sanity_val_steps: 6
   gradient_clip:
-    val: 32
     algorithm: value
   swa:
-    enabled: False
     lr: 0.0001
-  zero_optimizer: False
   training_loss:
     _target_: anemoi.training.losses.mse.WeightedMSELoss
-    scalars: [variable, loss_weights_mask]
-    ignore_nans: False
-  loss_gradient_scaling: False
   validation_metrics:
-    - _target_: anemoi.training.losses.mse.WeightedMSELoss
-      scalars: []
-      ignore_nans: True
   rollout:
     start: 1
     epoch_increment: 0
@@ -452,9 +507,10 @@ training:
   max_epochs: null
   max_steps: 260000
   lr:
-    rate: 0.00003125
     iterations: 260000
-    min: 3.0e-7
   variable_loss_scaling:
     default: 1
     pl:
@@ -473,20 +529,29 @@ training:
       2d: 0.5
       tp: 0.025
       cp: 0.0025
-      ro: 0.005
       sf: 0.025
       tcc: 0.1
       mcc: 0.1
       lcc: 0.1
       hcc: 0.1
-      swvl2: 2.0
-      swvl1: 1.0
       stl2: 10
       stl1: 1
       ssrd: 0.05
       strd: 0.1
-  metrics: [z_500, t_850, u_850, v_850]
   pressure_level_scaler:
     _target_: anemoi.training.data.scaling.ReluPressureLevelScaler
     minimum: 0.2
-    slope: 0.001

   frequency: 6h
   timestep: 6h
   forcing:
+  - cos_latitude
+  - cos_longitude
+  - sin_latitude
+  - sin_longitude
+  - cos_julian_day
+  - cos_local_time
+  - sin_julian_day
+  - sin_local_time
+  - insolation
+  - lsm
+  - sdor
+  - slor
+  - z
+  diagnostic:
+  - tp
+  - cp
+  - sf
+  - tcc
+  - hcc
+  - lcc
+  - mcc
+  - ro
+  - ssrd
+  - strd
+  - 100u
+  - 100v
+  remapped: null
+  normalizer:
+    default: mean-std
+    remap:
+      cp: tp
+      sf: tp
+    std:
+    - tp
+    - cp
+    - sf
+    - ro
+    - tcw
+    - ssrd
+    - q_50
+    - q_100
+    - q_150
+    - q_200
+    - q_250
+    - q_300
+    - q_400
+    - q_500
+    - q_600
+    - q_700
+    - q_850
+    - q_925
+    - q_1000
+    min-max: null
+    max:
+    - sdor
+    - slor
+    - z
+    none:
     - cos_latitude
     - cos_longitude
     - sin_latitude
     - sin_local_time
     - insolation
     - lsm
     - tcc
+    - mcc
     - hcc
     - lcc
+    - swvl1
+    - swvl2
   imputer:
     default: none
+    minimum:
+    - swvl1
+    - swvl2
+    - ro
+    mean:
+    - stl1
+    - stl2
   remapper:
     default: none
   processors:
+    imputer:
+      _target_: anemoi.models.preprocessing.imputer.InputImputer
+      _convert_: all
+      config: ${data.imputer}
     normalizer:
       _target_: anemoi.models.preprocessing.normalizer.InputNormalizer
+      config: ${data.normalizer}
+  num_features: null
 dataloader:
   prefetch_factor: 2
+  pin_memory: true
+  read_group_size: ${hardware.num_gpus_per_model}
   num_workers:
+    training: 8
+    validation: 8
+    test: 1
+    predict: 1
   batch_size:
     training: 1
     validation: 1
     predict: 4
   limit_batches:
     training: null
+    validation: null
     test: 20
     predict: 20
+  grid_indices:
+    _target_: anemoi.training.data.grid_indices.FullGrid
+    nodes_name: ${graph.data}
   dataset: ${hardware.paths.data}/${hardware.files.dataset}
   training:
     dataset:
+    - dataset: ${hardware.paths.data}/${hardware.files.dataset}
+      start: null
+      end: 2022
+      frequency: ${data.frequency}
     start: null
     end: 2022
     drop: []
   validation:
     dataset:
+    - dataset: ${hardware.paths.data}/${hardware.files.dataset}
+      start: 2022
+      end: 2024
+      frequency: ${data.frequency}
     start: 2022
+    end: 2024
+    drop: []
+  test:
+    dataset:
+    - dataset: ${hardware.paths.data}/${hardware.files.dataset}
+      start: 2022
+      end: null
+      frequency: ${data.frequency}
+    start: 2022
+    end: null
     drop: []
 diagnostics:
   plot:
+    asynchronous: true
+    datashader: true
     frequency:
       batch: 750
+      epoch: 5
+    parameters:
+    - z_500
+    - t_850
+    - u_850
+    - v_850
+    - 2t
+    - 10u
+    - 10v
+    - sp
+    - tp
+    - cp
     sample_idx: 0
+    precip_and_related_fields:
+    - tp
+    - cp
+    colormaps:
+      default:
+        _target_: anemoi.training.utils.custom_colormaps.MatplotlibColormap
+        name: viridis
+      error:
+        _target_: anemoi.training.utils.custom_colormaps.MatplotlibColormap
+        name: bwr
+      precip:
+        _target_: anemoi.training.utils.custom_colormaps.MatplotlibColormapClevels
+        clevels:
+        - '#ffffff'
+        - '#04e9e7'
+        - '#019ff4'
+        - '#0300f4'
+        - '#02fd02'
+        - '#01c501'
+        - '#008e00'
+        - '#fdf802'
+        - '#e5bc00'
+        - '#fd9500'
+        - '#fd0000'
+        - '#d40000'
+        - '#bc0000'
+        - '#f800fd'
+        variables: ${diagnostics.plot.precip_and_related_fields}
+    callbacks: []
+  callbacks: []
   benchmark_profiler:
     memory:
+      enabled: true
       steps: 5
       warmup: 2
+      extra_plots: false
+      trace_rank0_only: false
     time:
+      enabled: true
+      verbose: false
     speed:
+      enabled: true
     system:
+      enabled: true
     model_summary:
+      enabled: true
     snapshot:
+      enabled: true
       steps: 4
       warmup: 0
   debug:
+    anomaly_detection: false
+  profiler: false
+  enable_checkpointing: true
   checkpoint:
     every_n_minutes:
       save_frequency: 30
       num_models_saved: 3
     every_n_epochs:
       save_frequency: 1
+      num_models_saved: -1
     every_n_train_steps:
       save_frequency: null
       num_models_saved: 0
   log:
     wandb:
+      enabled: false
+      offline: false
+      log_model: false
+      project: Anemoi
+      entity: ???
+      gradients: false
+      parameters: false
     tensorboard:
+      enabled: false
     mlflow:
+      enabled: false
+      offline: false
+      authentication: false
+      log_model: false
+      tracking_uri: ???
+      experiment_name: ???
+      project_name: ???
+      system: true
+      terminal: true
+      run_name: null
+      on_resume_create_child: true
+      expand_hyperparams:
+      - config
+      http_max_retries: 35
     interval: 100
+  enable_progress_bar: true
+  print_memory_summary: false
 hardware:
   paths:
     data: ${oc.decode:${oc.env:DATASETS_PATH}}
+    output: ${oc.decode:${oc.env:OUTPUT_PATH}}
     logs:
+      base: ${hardware.paths.output}logs/
+      wandb: ${hardware.paths.logs.base}
+      mlflow: ${hardware.paths.logs.base}mlflow/
+      tensorboard: ${hardware.paths.logs.base}tensorboard/
+    checkpoints: ${hardware.paths.output}checkpoint/
+    plots: ${hardware.paths.output}plots/
+    profiler: ${hardware.paths.output}profiler/
+    graph: ${hardware.paths.output}graphs/
   files:
+    dataset: aifs-ea-an-oper-0001-mars-${data.resolution}-1979-2024-6h-v1-aifs-single-v1.zarr
+    graph: graph_enc_proc_dec_${data.resolution}.pt
+    truncation: null
+    truncation_inv: null
     checkpoint:
       every_n_epochs: aifs-by_epoch-epoch_{epoch:03d}-val_wmse_{val_wmse:.3e}
       every_n_train_steps: aifs-by_step-epoch_{epoch:03d}-step_{step:06d}
   num_gpus_per_node: 4
   num_nodes: 16
   num_gpus_per_model: 4
 graph:
+  overwrite: true
   data: data
   hidden: hidden
   nodes:
       node_builder:
         _target_: anemoi.graphs.nodes.ZarrDatasetNodes
         dataset: ${dataloader.dataset}
+      attributes: ${graph.attributes.nodes}
     hidden:
       node_builder:
         _target_: anemoi.graphs.nodes.ReducedGaussianGridNodes
         grid: o96
   edges:
+  - source_name: ${graph.data}
+    target_name: ${graph.hidden}
+    edge_builders:
+    - _target_: anemoi.graphs.edges.CutOffEdges
+      cutoff_factor: 0.6
+      source_mask_attr_name: null
+      target_mask_attr_name: null
+    attributes: ${graph.attributes.edges}
+  - source_name: ${graph.hidden}
+    target_name: ${graph.data}
+    edge_builders:
+    - _target_: anemoi.graphs.edges.KNNEdges
+      num_nearest_neighbours: 3
+      source_mask_attr_name: null
+      target_mask_attr_name: null
+    attributes: ${graph.attributes.edges}
+  attributes:
+    nodes:
+      area_weight:
+        _target_: anemoi.graphs.nodes.attributes.SphericalAreaWeights
+        norm: unit-max
+        fill_value: 0
+    edges:
+      edge_length:
+        _target_: anemoi.graphs.edges.attributes.EdgeLength
+        norm: unit-std
+      edge_dirs:
+        _target_: anemoi.graphs.edges.attributes.EdgeDirection
+        norm: unit-std
+  post_processors: []
 model:
   activation: GELU  # shared default; processor/encoder/decoder read it via ${model.activation}
   num_channels: 1024
+  cpu_offload: false  # shared default; processor/encoder/decoder read it via ${model.cpu_offload}
+  output_mask: null
   model:
     _target_: anemoi.models.models.encoder_processor_decoder.AnemoiModelEncProcDec
+  layer_kernels:  # layer classes per component; every entry below sets _partial_: true
+    processor:  # defines QueryNorm/KeyNorm (AutocastLayerNorm, bias: false) in addition to LayerNorm/Linear
+      LayerNorm:
+        _target_: torch.nn.LayerNorm
+        _partial_: true
+      Linear:
+        _target_: torch.nn.Linear
+        _partial_: true
+      QueryNorm:
+        _target_: anemoi.models.layers.normalization.AutocastLayerNorm
+        _partial_: true
+        bias: false
+      KeyNorm:
+        _target_: anemoi.models.layers.normalization.AutocastLayerNorm
+        _partial_: true
+        bias: false
+    encoder:  # LayerNorm and Linear only
+      LayerNorm:
+        _target_: torch.nn.LayerNorm
+        _partial_: true
+      Linear:
+        _target_: torch.nn.Linear
+        _partial_: true
+    decoder:  # LayerNorm and Linear only
+      LayerNorm:
+        _target_: torch.nn.LayerNorm
+        _partial_: true
+      Linear:
+        _target_: torch.nn.Linear
+        _partial_: true
   processor:
     _target_: anemoi.models.layers.processor.TransformerProcessor
+    activation: ${model.activation}  # interpolated from the model-level key (GELU)
     num_layers: 16
     num_chunks: 2
     mlp_hidden_ratio: 4
     num_heads: 16
     window_size: 1120
+    dropout_p: 0.0
+    attention_implementation: flash_attention
+    qk_norm: false  # NOTE(review): layer_kernels.processor defines QueryNorm/KeyNorm; presumably unused while this is false - confirm
+    softcap: 0.0
+    use_alibi_slopes: false
+    cpu_offload: ${model.cpu_offload}  # interpolated from the model-level key (false)
   encoder:
     _target_: anemoi.models.layers.mapper.GraphTransformerForwardMapper
+    trainable_size: ${model.trainable_parameters.data2hidden}  # interpolates to 8
+    sub_graph_edge_attributes: ${model.attributes.edges}  # interpolates to [edge_length, edge_dirs]
+    activation: ${model.activation}  # interpolated from the model-level key (GELU)
     num_chunks: 1
     mlp_hidden_ratio: 4
     num_heads: 16
+    qk_norm: false
+    cpu_offload: ${model.cpu_offload}  # interpolated from the model-level key (false)
   decoder:
     _target_: anemoi.models.layers.mapper.GraphTransformerBackwardMapper
+    trainable_size: ${model.trainable_parameters.hidden2data}  # interpolates to 8
+    sub_graph_edge_attributes: ${model.attributes.edges}  # interpolates to [edge_length, edge_dirs]
+    activation: ${model.activation}  # interpolated from the model-level key (GELU)
     num_chunks: 1
     mlp_hidden_ratio: 4
     num_heads: 16
+    initialise_data_extractor_zero: false
+    qk_norm: false
+    cpu_offload: ${model.cpu_offload}  # interpolated from the model-level key (false)
   trainable_parameters:
     data: 8
     hidden: 8
     data2hidden: 8  # read by encoder.trainable_size
     hidden2data: 8  # read by decoder.trainable_size
   attributes:
+    edges:  # read by encoder/decoder sub_graph_edge_attributes
+    - edge_length
+    - edge_dirs
     nodes: []
   bounding:  # list of bounding layers, each with its own variables list
+  - _target_: anemoi.models.layers.bounding.ReluBounding
+    variables:  # each variable is listed exactly once
+    - tp
+    - ro
+    - tcw
+    - ssrd
+    - q_50
+    - q_100
+    - q_150
+    - q_200
+    - q_250
+    - q_300
+    - q_400
+    - q_500
+    - q_600
+    - q_700
+    - q_850
+    - q_925
+    - q_1000
+  - _target_: anemoi.models.layers.bounding.HardtanhBounding
+    variables:
+    - tcc
+    - swvl1
+    - swvl2
+    min_val: 0
+    max_val: 1
+  - _target_: anemoi.models.layers.bounding.FractionBounding
+    variables:
+    - cp
+    - sf
+    min_val: 0
+    max_val: 1
+    total_var: tp  # NOTE(review): presumably cp/sf are bounded as fractions of tp - confirm
+  - _target_: anemoi.models.layers.bounding.FractionBounding
+    variables:
+    - lcc
+    - mcc
+    - hcc
+    min_val: 0
+    max_val: 1
+    total_var: tcc  # NOTE(review): presumably lcc/mcc/hcc are bounded as fractions of tcc - confirm
 training:
   run_id: null
   fork_run_id: null
+  transfer_learning: false
+  load_weights_only: false
+  deterministic: false
   precision: 16-mixed
   multistep_input: 2
   accum_grad_batches: 1
   num_sanity_val_steps: 6
   gradient_clip:
+    val: 32.0
     algorithm: value
   swa:
+    enabled: false
     lr: 0.0001
+  optimizer:
+    zero: false
+    kwargs:
+      betas:
+      - 0.9
+      - 0.95
+  model_task: anemoi.training.train.forecaster.GraphForecaster
+  strategy:
+    _target_: anemoi.training.distributed.strategy.DDPGroupStrategy
+    num_gpus_per_model: ${hardware.num_gpus_per_model}
+    read_group_size: ${dataloader.read_group_size}
+  loss_gradient_scaling: false
   training_loss:
     _target_: anemoi.training.losses.mse.WeightedMSELoss
+    scalars:
+    - variable
+    - loss_weights_mask
+    ignore_nans: false
   validation_metrics:
+  - _target_: anemoi.training.losses.mse.WeightedMSELoss
+    scalars: []
+    ignore_nans: true
+  scale_validation_metrics:
+    scalars_to_apply:
+    - variable
+    metrics:
+    - all
   rollout:
     start: 1
     epoch_increment: 0
   max_epochs: null
   max_steps: 260000
   lr:
+    warmup: 1000
+    rate: 3.125e-05
     iterations: 260000
+    min: 3.0e-07
   variable_loss_scaling:
     default: 1
     pl:
       2d: 0.5
       tp: 0.025
       cp: 0.0025
+      ro: 0.0025
       sf: 0.025
       tcc: 0.1
       mcc: 0.1
       lcc: 0.1
       hcc: 0.1
+      swvl2: 2
+      swvl1: 1
       stl2: 10
       stl1: 1
       ssrd: 0.05
       strd: 0.1
+  metrics:
+  - z_500
+  - t_850
+  - u_850
+  - v_850
   pressure_level_scaler:
     _target_: anemoi.training.data.scaling.ReluPressureLevelScaler
     minimum: 0.2
+    slope: 0.001
+  node_loss_weights:
+    _target_: anemoi.training.losses.nodeweights.GraphNodeAttribute
+    target_nodes: ${graph.data}
+    node_attribute: area_weight
+  submodules_to_freeze: []