# Dataset description and pre-processing settings. Values defined here are
# reused elsewhere in this file through ${data.*} interpolations
# (${data.resolution}, ${data.frequency}, ${data.imputer}, ${data.normalizer}).
data:
  format: zarr
  resolution: n320
  frequency: 6h
  timestep: 6h
  # Variables declared as forcing.
  # NOTE(review): presumably input-only fields that are never predicted —
  # confirm against the consumer.
  forcing:
  - cos_latitude
  - cos_longitude
  - sin_latitude
  - sin_longitude
  - cos_julian_day
  - cos_local_time
  - sin_julian_day
  - sin_local_time
  - insolation
  - lsm
  - sdor
  - slor
  - z
  # Variables declared as diagnostic.
  # NOTE(review): presumably output-only fields — confirm against the consumer.
  diagnostic:
  - tp
  - cp
  - sf
  - tcc
  - hcc
  - lcc
  - mcc
  - ro
  - ssrd
  - strd
  - 100u
  - 100v
  remapped: null
  # Per-variable normalisation. Each list below names the variables assigned
  # to that method; `default` is presumably used for every variable that is
  # not listed (TODO confirm).
  normalizer:
    default: mean-std
    # cp and sf are mapped onto tp.
    # NOTE(review): presumably so they share tp's statistics — confirm.
    remap:
      cp: tp
      sf: tp
    std:
    - tp
    - cp
    - sf
    - ro
    - tcw
    - ssrd
    - q_50
    - q_100
    - q_150
    - q_200
    - q_250
    - q_300
    - q_400
    - q_500
    - q_600
    - q_700
    - q_850
    - q_925
    - q_1000
    min-max: null
    max:
    - sdor
    - slor
    - z
    # Variables listed under `none`; this covers the trigonometric/time
    # forcings, insolation, lsm, the cloud-cover fields and swvl1/swvl2.
    none:
    - cos_latitude
    - cos_longitude
    - sin_latitude
    - sin_longitude
    - cos_julian_day
    - cos_local_time
    - sin_julian_day
    - sin_local_time
    - insolation
    - lsm
    - tcc
    - mcc
    - hcc
    - lcc
    - swvl1
    - swvl2
  # Per-variable imputation: swvl1/swvl2/ro are listed under `minimum`,
  # stl1/stl2 under `mean`; `default` is `none`.
  imputer:
    default: none
    minimum:
    - swvl1
    - swvl2
    - ro
    mean:
    - stl1
    - stl2
  remapper:
    default: none
  # Pre-processor objects built from the `imputer` and `normalizer` sections
  # above (passed in through the `config` interpolations).
  # NOTE(review): the imputer is declared before the normalizer; if the
  # consumer applies processors in mapping order, this order must be kept.
  processors:
    imputer:
      _target_: anemoi.models.preprocessing.imputer.InputImputer
      _convert_: all
      config: ${data.imputer}
    normalizer:
      _target_: anemoi.models.preprocessing.normalizer.InputNormalizer
      config: ${data.normalizer}
  # Left unset here.
  num_features: null
# Data loading settings: worker/batch configuration per stage and the
# dataset split used for each of training, validation and test.
dataloader:
  prefetch_factor: 2
  pin_memory: true
  # Tied to the number of GPUs per model defined in the `hardware` section.
  read_group_size: ${hardware.num_gpus_per_model}
  num_workers:
    training: 8
    validation: 8
    test: 1
    predict: 1
  batch_size:
    training: 1
    validation: 1
    test: 4
    predict: 4
  # null leaves training/validation unset; test and predict are set to 20.
  limit_batches:
    training: null
    validation: null
    test: 20
    predict: 20
  grid_indices:
    _target_: anemoi.training.data.grid_indices.FullGrid
    nodes_name: ${graph.data}
  # Dataset path, composed from the data directory and the dataset file name
  # defined in the `hardware` section. Also referenced by the graph section
  # via ${dataloader.dataset}.
  dataset: ${hardware.paths.data}/${hardware.files.dataset}
  # NOTE(review): training ends at 2022 while validation and test both start
  # at 2022. If `end` is inclusive, the year 2022 is shared by all three
  # splits and validation/test scores are not independent of training —
  # confirm this overlap is intended.
  training:
    dataset:
    - dataset: ${hardware.paths.data}/${hardware.files.dataset}
      start: null
      end: 2022
      frequency: ${data.frequency}
    start: null
    end: 2022
    drop: []
  validation:
    dataset:
    - dataset: ${hardware.paths.data}/${hardware.files.dataset}
      start: 2022
      end: 2024
      frequency: ${data.frequency}
    start: 2022
    end: 2024
    drop: []
  test:
    dataset:
    - dataset: ${hardware.paths.data}/${hardware.files.dataset}
      start: 2022
      end: null
      frequency: ${data.frequency}
    start: 2022
    end: null
    drop: []
# Diagnostics: plotting, profiling, checkpointing and experiment logging.
diagnostics:
  plot:
    asynchronous: true
    datashader: true
    frequency:
      batch: 750
      epoch: 5
    # Variables selected for plotting.
    parameters:
    - z_500
    - t_850
    - u_850
    - v_850
    - 2t
    - 10u
    - 10v
    - sp
    - tp
    - cp
    sample_idx: 0
    # Also consumed by the `precip` colormap below through an interpolation.
    precip_and_related_fields:
    - tp
    - cp
    colormaps:
      default:
        _target_: anemoi.training.utils.custom_colormaps.MatplotlibColormap
        name: viridis
      error:
        _target_: anemoi.training.utils.custom_colormaps.MatplotlibColormap
        name: bwr
      # Colormap defined by 14 explicit hex colours, applied to the variables
      # listed in `precip_and_related_fields`.
      precip:
        _target_: anemoi.training.utils.custom_colormaps.MatplotlibColormapClevels
        clevels:
        - '#ffffff'
        - '#04e9e7'
        - '#019ff4'
        - '#0300f4'
        - '#02fd02'
        - '#01c501'
        - '#008e00'
        - '#fdf802'
        - '#e5bc00'
        - '#fd9500'
        - '#fd0000'
        - '#d40000'
        - '#bc0000'
        - '#f800fd'
        variables: ${diagnostics.plot.precip_and_related_fields}
    # No plot callbacks are configured.
    callbacks: []
  callbacks: []
  # Settings for the individual benchmark profilers. Note that the top-level
  # `profiler` switch further down is false.
  benchmark_profiler:
    memory:
      enabled: true
      steps: 5
      warmup: 2
      extra_plots: false
      trace_rank0_only: false
    time:
      enabled: true
      verbose: false
    speed:
      enabled: true
    system:
      enabled: true
    model_summary:
      enabled: true
    snapshot:
      enabled: true
      steps: 4
      warmup: 0
  debug:
    anomaly_detection: false
  profiler: false
  enable_checkpointing: true
  # Three checkpoint triggers, each with its own save frequency and retention.
  # NOTE(review): -1 presumably means "keep all" and a null frequency
  # presumably disables that trigger — confirm against the consumer.
  checkpoint:
    every_n_minutes:
      save_frequency: 30
      num_models_saved: 3
    every_n_epochs:
      save_frequency: 1
      num_models_saved: -1
    every_n_train_steps:
      save_frequency: null
      num_models_saved: 0
  # Experiment loggers; all three are disabled here. `???` is OmegaConf's
  # marker for a mandatory value that has not been set: reading such a key
  # raises an error, so these must be supplied before the matching logger is
  # enabled.
  log:
    wandb:
      enabled: false
      offline: false
      log_model: false
      project: Anemoi
      entity: ???
      gradients: false
      parameters: false
    tensorboard:
      enabled: false
    mlflow:
      enabled: false
      offline: false
      authentication: false
      log_model: false
      tracking_uri: ???
      experiment_name: ???
      project_name: ???
      system: true
      terminal: true
      run_name: null
      on_resume_create_child: true
      expand_hyperparams:
      - config
      http_max_retries: 35
    interval: 100
  enable_progress_bar: true
  print_memory_summary: false
# Hardware layout plus all input/output paths and file names.
hardware:
  paths:
    # Both roots are read from environment variables (DATASETS_PATH and
    # OUTPUT_PATH) and passed through oc.decode.
    data: ${oc.decode:${oc.env:DATASETS_PATH}}
    output: ${oc.decode:${oc.env:OUTPUT_PATH}}
    # Every sub-path below is appended directly to the `output` value with no
    # separator in between, so OUTPUT_PATH must end with a trailing "/";
    # otherwise e.g. "/out" becomes "/outlogs/".
    logs:
      base: ${hardware.paths.output}logs/
      wandb: ${hardware.paths.logs.base}
      mlflow: ${hardware.paths.logs.base}mlflow/
      tensorboard: ${hardware.paths.logs.base}tensorboard/
    checkpoints: ${hardware.paths.output}checkpoint/
    plots: ${hardware.paths.output}plots/
    profiler: ${hardware.paths.output}profiler/
    graph: ${hardware.paths.output}graphs/
  files:
    # Dataset and graph file names embed ${data.resolution}.
    dataset: aifs-ea-an-oper-0001-mars-${data.resolution}-1979-2024-6h-v1-aifs-single-v1.zarr
    graph: graph_enc_proc_dec_${data.resolution}.pt
    truncation: null
    truncation_inv: null
    # Checkpoint file-name templates, one per checkpoint trigger. The
    # {epoch:03d}, {step:06d} and {val_wmse:.3e} placeholders are not OmegaConf
    # interpolations; presumably the checkpoint callback fills them in.
    checkpoint:
      every_n_epochs: aifs-by_epoch-epoch_{epoch:03d}-val_wmse_{val_wmse:.3e}
      every_n_train_steps: aifs-by_step-epoch_{epoch:03d}-step_{step:06d}
      every_n_minutes: aifs-by_time-epoch_{epoch:03d}-step_{step:06d}
    warm_start: null
  accelerator: auto
  # 16 nodes x 4 GPUs per node = 64 GPUs, with 4 GPUs assigned to each model.
  # `num_gpus_per_model` is also referenced by dataloader.read_group_size and
  # training.strategy.
  num_gpus_per_node: 4
  num_nodes: 16
  num_gpus_per_model: 4
# Graph definition: two node sets ("data" and "hidden") and the edges that
# connect them in each direction.
graph:
  overwrite: true
  # Names of the two node sets; referenced elsewhere as ${graph.data} and
  # ${graph.hidden}.
  data: data
  hidden: hidden
  nodes:
    # Data nodes are built from the dataset referenced by the dataloader and
    # receive the node attributes defined under `attributes.nodes` below.
    data:
      node_builder:
        _target_: anemoi.graphs.nodes.ZarrDatasetNodes
        dataset: ${dataloader.dataset}
      attributes: ${graph.attributes.nodes}
    # Hidden nodes use a reduced Gaussian grid (o96); no node attributes are
    # attached to them here.
    hidden:
      node_builder:
        _target_: anemoi.graphs.nodes.ReducedGaussianGridNodes
        grid: o96
  edges:
  # data -> hidden: edges built with CutOffEdges (cutoff_factor 0.6).
  - source_name: ${graph.data}
    target_name: ${graph.hidden}
    edge_builders:
    - _target_: anemoi.graphs.edges.CutOffEdges
      cutoff_factor: 0.6
      source_mask_attr_name: null
      target_mask_attr_name: null
    attributes: ${graph.attributes.edges}
  # hidden -> data: edges built with KNNEdges (3 nearest neighbours).
  - source_name: ${graph.hidden}
    target_name: ${graph.data}
    edge_builders:
    - _target_: anemoi.graphs.edges.KNNEdges
      num_nearest_neighbours: 3
      source_mask_attr_name: null
      target_mask_attr_name: null
    attributes: ${graph.attributes.edges}
  # Attribute definitions shared by the node and edge entries above.
  attributes:
    nodes:
      # `area_weight` is the attribute name that
      # training.node_loss_weights.node_attribute refers to.
      area_weight:
        _target_: anemoi.graphs.nodes.attributes.SphericalAreaWeights
        norm: unit-max
        fill_value: 0
    edges:
      # `edge_length` and `edge_dirs` are the names listed under
      # model.attributes.edges.
      edge_length:
        _target_: anemoi.graphs.edges.attributes.EdgeLength
        norm: unit-std
      edge_dirs:
        _target_: anemoi.graphs.edges.attributes.EdgeDirection
        norm: unit-std
  post_processors: []
# Model architecture: an encoder / processor / decoder model. The shared
# `activation` and `cpu_offload` values are reused by the three components
# through interpolations.
model:
  activation: GELU
  num_channels: 1024
  cpu_offload: false
  output_mask: null
  model:
    _target_: anemoi.models.models.encoder_processor_decoder.AnemoiModelEncProcDec
  # Layer classes per component. `_partial_: true` makes Hydra return a
  # partially-applied constructor rather than an instance.
  layer_kernels:
    processor:
      LayerNorm:
        _target_: torch.nn.LayerNorm
        _partial_: true
      Linear:
        _target_: torch.nn.Linear
        _partial_: true
      # QueryNorm/KeyNorm are only declared for the processor.
      # NOTE(review): presumably only used when `qk_norm` is true, which it is
      # not in this file — confirm.
      QueryNorm:
        _target_: anemoi.models.layers.normalization.AutocastLayerNorm
        _partial_: true
        bias: false
      KeyNorm:
        _target_: anemoi.models.layers.normalization.AutocastLayerNorm
        _partial_: true
        bias: false
    encoder:
      LayerNorm:
        _target_: torch.nn.LayerNorm
        _partial_: true
      Linear:
        _target_: torch.nn.Linear
        _partial_: true
    decoder:
      LayerNorm:
        _target_: torch.nn.LayerNorm
        _partial_: true
      Linear:
        _target_: torch.nn.Linear
        _partial_: true
  # Transformer processor: 16 layers, 16 heads, attention window of 1120.
  processor:
    _target_: anemoi.models.layers.processor.TransformerProcessor
    activation: ${model.activation}
    num_layers: 16
    num_chunks: 2
    mlp_hidden_ratio: 4
    num_heads: 16
    window_size: 1120
    dropout_p: 0.0
    attention_implementation: flash_attention
    qk_norm: false
    softcap: 0.0
    use_alibi_slopes: false
    cpu_offload: ${model.cpu_offload}
  # Graph-transformer forward mapper. Its trainable size and edge attributes
  # come from `trainable_parameters.data2hidden` and `attributes.edges` below.
  encoder:
    _target_: anemoi.models.layers.mapper.GraphTransformerForwardMapper
    trainable_size: ${model.trainable_parameters.data2hidden}
    sub_graph_edge_attributes: ${model.attributes.edges}
    activation: ${model.activation}
    num_chunks: 1
    mlp_hidden_ratio: 4
    num_heads: 16
    qk_norm: false
    cpu_offload: ${model.cpu_offload}
  # Graph-transformer backward mapper. Its trainable size and edge attributes
  # come from `trainable_parameters.hidden2data` and `attributes.edges` below.
  decoder:
    _target_: anemoi.models.layers.mapper.GraphTransformerBackwardMapper
    trainable_size: ${model.trainable_parameters.hidden2data}
    sub_graph_edge_attributes: ${model.attributes.edges}
    activation: ${model.activation}
    num_chunks: 1
    mlp_hidden_ratio: 4
    num_heads: 16
    initialise_data_extractor_zero: false
    qk_norm: false
    cpu_offload: ${model.cpu_offload}
  # Sizes referenced by the encoder/decoder `trainable_size` keys above.
  trainable_parameters:
    data: 8
    hidden: 8
    data2hidden: 8
    hidden2data: 8
  # Edge attribute names match the keys under graph.attributes.edges; no node
  # attributes are selected.
  attributes:
    edges:
    - edge_length
    - edge_dirs
    nodes: []
  # Output bounding layers: one list entry per bounding strategy, each naming
  # the variables it applies to.
  # NOTE(review): the class names suggest ReLU (>= 0), hardtanh clamping to
  # [min_val, max_val], and bounding as a fraction of `total_var` — confirm
  # against the anemoi.models.layers.bounding implementation.
  bounding:
  # Keep each variable listed only once per entry; a repeated name adds
  # nothing and makes the list harder to audit.
  - _target_: anemoi.models.layers.bounding.ReluBounding
    variables:
    - tp
    - ro
    - tcw
    - ssrd
    - q_50
    - q_100
    - q_150
    - q_200
    - q_250
    - q_300
    - q_400
    - q_500
    - q_600
    - q_700
    - q_850
    - q_925
    - q_1000
  # tcc and the soil-water variables, with bounds 0 and 1.
  - _target_: anemoi.models.layers.bounding.HardtanhBounding
    variables:
    - tcc
    - swvl1
    - swvl2
    min_val: 0
    max_val: 1
  # cp and sf, with `total_var` set to tp.
  - _target_: anemoi.models.layers.bounding.FractionBounding
    variables:
    - cp
    - sf
    min_val: 0
    max_val: 1
    total_var: tp
  # lcc, mcc and hcc, with `total_var` set to tcc.
  - _target_: anemoi.models.layers.bounding.FractionBounding
    variables:
    - lcc
    - mcc
    - hcc
    min_val: 0
    max_val: 1
    total_var: tcc
# Training run settings: precision, optimisation, distribution strategy,
# losses, schedule and per-variable loss weights.
training:
  run_id: null
  fork_run_id: null
  transfer_learning: false
  load_weights_only: false
  deterministic: false
  precision: 16-mixed
  multistep_input: 2
  accum_grad_batches: 1
  num_sanity_val_steps: 6
  gradient_clip:
    val: 32.0
    algorithm: value
  swa:
    enabled: false
    lr: 0.0001
  optimizer:
    zero: false
    kwargs:
      betas:
      - 0.9
      - 0.95
  model_task: anemoi.training.train.forecaster.GraphForecaster
  # Distributed strategy; both values resolve to hardware.num_gpus_per_model
  # (read_group_size via dataloader.read_group_size).
  strategy:
    _target_: anemoi.training.distributed.strategy.DDPGroupStrategy
    num_gpus_per_model: ${hardware.num_gpus_per_model}
    read_group_size: ${dataloader.read_group_size}
  loss_gradient_scaling: false
  # Training loss: weighted MSE with the `variable` and `loss_weights_mask`
  # scalars, ignore_nans off.
  training_loss:
    _target_: anemoi.training.losses.mse.WeightedMSELoss
    scalars:
    - variable
    - loss_weights_mask
    ignore_nans: false
  # Validation metric: the same loss class with no scalars and ignore_nans on.
  validation_metrics:
  - _target_: anemoi.training.losses.mse.WeightedMSELoss
    scalars: []
    ignore_nans: true
  scale_validation_metrics:
    scalars_to_apply:
    - variable
    metrics:
    - all
  # start == max == 1 with a zero increment, so the rollout setting is
  # constant at 1.
  rollout:
    start: 1
    epoch_increment: 0
    max: 1
  # No epoch limit is set; `max_steps` and `lr.iterations` below carry the
  # same value (260000).
  max_epochs: null
  max_steps: 260000
  lr:
    warmup: 1000
    rate: 3.125e-05
    iterations: 260000
    min: 3.0e-07
  # Per-variable loss weights, grouped into `pl` and `sfc`.
  # NOTE(review): presumably `pl` keys are pressure-level variable prefixes
  # and variables that are not listed (e.g. 2t) fall back to `default` —
  # confirm against the consumer.
  variable_loss_scaling:
    default: 1
    pl:
      q: 0.6
      t: 6
      u: 0.8
      v: 0.5
      w: 0.001
      z: 12
    sfc:
      sp: 10
      10u: 0.5
      10v: 0.5
      100u: 0.1
      100v: 0.1
      2d: 0.5
      tp: 0.025
      cp: 0.0025
      ro: 0.0025
      sf: 0.025
      tcc: 0.1
      mcc: 0.1
      lcc: 0.1
      hcc: 0.1
      swvl2: 2
      swvl1: 1
      stl2: 10
      stl1: 1
      ssrd: 0.05
      strd: 0.1
  metrics:
  - z_500
  - t_850
  - u_850
  - v_850
  pressure_level_scaler:
    _target_: anemoi.training.data.scaling.ReluPressureLevelScaler
    minimum: 0.2
    slope: 0.001
  # Node loss weights are read from the `area_weight` attribute of the data
  # nodes (the attribute is defined under graph.attributes.nodes).
  node_loss_weights:
    _target_: anemoi.training.losses.nodeweights.GraphNodeAttribute
    target_nodes: ${graph.data}
    node_attribute: area_weight
  submodules_to_freeze: []