{
  "model_id": "meta-llama/Llama-3.2-3B",
  "dtype": "bfloat16",
  "max_seq_length": 768,
  "batch_size": 4,
  "batch_size_eval": 50,
  "max_steps": 5000,
  "eval_steps": 250,
  "compile": false,
  "seed": 0,
  "grad_norm_clip": 1.0,
  "optimizer_type": "AdamW",
  "optimizer_kwargs": {
    "lr": 1e-4,
    "weight_decay": 0.1
  },
  "lr_scheduler": "cosine",
  "use_amp": false,
  "autocast_adapter_dtype": true,
  "attn_implementation": null,
  "generation_kwargs": {
    "max_length": 800,
    "max_new_tokens": 300
  },
  "query_template": "Question: {query} Think step by step.\nAnswer:"
}