{ "run_info": { "created_at": "2025-06-19T23:12:19+00:00", "total_time": 2209.243281380004, "experiment_name": "adalora/llama-3.2-3B-rank32", "peft_branch": "main", "train_config": { "model_id": "meta-llama/Llama-3.2-3B", "dtype": "bfloat16", "max_seq_length": 768, "batch_size": 4, "batch_size_eval": 50, "max_steps": 5000, "eval_steps": 250, "compile": false, "query_template": "Question: {query} Think step by step.\nAnswer:", "seed": 0, "grad_norm_clip": 1.0, "optimizer_type": "AdamW", "optimizer_kwargs": { "lr": 0.0001, "weight_decay": 0.1 }, "lr_scheduler": "cosine", "use_amp": false, "autocast_adapter_dtype": true, "generation_kwargs": { "max_length": 800, "max_new_tokens": 300 }, "attn_implementation": null }, "peft_config": { "task_type": null, "peft_type": "ADALORA", "auto_mapping": null, "base_model_name_or_path": "meta-llama/Llama-3.2-3B", "revision": null, "inference_mode": false, "r": 8, "target_modules": [ "q_proj", "v_proj" ], "exclude_modules": null, "lora_alpha": 8, "lora_dropout": 0.0, "fan_in_fan_out": false, "bias": "none", "use_rslora": false, "modules_to_save": null, "init_lora_weights": true, "layers_to_transform": null, "layers_pattern": null, "rank_pattern": { "model.layers.0.self_attn.q_proj.lora_E": [ false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false ], "model.layers.0.self_attn.v_proj.lora_E": [ true, true, true, true, true, true, true, true, true, false, true, true, true, true, true, false, true, true, true, false, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, false, true, true, true, true, true, true, true, true, true, true, true ], "model.layers.1.self_attn.q_proj.lora_E": [ false, false, true, true, false, true, true, false, false, false, false, true, false, false, true, true, true, true, false, false, false, false, false, false, true, false, true, true, false, false, true, true, true, false, true, true, false, false, true, true, true, false, false, false, true, false, true, true, true, true, false, true, true, true, false, false, true, true, false, false, true, true, false, false ], "model.layers.1.self_attn.v_proj.lora_E": [ true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, false, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, false, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true ], "model.layers.2.self_attn.q_proj.lora_E": [ true, false, true, false, false, false, true, true, true, true, false, true, true, true, false, false, true, false, false, true, false, false, false, false, true, true, false, false, false, false, true, false, false, false, false, false, false, true, true, true, false, false, false, true, true, false, false, false, true, false, true, true, false, true, false, false, false, true, true, false, true, true, false, false ], "model.layers.2.self_attn.v_proj.lora_E": [ true, false, false, false, true, true, true, true, false, true, true, true, false, true, false, true, false, true, false, true, false, true, true, true, true, true, false, true, false, false, false, false, true, false, false, false, false, false, true, true, false, false, true, true, false, true, true, true, true, false, false, true, false, true, false, false, false, true, true, false, false, false, true, true ], "model.layers.3.self_attn.q_proj.lora_E": [ false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false ], "model.layers.3.self_attn.v_proj.lora_E": [ false, false, false, false, false, true, false, true, false, false, false, false, true, false, false, false, false, true, true, true, true, false, true, false, false, false, false, false, false, false, false, false, true, false, false, true, false, false, true, false, true, false, true, true, false, true, false, false, true, false, false, false, false, false, true, false, true, false, false, false, false, true, true, true ], "model.layers.4.self_attn.q_proj.lora_E": [ false, false, false, false, false, true, false, false, true, true, false, true, false, false, false, false, false, false, false, true, false, false, true, false, true, false, false, false, false, false, true, false, false, false, false, false, false, false, true, true, false, false, true, false, false, false, true, true, false, false, false, false, false, false, false, false, false, false, false, false, true, true, false, false ], "model.layers.4.self_attn.v_proj.lora_E": [ true, false, true, true, false, false, true, false, false, false, true, false, true, true, false, true, false, true, true, false, true, true, false, false, true, true, true, true, false, false, false, false, false, false, true, false, true, false, false, true, true, true, true, true, false, false, false, false, false, true, false, true, true, true, true, true, false, true, true, false, true, true, true, true ], "model.layers.5.self_attn.q_proj.lora_E": [ false, true, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false ], "model.layers.5.self_attn.v_proj.lora_E": [ true, true, true, true, true, true, true, true, false, true, false, false, true, false, false, true, false, true, false, false, false, false, true, true, false, false, false, false, true, false, true, false, true, true, false, false, true, true, true, true, false, false, true, false, true, false, false, true, true, true, false, true, false, false, false, true, true, true, true, false, false, false, true, true ], "model.layers.6.self_attn.q_proj.lora_E": [ false, false, true, true, false, false, true, true, false, false, false, true, false, true, false, true, false, false, false, false, true, true, true, true, false, true, false, true, false, true, false, false, false, true, true, false, false, false, false, true, true, true, false, false, true, false, false, false, false, true, true, false, false, false, true, false, false, false, false, false, false, false, false, false ], "model.layers.6.self_attn.v_proj.lora_E": [ false, true, true, true, true, true, true, true, true, true, false, true, true, true, true, true, false, true, true, true, false, false, false, false, true, true, false, false, false, false, true, true, false, true, true, true, false, true, true, true, false, true, true, true, true, false, false, false, true, true, false, false, true, false, true, false, true, true, false, true, false, true, false, true ], "model.layers.7.self_attn.q_proj.lora_E": [ false, false, false, false, false, true, false, false, false, false, false, false, false, false, true, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, true, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, true, false, false, false, false, false, false, false, false, false, false, false, false, false ], "model.layers.7.self_attn.v_proj.lora_E": [ false, false, true, true, false, true, true, true, true, false, true, true, true, true, true, true, true, true, true, true, false, false, true, true, true, true, true, false, true, false, false, true, true, true, true, false, false, false, true, false, false, true, true, true, false, true, true, true, true, true, true, false, true, true, true, true, true, false, false, false, true, true, true, true ], "model.layers.8.self_attn.q_proj.lora_E": [ false, true, false, false, false, false, false, true, false, false, false, false, true, true, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, true, false, false, false, false, true, false, false, true, false, false, false, false, true, false, false, false, false, false, true, false, false, false, false, false, false, false, false, false, false, true ], "model.layers.8.self_attn.v_proj.lora_E": [ false, true, false, false, false, true, false, false, false, false, true, true, true, true, true, false, false, true, true, true, false, true, true, true, true, true, false, true, true, false, false, true, true, false, false, true, false, true, false, true, true, false, true, false, true, true, true, false, true, false, false, true, true, true, false, true, true, true, true, false, false, false, false, true ], "model.layers.9.self_attn.q_proj.lora_E": [ true, false, true, true, false, false, true, true, false, false, true, false, false, false, false, true, false, true, false, true, false, false, false, true, false, true, false, true, false, true, false, true, true, false, false, true, true, false, false, false, false, true, true, true, false, false, false, false, true, true, true, false, false, false, false, false, true, false, true, false, false, true, false, true ], "model.layers.9.self_attn.v_proj.lora_E": [ true, true, false, true, true, true, true, true, true, false, true, true, true, true, true, false, true, true, true, true, true, true, true, true, false, true, true, true, true, true, true, true, false, true, true, false, true, true, true, true, true, true, false, false, true, false, true, false, true, true, true, true, true, true, true, true, true, true, true, true, true, true, false, false ], "model.layers.10.self_attn.q_proj.lora_E": [ false, false, false, false, false, false, false, false, false, false, true, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, true, false, false, false, false, false, false, false, false, false, false, true, false, false, false, false, false, false, false, false, false, false, false, false, false ], "model.layers.10.self_attn.v_proj.lora_E": [ true, true, false, false, false, true, true, false, false, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, false, false, false, false, true, true, true, true, false, false, false, false, true, false, false, false, true, true, true, false, true, true, false, true, false, false, true, true, false, false, true, true, true, true, true, true, true, false, true ], "model.layers.11.self_attn.q_proj.lora_E": [ true, false, false, false, false, true, false, false, false, true, true, false, true, false, false, false, false, false, false, false, false, true, false, false, true, false, false, false, false, false, false, false, false, false, false, false, true, false, false, false, true, true, true, false, true, false, false, false, true, true, false, false, false, false, true, true, false, true, false, true, true, false, false, false ], "model.layers.11.self_attn.v_proj.lora_E": [ false, true, true, true, false, true, true, false, true, false, true, true, true, true, true, true, true, false, false, true, false, true, true, true, true, false, false, true, true, true, true, false, true, true, true, true, true, true, true, true, true, false, false, false, false, false, true, false, false, true, true, false, false, true, true, true, true, true, false, true, true, true, false, false ], "model.layers.12.self_attn.q_proj.lora_E": [ false, false, false, false, false, false, false, false, false, false, false, false, false, true, false, true, false, false, false, false, false, true, false, false, false, false, false, false, true, false, false, false, false, false, false, false, false, true, true, false, true, false, false, false, false, true, false, true, false, false, true, false, true, false, false, true, false, false, false, false, false, false, false, false ], "model.layers.12.self_attn.v_proj.lora_E": [ true, true, true, true, false, true, true, false, true, true, true, true, true, true, true, true, true, false, false, false, false, true, true, false, false, true, true, true, true, true, true, true, true, true, true, false, false, true, false, true, true, true, true, true, false, false, true, false, true, true, true, true, false, true, false, true, false, true, false, true, false, true, true, false ], "model.layers.13.self_attn.q_proj.lora_E": [ true, true, false, true, true, true, false, false, true, true, false, true, false, true, false, true, false, false, true, true, false, true, false, true, true, true, true, false, false, true, true, false, false, true, false, true, false, true, true, true, false, false, false, false, true, true, true, true, false, true, false, true, true, true, false, true, false, true, true, false, false, false, true, false ], "model.layers.13.self_attn.v_proj.lora_E": [ true, false, true, true, true, false, true, true, true, false, true, true, true, false, true, false, true, false, true, true, true, true, true, true, true, true, true, false, true, true, false, false, true, true, false, false, true, false, false, true, false, false, true, true, true, true, true, true, true, false, true, false, false, true, true, true, false, true, true, false, true, true, true, true ], "model.layers.14.self_attn.q_proj.lora_E": [ false, true, false, true, true, false, false, false, true, false, false, true, false, false, true, true, false, true, true, true, false, false, false, true, false, true, false, true, false, false, true, true, true, true, true, false, false, true, true, false, true, true, false, false, true, false, false, false, true, false, true, true, true, false, true, true, true, false, false, true, false, true, true, false ], "model.layers.14.self_attn.v_proj.lora_E": [ true, true, true, false, false, false, true, false, false, false, false, true, true, false, false, true, false, true, true, true, false, true, false, false, true, false, true, false, true, true, false, true, false, true, false, false, true, false, false, true, false, true, true, false, true, false, true, false, true, true, true, true, true, true, false, false, true, true, false, true, true, true, true, false ], "model.layers.15.self_attn.q_proj.lora_E": [ false, true, true, true, true, true, false, true, false, true, false, true, false, true, true, true, true, true, true, false, true, true, false, true, false, true, false, true, true, true, false, true, false, false, false, true, true, true, true, false, true, true, false, true, false, true, false, false, true, true, false, true, false, true, false, true, true, true, true, true, false, true, true, true ], "model.layers.15.self_attn.v_proj.lora_E": [ true, true, true, true, false, true, true, true, true, false, true, true, false, true, true, false, false, true, false, true, true, true, true, true, false, true, true, true, true, true, false, false, false, true, true, true, true, true, true, true, true, true, true, false, true, true, true, true, true, false, true, true, true, true, false, false, false, true, true, true, true, true, true, true ], "model.layers.16.self_attn.q_proj.lora_E": [ false, false, false, false, false, false, false, true, true, true, true, false, true, false, true, true, true, false, true, false, true, true, true, false, false, false, true, false, false, false, true, false, true, true, true, true, false, true, true, false, true, true, false, true, true, true, true, false, true, true, false, true, true, false, true, false, false, true, true, true, false, false, true, true ], "model.layers.16.self_attn.v_proj.lora_E": [ true, false, true, false, true, false, true, false, false, true, false, true, true, true, true, true, true, false, true, false, true, true, true, false, true, true, false, false, false, true, true, true, false, true, false, false, true, false, false, false, true, false, true, false, true, true, true, false, true, true, false, false, true, true, true, false, true, true, false, true, false, true, false, false ], "model.layers.17.self_attn.q_proj.lora_E": [ true, true, true, true, false, true, false, true, false, false, true, true, true, false, true, false, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, false, false, true, true, false, true, true, true, true, false, true, true, false, true, true, true, false, true, false, true, true, true, true, false, true, true, true, true, true, true, true, true ], "model.layers.17.self_attn.v_proj.lora_E": [ false, true, true, true, true, true, true, true, true, true, false, true, true, false, false, false, true, true, false, true, true, false, false, true, true, false, false, false, false, true, true, true, false, true, false, true, false, true, true, true, true, false, true, true, true, true, true, true, false, true, true, true, true, true, true, true, true, true, false, false, true, true, true, true ], "model.layers.18.self_attn.q_proj.lora_E": [ false, true, false, true, false, true, false, true, true, true, false, true, true, true, false, true, true, false, true, false, false, false, true, true, false, true, true, true, false, true, true, true, true, true, true, false, true, true, true, true, true, true, false, false, true, true, true, true, true, false, true, false, false, false, false, true, false, false, true, false, true, false, true, true ], "model.layers.18.self_attn.v_proj.lora_E": [ true, true, true, true, true, true, true, true, false, true, true, false, false, false, true, false, true, true, true, true, true, true, true, true, true, true, true, true, true, false, true, true, true, false, true, true, true, true, true, true, true, true, false, true, true, false, true, true, true, true, true, true, true, true, false, true, true, true, true, true, true, false, false, true ], "model.layers.19.self_attn.q_proj.lora_E": [ false, true, false, true, false, true, false, true, true, true, true, true, true, true, true, true, true, true, true, true, false, true, true, true, true, true, true, false, false, true, true, true, true, false, true, false, true, false, true, false, false, false, true, true, true, true, true, false, false, false, false, true, true, true, true, true, true, false, false, true, true, false, true, true ], "model.layers.19.self_attn.v_proj.lora_E": [ false, true, false, true, true, true, false, true, false, false, false, false, true, true, true, false, true, false, false, false, false, true, false, true, true, false, true, true, false, false, true, true, true, true, false, false, false, false, false, true, false, false, true, false, false, false, true, true, false, false, false, true, true, true, true, false, true, true, false, true, false, true, true, true ], "model.layers.20.self_attn.q_proj.lora_E": [ false, true, false, false, false, false, true, false, false, false, false, false, false, false, false, true, false, false, false, false, false, false, false, false, false, false, false, true, false, false, false, false, false, true, false, true, true, false, false, false, false, true, false, true, false, true, false, false, false, false, true, true, false, false, true, true, false, false, false, false, false, true, false, false ], "model.layers.20.self_attn.v_proj.lora_E": [ true, false, true, true, false, false, false, true, true, false, false, true, true, true, false, true, false, true, false, false, false, false, true, false, false, false, true, false, true, false, true, true, true, false, true, false, true, false, true, false, true, true, true, true, false, false, false, false, false, false, false, false, true, false, false, false, true, false, false, true, false, false, true, true ], "model.layers.21.self_attn.q_proj.lora_E": [ false, false, true, false, true, true, true, true, true, false, true, true, true, true, true, true, false, false, false, false, true, true, false, true, true, true, true, false, false, false, false, false, false, false, true, true, true, false, true, false, true, false, true, false, false, false, true, false, true, true, true, true, true, false, false, true, true, false, true, true, false, false, true, true ], "model.layers.21.self_attn.v_proj.lora_E": [ true, true, true, true, true, false, true, true, false, true, true, false, false, true, true, false, true, true, false, true, true, true, true, false, false, false, false, false, true, true, true, false, true, true, false, true, true, false, true, true, false, false, false, true, true, false, false, false, true, true, false, true, true, false, false, false, false, true, false, false, false, false, false, false ], "model.layers.22.self_attn.q_proj.lora_E": [ false, true, false, true, true, false, true, false, false, true, false, false, false, false, false, false, true, false, true, true, false, false, false, false, true, true, true, false, false, true, false, false, false, false, false, true, false, false, false, false, true, false, false, false, true, false, true, false, false, false, true, false, true, true, true, false, false, true, false, false, true, true, false, true ], "model.layers.22.self_attn.v_proj.lora_E": [ false, true, true, true, true, false, false, true, true, true, true, false, true, true, false, true, true, true, true, true, true, true, true, true, false, false, true, true, false, true, false, false, true, false, false, false, false, true, false, false, false, true, true, false, false, false, true, true, false, false, true, true, true, true, true, true, true, true, false, false, true, false, true, true ], "model.layers.23.self_attn.q_proj.lora_E": [ true, false, true, false, true, true, true, true, true, false, true, true, true, true, true, true, true, true, false, false, true, true, true, true, true, false, true, true, true, true, true, true, true, true, true, true, true, true, true, true, false, true, true, false, true, false, true, false, true, true, true, true, true, true, true, false, true, false, true, true, true, false, true, true ], "model.layers.23.self_attn.v_proj.lora_E": [ false, true, false, true, false, false, true, false, true, false, true, true, true, true, true, true, false, true, true, true, true, true, true, true, true, true, true, true, true, false, false, true, false, true, true, false, true, false, true, true, true, true, true, true, true, true, true, true, true, true, false, true, true, true, false, false, false, true, true, false, true, false, true, true ], "model.layers.24.self_attn.q_proj.lora_E": [ true, true, true, true, true, true, false, true, true, true, true, false, true, true, true, false, true, true, true, true, true, true, false, false, true, true, true, true, false, true, true, true, true, true, true, true, true, true, false, true, true, true, false, true, true, true, true, false, true, false, true, true, true, true, false, false, false, true, true, true, true, false, false, true ], "model.layers.24.self_attn.v_proj.lora_E": [ true, true, true, false, true, false, false, true, true, true, false, true, true, false, false, true, false, false, false, false, true, true, true, false, true, false, false, true, false, true, false, true, true, false, true, true, false, false, false, true, false, false, true, true, false, true, true, false, false, true, true, true, true, false, false, true, true, true, false, true, false, true, true, true ], "model.layers.25.self_attn.q_proj.lora_E": [ false, false, false, false, true, true, false, true, true, false, false, false, false, false, false, false, true, false, false, true, false, false, true, false, false, false, true, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, true, false, false, false, false, false, true, false, false, false, true, true, true, false, false, false, false, false, false, false ], "model.layers.25.self_attn.v_proj.lora_E": [ false, false, false, true, false, false, false, true, true, false, false, true, false, true, true, true, false, false, false, false, true, false, false, false, true, true, true, true, false, false, false, false, true, false, false, false, false, true, false, true, false, false, false, false, false, false, false, false, false, false, false, false, false, false, true, false, false, false, false, false, false, false, false, false ], "model.layers.26.self_attn.q_proj.lora_E": [ true, false, false, true, false, false, false, false, false, false, true, false, true, false, true, true, true, false, false, true, true, true, false, false, true, true, false, false, true, false, true, true, false, false, false, true, true, false, false, false, true, false, false, false, true, true, false, false, true, false, true, true, false, true, false, false, true, true, true, false, true, true, true, true ], "model.layers.26.self_attn.v_proj.lora_E": [ false, false, true, false, true, false, false, false, true, false, false, false, false, false, true, false, false, false, false, false, false, false, true, false, false, true, false, true, false, true, true, false, false, false, false, false, false, false, false, false, true, false, false, false, false, false, false, false, true, true, false, false, false, true, false, true, false, true, false, false, false, true, false, false ], "model.layers.27.self_attn.q_proj.lora_E": [ true, false, false, true, true, false, false, true, true, false, false, false, true, true, false, true, false, false, true, false, false, true, true, true, true, false, false, true, true, false, false, false, false, true, true, true, false, true, false, false, false, true, false, true, true, true, false, false, false, true, true, true, true, true, false, false, false, false, true, false, false, false, true, false ], "model.layers.27.self_attn.v_proj.lora_E": [ false, false, true, true, true, true, true, true, true, false, false, false, true, false, false, false, true, true, false, false, false, true, false, true, true, true, true, true, false, true, true, false, true, false, true, true, false, true, true, false, false, true, false, true, true, false, false, true, false, true, true, true, false, false, true, false, false, true, true, true, true, true, false, true ] }, "alpha_pattern": {}, "megatron_config": null, "megatron_core": "megatron.core", "trainable_token_indices": null, "loftq_config": {}, "eva_config": null, "corda_config": null, "use_dora": false, "layer_replication": null, "lora_bias": false, "target_r": 32, "init_r": 64, "tinit": 200, "tfinal": 500, "deltaT": 1, "beta1": 0.85, "beta2": 0.85, "orth_reg_weight": 0.5, "total_step": 5000 }, "error_msg": "" }, "train_info": { "accelerator_memory_reserved_avg": 12361399900, "accelerator_memory_max": 22793945088, "accelerator_memory_reserved_99th": 18203426160, "train_time": 1986.3603882369862, "file_size": 35147440, "num_trainable_params": 18353664, "num_total_params": 3231103544, "status": "success", "metrics": [ { "step": 250, "valid accuracy": 0.0, "train loss": 1.3241184422969818, "train samples": 1000, "train time": 35.95594502204767, "eval time": 11.413120707002236, "tokens / sec": 5888.289123542072, "mem allocated avg": 7292959393.792, "mem reserved avg": 12441731727.36, "elapsed time": 100.98083375500573 }, { "step": 500, "valid accuracy": 0.38, "train loss": 1.0195633232593537, "train samples": 2000, "train time": 37.64258231502754, "eval time": 11.37802824100072, "tokens / sec": 5525.524212428035, "mem allocated avg": 7285510731.776, "mem reserved avg": 12328493907.968, "elapsed time": 197.93603045200143 }, { "step": 750, "valid accuracy": 0.28, "train loss": 0.7883218789100647, "train samples": 3000, "train time": 37.909325722001086, "eval time": 11.385932488003164, "tokens / sec": 5655.626838954038, "mem allocated avg": 7296095842.304, "mem reserved avg": 12484438130.688, "elapsed time": 295.9188707240028 }, { "step": 1000, "valid accuracy": 0.3, "train loss": 0.7408825470209122, "train samples": 4000, "train time": 37.79932949803333, "eval time": 11.34964040399791, "tokens / sec": 5511.6321576772825, "mem allocated avg": 7286506670.08, "mem reserved avg": 12351948455.936, "elapsed time": 393.33776786700037 }, { "step": 1250, "valid accuracy": 0.36, "train loss": 0.7282904219627381, "train samples": 5000, "train time": 37.475317073069164, "eval time": 11.342822429993248, "tokens / sec": 5564.676066473135, "mem allocated avg": 7287005519.872, "mem reserved avg": 12349910024.192, "elapsed time": 490.5430299360014 }, { "step": 1500, "valid accuracy": 0.38, "train loss": 0.7161256531476975, "train samples": 6000, "train time": 37.660518338059774, "eval time": 11.34013032400253, "tokens / sec": 5558.367469107556, "mem allocated avg": 7287642494.976, "mem reserved avg": 12380570386.432, "elapsed time": 588.017992052999 }, { "step": 1750, "valid accuracy": 0.34, "train loss": 0.7056601424217224, "train samples": 7000, "train time": 37.636171496975294, "eval time": 11.3171367870018, "tokens / sec": 5562.600861695649, "mem allocated avg": 7289782888.448, "mem reserved avg": 12389051269.12, "elapsed time": 685.2421731229988 }, { "step": 2000, "valid accuracy": 0.34, "train loss": 0.7058932571411133, "train samples": 8000, "train time": 37.505602380944765, "eval time": 11.37751964799827, "tokens / sec": 5537.732680318789, "mem allocated avg": 7287054886.912, "mem reserved avg": 12336119152.64, "elapsed time": 782.1823508529997 }, { "step": 2250, "valid accuracy": 0.3, "train loss": 0.700018577337265, "train samples": 9000, "train time": 38.06487834800646, "eval time": 11.33160761000181, "tokens / sec": 5646.885247730137, "mem allocated avg": 7297638139.904, "mem reserved avg": 12521129902.08, "elapsed time": 880.444039299 }, { "step": 2500, "valid accuracy": 0.34, "train loss": 0.6984639673233032, "train samples": 10000, "train time": 37.400825600088865, "eval time": 7.680036880999978, "tokens / sec": 5507.017470745635, "mem allocated avg": 7283608303.616, "mem reserved avg": 12278598467.584, "elapsed time": 973.4031999860017 }, { "step": 2750, "valid accuracy": 0.32, "train loss": 0.691307947397232, "train samples": 11000, "train time": 37.97861938195274, "eval time": 11.376824188999308, "tokens / sec": 5578.954776346737, "mem allocated avg": 7293332232.192, "mem reserved avg": 12452821467.136, "elapsed time": 1071.2981272770048 }, { "step": 3000, "valid accuracy": 0.3, "train loss": 0.6851879090070725, "train samples": 12000, "train time": 37.862704559986014, "eval time": 11.377599911000289, "tokens / sec": 5512.839149387935, "mem allocated avg": 7288929478.656, "mem reserved avg": 12371468746.752, "elapsed time": 1168.7257358770003 }, { "step": 3250, "valid accuracy": 0.34, "train loss": 0.6939580011367797, "train samples": 13000, "train time": 37.79518606400961, "eval time": 7.2029460159974406, "tokens / sec": 5580.102176050141, "mem allocated avg": 7290687285.248, "mem reserved avg": 12403068633.088, "elapsed time": 1261.9857917680056 }, { "step": 3500, "valid accuracy": 0.4, "train loss": 0.6825792235136032, "train samples": 14000, "train time": 37.73422463506722, "eval time": 11.28984081800445, "tokens / sec": 5558.614282617983, "mem allocated avg": 7289277476.864, "mem reserved avg": 12381820289.024, "elapsed time": 1359.695578400002 }, { "step": 3750, "valid accuracy": 0.34, "train loss": 0.6795008780956269, "train samples": 15000, "train time": 38.156728624038806, "eval time": 11.362600938999094, "tokens / sec": 5679.286663570962, "mem allocated avg": 7299185600.512, "mem reserved avg": 12562561236.992, "elapsed time": 1458.6053942910003 }, { "step": 4000, "valid accuracy": 0.32, "train loss": 0.6967895623445511, "train samples": 16000, "train time": 37.352128309052205, "eval time": 11.363241717001074, "tokens / sec": 5471.522219805362, "mem allocated avg": 7281535514.624, "mem reserved avg": 12256066666.496, "elapsed time": 1555.2909630150025 }, { "step": 4250, "valid accuracy": 0.34, "train loss": 0.6776066061258316, "train samples": 17000, "train time": 37.65609644694632, "eval time": 11.334564828997827, "tokens / sec": 5613.672683726684, "mem allocated avg": 7291894349.824, "mem reserved avg": 12418562392.064, "elapsed time": 1652.928281804001 }, { "step": 4500, "valid accuracy": 0.34, "train loss": 0.6868188911676407, "train samples": 18000, "train time": 37.48494880297949, "eval time": 11.33762150000257, "tokens / sec": 5544.038517760537, "mem allocated avg": 7285549684.736, "mem reserved avg": 12333837451.264, "elapsed time": 1749.9311109990012 }, { "step": 4750, "valid accuracy": 0.34, "train loss": 0.6806062284708023, "train samples": 19000, "train time": 33.62080936400889, "eval time": 11.34113016500487, "tokens / sec": 6244.31725384755, "mem allocated avg": 7068488509.44, "mem reserved avg": 12120833916.928, "elapsed time": 1843.633759463999 }, { "step": 5000, "valid accuracy": 0.28, "train loss": 0.6862971596717834, "train samples": 20000, "train time": 33.47089828590106, "eval time": 11.363945298006001, "tokens / sec": 6222.7191580255185, "mem allocated avg": 7065409925.12, "mem reserved avg": 12064965787.648, "elapsed time": 1937.0431615920024 }, { "step": 5000, "test accuracy": 0.3904473085670963, "train loss": 0.6862971596717834, "train samples": 20000, "train total tokens": 4198051 } ] }, "meta_info": { "model_info": { "sha": "13afe5124825b4f3751f836b40dafda64c1ed062", "created_at": "2024-09-18T15:23:48+00:00" }, "dataset_info": { "metamath": { "sha": "aa4f34d3d2d3231299b5b03d9b3e5a20da45aa18", "created_at": "2023-09-21T17:22:46+00:00" }, "gsm8k": { "sha": "e53f048856ff4f594e959d75785d2c2d37b678ee", "created_at": "2022-04-12T10:22:10+00:00" } }, "package_info": { "transformers-version": "4.52.4", "transformers-commit-hash": null, "peft-version": "0.15.2.dev0", "peft-commit-hash": "5fe7f8f8abe914d313fc3751f2ea92de7718fbaf", "datasets-version": "3.6.0", "datasets-commit-hash": null, "bitsandbytes-version": "0.46.0", "bitsandbytes-commit-hash": null, "torch-version": "2.7.1+cu126", "torch-commit-hash": null }, "system_info": { "system": "Linux", "release": "6.8.0-1029-aws", "version": "#31-Ubuntu SMP Wed Apr 23 18:42:41 UTC 2025", "machine": "x86_64", "processor": "x86_64", "accelerator": "NVIDIA L40S" }, "pytorch_info": "PyTorch built with:\n - GCC 11.2\n - C++ Version: 201703\n - Intel(R) oneAPI Math Kernel Library Version 2024.2-Product Build 20240605 for Intel(R) 64 architecture applications\n - Intel(R) MKL-DNN v3.7.1 (Git Hash 8d263e693366ef8db40acc569cc7d8edf644556d)\n - OpenMP 201511 (a.k.a. OpenMP 4.5)\n - LAPACK is enabled (usually provided by MKL)\n - NNPACK is enabled\n - CPU capability usage: AVX2\n - CUDA Runtime 12.6\n - NVCC architecture flags: -gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_90,code=sm_90\n - CuDNN 90.7.1 (built against CUDA 12.8)\n - Built with CuDNN 90.5.1\n - Magma 2.6.1\n - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, COMMIT_SHA=e2d141dbde55c2a4370fac5165b0561b6af4798b, CUDA_VERSION=12.6, CUDNN_VERSION=9.5.1, CXX_COMPILER=/opt/rh/gcc-toolset-11/root/usr/bin/c++, CXX_FLAGS= -D_GLIBCXX_USE_CXX11_ABI=1 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DLIBKINETO_NOXPUPTI=ON -DUSE_FBGEMM -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=range-loop-construct -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-unknown-pragmas -Wno-unused-parameter -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wsuggest-override -Wno-psabi -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, TORCH_VERSION=2.7.1, USE_CUDA=ON, USE_CUDNN=ON, USE_CUSPARSELT=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_GLOO=ON, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=1, USE_NNPACK=ON, USE_OPENMP=ON, USE_ROCM=OFF, USE_ROCM_KERNEL_ASSERT=OFF, \n" } }