{
  "step": 50000,
  "loss": 5.999711477773722,
  "config": {
    "base_model_name": "meta-llama/Llama-3.1-8B-Instruct",
    "masking_type": "bidirectional",
    "batch_size": 16,
    "learning_rate": 0.0003,
    "weight_decay": 0.01,
    "grad_clip": 1.0,
    "max_steps": 50000,
    "max_length": 128,
    "target_tokens": 100000000,
    "save_interval": 5000,
    "log_interval": 50
  },
  "model_type": "CustomTransformerModel",
  "attention_type": "bidirectional",
  "training_tokens": 102400000,
  "compatible_with_inference": true,
  "accelerator": "unsloth_kernel_optimized",
  "training_mode": "pure_diffusion",
  "save_method": "unsloth_optimized",
  "timestamp": "2025-06-11T06:22:08.448859"
}