| { | |
| "best_metric": 0.9067, | |
| "best_model_checkpoint": "test-cifar100-vit-b-16/checkpoint-4692", | |
| "epoch": 7.0, | |
| "eval_steps": 500, | |
| "global_step": 5474, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 1.8173182316404826e-05, | |
| "loss": 4.0836, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.8138, | |
| "eval_loss": 2.878326416015625, | |
| "eval_runtime": 79.7364, | |
| "eval_samples_per_second": 125.413, | |
| "eval_steps_per_second": 1.969, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 1.6346364632809646e-05, | |
| "loss": 3.1298, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 1.4519546949214468e-05, | |
| "loss": 2.5016, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.8592, | |
| "eval_loss": 1.9474856853485107, | |
| "eval_runtime": 51.8641, | |
| "eval_samples_per_second": 192.811, | |
| "eval_steps_per_second": 3.027, | |
| "step": 1564 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "learning_rate": 1.2692729265619292e-05, | |
| "loss": 2.0681, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.8838, | |
| "eval_loss": 1.40227210521698, | |
| "eval_runtime": 72.9429, | |
| "eval_samples_per_second": 137.093, | |
| "eval_steps_per_second": 2.152, | |
| "step": 2346 | |
| }, | |
| { | |
| "epoch": 3.2, | |
| "learning_rate": 1.0865911582024116e-05, | |
| "loss": 1.7523, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 3.84, | |
| "learning_rate": 9.039093898428938e-06, | |
| "loss": 1.5165, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.8986, | |
| "eval_loss": 1.0694773197174072, | |
| "eval_runtime": 47.8979, | |
| "eval_samples_per_second": 208.777, | |
| "eval_steps_per_second": 3.278, | |
| "step": 3128 | |
| }, | |
| { | |
| "epoch": 4.48, | |
| "learning_rate": 7.21227621483376e-06, | |
| "loss": 1.3514, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_accuracy": 0.903, | |
| "eval_loss": 0.8822135925292969, | |
| "eval_runtime": 61.8514, | |
| "eval_samples_per_second": 161.678, | |
| "eval_steps_per_second": 2.538, | |
| "step": 3910 | |
| }, | |
| { | |
| "epoch": 5.12, | |
| "learning_rate": 5.385458531238583e-06, | |
| "loss": 1.2353, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 5.75, | |
| "learning_rate": 3.5586408476434055e-06, | |
| "loss": 1.1393, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accuracy": 0.9067, | |
| "eval_loss": 0.7875523567199707, | |
| "eval_runtime": 63.4983, | |
| "eval_samples_per_second": 157.484, | |
| "eval_steps_per_second": 2.473, | |
| "step": 4692 | |
| }, | |
| { | |
| "epoch": 6.39, | |
| "learning_rate": 1.731823164048228e-06, | |
| "loss": 1.0874, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_accuracy": 0.9066, | |
| "eval_loss": 0.7552067637443542, | |
| "eval_runtime": 67.2383, | |
| "eval_samples_per_second": 148.725, | |
| "eval_steps_per_second": 2.335, | |
| "step": 5474 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 5474, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 7, | |
| "save_steps": 500, | |
| "total_flos": 2.71460189564928e+19, | |
| "train_batch_size": 64, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |