| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 9.99985909539242, | |
| "global_step": 8870, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 4.71815107102593e-05, | |
| "loss": 0.2722, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 4.43630214205186e-05, | |
| "loss": 0.1261, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "eval_acc": 0.98, | |
| "eval_loss": 0.09329384565353394, | |
| "eval_num": 56776, | |
| "eval_runtime": 5408.6044, | |
| "eval_samples_per_second": 10.497, | |
| "eval_steps_per_second": 2.624, | |
| "eval_true_num": 55574, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 4.1544532130777905e-05, | |
| "loss": 0.1078, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "learning_rate": 3.872604284103721e-05, | |
| "loss": 0.0951, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "eval_acc": 0.98, | |
| "eval_loss": 0.06545884907245636, | |
| "eval_num": 56776, | |
| "eval_runtime": 5398.8946, | |
| "eval_samples_per_second": 10.516, | |
| "eval_steps_per_second": 2.629, | |
| "eval_true_num": 55867, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "learning_rate": 3.590755355129651e-05, | |
| "loss": 0.0832, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 3.38, | |
| "learning_rate": 3.308906426155581e-05, | |
| "loss": 0.0774, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 3.38, | |
| "eval_acc": 0.99, | |
| "eval_loss": 0.047995537519454956, | |
| "eval_num": 56776, | |
| "eval_runtime": 5407.5843, | |
| "eval_samples_per_second": 10.499, | |
| "eval_steps_per_second": 2.625, | |
| "eval_true_num": 56047, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 3.95, | |
| "learning_rate": 3.0270574971815107e-05, | |
| "loss": 0.0689, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 4.51, | |
| "learning_rate": 2.745208568207441e-05, | |
| "loss": 0.0584, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 4.51, | |
| "eval_acc": 0.99, | |
| "eval_loss": 0.03338366374373436, | |
| "eval_num": 56776, | |
| "eval_runtime": 5517.5145, | |
| "eval_samples_per_second": 10.29, | |
| "eval_steps_per_second": 2.573, | |
| "eval_true_num": 56252, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 5.07, | |
| "learning_rate": 2.463359639233371e-05, | |
| "loss": 0.0537, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 5.64, | |
| "learning_rate": 2.181510710259301e-05, | |
| "loss": 0.042, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 5.64, | |
| "eval_acc": 0.99, | |
| "eval_loss": 0.022190678864717484, | |
| "eval_num": 56776, | |
| "eval_runtime": 5470.5084, | |
| "eval_samples_per_second": 10.379, | |
| "eval_steps_per_second": 2.595, | |
| "eval_true_num": 56411, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 6.2, | |
| "learning_rate": 1.8996617812852312e-05, | |
| "loss": 0.0431, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 6.76, | |
| "learning_rate": 1.6178128523111614e-05, | |
| "loss": 0.0329, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 6.76, | |
| "eval_acc": 1.0, | |
| "eval_loss": 0.013875513337552547, | |
| "eval_num": 56776, | |
| "eval_runtime": 5431.4557, | |
| "eval_samples_per_second": 10.453, | |
| "eval_steps_per_second": 2.613, | |
| "eval_true_num": 56502, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 7.33, | |
| "learning_rate": 1.3359639233370913e-05, | |
| "loss": 0.0305, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 7.89, | |
| "learning_rate": 1.0541149943630215e-05, | |
| "loss": 0.0254, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 7.89, | |
| "eval_acc": 1.0, | |
| "eval_loss": 0.009383471682667732, | |
| "eval_num": 56776, | |
| "eval_runtime": 5439.4304, | |
| "eval_samples_per_second": 10.438, | |
| "eval_steps_per_second": 2.609, | |
| "eval_true_num": 56626, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 8.46, | |
| "learning_rate": 7.722660653889515e-06, | |
| "loss": 0.0225, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 9.02, | |
| "learning_rate": 4.904171364148816e-06, | |
| "loss": 0.0214, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 9.02, | |
| "eval_acc": 1.0, | |
| "eval_loss": 0.007043090648949146, | |
| "eval_num": 56776, | |
| "eval_runtime": 5398.7111, | |
| "eval_samples_per_second": 10.517, | |
| "eval_steps_per_second": 2.629, | |
| "eval_true_num": 56659, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 9.58, | |
| "learning_rate": 2.0856820744081176e-06, | |
| "loss": 0.017, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "step": 8870, | |
| "total_flos": 7.53120395931648e+17, | |
| "train_loss": 0.06710476681774875, | |
| "train_runtime": 130006.8001, | |
| "train_samples_per_second": 4.367, | |
| "train_steps_per_second": 0.068 | |
| } | |
| ], | |
| "max_steps": 8870, | |
| "num_train_epochs": 10, | |
| "total_flos": 7.53120395931648e+17, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |