Instructions to use BioMedTok/BPE-HF-PubMed-FR with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use BioMedTok/BPE-HF-PubMed-FR with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("fill-mask", model="BioMedTok/BPE-HF-PubMed-FR")

# Load model directly
from transformers import AutoTokenizer, AutoModelForMaskedLM

tokenizer = AutoTokenizer.from_pretrained("BioMedTok/BPE-HF-PubMed-FR")
model = AutoModelForMaskedLM.from_pretrained("BioMedTok/BPE-HF-PubMed-FR")
```
- Notebooks
- Google Colab
- Kaggle
| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 18.0, | |
| "global_step": 98676, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 5e-09, | |
| "loss": 10.532, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 2.5e-06, | |
| "loss": 9.367, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 5e-06, | |
| "loss": 7.4277, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 7.5e-06, | |
| "loss": 6.1401, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 1e-05, | |
| "loss": 5.8709, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 1.25e-05, | |
| "loss": 5.753, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 1.5e-05, | |
| "loss": 5.6777, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 1.75e-05, | |
| "loss": 5.6151, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 2e-05, | |
| "loss": 5.5717, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 2.25e-05, | |
| "loss": 5.5305, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 2.5e-05, | |
| "loss": 5.4947, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 2.7500000000000004e-05, | |
| "loss": 5.4688, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 3e-05, | |
| "loss": 5.4406, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 3.2500000000000004e-05, | |
| "loss": 5.4163, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 3.5e-05, | |
| "loss": 5.3942, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "learning_rate": 3.7500000000000003e-05, | |
| "loss": 5.3762, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 4e-05, | |
| "loss": 5.3524, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "learning_rate": 4.2495e-05, | |
| "loss": 5.338, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "learning_rate": 4.4995000000000005e-05, | |
| "loss": 5.3205, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "learning_rate": 4.7495e-05, | |
| "loss": 5.3096, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "learning_rate": 4.9995000000000005e-05, | |
| "loss": 5.2971, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 4.998859263331501e-05, | |
| "loss": 5.2822, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "learning_rate": 4.9977139453912406e-05, | |
| "loss": 5.2716, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 2.1, | |
| "learning_rate": 4.99656862745098e-05, | |
| "loss": 5.2599, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 2.19, | |
| "learning_rate": 4.9954233095107204e-05, | |
| "loss": 5.2516, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 2.28, | |
| "learning_rate": 4.99427799157046e-05, | |
| "loss": 5.2391, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 2.37, | |
| "learning_rate": 4.9931326736301996e-05, | |
| "loss": 5.2352, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 2.46, | |
| "learning_rate": 4.99198964632582e-05, | |
| "loss": 5.2247, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 2.55, | |
| "learning_rate": 4.99084432838556e-05, | |
| "loss": 5.2168, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 2.65, | |
| "learning_rate": 4.9896990104453e-05, | |
| "loss": 5.2091, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 2.74, | |
| "learning_rate": 4.9885536925050395e-05, | |
| "loss": 5.2037, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 2.83, | |
| "learning_rate": 4.98741066520066e-05, | |
| "loss": 5.196, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "learning_rate": 4.9862653472603996e-05, | |
| "loss": 5.1892, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 3.01, | |
| "learning_rate": 4.985120029320139e-05, | |
| "loss": 5.1825, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 3.1, | |
| "learning_rate": 4.98397700201576e-05, | |
| "loss": 5.1753, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 3.19, | |
| "learning_rate": 4.9828316840755e-05, | |
| "loss": 5.1722, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 3.28, | |
| "learning_rate": 4.9816863661352395e-05, | |
| "loss": 5.1653, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 3.37, | |
| "learning_rate": 4.980541048194979e-05, | |
| "loss": 5.1603, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 3.47, | |
| "learning_rate": 4.9793980208905996e-05, | |
| "loss": 5.1547, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 3.56, | |
| "learning_rate": 4.97825270295034e-05, | |
| "loss": 5.151, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 3.65, | |
| "learning_rate": 4.9771073850100794e-05, | |
| "loss": 5.1468, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 3.74, | |
| "learning_rate": 4.975962067069819e-05, | |
| "loss": 5.1434, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 3.83, | |
| "learning_rate": 4.974816749129559e-05, | |
| "loss": 5.1388, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 3.92, | |
| "learning_rate": 4.973671431189299e-05, | |
| "loss": 5.133, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 4.01, | |
| "learning_rate": 4.9725284038849186e-05, | |
| "loss": 5.1264, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 4.1, | |
| "learning_rate": 4.971383085944659e-05, | |
| "loss": 5.1248, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 4.2, | |
| "learning_rate": 4.9702377680043984e-05, | |
| "loss": 5.1185, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 4.29, | |
| "learning_rate": 4.969092450064138e-05, | |
| "loss": 5.1139, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 4.38, | |
| "learning_rate": 4.967947132123878e-05, | |
| "loss": 5.1114, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 4.47, | |
| "learning_rate": 4.966806395455378e-05, | |
| "loss": 5.1084, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 4.56, | |
| "learning_rate": 4.9656610775151186e-05, | |
| "loss": 5.1041, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 4.65, | |
| "learning_rate": 4.964515759574858e-05, | |
| "loss": 5.102, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 4.74, | |
| "learning_rate": 4.963370441634598e-05, | |
| "loss": 5.1012, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 4.83, | |
| "learning_rate": 4.962225123694338e-05, | |
| "loss": 5.0961, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 4.93, | |
| "learning_rate": 4.9610798057540775e-05, | |
| "loss": 5.0918, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 5.02, | |
| "learning_rate": 4.959934487813817e-05, | |
| "loss": 5.0823, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 5.11, | |
| "learning_rate": 4.958789169873557e-05, | |
| "loss": 4.7898, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 5.2, | |
| "learning_rate": 4.957643851933297e-05, | |
| "loss": 4.47, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 5.29, | |
| "learning_rate": 4.9564985339930364e-05, | |
| "loss": 4.1839, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 5.38, | |
| "learning_rate": 4.9553532160527766e-05, | |
| "loss": 3.9283, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 5.47, | |
| "learning_rate": 4.9542101887483965e-05, | |
| "loss": 3.3536, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 5.56, | |
| "learning_rate": 4.953067161444017e-05, | |
| "loss": 2.7553, | |
| "step": 30500 | |
| }, | |
| { | |
| "epoch": 5.65, | |
| "learning_rate": 4.9519218435037566e-05, | |
| "loss": 2.3501, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 5.75, | |
| "learning_rate": 4.950776525563497e-05, | |
| "loss": 1.9139, | |
| "step": 31500 | |
| }, | |
| { | |
| "epoch": 5.84, | |
| "learning_rate": 4.9496312076232364e-05, | |
| "loss": 1.6857, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 5.93, | |
| "learning_rate": 4.948485889682976e-05, | |
| "loss": 1.5418, | |
| "step": 32500 | |
| }, | |
| { | |
| "epoch": 6.02, | |
| "learning_rate": 4.947340571742716e-05, | |
| "loss": 1.4448, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 6.11, | |
| "learning_rate": 4.946195253802456e-05, | |
| "loss": 1.3729, | |
| "step": 33500 | |
| }, | |
| { | |
| "epoch": 6.2, | |
| "learning_rate": 4.945049935862195e-05, | |
| "loss": 1.3178, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 6.29, | |
| "learning_rate": 4.9439046179219356e-05, | |
| "loss": 1.2557, | |
| "step": 34500 | |
| }, | |
| { | |
| "epoch": 6.38, | |
| "learning_rate": 4.942759299981675e-05, | |
| "loss": 1.1956, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 6.48, | |
| "learning_rate": 4.941613982041415e-05, | |
| "loss": 1.1306, | |
| "step": 35500 | |
| }, | |
| { | |
| "epoch": 6.57, | |
| "learning_rate": 4.940470954737035e-05, | |
| "loss": 1.0845, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 6.66, | |
| "learning_rate": 4.939325636796775e-05, | |
| "loss": 1.0483, | |
| "step": 36500 | |
| }, | |
| { | |
| "epoch": 6.75, | |
| "learning_rate": 4.9381803188565143e-05, | |
| "loss": 1.0169, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 6.84, | |
| "learning_rate": 4.9370350009162546e-05, | |
| "loss": 0.9886, | |
| "step": 37500 | |
| }, | |
| { | |
| "epoch": 6.93, | |
| "learning_rate": 4.9358919736118744e-05, | |
| "loss": 0.966, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 7.02, | |
| "learning_rate": 4.934746655671615e-05, | |
| "loss": 0.9429, | |
| "step": 38500 | |
| }, | |
| { | |
| "epoch": 7.11, | |
| "learning_rate": 4.933601337731354e-05, | |
| "loss": 0.9261, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 7.21, | |
| "learning_rate": 4.932456019791094e-05, | |
| "loss": 0.908, | |
| "step": 39500 | |
| }, | |
| { | |
| "epoch": 7.3, | |
| "learning_rate": 4.9313129924867143e-05, | |
| "loss": 0.8914, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 7.39, | |
| "learning_rate": 4.930167674546454e-05, | |
| "loss": 0.8789, | |
| "step": 40500 | |
| }, | |
| { | |
| "epoch": 7.48, | |
| "learning_rate": 4.929022356606194e-05, | |
| "loss": 0.8643, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 7.57, | |
| "learning_rate": 4.927877038665934e-05, | |
| "loss": 0.8531, | |
| "step": 41500 | |
| }, | |
| { | |
| "epoch": 7.66, | |
| "learning_rate": 4.926731720725673e-05, | |
| "loss": 0.8418, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 7.75, | |
| "learning_rate": 4.925588693421294e-05, | |
| "loss": 0.83, | |
| "step": 42500 | |
| }, | |
| { | |
| "epoch": 7.84, | |
| "learning_rate": 4.924443375481034e-05, | |
| "loss": 0.8201, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 7.94, | |
| "learning_rate": 4.9232980575407736e-05, | |
| "loss": 0.8115, | |
| "step": 43500 | |
| }, | |
| { | |
| "epoch": 8.03, | |
| "learning_rate": 4.922152739600513e-05, | |
| "loss": 0.803, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 8.12, | |
| "learning_rate": 4.9210074216602534e-05, | |
| "loss": 0.7935, | |
| "step": 44500 | |
| }, | |
| { | |
| "epoch": 8.21, | |
| "learning_rate": 4.919862103719993e-05, | |
| "loss": 0.7857, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 8.3, | |
| "learning_rate": 4.9187190764156135e-05, | |
| "loss": 0.7753, | |
| "step": 45500 | |
| }, | |
| { | |
| "epoch": 8.39, | |
| "learning_rate": 4.917573758475353e-05, | |
| "loss": 0.7683, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 8.48, | |
| "learning_rate": 4.916428440535093e-05, | |
| "loss": 0.761, | |
| "step": 46500 | |
| }, | |
| { | |
| "epoch": 8.57, | |
| "learning_rate": 4.915283122594833e-05, | |
| "loss": 0.7549, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 8.66, | |
| "learning_rate": 4.914140095290453e-05, | |
| "loss": 0.7478, | |
| "step": 47500 | |
| }, | |
| { | |
| "epoch": 8.76, | |
| "learning_rate": 4.912994777350193e-05, | |
| "loss": 0.7404, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 8.85, | |
| "learning_rate": 4.9118494594099325e-05, | |
| "loss": 0.7335, | |
| "step": 48500 | |
| }, | |
| { | |
| "epoch": 8.94, | |
| "learning_rate": 4.910704141469672e-05, | |
| "loss": 0.7283, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 9.03, | |
| "learning_rate": 4.9095611141652926e-05, | |
| "loss": 0.7231, | |
| "step": 49500 | |
| }, | |
| { | |
| "epoch": 9.12, | |
| "learning_rate": 4.908415796225032e-05, | |
| "loss": 0.7156, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 9.21, | |
| "learning_rate": 4.9072704782847724e-05, | |
| "loss": 0.7099, | |
| "step": 50500 | |
| }, | |
| { | |
| "epoch": 9.3, | |
| "learning_rate": 4.906125160344512e-05, | |
| "loss": 0.7071, | |
| "step": 51000 | |
| }, | |
| { | |
| "epoch": 9.39, | |
| "learning_rate": 4.9049798424042515e-05, | |
| "loss": 0.702, | |
| "step": 51500 | |
| }, | |
| { | |
| "epoch": 9.49, | |
| "learning_rate": 4.903836815099872e-05, | |
| "loss": 0.6982, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 9.58, | |
| "learning_rate": 4.9026914971596116e-05, | |
| "loss": 0.6933, | |
| "step": 52500 | |
| }, | |
| { | |
| "epoch": 9.67, | |
| "learning_rate": 4.901546179219352e-05, | |
| "loss": 0.6901, | |
| "step": 53000 | |
| }, | |
| { | |
| "epoch": 9.76, | |
| "learning_rate": 4.9004008612790914e-05, | |
| "loss": 0.6854, | |
| "step": 53500 | |
| }, | |
| { | |
| "epoch": 9.85, | |
| "learning_rate": 4.899255543338831e-05, | |
| "loss": 0.6813, | |
| "step": 54000 | |
| }, | |
| { | |
| "epoch": 9.94, | |
| "learning_rate": 4.8981125160344515e-05, | |
| "loss": 0.6762, | |
| "step": 54500 | |
| }, | |
| { | |
| "epoch": 10.03, | |
| "learning_rate": 4.896967198094191e-05, | |
| "loss": 0.6708, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 10.12, | |
| "learning_rate": 4.8958218801539307e-05, | |
| "loss": 0.6687, | |
| "step": 55500 | |
| }, | |
| { | |
| "epoch": 10.22, | |
| "learning_rate": 4.894676562213671e-05, | |
| "loss": 0.6641, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 10.31, | |
| "learning_rate": 4.8935312442734105e-05, | |
| "loss": 0.6594, | |
| "step": 56500 | |
| }, | |
| { | |
| "epoch": 10.4, | |
| "learning_rate": 4.892388216969031e-05, | |
| "loss": 0.6578, | |
| "step": 57000 | |
| }, | |
| { | |
| "epoch": 10.49, | |
| "learning_rate": 4.8912428990287706e-05, | |
| "loss": 0.6542, | |
| "step": 57500 | |
| }, | |
| { | |
| "epoch": 10.58, | |
| "learning_rate": 4.89009758108851e-05, | |
| "loss": 0.6509, | |
| "step": 58000 | |
| }, | |
| { | |
| "epoch": 10.67, | |
| "learning_rate": 4.8889522631482504e-05, | |
| "loss": 0.6471, | |
| "step": 58500 | |
| }, | |
| { | |
| "epoch": 10.76, | |
| "learning_rate": 4.88780694520799e-05, | |
| "loss": 0.6463, | |
| "step": 59000 | |
| }, | |
| { | |
| "epoch": 10.85, | |
| "learning_rate": 4.8866616272677295e-05, | |
| "loss": 0.6418, | |
| "step": 59500 | |
| }, | |
| { | |
| "epoch": 10.94, | |
| "learning_rate": 4.88551859996335e-05, | |
| "loss": 0.6391, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 11.04, | |
| "learning_rate": 4.8843732820230896e-05, | |
| "loss": 0.6354, | |
| "step": 60500 | |
| }, | |
| { | |
| "epoch": 11.13, | |
| "learning_rate": 4.88322796408283e-05, | |
| "loss": 0.6327, | |
| "step": 61000 | |
| }, | |
| { | |
| "epoch": 11.22, | |
| "learning_rate": 4.8820826461425694e-05, | |
| "loss": 0.6292, | |
| "step": 61500 | |
| }, | |
| { | |
| "epoch": 11.31, | |
| "learning_rate": 4.88094190947407e-05, | |
| "loss": 0.6258, | |
| "step": 62000 | |
| }, | |
| { | |
| "epoch": 11.4, | |
| "learning_rate": 4.879798882169691e-05, | |
| "loss": 0.6257, | |
| "step": 62500 | |
| }, | |
| { | |
| "epoch": 11.49, | |
| "learning_rate": 4.87865356422943e-05, | |
| "loss": 0.6221, | |
| "step": 63000 | |
| }, | |
| { | |
| "epoch": 11.58, | |
| "learning_rate": 4.8775082462891706e-05, | |
| "loss": 0.618, | |
| "step": 63500 | |
| }, | |
| { | |
| "epoch": 11.67, | |
| "learning_rate": 4.87636292834891e-05, | |
| "loss": 0.6156, | |
| "step": 64000 | |
| }, | |
| { | |
| "epoch": 11.77, | |
| "learning_rate": 4.87521761040865e-05, | |
| "loss": 0.614, | |
| "step": 64500 | |
| }, | |
| { | |
| "epoch": 11.86, | |
| "learning_rate": 4.87407229246839e-05, | |
| "loss": 0.612, | |
| "step": 65000 | |
| }, | |
| { | |
| "epoch": 11.95, | |
| "learning_rate": 4.8729269745281295e-05, | |
| "loss": 0.6096, | |
| "step": 65500 | |
| }, | |
| { | |
| "epoch": 12.04, | |
| "learning_rate": 4.871781656587869e-05, | |
| "loss": 0.6073, | |
| "step": 66000 | |
| }, | |
| { | |
| "epoch": 12.13, | |
| "learning_rate": 4.8706386292834896e-05, | |
| "loss": 0.6039, | |
| "step": 66500 | |
| }, | |
| { | |
| "epoch": 12.22, | |
| "learning_rate": 4.869493311343229e-05, | |
| "loss": 0.6033, | |
| "step": 67000 | |
| }, | |
| { | |
| "epoch": 12.31, | |
| "learning_rate": 4.8683479934029694e-05, | |
| "loss": 0.6005, | |
| "step": 67500 | |
| }, | |
| { | |
| "epoch": 12.4, | |
| "learning_rate": 4.867202675462709e-05, | |
| "loss": 0.5971, | |
| "step": 68000 | |
| }, | |
| { | |
| "epoch": 12.5, | |
| "learning_rate": 4.866059648158329e-05, | |
| "loss": 0.5933, | |
| "step": 68500 | |
| }, | |
| { | |
| "epoch": 12.59, | |
| "learning_rate": 4.864914330218069e-05, | |
| "loss": 0.5947, | |
| "step": 69000 | |
| }, | |
| { | |
| "epoch": 12.68, | |
| "learning_rate": 4.8637690122778086e-05, | |
| "loss": 0.5928, | |
| "step": 69500 | |
| }, | |
| { | |
| "epoch": 12.77, | |
| "learning_rate": 4.862623694337549e-05, | |
| "loss": 0.5897, | |
| "step": 70000 | |
| }, | |
| { | |
| "epoch": 12.86, | |
| "learning_rate": 4.8614783763972884e-05, | |
| "loss": 0.588, | |
| "step": 70500 | |
| }, | |
| { | |
| "epoch": 12.95, | |
| "learning_rate": 4.860333058457028e-05, | |
| "loss": 0.5862, | |
| "step": 71000 | |
| }, | |
| { | |
| "epoch": 13.04, | |
| "learning_rate": 4.859187740516768e-05, | |
| "loss": 0.5835, | |
| "step": 71500 | |
| }, | |
| { | |
| "epoch": 13.13, | |
| "learning_rate": 4.858042422576508e-05, | |
| "loss": 0.5827, | |
| "step": 72000 | |
| }, | |
| { | |
| "epoch": 13.23, | |
| "learning_rate": 4.8569016859080086e-05, | |
| "loss": 0.581, | |
| "step": 72500 | |
| }, | |
| { | |
| "epoch": 13.32, | |
| "learning_rate": 4.855756367967748e-05, | |
| "loss": 0.5791, | |
| "step": 73000 | |
| }, | |
| { | |
| "epoch": 13.41, | |
| "learning_rate": 4.854611050027488e-05, | |
| "loss": 0.5764, | |
| "step": 73500 | |
| }, | |
| { | |
| "epoch": 13.5, | |
| "learning_rate": 4.853465732087228e-05, | |
| "loss": 0.5749, | |
| "step": 74000 | |
| }, | |
| { | |
| "epoch": 13.59, | |
| "learning_rate": 4.8523204141469675e-05, | |
| "loss": 0.5747, | |
| "step": 74500 | |
| }, | |
| { | |
| "epoch": 13.68, | |
| "learning_rate": 4.8511773868425874e-05, | |
| "loss": 0.5717, | |
| "step": 75000 | |
| }, | |
| { | |
| "epoch": 13.77, | |
| "learning_rate": 4.8500320689023276e-05, | |
| "loss": 0.5716, | |
| "step": 75500 | |
| }, | |
| { | |
| "epoch": 13.86, | |
| "learning_rate": 4.848886750962067e-05, | |
| "loss": 0.5691, | |
| "step": 76000 | |
| }, | |
| { | |
| "epoch": 13.95, | |
| "learning_rate": 4.847741433021807e-05, | |
| "loss": 0.5661, | |
| "step": 76500 | |
| }, | |
| { | |
| "epoch": 14.05, | |
| "learning_rate": 4.846596115081547e-05, | |
| "loss": 0.565, | |
| "step": 77000 | |
| }, | |
| { | |
| "epoch": 14.14, | |
| "learning_rate": 4.845453087777167e-05, | |
| "loss": 0.5627, | |
| "step": 77500 | |
| }, | |
| { | |
| "epoch": 14.23, | |
| "learning_rate": 4.844307769836907e-05, | |
| "loss": 0.563, | |
| "step": 78000 | |
| }, | |
| { | |
| "epoch": 14.32, | |
| "learning_rate": 4.8431624518966466e-05, | |
| "loss": 0.561, | |
| "step": 78500 | |
| }, | |
| { | |
| "epoch": 14.41, | |
| "learning_rate": 4.842017133956386e-05, | |
| "loss": 0.5599, | |
| "step": 79000 | |
| }, | |
| { | |
| "epoch": 14.5, | |
| "learning_rate": 4.8408718160161264e-05, | |
| "loss": 0.5573, | |
| "step": 79500 | |
| }, | |
| { | |
| "epoch": 14.59, | |
| "learning_rate": 4.839726498075866e-05, | |
| "loss": 0.5558, | |
| "step": 80000 | |
| }, | |
| { | |
| "epoch": 14.68, | |
| "learning_rate": 4.8385834707714865e-05, | |
| "loss": 0.5559, | |
| "step": 80500 | |
| }, | |
| { | |
| "epoch": 14.78, | |
| "learning_rate": 4.837438152831226e-05, | |
| "loss": 0.5546, | |
| "step": 81000 | |
| }, | |
| { | |
| "epoch": 14.87, | |
| "learning_rate": 4.8362928348909656e-05, | |
| "loss": 0.5531, | |
| "step": 81500 | |
| }, | |
| { | |
| "epoch": 14.96, | |
| "learning_rate": 4.835147516950706e-05, | |
| "loss": 0.5493, | |
| "step": 82000 | |
| }, | |
| { | |
| "epoch": 15.05, | |
| "learning_rate": 4.8340021990104454e-05, | |
| "loss": 0.5483, | |
| "step": 82500 | |
| }, | |
| { | |
| "epoch": 15.14, | |
| "learning_rate": 4.832859171706065e-05, | |
| "loss": 0.548, | |
| "step": 83000 | |
| }, | |
| { | |
| "epoch": 15.23, | |
| "learning_rate": 4.8317138537658055e-05, | |
| "loss": 0.5462, | |
| "step": 83500 | |
| }, | |
| { | |
| "epoch": 15.32, | |
| "learning_rate": 4.830568535825545e-05, | |
| "loss": 0.5454, | |
| "step": 84000 | |
| }, | |
| { | |
| "epoch": 15.41, | |
| "learning_rate": 4.829423217885285e-05, | |
| "loss": 0.5428, | |
| "step": 84500 | |
| }, | |
| { | |
| "epoch": 15.51, | |
| "learning_rate": 4.828277899945025e-05, | |
| "loss": 0.5428, | |
| "step": 85000 | |
| }, | |
| { | |
| "epoch": 15.6, | |
| "learning_rate": 4.8271325820047645e-05, | |
| "loss": 0.5424, | |
| "step": 85500 | |
| }, | |
| { | |
| "epoch": 15.69, | |
| "learning_rate": 4.825987264064504e-05, | |
| "loss": 0.5421, | |
| "step": 86000 | |
| }, | |
| { | |
| "epoch": 15.78, | |
| "learning_rate": 4.824841946124244e-05, | |
| "loss": 0.5382, | |
| "step": 86500 | |
| }, | |
| { | |
| "epoch": 15.87, | |
| "learning_rate": 4.823698918819865e-05, | |
| "loss": 0.5373, | |
| "step": 87000 | |
| }, | |
| { | |
| "epoch": 15.96, | |
| "learning_rate": 4.8225536008796044e-05, | |
| "loss": 0.5359, | |
| "step": 87500 | |
| }, | |
| { | |
| "epoch": 16.05, | |
| "learning_rate": 4.821408282939344e-05, | |
| "loss": 0.5357, | |
| "step": 88000 | |
| }, | |
| { | |
| "epoch": 16.14, | |
| "learning_rate": 4.8202629649990835e-05, | |
| "loss": 0.5351, | |
| "step": 88500 | |
| }, | |
| { | |
| "epoch": 16.23, | |
| "learning_rate": 4.819117647058824e-05, | |
| "loss": 0.5317, | |
| "step": 89000 | |
| }, | |
| { | |
| "epoch": 16.33, | |
| "learning_rate": 4.817972329118563e-05, | |
| "loss": 0.5333, | |
| "step": 89500 | |
| }, | |
| { | |
| "epoch": 16.42, | |
| "learning_rate": 4.816829301814184e-05, | |
| "loss": 0.5309, | |
| "step": 90000 | |
| }, | |
| { | |
| "epoch": 16.51, | |
| "learning_rate": 4.815683983873924e-05, | |
| "loss": 0.5308, | |
| "step": 90500 | |
| }, | |
| { | |
| "epoch": 16.6, | |
| "learning_rate": 4.8145386659336636e-05, | |
| "loss": 0.5289, | |
| "step": 91000 | |
| }, | |
| { | |
| "epoch": 16.69, | |
| "learning_rate": 4.813393347993403e-05, | |
| "loss": 0.5275, | |
| "step": 91500 | |
| }, | |
| { | |
| "epoch": 16.78, | |
| "learning_rate": 4.812248030053143e-05, | |
| "loss": 0.5269, | |
| "step": 92000 | |
| }, | |
| { | |
| "epoch": 16.87, | |
| "learning_rate": 4.811102712112883e-05, | |
| "loss": 0.5248, | |
| "step": 92500 | |
| }, | |
| { | |
| "epoch": 16.96, | |
| "learning_rate": 4.8099573941726225e-05, | |
| "loss": 0.5242, | |
| "step": 93000 | |
| }, | |
| { | |
| "epoch": 17.06, | |
| "learning_rate": 4.808814366868243e-05, | |
| "loss": 0.5238, | |
| "step": 93500 | |
| }, | |
| { | |
| "epoch": 17.15, | |
| "learning_rate": 4.8076690489279826e-05, | |
| "loss": 0.5239, | |
| "step": 94000 | |
| }, | |
| { | |
| "epoch": 17.24, | |
| "learning_rate": 4.806523730987723e-05, | |
| "loss": 0.5221, | |
| "step": 94500 | |
| }, | |
| { | |
| "epoch": 17.33, | |
| "learning_rate": 4.8053784130474624e-05, | |
| "loss": 0.52, | |
| "step": 95000 | |
| }, | |
| { | |
| "epoch": 17.42, | |
| "learning_rate": 4.804233095107202e-05, | |
| "loss": 0.5184, | |
| "step": 95500 | |
| }, | |
| { | |
| "epoch": 17.51, | |
| "learning_rate": 4.8030900678028225e-05, | |
| "loss": 0.5186, | |
| "step": 96000 | |
| }, | |
| { | |
| "epoch": 17.6, | |
| "learning_rate": 4.801944749862562e-05, | |
| "loss": 0.5176, | |
| "step": 96500 | |
| }, | |
| { | |
| "epoch": 17.69, | |
| "learning_rate": 4.8008017225581826e-05, | |
| "loss": 0.5182, | |
| "step": 97000 | |
| }, | |
| { | |
| "epoch": 17.79, | |
| "learning_rate": 4.799656404617922e-05, | |
| "loss": 0.5148, | |
| "step": 97500 | |
| }, | |
| { | |
| "epoch": 17.88, | |
| "learning_rate": 4.798511086677662e-05, | |
| "loss": 0.5157, | |
| "step": 98000 | |
| }, | |
| { | |
| "epoch": 17.97, | |
| "learning_rate": 4.797365768737402e-05, | |
| "loss": 0.5131, | |
| "step": 98500 | |
| } | |
| ], | |
| "max_steps": 2192800, | |
| "num_train_epochs": 400, | |
| "total_flos": 2.659426878192668e+19, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |