{
  "best_metric": 0.7275755258839849,
  "best_model_checkpoint": "./checkpoints/clip-2560-wikispan-all/checkpoint-240",
  "epoch": 0.5982799451576717,
  "global_step": 14400,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    { "epoch": 0.0, "learning_rate": 1.8e-06, "loss": 1.8723, "step": 120 },
    { "epoch": 0.01, "learning_rate": 3.6e-06, "loss": 1.0969, "step": 240 },
    { "epoch": 0.01, "learning_rate": 5.4e-06, "loss": 0.8982, "step": 360 },
    { "epoch": 0.02, "learning_rate": 7.2e-06, "loss": 0.802, "step": 480 },
    { "epoch": 0.02, "learning_rate": 9e-06, "loss": 0.7304, "step": 600 },
    { "epoch": 0.03, "learning_rate": 1.08e-05, "loss": 0.6815, "step": 720 },
    { "epoch": 0.03, "learning_rate": 1.26e-05, "loss": 0.6435, "step": 840 },
    { "epoch": 0.04, "learning_rate": 1.44e-05, "loss": 0.6089, "step": 960 },
    { "epoch": 0.04, "learning_rate": 1.62e-05, "loss": 0.581, "step": 1080 },
    { "epoch": 0.05, "learning_rate": 1.8e-05, "loss": 0.5637, "step": 1200 },
    { "epoch": 0.05, "learning_rate": 1.98e-05, "loss": 0.538, "step": 1320 },
    { "epoch": 0.06, "learning_rate": 2.16e-05, "loss": 0.5223, "step": 1440 },
    { "epoch": 0.06, "learning_rate": 2.3400000000000003e-05, "loss": 0.5123, "step": 1560 },
    { "epoch": 0.07, "learning_rate": 2.52e-05, "loss": 0.4984, "step": 1680 },
    { "epoch": 0.07, "learning_rate": 2.7000000000000002e-05, "loss": 0.4848, "step": 1800 },
    { "epoch": 0.08, "learning_rate": 2.88e-05, "loss": 0.4763, "step": 1920 },
    { "epoch": 0.08, "learning_rate": 2.9945750452079567e-05, "loss": 0.468, "step": 2040 },
    { "epoch": 0.09, "learning_rate": 2.978300180831826e-05, "loss": 0.4532, "step": 2160 },
    { "epoch": 0.09, "learning_rate": 2.9620253164556963e-05, "loss": 0.445, "step": 2280 },
    { "epoch": 0.1, "learning_rate": 2.945750452079566e-05, "loss": 0.4382, "step": 2400 },
    { "epoch": 0.1, "learning_rate": 2.929475587703436e-05, "loss": 0.4289, "step": 2520 },
    { "epoch": 0.11, "learning_rate": 2.9132007233273057e-05, "loss": 0.4216, "step": 2640 },
    { "epoch": 0.11, "learning_rate": 2.8969258589511756e-05, "loss": 0.4134, "step": 2760 },
    { "epoch": 0.12, "learning_rate": 2.8806509945750454e-05, "loss": 0.4084, "step": 2880 },
    { "epoch": 0.12, "learning_rate": 2.864376130198915e-05, "loss": 0.3988, "step": 3000 },
    { "epoch": 0.13, "learning_rate": 2.8481012658227846e-05, "loss": 0.3938, "step": 3120 },
    { "epoch": 0.13, "learning_rate": 2.8318264014466548e-05, "loss": 0.3886, "step": 3240 },
    { "epoch": 0.14, "learning_rate": 2.8155515370705246e-05, "loss": 0.3845, "step": 3360 },
    { "epoch": 0.14, "learning_rate": 2.7992766726943944e-05, "loss": 0.378, "step": 3480 },
    { "epoch": 0.15, "learning_rate": 2.7830018083182642e-05, "loss": 0.375, "step": 3600 },
    { "epoch": 0.15, "learning_rate": 2.766726943942134e-05, "loss": 0.3665, "step": 3720 },
    { "epoch": 0.16, "learning_rate": 2.7504520795660035e-05, "loss": 0.3644, "step": 3840 },
    { "epoch": 0.16, "learning_rate": 2.7341772151898733e-05, "loss": 0.36, "step": 3960 },
    { "epoch": 0.17, "learning_rate": 2.717902350813743e-05, "loss": 0.3597, "step": 4080 },
    { "epoch": 0.17, "learning_rate": 2.701627486437613e-05, "loss": 0.3549, "step": 4200 },
    { "epoch": 0.18, "learning_rate": 2.685352622061483e-05, "loss": 0.3545, "step": 4320 },
    { "epoch": 0.18, "learning_rate": 2.669077757685353e-05, "loss": 0.3496, "step": 4440 },
    { "epoch": 0.19, "learning_rate": 2.6528028933092224e-05, "loss": 0.3442, "step": 4560 },
    { "epoch": 0.19, "learning_rate": 2.6365280289330922e-05, "loss": 0.3424, "step": 4680 },
    { "epoch": 0.2, "learning_rate": 2.620253164556962e-05, "loss": 0.3414, "step": 4800 },
    { "epoch": 0.2, "learning_rate": 2.6039783001808318e-05, "loss": 0.3361, "step": 4920 },
    { "epoch": 0.21, "learning_rate": 2.5877034358047016e-05, "loss": 0.3378, "step": 5040 },
    { "epoch": 0.21, "learning_rate": 2.5714285714285714e-05, "loss": 0.3316, "step": 5160 },
    { "epoch": 0.22, "learning_rate": 2.5551537070524416e-05, "loss": 0.3302, "step": 5280 },
    { "epoch": 0.22, "learning_rate": 2.538878842676311e-05, "loss": 0.3245, "step": 5400 },
    { "epoch": 0.23, "learning_rate": 2.522603978300181e-05, "loss": 0.3254, "step": 5520 },
    { "epoch": 0.23, "learning_rate": 2.5063291139240507e-05, "loss": 0.3206, "step": 5640 },
    { "epoch": 0.24, "learning_rate": 2.4900542495479205e-05, "loss": 0.3212, "step": 5760 },
    { "epoch": 0.24, "learning_rate": 2.4737793851717903e-05, "loss": 0.3184, "step": 5880 },
    { "epoch": 0.25, "learning_rate": 2.45750452079566e-05, "loss": 0.3141, "step": 6000 },
    { "epoch": 0.25, "learning_rate": 2.44122965641953e-05, "loss": 0.3185, "step": 6120 },
    { "epoch": 0.26, "learning_rate": 2.4249547920433994e-05, "loss": 0.3115, "step": 6240 },
    { "epoch": 0.26, "learning_rate": 2.4086799276672696e-05, "loss": 0.3093, "step": 6360 },
    { "epoch": 0.27, "learning_rate": 2.3924050632911394e-05, "loss": 0.3116, "step": 6480 },
    { "epoch": 0.27, "learning_rate": 2.3761301989150092e-05, "loss": 0.3084, "step": 6600 },
    { "epoch": 0.28, "learning_rate": 2.359855334538879e-05, "loss": 0.3071, "step": 6720 },
    { "epoch": 0.28, "learning_rate": 2.3435804701627488e-05, "loss": 0.3047, "step": 6840 },
    { "epoch": 0.29, "learning_rate": 2.3273056057866183e-05, "loss": 0.302, "step": 6960 },
    { "epoch": 0.29, "learning_rate": 2.311030741410488e-05, "loss": 0.3027, "step": 7080 },
    { "epoch": 0.3, "learning_rate": 2.294755877034358e-05, "loss": 0.3019, "step": 7200 },
    { "epoch": 0.3, "learning_rate": 2.278481012658228e-05, "loss": 0.3, "step": 7320 },
    { "epoch": 0.31, "learning_rate": 2.262206148282098e-05, "loss": 0.2961, "step": 7440 },
    { "epoch": 0.31, "learning_rate": 2.2459312839059677e-05, "loss": 0.2967, "step": 7560 },
    { "epoch": 0.32, "learning_rate": 2.2296564195298375e-05, "loss": 0.2944, "step": 7680 },
    { "epoch": 0.32, "learning_rate": 2.213381555153707e-05, "loss": 0.2937, "step": 7800 },
    { "epoch": 0.33, "learning_rate": 2.1971066907775768e-05, "loss": 0.2914, "step": 7920 },
    { "epoch": 0.33, "learning_rate": 2.1808318264014466e-05, "loss": 0.2902, "step": 8040 },
    { "epoch": 0.34, "learning_rate": 2.1645569620253164e-05, "loss": 0.2889, "step": 8160 },
    { "epoch": 0.34, "learning_rate": 2.1482820976491862e-05, "loss": 0.2894, "step": 8280 },
    { "epoch": 0.35, "learning_rate": 2.1320072332730564e-05, "loss": 0.286, "step": 8400 },
    { "epoch": 0.35, "learning_rate": 2.1157323688969262e-05, "loss": 0.2847, "step": 8520 },
    { "epoch": 0.36, "learning_rate": 2.0994575045207956e-05, "loss": 0.2838, "step": 8640 },
    { "epoch": 0.36, "learning_rate": 2.0831826401446655e-05, "loss": 0.2822, "step": 8760 },
    { "epoch": 0.37, "learning_rate": 2.0669077757685353e-05, "loss": 0.2814, "step": 8880 },
    { "epoch": 0.37, "learning_rate": 2.050632911392405e-05, "loss": 0.2818, "step": 9000 },
    { "epoch": 0.38, "learning_rate": 2.034358047016275e-05, "loss": 0.2779, "step": 9120 },
    { "epoch": 0.38, "learning_rate": 2.0180831826401447e-05, "loss": 0.2794, "step": 9240 },
    { "epoch": 0.39, "learning_rate": 2.0018083182640145e-05, "loss": 0.2789, "step": 9360 },
    { "epoch": 0.39, "learning_rate": 1.9855334538878843e-05, "loss": 0.2757, "step": 9480 },
    { "epoch": 0.4, "learning_rate": 1.969258589511754e-05, "loss": 0.2758, "step": 9600 },
    { "epoch": 0.4, "learning_rate": 1.952983725135624e-05, "loss": 0.277, "step": 9720 },
    { "epoch": 0.41, "learning_rate": 1.9367088607594938e-05, "loss": 0.2731, "step": 9840 },
    { "epoch": 0.41, "learning_rate": 1.9204339963833636e-05, "loss": 0.268, "step": 9960 },
    { "epoch": 0.42, "learning_rate": 1.9041591320072334e-05, "loss": 0.2702, "step": 10080 },
    { "epoch": 0.42, "learning_rate": 1.887884267631103e-05, "loss": 0.2699, "step": 10200 },
    { "epoch": 0.43, "learning_rate": 1.8716094032549727e-05, "loss": 0.2707, "step": 10320 },
    { "epoch": 0.43, "learning_rate": 1.8553345388788428e-05, "loss": 0.2661, "step": 10440 },
    { "epoch": 0.44, "learning_rate": 1.8390596745027126e-05, "loss": 0.2668, "step": 10560 },
    { "epoch": 0.44, "learning_rate": 1.8227848101265824e-05, "loss": 0.2689, "step": 10680 },
    { "epoch": 0.45, "learning_rate": 1.8065099457504523e-05, "loss": 0.2695, "step": 10800 },
    { "epoch": 0.45, "learning_rate": 1.790235081374322e-05, "loss": 0.2637, "step": 10920 },
    { "epoch": 0.46, "learning_rate": 1.7739602169981915e-05, "loss": 0.2645, "step": 11040 },
    { "epoch": 0.46, "learning_rate": 1.7576853526220614e-05, "loss": 0.2618, "step": 11160 },
    { "epoch": 0.47, "learning_rate": 1.741410488245931e-05, "loss": 0.2626, "step": 11280 },
    { "epoch": 0.47, "learning_rate": 1.7251356238698013e-05, "loss": 0.2581, "step": 11400 },
    { "epoch": 0.48, "learning_rate": 1.708860759493671e-05, "loss": 0.2597, "step": 11520 },
    { "epoch": 0.48, "learning_rate": 1.692585895117541e-05, "loss": 0.2594, "step": 11640 },
    { "epoch": 0.49, "learning_rate": 1.6763110307414104e-05, "loss": 0.2577, "step": 11760 },
    { "epoch": 0.49, "learning_rate": 1.6600361663652802e-05, "loss": 0.2585, "step": 11880 },
    { "epoch": 0.5, "learning_rate": 1.64376130198915e-05, "loss": 0.2565, "step": 12000 },
    { "epoch": 0.5, "learning_rate": 1.62748643761302e-05, "loss": 0.2562, "step": 12120 },
    { "epoch": 0.51, "learning_rate": 1.6112115732368897e-05, "loss": 0.2559, "step": 12240 },
    { "epoch": 0.51, "learning_rate": 1.5949367088607595e-05, "loss": 0.2551, "step": 12360 },
    { "epoch": 0.52, "learning_rate": 1.5786618444846296e-05, "loss": 0.2528, "step": 12480 },
    { "epoch": 0.52, "learning_rate": 1.562386980108499e-05, "loss": 0.2539, "step": 12600 },
    { "epoch": 0.53, "learning_rate": 1.546112115732369e-05, "loss": 0.2525, "step": 12720 },
    { "epoch": 0.53, "learning_rate": 1.5298372513562387e-05, "loss": 0.2534, "step": 12840 },
    { "epoch": 0.54, "learning_rate": 1.5135623869801085e-05, "loss": 0.2518, "step": 12960 },
    { "epoch": 0.54, "learning_rate": 1.4972875226039783e-05, "loss": 0.2506, "step": 13080 },
    { "epoch": 0.55, "learning_rate": 1.4810126582278482e-05, "loss": 0.2517, "step": 13200 },
    { "epoch": 0.55, "learning_rate": 1.464737793851718e-05, "loss": 0.2483, "step": 13320 },
    { "epoch": 0.56, "learning_rate": 1.4484629294755878e-05, "loss": 0.2471, "step": 13440 },
    { "epoch": 0.56, "learning_rate": 1.4321880650994574e-05, "loss": 0.2484, "step": 13560 },
    { "epoch": 0.57, "learning_rate": 1.4159132007233274e-05, "loss": 0.2474, "step": 13680 },
    { "epoch": 0.57, "learning_rate": 1.3996383363471972e-05, "loss": 0.25, "step": 13800 },
    { "epoch": 0.58, "learning_rate": 1.383363471971067e-05, "loss": 0.2458, "step": 13920 },
    { "epoch": 0.58, "learning_rate": 1.3670886075949367e-05, "loss": 0.2466, "step": 14040 },
    { "epoch": 0.59, "learning_rate": 1.3508137432188065e-05, "loss": 0.2454, "step": 14160 },
    { "epoch": 0.59, "learning_rate": 1.3345388788426765e-05, "loss": 0.2461, "step": 14280 },
    { "epoch": 0.6, "learning_rate": 1.3182640144665461e-05, "loss": 0.2427, "step": 14400 }
  ],
  "max_steps": 24120,
  "num_train_epochs": 2,
  "total_flos": 0.0,
  "trial_name": null,
  "trial_params": null
}