|
{ |
|
"best_metric": 0.5911664962768555, |
|
"best_model_checkpoint": "./output/clip-finetuned-csu-p14-336-e3l37-l/checkpoint-19500", |
|
"epoch": 2.1952043228638973, |
|
"eval_steps": 500, |
|
"global_step": 19500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.056287290329843524, |
|
"grad_norm": 96.92656707763672, |
|
"learning_rate": 2.9437127096701565e-07, |
|
"loss": 0.3748, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.056287290329843524, |
|
"eval_loss": 1.2226382493972778, |
|
"eval_runtime": 131.2779, |
|
"eval_samples_per_second": 15.037, |
|
"eval_steps_per_second": 1.882, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.11257458065968705, |
|
"grad_norm": 402.31158447265625, |
|
"learning_rate": 2.887425419340313e-07, |
|
"loss": 0.3057, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.11257458065968705, |
|
"eval_loss": 1.0702259540557861, |
|
"eval_runtime": 131.0109, |
|
"eval_samples_per_second": 15.067, |
|
"eval_steps_per_second": 1.885, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.16886187098953057, |
|
"grad_norm": 515.0911865234375, |
|
"learning_rate": 2.831138129010469e-07, |
|
"loss": 0.2239, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.16886187098953057, |
|
"eval_loss": 0.9957238435745239, |
|
"eval_runtime": 134.8377, |
|
"eval_samples_per_second": 14.64, |
|
"eval_steps_per_second": 1.832, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.2251491613193741, |
|
"grad_norm": 3.1967105865478516, |
|
"learning_rate": 2.774850838680626e-07, |
|
"loss": 0.2229, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.2251491613193741, |
|
"eval_loss": 0.9504629373550415, |
|
"eval_runtime": 129.0169, |
|
"eval_samples_per_second": 15.3, |
|
"eval_steps_per_second": 1.914, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.2814364516492176, |
|
"grad_norm": 276.22998046875, |
|
"learning_rate": 2.718563548350782e-07, |
|
"loss": 0.2098, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.2814364516492176, |
|
"eval_loss": 0.9006705284118652, |
|
"eval_runtime": 129.1816, |
|
"eval_samples_per_second": 15.281, |
|
"eval_steps_per_second": 1.912, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.33772374197906113, |
|
"grad_norm": 1105.317138671875, |
|
"learning_rate": 2.6622762580209386e-07, |
|
"loss": 0.1938, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.33772374197906113, |
|
"eval_loss": 0.8782906532287598, |
|
"eval_runtime": 128.6387, |
|
"eval_samples_per_second": 15.345, |
|
"eval_steps_per_second": 1.92, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.39401103230890466, |
|
"grad_norm": 0.00043465400813147426, |
|
"learning_rate": 2.605988967691095e-07, |
|
"loss": 0.1688, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.39401103230890466, |
|
"eval_loss": 0.8405746221542358, |
|
"eval_runtime": 128.6989, |
|
"eval_samples_per_second": 15.338, |
|
"eval_steps_per_second": 1.919, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.4502983226387482, |
|
"grad_norm": 0.027426382526755333, |
|
"learning_rate": 2.549701677361252e-07, |
|
"loss": 0.1457, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.4502983226387482, |
|
"eval_loss": 0.813768208026886, |
|
"eval_runtime": 128.7209, |
|
"eval_samples_per_second": 15.336, |
|
"eval_steps_per_second": 1.919, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.5065856129685917, |
|
"grad_norm": 0.0012935090344399214, |
|
"learning_rate": 2.4934143870314085e-07, |
|
"loss": 0.179, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.5065856129685917, |
|
"eval_loss": 0.7965527176856995, |
|
"eval_runtime": 128.3969, |
|
"eval_samples_per_second": 15.374, |
|
"eval_steps_per_second": 1.924, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.5628729032984352, |
|
"grad_norm": 0.0017458726651966572, |
|
"learning_rate": 2.4371270967015646e-07, |
|
"loss": 0.1224, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.5628729032984352, |
|
"eval_loss": 0.7788484692573547, |
|
"eval_runtime": 128.4939, |
|
"eval_samples_per_second": 15.363, |
|
"eval_steps_per_second": 1.922, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.6191601936282788, |
|
"grad_norm": 65.76844787597656, |
|
"learning_rate": 2.380839806371721e-07, |
|
"loss": 0.1551, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.6191601936282788, |
|
"eval_loss": 0.7626588344573975, |
|
"eval_runtime": 128.7302, |
|
"eval_samples_per_second": 15.334, |
|
"eval_steps_per_second": 1.919, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.6754474839581223, |
|
"grad_norm": 5.014094829559326, |
|
"learning_rate": 2.3245525160418776e-07, |
|
"loss": 0.1721, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.6754474839581223, |
|
"eval_loss": 0.7464810609817505, |
|
"eval_runtime": 128.749, |
|
"eval_samples_per_second": 15.332, |
|
"eval_steps_per_second": 1.918, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.7317347742879657, |
|
"grad_norm": 0.0022696161177009344, |
|
"learning_rate": 2.268265225712034e-07, |
|
"loss": 0.1532, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.7317347742879657, |
|
"eval_loss": 0.7335842251777649, |
|
"eval_runtime": 128.8165, |
|
"eval_samples_per_second": 15.324, |
|
"eval_steps_per_second": 1.917, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.7880220646178093, |
|
"grad_norm": 5.508715730684344e-06, |
|
"learning_rate": 2.2119779353821906e-07, |
|
"loss": 0.1991, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.7880220646178093, |
|
"eval_loss": 0.7244272232055664, |
|
"eval_runtime": 128.9211, |
|
"eval_samples_per_second": 15.312, |
|
"eval_steps_per_second": 1.916, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.8443093549476528, |
|
"grad_norm": 0.30320531129837036, |
|
"learning_rate": 2.155690645052347e-07, |
|
"loss": 0.1551, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.8443093549476528, |
|
"eval_loss": 0.718368411064148, |
|
"eval_runtime": 128.8149, |
|
"eval_samples_per_second": 15.324, |
|
"eval_steps_per_second": 1.917, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.9005966452774964, |
|
"grad_norm": 1.9788849385804497e-05, |
|
"learning_rate": 2.0994033547225037e-07, |
|
"loss": 0.1439, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.9005966452774964, |
|
"eval_loss": 0.7040167450904846, |
|
"eval_runtime": 128.4921, |
|
"eval_samples_per_second": 15.363, |
|
"eval_steps_per_second": 1.922, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.9568839356073399, |
|
"grad_norm": 1.0750063665909693e-06, |
|
"learning_rate": 2.04311606439266e-07, |
|
"loss": 0.1361, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.9568839356073399, |
|
"eval_loss": 0.6983720660209656, |
|
"eval_runtime": 128.5929, |
|
"eval_samples_per_second": 15.351, |
|
"eval_steps_per_second": 1.921, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.0131712259371835, |
|
"grad_norm": 187.1415557861328, |
|
"learning_rate": 1.9868287740628167e-07, |
|
"loss": 0.1144, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.0131712259371835, |
|
"eval_loss": 0.6901102066040039, |
|
"eval_runtime": 128.5322, |
|
"eval_samples_per_second": 15.358, |
|
"eval_steps_per_second": 1.922, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.069458516267027, |
|
"grad_norm": 0.007205183617770672, |
|
"learning_rate": 1.9305414837329728e-07, |
|
"loss": 0.0643, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 1.069458516267027, |
|
"eval_loss": 0.6831753253936768, |
|
"eval_runtime": 128.59, |
|
"eval_samples_per_second": 15.351, |
|
"eval_steps_per_second": 1.921, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 1.1257458065968704, |
|
"grad_norm": 0.4961595833301544, |
|
"learning_rate": 1.8742541934031294e-07, |
|
"loss": 0.0773, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.1257458065968704, |
|
"eval_loss": 0.6732445955276489, |
|
"eval_runtime": 128.732, |
|
"eval_samples_per_second": 15.334, |
|
"eval_steps_per_second": 1.919, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.1820330969267139, |
|
"grad_norm": 0.0009373470675200224, |
|
"learning_rate": 1.817966903073286e-07, |
|
"loss": 0.0814, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 1.1820330969267139, |
|
"eval_loss": 0.6723716259002686, |
|
"eval_runtime": 126.1133, |
|
"eval_samples_per_second": 15.653, |
|
"eval_steps_per_second": 1.959, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 1.2383203872565574, |
|
"grad_norm": 2.7500348096509697e-06, |
|
"learning_rate": 1.7616796127434424e-07, |
|
"loss": 0.0924, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 1.2383203872565574, |
|
"eval_loss": 0.6679245233535767, |
|
"eval_runtime": 126.0452, |
|
"eval_samples_per_second": 15.661, |
|
"eval_steps_per_second": 1.96, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 1.294607677586401, |
|
"grad_norm": 12.024052619934082, |
|
"learning_rate": 1.705392322413599e-07, |
|
"loss": 0.091, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 1.294607677586401, |
|
"eval_loss": 0.6659140586853027, |
|
"eval_runtime": 129.0611, |
|
"eval_samples_per_second": 15.295, |
|
"eval_steps_per_second": 1.914, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 1.3508949679162445, |
|
"grad_norm": 0.0008855258929543197, |
|
"learning_rate": 1.6491050320837554e-07, |
|
"loss": 0.0978, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.3508949679162445, |
|
"eval_loss": 0.6577230095863342, |
|
"eval_runtime": 126.2858, |
|
"eval_samples_per_second": 15.631, |
|
"eval_steps_per_second": 1.956, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.407182258246088, |
|
"grad_norm": 0.0003772446943912655, |
|
"learning_rate": 1.592817741753912e-07, |
|
"loss": 0.0598, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 1.407182258246088, |
|
"eval_loss": 0.6527238488197327, |
|
"eval_runtime": 125.8972, |
|
"eval_samples_per_second": 15.679, |
|
"eval_steps_per_second": 1.962, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 1.4634695485759315, |
|
"grad_norm": 0.00032130314502865076, |
|
"learning_rate": 1.5365304514240682e-07, |
|
"loss": 0.0713, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 1.4634695485759315, |
|
"eval_loss": 0.6472681760787964, |
|
"eval_runtime": 128.5052, |
|
"eval_samples_per_second": 15.361, |
|
"eval_steps_per_second": 1.922, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 1.5197568389057752, |
|
"grad_norm": 0.0006748048472218215, |
|
"learning_rate": 1.4802431610942248e-07, |
|
"loss": 0.0759, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 1.5197568389057752, |
|
"eval_loss": 0.637607991695404, |
|
"eval_runtime": 125.8757, |
|
"eval_samples_per_second": 15.682, |
|
"eval_steps_per_second": 1.962, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 1.5760441292356187, |
|
"grad_norm": 4.276073184428242e-08, |
|
"learning_rate": 1.4239558707643812e-07, |
|
"loss": 0.093, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 1.5760441292356187, |
|
"eval_loss": 0.6378567218780518, |
|
"eval_runtime": 126.214, |
|
"eval_samples_per_second": 15.64, |
|
"eval_steps_per_second": 1.957, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 1.6323314195654621, |
|
"grad_norm": 0.0019163701217621565, |
|
"learning_rate": 1.3676685804345378e-07, |
|
"loss": 0.061, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 1.6323314195654621, |
|
"eval_loss": 0.6364826560020447, |
|
"eval_runtime": 128.5083, |
|
"eval_samples_per_second": 15.361, |
|
"eval_steps_per_second": 1.922, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 1.6886187098953056, |
|
"grad_norm": 10.94436264038086, |
|
"learning_rate": 1.3113812901046944e-07, |
|
"loss": 0.077, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 1.6886187098953056, |
|
"eval_loss": 0.6351856589317322, |
|
"eval_runtime": 126.3016, |
|
"eval_samples_per_second": 15.629, |
|
"eval_steps_per_second": 1.956, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 1.744906000225149, |
|
"grad_norm": 245.41305541992188, |
|
"learning_rate": 1.2550939997748508e-07, |
|
"loss": 0.0798, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 1.744906000225149, |
|
"eval_loss": 0.6253550052642822, |
|
"eval_runtime": 126.2592, |
|
"eval_samples_per_second": 15.635, |
|
"eval_steps_per_second": 1.956, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 1.8011932905549926, |
|
"grad_norm": 5.819400783479978e-08, |
|
"learning_rate": 1.1988067094450072e-07, |
|
"loss": 0.0846, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 1.8011932905549926, |
|
"eval_loss": 0.6173177361488342, |
|
"eval_runtime": 128.7312, |
|
"eval_samples_per_second": 15.334, |
|
"eval_steps_per_second": 1.919, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 1.8574805808848363, |
|
"grad_norm": 4.076898676430574e-06, |
|
"learning_rate": 1.1425194191151638e-07, |
|
"loss": 0.1003, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 1.8574805808848363, |
|
"eval_loss": 0.6105911135673523, |
|
"eval_runtime": 126.0429, |
|
"eval_samples_per_second": 15.661, |
|
"eval_steps_per_second": 1.96, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 1.9137678712146797, |
|
"grad_norm": 2.8583364486694336, |
|
"learning_rate": 1.0862321287853203e-07, |
|
"loss": 0.0874, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 1.9137678712146797, |
|
"eval_loss": 0.6095408797264099, |
|
"eval_runtime": 125.7222, |
|
"eval_samples_per_second": 15.701, |
|
"eval_steps_per_second": 1.965, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 1.9700551615445232, |
|
"grad_norm": 0.047957953065633774, |
|
"learning_rate": 1.0299448384554767e-07, |
|
"loss": 0.0513, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 1.9700551615445232, |
|
"eval_loss": 0.6062661409378052, |
|
"eval_runtime": 128.3312, |
|
"eval_samples_per_second": 15.382, |
|
"eval_steps_per_second": 1.925, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 2.026342451874367, |
|
"grad_norm": 1.2455217301976518e-06, |
|
"learning_rate": 9.736575481256332e-08, |
|
"loss": 0.0743, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 2.026342451874367, |
|
"eval_loss": 0.6002796292304993, |
|
"eval_runtime": 126.2895, |
|
"eval_samples_per_second": 15.631, |
|
"eval_steps_per_second": 1.956, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 2.0826297422042104, |
|
"grad_norm": 12.021537780761719, |
|
"learning_rate": 9.173702577957897e-08, |
|
"loss": 0.0704, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 2.0826297422042104, |
|
"eval_loss": 0.5955923199653625, |
|
"eval_runtime": 126.1104, |
|
"eval_samples_per_second": 15.653, |
|
"eval_steps_per_second": 1.959, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 2.138917032534054, |
|
"grad_norm": 3.156046152114868, |
|
"learning_rate": 8.610829674659462e-08, |
|
"loss": 0.0368, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 2.138917032534054, |
|
"eval_loss": 0.5925185084342957, |
|
"eval_runtime": 128.6739, |
|
"eval_samples_per_second": 15.341, |
|
"eval_steps_per_second": 1.92, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 2.1952043228638973, |
|
"grad_norm": 0.0006427310290746391, |
|
"learning_rate": 8.047956771361026e-08, |
|
"loss": 0.0636, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 2.1952043228638973, |
|
"eval_loss": 0.5911664962768555, |
|
"eval_runtime": 126.3814, |
|
"eval_samples_per_second": 15.619, |
|
"eval_steps_per_second": 1.954, |
|
"step": 19500 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 26649, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 7015647607921260.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|