distily_tinyBenchmarks / benchmarks.shelve.dir
lapp0's picture
Training in progress, step 125
a58e975 verified
'teacher', (0, 13436)
'distily_tinyBenchmarks/logs/harness_benchmarks=tinyBenchmarks, learning_rate=0.0001, lr_scheduler_kwargs=__power___0.7___lr_end___2e-05_, lr_scheduler_type=polynomial, per_device_train_batch_size=8', (13824, 448)
'logis/teacher', (27648, 448)
'distily_tinyBenchmarks/logs/harness_benchmarks=tinyBenchmarks, learning_rate=0.0001, lr_scheduler_kwargs=__power___0.7___lr_end___2e-05_, lr_scheduler_type=polynomial, per_device_train_batch_size=8, warmup_ratio=0.1', (41472, 448)
'distily_tinyBenchmarks/logs/harness_benchmarks=tinyBenchmarks, learning_rate=5e-05, lr_scheduler_kwargs=__power___0.7___lr_end___2e-05_, lr_scheduler_type=polynomial, per_device_train_batch_size=8', (41984, 448)
'distily_tinyBenchmarks/logs/harness_benchmarks=tinyBenchmarks, learning_rate=5e-05, lr_scheduler_kwargs=__power___0.7___lr_end___2e-05_, lr_scheduler_type=polynomial, per_device_train_batch_size=8, warmup_ratio=0.1', (42496, 448)
'distily_tinyBenchmarks/logs/harness_benchmarks=tinyBenchmarks, learning_rate=0.0002, lr_scheduler_kwargs=__power___0.7___lr_end___2e-05_, lr_scheduler_type=polynomial, per_device_train_batch_size=8', (43008, 448)
'distily_tinyBenchmarks/logs/harness_benchmarks=tinyBenchmarks, learning_rate=0.0002, lr_scheduler_kwargs=__power___0.7___lr_end___2e-05_, lr_scheduler_type=polynomial, per_device_train_batch_size=8, warmup_ratio=0.1', (43520, 448)