|
'teacher', |
|
'distily_tinyBenchmarks/logs/harness_benchmarks=tinyBenchmarks, learning_rate=0.0001, lr_scheduler_kwargs=__power___0.7___lr_end___2e-05_, lr_scheduler_type=polynomial, per_device_train_batch_size=8', |
|
'logis/teacher', |
|
'distily_tinyBenchmarks/logs/harness_benchmarks=tinyBenchmarks, learning_rate=0.0001, lr_scheduler_kwargs=__power___0.7___lr_end___2e-05_, lr_scheduler_type=polynomial, per_device_train_batch_size=8, warmup_ratio=0.1', |
|
'distily_tinyBenchmarks/logs/harness_benchmarks=tinyBenchmarks, learning_rate=5e-05, lr_scheduler_kwargs=__power___0.7___lr_end___2e-05_, lr_scheduler_type=polynomial, per_device_train_batch_size=8', |
|
'distily_tinyBenchmarks/logs/harness_benchmarks=tinyBenchmarks, learning_rate=5e-05, lr_scheduler_kwargs=__power___0.7___lr_end___2e-05_, lr_scheduler_type=polynomial, per_device_train_batch_size=8, warmup_ratio=0.1', |
|
'distily_tinyBenchmarks/logs/harness_benchmarks=tinyBenchmarks, learning_rate=0.0002, lr_scheduler_kwargs=__power___0.7___lr_end___2e-05_, lr_scheduler_type=polynomial, per_device_train_batch_size=8', |
|
'distily_tinyBenchmarks/logs/harness_benchmarks=tinyBenchmarks, learning_rate=0.0002, lr_scheduler_kwargs=__power___0.7___lr_end___2e-05_, lr_scheduler_type=polynomial, per_device_train_batch_size=8, warmup_ratio=0.1', |
|
|