version: 1.1.0
__metadata__:
  per_device_train_batch_size: 48
  per_device_eval_batch_size: 48
  fp16: False
  framework_metadata:
    python_version: 3.8.13
    sparseml_version: 1.0.1
    torch_version: 1.9.1+cu111
modifiers:
  - !GMPruningModifier
    end_epoch: 11
    final_sparsity: 0.7
    global_sparsity: False
    init_sparsity: 0.0
    inter_func: cubic
    leave_enabled: True
    mask_type: unstructured
    params: ['re:roberta.encoder.layer.*.attention.self.query.weight', 're:roberta.encoder.layer.*.attention.self.key.weight', 're:roberta.encoder.layer.*.attention.self.value.weight', 're:roberta.encoder.layer.*.attention.output.dense.weight', 're:roberta.encoder.layer.*.intermediate.dense.weight', 're:roberta.encoder.layer.*.output.dense.weight', 're:classifier.dense.weight']
    start_epoch: 2
    update_frequency: 0.05
  - !EpochRangeModifier
    end_epoch: 15
    start_epoch: 0.0