This model has 1 file scanned as unsafe.
- attn_loss_fn=mse, attn_weight=10.0, hidden_weight=10.0, hs_loss_fn=raw_mse, learning_rate=0.0001, warmup_ratio=0.1
- attn_loss_fn=mse, attn_weight=10.0, hidden_weight=10.0, hs_loss_fn=raw_mse, learning_rate=0.0001, warmup_ratio=0
- attn_loss_fn=mse, attn_weight=10.0, hidden_weight=10.0, hs_loss_fn=raw_mse, learning_rate=0.0004, warmup_ratio=0
- attn_loss_fn=mse, attn_weight=10.0, hidden_weight=10.0, hs_loss_fn=raw_mse, learning_rate=0.004, warmup_ratio=0
- attn_loss_fn=mse, attn_weight=10.0, hs_loss_fn=cos, hs_weight=10.0, learning_rate=0.001, warmup_ratio=0
- attn_loss_fn=mse, attn_weight=10.0, hs_loss_fn=cos, hs_weight=10.0, learning_rate=0.004, warmup_ratio=0.1
- attn_loss_fn=mse, attn_weight=10.0, hs_loss_fn=cos, hs_weight=10.0, learning_rate=0.004, warmup_ratio=0
- attn_loss_fn=mse, attn_weight=10.0, hs_loss_fn=kl, hs_weight=10.0, learning_rate=0.004, warmup_ratio=0
- attn_loss_fn=mse, attn_weight=10.0, hs_loss_fn=mse, hs_weight=10.0, learning_rate=0.001, warmup_ratio=0.1
- attn_loss_fn=mse, attn_weight=10.0, hs_loss_fn=mse, hs_weight=10.0, learning_rate=0.001, warmup_ratio=0
- attn_loss_fn=mse, attn_weight=10.0, hs_loss_fn=mse, hs_weight=10.0, learning_rate=0.004, warmup_ratio=0.1
- attn_loss_fn=mse, attn_weight=10.0, hs_loss_fn=mse, hs_weight=10.0, learning_rate=0.004, warmup_ratio=0
- attn_loss_fn=mse, attn_weight=10.0, hs_loss_fn=raw_mse, hs_weight=10.0, learning_rate=0.0001, warmup_ratio=0
- attn_loss_fn=mse, attn_weight=10.0, hs_loss_fn=raw_mse, hs_weight=10.0, learning_rate=0.0004, warmup_ratio=0.1
- attn_loss_fn=mse, attn_weight=10.0, hs_loss_fn=raw_mse, hs_weight=10.0, learning_rate=0.0004, warmup_ratio=0
- attn_loss_fn=mse, attn_weight=10.0, hs_loss_fn=raw_mse, hs_weight=10.0, learning_rate=0.001, warmup_ratio=0.1
- attn_loss_fn=mse, attn_weight=10.0, hs_loss_fn=raw_mse, hs_weight=10.0, learning_rate=0.001, warmup_ratio=0
- attn_loss_fn=mse, attn_weight=10.0, hs_loss_fn=raw_mse, hs_weight=10.0, learning_rate=0.004, warmup_ratio=0.1
- attn_loss_fn=mse, attn_weight=10.0, hs_loss_fn=raw_mse, hs_weight=10.0, learning_rate=0.004, warmup_ratio=0
- attn_loss_fn=raw_mse, attn_weight=10.0, hidden_weight=10.0, hs_loss_fn=mse, learning_rate=0.0004
- attn_loss_fn=raw_mse, attn_weight=10.0, hidden_weight=10.0, hs_loss_fn=mse, learning_rate=4e-05
- attn_loss_fn=raw_mse, attn_weight=10.0, hidden_weight=10.0, hs_loss_fn=mse, learning_rate=4e-06
- attn_loss_fn=raw_mse, attn_weight=10.0, hidden_weight=10.0, hs_loss_fn=raw_mse, learning_rate=0.0004
- attn_loss_fn=raw_mse, attn_weight=10.0, hidden_weight=10.0, hs_loss_fn=raw_mse, learning_rate=4e-05
- attn_loss_fn=raw_mse, attn_weight=10.0, hidden_weight=10.0, hs_loss_fn=raw_mse, learning_rate=4e-06
- attn_loss_fn=raw_mse, attn_weight=10.0, hs_loss_fn=cos, hs_weight=10.0, learning_rate=4e-05
- attn_loss_fn=raw_mse, attn_weight=10.0, hs_loss_fn=mse, hs_weight=10.0, learning_rate=0.0004
- attn_loss_fn=raw_mse, attn_weight=10.0, hs_loss_fn=mse, hs_weight=10.0, learning_rate=4e-05
- attn_loss_fn=raw_mse, attn_weight=10.0, hs_loss_fn=mse, hs_weight=10.0, learning_rate=4e-06
- attn_loss_fn=raw_mse, attn_weight=10.0, hs_loss_fn=raw_mse, hs_weight=10.0, learning_rate=0.0004
- attn_loss_fn=raw_mse, attn_weight=10.0, hs_loss_fn=raw_mse, hs_weight=10.0, learning_rate=4e-05
- attn_loss_fn=raw_mse, attn_weight=10.0, hs_loss_fn=raw_mse, hs_weight=10.0, learning_rate=4e-06