- attn_layer_mapper=layer-2, attn_loss_fn=cos, attn_norm=batchnorm, attn_projector=orthogonal, max_grad_norm=100
- attn_layer_mapper=layer-2, attn_loss_fn=cos, attn_projector=orthogonal_batchnorm, max_grad_norm=100
- attn_layer_mapper=layer-2, attn_loss_fn=cos, attn_projector=orthogonal_batchnorm
- attn_layer_mapper=layer-2, attn_loss_fn=cos, attn_projector=orthogonal_layernorm, max_grad_norm=100
- attn_layer_mapper=layer-2, attn_loss_fn=cos, attn_projector=orthogonal_layernorm
- attn_layer_mapper=layer-2, attn_loss_fn=raw_mse, attn_norm=None, attn_projector=orthogonal, max_grad_norm=100
- attn_layer_mapper=layer-2, attn_loss_fn=raw_mse, attn_norm=None, attn_projector=orthogonal
- attn_layer_mapper=layer-2, attn_loss_fn=raw_mse, attn_norm=batchnorm, attn_projector=orthogonal, max_grad_norm=100
- attn_layer_mapper=layer-2, attn_loss_fn=raw_mse, attn_norm=batchnorm, attn_projector=orthogonal
- attn_layer_mapper=layer-2, attn_loss_fn=raw_mse, attn_norm=layernorm, attn_projector=orthogonal, max_grad_norm=100
- attn_layer_mapper=layer-2, attn_loss_fn=raw_mse, attn_norm=layernorm, attn_projector=orthogonal
- attn_layer_mapper=layer-2, attn_loss_fn=raw_mse, attn_projector=orthogonal_batchnorm, max_grad_norm=100
- attn_layer_mapper=layer-2, attn_loss_fn=raw_mse, attn_projector=orthogonal_batchnorm
- attn_layer_mapper=layer-2, attn_loss_fn=raw_mse, attn_projector=orthogonal_layernorm, max_grad_norm=100
- attn_layer_mapper=layer-2, attn_loss_fn=raw_mse, attn_projector=orthogonal_layernorm