# @package _global_
# Small `transformer_lm` settings: just used for debugging, or when we just
# want to populate the cache and do not care about training.
# NOTE(review): keep the package directive above as the very first line, with
# nothing after `_global_` - presumably the config loader parses it strictly.

transformer_lm:
  # Presumably the model (embedding) width - confirm against the consumer.
  dim: 64
  # Presumably the attention head count; 64 / 2 would give 32 per head - verify.
  num_heads: 2
  # Presumably the number of stacked transformer layers - verify.
  num_layers: 2