TinyStories-1Layer-21M-Codebook / train_results.json
taufeeque's picture
upload model
26ccd30
raw
history blame contribute delete
571 Bytes
{
"MSE": 0.0,
"MSE/layer0": 0.0,
"dead_code_fraction": 1.0,
"dead_code_fraction/layer0": 1.0,
"epoch": 10.43,
"input_norm": 0.0,
"input_norm/layer0": 0.0,
"loss": 1.6774777018260956,
"max_norm": 153.29054260253906,
"max_norm/layer0": 153.29054260253906,
"mean_norm": 75.17323780059814,
"mean_norm/layer0": 75.17323780059814,
"multicode_k": 1,
"output_norm": 0.0,
"output_norm/layer0": 0.0,
"runtime": 132212.7109,
"samples_per_second": 72.61,
"steps_per_second": 0.756,
"train_samples": 920563
}