{
  "name": "InterpBench",
  "version": "1.0.0",
  "description": "A benchmark of transformers with known circuits for evaluating mechanistic interpretability techniques.",
  "cases": [
    {
      "case_id": "11",
      "files": [
        "edges.pkl",
        "ll_model_510.pth",
        "ll_model_cfg_510.pkl",
        "meta_510.json"
      ],
      "task_description": "Counts the number of words in a sequence based on their length.",
      "vocab": [
        "J",
        "oCLrZaW",
        "no",
        "poiVg",
        "V",
        "b",
        "LB",
        "TPSI"
      ],
      "max_seq_len": 10,
      "min_seq_len": 4,
      "transformer_cfg": {
        "n_layers": 2,
        "d_model": 12,
        "n_ctx": 10,
        "d_head": 3,
        "model_name": "custom",
        "n_heads": 4,
        "d_mlp": 48,
        "act_fn": "gelu",
        "d_vocab": 10,
        "eps": 1e-05,
        "use_attn_result": true,
        "use_attn_scale": true,
        "use_split_qkv_input": true,
        "use_hook_mlp_in": true,
        "use_attn_in": false,
        "use_local_attn": false,
        "original_architecture": null,
        "from_checkpoint": false,
        "checkpoint_index": null,
        "checkpoint_label_type": null,
        "checkpoint_value": null,
        "tokenizer_name": null,
        "window_size": null,
        "attn_types": null,
        "init_mode": "gpt2",
        "normalization_type": null,
        "device": "cpu",
        "n_devices": 1,
        "attention_dir": "causal",
        "attn_only": false,
        "seed": 0,
        "initializer_range": 0.1460593486680443,
        "init_weights": true,
        "scale_attn_by_inverse_layer_idx": false,
        "positional_embedding_type": "standard",
        "final_rms": false,
        "d_vocab_out": 5,
        "parallel_attn_mlp": false,
        "rotary_dim": null,
        "n_params": 3456,
        "use_hook_tokens": false,
        "gated_mlp": false,
        "default_prepend_bos": true,
        "dtype": "torch.float32",
        "tokenizer_prepends_bos": null,
        "n_key_value_heads": null,
        "post_embedding_ln": false,
        "rotary_base": 10000,
        "trust_remote_code": false,
        "rotary_adjacent_pairs": false
      },
      "training_args": {
        "atol": 0.05,
        "lr": 0.01,
        "use_single_loss": false,
        "iit_weight": 1.0,
        "behavior_weight": 1.0,
        "strict_weight": 0.4,
        "epochs": 500,
        "act_fn": "gelu",
        "clip_grad_norm": 1.0,
        "lr_scheduler": ""
      }
    },
    {
      "case_id": "13",
      "files": [
        "edges.pkl",
        "ll_model_510.pth",
        "ll_model_cfg_510.pkl",
        "meta_510.json"
      ],
      "task_description": "Analyzes the trend (increasing, decreasing, constant) of numeric tokens.",
      "vocab": [
        0,
        1,
        2
      ],
      "max_seq_len": 10,
      "min_seq_len": 4,
      "transformer_cfg": {
        "n_layers": 2,
        "d_model": 20,
        "n_ctx": 10,
        "d_head": 5,
        "model_name": "custom",
        "n_heads": 4,
        "d_mlp": 80,
        "act_fn": "gelu",
        "d_vocab": 5,
        "eps": 1e-05,
        "use_attn_result": true,
        "use_attn_scale": true,
        "use_split_qkv_input": true,
        "use_hook_mlp_in": true,
        "use_attn_in": false,
        "use_local_attn": false,
        "original_architecture": null,
        "from_checkpoint": false,
        "checkpoint_index": null,
        "checkpoint_label_type": null,
        "checkpoint_value": null,
        "tokenizer_name": null,
        "window_size": null,
        "attn_types": null,
        "init_mode": "gpt2",
        "normalization_type": null,
        "device": "cpu",
        "n_devices": 1,
        "attention_dir": "bidirectional",
        "attn_only": false,
        "seed": 0,
        "initializer_range": 0.1460593486680443,
        "init_weights": true,
        "scale_attn_by_inverse_layer_idx": false,
        "positional_embedding_type": "standard",
        "final_rms": false,
        "d_vocab_out": 3,
        "parallel_attn_mlp": false,
        "rotary_dim": null,
        "n_params": 9600,
        "use_hook_tokens": false,
        "gated_mlp": false,
        "default_prepend_bos": true,
        "dtype": "torch.float32",
        "tokenizer_prepends_bos": null,
        "n_key_value_heads": null,
        "post_embedding_ln": false,
        "rotary_base": 10000,
        "trust_remote_code": false,
        "rotary_adjacent_pairs": false
      },
      "training_args": {
        "atol": 0.05,
        "lr": 0.01,
        "use_single_loss": false,
        "iit_weight": 1.0,
        "behavior_weight": 1.0,
        "strict_weight": 0.4,
        "epochs": 500,
        "act_fn": "gelu",
        "clip_grad_norm": 1.0,
        "lr_scheduler": ""
      }
    },
    {
      "case_id": "18",
      "files": [
        "edges.pkl",
        "ll_model_510.pth",
        "ll_model_cfg_510.pkl",
        "meta_510.json"
      ],
      "task_description": "Classify each token based on its frequency as 'rare', 'common', or 'frequent'.",
      "vocab": [
        "c",
        "e",
        "b",
        "d",
        "a"
      ],
      "max_seq_len": 10,
      "min_seq_len": 4,
      "transformer_cfg": {
        "n_layers": 2,
        "d_model": 12,
        "n_ctx": 10,
        "d_head": 3,
        "model_name": "custom",
        "n_heads": 4,
        "d_mlp": 48,
        "act_fn": "gelu",
        "d_vocab": 7,
        "eps": 1e-05,
        "use_attn_result": true,
        "use_attn_scale": true,
        "use_split_qkv_input": true,
        "use_hook_mlp_in": true,
        "use_attn_in": false,
        "use_local_attn": false,
        "original_architecture": null,
        "from_checkpoint": false,
        "checkpoint_index": null,
        "checkpoint_label_type": null,
        "checkpoint_value": null,
        "tokenizer_name": null,
        "window_size": null,
        "attn_types": null,
        "init_mode": "gpt2",
        "normalization_type": null,
        "device": "cpu",
        "n_devices": 1,
        "attention_dir": "bidirectional",
        "attn_only": false,
        "seed": 0,
        "initializer_range": 0.12344267996967354,
        "init_weights": true,
        "scale_attn_by_inverse_layer_idx": false,
        "positional_embedding_type": "standard",
        "final_rms": false,
        "d_vocab_out": 3,
        "parallel_attn_mlp": false,
        "rotary_dim": null,
        "n_params": 3456,
        "use_hook_tokens": false,
        "gated_mlp": false,
        "default_prepend_bos": true,
        "dtype": "torch.float32",
        "tokenizer_prepends_bos": null,
        "n_key_value_heads": null,
        "post_embedding_ln": false,
        "rotary_base": 10000,
        "trust_remote_code": false,
        "rotary_adjacent_pairs": false
      },
      "training_args": {
        "atol": 0.05,
        "lr": 0.001,
        "use_single_loss": false,
        "iit_weight": 1.0,
        "behavior_weight": 1.0,
        "strict_weight": 0.4,
        "epochs": 2000,
        "act_fn": "gelu",
        "clip_grad_norm": 0.1,
        "lr_scheduler": ""
      }
    },
    {
      "case_id": "19",
      "files": [
        "edges.pkl",
        "ll_model_510.pth",
        "ll_model_cfg_510.pkl",
        "meta_510.json"
      ],
      "task_description": "Removes consecutive duplicate tokens from a sequence.",
      "vocab": [
        "b",
        "a",
        "c"
      ],
      "max_seq_len": 15,
      "min_seq_len": 4,
      "transformer_cfg": {
        "n_layers": 2,
        "d_model": 32,
        "n_ctx": 15,
        "d_head": 8,
        "model_name": "custom",
        "n_heads": 4,
        "d_mlp": 128,
        "act_fn": "gelu",
        "d_vocab": 5,
        "eps": 1e-05,
        "use_attn_result": true,
        "use_attn_scale": true,
        "use_split_qkv_input": true,
        "use_hook_mlp_in": true,
        "use_attn_in": false,
        "use_local_attn": false,
        "original_architecture": null,
        "from_checkpoint": false,
        "checkpoint_index": null,
        "checkpoint_label_type": null,
        "checkpoint_value": null,
        "tokenizer_name": null,
        "window_size": null,
        "attn_types": null,
        "init_mode": "gpt2",
        "normalization_type": null,
        "device": "cpu",
        "n_devices": 1,
        "attention_dir": "causal",
        "attn_only": false,
        "seed": 0,
        "initializer_range": 0.15689290811054724,
        "init_weights": true,
        "scale_attn_by_inverse_layer_idx": false,
        "positional_embedding_type": "standard",
        "final_rms": false,
        "d_vocab_out": 3,
        "parallel_attn_mlp": false,
        "rotary_dim": null,
        "n_params": 24576,
        "use_hook_tokens": false,
        "gated_mlp": false,
        "default_prepend_bos": true,
        "dtype": "torch.float32",
        "tokenizer_prepends_bos": null,
        "n_key_value_heads": null,
        "post_embedding_ln": false,
        "rotary_base": 10000,
        "trust_remote_code": false,
        "rotary_adjacent_pairs": false
      },
      "training_args": {
        "atol": 0.05,
        "lr": 0.001,
        "use_single_loss": false,
        "iit_weight": 1.0,
        "behavior_weight": 1.0,
        "strict_weight": 0.4,
        "epochs": 2000,
        "act_fn": "gelu",
        "clip_grad_norm": 0.1,
        "lr_scheduler": ""
      }
    },
    {
      "case_id": "20",
      "files": [
        "edges.pkl",
        "ll_model_1110.pth",
        "ll_model_cfg_1110.pkl",
        "meta_1110.json"
      ],
      "task_description": "Detect spam messages based on appearance of spam keywords.",
      "vocab": [
        "J",
        "spam",
        "offer",
        "click",
        "oCLrZaW",
        "no",
        "poiVg",
        "V",
        "b",
        "LB",
        "now",
        "TPSI"
      ],
      "max_seq_len": 10,
      "min_seq_len": 4,
      "transformer_cfg": {
        "n_layers": 2,
        "d_model": 4,
        "n_ctx": 10,
        "d_head": 1,
        "model_name": "custom",
        "n_heads": 4,
        "d_mlp": 16,
        "act_fn": "gelu",
        "d_vocab": 14,
        "eps": 1e-05,
        "use_attn_result": true,
        "use_attn_scale": true,
        "use_split_qkv_input": true,
        "use_hook_mlp_in": true,
        "use_attn_in": false,
        "use_local_attn": false,
        "original_architecture": null,
        "from_checkpoint": false,
        "checkpoint_index": null,
        "checkpoint_label_type": null,
        "checkpoint_value": null,
        "tokenizer_name": null,
        "window_size": null,
        "attn_types": null,
        "init_mode": "gpt2",
        "normalization_type": null,
        "device": "cuda",
        "n_devices": 1,
        "attention_dir": "causal",
        "attn_only": false,
        "seed": 0,
        "initializer_range": 0.16,
        "init_weights": true,
        "scale_attn_by_inverse_layer_idx": false,
        "positional_embedding_type": "standard",
        "final_rms": false,
        "d_vocab_out": 2,
        "parallel_attn_mlp": false,
        "rotary_dim": null,
        "n_params": 384,
        "use_hook_tokens": false,
        "gated_mlp": false,
        "default_prepend_bos": true,
        "dtype": "torch.float32",
        "tokenizer_prepends_bos": null,
        "n_key_value_heads": null,
        "post_embedding_ln": false,
        "rotary_base": 10000,
        "trust_remote_code": false,
        "rotary_adjacent_pairs": false
      },
      "training_args": {
        "atol": 0.05,
        "lr": 0.001,
        "use_single_loss": false,
        "iit_weight": 1.0,
        "behavior_weight": 1.0,
        "strict_weight": 1.0,
        "epochs": 2000,
        "act_fn": "gelu",
        "clip_grad_norm": 0.1,
        "lr_scheduler": ""
      }
    },
    {
      "case_id": "21",
      "files": [
        "edges.pkl",
        "ll_model_510.pth",
        "ll_model_cfg_510.pkl",
        "meta_510.json"
      ],
      "task_description": "Extract unique tokens from a string",
      "vocab": [
        "b",
        "a",
        "c"
      ],
      "max_seq_len": 10,
      "min_seq_len": 4,
      "transformer_cfg": {
        "n_layers": 2,
        "d_model": 20,
        "n_ctx": 10,
        "d_head": 5,
        "model_name": "custom",
        "n_heads": 4,
        "d_mlp": 80,
        "act_fn": "gelu",
        "d_vocab": 5,
        "eps": 1e-05,
        "use_attn_result": true,
        "use_attn_scale": true,
        "use_split_qkv_input": true,
        "use_hook_mlp_in": true,
        "use_attn_in": false,
        "use_local_attn": false,
        "original_architecture": null,
        "from_checkpoint": false,
        "checkpoint_index": null,
        "checkpoint_label_type": null,
        "checkpoint_value": null,
        "tokenizer_name": null,
        "window_size": null,
        "attn_types": null,
        "init_mode": "gpt2",
        "normalization_type": null,
        "device": "cpu",
        "n_devices": 1,
        "attention_dir": "causal",
        "attn_only": false,
        "seed": 0,
        "initializer_range": 0.1885618083164127,
        "init_weights": true,
        "scale_attn_by_inverse_layer_idx": false,
        "positional_embedding_type": "standard",
        "final_rms": false,
        "d_vocab_out": 3,
        "parallel_attn_mlp": false,
        "rotary_dim": null,
        "n_params": 9600,
        "use_hook_tokens": false,
        "gated_mlp": false,
        "default_prepend_bos": true,
        "dtype": "torch.float32",
        "tokenizer_prepends_bos": null,
        "n_key_value_heads": null,
        "post_embedding_ln": false,
        "rotary_base": 10000,
        "trust_remote_code": false,
        "rotary_adjacent_pairs": false
      },
      "training_args": {
        "atol": 0.05,
        "lr": 0.01,
        "use_single_loss": false,
        "iit_weight": 1.0,
        "behavior_weight": 1.0,
        "strict_weight": 0.4,
        "epochs": 500,
        "act_fn": "gelu",
        "clip_grad_norm": 1.0,
        "lr_scheduler": ""
      }
    },
    {
      "case_id": "24",
      "files": [
        "edges.pkl",
        "ll_model_510.pth",
        "ll_model_cfg_510.pkl",
        "meta_510.json"
      ],
      "task_description": "Identifies the first occurrence of each token in a sequence.",
      "vocab": [
        "b",
        "a",
        "c"
      ],
      "max_seq_len": 10,
      "min_seq_len": 4,
      "transformer_cfg": {
        "n_layers": 2,
        "d_model": 20,
        "n_ctx": 10,
        "d_head": 5,
        "model_name": "custom",
        "n_heads": 4,
        "d_mlp": 80,
        "act_fn": "gelu",
        "d_vocab": 5,
        "eps": 1e-05,
        "use_attn_result": true,
        "use_attn_scale": true,
        "use_split_qkv_input": true,
        "use_hook_mlp_in": true,
        "use_attn_in": false,
        "use_local_attn": false,
        "original_architecture": null,
        "from_checkpoint": false,
        "checkpoint_index": null,
        "checkpoint_label_type": null,
        "checkpoint_value": null,
        "tokenizer_name": null,
        "window_size": null,
        "attn_types": null,
        "init_mode": "gpt2",
        "normalization_type": null,
        "device": "cpu",
        "n_devices": 1,
        "attention_dir": "causal",
        "attn_only": false,
        "seed": 0,
        "initializer_range": 0.1885618083164127,
        "init_weights": true,
        "scale_attn_by_inverse_layer_idx": false,
        "positional_embedding_type": "standard",
        "final_rms": false,
        "d_vocab_out": 3,
        "parallel_attn_mlp": false,
        "rotary_dim": null,
        "n_params": 9600,
        "use_hook_tokens": false,
        "gated_mlp": false,
        "default_prepend_bos": true,
        "dtype": "torch.float32",
        "tokenizer_prepends_bos": null,
        "n_key_value_heads": null,
        "post_embedding_ln": false,
        "rotary_base": 10000,
        "trust_remote_code": false,
        "rotary_adjacent_pairs": false
      },
      "training_args": {
        "atol": 0.05,
        "lr": 0.01,
        "use_single_loss": false,
        "iit_weight": 1.0,
        "behavior_weight": 1.0,
        "strict_weight": 0.4,
        "epochs": 500,
        "act_fn": "gelu",
        "clip_grad_norm": 1.0,
        "lr_scheduler": ""
      }
    },
    {
      "case_id": "3",
      "files": [
        "edges.pkl",
        "ll_model_10110.pth",
        "ll_model_cfg_10110.pkl",
        "meta_10110.json"
      ],
      "task_description": "Returns the fraction of 'x' in the input up to the i-th position for all i.",
      "vocab": [
        "x",
        "b",
        "a",
        "c"
      ],
      "max_seq_len": 5,
      "min_seq_len": 4,
      "transformer_cfg": {
        "n_layers": 2,
        "d_model": 12,
        "n_ctx": 5,
        "d_head": 3,
        "model_name": "custom",
        "n_heads": 4,
        "d_mlp": 48,
        "act_fn": "gelu",
        "d_vocab": 6,
        "eps": 1e-05,
        "use_attn_result": true,
        "use_attn_scale": true,
        "use_split_qkv_input": true,
        "use_hook_mlp_in": true,
        "use_attn_in": false,
        "use_local_attn": false,
        "original_architecture": null,
        "from_checkpoint": false,
        "checkpoint_index": null,
        "checkpoint_label_type": null,
        "checkpoint_value": null,
        "tokenizer_name": null,
        "window_size": null,
        "attn_types": null,
        "init_mode": "gpt2",
        "normalization_type": null,
        "device": "cpu",
        "n_devices": 1,
        "attention_dir": "causal",
        "attn_only": false,
        "seed": 0,
        "initializer_range": 0.22188007849009167,
        "init_weights": true,
        "scale_attn_by_inverse_layer_idx": false,
        "positional_embedding_type": "standard",
        "final_rms": false,
        "d_vocab_out": 1,
        "parallel_attn_mlp": false,
        "rotary_dim": null,
        "n_params": 3456,
        "use_hook_tokens": false,
        "gated_mlp": false,
        "default_prepend_bos": true,
        "dtype": "torch.float32",
        "tokenizer_prepends_bos": null,
        "n_key_value_heads": null,
        "post_embedding_ln": false,
        "rotary_base": 10000,
        "trust_remote_code": false,
        "rotary_adjacent_pairs": false
      },
      "training_args": {
        "atol": 0.05,
        "lr": 0.001,
        "use_single_loss": false,
        "iit_weight": 1.0,
        "behavior_weight": 1.0,
        "strict_weight": 10.0,
        "epochs": 2000,
        "act_fn": "gelu",
        "clip_grad_norm": 0.1,
        "lr_scheduler": ""
      }
    },
    {
      "case_id": "33",
      "files": [
        "edges.pkl",
        "ll_model_510.pth",
        "ll_model_cfg_510.pkl",
        "meta_510.json"
      ],
      "task_description": "Checks if each token's length is odd or even.",
      "vocab": [
        "J",
        "oCLrZaW",
        "no",
        "poiVg",
        "V",
        "b",
        "LB",
        "TPSI"
      ],
      "max_seq_len": 10,
      "min_seq_len": 4,
      "transformer_cfg": {
        "n_layers": 2,
        "d_model": 4,
        "n_ctx": 10,
        "d_head": 1,
        "model_name": "custom",
        "n_heads": 4,
        "d_mlp": 16,
        "act_fn": "gelu",
        "d_vocab": 10,
        "eps": 1e-05,
        "use_attn_result": true,
        "use_attn_scale": true,
        "use_split_qkv_input": true,
        "use_hook_mlp_in": true,
        "use_attn_in": false,
        "use_local_attn": false,
        "original_architecture": null,
        "from_checkpoint": false,
        "checkpoint_index": null,
        "checkpoint_label_type": null,
        "checkpoint_value": null,
        "tokenizer_name": null,
        "window_size": null,
        "attn_types": null,
        "init_mode": "gpt2",
        "normalization_type": null,
        "device": "cpu",
        "n_devices": 1,
        "attention_dir": "causal",
        "attn_only": false,
        "seed": 0,
        "initializer_range": 0.17457431218879393,
        "init_weights": true,
        "scale_attn_by_inverse_layer_idx": false,
        "positional_embedding_type": "standard",
        "final_rms": false,
        "d_vocab_out": 2,
        "parallel_attn_mlp": false,
        "rotary_dim": null,
        "n_params": 384,
        "use_hook_tokens": false,
        "gated_mlp": false,
        "default_prepend_bos": true,
        "dtype": "torch.float32",
        "tokenizer_prepends_bos": null,
        "n_key_value_heads": null,
        "post_embedding_ln": false,
        "rotary_base": 10000,
        "trust_remote_code": false,
        "rotary_adjacent_pairs": false
      },
      "training_args": {
        "atol": 0.05,
        "lr": 0.001,
        "use_single_loss": false,
        "iit_weight": 1.0,
        "behavior_weight": 1.0,
        "strict_weight": 0.4,
        "epochs": 2000,
        "act_fn": "gelu",
        "clip_grad_norm": 0.1,
        "lr_scheduler": ""
      }
    },
    {
      "case_id": "34",
      "files": [
        "edges.pkl",
        "ll_model_510.pth",
        "ll_model_cfg_510.pkl",
        "meta_510.json"
      ],
      "task_description": "Calculate the ratio of vowels to consonants in each word.",
      "vocab": [
        "J",
        "oCLrZaW",
        "no",
        "poiVg",
        "V",
        "b",
        "LB",
        "TPSI"
      ],
      "max_seq_len": 10,
      "min_seq_len": 4,
      "transformer_cfg": {
        "n_layers": 2,
        "d_model": 4,
        "n_ctx": 10,
        "d_head": 1,
        "model_name": "custom",
        "n_heads": 4,
        "d_mlp": 16,
        "act_fn": "gelu",
        "d_vocab": 10,
        "eps": 1e-05,
        "use_attn_result": true,
        "use_attn_scale": true,
        "use_split_qkv_input": true,
        "use_hook_mlp_in": true,
        "use_attn_in": false,
        "use_local_attn": false,
        "original_architecture": null,
        "from_checkpoint": false,
        "checkpoint_index": null,
        "checkpoint_label_type": null,
        "checkpoint_value": null,
        "tokenizer_name": null,
        "window_size": null,
        "attn_types": null,
        "init_mode": "gpt2",
        "normalization_type": null,
        "device": "cpu",
        "n_devices": 1,
        "attention_dir": "causal",
        "attn_only": false,
        "seed": 0,
        "initializer_range": 0.16329931618554522,
        "init_weights": true,
        "scale_attn_by_inverse_layer_idx": false,
        "positional_embedding_type": "standard",
        "final_rms": false,
        "d_vocab_out": 5,
        "parallel_attn_mlp": false,
        "rotary_dim": null,
        "n_params": 384,
        "use_hook_tokens": false,
        "gated_mlp": false,
        "default_prepend_bos": true,
        "dtype": "torch.float32",
        "tokenizer_prepends_bos": null,
        "n_key_value_heads": null,
        "post_embedding_ln": false,
        "rotary_base": 10000,
        "trust_remote_code": false,
        "rotary_adjacent_pairs": false
      },
      "training_args": {
        "atol": 0.05,
        "lr": 0.001,
        "use_single_loss": false,
        "iit_weight": 1.0,
        "behavior_weight": 1.0,
        "strict_weight": 0.4,
        "epochs": 2000,
        "act_fn": "gelu",
        "clip_grad_norm": 0.1,
        "lr_scheduler": ""
      }
    },
    {
      "case_id": "35",
      "files": [
        "edges.pkl",
        "ll_model_510.pth",
        "ll_model_cfg_510.pkl",
        "meta_510.json"
      ],
      "task_description": "Alternates capitalization of each character in words.",
      "vocab": [
        "J",
        "oCLrZaW",
        "no",
        "poiVg",
        "V",
        "b",
        "LB",
        "TPSI"
      ],
      "max_seq_len": 10,
      "min_seq_len": 4,
      "transformer_cfg": {
        "n_layers": 2,
        "d_model": 4,
        "n_ctx": 10,
        "d_head": 1,
        "model_name": "custom",
        "n_heads": 4,
        "d_mlp": 16,
        "act_fn": "gelu",
        "d_vocab": 10,
        "eps": 1e-05,
        "use_attn_result": true,
        "use_attn_scale": true,
        "use_split_qkv_input": true,
        "use_hook_mlp_in": true,
        "use_attn_in": false,
        "use_local_attn": false,
        "original_architecture": null,
        "from_checkpoint": false,
        "checkpoint_index": null,
        "checkpoint_label_type": null,
        "checkpoint_value": null,
        "tokenizer_name": null,
        "window_size": null,
        "attn_types": null,
        "init_mode": "gpt2",
        "normalization_type": null,
        "device": "cpu",
        "n_devices": 1,
        "attention_dir": "causal",
        "attn_only": false,
        "seed": 0,
        "initializer_range": 0.1539600717839002,
        "init_weights": true,
        "scale_attn_by_inverse_layer_idx": false,
        "positional_embedding_type": "standard",
        "final_rms": false,
        "d_vocab_out": 8,
        "parallel_attn_mlp": false,
        "rotary_dim": null,
        "n_params": 384,
        "use_hook_tokens": false,
        "gated_mlp": false,
        "default_prepend_bos": true,
        "dtype": "torch.float32",
        "tokenizer_prepends_bos": null,
        "n_key_value_heads": null,
        "post_embedding_ln": false,
        "rotary_base": 10000,
        "trust_remote_code": false,
        "rotary_adjacent_pairs": false
      },
      "training_args": {
        "atol": 0.05,
        "lr": 0.001,
        "use_single_loss": false,
        "iit_weight": 1.0,
        "behavior_weight": 1.0,
        "strict_weight": 0.4,
        "epochs": 2000,
        "act_fn": "gelu",
        "clip_grad_norm": 0.1,
        "lr_scheduler": ""
      }
    },
    {
      "case_id": "36",
      "files": [
        "edges.pkl",
        "ll_model_10110.pth",
        "ll_model_cfg_10110.pkl",
        "meta_10110.json"
      ],
      "task_description": "Classifies each token as 'positive', 'negative', or 'neutral' based on emojis.",
      "vocab": [
        "\ud83d\ude22",
        "\ud83d\udcd8",
        "\ud83d\ude0a"
      ],
      "max_seq_len": 10,
      "min_seq_len": 4,
      "transformer_cfg": {
        "n_layers": 2,
        "d_model": 4,
        "n_ctx": 10,
        "d_head": 1,
        "model_name": "custom",
        "n_heads": 4,
        "d_mlp": 16,
        "act_fn": "gelu",
        "d_vocab": 5,
        "eps": 1e-05,
        "use_attn_result": true,
        "use_attn_scale": true,
        "use_split_qkv_input": true,
        "use_hook_mlp_in": true,
        "use_attn_in": false,
        "use_local_attn": false,
        "original_architecture": null,
        "from_checkpoint": false,
        "checkpoint_index": null,
        "checkpoint_label_type": null,
        "checkpoint_value": null,
        "tokenizer_name": null,
        "window_size": null,
        "attn_types": null,
        "init_mode": "gpt2",
        "normalization_type": null,
        "device": "cuda",
        "n_devices": 1,
        "attention_dir": "causal",
        "attn_only": false,
        "seed": 0,
        "initializer_range": 0.19402850002906638,
        "init_weights": true,
        "scale_attn_by_inverse_layer_idx": false,
        "positional_embedding_type": "standard",
        "final_rms": false,
        "d_vocab_out": 3,
        "parallel_attn_mlp": false,
        "rotary_dim": null,
        "n_params": 384,
        "use_hook_tokens": false,
        "gated_mlp": false,
        "default_prepend_bos": true,
        "dtype": "torch.float32",
        "tokenizer_prepends_bos": null,
        "n_key_value_heads": null,
        "post_embedding_ln": false,
        "rotary_base": 10000,
        "trust_remote_code": false,
        "rotary_adjacent_pairs": false
      },
      "training_args": {
        "atol": 0.05,
        "lr": 0.001,
        "use_single_loss": false,
        "iit_weight": 1.0,
        "behavior_weight": 1.0,
        "strict_weight": 10.0,
        "epochs": 2000,
        "act_fn": "gelu",
        "clip_grad_norm": 0.1,
        "lr_scheduler": ""
      }
    },
    {
      "case_id": "37",
      "files": [
        "edges.pkl",
        "ll_model_510.pth",
        "ll_model_cfg_510.pkl",
        "meta_510.json"
      ],
      "task_description": "Reverses each word in the sequence except for specified exclusions.",
      "vocab": [
        "J",
        "oCLrZaW",
        "no",
        "poiVg",
        "V",
        "b",
        "LB",
        "TPSI"
      ],
      "max_seq_len": 10,
      "min_seq_len": 4,
      "transformer_cfg": {
        "n_layers": 2,
        "d_model": 4,
        "n_ctx": 10,
        "d_head": 1,
        "model_name": "custom",
        "n_heads": 4,
        "d_mlp": 16,
        "act_fn": "gelu",
        "d_vocab": 10,
        "eps": 1e-05,
        "use_attn_result": true,
        "use_attn_scale": true,
        "use_split_qkv_input": true,
        "use_hook_mlp_in": true,
        "use_attn_in": false,
        "use_local_attn": false,
        "original_architecture": null,
        "from_checkpoint": false,
        "checkpoint_index": null,
        "checkpoint_label_type": null,
        "checkpoint_value": null,
        "tokenizer_name": null,
        "window_size": null,
        "attn_types": null,
        "init_mode": "gpt2",
        "normalization_type": null,
        "device": "cpu",
        "n_devices": 1,
        "attention_dir": "causal",
        "attn_only": false,
        "seed": 0,
        "initializer_range": 0.1539600717839002,
        "init_weights": true,
        "scale_attn_by_inverse_layer_idx": false,
        "positional_embedding_type": "standard",
        "final_rms": false,
        "d_vocab_out": 8,
        "parallel_attn_mlp": false,
        "rotary_dim": null,
        "n_params": 384,
        "use_hook_tokens": false,
        "gated_mlp": false,
        "default_prepend_bos": true,
        "dtype": "torch.float32",
        "tokenizer_prepends_bos": null,
        "n_key_value_heads": null,
        "post_embedding_ln": false,
        "rotary_base": 10000,
        "trust_remote_code": false,
        "rotary_adjacent_pairs": false
      },
      "training_args": {
        "atol": 0.05,
        "lr": 0.001,
        "use_single_loss": false,
        "iit_weight": 1.0,
        "behavior_weight": 1.0,
        "strict_weight": 0.4,
        "epochs": 2000,
        "act_fn": "gelu",
        "clip_grad_norm": 0.1,
        "lr_scheduler": ""
      }
    },
    {
      "case_id": "38",
      "files": [
        "edges.pkl",
        "ll_model_510.pth",
        "ll_model_cfg_510.pkl",
        "meta_510.json"
      ],
      "task_description": "Checks if tokens alternate between two types.",
      "vocab": [
        "b",
        "a",
        "c"
      ],
      "max_seq_len": 10,
      "min_seq_len": 4,
      "transformer_cfg": {
        "n_layers": 2,
        "d_model": 20,
        "n_ctx": 10,
        "d_head": 5,
        "model_name": "custom",
        "n_heads": 4,
        "d_mlp": 80,
        "act_fn": "gelu",
        "d_vocab": 5,
        "eps": 1e-05,
        "use_attn_result": true,
        "use_attn_scale": true,
        "use_split_qkv_input": true,
        "use_hook_mlp_in": true,
        "use_attn_in": false,
        "use_local_attn": false,
        "original_architecture": null,
        "from_checkpoint": false,
        "checkpoint_index": null,
        "checkpoint_label_type": null,
        "checkpoint_value": null,
        "tokenizer_name": null,
        "window_size": null,
        "attn_types": null,
        "init_mode": "gpt2",
        "normalization_type": null,
        "device": "cpu",
        "n_devices": 1,
        "attention_dir": "causal",
        "attn_only": false,
        "seed": 0,
        "initializer_range": 0.1539600717839002,
        "init_weights": true,
        "scale_attn_by_inverse_layer_idx": false,
        "positional_embedding_type": "standard",
        "final_rms": false,
        "d_vocab_out": 2,
        "parallel_attn_mlp": false,
        "rotary_dim": null,
        "n_params": 9600,
        "use_hook_tokens": false,
        "gated_mlp": false,
        "default_prepend_bos": true,
        "dtype": "torch.float32",
        "tokenizer_prepends_bos": null,
        "n_key_value_heads": null,
        "post_embedding_ln": false,
        "rotary_base": 10000,
        "trust_remote_code": false,
        "rotary_adjacent_pairs": false
      },
      "training_args": {
        "atol": 0.05,
        "lr": 0.001,
        "use_single_loss": false,
        "iit_weight": 1.0,
        "behavior_weight": 1.0,
        "strict_weight": 0.4,
        "epochs": 2000,
        "act_fn": "gelu",
        "clip_grad_norm": 0.1,
        "lr_scheduler": ""
      }
    },
    {
      "case_id": "4",
      "files": [
        "edges.pkl",
        "ll_model_510.pth",
        "ll_model_cfg_510.pkl",
        "meta_510.json"
      ],
      "task_description": "Return fraction of previous open tokens minus the fraction of close tokens.",
      "vocab": [
        "b",
        "(",
        "c",
        ")",
        "a"
      ],
      "max_seq_len": 10,
      "min_seq_len": 4,
      "transformer_cfg": {
        "n_layers": 2,
        "d_model": 20,
        "n_ctx": 10,
        "d_head": 5,
        "model_name": "custom",
        "n_heads": 4,
        "d_mlp": 80,
        "act_fn": "gelu",
        "d_vocab": 7,
        "eps": 1e-05,
        "use_attn_result": true,
        "use_attn_scale": true,
        "use_split_qkv_input": true,
        "use_hook_mlp_in": true,
        "use_attn_in": false,
        "use_local_attn": false,
        "original_architecture": null,
        "from_checkpoint": false,
        "checkpoint_index": null,
        "checkpoint_label_type": null,
        "checkpoint_value": null,
        "tokenizer_name": null,
        "window_size": null,
        "attn_types": null,
        "init_mode": "gpt2",
        "normalization_type": null,
        "device": "cpu",
        "n_devices": 1,
        "attention_dir": "causal",
        "attn_only": false,
        "seed": 0,
        "initializer_range": 0.17056057308448835,
        "init_weights": true,
        "scale_attn_by_inverse_layer_idx": false,
        "positional_embedding_type": "standard",
        "final_rms": false,
        "d_vocab_out": 1,
        "parallel_attn_mlp": false,
        "rotary_dim": null,
        "n_params": 9600,
        "use_hook_tokens": false,
        "gated_mlp": false,
        "default_prepend_bos": true,
        "dtype": "torch.float32",
        "tokenizer_prepends_bos": null,
        "n_key_value_heads": null,
        "post_embedding_ln": false,
        "rotary_base": 10000,
        "trust_remote_code": false,
        "rotary_adjacent_pairs": false
      },
      "training_args": {
        "atol": 0.05,
        "lr": 0.001,
        "use_single_loss": false,
        "iit_weight": 1.0,
        "behavior_weight": 1.0,
        "strict_weight": 0.4,
        "epochs": 2000,
        "act_fn": "gelu",
        "clip_grad_norm": 0.1,
        "lr_scheduler": ""
      }
    },
    {
      "case_id": "8",
      "files": [
        "edges.pkl",
        "ll_model_510.pth",
        "ll_model_cfg_510.pkl",
        "meta_510.json"
      ],
      "task_description": "Fills gaps between tokens with a specified filler.",
      "vocab": [
        "J",
        "oCLrZaW",
        "no",
        "poiVg",
        "V",
        "b",
        "LB",
        "TPSI"
      ],
      "max_seq_len": 10,
      "min_seq_len": 4,
      "transformer_cfg": {
        "n_layers": 2,
        "d_model": 20,
        "n_ctx": 10,
        "d_head": 5,
        "model_name": "custom",
        "n_heads": 4,
        "d_mlp": 80,
        "act_fn": "gelu",
        "d_vocab": 10,
        "eps": 1e-05,
        "use_attn_result": true,
        "use_attn_scale": true,
        "use_split_qkv_input": true,
        "use_hook_mlp_in": true,
        "use_attn_in": false,
        "use_local_attn": false,
        "original_architecture": null,
        "from_checkpoint": false,
        "checkpoint_index": null,
        "checkpoint_label_type": null,
        "checkpoint_value": null,
        "tokenizer_name": null,
        "window_size": null,
        "attn_types": null,
        "init_mode": "gpt2",
        "normalization_type": null,
        "device": "cpu",
        "n_devices": 1,
        "attention_dir": "causal",
        "attn_only": false,
        "seed": 0,
        "initializer_range": 0.13333333333333333,
        "init_weights": true,
        "scale_attn_by_inverse_layer_idx": false,
        "positional_embedding_type": "standard",
        "final_rms": false,
        "d_vocab_out": 8,
        "parallel_attn_mlp": false,
        "rotary_dim": null,
        "n_params": 9600,
        "use_hook_tokens": false,
        "gated_mlp": false,
        "default_prepend_bos": true,
        "dtype": "torch.float32",
        "tokenizer_prepends_bos": null,
        "n_key_value_heads": null,
        "post_embedding_ln": false,
        "rotary_base": 10000,
        "trust_remote_code": false,
        "rotary_adjacent_pairs": false
      },
      "training_args": {
        "atol": 0.05,
        "lr": 0.01,
        "use_single_loss": false,
        "iit_weight": 1.0,
        "behavior_weight": 1.0,
        "strict_weight": 0.4,
        "epochs": 500,
        "act_fn": "gelu",
        "clip_grad_norm": 1.0,
        "lr_scheduler": ""
      }
    },
    {
      "case_id": "ioi",
      "files": [
        "corr_100_100_40.json",
        "ll_model_100_100_40.pth"
      ]
    },
    {
      "case_id": "ioi_next_token",
      "files": [
        "corr_100_100_40.json",
        "ll_model_100_100_40.pth",
        "training_args.json"
      ]
    }
  ]
}