diff --git "a/benchmark_metadata.json" "b/benchmark_metadata.json"
--- "a/benchmark_metadata.json"
+++ "b/benchmark_metadata.json"
@@ -5,50 +5,3437 @@
   "license": "https://creativecommons.org/licenses/by/4.0/",
   "url": "https://huggingface.co/cybershiptrooper/InterpBench",
   "cases": [
+    {
+      "case_id": "101",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/101",
+      "task_description": "Check if each element is a square of an integer.",
+      "vocab": [
+        0,
+        1,
+        2,
+        3,
+        4,
+        5,
+        6,
+        7,
+        8,
+        9,
+        10,
+        11,
+        12,
+        13,
+        14,
+        15,
+        16,
+        17,
+        18,
+        19,
+        20,
+        21,
+        22,
+        23,
+        24,
+        25,
+        26,
+        27,
+        28,
+        29
+      ],
+      "max_seq_len": 10,
+      "min_seq_len": 4,
+      "files": [
+        {
+          "file_name": "edges.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/101/edges.pkl"
+        },
+        {
+          "file_name": "ll_model.pth",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/101/ll_model.pth"
+        },
+        {
+          "file_name": "ll_model_cfg.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/101/ll_model_cfg.pkl"
+        },
+        {
+          "file_name": "meta.json",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/101/meta.json"
+        }
+      ],
+      "transformer_cfg": {
+        "n_layers": 2,
+        "d_model": 4,
+        "n_ctx": 10,
+        "d_head": 1,
+        "model_name": "custom",
+        "n_heads": 4,
+        "d_mlp": 16,
+        "act_fn": "gelu",
+        "d_vocab": 32,
+        "eps": 1e-05,
+        "use_attn_result": true,
+        "use_attn_scale": true,
+        "use_split_qkv_input": true,
+        "use_hook_mlp_in": true,
+        "use_attn_in": false,
+        "use_local_attn": false,
+        "original_architecture": null,
+        "from_checkpoint": false,
+        "checkpoint_index": null,
+        "checkpoint_label_type": null,
+        "checkpoint_value": null,
+        "tokenizer_name": null,
+        "window_size": null,
+        "attn_types": null,
+        "init_mode": "gpt2",
+        "normalization_type": null,
+        "n_devices": 1,
+        "attention_dir": "causal",
+        "attn_only": false,
+        "seed": 0,
+        "initializer_range": 0.12199885626608374,
+        "init_weights": true,
+        "scale_attn_by_inverse_layer_idx": false,
+        "positional_embedding_type": "standard",
+        "final_rms": false,
+        "d_vocab_out": 2,
+        "parallel_attn_mlp": false,
+        "rotary_dim": null,
+        "n_params": 384,
+        "use_hook_tokens": false,
+        "gated_mlp": false,
+        "default_prepend_bos": true,
+        "dtype": "torch.float32",
+        "tokenizer_prepends_bos": null,
+        "n_key_value_heads": null,
+        "post_embedding_ln": false,
+        "rotary_base": 10000,
+        "trust_remote_code": false,
+        "rotary_adjacent_pairs": false,
+        "load_in_4bit": false,
+        "num_experts": null,
+        "experts_per_token": null
+      },
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/101/ll_model_cfg.pkl",
+      "training_args": {
+        "output_dir": "/circuits-benchmark/results",
+        "atol": 0.05,
+        "lr": 0.001,
+        "use_single_loss": true,
+        "iit_weight": 1.0,
+        "behavior_weight": 0.4,
+        "strict_weight": 0.4,
+        "epochs": 1000,
+        "early_stop_accuracy_threshold": 99.9,
+        "act_fn": "gelu",
+        "use_wandb": true,
+        "save_model_to_wandb": true,
+        "clip_grad_norm": 0.1,
+        "lr_scheduler": "linear",
+        "model_pair": "strict",
+        "same_size": false,
+        "seed": 67,
+        "batch_size": 256,
+        "include_mlp": false,
+        "detach_while_caching": true,
+        "scheduler_val_metric": [
+          "val/accuracy",
+          "val/IIA",
+          "val/strict_accuracy"
+        ],
+        "siit_sampling": "sample_all",
+        "val_iia_sampling": "all"
+      },
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/101/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/101/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/101/edges.pkl"
+    },
+    {
+      "case_id": "103",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/103",
+      "task_description": "Swap consecutive numbers in a list",
+      "vocab": [
+        0,
+        1,
+        2,
+        3,
+        4,
+        5,
+        6,
+        7,
+        8,
+        9,
+        10
+      ],
+      "max_seq_len": 10,
+      "min_seq_len": 4,
+      "files": [
+        {
+          "file_name": "edges.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/103/edges.pkl"
+        },
+        {
+          "file_name": "ll_model.pth",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/103/ll_model.pth"
+        },
+        {
+          "file_name": "ll_model_cfg.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/103/ll_model_cfg.pkl"
+        },
+        {
+          "file_name": "meta.json",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/103/meta.json"
+        }
+      ],
+      "transformer_cfg": {
+        "n_layers": 3,
+        "d_model": 24,
+        "n_ctx": 10,
+        "d_head": 6,
+        "model_name": "custom",
+        "n_heads": 4,
+        "d_mlp": 96,
+        "act_fn": "gelu",
+        "d_vocab": 13,
+        "eps": 1e-05,
+        "use_attn_result": true,
+        "use_attn_scale": true,
+        "use_split_qkv_input": true,
+        "use_hook_mlp_in": true,
+        "use_attn_in": false,
+        "use_local_attn": false,
+        "original_architecture": null,
+        "from_checkpoint": false,
+        "checkpoint_index": null,
+        "checkpoint_label_type": null,
+        "checkpoint_value": null,
+        "tokenizer_name": null,
+        "window_size": null,
+        "attn_types": null,
+        "init_mode": "gpt2",
+        "normalization_type": null,
+        "n_devices": 1,
+        "attention_dir": "bidirectional",
+        "attn_only": false,
+        "seed": 0,
+        "initializer_range": 0.10787197799411874,
+        "init_weights": true,
+        "scale_attn_by_inverse_layer_idx": false,
+        "positional_embedding_type": "standard",
+        "final_rms": false,
+        "d_vocab_out": 11,
+        "parallel_attn_mlp": false,
+        "rotary_dim": null,
+        "n_params": 20736,
+        "use_hook_tokens": false,
+        "gated_mlp": false,
+        "default_prepend_bos": true,
+        "dtype": "torch.float32",
+        "tokenizer_prepends_bos": null,
+        "n_key_value_heads": null,
+        "post_embedding_ln": false,
+        "rotary_base": 10000,
+        "trust_remote_code": false,
+        "rotary_adjacent_pairs": false,
+        "load_in_4bit": false,
+        "num_experts": null,
+        "experts_per_token": null
+      },
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/103/ll_model_cfg.pkl",
+      "training_args": {
+        "output_dir": "/circuits-benchmark/results",
+        "atol": 0.05,
+        "lr": 0.001,
+        "use_single_loss": true,
+        "iit_weight": 1.0,
+        "behavior_weight": 0.4,
+        "strict_weight": 0.4,
+        "epochs": 1000,
+        "early_stop_accuracy_threshold": 99.9,
+        "act_fn": "gelu",
+        "use_wandb": true,
+        "save_model_to_wandb": true,
+        "clip_grad_norm": 0.1,
+        "lr_scheduler": "linear",
+        "model_pair": "strict",
+        "same_size": false,
+        "seed": 67,
+        "batch_size": 256,
+        "include_mlp": false,
+        "detach_while_caching": true,
+        "scheduler_val_metric": [
+          "val/accuracy",
+          "val/IIA",
+          "val/strict_accuracy"
+        ],
+        "siit_sampling": "sample_all",
+        "val_iia_sampling": "all"
+      },
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/103/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/103/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/103/edges.pkl"
+    },
     {
       "case_id": "11",
       "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/11",
       "task_description": "Counts the number of words in a sequence based on their length.",
       "vocab": [
-        "J",
-        "LB",
-        "TPSI",
-        "V",
+        "J",
+        "LB",
+        "TPSI",
+        "V",
+        "b",
+        "no",
+        "oCLrZaW",
+        "poiVg"
+      ],
+      "max_seq_len": 10,
+      "min_seq_len": 4,
+      "files": [
+        {
+          "file_name": "edges.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/edges.pkl"
+        },
+        {
+          "file_name": "ll_model.pth",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/ll_model.pth"
+        },
+        {
+          "file_name": "ll_model_cfg.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/ll_model_cfg.pkl"
+        },
+        {
+          "file_name": "meta.json",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/meta.json"
+        }
+      ],
+      "transformer_cfg": {
+        "n_layers": 2,
+        "d_model": 12,
+        "n_ctx": 10,
+        "d_head": 3,
+        "model_name": "custom",
+        "n_heads": 4,
+        "d_mlp": 48,
+        "act_fn": "gelu",
+        "d_vocab": 10,
+        "eps": 1e-05,
+        "use_attn_result": true,
+        "use_attn_scale": true,
+        "use_split_qkv_input": true,
+        "use_hook_mlp_in": true,
+        "use_attn_in": false,
+        "use_local_attn": false,
+        "original_architecture": null,
+        "from_checkpoint": false,
+        "checkpoint_index": null,
+        "checkpoint_label_type": null,
+        "checkpoint_value": null,
+        "tokenizer_name": null,
+        "window_size": null,
+        "attn_types": null,
+        "init_mode": "gpt2",
+        "normalization_type": null,
+        "n_devices": 1,
+        "attention_dir": "causal",
+        "attn_only": false,
+        "seed": 0,
+        "initializer_range": 0.1460593486680443,
+        "init_weights": true,
+        "scale_attn_by_inverse_layer_idx": false,
+        "positional_embedding_type": "standard",
+        "final_rms": false,
+        "d_vocab_out": 5,
+        "parallel_attn_mlp": false,
+        "rotary_dim": null,
+        "n_params": 3456,
+        "use_hook_tokens": false,
+        "gated_mlp": false,
+        "default_prepend_bos": true,
+        "dtype": "torch.float32",
+        "tokenizer_prepends_bos": null,
+        "n_key_value_heads": null,
+        "post_embedding_ln": false,
+        "rotary_base": 10000,
+        "trust_remote_code": false,
+        "rotary_adjacent_pairs": false
+      },
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/ll_model_cfg.pkl",
+      "training_args": {
+        "atol": 0.05,
+        "lr": 0.01,
+        "use_single_loss": false,
+        "iit_weight": 1.0,
+        "behavior_weight": 1.0,
+        "strict_weight": 0.4,
+        "epochs": 500,
+        "act_fn": "gelu",
+        "clip_grad_norm": 1.0,
+        "lr_scheduler": ""
+      },
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/edges.pkl"
+    },
+    {
+      "case_id": "110",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/110",
+      "task_description": "Inserts zeros between each element, removing the latter half of the list.",
+      "vocab": [
+        0,
+        1,
+        2,
+        3,
+        4,
+        5,
+        6,
+        7,
+        8,
+        9,
+        10
+      ],
+      "max_seq_len": 10,
+      "min_seq_len": 4,
+      "files": [
+        {
+          "file_name": "edges.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/110/edges.pkl"
+        },
+        {
+          "file_name": "ll_model.pth",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/110/ll_model.pth"
+        },
+        {
+          "file_name": "ll_model_cfg.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/110/ll_model_cfg.pkl"
+        },
+        {
+          "file_name": "meta.json",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/110/meta.json"
+        }
+      ],
+      "transformer_cfg": {
+        "n_layers": 2,
+        "d_model": 20,
+        "n_ctx": 10,
+        "d_head": 5,
+        "model_name": "custom",
+        "n_heads": 4,
+        "d_mlp": 80,
+        "act_fn": "gelu",
+        "d_vocab": 13,
+        "eps": 1e-05,
+        "use_attn_result": true,
+        "use_attn_scale": true,
+        "use_split_qkv_input": true,
+        "use_hook_mlp_in": true,
+        "use_attn_in": false,
+        "use_local_attn": false,
+        "original_architecture": null,
+        "from_checkpoint": false,
+        "checkpoint_index": null,
+        "checkpoint_label_type": null,
+        "checkpoint_value": null,
+        "tokenizer_name": null,
+        "window_size": null,
+        "attn_types": null,
+        "init_mode": "gpt2",
+        "normalization_type": null,
+        "n_devices": 1,
+        "attention_dir": "bidirectional",
+        "attn_only": false,
+        "seed": 0,
+        "initializer_range": 0.11925695879998878,
+        "init_weights": true,
+        "scale_attn_by_inverse_layer_idx": false,
+        "positional_embedding_type": "standard",
+        "final_rms": false,
+        "d_vocab_out": 11,
+        "parallel_attn_mlp": false,
+        "rotary_dim": null,
+        "n_params": 9600,
+        "use_hook_tokens": false,
+        "gated_mlp": false,
+        "default_prepend_bos": true,
+        "dtype": "torch.float32",
+        "tokenizer_prepends_bos": null,
+        "n_key_value_heads": null,
+        "post_embedding_ln": false,
+        "rotary_base": 10000,
+        "trust_remote_code": false,
+        "rotary_adjacent_pairs": false,
+        "load_in_4bit": false,
+        "num_experts": null,
+        "experts_per_token": null
+      },
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/110/ll_model_cfg.pkl",
+      "training_args": {
+        "output_dir": "/circuits-benchmark/results",
+        "atol": 0.05,
+        "lr": 0.001,
+        "use_single_loss": true,
+        "iit_weight": 1.0,
+        "behavior_weight": 0.4,
+        "strict_weight": 0.4,
+        "epochs": 1000,
+        "early_stop_accuracy_threshold": 99.9,
+        "act_fn": "gelu",
+        "use_wandb": true,
+        "save_model_to_wandb": true,
+        "clip_grad_norm": 0.1,
+        "lr_scheduler": "linear",
+        "model_pair": "strict",
+        "same_size": false,
+        "seed": 67,
+        "batch_size": 256,
+        "include_mlp": false,
+        "detach_while_caching": true,
+        "scheduler_val_metric": [
+          "val/accuracy",
+          "val/IIA",
+          "val/strict_accuracy"
+        ],
+        "siit_sampling": "sample_all",
+        "val_iia_sampling": "all"
+      },
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/110/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/110/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/110/edges.pkl"
+    },
+    {
+      "case_id": "111",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/111",
+      "task_description": "Returns the last element of the sequence and pads the rest with zeros.",
+      "vocab": [
+        0,
+        1,
+        2,
+        3,
+        4,
+        5,
+        6,
+        7,
+        8,
+        9,
+        10
+      ],
+      "max_seq_len": 10,
+      "min_seq_len": 4,
+      "files": [
+        {
+          "file_name": "edges.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/111/edges.pkl"
+        },
+        {
+          "file_name": "ll_model.pth",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/111/ll_model.pth"
+        },
+        {
+          "file_name": "ll_model_cfg.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/111/ll_model_cfg.pkl"
+        },
+        {
+          "file_name": "meta.json",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/111/meta.json"
+        }
+      ],
+      "transformer_cfg": {
+        "n_layers": 3,
+        "d_model": 24,
+        "n_ctx": 10,
+        "d_head": 6,
+        "model_name": "custom",
+        "n_heads": 4,
+        "d_mlp": 96,
+        "act_fn": "gelu",
+        "d_vocab": 13,
+        "eps": 1e-05,
+        "use_attn_result": true,
+        "use_attn_scale": true,
+        "use_split_qkv_input": true,
+        "use_hook_mlp_in": true,
+        "use_attn_in": false,
+        "use_local_attn": false,
+        "original_architecture": null,
+        "from_checkpoint": false,
+        "checkpoint_index": null,
+        "checkpoint_label_type": null,
+        "checkpoint_value": null,
+        "tokenizer_name": null,
+        "window_size": null,
+        "attn_types": null,
+        "init_mode": "gpt2",
+        "normalization_type": null,
+        "n_devices": 1,
+        "attention_dir": "bidirectional",
+        "attn_only": false,
+        "seed": 0,
+        "initializer_range": 0.09847319278346618,
+        "init_weights": true,
+        "scale_attn_by_inverse_layer_idx": false,
+        "positional_embedding_type": "standard",
+        "final_rms": false,
+        "d_vocab_out": 11,
+        "parallel_attn_mlp": false,
+        "rotary_dim": null,
+        "n_params": 20736,
+        "use_hook_tokens": false,
+        "gated_mlp": false,
+        "default_prepend_bos": true,
+        "dtype": "torch.float32",
+        "tokenizer_prepends_bos": null,
+        "n_key_value_heads": null,
+        "post_embedding_ln": false,
+        "rotary_base": 10000,
+        "trust_remote_code": false,
+        "rotary_adjacent_pairs": false,
+        "load_in_4bit": false,
+        "num_experts": null,
+        "experts_per_token": null
+      },
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/111/ll_model_cfg.pkl",
+      "training_args": {
+        "output_dir": "/circuits-benchmark/results",
+        "atol": 0.05,
+        "lr": 0.001,
+        "use_single_loss": true,
+        "iit_weight": 1.0,
+        "behavior_weight": 0.4,
+        "strict_weight": 0.4,
+        "epochs": 1000,
+        "early_stop_accuracy_threshold": 99.9,
+        "act_fn": "gelu",
+        "use_wandb": true,
+        "save_model_to_wandb": true,
+        "clip_grad_norm": 0.1,
+        "lr_scheduler": "linear",
+        "model_pair": "strict",
+        "same_size": false,
+        "seed": 67,
+        "batch_size": 256,
+        "include_mlp": false,
+        "detach_while_caching": true,
+        "scheduler_val_metric": [
+          "val/accuracy",
+          "val/IIA",
+          "val/strict_accuracy"
+        ],
+        "siit_sampling": "sample_all",
+        "val_iia_sampling": "all"
+      },
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/111/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/111/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/111/edges.pkl"
+    },
+    {
+      "case_id": "113",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/113",
+      "task_description": "Inverts the sequence if it is sorted in ascending order, otherwise leaves it unchanged.",
+      "vocab": [
+        0,
+        1,
+        2,
+        3,
+        4,
+        5,
+        6,
+        7,
+        8,
+        9,
+        10,
+        11,
+        12,
+        13,
+        14,
+        15,
+        16,
+        17,
+        18,
+        19,
+        20,
+        21,
+        22,
+        23,
+        24,
+        25,
+        26,
+        27,
+        28,
+        29
+      ],
+      "max_seq_len": 10,
+      "min_seq_len": 4,
+      "files": [
+        {
+          "file_name": "edges.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/113/edges.pkl"
+        },
+        {
+          "file_name": "ll_model.pth",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/113/ll_model.pth"
+        },
+        {
+          "file_name": "ll_model_cfg.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/113/ll_model_cfg.pkl"
+        },
+        {
+          "file_name": "meta.json",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/113/meta.json"
+        }
+      ],
+      "transformer_cfg": {
+        "n_layers": 7,
+        "d_model": 88,
+        "n_ctx": 10,
+        "d_head": 22,
+        "model_name": "custom",
+        "n_heads": 4,
+        "d_mlp": 352,
+        "act_fn": "gelu",
+        "d_vocab": 32,
+        "eps": 1e-05,
+        "use_attn_result": true,
+        "use_attn_scale": true,
+        "use_split_qkv_input": true,
+        "use_hook_mlp_in": true,
+        "use_attn_in": false,
+        "use_local_attn": false,
+        "original_architecture": null,
+        "from_checkpoint": false,
+        "checkpoint_index": null,
+        "checkpoint_label_type": null,
+        "checkpoint_value": null,
+        "tokenizer_name": null,
+        "window_size": null,
+        "attn_types": null,
+        "init_mode": "gpt2",
+        "normalization_type": null,
+        "n_devices": 1,
+        "attention_dir": "bidirectional",
+        "attn_only": false,
+        "seed": 0,
+        "initializer_range": 0.0512147519731584,
+        "init_weights": true,
+        "scale_attn_by_inverse_layer_idx": false,
+        "positional_embedding_type": "standard",
+        "final_rms": false,
+        "d_vocab_out": 30,
+        "parallel_attn_mlp": false,
+        "rotary_dim": null,
+        "n_params": 650496,
+        "use_hook_tokens": false,
+        "gated_mlp": false,
+        "default_prepend_bos": true,
+        "dtype": "torch.float32",
+        "tokenizer_prepends_bos": null,
+        "n_key_value_heads": null,
+        "post_embedding_ln": false,
+        "rotary_base": 10000,
+        "trust_remote_code": false,
+        "rotary_adjacent_pairs": false,
+        "load_in_4bit": false,
+        "num_experts": null,
+        "experts_per_token": null
+      },
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/113/ll_model_cfg.pkl",
+      "training_args": {
+        "output_dir": "/circuits-benchmark/results",
+        "atol": 0.05,
+        "lr": 0.001,
+        "use_single_loss": true,
+        "iit_weight": 1.0,
+        "behavior_weight": 0.4,
+        "strict_weight": 0.4,
+        "epochs": 1000,
+        "early_stop_accuracy_threshold": 99.9,
+        "act_fn": "gelu",
+        "use_wandb": true,
+        "save_model_to_wandb": true,
+        "clip_grad_norm": 0.1,
+        "lr_scheduler": "linear",
+        "model_pair": "strict",
+        "same_size": false,
+        "seed": 67,
+        "batch_size": 256,
+        "include_mlp": false,
+        "detach_while_caching": true,
+        "scheduler_val_metric": [
+          "val/accuracy",
+          "val/IIA",
+          "val/strict_accuracy"
+        ],
+        "siit_sampling": "sample_all",
+        "val_iia_sampling": "all"
+      },
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/113/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/113/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/113/edges.pkl"
+    },
+    {
+      "case_id": "114",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/114",
+      "task_description": "Apply a logarithm base 10 to each element of the input sequence.",
+      "vocab": [
+        1,
+        2,
+        3,
+        4,
+        5,
+        6,
+        7,
+        8,
+        9,
+        10
+      ],
+      "max_seq_len": 10,
+      "min_seq_len": 4,
+      "files": [
+        {
+          "file_name": "edges.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/114/edges.pkl"
+        },
+        {
+          "file_name": "ll_model.pth",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/114/ll_model.pth"
+        },
+        {
+          "file_name": "ll_model_cfg.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/114/ll_model_cfg.pkl"
+        },
+        {
+          "file_name": "meta.json",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/114/meta.json"
+        }
+      ],
+      "transformer_cfg": {
+        "n_layers": 2,
+        "d_model": 4,
+        "n_ctx": 10,
+        "d_head": 1,
+        "model_name": "custom",
+        "n_heads": 4,
+        "d_mlp": 16,
+        "act_fn": "gelu",
+        "d_vocab": 12,
+        "eps": 1e-05,
+        "use_attn_result": true,
+        "use_attn_scale": true,
+        "use_split_qkv_input": true,
+        "use_hook_mlp_in": true,
+        "use_attn_in": false,
+        "use_local_attn": false,
+        "original_architecture": null,
+        "from_checkpoint": false,
+        "checkpoint_index": null,
+        "checkpoint_label_type": null,
+        "checkpoint_value": null,
+        "tokenizer_name": null,
+        "window_size": null,
+        "attn_types": null,
+        "init_mode": "gpt2",
+        "normalization_type": null,
+        "n_devices": 1,
+        "attention_dir": "causal",
+        "attn_only": false,
+        "seed": 0,
+        "initializer_range": 0.14368424162141993,
+        "init_weights": true,
+        "scale_attn_by_inverse_layer_idx": false,
+        "positional_embedding_type": "standard",
+        "final_rms": false,
+        "d_vocab_out": 10,
+        "parallel_attn_mlp": false,
+        "rotary_dim": null,
+        "n_params": 384,
+        "use_hook_tokens": false,
+        "gated_mlp": false,
+        "default_prepend_bos": true,
+        "dtype": "torch.float32",
+        "tokenizer_prepends_bos": null,
+        "n_key_value_heads": null,
+        "post_embedding_ln": false,
+        "rotary_base": 10000,
+        "trust_remote_code": false,
+        "rotary_adjacent_pairs": false,
+        "load_in_4bit": false,
+        "num_experts": null,
+        "experts_per_token": null
+      },
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/114/ll_model_cfg.pkl",
+      "training_args": {
+        "output_dir": "/circuits-benchmark/results",
+        "atol": 0.05,
+        "lr": 0.001,
+        "use_single_loss": true,
+        "iit_weight": 1.0,
+        "behavior_weight": 0.4,
+        "strict_weight": 0.4,
+        "epochs": 1000,
+        "early_stop_accuracy_threshold": 99.9,
+        "act_fn": "gelu",
+        "use_wandb": true,
+        "save_model_to_wandb": true,
+        "clip_grad_norm": 0.1,
+        "lr_scheduler": "linear",
+        "model_pair": "strict",
+        "same_size": false,
+        "seed": 67,
+        "batch_size": 256,
+        "include_mlp": false,
+        "detach_while_caching": true,
+        "scheduler_val_metric": [
+          "val/accuracy",
+          "val/IIA",
+          "val/strict_accuracy"
+        ],
+        "siit_sampling": "sample_all",
+        "val_iia_sampling": "all"
+      },
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/114/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/114/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/114/edges.pkl"
+    },
+    {
+      "case_id": "122",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/122",
+      "task_description": "Check if each number is divisible by 3.",
+      "vocab": [
+        0,
+        1,
+        2,
+        3,
+        4,
+        5,
+        6,
+        7,
+        8,
+        9,
+        10,
+        11,
+        12,
+        13,
+        14,
+        15,
+        16,
+        17,
+        18,
+        19,
+        20,
+        21,
+        22,
+        23,
+        24,
+        25,
+        26,
+        27,
+        28,
+        29
+      ],
+      "max_seq_len": 10,
+      "min_seq_len": 4,
+      "files": [
+        {
+          "file_name": "edges.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/122/edges.pkl"
+        },
+        {
+          "file_name": "ll_model.pth",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/122/ll_model.pth"
+        },
+        {
+          "file_name": "ll_model_cfg.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/122/ll_model_cfg.pkl"
+        },
+        {
+          "file_name": "meta.json",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/122/meta.json"
+        }
+      ],
+      "transformer_cfg": {
+        "n_layers": 2,
+        "d_model": 4,
+        "n_ctx": 10,
+        "d_head": 1,
+        "model_name": "custom",
+        "n_heads": 4,
+        "d_mlp": 16,
+        "act_fn": "gelu",
+        "d_vocab": 32,
+        "eps": 1e-05,
+        "use_attn_result": true,
+        "use_attn_scale": true,
+        "use_split_qkv_input": true,
+        "use_hook_mlp_in": true,
+        "use_attn_in": false,
+        "use_local_attn": false,
+        "original_architecture": null,
+        "from_checkpoint": false,
+        "checkpoint_index": null,
+        "checkpoint_label_type": null,
+        "checkpoint_value": null,
+        "tokenizer_name": null,
+        "window_size": null,
+        "attn_types": null,
+        "init_mode": "gpt2",
+        "normalization_type": null,
+        "n_devices": 1,
+        "attention_dir": "causal",
+        "attn_only": false,
+        "seed": 0,
+        "initializer_range": 0.12199885626608374,
+        "init_weights": true,
+        "scale_attn_by_inverse_layer_idx": false,
+        "positional_embedding_type": "standard",
+        "final_rms": false,
+        "d_vocab_out": 2,
+        "parallel_attn_mlp": false,
+        "rotary_dim": null,
+        "n_params": 384,
+        "use_hook_tokens": false,
+        "gated_mlp": false,
+        "default_prepend_bos": true,
+        "dtype": "torch.float32",
+        "tokenizer_prepends_bos": null,
+        "n_key_value_heads": null,
+        "post_embedding_ln": false,
+        "rotary_base": 10000,
+        "trust_remote_code": false,
+        "rotary_adjacent_pairs": false,
+        "load_in_4bit": false,
+        "num_experts": null,
+        "experts_per_token": null
+      },
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/122/ll_model_cfg.pkl",
+      "training_args": {
+        "output_dir": "/circuits-benchmark/results",
+        "atol": 0.05,
+        "lr": 0.001,
+        "use_single_loss": true,
+        "iit_weight": 1.0,
+        "behavior_weight": 0.4,
+        "strict_weight": 0.4,
+        "epochs": 1000,
+        "early_stop_accuracy_threshold": 99.9,
+        "act_fn": "gelu",
+        "use_wandb": true,
+        "save_model_to_wandb": true,
+        "clip_grad_norm": 0.1,
+        "lr_scheduler": "linear",
+        "model_pair": "strict",
+        "same_size": false,
+        "seed": 67,
+        "batch_size": 256,
+        "include_mlp": false,
+        "detach_while_caching": true,
+        "scheduler_val_metric": [
+          "val/accuracy",
+          "val/IIA",
+          "val/strict_accuracy"
+        ],
+        "siit_sampling": "sample_all",
+        "val_iia_sampling": "all"
+      },
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/122/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/122/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/122/edges.pkl"
+    },
+    {
+      "case_id": "124",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/124",
+      "task_description": "Check if all elements in a list are equal.",
+      "vocab": [
+        0,
+        1,
+        2,
+        3,
+        4,
+        5,
+        6,
+        7,
+        8,
+        9,
+        10
+      ],
+      "max_seq_len": 10,
+      "min_seq_len": 4,
+      "files": [
+        {
+          "file_name": "edges.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/124/edges.pkl"
+        },
+        {
+          "file_name": "ll_model.pth",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/124/ll_model.pth"
+        },
+        {
+          "file_name": "ll_model_cfg.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/124/ll_model_cfg.pkl"
+        },
+        {
+          "file_name": "meta.json",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/124/meta.json"
+        }
+      ],
+      "transformer_cfg": {
+        "n_layers": 3,
+        "d_model": 24,
+        "n_ctx": 10,
+        "d_head": 6,
+        "model_name": "custom",
+        "n_heads": 4,
+        "d_mlp": 96,
+        "act_fn": "gelu",
+        "d_vocab": 13,
+        "eps": 1e-05,
+        "use_attn_result": true,
+        "use_attn_scale": true,
+        "use_split_qkv_input": true,
+        "use_hook_mlp_in": true,
+        "use_attn_in": false,
+        "use_local_attn": false,
+        "original_architecture": null,
+        "from_checkpoint": false,
+        "checkpoint_index": null,
+        "checkpoint_label_type": null,
+        "checkpoint_value": null,
+        "tokenizer_name": null,
+        "window_size": null,
+        "attn_types": null,
+        "init_mode": "gpt2",
+        "normalization_type": null,
+        "n_devices": 1,
+        "attention_dir": "bidirectional",
+        "attn_only": false,
+        "seed": 0,
+        "initializer_range": 0.11547005383792516,
+        "init_weights": true,
+        "scale_attn_by_inverse_layer_idx": false,
+        "positional_embedding_type": "standard",
+        "final_rms": false,
+        "d_vocab_out": 2,
+        "parallel_attn_mlp": false,
+        "rotary_dim": null,
+        "n_params": 20736,
+        "use_hook_tokens": false,
+        "gated_mlp": false,
+        "default_prepend_bos": true,
+        "dtype": "torch.float32",
+        "tokenizer_prepends_bos": null,
+        "n_key_value_heads": null,
+        "post_embedding_ln": false,
+        "rotary_base": 10000,
+        "trust_remote_code": false,
+        "rotary_adjacent_pairs": false,
+        "load_in_4bit": false,
+        "num_experts": null,
+        "experts_per_token": null
+      },
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/124/ll_model_cfg.pkl",
+      "training_args": {
+        "output_dir": "/circuits-benchmark/results",
+        "atol": 0.05,
+        "lr": 0.001,
+        "use_single_loss": true,
+        "iit_weight": 1.0,
+        "behavior_weight": 0.4,
+        "strict_weight": 0.4,
+        "epochs": 1000,
+        "early_stop_accuracy_threshold": 99.9,
+        "act_fn": "gelu",
+        "use_wandb": true,
+        "save_model_to_wandb": true,
+        "clip_grad_norm": 0.1,
+        "lr_scheduler": "linear",
+        "model_pair": "strict",
+        "same_size": false,
+        "seed": 67,
+        "batch_size": 256,
+        "include_mlp": false,
+        "detach_while_caching": true,
+        "scheduler_val_metric": [
+          "val/accuracy",
+          "val/IIA",
+          "val/strict_accuracy"
+        ],
+        "siit_sampling": "sample_all",
+        "val_iia_sampling": "all"
+      },
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/124/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/124/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/124/edges.pkl"
+    },
+    {
+      "case_id": "129",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/129",
+      "task_description": "Checks if all elements are a multiple of n (set the default at 2).",
+      "vocab": [
+        0,
+        1,
+        2,
+        3,
+        4,
+        5,
+        6,
+        7,
+        8,
+        9,
+        10,
+        11,
+        12,
+        13,
+        14,
+        15,
+        16,
+        17,
+        18,
+        19,
+        20,
+        21,
+        22,
+        23,
+        24,
+        25,
+        26,
+        27,
+        28,
+        29
+      ],
+      "max_seq_len": 10,
+      "min_seq_len": 4,
+      "files": [
+        {
+          "file_name": "edges.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/129/edges.pkl"
+        },
+        {
+          "file_name": "ll_model.pth",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/129/ll_model.pth"
+        },
+        {
+          "file_name": "ll_model_cfg.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/129/ll_model_cfg.pkl"
+        },
+        {
+          "file_name": "meta.json",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/129/meta.json"
+        }
+      ],
+      "transformer_cfg": {
+        "n_layers": 3,
+        "d_model": 4,
+        "n_ctx": 10,
+        "d_head": 1,
+        "model_name": "custom",
+        "n_heads": 4,
+        "d_mlp": 16,
+        "act_fn": "gelu",
+        "d_vocab": 32,
+        "eps": 1e-05,
+        "use_attn_result": true,
+        "use_attn_scale": true,
+        "use_split_qkv_input": true,
+        "use_hook_mlp_in": true,
+        "use_attn_in": false,
+        "use_local_attn": false,
+        "original_architecture": null,
+        "from_checkpoint": false,
+        "checkpoint_index": null,
+        "checkpoint_label_type": null,
+        "checkpoint_value": null,
+        "tokenizer_name": null,
+        "window_size": null,
+        "attn_types": null,
+        "init_mode": "gpt2",
+        "normalization_type": null,
+        "n_devices": 1,
+        "attention_dir": "bidirectional",
+        "attn_only": false,
+        "seed": 0,
+        "initializer_range": 0.10504514628777804,
+        "init_weights": true,
+        "scale_attn_by_inverse_layer_idx": false,
+        "positional_embedding_type": "standard",
+        "final_rms": false,
+        "d_vocab_out": 2,
+        "parallel_attn_mlp": false,
+        "rotary_dim": null,
+        "n_params": 576,
+        "use_hook_tokens": false,
+        "gated_mlp": false,
+        "default_prepend_bos": true,
+        "dtype": "torch.float32",
+        "tokenizer_prepends_bos": null,
+        "n_key_value_heads": null,
+        "post_embedding_ln": false,
+        "rotary_base": 10000,
+        "trust_remote_code": false,
+        "rotary_adjacent_pairs": false,
+        "load_in_4bit": false,
+        "num_experts": null,
+        "experts_per_token": null
+      },
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/129/ll_model_cfg.pkl",
+      "training_args": {
+        "output_dir": "/circuits-benchmark/results",
+        "atol": 0.05,
+        "lr": 0.001,
+        "use_single_loss": true,
+        "iit_weight": 1.0,
+        "behavior_weight": 0.4,
+        "strict_weight": 0.4,
+        "epochs": 1000,
+        "early_stop_accuracy_threshold": 99.9,
+        "act_fn": "gelu",
+        "use_wandb": true,
+        "save_model_to_wandb": true,
+        "clip_grad_norm": 0.1,
+        "lr_scheduler": "linear",
+        "model_pair": "strict",
+        "same_size": false,
+        "seed": 67,
+        "batch_size": 256,
+        "include_mlp": false,
+        "detach_while_caching": true,
+        "scheduler_val_metric": [
+          "val/accuracy",
+          "val/IIA",
+          "val/strict_accuracy"
+        ],
+        "siit_sampling": "sample_all",
+        "val_iia_sampling": "all"
+      },
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/129/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/129/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/129/edges.pkl"
+    },
+    {
+      "case_id": "13",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/13",
+      "task_description": "Analyzes the trend (increasing, decreasing, constant) of numeric tokens.",
+      "vocab": [
+        0,
+        1,
+        2
+      ],
+      "max_seq_len": 10,
+      "min_seq_len": 4,
+      "files": [
+        {
+          "file_name": "edges.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/edges.pkl"
+        },
+        {
+          "file_name": "ll_model.pth",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/ll_model.pth"
+        },
+        {
+          "file_name": "ll_model_cfg.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/ll_model_cfg.pkl"
+        },
+        {
+          "file_name": "meta.json",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/meta.json"
+        }
+      ],
+      "transformer_cfg": {
+        "n_layers": 2,
+        "d_model": 20,
+        "n_ctx": 10,
+        "d_head": 5,
+        "model_name": "custom",
+        "n_heads": 4,
+        "d_mlp": 80,
+        "act_fn": "gelu",
+        "d_vocab": 5,
+        "eps": 1e-05,
+        "use_attn_result": true,
+        "use_attn_scale": true,
+        "use_split_qkv_input": true,
+        "use_hook_mlp_in": true,
+        "use_attn_in": false,
+        "use_local_attn": false,
+        "original_architecture": null,
+        "from_checkpoint": false,
+        "checkpoint_index": null,
+        "checkpoint_label_type": null,
+        "checkpoint_value": null,
+        "tokenizer_name": null,
+        "window_size": null,
+        "attn_types": null,
+        "init_mode": "gpt2",
+        "normalization_type": null,
+        "n_devices": 1,
+        "attention_dir": "bidirectional",
+        "attn_only": false,
+        "seed": 0,
+        "initializer_range": 0.1460593486680443,
+        "init_weights": true,
+        "scale_attn_by_inverse_layer_idx": false,
+        "positional_embedding_type": "standard",
+        "final_rms": false,
+        "d_vocab_out": 3,
+        "parallel_attn_mlp": false,
+        "rotary_dim": null,
+        "n_params": 9600,
+        "use_hook_tokens": false,
+        "gated_mlp": false,
+        "default_prepend_bos": true,
+        "dtype": "torch.float32",
+        "tokenizer_prepends_bos": null,
+        "n_key_value_heads": null,
+        "post_embedding_ln": false,
+        "rotary_base": 10000,
+        "trust_remote_code": false,
+        "rotary_adjacent_pairs": false
+      },
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/ll_model_cfg.pkl",
+      "training_args": {
+        "atol": 0.05,
+        "lr": 0.01,
+        "use_single_loss": false,
+        "iit_weight": 1.0,
+        "behavior_weight": 1.0,
+        "strict_weight": 0.4,
+        "epochs": 500,
+        "act_fn": "gelu",
+        "clip_grad_norm": 1.0,
+        "lr_scheduler": ""
+      },
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/edges.pkl"
+    },
+    {
+      "case_id": "14",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/14",
+      "task_description": "Returns the count of 'a' in the input sequence.",
+      "vocab": [
+        "a",
+        "b",
+        "c"
+      ],
+      "max_seq_len": 10,
+      "min_seq_len": 4,
+      "files": [
+        {
+          "file_name": "edges.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/14/edges.pkl"
+        },
+        {
+          "file_name": "ll_model.pth",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/14/ll_model.pth"
+        },
+        {
+          "file_name": "ll_model_cfg.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/14/ll_model_cfg.pkl"
+        },
+        {
+          "file_name": "meta.json",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/14/meta.json"
+        }
+      ],
+      "transformer_cfg": {
+        "n_layers": 2,
+        "d_model": 8,
+        "n_ctx": 10,
+        "d_head": 2,
+        "model_name": "custom",
+        "n_heads": 4,
+        "d_mlp": 32,
+        "act_fn": "gelu",
+        "d_vocab": 5,
+        "eps": 1e-05,
+        "use_attn_result": true,
+        "use_attn_scale": true,
+        "use_split_qkv_input": true,
+        "use_hook_mlp_in": true,
+        "use_attn_in": false,
+        "use_local_attn": false,
+        "original_architecture": null,
+        "from_checkpoint": false,
+        "checkpoint_index": null,
+        "checkpoint_label_type": null,
+        "checkpoint_value": null,
+        "tokenizer_name": null,
+        "window_size": null,
+        "attn_types": null,
+        "init_mode": "gpt2",
+        "normalization_type": null,
+        "n_devices": 1,
+        "attention_dir": "bidirectional",
+        "attn_only": false,
+        "seed": 0,
+        "initializer_range": 0.15689290811054724,
+        "init_weights": true,
+        "scale_attn_by_inverse_layer_idx": false,
+        "positional_embedding_type": "standard",
+        "final_rms": false,
+        "d_vocab_out": 10,
+        "parallel_attn_mlp": false,
+        "rotary_dim": null,
+        "n_params": 1536,
+        "use_hook_tokens": false,
+        "gated_mlp": false,
+        "default_prepend_bos": true,
+        "dtype": "torch.float32",
+        "tokenizer_prepends_bos": null,
+        "n_key_value_heads": null,
+        "post_embedding_ln": false,
+        "rotary_base": 10000,
+        "trust_remote_code": false,
+        "rotary_adjacent_pairs": false,
+        "load_in_4bit": false,
+        "num_experts": null,
+        "experts_per_token": null
+      },
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/14/ll_model_cfg.pkl",
+      "training_args": {
+        "output_dir": "/circuits-benchmark/results",
+        "atol": 0.05,
+        "lr": 0.001,
+        "use_single_loss": true,
+        "iit_weight": 1.0,
+        "behavior_weight": 0.4,
+        "strict_weight": 0.4,
+        "epochs": 1000,
+        "early_stop_accuracy_threshold": 99.9,
+        "act_fn": "gelu",
+        "use_wandb": true,
+        "save_model_to_wandb": true,
+        "clip_grad_norm": 0.1,
+        "lr_scheduler": "linear",
+        "model_pair": "strict",
+        "same_size": false,
+        "seed": 67,
+        "batch_size": 256,
+        "include_mlp": false,
+        "detach_while_caching": true,
+        "scheduler_val_metric": [
+          "val/accuracy",
+          "val/IIA",
+          "val/strict_accuracy"
+        ],
+        "siit_sampling": "sample_all",
+        "val_iia_sampling": "all"
+      },
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/14/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/14/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/14/edges.pkl"
+    },
+    {
+      "case_id": "18",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/18",
+      "task_description": "Classify each token based on its frequency as 'rare', 'common', or 'frequent'.",
+      "vocab": [
+        "a",
+        "b",
+        "c",
+        "d",
+        "e"
+      ],
+      "max_seq_len": 10,
+      "min_seq_len": 4,
+      "files": [
+        {
+          "file_name": "edges.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/edges.pkl"
+        },
+        {
+          "file_name": "ll_model.pth",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/ll_model.pth"
+        },
+        {
+          "file_name": "ll_model_cfg.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/ll_model_cfg.pkl"
+        },
+        {
+          "file_name": "meta.json",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/meta.json"
+        }
+      ],
+      "transformer_cfg": {
+        "n_layers": 2,
+        "d_model": 26,
+        "n_ctx": 10,
+        "d_head": 6,
+        "model_name": "custom",
+        "n_heads": 4,
+        "d_mlp": 104,
+        "act_fn": "gelu",
+        "d_vocab": 7,
+        "eps": 1e-05,
+        "use_attn_result": true,
+        "use_attn_scale": true,
+        "use_split_qkv_input": true,
+        "use_hook_mlp_in": true,
+        "use_attn_in": false,
+        "use_local_attn": false,
+        "original_architecture": null,
+        "from_checkpoint": false,
+        "checkpoint_index": null,
+        "checkpoint_label_type": null,
+        "checkpoint_value": null,
+        "tokenizer_name": null,
+        "window_size": null,
+        "attn_types": null,
+        "init_mode": "gpt2",
+        "normalization_type": null,
+        "n_devices": 1,
+        "attention_dir": "bidirectional",
+        "attn_only": false,
+        "seed": 0,
+        "initializer_range": 0.12344267996967354,
+        "init_weights": true,
+        "scale_attn_by_inverse_layer_idx": false,
+        "positional_embedding_type": "standard",
+        "final_rms": false,
+        "d_vocab_out": 3,
+        "parallel_attn_mlp": false,
+        "rotary_dim": null,
+        "n_params": 15808,
+        "use_hook_tokens": false,
+        "gated_mlp": false,
+        "default_prepend_bos": true,
+        "dtype": "torch.float32",
+        "tokenizer_prepends_bos": null,
+        "n_key_value_heads": null,
+        "post_embedding_ln": false,
+        "rotary_base": 10000,
+        "trust_remote_code": false,
+        "rotary_adjacent_pairs": false
+      },
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/ll_model_cfg.pkl",
+      "training_args": {
+        "atol": 0.05,
+        "lr": 0.001,
+        "use_single_loss": false,
+        "iit_weight": 1.0,
+        "behavior_weight": 1.0,
+        "strict_weight": 1.0,
+        "epochs": 2000,
+        "act_fn": "gelu",
+        "clip_grad_norm": 0.1,
+        "lr_scheduler": "",
+        "model_pair": "strict"
+      },
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/edges.pkl"
+    },
+    {
+      "case_id": "19",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/19",
+      "task_description": "Removes consecutive duplicate tokens from a sequence.",
+      "vocab": [
+        "a",
+        "b",
+        "c"
+      ],
+      "max_seq_len": 15,
+      "min_seq_len": 4,
+      "files": [
+        {
+          "file_name": "edges.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/edges.pkl"
+        },
+        {
+          "file_name": "ll_model.pth",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/ll_model.pth"
+        },
+        {
+          "file_name": "ll_model_cfg.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/ll_model_cfg.pkl"
+        },
+        {
+          "file_name": "meta.json",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/meta.json"
+        }
+      ],
+      "transformer_cfg": {
+        "n_layers": 2,
+        "d_model": 32,
+        "n_ctx": 15,
+        "d_head": 8,
+        "model_name": "custom",
+        "n_heads": 4,
+        "d_mlp": 128,
+        "act_fn": "gelu",
+        "d_vocab": 5,
+        "eps": 1e-05,
+        "use_attn_result": true,
+        "use_attn_scale": true,
+        "use_split_qkv_input": true,
+        "use_hook_mlp_in": true,
+        "use_attn_in": false,
+        "use_local_attn": false,
+        "original_architecture": null,
+        "from_checkpoint": false,
+        "checkpoint_index": null,
+        "checkpoint_label_type": null,
+        "checkpoint_value": null,
+        "tokenizer_name": null,
+        "window_size": null,
+        "attn_types": null,
+        "init_mode": "gpt2",
+        "normalization_type": null,
+        "n_devices": 1,
+        "attention_dir": "causal",
+        "attn_only": false,
+        "seed": 0,
+        "initializer_range": 0.15689290811054724,
+        "init_weights": true,
+        "scale_attn_by_inverse_layer_idx": false,
+        "positional_embedding_type": "standard",
+        "final_rms": false,
+        "d_vocab_out": 3,
+        "parallel_attn_mlp": false,
+        "rotary_dim": null,
+        "n_params": 24576,
+        "use_hook_tokens": false,
+        "gated_mlp": false,
+        "default_prepend_bos": true,
+        "dtype": "torch.float32",
+        "tokenizer_prepends_bos": null,
+        "n_key_value_heads": null,
+        "post_embedding_ln": false,
+        "rotary_base": 10000,
+        "trust_remote_code": false,
+        "rotary_adjacent_pairs": false
+      },
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/ll_model_cfg.pkl",
+      "training_args": {
+        "atol": 0.05,
+        "lr": 0.001,
+        "use_single_loss": false,
+        "iit_weight": 1.0,
+        "behavior_weight": 1.0,
+        "strict_weight": 0.4,
+        "epochs": 2000,
+        "act_fn": "gelu",
+        "clip_grad_norm": 0.1,
+        "lr_scheduler": ""
+      },
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/edges.pkl"
+    },
+    {
+      "case_id": "2",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/2",
+      "task_description": "Reverse the input sequence.",
+      "vocab": [
+        "a",
+        "b",
+        "c",
+        "d",
+        "e",
+        "f",
+        "g",
+        "h",
+        "i",
+        "j",
+        "k",
+        "l",
+        "m",
+        "n",
+        "o",
+        "p",
+        "q",
+        "r",
+        "s",
+        "t",
+        "u",
+        "v",
+        "w",
+        "x",
+        "y",
+        "z"
+      ],
+      "max_seq_len": 10,
+      "min_seq_len": 4,
+      "files": [
+        {
+          "file_name": "edges.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/2/edges.pkl"
+        },
+        {
+          "file_name": "ll_model.pth",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/2/ll_model.pth"
+        },
+        {
+          "file_name": "ll_model_cfg.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/2/ll_model_cfg.pkl"
+        },
+        {
+          "file_name": "meta.json",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/2/meta.json"
+        }
+      ],
+      "transformer_cfg": {
+        "n_layers": 4,
+        "d_model": 56,
+        "n_ctx": 10,
+        "d_head": 14,
+        "model_name": "custom",
+        "n_heads": 4,
+        "d_mlp": 224,
+        "act_fn": "gelu",
+        "d_vocab": 28,
+        "eps": 1e-05,
+        "use_attn_result": true,
+        "use_attn_scale": true,
+        "use_split_qkv_input": true,
+        "use_hook_mlp_in": true,
+        "use_attn_in": false,
+        "use_local_attn": false,
+        "original_architecture": null,
+        "from_checkpoint": false,
+        "checkpoint_index": null,
+        "checkpoint_label_type": null,
+        "checkpoint_value": null,
+        "tokenizer_name": null,
+        "window_size": null,
+        "attn_types": null,
+        "init_mode": "gpt2",
+        "normalization_type": null,
+        "n_devices": 1,
+        "attention_dir": "bidirectional",
+        "attn_only": false,
+        "seed": 0,
+        "initializer_range": 0.07593263966019993,
+        "init_weights": true,
+        "scale_attn_by_inverse_layer_idx": false,
+        "positional_embedding_type": "standard",
+        "final_rms": false,
+        "d_vocab_out": 26,
+        "parallel_attn_mlp": false,
+        "rotary_dim": null,
+        "n_params": 150528,
+        "use_hook_tokens": false,
+        "gated_mlp": false,
+        "default_prepend_bos": true,
+        "dtype": "torch.float32",
+        "tokenizer_prepends_bos": null,
+        "n_key_value_heads": null,
+        "post_embedding_ln": false,
+        "rotary_base": 10000,
+        "trust_remote_code": false,
+        "rotary_adjacent_pairs": false,
+        "load_in_4bit": false,
+        "num_experts": null,
+        "experts_per_token": null
+      },
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/2/ll_model_cfg.pkl",
+      "training_args": {
+        "output_dir": "/circuits-benchmark/results",
+        "atol": 0.05,
+        "lr": 0.001,
+        "use_single_loss": true,
+        "iit_weight": 1.0,
+        "behavior_weight": 0.4,
+        "strict_weight": 0.4,
+        "epochs": 1000,
+        "early_stop_accuracy_threshold": 99.9,
+        "act_fn": "gelu",
+        "use_wandb": true,
+        "save_model_to_wandb": true,
+        "clip_grad_norm": 0.1,
+        "lr_scheduler": "linear",
+        "model_pair": "strict",
+        "same_size": false,
+        "seed": 67,
+        "batch_size": 256,
+        "include_mlp": false,
+        "detach_while_caching": true,
+        "scheduler_val_metric": [
+          "val/accuracy",
+          "val/IIA",
+          "val/strict_accuracy"
+        ],
+        "siit_sampling": "sample_all",
+        "val_iia_sampling": "all"
+      },
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/2/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/2/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/2/edges.pkl"
+    },
+    {
+      "case_id": "20",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/20",
+      "task_description": "Detect spam messages based on appearance of spam keywords.",
+      "vocab": [
+        "J",
+        "LB",
+        "TPSI",
+        "V",
+        "b",
+        "click",
+        "no",
+        "now",
+        "oCLrZaW",
+        "offer",
+        "poiVg",
+        "spam"
+      ],
+      "max_seq_len": 10,
+      "min_seq_len": 4,
+      "files": [
+        {
+          "file_name": "edges.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/edges.pkl"
+        },
+        {
+          "file_name": "ll_model.pth",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/ll_model.pth"
+        },
+        {
+          "file_name": "ll_model_cfg.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/ll_model_cfg.pkl"
+        },
+        {
+          "file_name": "meta.json",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/meta.json"
+        }
+      ],
+      "transformer_cfg": {
+        "n_layers": 2,
+        "d_model": 13,
+        "n_ctx": 10,
+        "d_head": 3,
+        "model_name": "custom",
+        "n_heads": 4,
+        "d_mlp": 52,
+        "act_fn": "gelu",
+        "d_vocab": 14,
+        "eps": 1e-05,
+        "use_attn_result": true,
+        "use_attn_scale": true,
+        "use_split_qkv_input": true,
+        "use_hook_mlp_in": true,
+        "use_attn_in": false,
+        "use_local_attn": false,
+        "original_architecture": null,
+        "from_checkpoint": false,
+        "checkpoint_index": null,
+        "checkpoint_label_type": null,
+        "checkpoint_value": null,
+        "tokenizer_name": null,
+        "window_size": null,
+        "attn_types": null,
+        "init_mode": "gpt2",
+        "normalization_type": null,
+        "n_devices": 1,
+        "attention_dir": "causal",
+        "attn_only": false,
+        "seed": 0,
+        "initializer_range": 0.16,
+        "init_weights": true,
+        "scale_attn_by_inverse_layer_idx": false,
+        "positional_embedding_type": "standard",
+        "final_rms": false,
+        "d_vocab_out": 2,
+        "parallel_attn_mlp": false,
+        "rotary_dim": null,
+        "n_params": 3952,
+        "use_hook_tokens": false,
+        "gated_mlp": false,
+        "default_prepend_bos": true,
+        "dtype": "torch.float32",
+        "tokenizer_prepends_bos": null,
+        "n_key_value_heads": null,
+        "post_embedding_ln": false,
+        "rotary_base": 10000,
+        "trust_remote_code": false,
+        "rotary_adjacent_pairs": false
+      },
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/ll_model_cfg.pkl",
+      "training_args": {
+        "atol": 0.05,
+        "lr": 0.01,
+        "use_single_loss": false,
+        "iit_weight": 1.0,
+        "behavior_weight": 1.0,
+        "strict_weight": 0.4,
+        "epochs": 2000,
+        "act_fn": "gelu",
+        "clip_grad_norm": 0.1,
+        "lr_scheduler": "",
+        "model_pair": "strict"
+      },
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/edges.pkl"
+    },
+    {
+      "case_id": "21",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/21",
+      "task_description": "Extract unique tokens from a string",
+      "vocab": [
+        "a",
+        "b",
+        "c"
+      ],
+      "max_seq_len": 10,
+      "min_seq_len": 4,
+      "files": [
+        {
+          "file_name": "edges.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/edges.pkl"
+        },
+        {
+          "file_name": "ll_model.pth",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/ll_model.pth"
+        },
+        {
+          "file_name": "ll_model_cfg.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/ll_model_cfg.pkl"
+        },
+        {
+          "file_name": "meta.json",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/meta.json"
+        }
+      ],
+      "transformer_cfg": {
+        "n_layers": 4,
+        "d_model": 50,
+        "n_ctx": 10,
+        "d_head": 12,
+        "model_name": "custom",
+        "n_heads": 4,
+        "d_mlp": 200,
+        "act_fn": "gelu",
+        "d_vocab": 5,
+        "eps": 1e-05,
+        "use_attn_result": true,
+        "use_attn_scale": true,
+        "use_split_qkv_input": true,
+        "use_hook_mlp_in": true,
+        "use_attn_in": false,
+        "use_local_attn": false,
+        "original_architecture": null,
+        "from_checkpoint": false,
+        "checkpoint_index": null,
+        "checkpoint_label_type": null,
+        "checkpoint_value": null,
+        "tokenizer_name": null,
+        "window_size": null,
+        "attn_types": null,
+        "init_mode": "gpt2",
+        "normalization_type": null,
+        "n_devices": 1,
+        "attention_dir": "causal",
+        "attn_only": false,
+        "seed": 0,
+        "initializer_range": 0.09847319278346618,
+        "init_weights": true,
+        "scale_attn_by_inverse_layer_idx": false,
+        "positional_embedding_type": "standard",
+        "final_rms": false,
+        "d_vocab_out": 3,
+        "parallel_attn_mlp": false,
+        "rotary_dim": null,
+        "n_params": 118400,
+        "use_hook_tokens": false,
+        "gated_mlp": false,
+        "default_prepend_bos": true,
+        "dtype": "torch.float32",
+        "tokenizer_prepends_bos": null,
+        "n_key_value_heads": null,
+        "post_embedding_ln": false,
+        "rotary_base": 10000,
+        "trust_remote_code": false,
+        "rotary_adjacent_pairs": false
+      },
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/ll_model_cfg.pkl",
+      "training_args": {
+        "atol": 0.05,
+        "lr": 0.0005,
+        "use_single_loss": false,
+        "iit_weight": 1.0,
+        "behavior_weight": 1.0,
+        "strict_weight": 0.5,
+        "epochs": 2000,
+        "act_fn": "gelu",
+        "clip_grad_norm": 0.1,
+        "lr_scheduler": "",
+        "model_pair": "strict"
+      },
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/edges.pkl"
+    },
+    {
+      "case_id": "24",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/24",
+      "task_description": "Identifies the first occurrence of each token in a sequence.",
+      "vocab": [
+        "a",
+        "b",
+        "c"
+      ],
+      "max_seq_len": 10,
+      "min_seq_len": 4,
+      "files": [
+        {
+          "file_name": "edges.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/edges.pkl"
+        },
+        {
+          "file_name": "ll_model.pth",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/ll_model.pth"
+        },
+        {
+          "file_name": "ll_model_cfg.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/ll_model_cfg.pkl"
+        },
+        {
+          "file_name": "meta.json",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/meta.json"
+        }
+      ],
+      "transformer_cfg": {
+        "n_layers": 2,
+        "d_model": 36,
+        "n_ctx": 10,
+        "d_head": 9,
+        "model_name": "custom",
+        "n_heads": 4,
+        "d_mlp": 144,
+        "act_fn": "gelu",
+        "d_vocab": 5,
+        "eps": 1e-05,
+        "use_attn_result": true,
+        "use_attn_scale": true,
+        "use_split_qkv_input": true,
+        "use_hook_mlp_in": true,
+        "use_attn_in": false,
+        "use_local_attn": false,
+        "original_architecture": null,
+        "from_checkpoint": false,
+        "checkpoint_index": null,
+        "checkpoint_label_type": null,
+        "checkpoint_value": null,
+        "tokenizer_name": null,
+        "window_size": null,
+        "attn_types": null,
+        "init_mode": "gpt2",
+        "normalization_type": null,
+        "n_devices": 1,
+        "attention_dir": "causal",
+        "attn_only": false,
+        "seed": 0,
+        "initializer_range": 0.1885618083164127,
+        "init_weights": true,
+        "scale_attn_by_inverse_layer_idx": false,
+        "positional_embedding_type": "standard",
+        "final_rms": false,
+        "d_vocab_out": 3,
+        "parallel_attn_mlp": false,
+        "rotary_dim": null,
+        "n_params": 31104,
+        "use_hook_tokens": false,
+        "gated_mlp": false,
+        "default_prepend_bos": true,
+        "dtype": "torch.float32",
+        "tokenizer_prepends_bos": null,
+        "n_key_value_heads": null,
+        "post_embedding_ln": false,
+        "rotary_base": 10000,
+        "trust_remote_code": false,
+        "rotary_adjacent_pairs": false,
+        "load_in_4bit": false,
+        "num_experts": null,
+        "experts_per_token": null
+      },
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/ll_model_cfg.pkl",
+      "training_args": {
+        "output_dir": "/circuits-benchmark/results",
+        "atol": 0.05,
+        "lr": 0.001,
+        "use_single_loss": true,
+        "iit_weight": 1.0,
+        "behavior_weight": 0.4,
+        "strict_weight": 0.4,
+        "epochs": 1000,
+        "early_stop_accuracy_threshold": 99.9,
+        "act_fn": "gelu",
+        "use_wandb": true,
+        "save_model_to_wandb": true,
+        "clip_grad_norm": 0.1,
+        "lr_scheduler": "linear",
+        "model_pair": "strict",
+        "same_size": false,
+        "seed": 67,
+        "batch_size": 256,
+        "include_mlp": false,
+        "detach_while_caching": true,
+        "scheduler_val_metric": [
+          "val/accuracy",
+          "val/IIA",
+          "val/strict_accuracy"
+        ],
+        "siit_sampling": "sample_all",
+        "val_iia_sampling": "all"
+      },
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/edges.pkl"
+    },
+    {
+      "case_id": "25",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/25",
+      "task_description": "Normalizes token frequencies in a sequence to a range between 0 and 1.",
+      "vocab": [
+        "a",
+        "b",
+        "c"
+      ],
+      "max_seq_len": 10,
+      "min_seq_len": 4,
+      "files": [
+        {
+          "file_name": "edges.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/25/edges.pkl"
+        },
+        {
+          "file_name": "ll_model.pth",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/25/ll_model.pth"
+        },
+        {
+          "file_name": "ll_model_cfg.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/25/ll_model_cfg.pkl"
+        },
+        {
+          "file_name": "meta.json",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/25/meta.json"
+        }
+      ],
+      "transformer_cfg": {
+        "n_layers": 2,
+        "d_model": 62,
+        "n_ctx": 10,
+        "d_head": 15,
+        "model_name": "custom",
+        "n_heads": 4,
+        "d_mlp": 248,
+        "act_fn": "gelu",
+        "d_vocab": 5,
+        "eps": 1e-05,
+        "use_attn_result": true,
+        "use_attn_scale": true,
+        "use_split_qkv_input": true,
+        "use_hook_mlp_in": true,
+        "use_attn_in": false,
+        "use_local_attn": false,
+        "original_architecture": null,
+        "from_checkpoint": false,
+        "checkpoint_index": null,
+        "checkpoint_label_type": null,
+        "checkpoint_value": null,
+        "tokenizer_name": null,
+        "window_size": null,
+        "attn_types": null,
+        "init_mode": "gpt2",
+        "normalization_type": null,
+        "n_devices": 1,
+        "attention_dir": "bidirectional",
+        "attn_only": false,
+        "seed": 0,
+        "initializer_range": 0.08295613557843402,
+        "init_weights": true,
+        "scale_attn_by_inverse_layer_idx": false,
+        "positional_embedding_type": "standard",
+        "final_rms": false,
+        "d_vocab_out": 56,
+        "parallel_attn_mlp": false,
+        "rotary_dim": null,
+        "n_params": 91264,
+        "use_hook_tokens": false,
+        "gated_mlp": false,
+        "default_prepend_bos": true,
+        "dtype": "torch.float32",
+        "tokenizer_prepends_bos": null,
+        "n_key_value_heads": null,
+        "post_embedding_ln": false,
+        "rotary_base": 10000,
+        "trust_remote_code": false,
+        "rotary_adjacent_pairs": false,
+        "load_in_4bit": false,
+        "num_experts": null,
+        "experts_per_token": null
+      },
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/25/ll_model_cfg.pkl",
+      "training_args": {
+        "output_dir": "/circuits-benchmark/results",
+        "atol": 0.05,
+        "lr": 0.001,
+        "use_single_loss": true,
+        "iit_weight": 1.0,
+        "behavior_weight": 0.4,
+        "strict_weight": 0.4,
+        "epochs": 1000,
+        "early_stop_accuracy_threshold": 99.9,
+        "act_fn": "gelu",
+        "use_wandb": true,
+        "save_model_to_wandb": true,
+        "clip_grad_norm": 0.1,
+        "lr_scheduler": "linear",
+        "model_pair": "strict",
+        "same_size": false,
+        "seed": 67,
+        "batch_size": 256,
+        "include_mlp": false,
+        "detach_while_caching": true,
+        "scheduler_val_metric": [
+          "val/accuracy",
+          "val/IIA",
+          "val/strict_accuracy"
+        ],
+        "siit_sampling": "sample_all",
+        "val_iia_sampling": "all"
+      },
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/25/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/25/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/25/edges.pkl"
+    },
+    {
+      "case_id": "26",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/26",
+      "task_description": "Creates a cascading effect by repeating each token in sequence incrementally.",
+      "vocab": [
+        "a",
+        "b",
+        "c"
+      ],
+      "max_seq_len": 10,
+      "min_seq_len": 4,
+      "files": [
+        {
+          "file_name": "edges.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/26/edges.pkl"
+        },
+        {
+          "file_name": "ll_model.pth",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/26/ll_model.pth"
+        },
+        {
+          "file_name": "ll_model_cfg.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/26/ll_model_cfg.pkl"
+        },
+        {
+          "file_name": "meta.json",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/26/meta.json"
+        }
+      ],
+      "transformer_cfg": {
+        "n_layers": 2,
+        "d_model": 21,
+        "n_ctx": 10,
+        "d_head": 5,
+        "model_name": "custom",
+        "n_heads": 4,
+        "d_mlp": 84,
+        "act_fn": "gelu",
+        "d_vocab": 5,
+        "eps": 1e-05,
+        "use_attn_result": true,
+        "use_attn_scale": true,
+        "use_split_qkv_input": true,
+        "use_hook_mlp_in": true,
+        "use_attn_in": false,
+        "use_local_attn": false,
+        "original_architecture": null,
+        "from_checkpoint": false,
+        "checkpoint_index": null,
+        "checkpoint_label_type": null,
+        "checkpoint_value": null,
+        "tokenizer_name": null,
+        "window_size": null,
+        "attn_types": null,
+        "init_mode": "gpt2",
+        "normalization_type": null,
+        "n_devices": 1,
+        "attention_dir": "causal",
+        "attn_only": false,
+        "seed": 0,
+        "initializer_range": 0.12344267996967354,
+        "init_weights": true,
+        "scale_attn_by_inverse_layer_idx": false,
+        "positional_embedding_type": "standard",
+        "final_rms": false,
+        "d_vocab_out": 27,
+        "parallel_attn_mlp": false,
+        "rotary_dim": null,
+        "n_params": 10416,
+        "use_hook_tokens": false,
+        "gated_mlp": false,
+        "default_prepend_bos": true,
+        "dtype": "torch.float32",
+        "tokenizer_prepends_bos": null,
+        "n_key_value_heads": null,
+        "post_embedding_ln": false,
+        "rotary_base": 10000,
+        "trust_remote_code": false,
+        "rotary_adjacent_pairs": false
+      },
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/26/ll_model_cfg.pkl",
+      "training_args": {
+        "atol": 0.05,
+        "lr": 0.01,
+        "use_single_loss": false,
+        "iit_weight": 1.0,
+        "behavior_weight": 1.0,
+        "strict_weight": 0.4,
+        "epochs": 2000,
+        "act_fn": "gelu",
+        "clip_grad_norm": 0.1,
+        "lr_scheduler": "",
+        "model_pair": "strict"
+      },
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/26/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/26/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/26/edges.pkl"
+    },
+    {
+      "case_id": "29",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/29",
+      "task_description": "Creates abbreviations for each token in the sequence.",
+      "vocab": [
+        "J",
+        "LB",
+        "TPSI",
+        "V",
+        "b",
+        "no",
+        "oCLrZaW",
+        "poiVg"
+      ],
+      "max_seq_len": 10,
+      "min_seq_len": 4,
+      "files": [
+        {
+          "file_name": "edges.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/29/edges.pkl"
+        },
+        {
+          "file_name": "ll_model.pth",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/29/ll_model.pth"
+        },
+        {
+          "file_name": "ll_model_cfg.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/29/ll_model_cfg.pkl"
+        },
+        {
+          "file_name": "meta.json",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/29/meta.json"
+        }
+      ],
+      "transformer_cfg": {
+        "n_layers": 2,
+        "d_model": 13,
+        "n_ctx": 10,
+        "d_head": 3,
+        "model_name": "custom",
+        "n_heads": 4,
+        "d_mlp": 52,
+        "act_fn": "gelu",
+        "d_vocab": 10,
+        "eps": 1e-05,
+        "use_attn_result": true,
+        "use_attn_scale": true,
+        "use_split_qkv_input": true,
+        "use_hook_mlp_in": true,
+        "use_attn_in": false,
+        "use_local_attn": false,
+        "original_architecture": null,
+        "from_checkpoint": false,
+        "checkpoint_index": null,
+        "checkpoint_label_type": null,
+        "checkpoint_value": null,
+        "tokenizer_name": null,
+        "window_size": null,
+        "attn_types": null,
+        "init_mode": "gpt2",
+        "normalization_type": null,
+        "n_devices": 1,
+        "attention_dir": "causal",
+        "attn_only": false,
+        "seed": 0,
+        "initializer_range": 0.1539600717839002,
+        "init_weights": true,
+        "scale_attn_by_inverse_layer_idx": false,
+        "positional_embedding_type": "standard",
+        "final_rms": false,
+        "d_vocab_out": 8,
+        "parallel_attn_mlp": false,
+        "rotary_dim": null,
+        "n_params": 3952,
+        "use_hook_tokens": false,
+        "gated_mlp": false,
+        "default_prepend_bos": true,
+        "dtype": "torch.float32",
+        "tokenizer_prepends_bos": null,
+        "n_key_value_heads": null,
+        "post_embedding_ln": false,
+        "rotary_base": 10000,
+        "trust_remote_code": false,
+        "rotary_adjacent_pairs": false
+      },
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/29/ll_model_cfg.pkl",
+      "training_args": {
+        "atol": 0.05,
+        "lr": 0.01,
+        "use_single_loss": false,
+        "iit_weight": 1.0,
+        "behavior_weight": 1.0,
+        "strict_weight": 0.4,
+        "epochs": 2000,
+        "act_fn": "gelu",
+        "clip_grad_norm": 0.1,
+        "lr_scheduler": "",
+        "model_pair": "strict"
+      },
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/29/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/29/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/29/edges.pkl"
+    },
+    {
+      "case_id": "3",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/3",
+      "task_description": "Returns the fraction of 'x' in the input up to the i-th position for all i.",
+      "vocab": [
+        "a",
+        "b",
+        "c",
+        "x"
+      ],
+      "max_seq_len": 5,
+      "min_seq_len": 4,
+      "files": [
+        {
+          "file_name": "edges.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/edges.pkl"
+        },
+        {
+          "file_name": "ll_model.pth",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/ll_model.pth"
+        },
+        {
+          "file_name": "ll_model_cfg.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/ll_model_cfg.pkl"
+        },
+        {
+          "file_name": "meta.json",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/meta.json"
+        }
+      ],
+      "transformer_cfg": {
+        "n_layers": 2,
+        "d_model": 12,
+        "n_ctx": 5,
+        "d_head": 3,
+        "model_name": "custom",
+        "n_heads": 4,
+        "d_mlp": 48,
+        "act_fn": "gelu",
+        "d_vocab": 6,
+        "eps": 1e-05,
+        "use_attn_result": true,
+        "use_attn_scale": true,
+        "use_split_qkv_input": true,
+        "use_hook_mlp_in": true,
+        "use_attn_in": false,
+        "use_local_attn": false,
+        "original_architecture": null,
+        "from_checkpoint": false,
+        "checkpoint_index": null,
+        "checkpoint_label_type": null,
+        "checkpoint_value": null,
+        "tokenizer_name": null,
+        "window_size": null,
+        "attn_types": null,
+        "init_mode": "gpt2",
+        "normalization_type": null,
+        "n_devices": 1,
+        "attention_dir": "causal",
+        "attn_only": false,
+        "seed": 0,
+        "initializer_range": 0.22188007849009167,
+        "init_weights": true,
+        "scale_attn_by_inverse_layer_idx": false,
+        "positional_embedding_type": "standard",
+        "final_rms": false,
+        "d_vocab_out": 1,
+        "parallel_attn_mlp": false,
+        "rotary_dim": null,
+        "n_params": 3456,
+        "use_hook_tokens": false,
+        "gated_mlp": false,
+        "default_prepend_bos": true,
+        "dtype": "torch.float32",
+        "tokenizer_prepends_bos": null,
+        "n_key_value_heads": null,
+        "post_embedding_ln": false,
+        "rotary_base": 10000,
+        "trust_remote_code": false,
+        "rotary_adjacent_pairs": false
+      },
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/ll_model_cfg.pkl",
+      "training_args": {
+        "atol": 0.05,
+        "lr": 0.01,
+        "use_single_loss": false,
+        "iit_weight": 1.0,
+        "behavior_weight": 1.0,
+        "strict_weight": 10.0,
+        "epochs": 2000,
+        "act_fn": "gelu",
+        "clip_grad_norm": 0.1,
+        "lr_scheduler": "",
+        "model_pair": "strict",
+        "same_size": false
+      },
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/edges.pkl"
+    },
+    {
+      "case_id": "30",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/30",
+      "task_description": "Tags numeric tokens in a sequence based on whether they fall within a given range.",
+      "vocab": [
+        "0",
+        "1",
+        "10",
+        "11",
+        "12",
+        "13",
+        "14",
+        "15",
+        "16",
+        "17",
+        "18",
+        "19",
+        "2",
+        "20",
+        "21",
+        "22",
+        "23",
+        "24",
+        "25",
+        "26",
+        "27",
+        "28",
+        "29",
+        "3",
+        "4",
+        "5",
+        "6",
+        "7",
+        "8",
+        "9"
+      ],
+      "max_seq_len": 10,
+      "min_seq_len": 4,
+      "files": [
+        {
+          "file_name": "edges.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/30/edges.pkl"
+        },
+        {
+          "file_name": "ll_model.pth",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/30/ll_model.pth"
+        },
+        {
+          "file_name": "ll_model_cfg.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/30/ll_model_cfg.pkl"
+        },
+        {
+          "file_name": "meta.json",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/30/meta.json"
+        }
+      ],
+      "transformer_cfg": {
+        "n_layers": 2,
+        "d_model": 4,
+        "n_ctx": 10,
+        "d_head": 1,
+        "model_name": "custom",
+        "n_heads": 4,
+        "d_mlp": 16,
+        "act_fn": "gelu",
+        "d_vocab": 32,
+        "eps": 1e-05,
+        "use_attn_result": true,
+        "use_attn_scale": true,
+        "use_split_qkv_input": true,
+        "use_hook_mlp_in": true,
+        "use_attn_in": false,
+        "use_local_attn": false,
+        "original_architecture": null,
+        "from_checkpoint": false,
+        "checkpoint_index": null,
+        "checkpoint_label_type": null,
+        "checkpoint_value": null,
+        "tokenizer_name": null,
+        "window_size": null,
+        "attn_types": null,
+        "init_mode": "gpt2",
+        "normalization_type": null,
+        "n_devices": 1,
+        "attention_dir": "causal",
+        "attn_only": false,
+        "seed": 0,
+        "initializer_range": 0.12199885626608374,
+        "init_weights": true,
+        "scale_attn_by_inverse_layer_idx": false,
+        "positional_embedding_type": "standard",
+        "final_rms": false,
+        "d_vocab_out": 2,
+        "parallel_attn_mlp": false,
+        "rotary_dim": null,
+        "n_params": 384,
+        "use_hook_tokens": false,
+        "gated_mlp": false,
+        "default_prepend_bos": true,
+        "dtype": "torch.float32",
+        "tokenizer_prepends_bos": null,
+        "n_key_value_heads": null,
+        "post_embedding_ln": false,
+        "rotary_base": 10000,
+        "trust_remote_code": false,
+        "rotary_adjacent_pairs": false,
+        "load_in_4bit": false,
+        "num_experts": null,
+        "experts_per_token": null
+      },
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/30/ll_model_cfg.pkl",
+      "training_args": {
+        "output_dir": "/circuits-benchmark/results",
+        "atol": 0.05,
+        "lr": 0.001,
+        "use_single_loss": true,
+        "iit_weight": 1.0,
+        "behavior_weight": 0.4,
+        "strict_weight": 0.4,
+        "epochs": 1000,
+        "early_stop_accuracy_threshold": 99.9,
+        "act_fn": "gelu",
+        "use_wandb": true,
+        "save_model_to_wandb": true,
+        "clip_grad_norm": 0.1,
+        "lr_scheduler": "linear",
+        "model_pair": "strict",
+        "same_size": false,
+        "seed": 67,
+        "batch_size": 256,
+        "include_mlp": false,
+        "detach_while_caching": true,
+        "scheduler_val_metric": [
+          "val/accuracy",
+          "val/IIA",
+          "val/strict_accuracy"
+        ],
+        "siit_sampling": "sample_all",
+        "val_iia_sampling": "all"
+      },
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/30/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/30/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/30/edges.pkl"
+    },
+    {
+      "case_id": "31",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/31",
+      "task_description": "Identify if tokens in the sequence are anagrams of the word 'listen'.",
+      "vocab": [
+        "J",
+        "LB",
+        "TPSI",
+        "V",
+        "b",
+        "listen",
+        "no",
+        "oCLrZaW",
+        "poiVg"
+      ],
+      "max_seq_len": 10,
+      "min_seq_len": 4,
+      "files": [
+        {
+          "file_name": "edges.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/31/edges.pkl"
+        },
+        {
+          "file_name": "ll_model.pth",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/31/ll_model.pth"
+        },
+        {
+          "file_name": "ll_model_cfg.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/31/ll_model_cfg.pkl"
+        },
+        {
+          "file_name": "meta.json",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/31/meta.json"
+        }
+      ],
+      "transformer_cfg": {
+        "n_layers": 2,
+        "d_model": 4,
+        "n_ctx": 10,
+        "d_head": 1,
+        "model_name": "custom",
+        "n_heads": 4,
+        "d_mlp": 16,
+        "act_fn": "gelu",
+        "d_vocab": 11,
+        "eps": 1e-05,
+        "use_attn_result": true,
+        "use_attn_scale": true,
+        "use_split_qkv_input": true,
+        "use_hook_mlp_in": true,
+        "use_attn_in": false,
+        "use_local_attn": false,
+        "original_architecture": null,
+        "from_checkpoint": false,
+        "checkpoint_index": null,
+        "checkpoint_label_type": null,
+        "checkpoint_value": null,
+        "tokenizer_name": null,
+        "window_size": null,
+        "attn_types": null,
+        "init_mode": "gpt2",
+        "normalization_type": null,
+        "n_devices": 1,
+        "attention_dir": "causal",
+        "attn_only": false,
+        "seed": 0,
+        "initializer_range": 0.17056057308448835,
+        "init_weights": true,
+        "scale_attn_by_inverse_layer_idx": false,
+        "positional_embedding_type": "standard",
+        "final_rms": false,
+        "d_vocab_out": 2,
+        "parallel_attn_mlp": false,
+        "rotary_dim": null,
+        "n_params": 384,
+        "use_hook_tokens": false,
+        "gated_mlp": false,
+        "default_prepend_bos": true,
+        "dtype": "torch.float32",
+        "tokenizer_prepends_bos": null,
+        "n_key_value_heads": null,
+        "post_embedding_ln": false,
+        "rotary_base": 10000,
+        "trust_remote_code": false,
+        "rotary_adjacent_pairs": false,
+        "load_in_4bit": false,
+        "num_experts": null,
+        "experts_per_token": null
+      },
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/31/ll_model_cfg.pkl",
+      "training_args": {
+        "output_dir": "/circuits-benchmark/results",
+        "atol": 0.05,
+        "lr": 0.001,
+        "use_single_loss": true,
+        "iit_weight": 1.0,
+        "behavior_weight": 0.4,
+        "strict_weight": 0.4,
+        "epochs": 1000,
+        "early_stop_accuracy_threshold": 99.9,
+        "act_fn": "gelu",
+        "use_wandb": true,
+        "save_model_to_wandb": true,
+        "clip_grad_norm": 0.1,
+        "lr_scheduler": "linear",
+        "model_pair": "strict",
+        "same_size": false,
+        "seed": 67,
+        "batch_size": 256,
+        "include_mlp": false,
+        "detach_while_caching": true,
+        "scheduler_val_metric": [
+          "val/accuracy",
+          "val/IIA",
+          "val/strict_accuracy"
+        ],
+        "siit_sampling": "sample_all",
+        "val_iia_sampling": "all"
+      },
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/31/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/31/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/31/edges.pkl"
+    },
+    {
+      "case_id": "33",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/33",
+      "task_description": "Checks if each token's length is odd or even.",
+      "vocab": [
+        "J",
+        "LB",
+        "TPSI",
+        "V",
+        "b",
+        "no",
+        "oCLrZaW",
+        "poiVg"
+      ],
+      "max_seq_len": 10,
+      "min_seq_len": 4,
+      "files": [
+        {
+          "file_name": "edges.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/edges.pkl"
+        },
+        {
+          "file_name": "ll_model.pth",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/ll_model.pth"
+        },
+        {
+          "file_name": "ll_model_cfg.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/ll_model_cfg.pkl"
+        },
+        {
+          "file_name": "meta.json",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/meta.json"
+        }
+      ],
+      "transformer_cfg": {
+        "n_layers": 2,
+        "d_model": 4,
+        "n_ctx": 10,
+        "d_head": 1,
+        "model_name": "custom",
+        "n_heads": 4,
+        "d_mlp": 16,
+        "act_fn": "gelu",
+        "d_vocab": 10,
+        "eps": 1e-05,
+        "use_attn_result": true,
+        "use_attn_scale": true,
+        "use_split_qkv_input": true,
+        "use_hook_mlp_in": true,
+        "use_attn_in": false,
+        "use_local_attn": false,
+        "original_architecture": null,
+        "from_checkpoint": false,
+        "checkpoint_index": null,
+        "checkpoint_label_type": null,
+        "checkpoint_value": null,
+        "tokenizer_name": null,
+        "window_size": null,
+        "attn_types": null,
+        "init_mode": "gpt2",
+        "normalization_type": null,
+        "n_devices": 1,
+        "attention_dir": "causal",
+        "attn_only": false,
+        "seed": 0,
+        "initializer_range": 0.17457431218879393,
+        "init_weights": true,
+        "scale_attn_by_inverse_layer_idx": false,
+        "positional_embedding_type": "standard",
+        "final_rms": false,
+        "d_vocab_out": 2,
+        "parallel_attn_mlp": false,
+        "rotary_dim": null,
+        "n_params": 384,
+        "use_hook_tokens": false,
+        "gated_mlp": false,
+        "default_prepend_bos": true,
+        "dtype": "torch.float32",
+        "tokenizer_prepends_bos": null,
+        "n_key_value_heads": null,
+        "post_embedding_ln": false,
+        "rotary_base": 10000,
+        "trust_remote_code": false,
+        "rotary_adjacent_pairs": false
+      },
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/ll_model_cfg.pkl",
+      "training_args": {
+        "atol": 0.05,
+        "lr": 0.001,
+        "use_single_loss": false,
+        "iit_weight": 1.0,
+        "behavior_weight": 1.0,
+        "strict_weight": 0.4,
+        "epochs": 2000,
+        "act_fn": "gelu",
+        "clip_grad_norm": 0.1,
+        "lr_scheduler": ""
+      },
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/edges.pkl"
+    },
+    {
+      "case_id": "34",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/34",
+      "task_description": "Calculate the ratio of vowels to consonants in each word.",
+      "vocab": [
+        "J",
+        "LB",
+        "TPSI",
+        "V",
+        "b",
+        "no",
+        "oCLrZaW",
+        "poiVg"
+      ],
+      "max_seq_len": 10,
+      "min_seq_len": 4,
+      "files": [
+        {
+          "file_name": "edges.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/edges.pkl"
+        },
+        {
+          "file_name": "ll_model.pth",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/ll_model.pth"
+        },
+        {
+          "file_name": "ll_model_cfg.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/ll_model_cfg.pkl"
+        },
+        {
+          "file_name": "meta.json",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/meta.json"
+        }
+      ],
+      "transformer_cfg": {
+        "n_layers": 2,
+        "d_model": 16,
+        "n_ctx": 10,
+        "d_head": 4,
+        "model_name": "custom",
+        "n_heads": 4,
+        "d_mlp": 64,
+        "act_fn": "gelu",
+        "d_vocab": 10,
+        "eps": 1e-05,
+        "use_attn_result": true,
+        "use_attn_scale": true,
+        "use_split_qkv_input": true,
+        "use_hook_mlp_in": true,
+        "use_attn_in": false,
+        "use_local_attn": false,
+        "original_architecture": null,
+        "from_checkpoint": false,
+        "checkpoint_index": null,
+        "checkpoint_label_type": null,
+        "checkpoint_value": null,
+        "tokenizer_name": null,
+        "window_size": null,
+        "attn_types": null,
+        "init_mode": "gpt2",
+        "normalization_type": null,
+        "n_devices": 1,
+        "attention_dir": "causal",
+        "attn_only": false,
+        "seed": 0,
+        "initializer_range": 0.16329931618554522,
+        "init_weights": true,
+        "scale_attn_by_inverse_layer_idx": false,
+        "positional_embedding_type": "standard",
+        "final_rms": false,
+        "d_vocab_out": 5,
+        "parallel_attn_mlp": false,
+        "rotary_dim": null,
+        "n_params": 6144,
+        "use_hook_tokens": false,
+        "gated_mlp": false,
+        "default_prepend_bos": true,
+        "dtype": "torch.float32",
+        "tokenizer_prepends_bos": null,
+        "n_key_value_heads": null,
+        "post_embedding_ln": false,
+        "rotary_base": 10000,
+        "trust_remote_code": false,
+        "rotary_adjacent_pairs": false
+      },
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/ll_model_cfg.pkl",
+      "training_args": {
+        "atol": 0.05,
+        "lr": 0.01,
+        "use_single_loss": false,
+        "iit_weight": 1.0,
+        "behavior_weight": 1.0,
+        "strict_weight": 1.0,
+        "epochs": 2000,
+        "act_fn": "gelu",
+        "clip_grad_norm": 0.1,
+        "lr_scheduler": "",
+        "model_pair": "strict"
+      },
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/edges.pkl"
+    },
+    {
+      "case_id": "35",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/35",
+      "task_description": "Alternates capitalization of each character in words.",
+      "vocab": [
+        "J",
+        "LB",
+        "TPSI",
+        "V",
+        "b",
+        "no",
+        "oCLrZaW",
+        "poiVg"
+      ],
+      "max_seq_len": 10,
+      "min_seq_len": 4,
+      "files": [
+        {
+          "file_name": "edges.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/edges.pkl"
+        },
+        {
+          "file_name": "ll_model.pth",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/ll_model.pth"
+        },
+        {
+          "file_name": "ll_model_cfg.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/ll_model_cfg.pkl"
+        },
+        {
+          "file_name": "meta.json",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/meta.json"
+        }
+      ],
+      "transformer_cfg": {
+        "n_layers": 2,
+        "d_model": 9,
+        "n_ctx": 10,
+        "d_head": 2,
+        "model_name": "custom",
+        "n_heads": 4,
+        "d_mlp": 36,
+        "act_fn": "gelu",
+        "d_vocab": 10,
+        "eps": 1e-05,
+        "use_attn_result": true,
+        "use_attn_scale": true,
+        "use_split_qkv_input": true,
+        "use_hook_mlp_in": true,
+        "use_attn_in": false,
+        "use_local_attn": false,
+        "original_architecture": null,
+        "from_checkpoint": false,
+        "checkpoint_index": null,
+        "checkpoint_label_type": null,
+        "checkpoint_value": null,
+        "tokenizer_name": null,
+        "window_size": null,
+        "attn_types": null,
+        "init_mode": "gpt2",
+        "normalization_type": null,
+        "n_devices": 1,
+        "attention_dir": "causal",
+        "attn_only": false,
+        "seed": 0,
+        "initializer_range": 0.1539600717839002,
+        "init_weights": true,
+        "scale_attn_by_inverse_layer_idx": false,
+        "positional_embedding_type": "standard",
+        "final_rms": false,
+        "d_vocab_out": 8,
+        "parallel_attn_mlp": false,
+        "rotary_dim": null,
+        "n_params": 1872,
+        "use_hook_tokens": false,
+        "gated_mlp": false,
+        "default_prepend_bos": true,
+        "dtype": "torch.float32",
+        "tokenizer_prepends_bos": null,
+        "n_key_value_heads": null,
+        "post_embedding_ln": false,
+        "rotary_base": 10000,
+        "trust_remote_code": false,
+        "rotary_adjacent_pairs": false
+      },
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/ll_model_cfg.pkl",
+      "training_args": {
+        "atol": 0.05,
+        "lr": 0.01,
+        "use_single_loss": false,
+        "iit_weight": 1.0,
+        "behavior_weight": 1.0,
+        "strict_weight": 1.0,
+        "epochs": 2000,
+        "act_fn": "gelu",
+        "clip_grad_norm": 0.1,
+        "lr_scheduler": "",
+        "model_pair": "strict"
+      },
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/edges.pkl"
+    },
+    {
+      "case_id": "36",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/36",
+      "task_description": "Classifies each token as 'positive', 'negative', or 'neutral' based on emojis.",
+      "vocab": [
+        "\ud83d\udcd8",
+        "\ud83d\ude0a",
+        "\ud83d\ude22"
+      ],
+      "max_seq_len": 10,
+      "min_seq_len": 4,
+      "files": [
+        {
+          "file_name": "edges.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/edges.pkl"
+        },
+        {
+          "file_name": "ll_model.pth",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/ll_model.pth"
+        },
+        {
+          "file_name": "ll_model_cfg.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/ll_model_cfg.pkl"
+        },
+        {
+          "file_name": "meta.json",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/meta.json"
+        }
+      ],
+      "transformer_cfg": {
+        "n_layers": 2,
+        "d_model": 6,
+        "n_ctx": 10,
+        "d_head": 1,
+        "model_name": "custom",
+        "n_heads": 4,
+        "d_mlp": 24,
+        "act_fn": "gelu",
+        "d_vocab": 5,
+        "eps": 1e-05,
+        "use_attn_result": true,
+        "use_attn_scale": true,
+        "use_split_qkv_input": true,
+        "use_hook_mlp_in": true,
+        "use_attn_in": false,
+        "use_local_attn": false,
+        "original_architecture": null,
+        "from_checkpoint": false,
+        "checkpoint_index": null,
+        "checkpoint_label_type": null,
+        "checkpoint_value": null,
+        "tokenizer_name": null,
+        "window_size": null,
+        "attn_types": null,
+        "init_mode": "gpt2",
+        "normalization_type": null,
+        "n_devices": 1,
+        "attention_dir": "causal",
+        "attn_only": false,
+        "seed": 0,
+        "initializer_range": 0.19402850002906638,
+        "init_weights": true,
+        "scale_attn_by_inverse_layer_idx": false,
+        "positional_embedding_type": "standard",
+        "final_rms": false,
+        "d_vocab_out": 3,
+        "parallel_attn_mlp": false,
+        "rotary_dim": null,
+        "n_params": 768,
+        "use_hook_tokens": false,
+        "gated_mlp": false,
+        "default_prepend_bos": true,
+        "dtype": "torch.float32",
+        "tokenizer_prepends_bos": null,
+        "n_key_value_heads": null,
+        "post_embedding_ln": false,
+        "rotary_base": 10000,
+        "trust_remote_code": false,
+        "rotary_adjacent_pairs": false
+      },
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/ll_model_cfg.pkl",
+      "training_args": {
+        "atol": 0.05,
+        "lr": 0.01,
+        "use_single_loss": false,
+        "iit_weight": 1.0,
+        "behavior_weight": 1.0,
+        "strict_weight": 1.0,
+        "epochs": 2000,
+        "act_fn": "gelu",
+        "clip_grad_norm": 0.1,
+        "lr_scheduler": "",
+        "model_pair": "strict"
+      },
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/edges.pkl"
+    },
+    {
+      "case_id": "37",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/37",
+      "task_description": "Reverses each word in the sequence except for specified exclusions.",
+      "vocab": [
+        "J",
+        "LB",
+        "TPSI",
+        "V",
+        "b",
+        "no",
+        "oCLrZaW",
+        "poiVg"
+      ],
+      "max_seq_len": 10,
+      "min_seq_len": 4,
+      "files": [
+        {
+          "file_name": "edges.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/edges.pkl"
+        },
+        {
+          "file_name": "ll_model.pth",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/ll_model.pth"
+        },
+        {
+          "file_name": "ll_model_cfg.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/ll_model_cfg.pkl"
+        },
+        {
+          "file_name": "meta.json",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/meta.json"
+        }
+      ],
+      "transformer_cfg": {
+        "n_layers": 2,
+        "d_model": 12,
+        "n_ctx": 10,
+        "d_head": 3,
+        "model_name": "custom",
+        "n_heads": 4,
+        "d_mlp": 48,
+        "act_fn": "gelu",
+        "d_vocab": 10,
+        "eps": 1e-05,
+        "use_attn_result": true,
+        "use_attn_scale": true,
+        "use_split_qkv_input": true,
+        "use_hook_mlp_in": true,
+        "use_attn_in": false,
+        "use_local_attn": false,
+        "original_architecture": null,
+        "from_checkpoint": false,
+        "checkpoint_index": null,
+        "checkpoint_label_type": null,
+        "checkpoint_value": null,
+        "tokenizer_name": null,
+        "window_size": null,
+        "attn_types": null,
+        "init_mode": "gpt2",
+        "normalization_type": null,
+        "n_devices": 1,
+        "attention_dir": "causal",
+        "attn_only": false,
+        "seed": 0,
+        "initializer_range": 0.1539600717839002,
+        "init_weights": true,
+        "scale_attn_by_inverse_layer_idx": false,
+        "positional_embedding_type": "standard",
+        "final_rms": false,
+        "d_vocab_out": 8,
+        "parallel_attn_mlp": false,
+        "rotary_dim": null,
+        "n_params": 3456,
+        "use_hook_tokens": false,
+        "gated_mlp": false,
+        "default_prepend_bos": true,
+        "dtype": "torch.float32",
+        "tokenizer_prepends_bos": null,
+        "n_key_value_heads": null,
+        "post_embedding_ln": false,
+        "rotary_base": 10000,
+        "trust_remote_code": false,
+        "rotary_adjacent_pairs": false
+      },
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/ll_model_cfg.pkl",
+      "training_args": {
+        "atol": 0.05,
+        "lr": 0.01,
+        "use_single_loss": false,
+        "iit_weight": 1.0,
+        "behavior_weight": 1.0,
+        "strict_weight": 1.0,
+        "epochs": 2000,
+        "act_fn": "gelu",
+        "clip_grad_norm": 0.1,
+        "lr_scheduler": "",
+        "model_pair": "strict"
+      },
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/edges.pkl"
+    },
+    {
+      "case_id": "39",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/39",
+      "task_description": "Returns the fraction of 'x' in the input up to the i-th position for all i.",
+      "vocab": [
+        "a",
         "b",
-        "no",
-        "oCLrZaW",
-        "poiVg"
+        "c",
+        "d",
+        "e",
+        "f",
+        "g",
+        "h",
+        "i",
+        "j",
+        "k",
+        "l",
+        "m",
+        "n",
+        "o",
+        "p",
+        "q",
+        "r",
+        "s",
+        "t",
+        "u",
+        "v",
+        "w",
+        "x",
+        "y",
+        "z"
       ],
-      "max_seq_len": 10,
+      "max_seq_len": 60,
       "min_seq_len": 4,
       "files": [
         {
           "file_name": "edges.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/edges.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/39/edges.pkl"
         },
         {
           "file_name": "ll_model.pth",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/ll_model.pth"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/39/ll_model.pth"
         },
         {
           "file_name": "ll_model_cfg.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/ll_model_cfg.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/39/ll_model_cfg.pkl"
         },
         {
           "file_name": "meta.json",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/meta.json"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/39/meta.json"
         }
       ],
       "transformer_cfg": {
         "n_layers": 2,
-        "d_model": 12,
-        "n_ctx": 10,
-        "d_head": 3,
+        "d_model": 120,
+        "n_ctx": 60,
+        "d_head": 30,
         "model_name": "custom",
         "n_heads": 4,
-        "d_mlp": 48,
+        "d_mlp": 480,
         "act_fn": "gelu",
-        "d_vocab": 10,
+        "d_vocab": 28,
         "eps": 1e-05,
         "use_attn_result": true,
         "use_attn_scale": true,
@@ -70,15 +3457,15 @@
         "attention_dir": "causal",
         "attn_only": false,
         "seed": 0,
-        "initializer_range": 0.1460593486680443,
+        "initializer_range": 0.08432740427115679,
         "init_weights": true,
         "scale_attn_by_inverse_layer_idx": false,
         "positional_embedding_type": "standard",
         "final_rms": false,
-        "d_vocab_out": 5,
+        "d_vocab_out": 1,
         "parallel_attn_mlp": false,
         "rotary_dim": null,
-        "n_params": 3456,
+        "n_params": 345600,
         "use_hook_tokens": false,
         "gated_mlp": false,
         "default_prepend_bos": true,
@@ -88,52 +3475,74 @@
         "post_embedding_ln": false,
         "rotary_base": 10000,
         "trust_remote_code": false,
-        "rotary_adjacent_pairs": false
+        "rotary_adjacent_pairs": false,
+        "load_in_4bit": false,
+        "num_experts": null,
+        "experts_per_token": null
       },
-      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/ll_model_cfg.pkl",
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/39/ll_model_cfg.pkl",
       "training_args": {
+        "output_dir": "/circuits-benchmark/results",
         "atol": 0.05,
-        "lr": 0.01,
-        "use_single_loss": false,
+        "lr": 0.001,
+        "use_single_loss": true,
         "iit_weight": 1.0,
-        "behavior_weight": 1.0,
+        "behavior_weight": 0.4,
         "strict_weight": 0.4,
-        "epochs": 500,
+        "epochs": 1000,
+        "early_stop_accuracy_threshold": 99.9,
         "act_fn": "gelu",
-        "clip_grad_norm": 1.0,
-        "lr_scheduler": ""
+        "use_wandb": true,
+        "save_model_to_wandb": true,
+        "clip_grad_norm": 0.1,
+        "lr_scheduler": "linear",
+        "model_pair": "strict",
+        "same_size": false,
+        "seed": 67,
+        "batch_size": 256,
+        "include_mlp": false,
+        "detach_while_caching": true,
+        "scheduler_val_metric": [
+          "val/accuracy",
+          "val/IIA",
+          "val/strict_accuracy"
+        ],
+        "siit_sampling": "sample_all",
+        "val_iia_sampling": "all"
       },
-      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/meta.json",
-      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/ll_model.pth",
-      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/edges.pkl"
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/39/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/39/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/39/edges.pkl"
     },
     {
-      "case_id": "13",
-      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/13",
-      "task_description": "Analyzes the trend (increasing, decreasing, constant) of numeric tokens.",
+      "case_id": "4",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/4",
+      "task_description": "Return fraction of previous open tokens minus the fraction of close tokens.",
       "vocab": [
-        0,
-        1,
-        2
+        "(",
+        ")",
+        "a",
+        "b",
+        "c"
       ],
       "max_seq_len": 10,
       "min_seq_len": 4,
       "files": [
         {
           "file_name": "edges.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/edges.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/edges.pkl"
         },
         {
           "file_name": "ll_model.pth",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/ll_model.pth"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/ll_model.pth"
         },
         {
           "file_name": "ll_model_cfg.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/ll_model_cfg.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/ll_model_cfg.pkl"
         },
         {
           "file_name": "meta.json",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/meta.json"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/meta.json"
         }
       ],
       "transformer_cfg": {
@@ -145,7 +3554,7 @@
         "n_heads": 4,
         "d_mlp": 80,
         "act_fn": "gelu",
-        "d_vocab": 5,
+        "d_vocab": 7,
         "eps": 1e-05,
         "use_attn_result": true,
         "use_attn_scale": true,
@@ -164,15 +3573,15 @@
         "init_mode": "gpt2",
         "normalization_type": null,
         "n_devices": 1,
-        "attention_dir": "bidirectional",
+        "attention_dir": "causal",
         "attn_only": false,
         "seed": 0,
-        "initializer_range": 0.1460593486680443,
+        "initializer_range": 0.17056057308448835,
         "init_weights": true,
         "scale_attn_by_inverse_layer_idx": false,
         "positional_embedding_type": "standard",
         "final_rms": false,
-        "d_vocab_out": 3,
+        "d_vocab_out": 1,
         "parallel_attn_mlp": false,
         "rotary_dim": null,
         "n_params": 9600,
@@ -187,64 +3596,88 @@
         "trust_remote_code": false,
         "rotary_adjacent_pairs": false
       },
-      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/ll_model_cfg.pkl",
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/ll_model_cfg.pkl",
       "training_args": {
         "atol": 0.05,
-        "lr": 0.01,
+        "lr": 0.001,
         "use_single_loss": false,
         "iit_weight": 1.0,
         "behavior_weight": 1.0,
         "strict_weight": 0.4,
-        "epochs": 500,
+        "epochs": 2000,
         "act_fn": "gelu",
-        "clip_grad_norm": 1.0,
+        "clip_grad_norm": 0.1,
         "lr_scheduler": ""
       },
-      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/meta.json",
-      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/ll_model.pth",
-      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/edges.pkl"
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/edges.pkl"
     },
     {
-      "case_id": "18",
-      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/18",
-      "task_description": "Classify each token based on its frequency as 'rare', 'common', or 'frequent'.",
+      "case_id": "40",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/40",
+      "task_description": "Sum the last and previous to last digits of a number",
       "vocab": [
-        "a",
-        "b",
-        "c",
-        "d",
-        "e"
+        0,
+        1,
+        2,
+        3,
+        4,
+        5,
+        6,
+        7,
+        8,
+        9,
+        10,
+        11,
+        12,
+        13,
+        14,
+        15,
+        16,
+        17,
+        18,
+        19,
+        20,
+        21,
+        22,
+        23,
+        24,
+        25,
+        26,
+        27,
+        28
       ],
       "max_seq_len": 10,
       "min_seq_len": 4,
       "files": [
         {
           "file_name": "edges.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/edges.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/40/edges.pkl"
         },
         {
           "file_name": "ll_model.pth",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/ll_model.pth"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/40/ll_model.pth"
         },
         {
           "file_name": "ll_model_cfg.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/ll_model_cfg.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/40/ll_model_cfg.pkl"
         },
         {
           "file_name": "meta.json",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/meta.json"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/40/meta.json"
         }
       ],
       "transformer_cfg": {
         "n_layers": 2,
-        "d_model": 26,
+        "d_model": 4,
         "n_ctx": 10,
-        "d_head": 6,
+        "d_head": 1,
         "model_name": "custom",
         "n_heads": 4,
-        "d_mlp": 104,
+        "d_mlp": 16,
         "act_fn": "gelu",
-        "d_vocab": 7,
+        "d_vocab": 31,
         "eps": 1e-05,
         "use_attn_result": true,
         "use_attn_scale": true,
@@ -263,18 +3696,18 @@
         "init_mode": "gpt2",
         "normalization_type": null,
         "n_devices": 1,
-        "attention_dir": "bidirectional",
+        "attention_dir": "causal",
         "attn_only": false,
         "seed": 0,
-        "initializer_range": 0.12344267996967354,
+        "initializer_range": 0.09847319278346618,
         "init_weights": true,
         "scale_attn_by_inverse_layer_idx": false,
         "positional_embedding_type": "standard",
         "final_rms": false,
-        "d_vocab_out": 3,
+        "d_vocab_out": 12,
         "parallel_attn_mlp": false,
         "rotary_dim": null,
-        "n_params": 15808,
+        "n_params": 384,
         "use_hook_tokens": false,
         "gated_mlp": false,
         "default_prepend_bos": true,
@@ -284,65 +3717,92 @@
         "post_embedding_ln": false,
         "rotary_base": 10000,
         "trust_remote_code": false,
-        "rotary_adjacent_pairs": false
+        "rotary_adjacent_pairs": false,
+        "load_in_4bit": false,
+        "num_experts": null,
+        "experts_per_token": null
       },
-      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/ll_model_cfg.pkl",
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/40/ll_model_cfg.pkl",
       "training_args": {
+        "output_dir": "/circuits-benchmark/results",
         "atol": 0.05,
         "lr": 0.001,
-        "use_single_loss": false,
+        "use_single_loss": true,
         "iit_weight": 1.0,
-        "behavior_weight": 1.0,
-        "strict_weight": 1.0,
-        "epochs": 2000,
+        "behavior_weight": 0.4,
+        "strict_weight": 0.4,
+        "epochs": 1000,
+        "early_stop_accuracy_threshold": 99.9,
         "act_fn": "gelu",
+        "use_wandb": true,
+        "save_model_to_wandb": true,
         "clip_grad_norm": 0.1,
-        "lr_scheduler": "",
-        "model_pair": "strict"
+        "lr_scheduler": "linear",
+        "model_pair": "strict",
+        "same_size": false,
+        "seed": 67,
+        "batch_size": 256,
+        "include_mlp": false,
+        "detach_while_caching": true,
+        "scheduler_val_metric": [
+          "val/accuracy",
+          "val/IIA",
+          "val/strict_accuracy"
+        ],
+        "siit_sampling": "sample_all",
+        "val_iia_sampling": "all"
       },
-      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/meta.json",
-      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/ll_model.pth",
-      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/edges.pkl"
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/40/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/40/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/40/edges.pkl"
     },
     {
-      "case_id": "19",
-      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/19",
-      "task_description": "Removes consecutive duplicate tokens from a sequence.",
+      "case_id": "44",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/44",
+      "task_description": "Replaces each element with the number of elements greater than it in the sequence",
       "vocab": [
-        "a",
-        "b",
-        "c"
+        0,
+        1,
+        2,
+        3,
+        4,
+        5,
+        6,
+        7,
+        8,
+        9,
+        10
       ],
-      "max_seq_len": 15,
+      "max_seq_len": 10,
       "min_seq_len": 4,
       "files": [
         {
           "file_name": "edges.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/edges.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/44/edges.pkl"
         },
         {
           "file_name": "ll_model.pth",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/ll_model.pth"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/44/ll_model.pth"
         },
         {
           "file_name": "ll_model_cfg.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/ll_model_cfg.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/44/ll_model_cfg.pkl"
         },
         {
           "file_name": "meta.json",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/meta.json"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/44/meta.json"
         }
       ],
       "transformer_cfg": {
         "n_layers": 2,
-        "d_model": 32,
-        "n_ctx": 15,
-        "d_head": 8,
+        "d_model": 24,
+        "n_ctx": 10,
+        "d_head": 6,
         "model_name": "custom",
         "n_heads": 4,
-        "d_mlp": 128,
+        "d_mlp": 96,
         "act_fn": "gelu",
-        "d_vocab": 5,
+        "d_vocab": 13,
         "eps": 1e-05,
         "use_attn_result": true,
         "use_attn_scale": true,
@@ -361,18 +3821,18 @@
         "init_mode": "gpt2",
         "normalization_type": null,
         "n_devices": 1,
-        "attention_dir": "causal",
+        "attention_dir": "bidirectional",
         "attn_only": false,
         "seed": 0,
-        "initializer_range": 0.15689290811054724,
+        "initializer_range": 0.13719886811400708,
         "init_weights": true,
         "scale_attn_by_inverse_layer_idx": false,
         "positional_embedding_type": "standard",
         "final_rms": false,
-        "d_vocab_out": 3,
+        "d_vocab_out": 10,
         "parallel_attn_mlp": false,
         "rotary_dim": null,
-        "n_params": 24576,
+        "n_params": 13824,
         "use_hook_tokens": false,
         "gated_mlp": false,
         "default_prepend_bos": true,
@@ -382,73 +3842,92 @@
         "post_embedding_ln": false,
         "rotary_base": 10000,
         "trust_remote_code": false,
-        "rotary_adjacent_pairs": false
+        "rotary_adjacent_pairs": false,
+        "load_in_4bit": false,
+        "num_experts": null,
+        "experts_per_token": null
       },
-      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/ll_model_cfg.pkl",
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/44/ll_model_cfg.pkl",
       "training_args": {
+        "output_dir": "/circuits-benchmark/results",
         "atol": 0.05,
         "lr": 0.001,
-        "use_single_loss": false,
+        "use_single_loss": true,
         "iit_weight": 1.0,
-        "behavior_weight": 1.0,
+        "behavior_weight": 0.4,
         "strict_weight": 0.4,
-        "epochs": 2000,
+        "epochs": 1000,
+        "early_stop_accuracy_threshold": 99.9,
         "act_fn": "gelu",
+        "use_wandb": true,
+        "save_model_to_wandb": true,
         "clip_grad_norm": 0.1,
-        "lr_scheduler": ""
+        "lr_scheduler": "linear",
+        "model_pair": "strict",
+        "same_size": false,
+        "seed": 67,
+        "batch_size": 256,
+        "include_mlp": false,
+        "detach_while_caching": true,
+        "scheduler_val_metric": [
+          "val/accuracy",
+          "val/IIA",
+          "val/strict_accuracy"
+        ],
+        "siit_sampling": "sample_all",
+        "val_iia_sampling": "all"
       },
-      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/meta.json",
-      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/ll_model.pth",
-      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/edges.pkl"
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/44/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/44/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/44/edges.pkl"
     },
     {
-      "case_id": "20",
-      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/20",
-      "task_description": "Detect spam messages based on appearance of spam keywords.",
+      "case_id": "45",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/45",
+      "task_description": "Doubles the first half of the sequence",
       "vocab": [
-        "J",
-        "LB",
-        "TPSI",
-        "V",
-        "b",
-        "click",
-        "no",
-        "now",
-        "oCLrZaW",
-        "offer",
-        "poiVg",
-        "spam"
+        0,
+        1,
+        2,
+        3,
+        4,
+        5,
+        6,
+        7,
+        8,
+        9,
+        10
       ],
       "max_seq_len": 10,
       "min_seq_len": 4,
       "files": [
         {
           "file_name": "edges.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/edges.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/45/edges.pkl"
         },
         {
           "file_name": "ll_model.pth",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/ll_model.pth"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/45/ll_model.pth"
         },
         {
           "file_name": "ll_model_cfg.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/ll_model_cfg.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/45/ll_model_cfg.pkl"
         },
         {
           "file_name": "meta.json",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/meta.json"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/45/meta.json"
         }
       ],
       "transformer_cfg": {
-        "n_layers": 2,
-        "d_model": 13,
+        "n_layers": 3,
+        "d_model": 24,
         "n_ctx": 10,
-        "d_head": 3,
+        "d_head": 6,
         "model_name": "custom",
         "n_heads": 4,
-        "d_mlp": 52,
+        "d_mlp": 96,
         "act_fn": "gelu",
-        "d_vocab": 14,
+        "d_vocab": 13,
         "eps": 1e-05,
         "use_attn_result": true,
         "use_attn_scale": true,
@@ -467,18 +3946,18 @@
         "init_mode": "gpt2",
         "normalization_type": null,
         "n_devices": 1,
-        "attention_dir": "causal",
+        "attention_dir": "bidirectional",
         "attn_only": false,
         "seed": 0,
-        "initializer_range": 0.16,
+        "initializer_range": 0.11094003924504584,
         "init_weights": true,
         "scale_attn_by_inverse_layer_idx": false,
         "positional_embedding_type": "standard",
         "final_rms": false,
-        "d_vocab_out": 2,
+        "d_vocab_out": 16,
         "parallel_attn_mlp": false,
         "rotary_dim": null,
-        "n_params": 3952,
+        "n_params": 20736,
         "use_hook_tokens": false,
         "gated_mlp": false,
         "default_prepend_bos": true,
@@ -488,65 +3967,181 @@
         "post_embedding_ln": false,
         "rotary_base": 10000,
         "trust_remote_code": false,
-        "rotary_adjacent_pairs": false
+        "rotary_adjacent_pairs": false,
+        "load_in_4bit": false,
+        "num_experts": null,
+        "experts_per_token": null
       },
-      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/ll_model_cfg.pkl",
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/45/ll_model_cfg.pkl",
       "training_args": {
+        "output_dir": "/circuits-benchmark/results",
         "atol": 0.05,
-        "lr": 0.01,
-        "use_single_loss": false,
+        "lr": 0.001,
+        "use_single_loss": true,
         "iit_weight": 1.0,
-        "behavior_weight": 1.0,
+        "behavior_weight": 0.4,
         "strict_weight": 0.4,
-        "epochs": 2000,
+        "epochs": 1000,
+        "early_stop_accuracy_threshold": 99.9,
         "act_fn": "gelu",
+        "use_wandb": true,
+        "save_model_to_wandb": true,
         "clip_grad_norm": 0.1,
-        "lr_scheduler": "",
-        "model_pair": "strict"
+        "lr_scheduler": "linear",
+        "model_pair": "strict",
+        "same_size": false,
+        "seed": 67,
+        "batch_size": 256,
+        "include_mlp": false,
+        "detach_while_caching": true,
+        "scheduler_val_metric": [
+          "val/accuracy",
+          "val/IIA",
+          "val/strict_accuracy"
+        ],
+        "siit_sampling": "sample_all",
+        "val_iia_sampling": "all"
       },
-      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/meta.json",
-      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/ll_model.pth",
-      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/edges.pkl"
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/45/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/45/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/45/edges.pkl"
     },
     {
-      "case_id": "21",
-      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/21",
-      "task_description": "Extract unique tokens from a string",
+      "case_id": "51",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/51",
+      "task_description": "Checks if each element is a Fibonacci number",
       "vocab": [
-        "a",
-        "b",
-        "c"
+        0,
+        1,
+        2,
+        3,
+        4,
+        5,
+        6,
+        7,
+        8,
+        9,
+        10,
+        11,
+        12,
+        13,
+        14,
+        15,
+        16,
+        17,
+        18,
+        19,
+        20,
+        21,
+        22,
+        23,
+        24,
+        25,
+        26,
+        27,
+        28,
+        29,
+        30,
+        31,
+        32,
+        33,
+        34,
+        35,
+        36,
+        37,
+        38,
+        39,
+        40,
+        41,
+        42,
+        43,
+        44,
+        45,
+        46,
+        47,
+        48,
+        49,
+        50,
+        51,
+        52,
+        53,
+        54,
+        55,
+        56,
+        57,
+        58,
+        59,
+        60,
+        61,
+        62,
+        63,
+        64,
+        65,
+        66,
+        67,
+        68,
+        69,
+        70,
+        71,
+        72,
+        73,
+        74,
+        75,
+        76,
+        77,
+        78,
+        79,
+        80,
+        81,
+        82,
+        83,
+        84,
+        85,
+        86,
+        87,
+        88,
+        89,
+        90,
+        91,
+        92,
+        93,
+        94,
+        95,
+        96,
+        97,
+        98,
+        99
       ],
       "max_seq_len": 10,
       "min_seq_len": 4,
       "files": [
         {
           "file_name": "edges.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/edges.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/51/edges.pkl"
         },
         {
           "file_name": "ll_model.pth",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/ll_model.pth"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/51/ll_model.pth"
         },
         {
           "file_name": "ll_model_cfg.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/ll_model_cfg.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/51/ll_model_cfg.pkl"
         },
         {
           "file_name": "meta.json",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/meta.json"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/51/meta.json"
         }
       ],
       "transformer_cfg": {
-        "n_layers": 4,
-        "d_model": 50,
+        "n_layers": 2,
+        "d_model": 4,
         "n_ctx": 10,
-        "d_head": 12,
+        "d_head": 1,
         "model_name": "custom",
         "n_heads": 4,
-        "d_mlp": 200,
+        "d_mlp": 16,
         "act_fn": "gelu",
-        "d_vocab": 5,
+        "d_vocab": 102,
         "eps": 1e-05,
         "use_attn_result": true,
         "use_attn_scale": true,
@@ -568,15 +4163,15 @@
         "attention_dir": "causal",
         "attn_only": false,
         "seed": 0,
-        "initializer_range": 0.09847319278346618,
+        "initializer_range": 0.07525766947068778,
         "init_weights": true,
         "scale_attn_by_inverse_layer_idx": false,
         "positional_embedding_type": "standard",
         "final_rms": false,
-        "d_vocab_out": 3,
+        "d_vocab_out": 2,
         "parallel_attn_mlp": false,
         "rotary_dim": null,
-        "n_params": 118400,
+        "n_params": 384,
         "use_hook_tokens": false,
         "gated_mlp": false,
         "default_prepend_bos": true,
@@ -586,65 +4181,92 @@
         "post_embedding_ln": false,
         "rotary_base": 10000,
         "trust_remote_code": false,
-        "rotary_adjacent_pairs": false
+        "rotary_adjacent_pairs": false,
+        "load_in_4bit": false,
+        "num_experts": null,
+        "experts_per_token": null
       },
-      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/ll_model_cfg.pkl",
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/51/ll_model_cfg.pkl",
       "training_args": {
+        "output_dir": "/circuits-benchmark/results",
         "atol": 0.05,
-        "lr": 0.0005,
-        "use_single_loss": false,
+        "lr": 0.001,
+        "use_single_loss": true,
         "iit_weight": 1.0,
-        "behavior_weight": 1.0,
-        "strict_weight": 0.5,
-        "epochs": 2000,
+        "behavior_weight": 0.4,
+        "strict_weight": 0.4,
+        "epochs": 1000,
+        "early_stop_accuracy_threshold": 99.9,
         "act_fn": "gelu",
+        "use_wandb": true,
+        "save_model_to_wandb": true,
         "clip_grad_norm": 0.1,
-        "lr_scheduler": "",
-        "model_pair": "strict"
+        "lr_scheduler": "linear",
+        "model_pair": "strict",
+        "same_size": false,
+        "seed": 67,
+        "batch_size": 256,
+        "include_mlp": false,
+        "detach_while_caching": true,
+        "scheduler_val_metric": [
+          "val/accuracy",
+          "val/IIA",
+          "val/strict_accuracy"
+        ],
+        "siit_sampling": "sample_all",
+        "val_iia_sampling": "all"
       },
-      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/meta.json",
-      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/ll_model.pth",
-      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/edges.pkl"
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/51/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/51/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/51/edges.pkl"
     },
     {
-      "case_id": "26",
-      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/26",
-      "task_description": "Creates a cascading effect by repeating each token in sequence incrementally.",
+      "case_id": "56",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/56",
+      "task_description": "Sets every third element to zero.",
       "vocab": [
-        "a",
-        "b",
-        "c"
+        0,
+        1,
+        2,
+        3,
+        4,
+        5,
+        6,
+        7,
+        8,
+        9,
+        10
       ],
       "max_seq_len": 10,
       "min_seq_len": 4,
       "files": [
         {
           "file_name": "edges.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/26/edges.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/56/edges.pkl"
         },
         {
           "file_name": "ll_model.pth",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/26/ll_model.pth"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/56/ll_model.pth"
         },
         {
           "file_name": "ll_model_cfg.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/26/ll_model_cfg.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/56/ll_model_cfg.pkl"
         },
         {
           "file_name": "meta.json",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/26/meta.json"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/56/meta.json"
         }
       ],
       "transformer_cfg": {
         "n_layers": 2,
-        "d_model": 21,
+        "d_model": 4,
         "n_ctx": 10,
-        "d_head": 5,
+        "d_head": 1,
         "model_name": "custom",
         "n_heads": 4,
-        "d_mlp": 84,
+        "d_mlp": 16,
         "act_fn": "gelu",
-        "d_vocab": 5,
+        "d_vocab": 13,
         "eps": 1e-05,
         "use_attn_result": true,
         "use_attn_scale": true,
@@ -666,15 +4288,15 @@
         "attention_dir": "causal",
         "attn_only": false,
         "seed": 0,
-        "initializer_range": 0.12344267996967354,
+        "initializer_range": 0.13333333333333333,
         "init_weights": true,
         "scale_attn_by_inverse_layer_idx": false,
         "positional_embedding_type": "standard",
         "final_rms": false,
-        "d_vocab_out": 27,
+        "d_vocab_out": 11,
         "parallel_attn_mlp": false,
         "rotary_dim": null,
-        "n_params": 10416,
+        "n_params": 384,
         "use_hook_tokens": false,
         "gated_mlp": false,
         "default_prepend_bos": true,
@@ -684,70 +4306,92 @@
         "post_embedding_ln": false,
         "rotary_base": 10000,
         "trust_remote_code": false,
-        "rotary_adjacent_pairs": false
+        "rotary_adjacent_pairs": false,
+        "load_in_4bit": false,
+        "num_experts": null,
+        "experts_per_token": null
       },
-      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/26/ll_model_cfg.pkl",
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/56/ll_model_cfg.pkl",
       "training_args": {
+        "output_dir": "/circuits-benchmark/results",
         "atol": 0.05,
-        "lr": 0.01,
-        "use_single_loss": false,
+        "lr": 0.001,
+        "use_single_loss": true,
         "iit_weight": 1.0,
-        "behavior_weight": 1.0,
+        "behavior_weight": 0.4,
         "strict_weight": 0.4,
-        "epochs": 2000,
+        "epochs": 1000,
+        "early_stop_accuracy_threshold": 99.9,
         "act_fn": "gelu",
+        "use_wandb": true,
+        "save_model_to_wandb": true,
         "clip_grad_norm": 0.1,
-        "lr_scheduler": "",
-        "model_pair": "strict"
+        "lr_scheduler": "linear",
+        "model_pair": "strict",
+        "same_size": false,
+        "seed": 67,
+        "batch_size": 256,
+        "include_mlp": false,
+        "detach_while_caching": true,
+        "scheduler_val_metric": [
+          "val/accuracy",
+          "val/IIA",
+          "val/strict_accuracy"
+        ],
+        "siit_sampling": "sample_all",
+        "val_iia_sampling": "all"
       },
-      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/26/meta.json",
-      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/26/ll_model.pth",
-      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/26/edges.pkl"
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/56/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/56/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/56/edges.pkl"
     },
     {
-      "case_id": "29",
-      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/29",
-      "task_description": "Creates abbreviations for each token in the sequence.",
+      "case_id": "58",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/58",
+      "task_description": "Mirrors the first half of the sequence to the second half.",
       "vocab": [
-        "J",
-        "LB",
-        "TPSI",
-        "V",
-        "b",
-        "no",
-        "oCLrZaW",
-        "poiVg"
+        0,
+        1,
+        2,
+        3,
+        4,
+        5,
+        6,
+        7,
+        8,
+        9,
+        10
       ],
       "max_seq_len": 10,
       "min_seq_len": 4,
       "files": [
         {
           "file_name": "edges.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/29/edges.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/58/edges.pkl"
         },
         {
           "file_name": "ll_model.pth",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/29/ll_model.pth"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/58/ll_model.pth"
         },
         {
           "file_name": "ll_model_cfg.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/29/ll_model_cfg.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/58/ll_model_cfg.pkl"
         },
         {
           "file_name": "meta.json",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/29/meta.json"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/58/meta.json"
         }
       ],
       "transformer_cfg": {
-        "n_layers": 2,
-        "d_model": 13,
+        "n_layers": 3,
+        "d_model": 32,
         "n_ctx": 10,
-        "d_head": 3,
+        "d_head": 8,
         "model_name": "custom",
         "n_heads": 4,
-        "d_mlp": 52,
+        "d_mlp": 128,
         "act_fn": "gelu",
-        "d_vocab": 10,
+        "d_vocab": 13,
         "eps": 1e-05,
         "use_attn_result": true,
         "use_attn_scale": true,
@@ -766,18 +4410,18 @@
         "init_mode": "gpt2",
         "normalization_type": null,
         "n_devices": 1,
-        "attention_dir": "causal",
+        "attention_dir": "bidirectional",
         "attn_only": false,
         "seed": 0,
-        "initializer_range": 0.1539600717839002,
+        "initializer_range": 0.10415112878465911,
         "init_weights": true,
         "scale_attn_by_inverse_layer_idx": false,
         "positional_embedding_type": "standard",
         "final_rms": false,
-        "d_vocab_out": 8,
+        "d_vocab_out": 11,
         "parallel_attn_mlp": false,
         "rotary_dim": null,
-        "n_params": 3952,
+        "n_params": 36864,
         "use_hook_tokens": false,
         "gated_mlp": false,
         "default_prepend_bos": true,
@@ -787,66 +4431,92 @@
         "post_embedding_ln": false,
         "rotary_base": 10000,
         "trust_remote_code": false,
-        "rotary_adjacent_pairs": false
+        "rotary_adjacent_pairs": false,
+        "load_in_4bit": false,
+        "num_experts": null,
+        "experts_per_token": null
       },
-      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/29/ll_model_cfg.pkl",
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/58/ll_model_cfg.pkl",
       "training_args": {
+        "output_dir": "/circuits-benchmark/results",
         "atol": 0.05,
-        "lr": 0.01,
-        "use_single_loss": false,
+        "lr": 0.001,
+        "use_single_loss": true,
         "iit_weight": 1.0,
-        "behavior_weight": 1.0,
+        "behavior_weight": 0.4,
         "strict_weight": 0.4,
-        "epochs": 2000,
+        "epochs": 1000,
+        "early_stop_accuracy_threshold": 99.9,
         "act_fn": "gelu",
+        "use_wandb": true,
+        "save_model_to_wandb": true,
         "clip_grad_norm": 0.1,
-        "lr_scheduler": "",
-        "model_pair": "strict"
+        "lr_scheduler": "linear",
+        "model_pair": "strict",
+        "same_size": false,
+        "seed": 67,
+        "batch_size": 256,
+        "include_mlp": false,
+        "detach_while_caching": true,
+        "scheduler_val_metric": [
+          "val/accuracy",
+          "val/IIA",
+          "val/strict_accuracy"
+        ],
+        "siit_sampling": "sample_all",
+        "val_iia_sampling": "all"
       },
-      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/29/meta.json",
-      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/29/ll_model.pth",
-      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/29/edges.pkl"
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/58/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/58/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/58/edges.pkl"
     },
     {
-      "case_id": "3",
-      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/3",
-      "task_description": "Returns the fraction of 'x' in the input up to the i-th position for all i.",
+      "case_id": "63",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/63",
+      "task_description": "Replaces each element with the number of elements less than it in the sequence.",
       "vocab": [
-        "a",
-        "b",
-        "c",
-        "x"
+        0,
+        1,
+        2,
+        3,
+        4,
+        5,
+        6,
+        7,
+        8,
+        9,
+        10
       ],
-      "max_seq_len": 5,
+      "max_seq_len": 10,
       "min_seq_len": 4,
       "files": [
         {
           "file_name": "edges.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/edges.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/63/edges.pkl"
         },
         {
           "file_name": "ll_model.pth",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/ll_model.pth"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/63/ll_model.pth"
         },
         {
           "file_name": "ll_model_cfg.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/ll_model_cfg.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/63/ll_model_cfg.pkl"
         },
         {
           "file_name": "meta.json",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/meta.json"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/63/meta.json"
         }
       ],
       "transformer_cfg": {
         "n_layers": 2,
-        "d_model": 12,
-        "n_ctx": 5,
-        "d_head": 3,
+        "d_model": 24,
+        "n_ctx": 10,
+        "d_head": 6,
         "model_name": "custom",
         "n_heads": 4,
-        "d_mlp": 48,
+        "d_mlp": 96,
         "act_fn": "gelu",
-        "d_vocab": 6,
+        "d_vocab": 13,
         "eps": 1e-05,
         "use_attn_result": true,
         "use_attn_scale": true,
@@ -865,18 +4535,18 @@
         "init_mode": "gpt2",
         "normalization_type": null,
         "n_devices": 1,
-        "attention_dir": "causal",
+        "attention_dir": "bidirectional",
         "attn_only": false,
         "seed": 0,
-        "initializer_range": 0.22188007849009167,
+        "initializer_range": 0.13719886811400708,
         "init_weights": true,
         "scale_attn_by_inverse_layer_idx": false,
         "positional_embedding_type": "standard",
         "final_rms": false,
-        "d_vocab_out": 1,
+        "d_vocab_out": 10,
         "parallel_attn_mlp": false,
         "rotary_dim": null,
-        "n_params": 3456,
+        "n_params": 13824,
         "use_hook_tokens": false,
         "gated_mlp": false,
         "default_prepend_bos": true,
@@ -886,59 +4556,80 @@
         "post_embedding_ln": false,
         "rotary_base": 10000,
         "trust_remote_code": false,
-        "rotary_adjacent_pairs": false
+        "rotary_adjacent_pairs": false,
+        "load_in_4bit": false,
+        "num_experts": null,
+        "experts_per_token": null
       },
-      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/ll_model_cfg.pkl",
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/63/ll_model_cfg.pkl",
       "training_args": {
+        "output_dir": "/circuits-benchmark/results",
         "atol": 0.05,
-        "lr": 0.01,
-        "use_single_loss": false,
+        "lr": 0.001,
+        "use_single_loss": true,
         "iit_weight": 1.0,
-        "behavior_weight": 1.0,
-        "strict_weight": 10.0,
-        "epochs": 2000,
+        "behavior_weight": 0.4,
+        "strict_weight": 0.4,
+        "epochs": 1000,
+        "early_stop_accuracy_threshold": 99.9,
         "act_fn": "gelu",
+        "use_wandb": true,
+        "save_model_to_wandb": true,
         "clip_grad_norm": 0.1,
-        "lr_scheduler": "",
+        "lr_scheduler": "linear",
         "model_pair": "strict",
-        "same_size": false
+        "same_size": false,
+        "seed": 67,
+        "batch_size": 256,
+        "include_mlp": false,
+        "detach_while_caching": true,
+        "scheduler_val_metric": [
+          "val/accuracy",
+          "val/IIA",
+          "val/strict_accuracy"
+        ],
+        "siit_sampling": "sample_all",
+        "val_iia_sampling": "all"
       },
-      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/meta.json",
-      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/ll_model.pth",
-      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/edges.pkl"
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/63/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/63/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/63/edges.pkl"
     },
     {
-      "case_id": "33",
-      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/33",
-      "task_description": "Checks if each token's length is odd or even.",
+      "case_id": "69",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/69",
+      "task_description": "Assign -1, 0, or 1 to each element of the input sequence based on its sign.",
       "vocab": [
-        "J",
-        "LB",
-        "TPSI",
-        "V",
-        "b",
-        "no",
-        "oCLrZaW",
-        "poiVg"
+        0,
+        1,
+        2,
+        3,
+        4,
+        5,
+        6,
+        7,
+        8,
+        9,
+        10
       ],
       "max_seq_len": 10,
       "min_seq_len": 4,
       "files": [
         {
           "file_name": "edges.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/edges.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/69/edges.pkl"
         },
         {
           "file_name": "ll_model.pth",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/ll_model.pth"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/69/ll_model.pth"
         },
         {
           "file_name": "ll_model_cfg.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/ll_model_cfg.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/69/ll_model_cfg.pkl"
         },
         {
           "file_name": "meta.json",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/meta.json"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/69/meta.json"
         }
       ],
       "transformer_cfg": {
@@ -950,7 +4641,7 @@
         "n_heads": 4,
         "d_mlp": 16,
         "act_fn": "gelu",
-        "d_vocab": 10,
+        "d_vocab": 13,
         "eps": 1e-05,
         "use_attn_result": true,
         "use_attn_scale": true,
@@ -972,7 +4663,7 @@
         "attention_dir": "causal",
         "attn_only": false,
         "seed": 0,
-        "initializer_range": 0.17457431218879393,
+        "initializer_range": 0.16329931618554522,
         "init_weights": true,
         "scale_attn_by_inverse_layer_idx": false,
         "positional_embedding_type": "standard",
@@ -990,69 +4681,84 @@
         "post_embedding_ln": false,
         "rotary_base": 10000,
         "trust_remote_code": false,
-        "rotary_adjacent_pairs": false
+        "rotary_adjacent_pairs": false,
+        "load_in_4bit": false,
+        "num_experts": null,
+        "experts_per_token": null
       },
-      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/ll_model_cfg.pkl",
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/69/ll_model_cfg.pkl",
       "training_args": {
+        "output_dir": "/circuits-benchmark/results",
         "atol": 0.05,
         "lr": 0.001,
-        "use_single_loss": false,
+        "use_single_loss": true,
         "iit_weight": 1.0,
-        "behavior_weight": 1.0,
+        "behavior_weight": 0.4,
         "strict_weight": 0.4,
-        "epochs": 2000,
+        "epochs": 1000,
+        "early_stop_accuracy_threshold": 99.9,
         "act_fn": "gelu",
+        "use_wandb": true,
+        "save_model_to_wandb": true,
         "clip_grad_norm": 0.1,
-        "lr_scheduler": ""
+        "lr_scheduler": "linear",
+        "model_pair": "strict",
+        "same_size": false,
+        "seed": 67,
+        "batch_size": 256,
+        "include_mlp": false,
+        "detach_while_caching": true,
+        "scheduler_val_metric": [
+          "val/accuracy",
+          "val/IIA",
+          "val/strict_accuracy"
+        ],
+        "siit_sampling": "sample_all",
+        "val_iia_sampling": "all"
       },
-      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/meta.json",
-      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/ll_model.pth",
-      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/edges.pkl"
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/69/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/69/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/69/edges.pkl"
     },
     {
-      "case_id": "34",
-      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/34",
-      "task_description": "Calculate the ratio of vowels to consonants in each word.",
+      "case_id": "7",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/7",
+      "task_description": "Returns the number of times each token occurs in the input.",
       "vocab": [
-        "J",
-        "LB",
-        "TPSI",
-        "V",
+        "a",
         "b",
-        "no",
-        "oCLrZaW",
-        "poiVg"
+        "c"
       ],
       "max_seq_len": 10,
       "min_seq_len": 4,
       "files": [
         {
           "file_name": "edges.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/edges.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/7/edges.pkl"
         },
         {
           "file_name": "ll_model.pth",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/ll_model.pth"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/7/ll_model.pth"
         },
         {
           "file_name": "ll_model_cfg.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/ll_model_cfg.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/7/ll_model_cfg.pkl"
         },
         {
           "file_name": "meta.json",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/meta.json"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/7/meta.json"
         }
       ],
       "transformer_cfg": {
         "n_layers": 2,
-        "d_model": 16,
+        "d_model": 17,
         "n_ctx": 10,
         "d_head": 4,
         "model_name": "custom",
         "n_heads": 4,
-        "d_mlp": 64,
+        "d_mlp": 68,
         "act_fn": "gelu",
-        "d_vocab": 10,
+        "d_vocab": 5,
         "eps": 1e-05,
         "use_attn_result": true,
         "use_attn_scale": true,
@@ -1071,18 +4777,18 @@
         "init_mode": "gpt2",
         "normalization_type": null,
         "n_devices": 1,
-        "attention_dir": "causal",
+        "attention_dir": "bidirectional",
         "attn_only": false,
         "seed": 0,
-        "initializer_range": 0.16329931618554522,
+        "initializer_range": 0.15689290811054724,
         "init_weights": true,
         "scale_attn_by_inverse_layer_idx": false,
         "positional_embedding_type": "standard",
         "final_rms": false,
-        "d_vocab_out": 5,
+        "d_vocab_out": 10,
         "parallel_attn_mlp": false,
         "rotary_dim": null,
-        "n_params": 6144,
+        "n_params": 6800,
         "use_hook_tokens": false,
         "gated_mlp": false,
         "default_prepend_bos": true,
@@ -1094,68 +4800,77 @@
         "trust_remote_code": false,
         "rotary_adjacent_pairs": false
       },
-      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/ll_model_cfg.pkl",
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/7/ll_model_cfg.pkl",
       "training_args": {
         "atol": 0.05,
         "lr": 0.01,
         "use_single_loss": false,
         "iit_weight": 1.0,
         "behavior_weight": 1.0,
-        "strict_weight": 1.0,
+        "strict_weight": 0.5,
         "epochs": 2000,
         "act_fn": "gelu",
         "clip_grad_norm": 0.1,
         "lr_scheduler": "",
-        "model_pair": "strict"
+        "model_pair": "strict",
+        "same_size": false,
+        "seed": 1234,
+        "batch_size": 256,
+        "include_mlp": false,
+        "next_token": false,
+        "detach_while_caching": true
       },
-      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/meta.json",
-      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/ll_model.pth",
-      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/edges.pkl"
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/7/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/7/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/7/edges.pkl"
     },
     {
-      "case_id": "35",
-      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/35",
-      "task_description": "Alternates capitalization of each character in words.",
+      "case_id": "79",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/79",
+      "task_description": "Check if each number in a sequence is prime",
       "vocab": [
-        "J",
-        "LB",
-        "TPSI",
-        "V",
-        "b",
-        "no",
-        "oCLrZaW",
-        "poiVg"
+        0,
+        1,
+        2,
+        3,
+        4,
+        5,
+        6,
+        7,
+        8,
+        9,
+        10
       ],
       "max_seq_len": 10,
       "min_seq_len": 4,
       "files": [
         {
           "file_name": "edges.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/edges.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/79/edges.pkl"
         },
         {
           "file_name": "ll_model.pth",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/ll_model.pth"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/79/ll_model.pth"
         },
         {
           "file_name": "ll_model_cfg.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/ll_model_cfg.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/79/ll_model_cfg.pkl"
         },
         {
           "file_name": "meta.json",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/meta.json"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/79/meta.json"
         }
       ],
       "transformer_cfg": {
         "n_layers": 2,
-        "d_model": 9,
+        "d_model": 4,
         "n_ctx": 10,
-        "d_head": 2,
+        "d_head": 1,
         "model_name": "custom",
         "n_heads": 4,
-        "d_mlp": 36,
+        "d_mlp": 16,
         "act_fn": "gelu",
-        "d_vocab": 10,
+        "d_vocab": 13,
         "eps": 1e-05,
         "use_attn_result": true,
         "use_attn_scale": true,
@@ -1177,15 +4892,15 @@
         "attention_dir": "causal",
         "attn_only": false,
         "seed": 0,
-        "initializer_range": 0.1539600717839002,
+        "initializer_range": 0.16329931618554522,
         "init_weights": true,
         "scale_attn_by_inverse_layer_idx": false,
         "positional_embedding_type": "standard",
         "final_rms": false,
-        "d_vocab_out": 8,
+        "d_vocab_out": 2,
         "parallel_attn_mlp": false,
         "rotary_dim": null,
-        "n_params": 1872,
+        "n_params": 384,
         "use_hook_tokens": false,
         "gated_mlp": false,
         "default_prepend_bos": true,
@@ -1195,65 +4910,92 @@
         "post_embedding_ln": false,
         "rotary_base": 10000,
         "trust_remote_code": false,
-        "rotary_adjacent_pairs": false
+        "rotary_adjacent_pairs": false,
+        "load_in_4bit": false,
+        "num_experts": null,
+        "experts_per_token": null
       },
-      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/ll_model_cfg.pkl",
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/79/ll_model_cfg.pkl",
       "training_args": {
+        "output_dir": "/circuits-benchmark/results",
         "atol": 0.05,
-        "lr": 0.01,
-        "use_single_loss": false,
+        "lr": 0.001,
+        "use_single_loss": true,
         "iit_weight": 1.0,
-        "behavior_weight": 1.0,
-        "strict_weight": 1.0,
-        "epochs": 2000,
+        "behavior_weight": 0.4,
+        "strict_weight": 0.4,
+        "epochs": 1000,
+        "early_stop_accuracy_threshold": 99.9,
         "act_fn": "gelu",
+        "use_wandb": true,
+        "save_model_to_wandb": true,
         "clip_grad_norm": 0.1,
-        "lr_scheduler": "",
-        "model_pair": "strict"
+        "lr_scheduler": "linear",
+        "model_pair": "strict",
+        "same_size": false,
+        "seed": 67,
+        "batch_size": 256,
+        "include_mlp": false,
+        "detach_while_caching": true,
+        "scheduler_val_metric": [
+          "val/accuracy",
+          "val/IIA",
+          "val/strict_accuracy"
+        ],
+        "siit_sampling": "sample_all",
+        "val_iia_sampling": "all"
       },
-      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/meta.json",
-      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/ll_model.pth",
-      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/edges.pkl"
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/79/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/79/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/79/edges.pkl"
     },
     {
-      "case_id": "36",
-      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/36",
-      "task_description": "Classifies each token as 'positive', 'negative', or 'neutral' based on emojis.",
+      "case_id": "82",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/82",
+      "task_description": "Halve the elements in the second half of the sequence.",
       "vocab": [
-        "\ud83d\udcd8",
-        "\ud83d\ude0a",
-        "\ud83d\ude22"
+        0,
+        1,
+        2,
+        3,
+        4,
+        5,
+        6,
+        7,
+        8,
+        9,
+        10
       ],
       "max_seq_len": 10,
       "min_seq_len": 4,
       "files": [
         {
           "file_name": "edges.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/edges.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/82/edges.pkl"
         },
         {
           "file_name": "ll_model.pth",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/ll_model.pth"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/82/ll_model.pth"
         },
         {
           "file_name": "ll_model_cfg.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/ll_model_cfg.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/82/ll_model_cfg.pkl"
         },
         {
           "file_name": "meta.json",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/meta.json"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/82/meta.json"
         }
       ],
       "transformer_cfg": {
-        "n_layers": 2,
-        "d_model": 6,
+        "n_layers": 4,
+        "d_model": 24,
         "n_ctx": 10,
-        "d_head": 1,
+        "d_head": 6,
         "model_name": "custom",
         "n_heads": 4,
-        "d_mlp": 24,
+        "d_mlp": 96,
         "act_fn": "gelu",
-        "d_vocab": 5,
+        "d_vocab": 13,
         "eps": 1e-05,
         "use_attn_result": true,
         "use_attn_scale": true,
@@ -1272,18 +5014,18 @@
         "init_mode": "gpt2",
         "normalization_type": null,
         "n_devices": 1,
-        "attention_dir": "causal",
+        "attention_dir": "bidirectional",
         "attn_only": false,
         "seed": 0,
-        "initializer_range": 0.19402850002906638,
+        "initializer_range": 0.1059625885652035,
         "init_weights": true,
         "scale_attn_by_inverse_layer_idx": false,
         "positional_embedding_type": "standard",
         "final_rms": false,
-        "d_vocab_out": 3,
+        "d_vocab_out": 16,
         "parallel_attn_mlp": false,
         "rotary_dim": null,
-        "n_params": 768,
+        "n_params": 27648,
         "use_hook_tokens": false,
         "gated_mlp": false,
         "default_prepend_bos": true,
@@ -1293,70 +5035,92 @@
         "post_embedding_ln": false,
         "rotary_base": 10000,
         "trust_remote_code": false,
-        "rotary_adjacent_pairs": false
+        "rotary_adjacent_pairs": false,
+        "load_in_4bit": false,
+        "num_experts": null,
+        "experts_per_token": null
       },
-      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/ll_model_cfg.pkl",
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/82/ll_model_cfg.pkl",
       "training_args": {
+        "output_dir": "/circuits-benchmark/results",
         "atol": 0.05,
-        "lr": 0.01,
-        "use_single_loss": false,
+        "lr": 0.001,
+        "use_single_loss": true,
         "iit_weight": 1.0,
-        "behavior_weight": 1.0,
-        "strict_weight": 1.0,
-        "epochs": 2000,
+        "behavior_weight": 0.4,
+        "strict_weight": 0.4,
+        "epochs": 1000,
+        "early_stop_accuracy_threshold": 99.9,
         "act_fn": "gelu",
+        "use_wandb": true,
+        "save_model_to_wandb": true,
         "clip_grad_norm": 0.1,
-        "lr_scheduler": "",
-        "model_pair": "strict"
+        "lr_scheduler": "linear",
+        "model_pair": "strict",
+        "same_size": false,
+        "seed": 67,
+        "batch_size": 256,
+        "include_mlp": false,
+        "detach_while_caching": true,
+        "scheduler_val_metric": [
+          "val/accuracy",
+          "val/IIA",
+          "val/strict_accuracy"
+        ],
+        "siit_sampling": "sample_all",
+        "val_iia_sampling": "all"
       },
-      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/meta.json",
-      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/ll_model.pth",
-      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/edges.pkl"
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/82/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/82/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/82/edges.pkl"
     },
     {
-      "case_id": "37",
-      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/37",
-      "task_description": "Reverses each word in the sequence except for specified exclusions.",
+      "case_id": "86",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/86",
+      "task_description": "Check if each element is a power of 2. Return 1 if true, otherwise 0.",
       "vocab": [
-        "J",
-        "LB",
-        "TPSI",
-        "V",
-        "b",
-        "no",
-        "oCLrZaW",
-        "poiVg"
+        0,
+        1,
+        2,
+        3,
+        4,
+        5,
+        6,
+        7,
+        8,
+        9,
+        10
       ],
       "max_seq_len": 10,
       "min_seq_len": 4,
       "files": [
         {
           "file_name": "edges.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/edges.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/86/edges.pkl"
         },
         {
           "file_name": "ll_model.pth",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/ll_model.pth"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/86/ll_model.pth"
         },
         {
           "file_name": "ll_model_cfg.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/ll_model_cfg.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/86/ll_model_cfg.pkl"
         },
         {
           "file_name": "meta.json",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/meta.json"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/86/meta.json"
         }
       ],
       "transformer_cfg": {
         "n_layers": 2,
-        "d_model": 12,
+        "d_model": 4,
         "n_ctx": 10,
-        "d_head": 3,
+        "d_head": 1,
         "model_name": "custom",
         "n_heads": 4,
-        "d_mlp": 48,
+        "d_mlp": 16,
         "act_fn": "gelu",
-        "d_vocab": 10,
+        "d_vocab": 13,
         "eps": 1e-05,
         "use_attn_result": true,
         "use_attn_scale": true,
@@ -1378,15 +5142,15 @@
         "attention_dir": "causal",
         "attn_only": false,
         "seed": 0,
-        "initializer_range": 0.1539600717839002,
+        "initializer_range": 0.16329931618554522,
         "init_weights": true,
         "scale_attn_by_inverse_layer_idx": false,
         "positional_embedding_type": "standard",
         "final_rms": false,
-        "d_vocab_out": 8,
+        "d_vocab_out": 2,
         "parallel_attn_mlp": false,
         "rotary_dim": null,
-        "n_params": 3456,
+        "n_params": 384,
         "use_hook_tokens": false,
         "gated_mlp": false,
         "default_prepend_bos": true,
@@ -1396,67 +5160,92 @@
         "post_embedding_ln": false,
         "rotary_base": 10000,
         "trust_remote_code": false,
-        "rotary_adjacent_pairs": false
+        "rotary_adjacent_pairs": false,
+        "load_in_4bit": false,
+        "num_experts": null,
+        "experts_per_token": null
       },
-      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/ll_model_cfg.pkl",
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/86/ll_model_cfg.pkl",
       "training_args": {
+        "output_dir": "/circuits-benchmark/results",
         "atol": 0.05,
-        "lr": 0.01,
-        "use_single_loss": false,
+        "lr": 0.001,
+        "use_single_loss": true,
         "iit_weight": 1.0,
-        "behavior_weight": 1.0,
-        "strict_weight": 1.0,
-        "epochs": 2000,
+        "behavior_weight": 0.4,
+        "strict_weight": 0.4,
+        "epochs": 1000,
+        "early_stop_accuracy_threshold": 99.9,
         "act_fn": "gelu",
+        "use_wandb": true,
+        "save_model_to_wandb": true,
         "clip_grad_norm": 0.1,
-        "lr_scheduler": "",
-        "model_pair": "strict"
+        "lr_scheduler": "linear",
+        "model_pair": "strict",
+        "same_size": false,
+        "seed": 67,
+        "batch_size": 256,
+        "include_mlp": false,
+        "detach_while_caching": true,
+        "scheduler_val_metric": [
+          "val/accuracy",
+          "val/IIA",
+          "val/strict_accuracy"
+        ],
+        "siit_sampling": "sample_all",
+        "val_iia_sampling": "all"
       },
-      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/meta.json",
-      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/ll_model.pth",
-      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/edges.pkl"
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/86/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/86/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/86/edges.pkl"
     },
     {
-      "case_id": "4",
-      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/4",
-      "task_description": "Return fraction of previous open tokens minus the fraction of close tokens.",
+      "case_id": "87",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/87",
+      "task_description": "Binarize a sequence of integers using a threshold.",
       "vocab": [
-        "(",
-        ")",
-        "a",
-        "b",
-        "c"
+        0,
+        1,
+        2,
+        3,
+        4,
+        5,
+        6,
+        7,
+        8,
+        9,
+        10
       ],
       "max_seq_len": 10,
       "min_seq_len": 4,
       "files": [
         {
           "file_name": "edges.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/edges.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/87/edges.pkl"
         },
         {
           "file_name": "ll_model.pth",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/ll_model.pth"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/87/ll_model.pth"
         },
         {
           "file_name": "ll_model_cfg.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/ll_model_cfg.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/87/ll_model_cfg.pkl"
         },
         {
           "file_name": "meta.json",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/meta.json"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/87/meta.json"
         }
       ],
       "transformer_cfg": {
         "n_layers": 2,
-        "d_model": 20,
+        "d_model": 4,
         "n_ctx": 10,
-        "d_head": 5,
+        "d_head": 1,
         "model_name": "custom",
         "n_heads": 4,
-        "d_mlp": 80,
+        "d_mlp": 16,
         "act_fn": "gelu",
-        "d_vocab": 7,
+        "d_vocab": 13,
         "eps": 1e-05,
         "use_attn_result": true,
         "use_attn_scale": true,
@@ -1478,15 +5267,15 @@
         "attention_dir": "causal",
         "attn_only": false,
         "seed": 0,
-        "initializer_range": 0.17056057308448835,
+        "initializer_range": 0.16329931618554522,
         "init_weights": true,
         "scale_attn_by_inverse_layer_idx": false,
         "positional_embedding_type": "standard",
         "final_rms": false,
-        "d_vocab_out": 1,
+        "d_vocab_out": 2,
         "parallel_attn_mlp": false,
         "rotary_dim": null,
-        "n_params": 9600,
+        "n_params": 384,
         "use_hook_tokens": false,
         "gated_mlp": false,
         "default_prepend_bos": true,
@@ -1496,64 +5285,92 @@
         "post_embedding_ln": false,
         "rotary_base": 10000,
         "trust_remote_code": false,
-        "rotary_adjacent_pairs": false
+        "rotary_adjacent_pairs": false,
+        "load_in_4bit": false,
+        "num_experts": null,
+        "experts_per_token": null
       },
-      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/ll_model_cfg.pkl",
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/87/ll_model_cfg.pkl",
       "training_args": {
+        "output_dir": "/circuits-benchmark/results",
         "atol": 0.05,
         "lr": 0.001,
-        "use_single_loss": false,
+        "use_single_loss": true,
         "iit_weight": 1.0,
-        "behavior_weight": 1.0,
+        "behavior_weight": 0.4,
         "strict_weight": 0.4,
-        "epochs": 2000,
+        "epochs": 1000,
+        "early_stop_accuracy_threshold": 99.9,
         "act_fn": "gelu",
+        "use_wandb": true,
+        "save_model_to_wandb": true,
         "clip_grad_norm": 0.1,
-        "lr_scheduler": ""
+        "lr_scheduler": "linear",
+        "model_pair": "strict",
+        "same_size": false,
+        "seed": 67,
+        "batch_size": 256,
+        "include_mlp": false,
+        "detach_while_caching": true,
+        "scheduler_val_metric": [
+          "val/accuracy",
+          "val/IIA",
+          "val/strict_accuracy"
+        ],
+        "siit_sampling": "sample_all",
+        "val_iia_sampling": "all"
       },
-      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/meta.json",
-      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/ll_model.pth",
-      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/edges.pkl"
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/87/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/87/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/87/edges.pkl"
     },
     {
-      "case_id": "7",
-      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/7",
-      "task_description": "Returns the number of times each token occurs in the input.",
+      "case_id": "93",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/93",
+      "task_description": "Swaps the nth with the n+1th element if n%2==1.",
       "vocab": [
-        "a",
-        "b",
-        "c"
+        0,
+        1,
+        2,
+        3,
+        4,
+        5,
+        6,
+        7,
+        8,
+        9,
+        10
       ],
       "max_seq_len": 10,
       "min_seq_len": 4,
       "files": [
         {
           "file_name": "edges.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/7/edges.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/93/edges.pkl"
         },
         {
           "file_name": "ll_model.pth",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/7/ll_model.pth"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/93/ll_model.pth"
         },
         {
           "file_name": "ll_model_cfg.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/7/ll_model_cfg.pkl"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/93/ll_model_cfg.pkl"
         },
         {
           "file_name": "meta.json",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/7/meta.json"
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/93/meta.json"
         }
       ],
       "transformer_cfg": {
-        "n_layers": 2,
-        "d_model": 17,
+        "n_layers": 3,
+        "d_model": 20,
         "n_ctx": 10,
-        "d_head": 4,
+        "d_head": 5,
         "model_name": "custom",
         "n_heads": 4,
-        "d_mlp": 68,
+        "d_mlp": 80,
         "act_fn": "gelu",
-        "d_vocab": 5,
+        "d_vocab": 13,
         "eps": 1e-05,
         "use_attn_result": true,
         "use_attn_scale": true,
@@ -1575,15 +5392,15 @@
         "attention_dir": "bidirectional",
         "attn_only": false,
         "seed": 0,
-        "initializer_range": 0.15689290811054724,
+        "initializer_range": 0.10886621079036347,
         "init_weights": true,
         "scale_attn_by_inverse_layer_idx": false,
         "positional_embedding_type": "standard",
         "final_rms": false,
-        "d_vocab_out": 10,
+        "d_vocab_out": 11,
         "parallel_attn_mlp": false,
         "rotary_dim": null,
-        "n_params": 6800,
+        "n_params": 14400,
         "use_hook_tokens": false,
         "gated_mlp": false,
         "default_prepend_bos": true,
@@ -1593,31 +5410,44 @@
         "post_embedding_ln": false,
         "rotary_base": 10000,
         "trust_remote_code": false,
-        "rotary_adjacent_pairs": false
+        "rotary_adjacent_pairs": false,
+        "load_in_4bit": false,
+        "num_experts": null,
+        "experts_per_token": null
       },
-      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/7/ll_model_cfg.pkl",
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/93/ll_model_cfg.pkl",
       "training_args": {
+        "output_dir": "/circuits-benchmark/results",
         "atol": 0.05,
-        "lr": 0.01,
-        "use_single_loss": false,
+        "lr": 0.001,
+        "use_single_loss": true,
         "iit_weight": 1.0,
-        "behavior_weight": 1.0,
-        "strict_weight": 0.5,
-        "epochs": 2000,
+        "behavior_weight": 0.4,
+        "strict_weight": 0.4,
+        "epochs": 1000,
+        "early_stop_accuracy_threshold": 99.9,
         "act_fn": "gelu",
+        "use_wandb": true,
+        "save_model_to_wandb": true,
         "clip_grad_norm": 0.1,
-        "lr_scheduler": "",
+        "lr_scheduler": "linear",
         "model_pair": "strict",
         "same_size": false,
-        "seed": 1234,
+        "seed": 67,
         "batch_size": 256,
         "include_mlp": false,
-        "next_token": false,
-        "detach_while_caching": true
+        "detach_while_caching": true,
+        "scheduler_val_metric": [
+          "val/accuracy",
+          "val/IIA",
+          "val/strict_accuracy"
+        ],
+        "siit_sampling": "sample_all",
+        "val_iia_sampling": "all"
       },
-      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/7/meta.json",
-      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/7/ll_model.pth",
-      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/7/edges.pkl"
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/93/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/93/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/93/edges.pkl"
     },
     {
       "case_id": "ioi",