67/edges.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e945aab5d78b85637385b757100d2ecca98cd757462e675249ce4a6c74ef4fb3
3
+ size 667
67/ll_model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c03fe8705b1b01ab8f9020df80f3f06bd0d37780ec97f1de35ed385148ea79ba
3
+ size 75626
67/ll_model_cfg.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ede2a8d814262f05c481f13b50315814be1ec23294e84ce9fdc0c86574f7a87c
3
+ size 1100
67/meta.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"output_dir": "/circuits-benchmark/results", "atol": 0.05, "lr": 0.001, "use_single_loss": true, "iit_weight": 0.7, "behavior_weight": 0.4, "strict_weight": 0.7, "epochs": 3000, "early_stop_accuracy_threshold": 99.9, "act_fn": "gelu", "use_wandb": true, "wandb_project": "iit-train-seed-92-s-0.7-iit-0.7-b-0.4", "wandb_name": "case-67-seed-92-s-0.7-b-0.4-iit-0.7", "save_model_to_wandb": true, "device": "cuda", "clip_grad_norm": 0.1, "lr_scheduler": "linear", "model_pair": "strict", "same_size": false, "seed": 92, "batch_size": 256, "include_mlp": false, "detach_while_caching": true, "scheduler_val_metric": ["val/accuracy", "val/IIA", "val/strict_accuracy"], "siit_sampling": "sample_all", "val_iia_sampling": "all"}
71/edges.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e945aab5d78b85637385b757100d2ecca98cd757462e675249ce4a6c74ef4fb3
3
+ size 667
71/ll_model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c34ead7aa3822ede0aa18af62f496f7482f392f73e509a8aacd564f13bc4dde3
3
+ size 77454
71/ll_model_cfg.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87f7b3480ffb7944e2fc076320ff630bde55792609c0c56104352f379f593ad9
3
+ size 1100
71/meta.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"output_dir": "/circuits-benchmark/results", "atol": 0.05, "lr": 0.001, "use_single_loss": true, "iit_weight": 0.4, "behavior_weight": 0.4, "strict_weight": 1.0, "epochs": 3000, "early_stop_accuracy_threshold": 99.9, "act_fn": "gelu", "use_wandb": true, "wandb_project": "iit-train-seed-92-s-1-iit-0.4-b-0.4", "wandb_name": "case-71-seed-92-s-1-b-0.4-iit-0.4", "save_model_to_wandb": true, "device": "cuda", "clip_grad_norm": 0.1, "lr_scheduler": "linear", "model_pair": "strict", "same_size": false, "seed": 92, "batch_size": 256, "include_mlp": false, "detach_while_caching": true, "scheduler_val_metric": ["val/accuracy", "val/IIA", "val/strict_accuracy"], "siit_sampling": "sample_all", "val_iia_sampling": "all"}
91/edges.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a2cb02e0518a50c2b53be59ea3c3788ec80c0d3c6c1e70070bd7115abdfae77
3
+ size 113
91/ll_model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb2901707d8b6cac3afc14343af4defb5eeb00887f8b3bc9d77437be16cd0364
3
+ size 14734
91/ll_model_cfg.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55939fede379285f7ea82d2f609634380a17b39d053cb68be0ae278d1f34cb52
3
+ size 1093
91/meta.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"output_dir": "/circuits-benchmark/results", "atol": 0.05, "lr": 0.001, "use_single_loss": true, "iit_weight": 0.4, "behavior_weight": 0.4, "strict_weight": 1.0, "epochs": 3000, "early_stop_accuracy_threshold": 99.9, "act_fn": "gelu", "use_wandb": true, "wandb_project": "iit-train-seed-92-s-1-iit-0.4-b-0.4", "wandb_name": "case-91-seed-92-s-1-b-0.4-iit-0.4", "save_model_to_wandb": true, "device": "cuda", "clip_grad_norm": 0.1, "lr_scheduler": "linear", "model_pair": "strict", "same_size": false, "seed": 92, "batch_size": 256, "include_mlp": false, "detach_while_caching": true, "scheduler_val_metric": ["val/accuracy", "val/IIA", "val/strict_accuracy"], "siit_sampling": "sample_all", "val_iia_sampling": "all"}
benchmark_cases_metadata.csv CHANGED
@@ -8,6 +8,7 @@ case_id,url,task_description,max_seq_len,min_seq_len,transformer_cfg_file_url,tr
8
  79,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/79,Check if each number in a sequence is prime,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/79/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/79/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/79/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/79/edges.pkl,2,4,10,1,custom,4,16,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.16329931618554522,True,False,standard,False,2,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
9
  24,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/24,Identifies the first occurrence of each token in a sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/edges.pkl,2,36,10,9,custom,4,144,gelu,5,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1885618083164127,True,False,standard,False,3,False,31104,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
10
  82,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/82,Halve the elements in the second half of the sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/82/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/82/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/82/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/82/edges.pkl,4,24,10,6,custom,4,96,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.1059625885652035,True,False,standard,False,16,False,27648,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
 
11
  31,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/31,Identify if tokens in the sequence are anagrams of the word 'listen'.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/31/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/31/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/31/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/31/edges.pkl,2,4,10,1,custom,4,16,gelu,11,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.17056057308448835,True,False,standard,False,2,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
12
  72,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/72,Negate each element in the input sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/72/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/72/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/72/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/72/edges.pkl,2,4,10,1,custom,4,16,gelu,22,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.11202240672224079,True,False,standard,False,20,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.7,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
13
  104,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/104,Apply exponential function to all elements of the input sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/104/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/104/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/104/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/104/edges.pkl,2,4,10,1,custom,4,16,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1392621247645583,True,False,standard,False,11,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.7,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
@@ -32,6 +33,7 @@ ioi,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/ioi,Indirect O
32
  101,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/101,Check if each element is a square of an integer.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/101/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/101/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/101/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/101/edges.pkl,2,4,10,1,custom,4,16,gelu,32,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.12199885626608374,True,False,standard,False,2,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
33
  ioi_next_token,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/ioi_next_token,"Indirect Object Identification (IOI) task, trained using next token prediction.",16,16,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/edges.pkl,6,64,1024,16,gpt2,4,3072,gelu_new,50257,1e-05,False,True,False,False,False,False,GPT2LMHeadModel,False,gpt2,gpt2,LNPre,1,causal,False,,0.02886751345948129,True,False,standard,False,50257,False,2457600,False,False,True,torch.float32,False,False,10000,False,False,True,,0.05,0.001,False,1.0,1.0,0.4,,,,True,True,1.0,,,True,,256.0,True,True,"val/accuracy,val/IIA",,,True,0.65,False,0.0,True,max,,True,
34
  65,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/65,Calculate the cube root of each element in the input sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/65/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/65/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/65/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/65/edges.pkl,2,4,10,1,custom,4,16,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1392621247645583,True,False,standard,False,11,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.7,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
 
35
  14,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/14,Returns the count of 'a' in the input sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/14/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/14/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/14/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/14/edges.pkl,2,8,10,2,custom,4,32,gelu,5,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.15689290811054724,True,False,standard,False,10,False,1536,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
36
  95,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/95,Counts the distinct prime factors of each number in the input list.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/95/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/95/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/95/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/95/edges.pkl,2,4,10,1,custom,4,16,gelu,32,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.12060453783110546,True,False,standard,False,3,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.7,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
37
  84,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/84,Apply the arctangent function to each element of the input sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/84/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/84/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/84/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/84/edges.pkl,2,4,10,1,custom,4,16,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1392621247645583,True,False,standard,False,11,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.7,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
@@ -49,6 +51,7 @@ ioi_next_token,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/ioi
49
  122,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/122,Check if each number is divisible by 3.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/122/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/122/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/122/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/122/edges.pkl,2,4,10,1,custom,4,16,gelu,32,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.12199885626608374,True,False,standard,False,2,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
50
  85,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/85,Square each element of the input sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/85/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/85/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/85/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/85/edges.pkl,2,4,10,1,custom,4,16,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1392621247645583,True,False,standard,False,11,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.7,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
51
  2,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/2,Reverse the input sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/2/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/2/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/2/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/2/edges.pkl,4,56,10,14,custom,4,224,gelu,28,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.07593263966019993,True,False,standard,False,26,False,150528,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
 
52
  44,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/44,Replaces each element with the number of elements greater than it in the sequence,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/44/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/44/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/44/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/44/edges.pkl,2,24,10,6,custom,4,96,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.13719886811400708,True,False,standard,False,10,False,13824,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
53
  113,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/113,"Inverts the sequence if it is sorted in ascending order, otherwise leaves it unchanged.",10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/113/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/113/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/113/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/113/edges.pkl,7,88,10,22,custom,4,352,gelu,32,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.0512147519731584,True,False,standard,False,30,False,650496,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
54
  77,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/77,Apply the tangent function to each element of the sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/77/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/77/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/77/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/77/edges.pkl,2,4,10,1,custom,4,16,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1392621247645583,True,False,standard,False,11,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.7,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
 
8
  79,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/79,Check if each number in a sequence is prime,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/79/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/79/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/79/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/79/edges.pkl,2,4,10,1,custom,4,16,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.16329931618554522,True,False,standard,False,2,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
9
  24,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/24,Identifies the first occurrence of each token in a sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/edges.pkl,2,36,10,9,custom,4,144,gelu,5,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1885618083164127,True,False,standard,False,3,False,31104,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
10
  82,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/82,Halve the elements in the second half of the sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/82/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/82/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/82/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/82/edges.pkl,4,24,10,6,custom,4,96,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.1059625885652035,True,False,standard,False,16,False,27648,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
11
+ 71,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/71,Divide each element by the length of the sequence,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/71/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/71/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/71/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/71/edges.pkl,2,24,10,6,custom,4,96,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.08251369970070348,True,False,standard,False,60,False,13824,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,0.4,0.4,1.0,3000.0,99.9,gelu,True,True,0.1,linear,strict,False,92.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
12
  31,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/31,Identify if tokens in the sequence are anagrams of the word 'listen'.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/31/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/31/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/31/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/31/edges.pkl,2,4,10,1,custom,4,16,gelu,11,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.17056057308448835,True,False,standard,False,2,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
13
  72,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/72,Negate each element in the input sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/72/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/72/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/72/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/72/edges.pkl,2,4,10,1,custom,4,16,gelu,22,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.11202240672224079,True,False,standard,False,20,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.7,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
14
  104,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/104,Apply exponential function to all elements of the input sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/104/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/104/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/104/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/104/edges.pkl,2,4,10,1,custom,4,16,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1392621247645583,True,False,standard,False,11,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.7,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
 
33
  101,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/101,Check if each element is a square of an integer.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/101/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/101/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/101/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/101/edges.pkl,2,4,10,1,custom,4,16,gelu,32,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.12199885626608374,True,False,standard,False,2,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
34
  ioi_next_token,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/ioi_next_token,"Indirect Object Identification (IOI) task, trained using next token prediction.",16,16,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/edges.pkl,6,64,1024,16,gpt2,4,3072,gelu_new,50257,1e-05,False,True,False,False,False,False,GPT2LMHeadModel,False,gpt2,gpt2,LNPre,1,causal,False,,0.02886751345948129,True,False,standard,False,50257,False,2457600,False,False,True,torch.float32,False,False,10000,False,False,True,,0.05,0.001,False,1.0,1.0,0.4,,,,True,True,1.0,,,True,,256.0,True,True,"val/accuracy,val/IIA",,,True,0.65,False,0.0,True,max,,True,
35
  65,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/65,Calculate the cube root of each element in the input sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/65/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/65/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/65/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/65/edges.pkl,2,4,10,1,custom,4,16,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1392621247645583,True,False,standard,False,11,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.7,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
36
+ 91,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/91,Set all values below a threshold to 0,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/91/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/91/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/91/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/91/edges.pkl,2,4,10,1,custom,4,16,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.14368424162141993,True,False,standard,False,9,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,0.4,0.4,1.0,3000.0,99.9,gelu,True,True,0.1,linear,strict,False,92.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
37
  14,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/14,Returns the count of 'a' in the input sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/14/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/14/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/14/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/14/edges.pkl,2,8,10,2,custom,4,32,gelu,5,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.15689290811054724,True,False,standard,False,10,False,1536,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
38
  95,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/95,Counts the distinct prime factors of each number in the input list.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/95/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/95/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/95/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/95/edges.pkl,2,4,10,1,custom,4,16,gelu,32,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.12060453783110546,True,False,standard,False,3,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.7,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
39
  84,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/84,Apply the arctangent function to each element of the input sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/84/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/84/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/84/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/84/edges.pkl,2,4,10,1,custom,4,16,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1392621247645583,True,False,standard,False,11,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.7,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
 
51
  122,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/122,Check if each number is divisible by 3.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/122/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/122/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/122/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/122/edges.pkl,2,4,10,1,custom,4,16,gelu,32,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.12199885626608374,True,False,standard,False,2,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
52
  85,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/85,Square each element of the input sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/85/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/85/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/85/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/85/edges.pkl,2,4,10,1,custom,4,16,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1392621247645583,True,False,standard,False,11,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.7,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
53
  2,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/2,Reverse the input sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/2/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/2/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/2/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/2/edges.pkl,4,56,10,14,custom,4,224,gelu,28,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.07593263966019993,True,False,standard,False,26,False,150528,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
54
+ 67,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/67,Multiply each element of the sequence by the length of the sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/67/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/67/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/67/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/67/edges.pkl,2,24,10,6,custom,4,96,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.0917662935482247,True,False,standard,False,42,False,13824,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,0.7,0.4,0.7,3000.0,99.9,gelu,True,True,0.1,linear,strict,False,92.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
55
  44,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/44,Replaces each element with the number of elements greater than it in the sequence,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/44/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/44/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/44/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/44/edges.pkl,2,24,10,6,custom,4,96,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.13719886811400708,True,False,standard,False,10,False,13824,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
56
  113,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/113,"Inverts the sequence if it is sorted in ascending order, otherwise leaves it unchanged.",10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/113/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/113/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/113/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/113/edges.pkl,7,88,10,22,custom,4,352,gelu,32,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.0512147519731584,True,False,standard,False,30,False,650496,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
57
  77,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/77,Apply the tangent function to each element of the sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/77/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/77/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/77/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/77/edges.pkl,2,4,10,1,custom,4,16,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1392621247645583,True,False,standard,False,11,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.7,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
benchmark_cases_metadata.parquet CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:049d63f22c918edb98d89c9eb69f091016c4780b033e5559523abd7e50188238
3
- size 76100
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:085f02859e3d07ce9c964936ce063397f84ac7a80e01429a9da0efabd2aa2dea
3
+ size 76400
benchmark_metadata.json CHANGED
@@ -1051,6 +1051,131 @@
1051
  "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/82/ll_model.pth",
1052
  "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/82/edges.pkl"
1053
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1054
  {
1055
  "case_id": "31",
1056
  "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/31",
@@ -3991,6 +4116,131 @@
3991
  "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/65/ll_model.pth",
3992
  "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/65/edges.pkl"
3993
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3994
  {
3995
  "case_id": "14",
3996
  "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/14",
@@ -6181,6 +6431,131 @@
6181
  "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/2/ll_model.pth",
6182
  "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/2/edges.pkl"
6183
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6184
  {
6185
  "case_id": "44",
6186
  "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/44",
 
1051
  "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/82/ll_model.pth",
1052
  "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/82/edges.pkl"
1053
  },
1054
+ {
1055
+ "case_id": "71",
1056
+ "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/71",
1057
+ "task_description": "Divide each element by the length of the sequence",
1058
+ "vocab": [
1059
+ 0,
1060
+ 1,
1061
+ 2,
1062
+ 3,
1063
+ 4,
1064
+ 5,
1065
+ 6,
1066
+ 7,
1067
+ 8,
1068
+ 9,
1069
+ 10
1070
+ ],
1071
+ "max_seq_len": 10,
1072
+ "min_seq_len": 4,
1073
+ "files": [
1074
+ {
1075
+ "file_name": "edges.pkl",
1076
+ "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/71/edges.pkl"
1077
+ },
1078
+ {
1079
+ "file_name": "ll_model.pth",
1080
+ "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/71/ll_model.pth"
1081
+ },
1082
+ {
1083
+ "file_name": "ll_model_cfg.pkl",
1084
+ "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/71/ll_model_cfg.pkl"
1085
+ },
1086
+ {
1087
+ "file_name": "meta.json",
1088
+ "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/71/meta.json"
1089
+ }
1090
+ ],
1091
+ "transformer_cfg": {
1092
+ "n_layers": 2,
1093
+ "d_model": 24,
1094
+ "n_ctx": 10,
1095
+ "d_head": 6,
1096
+ "model_name": "custom",
1097
+ "n_heads": 4,
1098
+ "d_mlp": 96,
1099
+ "act_fn": "gelu",
1100
+ "d_vocab": 13,
1101
+ "eps": 1e-05,
1102
+ "use_attn_result": true,
1103
+ "use_attn_scale": true,
1104
+ "use_split_qkv_input": true,
1105
+ "use_hook_mlp_in": true,
1106
+ "use_attn_in": false,
1107
+ "use_local_attn": false,
1108
+ "original_architecture": null,
1109
+ "from_checkpoint": false,
1110
+ "checkpoint_index": null,
1111
+ "checkpoint_label_type": null,
1112
+ "checkpoint_value": null,
1113
+ "tokenizer_name": null,
1114
+ "window_size": null,
1115
+ "attn_types": null,
1116
+ "init_mode": "gpt2",
1117
+ "normalization_type": null,
1118
+ "n_devices": 1,
1119
+ "attention_dir": "bidirectional",
1120
+ "attn_only": false,
1121
+ "seed": 0,
1122
+ "initializer_range": 0.08251369970070348,
1123
+ "init_weights": true,
1124
+ "scale_attn_by_inverse_layer_idx": false,
1125
+ "positional_embedding_type": "standard",
1126
+ "final_rms": false,
1127
+ "d_vocab_out": 60,
1128
+ "parallel_attn_mlp": false,
1129
+ "rotary_dim": null,
1130
+ "n_params": 13824,
1131
+ "use_hook_tokens": false,
1132
+ "gated_mlp": false,
1133
+ "default_prepend_bos": true,
1134
+ "dtype": "torch.float32",
1135
+ "tokenizer_prepends_bos": null,
1136
+ "n_key_value_heads": null,
1137
+ "post_embedding_ln": false,
1138
+ "rotary_base": 10000,
1139
+ "trust_remote_code": false,
1140
+ "rotary_adjacent_pairs": false,
1141
+ "load_in_4bit": false,
1142
+ "num_experts": null,
1143
+ "experts_per_token": null
1144
+ },
1145
+ "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/71/ll_model_cfg.pkl",
1146
+ "training_args": {
1147
+ "output_dir": "/circuits-benchmark/results",
1148
+ "atol": 0.05,
1149
+ "lr": 0.001,
1150
+ "use_single_loss": true,
1151
+ "iit_weight": 0.4,
1152
+ "behavior_weight": 0.4,
1153
+ "strict_weight": 1.0,
1154
+ "epochs": 3000,
1155
+ "early_stop_accuracy_threshold": 99.9,
1156
+ "act_fn": "gelu",
1157
+ "use_wandb": true,
1158
+ "save_model_to_wandb": true,
1159
+ "clip_grad_norm": 0.1,
1160
+ "lr_scheduler": "linear",
1161
+ "model_pair": "strict",
1162
+ "same_size": false,
1163
+ "seed": 92,
1164
+ "batch_size": 256,
1165
+ "include_mlp": false,
1166
+ "detach_while_caching": true,
1167
+ "scheduler_val_metric": [
1168
+ "val/accuracy",
1169
+ "val/IIA",
1170
+ "val/strict_accuracy"
1171
+ ],
1172
+ "siit_sampling": "sample_all",
1173
+ "val_iia_sampling": "all"
1174
+ },
1175
+ "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/71/meta.json",
1176
+ "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/71/ll_model.pth",
1177
+ "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/71/edges.pkl"
1178
+ },
1179
  {
1180
  "case_id": "31",
1181
  "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/31",
 
4116
  "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/65/ll_model.pth",
4117
  "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/65/edges.pkl"
4118
  },
4119
+ {
4120
+ "case_id": "91",
4121
+ "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/91",
4122
+ "task_description": "Set all values below a threshold to 0",
4123
+ "vocab": [
4124
+ 0,
4125
+ 1,
4126
+ 2,
4127
+ 3,
4128
+ 4,
4129
+ 5,
4130
+ 6,
4131
+ 7,
4132
+ 8,
4133
+ 9,
4134
+ 10
4135
+ ],
4136
+ "max_seq_len": 10,
4137
+ "min_seq_len": 4,
4138
+ "files": [
4139
+ {
4140
+ "file_name": "edges.pkl",
4141
+ "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/91/edges.pkl"
4142
+ },
4143
+ {
4144
+ "file_name": "ll_model.pth",
4145
+ "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/91/ll_model.pth"
4146
+ },
4147
+ {
4148
+ "file_name": "ll_model_cfg.pkl",
4149
+ "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/91/ll_model_cfg.pkl"
4150
+ },
4151
+ {
4152
+ "file_name": "meta.json",
4153
+ "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/91/meta.json"
4154
+ }
4155
+ ],
4156
+ "transformer_cfg": {
4157
+ "n_layers": 2,
4158
+ "d_model": 4,
4159
+ "n_ctx": 10,
4160
+ "d_head": 1,
4161
+ "model_name": "custom",
4162
+ "n_heads": 4,
4163
+ "d_mlp": 16,
4164
+ "act_fn": "gelu",
4165
+ "d_vocab": 13,
4166
+ "eps": 1e-05,
4167
+ "use_attn_result": true,
4168
+ "use_attn_scale": true,
4169
+ "use_split_qkv_input": true,
4170
+ "use_hook_mlp_in": true,
4171
+ "use_attn_in": false,
4172
+ "use_local_attn": false,
4173
+ "original_architecture": null,
4174
+ "from_checkpoint": false,
4175
+ "checkpoint_index": null,
4176
+ "checkpoint_label_type": null,
4177
+ "checkpoint_value": null,
4178
+ "tokenizer_name": null,
4179
+ "window_size": null,
4180
+ "attn_types": null,
4181
+ "init_mode": "gpt2",
4182
+ "normalization_type": null,
4183
+ "n_devices": 1,
4184
+ "attention_dir": "causal",
4185
+ "attn_only": false,
4186
+ "seed": 0,
4187
+ "initializer_range": 0.14368424162141993,
4188
+ "init_weights": true,
4189
+ "scale_attn_by_inverse_layer_idx": false,
4190
+ "positional_embedding_type": "standard",
4191
+ "final_rms": false,
4192
+ "d_vocab_out": 9,
4193
+ "parallel_attn_mlp": false,
4194
+ "rotary_dim": null,
4195
+ "n_params": 384,
4196
+ "use_hook_tokens": false,
4197
+ "gated_mlp": false,
4198
+ "default_prepend_bos": true,
4199
+ "dtype": "torch.float32",
4200
+ "tokenizer_prepends_bos": null,
4201
+ "n_key_value_heads": null,
4202
+ "post_embedding_ln": false,
4203
+ "rotary_base": 10000,
4204
+ "trust_remote_code": false,
4205
+ "rotary_adjacent_pairs": false,
4206
+ "load_in_4bit": false,
4207
+ "num_experts": null,
4208
+ "experts_per_token": null
4209
+ },
4210
+ "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/91/ll_model_cfg.pkl",
4211
+ "training_args": {
4212
+ "output_dir": "/circuits-benchmark/results",
4213
+ "atol": 0.05,
4214
+ "lr": 0.001,
4215
+ "use_single_loss": true,
4216
+ "iit_weight": 0.4,
4217
+ "behavior_weight": 0.4,
4218
+ "strict_weight": 1.0,
4219
+ "epochs": 3000,
4220
+ "early_stop_accuracy_threshold": 99.9,
4221
+ "act_fn": "gelu",
4222
+ "use_wandb": true,
4223
+ "save_model_to_wandb": true,
4224
+ "clip_grad_norm": 0.1,
4225
+ "lr_scheduler": "linear",
4226
+ "model_pair": "strict",
4227
+ "same_size": false,
4228
+ "seed": 92,
4229
+ "batch_size": 256,
4230
+ "include_mlp": false,
4231
+ "detach_while_caching": true,
4232
+ "scheduler_val_metric": [
4233
+ "val/accuracy",
4234
+ "val/IIA",
4235
+ "val/strict_accuracy"
4236
+ ],
4237
+ "siit_sampling": "sample_all",
4238
+ "val_iia_sampling": "all"
4239
+ },
4240
+ "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/91/meta.json",
4241
+ "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/91/ll_model.pth",
4242
+ "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/91/edges.pkl"
4243
+ },
4244
  {
4245
  "case_id": "14",
4246
  "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/14",
 
6431
  "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/2/ll_model.pth",
6432
  "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/2/edges.pkl"
6433
  },
6434
+ {
6435
+ "case_id": "67",
6436
+ "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/67",
6437
+ "task_description": "Multiply each element of the sequence by the length of the sequence.",
6438
+ "vocab": [
6439
+ 0,
6440
+ 1,
6441
+ 2,
6442
+ 3,
6443
+ 4,
6444
+ 5,
6445
+ 6,
6446
+ 7,
6447
+ 8,
6448
+ 9,
6449
+ 10
6450
+ ],
6451
+ "max_seq_len": 10,
6452
+ "min_seq_len": 4,
6453
+ "files": [
6454
+ {
6455
+ "file_name": "edges.pkl",
6456
+ "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/67/edges.pkl"
6457
+ },
6458
+ {
6459
+ "file_name": "ll_model.pth",
6460
+ "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/67/ll_model.pth"
6461
+ },
6462
+ {
6463
+ "file_name": "ll_model_cfg.pkl",
6464
+ "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/67/ll_model_cfg.pkl"
6465
+ },
6466
+ {
6467
+ "file_name": "meta.json",
6468
+ "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/67/meta.json"
6469
+ }
6470
+ ],
6471
+ "transformer_cfg": {
6472
+ "n_layers": 2,
6473
+ "d_model": 24,
6474
+ "n_ctx": 10,
6475
+ "d_head": 6,
6476
+ "model_name": "custom",
6477
+ "n_heads": 4,
6478
+ "d_mlp": 96,
6479
+ "act_fn": "gelu",
6480
+ "d_vocab": 13,
6481
+ "eps": 1e-05,
6482
+ "use_attn_result": true,
6483
+ "use_attn_scale": true,
6484
+ "use_split_qkv_input": true,
6485
+ "use_hook_mlp_in": true,
6486
+ "use_attn_in": false,
6487
+ "use_local_attn": false,
6488
+ "original_architecture": null,
6489
+ "from_checkpoint": false,
6490
+ "checkpoint_index": null,
6491
+ "checkpoint_label_type": null,
6492
+ "checkpoint_value": null,
6493
+ "tokenizer_name": null,
6494
+ "window_size": null,
6495
+ "attn_types": null,
6496
+ "init_mode": "gpt2",
6497
+ "normalization_type": null,
6498
+ "n_devices": 1,
6499
+ "attention_dir": "bidirectional",
6500
+ "attn_only": false,
6501
+ "seed": 0,
6502
+ "initializer_range": 0.0917662935482247,
6503
+ "init_weights": true,
6504
+ "scale_attn_by_inverse_layer_idx": false,
6505
+ "positional_embedding_type": "standard",
6506
+ "final_rms": false,
6507
+ "d_vocab_out": 42,
6508
+ "parallel_attn_mlp": false,
6509
+ "rotary_dim": null,
6510
+ "n_params": 13824,
6511
+ "use_hook_tokens": false,
6512
+ "gated_mlp": false,
6513
+ "default_prepend_bos": true,
6514
+ "dtype": "torch.float32",
6515
+ "tokenizer_prepends_bos": null,
6516
+ "n_key_value_heads": null,
6517
+ "post_embedding_ln": false,
6518
+ "rotary_base": 10000,
6519
+ "trust_remote_code": false,
6520
+ "rotary_adjacent_pairs": false,
6521
+ "load_in_4bit": false,
6522
+ "num_experts": null,
6523
+ "experts_per_token": null
6524
+ },
6525
+ "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/67/ll_model_cfg.pkl",
6526
+ "training_args": {
6527
+ "output_dir": "/circuits-benchmark/results",
6528
+ "atol": 0.05,
6529
+ "lr": 0.001,
6530
+ "use_single_loss": true,
6531
+ "iit_weight": 0.7,
6532
+ "behavior_weight": 0.4,
6533
+ "strict_weight": 0.7,
6534
+ "epochs": 3000,
6535
+ "early_stop_accuracy_threshold": 99.9,
6536
+ "act_fn": "gelu",
6537
+ "use_wandb": true,
6538
+ "save_model_to_wandb": true,
6539
+ "clip_grad_norm": 0.1,
6540
+ "lr_scheduler": "linear",
6541
+ "model_pair": "strict",
6542
+ "same_size": false,
6543
+ "seed": 92,
6544
+ "batch_size": 256,
6545
+ "include_mlp": false,
6546
+ "detach_while_caching": true,
6547
+ "scheduler_val_metric": [
6548
+ "val/accuracy",
6549
+ "val/IIA",
6550
+ "val/strict_accuracy"
6551
+ ],
6552
+ "siit_sampling": "sample_all",
6553
+ "val_iia_sampling": "all"
6554
+ },
6555
+ "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/67/meta.json",
6556
+ "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/67/ll_model.pth",
6557
+ "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/67/edges.pkl"
6558
+ },
6559
  {
6560
  "case_id": "44",
6561
  "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/44",