iarcuschin
committed on
Commit
•
217c82c
1
Parent(s):
92ed75d
Add new models
Browse files
- 67/edges.pkl +3 -0
- 67/ll_model.pth +3 -0
- 67/ll_model_cfg.pkl +3 -0
- 67/meta.json +1 -0
- 71/edges.pkl +3 -0
- 71/ll_model.pth +3 -0
- 71/ll_model_cfg.pkl +3 -0
- 71/meta.json +1 -0
- 91/edges.pkl +3 -0
- 91/ll_model.pth +3 -0
- 91/ll_model_cfg.pkl +3 -0
- 91/meta.json +1 -0
- benchmark_cases_metadata.csv +3 -0
- benchmark_cases_metadata.parquet +2 -2
- benchmark_metadata.json +375 -0
67/edges.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e945aab5d78b85637385b757100d2ecca98cd757462e675249ce4a6c74ef4fb3
|
3 |
+
size 667
|
67/ll_model.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c03fe8705b1b01ab8f9020df80f3f06bd0d37780ec97f1de35ed385148ea79ba
|
3 |
+
size 75626
|
67/ll_model_cfg.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ede2a8d814262f05c481f13b50315814be1ec23294e84ce9fdc0c86574f7a87c
|
3 |
+
size 1100
|
67/meta.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"output_dir": "/circuits-benchmark/results", "atol": 0.05, "lr": 0.001, "use_single_loss": true, "iit_weight": 0.7, "behavior_weight": 0.4, "strict_weight": 0.7, "epochs": 3000, "early_stop_accuracy_threshold": 99.9, "act_fn": "gelu", "use_wandb": true, "wandb_project": "iit-train-seed-92-s-0.7-iit-0.7-b-0.4", "wandb_name": "case-67-seed-92-s-0.7-b-0.4-iit-0.7", "save_model_to_wandb": true, "device": "cuda", "clip_grad_norm": 0.1, "lr_scheduler": "linear", "model_pair": "strict", "same_size": false, "seed": 92, "batch_size": 256, "include_mlp": false, "detach_while_caching": true, "scheduler_val_metric": ["val/accuracy", "val/IIA", "val/strict_accuracy"], "siit_sampling": "sample_all", "val_iia_sampling": "all"}
|
71/edges.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e945aab5d78b85637385b757100d2ecca98cd757462e675249ce4a6c74ef4fb3
|
3 |
+
size 667
|
71/ll_model.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c34ead7aa3822ede0aa18af62f496f7482f392f73e509a8aacd564f13bc4dde3
|
3 |
+
size 77454
|
71/ll_model_cfg.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:87f7b3480ffb7944e2fc076320ff630bde55792609c0c56104352f379f593ad9
|
3 |
+
size 1100
|
71/meta.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"output_dir": "/circuits-benchmark/results", "atol": 0.05, "lr": 0.001, "use_single_loss": true, "iit_weight": 0.4, "behavior_weight": 0.4, "strict_weight": 1.0, "epochs": 3000, "early_stop_accuracy_threshold": 99.9, "act_fn": "gelu", "use_wandb": true, "wandb_project": "iit-train-seed-92-s-1-iit-0.4-b-0.4", "wandb_name": "case-71-seed-92-s-1-b-0.4-iit-0.4", "save_model_to_wandb": true, "device": "cuda", "clip_grad_norm": 0.1, "lr_scheduler": "linear", "model_pair": "strict", "same_size": false, "seed": 92, "batch_size": 256, "include_mlp": false, "detach_while_caching": true, "scheduler_val_metric": ["val/accuracy", "val/IIA", "val/strict_accuracy"], "siit_sampling": "sample_all", "val_iia_sampling": "all"}
|
91/edges.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9a2cb02e0518a50c2b53be59ea3c3788ec80c0d3c6c1e70070bd7115abdfae77
|
3 |
+
size 113
|
91/ll_model.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fb2901707d8b6cac3afc14343af4defb5eeb00887f8b3bc9d77437be16cd0364
|
3 |
+
size 14734
|
91/ll_model_cfg.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:55939fede379285f7ea82d2f609634380a17b39d053cb68be0ae278d1f34cb52
|
3 |
+
size 1093
|
91/meta.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"output_dir": "/circuits-benchmark/results", "atol": 0.05, "lr": 0.001, "use_single_loss": true, "iit_weight": 0.4, "behavior_weight": 0.4, "strict_weight": 1.0, "epochs": 3000, "early_stop_accuracy_threshold": 99.9, "act_fn": "gelu", "use_wandb": true, "wandb_project": "iit-train-seed-92-s-1-iit-0.4-b-0.4", "wandb_name": "case-91-seed-92-s-1-b-0.4-iit-0.4", "save_model_to_wandb": true, "device": "cuda", "clip_grad_norm": 0.1, "lr_scheduler": "linear", "model_pair": "strict", "same_size": false, "seed": 92, "batch_size": 256, "include_mlp": false, "detach_while_caching": true, "scheduler_val_metric": ["val/accuracy", "val/IIA", "val/strict_accuracy"], "siit_sampling": "sample_all", "val_iia_sampling": "all"}
|
benchmark_cases_metadata.csv
CHANGED
@@ -8,6 +8,7 @@ case_id,url,task_description,max_seq_len,min_seq_len,transformer_cfg_file_url,tr
|
|
8 |
79,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/79,Check if each number in a sequence is prime,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/79/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/79/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/79/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/79/edges.pkl,2,4,10,1,custom,4,16,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.16329931618554522,True,False,standard,False,2,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
|
9 |
24,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/24,Identifies the first occurrence of each token in a sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/edges.pkl,2,36,10,9,custom,4,144,gelu,5,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1885618083164127,True,False,standard,False,3,False,31104,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
|
10 |
82,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/82,Halve the elements in the second half of the sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/82/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/82/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/82/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/82/edges.pkl,4,24,10,6,custom,4,96,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.1059625885652035,True,False,standard,False,16,False,27648,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
|
|
|
11 |
31,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/31,Identify if tokens in the sequence are anagrams of the word 'listen'.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/31/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/31/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/31/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/31/edges.pkl,2,4,10,1,custom,4,16,gelu,11,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.17056057308448835,True,False,standard,False,2,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
|
12 |
72,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/72,Negate each element in the input sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/72/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/72/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/72/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/72/edges.pkl,2,4,10,1,custom,4,16,gelu,22,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.11202240672224079,True,False,standard,False,20,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.7,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
|
13 |
104,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/104,Apply exponential function to all elements of the input sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/104/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/104/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/104/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/104/edges.pkl,2,4,10,1,custom,4,16,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1392621247645583,True,False,standard,False,11,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.7,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
|
@@ -32,6 +33,7 @@ ioi,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/ioi,Indirect O
|
|
32 |
101,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/101,Check if each element is a square of an integer.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/101/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/101/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/101/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/101/edges.pkl,2,4,10,1,custom,4,16,gelu,32,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.12199885626608374,True,False,standard,False,2,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
|
33 |
ioi_next_token,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/ioi_next_token,"Indirect Object Identification (IOI) task, trained using next token prediction.",16,16,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/edges.pkl,6,64,1024,16,gpt2,4,3072,gelu_new,50257,1e-05,False,True,False,False,False,False,GPT2LMHeadModel,False,gpt2,gpt2,LNPre,1,causal,False,,0.02886751345948129,True,False,standard,False,50257,False,2457600,False,False,True,torch.float32,False,False,10000,False,False,True,,0.05,0.001,False,1.0,1.0,0.4,,,,True,True,1.0,,,True,,256.0,True,True,"val/accuracy,val/IIA",,,True,0.65,False,0.0,True,max,,True,
|
34 |
65,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/65,Calculate the cube root of each element in the input sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/65/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/65/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/65/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/65/edges.pkl,2,4,10,1,custom,4,16,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1392621247645583,True,False,standard,False,11,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.7,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
|
|
|
35 |
14,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/14,Returns the count of 'a' in the input sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/14/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/14/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/14/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/14/edges.pkl,2,8,10,2,custom,4,32,gelu,5,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.15689290811054724,True,False,standard,False,10,False,1536,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
|
36 |
95,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/95,Counts the distinct prime factors of each number in the input list.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/95/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/95/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/95/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/95/edges.pkl,2,4,10,1,custom,4,16,gelu,32,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.12060453783110546,True,False,standard,False,3,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.7,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
|
37 |
84,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/84,Apply the arctangent function to each element of the input sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/84/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/84/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/84/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/84/edges.pkl,2,4,10,1,custom,4,16,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1392621247645583,True,False,standard,False,11,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.7,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
|
@@ -49,6 +51,7 @@ ioi_next_token,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/ioi
|
|
49 |
122,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/122,Check if each number is divisible by 3.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/122/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/122/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/122/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/122/edges.pkl,2,4,10,1,custom,4,16,gelu,32,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.12199885626608374,True,False,standard,False,2,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
|
50 |
85,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/85,Square each element of the input sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/85/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/85/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/85/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/85/edges.pkl,2,4,10,1,custom,4,16,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1392621247645583,True,False,standard,False,11,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.7,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
|
51 |
2,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/2,Reverse the input sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/2/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/2/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/2/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/2/edges.pkl,4,56,10,14,custom,4,224,gelu,28,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.07593263966019993,True,False,standard,False,26,False,150528,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
|
|
|
52 |
44,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/44,Replaces each element with the number of elements greater than it in the sequence,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/44/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/44/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/44/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/44/edges.pkl,2,24,10,6,custom,4,96,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.13719886811400708,True,False,standard,False,10,False,13824,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
|
53 |
113,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/113,"Inverts the sequence if it is sorted in ascending order, otherwise leaves it unchanged.",10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/113/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/113/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/113/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/113/edges.pkl,7,88,10,22,custom,4,352,gelu,32,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.0512147519731584,True,False,standard,False,30,False,650496,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
|
54 |
77,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/77,Apply the tangent function to each element of the sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/77/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/77/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/77/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/77/edges.pkl,2,4,10,1,custom,4,16,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1392621247645583,True,False,standard,False,11,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.7,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
|
|
|
8 |
79,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/79,Check if each number in a sequence is prime,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/79/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/79/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/79/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/79/edges.pkl,2,4,10,1,custom,4,16,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.16329931618554522,True,False,standard,False,2,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
|
9 |
24,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/24,Identifies the first occurrence of each token in a sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/edges.pkl,2,36,10,9,custom,4,144,gelu,5,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1885618083164127,True,False,standard,False,3,False,31104,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
|
10 |
82,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/82,Halve the elements in the second half of the sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/82/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/82/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/82/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/82/edges.pkl,4,24,10,6,custom,4,96,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.1059625885652035,True,False,standard,False,16,False,27648,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
|
11 |
+
71,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/71,Divide each element by the length of the sequence,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/71/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/71/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/71/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/71/edges.pkl,2,24,10,6,custom,4,96,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.08251369970070348,True,False,standard,False,60,False,13824,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,0.4,0.4,1.0,3000.0,99.9,gelu,True,True,0.1,linear,strict,False,92.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
|
12 |
31,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/31,Identify if tokens in the sequence are anagrams of the word 'listen'.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/31/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/31/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/31/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/31/edges.pkl,2,4,10,1,custom,4,16,gelu,11,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.17056057308448835,True,False,standard,False,2,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
|
13 |
72,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/72,Negate each element in the input sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/72/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/72/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/72/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/72/edges.pkl,2,4,10,1,custom,4,16,gelu,22,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.11202240672224079,True,False,standard,False,20,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.7,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
|
14 |
104,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/104,Apply exponential function to all elements of the input sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/104/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/104/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/104/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/104/edges.pkl,2,4,10,1,custom,4,16,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1392621247645583,True,False,standard,False,11,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.7,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
|
|
|
33 |
101,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/101,Check if each element is a square of an integer.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/101/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/101/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/101/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/101/edges.pkl,2,4,10,1,custom,4,16,gelu,32,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.12199885626608374,True,False,standard,False,2,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
|
34 |
ioi_next_token,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/ioi_next_token,"Indirect Object Identification (IOI) task, trained using next token prediction.",16,16,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/edges.pkl,6,64,1024,16,gpt2,4,3072,gelu_new,50257,1e-05,False,True,False,False,False,False,GPT2LMHeadModel,False,gpt2,gpt2,LNPre,1,causal,False,,0.02886751345948129,True,False,standard,False,50257,False,2457600,False,False,True,torch.float32,False,False,10000,False,False,True,,0.05,0.001,False,1.0,1.0,0.4,,,,True,True,1.0,,,True,,256.0,True,True,"val/accuracy,val/IIA",,,True,0.65,False,0.0,True,max,,True,
|
35 |
65,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/65,Calculate the cube root of each element in the input sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/65/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/65/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/65/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/65/edges.pkl,2,4,10,1,custom,4,16,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1392621247645583,True,False,standard,False,11,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.7,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
|
36 |
+
91,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/91,Set all values below a threshold to 0,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/91/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/91/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/91/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/91/edges.pkl,2,4,10,1,custom,4,16,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.14368424162141993,True,False,standard,False,9,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,0.4,0.4,1.0,3000.0,99.9,gelu,True,True,0.1,linear,strict,False,92.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
|
37 |
14,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/14,Returns the count of 'a' in the input sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/14/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/14/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/14/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/14/edges.pkl,2,8,10,2,custom,4,32,gelu,5,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.15689290811054724,True,False,standard,False,10,False,1536,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
|
38 |
95,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/95,Counts the distinct prime factors of each number in the input list.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/95/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/95/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/95/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/95/edges.pkl,2,4,10,1,custom,4,16,gelu,32,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.12060453783110546,True,False,standard,False,3,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.7,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
|
39 |
84,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/84,Apply the arctangent function to each element of the input sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/84/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/84/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/84/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/84/edges.pkl,2,4,10,1,custom,4,16,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1392621247645583,True,False,standard,False,11,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.7,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
|
|
|
51 |
122,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/122,Check if each number is divisible by 3.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/122/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/122/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/122/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/122/edges.pkl,2,4,10,1,custom,4,16,gelu,32,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.12199885626608374,True,False,standard,False,2,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
|
52 |
85,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/85,Square each element of the input sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/85/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/85/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/85/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/85/edges.pkl,2,4,10,1,custom,4,16,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1392621247645583,True,False,standard,False,11,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.7,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
|
53 |
2,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/2,Reverse the input sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/2/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/2/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/2/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/2/edges.pkl,4,56,10,14,custom,4,224,gelu,28,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.07593263966019993,True,False,standard,False,26,False,150528,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
|
54 |
+
67,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/67,Multiply each element of the sequence by the length of the sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/67/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/67/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/67/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/67/edges.pkl,2,24,10,6,custom,4,96,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.0917662935482247,True,False,standard,False,42,False,13824,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,0.7,0.4,0.7,3000.0,99.9,gelu,True,True,0.1,linear,strict,False,92.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
|
55 |
44,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/44,Replaces each element with the number of elements greater than it in the sequence,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/44/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/44/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/44/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/44/edges.pkl,2,24,10,6,custom,4,96,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.13719886811400708,True,False,standard,False,10,False,13824,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
|
56 |
113,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/113,"Inverts the sequence if it is sorted in ascending order, otherwise leaves it unchanged.",10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/113/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/113/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/113/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/113/edges.pkl,7,88,10,22,custom,4,352,gelu,32,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.0512147519731584,True,False,standard,False,30,False,650496,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.4,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
|
57 |
77,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/77,Apply the tangent function to each element of the sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/77/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/77/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/77/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/77/edges.pkl,2,4,10,1,custom,4,16,gelu,13,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1392621247645583,True,False,standard,False,11,False,384,False,False,True,torch.float32,False,False,10000,False,False,False,/circuits-benchmark/results,0.05,0.001,True,1.0,0.4,0.7,1000.0,99.9,gelu,True,True,0.1,linear,strict,False,67.0,256.0,False,True,"val/accuracy,val/IIA,val/strict_accuracy",sample_all,all,True,,True,,True,,,True,
|
benchmark_cases_metadata.parquet
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:085f02859e3d07ce9c964936ce063397f84ac7a80e01429a9da0efabd2aa2dea
|
3 |
+
size 76400
|
benchmark_metadata.json
CHANGED
@@ -1051,6 +1051,131 @@
|
|
1051 |
"weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/82/ll_model.pth",
|
1052 |
"circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/82/edges.pkl"
|
1053 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1054 |
{
|
1055 |
"case_id": "31",
|
1056 |
"url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/31",
|
@@ -3991,6 +4116,131 @@
|
|
3991 |
"weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/65/ll_model.pth",
|
3992 |
"circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/65/edges.pkl"
|
3993 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3994 |
{
|
3995 |
"case_id": "14",
|
3996 |
"url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/14",
|
@@ -6181,6 +6431,131 @@
|
|
6181 |
"weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/2/ll_model.pth",
|
6182 |
"circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/2/edges.pkl"
|
6183 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6184 |
{
|
6185 |
"case_id": "44",
|
6186 |
"url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/44",
|
|
|
1051 |
"weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/82/ll_model.pth",
|
1052 |
"circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/82/edges.pkl"
|
1053 |
},
|
1054 |
+
{
|
1055 |
+
"case_id": "71",
|
1056 |
+
"url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/71",
|
1057 |
+
"task_description": "Divide each element by the length of the sequence",
|
1058 |
+
"vocab": [
|
1059 |
+
0,
|
1060 |
+
1,
|
1061 |
+
2,
|
1062 |
+
3,
|
1063 |
+
4,
|
1064 |
+
5,
|
1065 |
+
6,
|
1066 |
+
7,
|
1067 |
+
8,
|
1068 |
+
9,
|
1069 |
+
10
|
1070 |
+
],
|
1071 |
+
"max_seq_len": 10,
|
1072 |
+
"min_seq_len": 4,
|
1073 |
+
"files": [
|
1074 |
+
{
|
1075 |
+
"file_name": "edges.pkl",
|
1076 |
+
"url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/71/edges.pkl"
|
1077 |
+
},
|
1078 |
+
{
|
1079 |
+
"file_name": "ll_model.pth",
|
1080 |
+
"url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/71/ll_model.pth"
|
1081 |
+
},
|
1082 |
+
{
|
1083 |
+
"file_name": "ll_model_cfg.pkl",
|
1084 |
+
"url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/71/ll_model_cfg.pkl"
|
1085 |
+
},
|
1086 |
+
{
|
1087 |
+
"file_name": "meta.json",
|
1088 |
+
"url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/71/meta.json"
|
1089 |
+
}
|
1090 |
+
],
|
1091 |
+
"transformer_cfg": {
|
1092 |
+
"n_layers": 2,
|
1093 |
+
"d_model": 24,
|
1094 |
+
"n_ctx": 10,
|
1095 |
+
"d_head": 6,
|
1096 |
+
"model_name": "custom",
|
1097 |
+
"n_heads": 4,
|
1098 |
+
"d_mlp": 96,
|
1099 |
+
"act_fn": "gelu",
|
1100 |
+
"d_vocab": 13,
|
1101 |
+
"eps": 1e-05,
|
1102 |
+
"use_attn_result": true,
|
1103 |
+
"use_attn_scale": true,
|
1104 |
+
"use_split_qkv_input": true,
|
1105 |
+
"use_hook_mlp_in": true,
|
1106 |
+
"use_attn_in": false,
|
1107 |
+
"use_local_attn": false,
|
1108 |
+
"original_architecture": null,
|
1109 |
+
"from_checkpoint": false,
|
1110 |
+
"checkpoint_index": null,
|
1111 |
+
"checkpoint_label_type": null,
|
1112 |
+
"checkpoint_value": null,
|
1113 |
+
"tokenizer_name": null,
|
1114 |
+
"window_size": null,
|
1115 |
+
"attn_types": null,
|
1116 |
+
"init_mode": "gpt2",
|
1117 |
+
"normalization_type": null,
|
1118 |
+
"n_devices": 1,
|
1119 |
+
"attention_dir": "bidirectional",
|
1120 |
+
"attn_only": false,
|
1121 |
+
"seed": 0,
|
1122 |
+
"initializer_range": 0.08251369970070348,
|
1123 |
+
"init_weights": true,
|
1124 |
+
"scale_attn_by_inverse_layer_idx": false,
|
1125 |
+
"positional_embedding_type": "standard",
|
1126 |
+
"final_rms": false,
|
1127 |
+
"d_vocab_out": 60,
|
1128 |
+
"parallel_attn_mlp": false,
|
1129 |
+
"rotary_dim": null,
|
1130 |
+
"n_params": 13824,
|
1131 |
+
"use_hook_tokens": false,
|
1132 |
+
"gated_mlp": false,
|
1133 |
+
"default_prepend_bos": true,
|
1134 |
+
"dtype": "torch.float32",
|
1135 |
+
"tokenizer_prepends_bos": null,
|
1136 |
+
"n_key_value_heads": null,
|
1137 |
+
"post_embedding_ln": false,
|
1138 |
+
"rotary_base": 10000,
|
1139 |
+
"trust_remote_code": false,
|
1140 |
+
"rotary_adjacent_pairs": false,
|
1141 |
+
"load_in_4bit": false,
|
1142 |
+
"num_experts": null,
|
1143 |
+
"experts_per_token": null
|
1144 |
+
},
|
1145 |
+
"transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/71/ll_model_cfg.pkl",
|
1146 |
+
"training_args": {
|
1147 |
+
"output_dir": "/circuits-benchmark/results",
|
1148 |
+
"atol": 0.05,
|
1149 |
+
"lr": 0.001,
|
1150 |
+
"use_single_loss": true,
|
1151 |
+
"iit_weight": 0.4,
|
1152 |
+
"behavior_weight": 0.4,
|
1153 |
+
"strict_weight": 1.0,
|
1154 |
+
"epochs": 3000,
|
1155 |
+
"early_stop_accuracy_threshold": 99.9,
|
1156 |
+
"act_fn": "gelu",
|
1157 |
+
"use_wandb": true,
|
1158 |
+
"save_model_to_wandb": true,
|
1159 |
+
"clip_grad_norm": 0.1,
|
1160 |
+
"lr_scheduler": "linear",
|
1161 |
+
"model_pair": "strict",
|
1162 |
+
"same_size": false,
|
1163 |
+
"seed": 92,
|
1164 |
+
"batch_size": 256,
|
1165 |
+
"include_mlp": false,
|
1166 |
+
"detach_while_caching": true,
|
1167 |
+
"scheduler_val_metric": [
|
1168 |
+
"val/accuracy",
|
1169 |
+
"val/IIA",
|
1170 |
+
"val/strict_accuracy"
|
1171 |
+
],
|
1172 |
+
"siit_sampling": "sample_all",
|
1173 |
+
"val_iia_sampling": "all"
|
1174 |
+
},
|
1175 |
+
"training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/71/meta.json",
|
1176 |
+
"weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/71/ll_model.pth",
|
1177 |
+
"circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/71/edges.pkl"
|
1178 |
+
},
|
1179 |
{
|
1180 |
"case_id": "31",
|
1181 |
"url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/31",
|
|
|
4116 |
"weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/65/ll_model.pth",
|
4117 |
"circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/65/edges.pkl"
|
4118 |
},
|
4119 |
+
{
|
4120 |
+
"case_id": "91",
|
4121 |
+
"url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/91",
|
4122 |
+
"task_description": "Set all values below a threshold to 0",
|
4123 |
+
"vocab": [
|
4124 |
+
0,
|
4125 |
+
1,
|
4126 |
+
2,
|
4127 |
+
3,
|
4128 |
+
4,
|
4129 |
+
5,
|
4130 |
+
6,
|
4131 |
+
7,
|
4132 |
+
8,
|
4133 |
+
9,
|
4134 |
+
10
|
4135 |
+
],
|
4136 |
+
"max_seq_len": 10,
|
4137 |
+
"min_seq_len": 4,
|
4138 |
+
"files": [
|
4139 |
+
{
|
4140 |
+
"file_name": "edges.pkl",
|
4141 |
+
"url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/91/edges.pkl"
|
4142 |
+
},
|
4143 |
+
{
|
4144 |
+
"file_name": "ll_model.pth",
|
4145 |
+
"url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/91/ll_model.pth"
|
4146 |
+
},
|
4147 |
+
{
|
4148 |
+
"file_name": "ll_model_cfg.pkl",
|
4149 |
+
"url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/91/ll_model_cfg.pkl"
|
4150 |
+
},
|
4151 |
+
{
|
4152 |
+
"file_name": "meta.json",
|
4153 |
+
"url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/91/meta.json"
|
4154 |
+
}
|
4155 |
+
],
|
4156 |
+
"transformer_cfg": {
|
4157 |
+
"n_layers": 2,
|
4158 |
+
"d_model": 4,
|
4159 |
+
"n_ctx": 10,
|
4160 |
+
"d_head": 1,
|
4161 |
+
"model_name": "custom",
|
4162 |
+
"n_heads": 4,
|
4163 |
+
"d_mlp": 16,
|
4164 |
+
"act_fn": "gelu",
|
4165 |
+
"d_vocab": 13,
|
4166 |
+
"eps": 1e-05,
|
4167 |
+
"use_attn_result": true,
|
4168 |
+
"use_attn_scale": true,
|
4169 |
+
"use_split_qkv_input": true,
|
4170 |
+
"use_hook_mlp_in": true,
|
4171 |
+
"use_attn_in": false,
|
4172 |
+
"use_local_attn": false,
|
4173 |
+
"original_architecture": null,
|
4174 |
+
"from_checkpoint": false,
|
4175 |
+
"checkpoint_index": null,
|
4176 |
+
"checkpoint_label_type": null,
|
4177 |
+
"checkpoint_value": null,
|
4178 |
+
"tokenizer_name": null,
|
4179 |
+
"window_size": null,
|
4180 |
+
"attn_types": null,
|
4181 |
+
"init_mode": "gpt2",
|
4182 |
+
"normalization_type": null,
|
4183 |
+
"n_devices": 1,
|
4184 |
+
"attention_dir": "causal",
|
4185 |
+
"attn_only": false,
|
4186 |
+
"seed": 0,
|
4187 |
+
"initializer_range": 0.14368424162141993,
|
4188 |
+
"init_weights": true,
|
4189 |
+
"scale_attn_by_inverse_layer_idx": false,
|
4190 |
+
"positional_embedding_type": "standard",
|
4191 |
+
"final_rms": false,
|
4192 |
+
"d_vocab_out": 9,
|
4193 |
+
"parallel_attn_mlp": false,
|
4194 |
+
"rotary_dim": null,
|
4195 |
+
"n_params": 384,
|
4196 |
+
"use_hook_tokens": false,
|
4197 |
+
"gated_mlp": false,
|
4198 |
+
"default_prepend_bos": true,
|
4199 |
+
"dtype": "torch.float32",
|
4200 |
+
"tokenizer_prepends_bos": null,
|
4201 |
+
"n_key_value_heads": null,
|
4202 |
+
"post_embedding_ln": false,
|
4203 |
+
"rotary_base": 10000,
|
4204 |
+
"trust_remote_code": false,
|
4205 |
+
"rotary_adjacent_pairs": false,
|
4206 |
+
"load_in_4bit": false,
|
4207 |
+
"num_experts": null,
|
4208 |
+
"experts_per_token": null
|
4209 |
+
},
|
4210 |
+
"transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/91/ll_model_cfg.pkl",
|
4211 |
+
"training_args": {
|
4212 |
+
"output_dir": "/circuits-benchmark/results",
|
4213 |
+
"atol": 0.05,
|
4214 |
+
"lr": 0.001,
|
4215 |
+
"use_single_loss": true,
|
4216 |
+
"iit_weight": 0.4,
|
4217 |
+
"behavior_weight": 0.4,
|
4218 |
+
"strict_weight": 1.0,
|
4219 |
+
"epochs": 3000,
|
4220 |
+
"early_stop_accuracy_threshold": 99.9,
|
4221 |
+
"act_fn": "gelu",
|
4222 |
+
"use_wandb": true,
|
4223 |
+
"save_model_to_wandb": true,
|
4224 |
+
"clip_grad_norm": 0.1,
|
4225 |
+
"lr_scheduler": "linear",
|
4226 |
+
"model_pair": "strict",
|
4227 |
+
"same_size": false,
|
4228 |
+
"seed": 92,
|
4229 |
+
"batch_size": 256,
|
4230 |
+
"include_mlp": false,
|
4231 |
+
"detach_while_caching": true,
|
4232 |
+
"scheduler_val_metric": [
|
4233 |
+
"val/accuracy",
|
4234 |
+
"val/IIA",
|
4235 |
+
"val/strict_accuracy"
|
4236 |
+
],
|
4237 |
+
"siit_sampling": "sample_all",
|
4238 |
+
"val_iia_sampling": "all"
|
4239 |
+
},
|
4240 |
+
"training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/91/meta.json",
|
4241 |
+
"weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/91/ll_model.pth",
|
4242 |
+
"circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/91/edges.pkl"
|
4243 |
+
},
|
4244 |
{
|
4245 |
"case_id": "14",
|
4246 |
"url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/14",
|
|
|
6431 |
"weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/2/ll_model.pth",
|
6432 |
"circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/2/edges.pkl"
|
6433 |
},
|
6434 |
+
{
|
6435 |
+
"case_id": "67",
|
6436 |
+
"url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/67",
|
6437 |
+
"task_description": "Multiply each element of the sequence by the length of the sequence.",
|
6438 |
+
"vocab": [
|
6439 |
+
0,
|
6440 |
+
1,
|
6441 |
+
2,
|
6442 |
+
3,
|
6443 |
+
4,
|
6444 |
+
5,
|
6445 |
+
6,
|
6446 |
+
7,
|
6447 |
+
8,
|
6448 |
+
9,
|
6449 |
+
10
|
6450 |
+
],
|
6451 |
+
"max_seq_len": 10,
|
6452 |
+
"min_seq_len": 4,
|
6453 |
+
"files": [
|
6454 |
+
{
|
6455 |
+
"file_name": "edges.pkl",
|
6456 |
+
"url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/67/edges.pkl"
|
6457 |
+
},
|
6458 |
+
{
|
6459 |
+
"file_name": "ll_model.pth",
|
6460 |
+
"url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/67/ll_model.pth"
|
6461 |
+
},
|
6462 |
+
{
|
6463 |
+
"file_name": "ll_model_cfg.pkl",
|
6464 |
+
"url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/67/ll_model_cfg.pkl"
|
6465 |
+
},
|
6466 |
+
{
|
6467 |
+
"file_name": "meta.json",
|
6468 |
+
"url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/67/meta.json"
|
6469 |
+
}
|
6470 |
+
],
|
6471 |
+
"transformer_cfg": {
|
6472 |
+
"n_layers": 2,
|
6473 |
+
"d_model": 24,
|
6474 |
+
"n_ctx": 10,
|
6475 |
+
"d_head": 6,
|
6476 |
+
"model_name": "custom",
|
6477 |
+
"n_heads": 4,
|
6478 |
+
"d_mlp": 96,
|
6479 |
+
"act_fn": "gelu",
|
6480 |
+
"d_vocab": 13,
|
6481 |
+
"eps": 1e-05,
|
6482 |
+
"use_attn_result": true,
|
6483 |
+
"use_attn_scale": true,
|
6484 |
+
"use_split_qkv_input": true,
|
6485 |
+
"use_hook_mlp_in": true,
|
6486 |
+
"use_attn_in": false,
|
6487 |
+
"use_local_attn": false,
|
6488 |
+
"original_architecture": null,
|
6489 |
+
"from_checkpoint": false,
|
6490 |
+
"checkpoint_index": null,
|
6491 |
+
"checkpoint_label_type": null,
|
6492 |
+
"checkpoint_value": null,
|
6493 |
+
"tokenizer_name": null,
|
6494 |
+
"window_size": null,
|
6495 |
+
"attn_types": null,
|
6496 |
+
"init_mode": "gpt2",
|
6497 |
+
"normalization_type": null,
|
6498 |
+
"n_devices": 1,
|
6499 |
+
"attention_dir": "bidirectional",
|
6500 |
+
"attn_only": false,
|
6501 |
+
"seed": 0,
|
6502 |
+
"initializer_range": 0.0917662935482247,
|
6503 |
+
"init_weights": true,
|
6504 |
+
"scale_attn_by_inverse_layer_idx": false,
|
6505 |
+
"positional_embedding_type": "standard",
|
6506 |
+
"final_rms": false,
|
6507 |
+
"d_vocab_out": 42,
|
6508 |
+
"parallel_attn_mlp": false,
|
6509 |
+
"rotary_dim": null,
|
6510 |
+
"n_params": 13824,
|
6511 |
+
"use_hook_tokens": false,
|
6512 |
+
"gated_mlp": false,
|
6513 |
+
"default_prepend_bos": true,
|
6514 |
+
"dtype": "torch.float32",
|
6515 |
+
"tokenizer_prepends_bos": null,
|
6516 |
+
"n_key_value_heads": null,
|
6517 |
+
"post_embedding_ln": false,
|
6518 |
+
"rotary_base": 10000,
|
6519 |
+
"trust_remote_code": false,
|
6520 |
+
"rotary_adjacent_pairs": false,
|
6521 |
+
"load_in_4bit": false,
|
6522 |
+
"num_experts": null,
|
6523 |
+
"experts_per_token": null
|
6524 |
+
},
|
6525 |
+
"transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/67/ll_model_cfg.pkl",
|
6526 |
+
"training_args": {
|
6527 |
+
"output_dir": "/circuits-benchmark/results",
|
6528 |
+
"atol": 0.05,
|
6529 |
+
"lr": 0.001,
|
6530 |
+
"use_single_loss": true,
|
6531 |
+
"iit_weight": 0.7,
|
6532 |
+
"behavior_weight": 0.4,
|
6533 |
+
"strict_weight": 0.7,
|
6534 |
+
"epochs": 3000,
|
6535 |
+
"early_stop_accuracy_threshold": 99.9,
|
6536 |
+
"act_fn": "gelu",
|
6537 |
+
"use_wandb": true,
|
6538 |
+
"save_model_to_wandb": true,
|
6539 |
+
"clip_grad_norm": 0.1,
|
6540 |
+
"lr_scheduler": "linear",
|
6541 |
+
"model_pair": "strict",
|
6542 |
+
"same_size": false,
|
6543 |
+
"seed": 92,
|
6544 |
+
"batch_size": 256,
|
6545 |
+
"include_mlp": false,
|
6546 |
+
"detach_while_caching": true,
|
6547 |
+
"scheduler_val_metric": [
|
6548 |
+
"val/accuracy",
|
6549 |
+
"val/IIA",
|
6550 |
+
"val/strict_accuracy"
|
6551 |
+
],
|
6552 |
+
"siit_sampling": "sample_all",
|
6553 |
+
"val_iia_sampling": "all"
|
6554 |
+
},
|
6555 |
+
"training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/67/meta.json",
|
6556 |
+
"weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/67/ll_model.pth",
|
6557 |
+
"circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/67/edges.pkl"
|
6558 |
+
},
|
6559 |
{
|
6560 |
"case_id": "44",
|
6561 |
"url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/44",
|