cybershiptrooper
/

InterpBench

Model card Files Files and versions Community

iarcuschin committed on Sep 13

Commit

4fd68f3

•

1 Parent(s): 52adfa8

Update metadata files

Browse files

Files changed (4) hide show

benchmark_cases_metadata.csv +19 -19
benchmark_cases_metadata.parquet +2 -2
benchmark_metadata.json +56 -40
benchmark_metadata_croissant.json +285 -165

benchmark_cases_metadata.csv CHANGED Viewed

@@ -1,19 +1,19 @@
-case_id,url,task_description,max_seq_len,min_seq_len,transformer_cfg_file_url,training_args_file_url,weights_file_url,circuit_file_url,training_args.atol,training_args.lr,training_args.use_single_loss,training_args.iit_weight,training_args.behavior_weight,training_args.strict_weight,training_args.epochs,training_args.act_fn,training_args.clip_grad_norm,training_args.lr_scheduler,transformer_cfg.n_layers,transformer_cfg.d_model,transformer_cfg.n_ctx,transformer_cfg.d_head,transformer_cfg.model_name,transformer_cfg.n_heads,transformer_cfg.d_mlp,transformer_cfg.act_fn,transformer_cfg.d_vocab,transformer_cfg.eps,transformer_cfg.use_attn_result,transformer_cfg.use_attn_scale,transformer_cfg.use_split_qkv_input,transformer_cfg.use_hook_mlp_in,transformer_cfg.use_attn_in,transformer_cfg.use_local_attn,transformer_cfg.original_architecture,transformer_cfg.from_checkpoint,transformer_cfg.tokenizer_name,transformer_cfg.init_mode,transformer_cfg.normalization_type,transformer_cfg.n_devices,transformer_cfg.attention_dir,transformer_cfg.attn_only,transformer_cfg.seed,transformer_cfg.initializer_range,transformer_cfg.init_weights,transformer_cfg.scale_attn_by_inverse_layer_idx,transformer_cfg.positional_embedding_type,transformer_cfg.final_rms,transformer_cfg.d_vocab_out,transformer_cfg.parallel_attn_mlp,transformer_cfg.n_params,transformer_cfg.use_hook_tokens,transformer_cfg.gated_mlp,transformer_cfg.default_prepend_bos,transformer_cfg.dtype,transformer_cfg.tokenizer_prepends_bos,transformer_cfg.post_embedding_ln,transformer_cfg.rotary_base,transformer_cfg.trust_remote_code,transformer_cfg.rotary_adjacent_pairs,training_args.model_pair,training_args.next_token,training_args.non_ioi_thresh,training_args.use_per_token_check,training_args.batch_size,training_args.num_workers,training_args.early_stop,training_args.scheduler_val_metric,training_args.scheduler_mode
-11,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/11,Counts the number of words in a sequence based on their length.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/edges.pkl,0.05,0.01,False,1.0,1.0,0.4,500.0,gelu,1.0,,2,12,10,3,custom,4,48,gelu,10,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1460593486680443,True,False,standard,False,5,False,3456,False,False,True,torch.float32,False,False,10000,False,False,,True,,True,,,True,,
-13,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/13,"Analyzes the trend (increasing, decreasing, constant) of numeric tokens.",10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/edges.pkl,0.05,0.01,False,1.0,1.0,0.4,500.0,gelu,1.0,,2,20,10,5,custom,4,80,gelu,5,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.1460593486680443,True,False,standard,False,3,False,9600,False,False,True,torch.float32,False,False,10000,False,False,,True,,True,,,True,,
-18,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/18,"Classify each token based on its frequency as 'rare', 'common', or 'frequent'.",10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/edges.pkl,0.05,0.001,False,1.0,1.0,1.0,2000.0,gelu,0.1,,2,26,10,6,custom,4,104,gelu,7,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.12344267996967354,True,False,standard,False,3,False,15808,False,False,True,torch.float32,False,False,10000,False,False,strict,True,,True,,,True,,
-19,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/19,Removes consecutive duplicate tokens from a sequence.,15,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/edges.pkl,0.05,0.001,False,1.0,1.0,0.4,2000.0,gelu,0.1,,2,32,15,8,custom,4,128,gelu,5,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.15689290811054724,True,False,standard,False,3,False,24576,False,False,True,torch.float32,False,False,10000,False,False,,True,,True,,,True,,
-20,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/20,Detect spam messages based on appearance of spam keywords.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/edges.pkl,0.05,0.01,False,1.0,1.0,0.4,2000.0,gelu,0.1,,2,13,10,3,custom,4,52,gelu,14,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.16,True,False,standard,False,2,False,3952,False,False,True,torch.float32,False,False,10000,False,False,strict,True,,True,,,True,,
-21,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/21,Extract unique tokens from a string,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/edges.pkl,0.05,0.0005,False,1.0,1.0,0.5,2000.0,gelu,0.1,,4,50,10,12,custom,4,200,gelu,5,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.09847319278346618,True,False,standard,False,3,False,118400,False,False,True,torch.float32,False,False,10000,False,False,strict,True,,True,,,True,,
-26,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/26,Creates a cascading effect by repeating each token in sequence incrementally.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/26/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/26/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/26/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/26/edges.pkl,0.05,0.01,False,1.0,1.0,0.4,2000.0,gelu,0.1,,2,21,10,5,custom,4,84,gelu,5,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.12344267996967354,True,False,standard,False,27,False,10416,False,False,True,torch.float32,False,False,10000,False,False,strict,True,,True,,,True,,
-29,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/29,Creates abbreviations for each token in the sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/29/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/29/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/29/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/29/edges.pkl,0.05,0.01,False,1.0,1.0,0.4,2000.0,gelu,0.1,,2,13,10,3,custom,4,52,gelu,10,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1539600717839002,True,False,standard,False,8,False,3952,False,False,True,torch.float32,False,False,10000,False,False,strict,True,,True,,,True,,
-3,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/3,Returns the fraction of 'x' in the input up to the i-th position for all i.,5,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/edges.pkl,0.05,0.001,False,1.0,1.0,10.0,2000.0,gelu,0.1,,2,12,5,3,custom,4,48,gelu,6,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.22188007849009167,True,False,standard,False,1,False,3456,False,False,True,torch.float32,False,False,10000,False,False,,True,,True,,,True,,
-33,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/33,Checks if each token's length is odd or even.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/edges.pkl,0.05,0.001,False,1.0,1.0,0.4,2000.0,gelu,0.1,,2,4,10,1,custom,4,16,gelu,10,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.17457431218879393,True,False,standard,False,2,False,384,False,False,True,torch.float32,False,False,10000,False,False,,True,,True,,,True,,
-34,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/34,Calculate the ratio of vowels to consonants in each word.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/edges.pkl,0.05,0.01,False,1.0,1.0,1.0,2000.0,gelu,0.1,,2,16,10,4,custom,4,64,gelu,10,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.16329931618554522,True,False,standard,False,5,False,6144,False,False,True,torch.float32,False,False,10000,False,False,strict,True,,True,,,True,,
-35,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/35,Alternates capitalization of each character in words.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/edges.pkl,0.05,0.01,False,1.0,1.0,1.0,2000.0,gelu,0.1,,2,9,10,2,custom,4,36,gelu,10,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1539600717839002,True,False,standard,False,8,False,1872,False,False,True,torch.float32,False,False,10000,False,False,strict,True,,True,,,True,,
-36,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/36,"Classifies each token as 'positive', 'negative', or 'neutral' based on emojis.",10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/edges.pkl,0.05,0.01,False,1.0,1.0,1.0,2000.0,gelu,0.1,,2,6,10,1,custom,4,24,gelu,5,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.19402850002906638,True,False,standard,False,3,False,768,False,False,True,torch.float32,False,False,10000,False,False,strict,True,,True,,,True,,
-37,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/37,Reverses each word in the sequence except for specified exclusions.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/edges.pkl,0.05,0.01,False,1.0,1.0,1.0,2000.0,gelu,0.1,,2,12,10,3,custom,4,48,gelu,10,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1539600717839002,True,False,standard,False,8,False,3456,False,False,True,torch.float32,False,False,10000,False,False,strict,True,,True,,,True,,
-4,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/4,Return fraction of previous open tokens minus the fraction of close tokens.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/edges.pkl,0.05,0.001,False,1.0,1.0,0.4,2000.0,gelu,0.1,,2,20,10,5,custom,4,80,gelu,7,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.17056057308448835,True,False,standard,False,1,False,9600,False,False,True,torch.float32,False,False,10000,False,False,,True,,True,,,True,,
-8,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/8,Fills gaps between tokens with a specified filler.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/8/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/8/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/8/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/8/edges.pkl,0.05,0.01,False,1.0,1.0,0.4,500.0,gelu,1.0,,2,20,10,5,custom,4,80,gelu,10,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.13333333333333333,True,False,standard,False,8,False,9600,False,False,True,torch.float32,False,False,10000,False,False,,True,,True,,,True,,
-ioi,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/ioi,Indirect object identification,16,16,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi/edges.pkl,0.05,0.001,False,1.0,1.0,0.4,,,1.0,,6,64,1024,16,gpt2,4,3072,gelu_new,50257,1e-05,False,True,False,False,False,False,GPT2LMHeadModel,False,gpt2,gpt2,LNPre,1,causal,False,,0.02886751345948129,True,False,standard,False,50257,False,2457600,False,False,True,torch.float32,False,False,10000,False,False,,True,0.65,False,256.0,0.0,True,"val/accuracy,val/IIA",max
-ioi_next_token,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/ioi_next_token,Indirect object identification,16,16,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/edges.pkl,0.05,0.001,False,1.0,1.0,0.4,,,1.0,,6,64,1024,16,gpt2,4,3072,gelu_new,50257,1e-05,False,True,False,False,False,False,GPT2LMHeadModel,False,gpt2,gpt2,LNPre,1,causal,False,,0.02886751345948129,True,False,standard,False,50257,False,2457600,False,False,True,torch.float32,False,False,10000,False,False,,True,0.65,False,256.0,0.0,True,"val/accuracy,val/IIA",max

+case_id,url,task_description,max_seq_len,min_seq_len,transformer_cfg_file_url,training_args_file_url,weights_file_url,circuit_file_url,transformer_cfg.n_layers,transformer_cfg.d_model,transformer_cfg.n_ctx,transformer_cfg.d_head,transformer_cfg.model_name,transformer_cfg.n_heads,transformer_cfg.d_mlp,transformer_cfg.act_fn,transformer_cfg.d_vocab,transformer_cfg.eps,transformer_cfg.use_attn_result,transformer_cfg.use_attn_scale,transformer_cfg.use_split_qkv_input,transformer_cfg.use_hook_mlp_in,transformer_cfg.use_attn_in,transformer_cfg.use_local_attn,transformer_cfg.original_architecture,transformer_cfg.from_checkpoint,transformer_cfg.tokenizer_name,transformer_cfg.init_mode,transformer_cfg.normalization_type,transformer_cfg.n_devices,transformer_cfg.attention_dir,transformer_cfg.attn_only,transformer_cfg.seed,transformer_cfg.initializer_range,transformer_cfg.init_weights,transformer_cfg.scale_attn_by_inverse_layer_idx,transformer_cfg.positional_embedding_type,transformer_cfg.final_rms,transformer_cfg.d_vocab_out,transformer_cfg.parallel_attn_mlp,transformer_cfg.n_params,transformer_cfg.use_hook_tokens,transformer_cfg.gated_mlp,transformer_cfg.default_prepend_bos,transformer_cfg.dtype,transformer_cfg.tokenizer_prepends_bos,transformer_cfg.post_embedding_ln,transformer_cfg.rotary_base,transformer_cfg.trust_remote_code,transformer_cfg.rotary_adjacent_pairs,training_args.atol,training_args.lr,training_args.use_single_loss,training_args.iit_weight,training_args.behavior_weight,training_args.strict_weight,training_args.epochs,training_args.act_fn,training_args.clip_grad_norm,training_args.lr_scheduler,training_args.model_pair,training_args.same_size,training_args.seed,training_args.batch_size,training_args.include_mlp,training_args.next_token,training_args.detach_while_caching,training_args.non_ioi_thresh,training_args.use_per_token_check,training_args.num_workers,training_args.early_stop,training_args.scheduler_val_metric,training_args.scheduler_mode,training_args.val_IIA_sampling,training_args.use_all_tokens_for_behavior,training_args.siit_sampling,training_args.optimizer_kwargs.betas
+11,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/11,Counts the number of words in a sequence based on their length.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/edges.pkl,2,12,10,3,custom,4,48,gelu,10,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1460593486680443,True,False,standard,False,5,False,3456,False,False,True,torch.float32,False,False,10000,False,False,0.05,0.01,False,1.0,1.0,0.4,500.0,gelu,1.0,,,True,,,True,True,True,,True,,True,,,,True,,
+13,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/13,"Analyzes the trend (increasing, decreasing, constant) of numeric tokens.",10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/edges.pkl,2,20,10,5,custom,4,80,gelu,5,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.1460593486680443,True,False,standard,False,3,False,9600,False,False,True,torch.float32,False,False,10000,False,False,0.05,0.01,False,1.0,1.0,0.4,500.0,gelu,1.0,,,True,,,True,True,True,,True,,True,,,,True,,
+18,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/18,"Classify each token based on its frequency as 'rare', 'common', or 'frequent'.",10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/edges.pkl,2,26,10,6,custom,4,104,gelu,7,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.12344267996967354,True,False,standard,False,3,False,15808,False,False,True,torch.float32,False,False,10000,False,False,0.05,0.001,False,1.0,1.0,1.0,2000.0,gelu,0.1,,strict,True,,,True,True,True,,True,,True,,,,True,,
+19,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/19,Removes consecutive duplicate tokens from a sequence.,15,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/edges.pkl,2,32,15,8,custom,4,128,gelu,5,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.15689290811054724,True,False,standard,False,3,False,24576,False,False,True,torch.float32,False,False,10000,False,False,0.05,0.001,False,1.0,1.0,0.4,2000.0,gelu,0.1,,,True,,,True,True,True,,True,,True,,,,True,,
+20,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/20,Detect spam messages based on appearance of spam keywords.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/edges.pkl,2,13,10,3,custom,4,52,gelu,14,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.16,True,False,standard,False,2,False,3952,False,False,True,torch.float32,False,False,10000,False,False,0.05,0.01,False,1.0,1.0,0.4,2000.0,gelu,0.1,,strict,True,,,True,True,True,,True,,True,,,,True,,
+21,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/21,Extract unique tokens from a string,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/edges.pkl,4,50,10,12,custom,4,200,gelu,5,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.09847319278346618,True,False,standard,False,3,False,118400,False,False,True,torch.float32,False,False,10000,False,False,0.05,0.0005,False,1.0,1.0,0.5,2000.0,gelu,0.1,,strict,True,,,True,True,True,,True,,True,,,,True,,
+26,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/26,Creates a cascading effect by repeating each token in sequence incrementally.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/26/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/26/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/26/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/26/edges.pkl,2,21,10,5,custom,4,84,gelu,5,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.12344267996967354,True,False,standard,False,27,False,10416,False,False,True,torch.float32,False,False,10000,False,False,0.05,0.01,False,1.0,1.0,0.4,2000.0,gelu,0.1,,strict,True,,,True,True,True,,True,,True,,,,True,,
+29,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/29,Creates abbreviations for each token in the sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/29/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/29/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/29/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/29/edges.pkl,2,13,10,3,custom,4,52,gelu,10,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1539600717839002,True,False,standard,False,8,False,3952,False,False,True,torch.float32,False,False,10000,False,False,0.05,0.01,False,1.0,1.0,0.4,2000.0,gelu,0.1,,strict,True,,,True,True,True,,True,,True,,,,True,,
+3,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/3,Returns the fraction of 'x' in the input up to the i-th position for all i.,5,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/edges.pkl,2,12,5,3,custom,4,48,gelu,6,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.22188007849009167,True,False,standard,False,1,False,3456,False,False,True,torch.float32,False,False,10000,False,False,0.05,0.01,False,1.0,1.0,10.0,2000.0,gelu,0.1,,strict,False,,,True,True,True,,True,,True,,,,True,,
+33,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/33,Checks if each token's length is odd or even.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/edges.pkl,2,4,10,1,custom,4,16,gelu,10,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.17457431218879393,True,False,standard,False,2,False,384,False,False,True,torch.float32,False,False,10000,False,False,0.05,0.001,False,1.0,1.0,0.4,2000.0,gelu,0.1,,,True,,,True,True,True,,True,,True,,,,True,,
+34,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/34,Calculate the ratio of vowels to consonants in each word.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/edges.pkl,2,16,10,4,custom,4,64,gelu,10,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.16329931618554522,True,False,standard,False,5,False,6144,False,False,True,torch.float32,False,False,10000,False,False,0.05,0.01,False,1.0,1.0,1.0,2000.0,gelu,0.1,,strict,True,,,True,True,True,,True,,True,,,,True,,
+35,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/35,Alternates capitalization of each character in words.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/edges.pkl,2,9,10,2,custom,4,36,gelu,10,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1539600717839002,True,False,standard,False,8,False,1872,False,False,True,torch.float32,False,False,10000,False,False,0.05,0.01,False,1.0,1.0,1.0,2000.0,gelu,0.1,,strict,True,,,True,True,True,,True,,True,,,,True,,
+36,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/36,"Classifies each token as 'positive', 'negative', or 'neutral' based on emojis.",10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/edges.pkl,2,6,10,1,custom,4,24,gelu,5,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.19402850002906638,True,False,standard,False,3,False,768,False,False,True,torch.float32,False,False,10000,False,False,0.05,0.01,False,1.0,1.0,1.0,2000.0,gelu,0.1,,strict,True,,,True,True,True,,True,,True,,,,True,,
+37,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/37,Reverses each word in the sequence except for specified exclusions.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/edges.pkl,2,12,10,3,custom,4,48,gelu,10,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1539600717839002,True,False,standard,False,8,False,3456,False,False,True,torch.float32,False,False,10000,False,False,0.05,0.01,False,1.0,1.0,1.0,2000.0,gelu,0.1,,strict,True,,,True,True,True,,True,,True,,,,True,,
+4,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/4,Return fraction of previous open tokens minus the fraction of close tokens.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/edges.pkl,2,20,10,5,custom,4,80,gelu,7,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.17056057308448835,True,False,standard,False,1,False,9600,False,False,True,torch.float32,False,False,10000,False,False,0.05,0.001,False,1.0,1.0,0.4,2000.0,gelu,0.1,,,True,,,True,True,True,,True,,True,,,,True,,
+7,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/7,Returns the number of times each token occurs in the input.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/7/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/7/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/7/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/7/edges.pkl,2,17,10,4,custom,4,68,gelu,5,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.15689290811054724,True,False,standard,False,10,False,6800,False,False,True,torch.float32,False,False,10000,False,False,0.05,0.01,False,1.0,1.0,0.5,2000.0,gelu,0.1,,strict,False,1234.0,256.0,False,False,True,,True,,True,,,,True,,
+ioi,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/ioi,Indirect Object Identification (IOI) task.,16,16,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi/edges.pkl,6,64,1024,16,gpt2,4,3072,gelu_new,50257,1e-05,False,True,False,False,False,False,GPT2LMHeadModel,False,gpt2,gpt2,LNPre,1,causal,False,,0.02886751345948129,False,False,standard,False,50257,False,84934656,False,False,True,torch.float32,False,False,10000,False,False,0.05,0.001,False,1.0,1.0,0.4,,,1.0,,,True,0.0,512.0,True,False,True,0.65,False,0.0,True,"val/accuracy,val/IIA",max,random,False,individual,"0.9,0.9"
+ioi_next_token,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/ioi_next_token,"Indirect Object Identification (IOI) task, trained using next token prediction.",16,16,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/edges.pkl,6,64,1024,16,gpt2,4,3072,gelu_new,50257,1e-05,False,True,False,False,False,False,GPT2LMHeadModel,False,gpt2,gpt2,LNPre,1,causal,False,,0.02886751345948129,True,False,standard,False,50257,False,2457600,False,False,True,torch.float32,False,False,10000,False,False,0.05,0.001,False,1.0,1.0,0.4,,,1.0,,,True,,256.0,True,True,True,0.65,False,0.0,True,"val/accuracy,val/IIA",max,,True,,

benchmark_cases_metadata.parquet CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:568194933b940c4c03457b1c64a8cb074943dfd075ff83f06e84a6376e3a8dcf
-size 58286

 version https://git-lfs.github.com/spec/v1
+oid sha256:9901890c05e11095ebb3dbd5710284edd09c37b40422eec02a126231f62f63d1
+size 64382

benchmark_metadata.json CHANGED Viewed

@@ -891,7 +891,7 @@
       "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/ll_model_cfg.pkl",
       "training_args": {
         "atol": 0.05,
-        "lr": 0.001,
         "use_single_loss": false,
         "iit_weight": 1.0,
         "behavior_weight": 1.0,
@@ -899,7 +899,9 @@
         "epochs": 2000,
         "act_fn": "gelu",
         "clip_grad_norm": 0.1,
-        "lr_scheduler": ""
       },
       "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/meta.json",
       "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/ll_model.pth",
@@ -1514,49 +1516,44 @@
       "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/edges.pkl"
     },
     {
-      "case_id": "8",
-      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/8",
-      "task_description": "Fills gaps between tokens with a specified filler.",
       "vocab": [
-        "J",
-        "LB",
-        "TPSI",
-        "V",
         "b",
-        "no",
-        "oCLrZaW",
-        "poiVg"
       ],
       "max_seq_len": 10,
       "min_seq_len": 4,
       "files": [
         {
           "file_name": "edges.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/8/edges.pkl"
         },
         {
           "file_name": "ll_model.pth",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/8/ll_model.pth"
         },
         {
           "file_name": "ll_model_cfg.pkl",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/8/ll_model_cfg.pkl"
         },
         {
           "file_name": "meta.json",
-          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/8/meta.json"
         }
       ],
       "transformer_cfg": {
         "n_layers": 2,
-        "d_model": 20,
         "n_ctx": 10,
-        "d_head": 5,
         "model_name": "custom",
         "n_heads": 4,
-        "d_mlp": 80,
         "act_fn": "gelu",
-        "d_vocab": 10,
         "eps": 1e-05,
         "use_attn_result": true,
         "use_attn_scale": true,
@@ -1575,18 +1572,18 @@
         "init_mode": "gpt2",
         "normalization_type": null,
         "n_devices": 1,
-        "attention_dir": "causal",
         "attn_only": false,
         "seed": 0,
-        "initializer_range": 0.13333333333333333,
         "init_weights": true,
         "scale_attn_by_inverse_layer_idx": false,
         "positional_embedding_type": "standard",
         "final_rms": false,
-        "d_vocab_out": 8,
         "parallel_attn_mlp": false,
         "rotary_dim": null,
-        "n_params": 9600,
         "use_hook_tokens": false,
         "gated_mlp": false,
         "default_prepend_bos": true,
@@ -1598,27 +1595,34 @@
         "trust_remote_code": false,
         "rotary_adjacent_pairs": false
       },
-      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/8/ll_model_cfg.pkl",
       "training_args": {
         "atol": 0.05,
         "lr": 0.01,
         "use_single_loss": false,
         "iit_weight": 1.0,
         "behavior_weight": 1.0,
-        "strict_weight": 0.4,
-        "epochs": 500,
         "act_fn": "gelu",
-        "clip_grad_norm": 1.0,
-        "lr_scheduler": ""
       },
-      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/8/meta.json",
-      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/8/ll_model.pth",
-      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/8/edges.pkl"
     },
     {
       "case_id": "ioi",
       "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/ioi",
-      "task_description": "Indirect object identification",
       "max_seq_len": 16,
       "min_seq_len": 16,
       "files": [
@@ -1671,14 +1675,14 @@
         "attn_only": false,
         "seed": null,
         "initializer_range": 0.02886751345948129,
-        "init_weights": true,
         "scale_attn_by_inverse_layer_idx": false,
         "positional_embedding_type": "standard",
         "final_rms": false,
         "d_vocab_out": 50257,
         "parallel_attn_mlp": false,
         "rotary_dim": null,
-        "n_params": 2457600,
         "use_hook_tokens": false,
         "gated_mlp": false,
         "default_prepend_bos": true,
@@ -1692,11 +1696,10 @@
       },
       "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi/ll_model_cfg.pkl",
       "training_args": {
-        "next_token": true,
         "non_ioi_thresh": 0.65,
         "use_per_token_check": false,
-        "batch_size": 256,
-        "lr": 0.001,
         "num_workers": 0,
         "early_stop": true,
         "lr_scheduler": null,
@@ -1705,12 +1708,25 @@
           "val/IIA"
         ],
         "scheduler_mode": "max",
         "clip_grad_norm": 1.0,
         "atol": 0.05,
         "use_single_loss": false,
         "iit_weight": 1.0,
         "behavior_weight": 1.0,
-        "strict_weight": 0.4
       },
       "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi/meta.json",
       "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi/ll_model.pth",
@@ -1719,7 +1735,7 @@
     {
       "case_id": "ioi_next_token",
       "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/ioi_next_token",
-      "task_description": "Indirect object identification",
       "max_seq_len": 16,
       "min_seq_len": 16,
       "files": [

       "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/ll_model_cfg.pkl",
       "training_args": {
         "atol": 0.05,
+        "lr": 0.01,
         "use_single_loss": false,
         "iit_weight": 1.0,
         "behavior_weight": 1.0,
         "epochs": 2000,
         "act_fn": "gelu",
         "clip_grad_norm": 0.1,
+        "lr_scheduler": "",
+        "model_pair": "strict",
+        "same_size": false
       },
       "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/meta.json",
       "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/ll_model.pth",
       "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/edges.pkl"
     },
     {
+      "case_id": "7",
+      "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/7",
+      "task_description": "Returns the number of times each token occurs in the input.",
       "vocab": [
+        "a",
         "b",
+        "c"
       ],
       "max_seq_len": 10,
       "min_seq_len": 4,
       "files": [
         {
           "file_name": "edges.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/7/edges.pkl"
         },
         {
           "file_name": "ll_model.pth",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/7/ll_model.pth"
         },
         {
           "file_name": "ll_model_cfg.pkl",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/7/ll_model_cfg.pkl"
         },
         {
           "file_name": "meta.json",
+          "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/7/meta.json"
         }
       ],
       "transformer_cfg": {
         "n_layers": 2,
+        "d_model": 17,
         "n_ctx": 10,
+        "d_head": 4,
         "model_name": "custom",
         "n_heads": 4,
+        "d_mlp": 68,
         "act_fn": "gelu",
+        "d_vocab": 5,
         "eps": 1e-05,
         "use_attn_result": true,
         "use_attn_scale": true,
         "init_mode": "gpt2",
         "normalization_type": null,
         "n_devices": 1,
+        "attention_dir": "bidirectional",
         "attn_only": false,
         "seed": 0,
+        "initializer_range": 0.15689290811054724,
         "init_weights": true,
         "scale_attn_by_inverse_layer_idx": false,
         "positional_embedding_type": "standard",
         "final_rms": false,
+        "d_vocab_out": 10,
         "parallel_attn_mlp": false,
         "rotary_dim": null,
+        "n_params": 6800,
         "use_hook_tokens": false,
         "gated_mlp": false,
         "default_prepend_bos": true,
         "trust_remote_code": false,
         "rotary_adjacent_pairs": false
       },
+      "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/7/ll_model_cfg.pkl",
       "training_args": {
         "atol": 0.05,
         "lr": 0.01,
         "use_single_loss": false,
         "iit_weight": 1.0,
         "behavior_weight": 1.0,
+        "strict_weight": 0.5,
+        "epochs": 2000,
         "act_fn": "gelu",
+        "clip_grad_norm": 0.1,
+        "lr_scheduler": "",
+        "model_pair": "strict",
+        "same_size": false,
+        "seed": 1234,
+        "batch_size": 256,
+        "include_mlp": false,
+        "next_token": false,
+        "detach_while_caching": true
       },
+      "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/7/meta.json",
+      "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/7/ll_model.pth",
+      "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/7/edges.pkl"
     },
     {
       "case_id": "ioi",
       "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/ioi",
+      "task_description": "Indirect Object Identification (IOI) task.",
       "max_seq_len": 16,
       "min_seq_len": 16,
       "files": [
         "attn_only": false,
         "seed": null,
         "initializer_range": 0.02886751345948129,
+        "init_weights": false,
         "scale_attn_by_inverse_layer_idx": false,
         "positional_embedding_type": "standard",
         "final_rms": false,
         "d_vocab_out": 50257,
         "parallel_attn_mlp": false,
         "rotary_dim": null,
+        "n_params": 84934656,
         "use_hook_tokens": false,
         "gated_mlp": false,
         "default_prepend_bos": true,
       },
       "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi/ll_model_cfg.pkl",
       "training_args": {
+        "next_token": false,
         "non_ioi_thresh": 0.65,
         "use_per_token_check": false,
+        "batch_size": 512,
         "num_workers": 0,
         "early_stop": true,
         "lr_scheduler": null,
           "val/IIA"
         ],
         "scheduler_mode": "max",
+        "scheduler_kwargs": {},
         "clip_grad_norm": 1.0,
+        "seed": 0,
+        "lr": 0.001,
+        "detach_while_caching": true,
+        "optimizer_kwargs": {
+          "betas": [
+            0.9,
+            0.9
+          ]
+        },
         "atol": 0.05,
         "use_single_loss": false,
         "iit_weight": 1.0,
         "behavior_weight": 1.0,
+        "val_IIA_sampling": "random",
+        "use_all_tokens_for_behavior": false,
+        "strict_weight": 0.4,
+        "siit_sampling": "individual"
       },
       "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi/meta.json",
       "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi/ll_model.pth",
     {
       "case_id": "ioi_next_token",
       "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/ioi_next_token",
+      "task_description": "Indirect Object Identification (IOI) task, trained using next token prediction.",
       "max_seq_len": 16,
       "min_seq_len": 16,
       "files": [

benchmark_metadata_croissant.json CHANGED Viewed

@@ -257,156 +257,6 @@
             }
           }
         },
-        {
-          "@type": "cr:Field",
-          "@id": "training_args.atol",
-          "name": "training_args.atol",
-          "description": "Column 'training_args.atol' from the parquet file describing all the cases in the benchmark.",
-          "dataType": "sc:Float",
-          "source": {
-            "fileSet": {
-              "@id": "benchmark-cases-parquet"
-            },
-            "extract": {
-              "column": "training_args.atol"
-            }
-          }
-        },
-        {
-          "@type": "cr:Field",
-          "@id": "training_args.lr",
-          "name": "training_args.lr",
-          "description": "Column 'training_args.lr' from the parquet file describing all the cases in the benchmark.",
-          "dataType": "sc:Float",
-          "source": {
-            "fileSet": {
-              "@id": "benchmark-cases-parquet"
-            },
-            "extract": {
-              "column": "training_args.lr"
-            }
-          }
-        },
-        {
-          "@type": "cr:Field",
-          "@id": "training_args.use_single_loss",
-          "name": "training_args.use_single_loss",
-          "description": "Column 'training_args.use_single_loss' from the parquet file describing all the cases in the benchmark.",
-          "dataType": "sc:Boolean",
-          "source": {
-            "fileSet": {
-              "@id": "benchmark-cases-parquet"
-            },
-            "extract": {
-              "column": "training_args.use_single_loss"
-            }
-          }
-        },
-        {
-          "@type": "cr:Field",
-          "@id": "training_args.iit_weight",
-          "name": "training_args.iit_weight",
-          "description": "Column 'training_args.iit_weight' from the parquet file describing all the cases in the benchmark.",
-          "dataType": "sc:Float",
-          "source": {
-            "fileSet": {
-              "@id": "benchmark-cases-parquet"
-            },
-            "extract": {
-              "column": "training_args.iit_weight"
-            }
-          }
-        },
-        {
-          "@type": "cr:Field",
-          "@id": "training_args.behavior_weight",
-          "name": "training_args.behavior_weight",
-          "description": "Column 'training_args.behavior_weight' from the parquet file describing all the cases in the benchmark.",
-          "dataType": "sc:Float",
-          "source": {
-            "fileSet": {
-              "@id": "benchmark-cases-parquet"
-            },
-            "extract": {
-              "column": "training_args.behavior_weight"
-            }
-          }
-        },
-        {
-          "@type": "cr:Field",
-          "@id": "training_args.strict_weight",
-          "name": "training_args.strict_weight",
-          "description": "Column 'training_args.strict_weight' from the parquet file describing all the cases in the benchmark.",
-          "dataType": "sc:Float",
-          "source": {
-            "fileSet": {
-              "@id": "benchmark-cases-parquet"
-            },
-            "extract": {
-              "column": "training_args.strict_weight"
-            }
-          }
-        },
-        {
-          "@type": "cr:Field",
-          "@id": "training_args.epochs",
-          "name": "training_args.epochs",
-          "description": "Column 'training_args.epochs' from the parquet file describing all the cases in the benchmark.",
-          "dataType": "sc:Float",
-          "source": {
-            "fileSet": {
-              "@id": "benchmark-cases-parquet"
-            },
-            "extract": {
-              "column": "training_args.epochs"
-            }
-          }
-        },
-        {
-          "@type": "cr:Field",
-          "@id": "training_args.act_fn",
-          "name": "training_args.act_fn",
-          "description": "Column 'training_args.act_fn' from the parquet file describing all the cases in the benchmark.",
-          "dataType": "sc:Text",
-          "source": {
-            "fileSet": {
-              "@id": "benchmark-cases-parquet"
-            },
-            "extract": {
-              "column": "training_args.act_fn"
-            }
-          }
-        },
-        {
-          "@type": "cr:Field",
-          "@id": "training_args.clip_grad_norm",
-          "name": "training_args.clip_grad_norm",
-          "description": "Column 'training_args.clip_grad_norm' from the parquet file describing all the cases in the benchmark.",
-          "dataType": "sc:Float",
-          "source": {
-            "fileSet": {
-              "@id": "benchmark-cases-parquet"
-            },
-            "extract": {
-              "column": "training_args.clip_grad_norm"
-            }
-          }
-        },
-        {
-          "@type": "cr:Field",
-          "@id": "training_args.lr_scheduler",
-          "name": "training_args.lr_scheduler",
-          "description": "Column 'training_args.lr_scheduler' from the parquet file describing all the cases in the benchmark.",
-          "dataType": "sc:Text",
-          "source": {
-            "fileSet": {
-              "@id": "benchmark-cases-parquet"
-            },
-            "extract": {
-              "column": "training_args.lr_scheduler"
-            }
-          }
-        },
         {
           "@type": "cr:Field",
           "@id": "transformer_cfg.n_layers",
@@ -1037,6 +887,156 @@
             }
           }
         },
         {
           "@type": "cr:Field",
           "@id": "training_args.model_pair",
@@ -1052,6 +1052,66 @@
             }
           }
         },
         {
           "@type": "cr:Field",
           "@id": "training_args.next_token",
@@ -1069,46 +1129,46 @@
         },
         {
           "@type": "cr:Field",
-          "@id": "training_args.non_ioi_thresh",
-          "name": "training_args.non_ioi_thresh",
-          "description": "Column 'training_args.non_ioi_thresh' from the parquet file describing all the cases in the benchmark.",
-          "dataType": "sc:Float",
           "source": {
             "fileSet": {
               "@id": "benchmark-cases-parquet"
             },
             "extract": {
-              "column": "training_args.non_ioi_thresh"
             }
           }
         },
         {
           "@type": "cr:Field",
-          "@id": "training_args.use_per_token_check",
-          "name": "training_args.use_per_token_check",
-          "description": "Column 'training_args.use_per_token_check' from the parquet file describing all the cases in the benchmark.",
-          "dataType": "sc:Boolean",
           "source": {
             "fileSet": {
               "@id": "benchmark-cases-parquet"
             },
             "extract": {
-              "column": "training_args.use_per_token_check"
             }
           }
         },
         {
           "@type": "cr:Field",
-          "@id": "training_args.batch_size",
-          "name": "training_args.batch_size",
-          "description": "Column 'training_args.batch_size' from the parquet file describing all the cases in the benchmark.",
-          "dataType": "sc:Float",
           "source": {
             "fileSet": {
               "@id": "benchmark-cases-parquet"
             },
             "extract": {
-              "column": "training_args.batch_size"
             }
           }
         },
@@ -1171,6 +1231,66 @@
               "column": "training_args.scheduler_mode"
             }
           }
         }
       ]
     }

             }
           }
         },
         {
           "@type": "cr:Field",
           "@id": "transformer_cfg.n_layers",
             }
           }
         },
+        {
+          "@type": "cr:Field",
+          "@id": "training_args.atol",
+          "name": "training_args.atol",
+          "description": "Column 'training_args.atol' from the parquet file describing all the cases in the benchmark.",
+          "dataType": "sc:Float",
+          "source": {
+            "fileSet": {
+              "@id": "benchmark-cases-parquet"
+            },
+            "extract": {
+              "column": "training_args.atol"
+            }
+          }
+        },
+        {
+          "@type": "cr:Field",
+          "@id": "training_args.lr",
+          "name": "training_args.lr",
+          "description": "Column 'training_args.lr' from the parquet file describing all the cases in the benchmark.",
+          "dataType": "sc:Float",
+          "source": {
+            "fileSet": {
+              "@id": "benchmark-cases-parquet"
+            },
+            "extract": {
+              "column": "training_args.lr"
+            }
+          }
+        },
+        {
+          "@type": "cr:Field",
+          "@id": "training_args.use_single_loss",
+          "name": "training_args.use_single_loss",
+          "description": "Column 'training_args.use_single_loss' from the parquet file describing all the cases in the benchmark.",
+          "dataType": "sc:Boolean",
+          "source": {
+            "fileSet": {
+              "@id": "benchmark-cases-parquet"
+            },
+            "extract": {
+              "column": "training_args.use_single_loss"
+            }
+          }
+        },
+        {
+          "@type": "cr:Field",
+          "@id": "training_args.iit_weight",
+          "name": "training_args.iit_weight",
+          "description": "Column 'training_args.iit_weight' from the parquet file describing all the cases in the benchmark.",
+          "dataType": "sc:Float",
+          "source": {
+            "fileSet": {
+              "@id": "benchmark-cases-parquet"
+            },
+            "extract": {
+              "column": "training_args.iit_weight"
+            }
+          }
+        },
+        {
+          "@type": "cr:Field",
+          "@id": "training_args.behavior_weight",
+          "name": "training_args.behavior_weight",
+          "description": "Column 'training_args.behavior_weight' from the parquet file describing all the cases in the benchmark.",
+          "dataType": "sc:Float",
+          "source": {
+            "fileSet": {
+              "@id": "benchmark-cases-parquet"
+            },
+            "extract": {
+              "column": "training_args.behavior_weight"
+            }
+          }
+        },
+        {
+          "@type": "cr:Field",
+          "@id": "training_args.strict_weight",
+          "name": "training_args.strict_weight",
+          "description": "Column 'training_args.strict_weight' from the parquet file describing all the cases in the benchmark.",
+          "dataType": "sc:Float",
+          "source": {
+            "fileSet": {
+              "@id": "benchmark-cases-parquet"
+            },
+            "extract": {
+              "column": "training_args.strict_weight"
+            }
+          }
+        },
+        {
+          "@type": "cr:Field",
+          "@id": "training_args.epochs",
+          "name": "training_args.epochs",
+          "description": "Column 'training_args.epochs' from the parquet file describing all the cases in the benchmark.",
+          "dataType": "sc:Float",
+          "source": {
+            "fileSet": {
+              "@id": "benchmark-cases-parquet"
+            },
+            "extract": {
+              "column": "training_args.epochs"
+            }
+          }
+        },
+        {
+          "@type": "cr:Field",
+          "@id": "training_args.act_fn",
+          "name": "training_args.act_fn",
+          "description": "Column 'training_args.act_fn' from the parquet file describing all the cases in the benchmark.",
+          "dataType": "sc:Text",
+          "source": {
+            "fileSet": {
+              "@id": "benchmark-cases-parquet"
+            },
+            "extract": {
+              "column": "training_args.act_fn"
+            }
+          }
+        },
+        {
+          "@type": "cr:Field",
+          "@id": "training_args.clip_grad_norm",
+          "name": "training_args.clip_grad_norm",
+          "description": "Column 'training_args.clip_grad_norm' from the parquet file describing all the cases in the benchmark.",
+          "dataType": "sc:Float",
+          "source": {
+            "fileSet": {
+              "@id": "benchmark-cases-parquet"
+            },
+            "extract": {
+              "column": "training_args.clip_grad_norm"
+            }
+          }
+        },
+        {
+          "@type": "cr:Field",
+          "@id": "training_args.lr_scheduler",
+          "name": "training_args.lr_scheduler",
+          "description": "Column 'training_args.lr_scheduler' from the parquet file describing all the cases in the benchmark.",
+          "dataType": "sc:Text",
+          "source": {
+            "fileSet": {
+              "@id": "benchmark-cases-parquet"
+            },
+            "extract": {
+              "column": "training_args.lr_scheduler"
+            }
+          }
+        },
         {
           "@type": "cr:Field",
           "@id": "training_args.model_pair",
             }
           }
         },
+        {
+          "@type": "cr:Field",
+          "@id": "training_args.same_size",
+          "name": "training_args.same_size",
+          "description": "Column 'training_args.same_size' from the parquet file describing all the cases in the benchmark.",
+          "dataType": "sc:Boolean",
+          "source": {
+            "fileSet": {
+              "@id": "benchmark-cases-parquet"
+            },
+            "extract": {
+              "column": "training_args.same_size"
+            }
+          }
+        },
+        {
+          "@type": "cr:Field",
+          "@id": "training_args.seed",
+          "name": "training_args.seed",
+          "description": "Column 'training_args.seed' from the parquet file describing all the cases in the benchmark.",
+          "dataType": "sc:Float",
+          "source": {
+            "fileSet": {
+              "@id": "benchmark-cases-parquet"
+            },
+            "extract": {
+              "column": "training_args.seed"
+            }
+          }
+        },
+        {
+          "@type": "cr:Field",
+          "@id": "training_args.batch_size",
+          "name": "training_args.batch_size",
+          "description": "Column 'training_args.batch_size' from the parquet file describing all the cases in the benchmark.",
+          "dataType": "sc:Float",
+          "source": {
+            "fileSet": {
+              "@id": "benchmark-cases-parquet"
+            },
+            "extract": {
+              "column": "training_args.batch_size"
+            }
+          }
+        },
+        {
+          "@type": "cr:Field",
+          "@id": "training_args.include_mlp",
+          "name": "training_args.include_mlp",
+          "description": "Column 'training_args.include_mlp' from the parquet file describing all the cases in the benchmark.",
+          "dataType": "sc:Boolean",
+          "source": {
+            "fileSet": {
+              "@id": "benchmark-cases-parquet"
+            },
+            "extract": {
+              "column": "training_args.include_mlp"
+            }
+          }
+        },
         {
           "@type": "cr:Field",
           "@id": "training_args.next_token",
         },
         {
           "@type": "cr:Field",
+          "@id": "training_args.detach_while_caching",
+          "name": "training_args.detach_while_caching",
+          "description": "Column 'training_args.detach_while_caching' from the parquet file describing all the cases in the benchmark.",
+          "dataType": "sc:Boolean",
           "source": {
             "fileSet": {
               "@id": "benchmark-cases-parquet"
             },
             "extract": {
+              "column": "training_args.detach_while_caching"
             }
           }
         },
         {
           "@type": "cr:Field",
+          "@id": "training_args.non_ioi_thresh",
+          "name": "training_args.non_ioi_thresh",
+          "description": "Column 'training_args.non_ioi_thresh' from the parquet file describing all the cases in the benchmark.",
+          "dataType": "sc:Float",
           "source": {
             "fileSet": {
               "@id": "benchmark-cases-parquet"
             },
             "extract": {
+              "column": "training_args.non_ioi_thresh"
             }
           }
         },
         {
           "@type": "cr:Field",
+          "@id": "training_args.use_per_token_check",
+          "name": "training_args.use_per_token_check",
+          "description": "Column 'training_args.use_per_token_check' from the parquet file describing all the cases in the benchmark.",
+          "dataType": "sc:Boolean",
           "source": {
             "fileSet": {
               "@id": "benchmark-cases-parquet"
             },
             "extract": {
+              "column": "training_args.use_per_token_check"
             }
           }
         },
               "column": "training_args.scheduler_mode"
             }
           }
+        },
+        {
+          "@type": "cr:Field",
+          "@id": "training_args.val_IIA_sampling",
+          "name": "training_args.val_IIA_sampling",
+          "description": "Column 'training_args.val_IIA_sampling' from the parquet file describing all the cases in the benchmark.",
+          "dataType": "sc:Text",
+          "source": {
+            "fileSet": {
+              "@id": "benchmark-cases-parquet"
+            },
+            "extract": {
+              "column": "training_args.val_IIA_sampling"
+            }
+          }
+        },
+        {
+          "@type": "cr:Field",
+          "@id": "training_args.use_all_tokens_for_behavior",
+          "name": "training_args.use_all_tokens_for_behavior",
+          "description": "Column 'training_args.use_all_tokens_for_behavior' from the parquet file describing all the cases in the benchmark.",
+          "dataType": "sc:Boolean",
+          "source": {
+            "fileSet": {
+              "@id": "benchmark-cases-parquet"
+            },
+            "extract": {
+              "column": "training_args.use_all_tokens_for_behavior"
+            }
+          }
+        },
+        {
+          "@type": "cr:Field",
+          "@id": "training_args.siit_sampling",
+          "name": "training_args.siit_sampling",
+          "description": "Column 'training_args.siit_sampling' from the parquet file describing all the cases in the benchmark.",
+          "dataType": "sc:Text",
+          "source": {
+            "fileSet": {
+              "@id": "benchmark-cases-parquet"
+            },
+            "extract": {
+              "column": "training_args.siit_sampling"
+            }
+          }
+        },
+        {
+          "@type": "cr:Field",
+          "@id": "training_args.optimizer_kwargs.betas",
+          "name": "training_args.optimizer_kwargs.betas",
+          "description": "Column 'training_args.optimizer_kwargs.betas' from the parquet file describing all the cases in the benchmark.",
+          "dataType": "sc:Text",
+          "source": {
+            "fileSet": {
+              "@id": "benchmark-cases-parquet"
+            },
+            "extract": {
+              "column": "training_args.optimizer_kwargs.betas"
+            }
+          }
         }
       ]
     }