|
case_id,task_description,max_seq_len,min_seq_len,training_args.atol,training_args.lr,training_args.use_single_loss,training_args.iit_weight,training_args.behavior_weight,training_args.strict_weight,training_args.epochs,training_args.act_fn,training_args.clip_grad_norm,training_args.lr_scheduler,transformer_cfg.n_layers,transformer_cfg.d_model,transformer_cfg.n_ctx,transformer_cfg.d_head,transformer_cfg.model_name,transformer_cfg.n_heads,transformer_cfg.d_mlp,transformer_cfg.act_fn,transformer_cfg.d_vocab,transformer_cfg.eps,transformer_cfg.use_attn_result,transformer_cfg.use_attn_scale,transformer_cfg.use_split_qkv_input,transformer_cfg.use_hook_mlp_in,transformer_cfg.use_attn_in,transformer_cfg.use_local_attn,transformer_cfg.original_architecture,transformer_cfg.from_checkpoint,transformer_cfg.checkpoint_index,transformer_cfg.checkpoint_label_type,transformer_cfg.checkpoint_value,transformer_cfg.tokenizer_name,transformer_cfg.window_size,transformer_cfg.attn_types,transformer_cfg.init_mode,transformer_cfg.normalization_type,transformer_cfg.device,transformer_cfg.n_devices,transformer_cfg.attention_dir,transformer_cfg.attn_only,transformer_cfg.seed,transformer_cfg.initializer_range,transformer_cfg.init_weights,transformer_cfg.scale_attn_by_inverse_layer_idx,transformer_cfg.positional_embedding_type,transformer_cfg.final_rms,transformer_cfg.d_vocab_out,transformer_cfg.parallel_attn_mlp,transformer_cfg.rotary_dim,transformer_cfg.n_params,transformer_cfg.use_hook_tokens,transformer_cfg.gated_mlp,transformer_cfg.default_prepend_bos,transformer_cfg.dtype,transformer_cfg.tokenizer_prepends_bos,transformer_cfg.n_key_value_heads,transformer_cfg.post_embedding_ln,transformer_cfg.rotary_base,transformer_cfg.trust_remote_code,transformer_cfg.rotary_adjacent_pairs |
|
11,Counts the number of words in a sequence based on their length.,10.0,4.0,0.05,0.01,False,1.0,1.0,0.4,500.0,gelu,1.0,,2.0,12.0,10.0,3.0,custom,4.0,48.0,gelu,10.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.1460593486680443,True,False,standard,False,5.0,False,,3456.0,False,False,True,torch.float32,,,False,10000.0,False,False |
|
13,"Analyzes the trend (increasing, decreasing, constant) of numeric tokens.",10.0,4.0,0.05,0.01,False,1.0,1.0,0.4,500.0,gelu,1.0,,2.0,20.0,10.0,5.0,custom,4.0,80.0,gelu,5.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,bidirectional,False,0.0,0.1460593486680443,True,False,standard,False,3.0,False,,9600.0,False,False,True,torch.float32,,,False,10000.0,False,False |
|
18,"Classify each token based on its frequency as 'rare', 'common', or 'frequent'.",10.0,4.0,0.05,0.001,False,1.0,1.0,0.4,2000.0,gelu,0.1,,2.0,12.0,10.0,3.0,custom,4.0,48.0,gelu,7.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,bidirectional,False,0.0,0.12344267996967354,True,False,standard,False,3.0,False,,3456.0,False,False,True,torch.float32,,,False,10000.0,False,False |
|
19,Removes consecutive duplicate tokens from a sequence.,15.0,4.0,0.05,0.001,False,1.0,1.0,0.4,2000.0,gelu,0.1,,2.0,32.0,15.0,8.0,custom,4.0,128.0,gelu,5.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.15689290811054724,True,False,standard,False,3.0,False,,24576.0,False,False,True,torch.float32,,,False,10000.0,False,False |
|
20,Detect spam messages based on appearance of spam keywords.,10.0,4.0,0.05,0.001,False,1.0,1.0,1.0,2000.0,gelu,0.1,,2.0,4.0,10.0,1.0,custom,4.0,16.0,gelu,14.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cuda,1.0,causal,False,0.0,0.16,True,False,standard,False,2.0,False,,384.0,False,False,True,torch.float32,,,False,10000.0,False,False |
|
21,Extract unique tokens from a string,10.0,4.0,0.05,0.01,False,1.0,1.0,0.4,500.0,gelu,1.0,,2.0,20.0,10.0,5.0,custom,4.0,80.0,gelu,5.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.1885618083164127,True,False,standard,False,3.0,False,,9600.0,False,False,True,torch.float32,,,False,10000.0,False,False |
|
24,Identifies the first occurrence of each token in a sequence.,10.0,4.0,0.05,0.01,False,1.0,1.0,0.4,500.0,gelu,1.0,,2.0,20.0,10.0,5.0,custom,4.0,80.0,gelu,5.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.1885618083164127,True,False,standard,False,3.0,False,,9600.0,False,False,True,torch.float32,,,False,10000.0,False,False |
|
3,Returns the fraction of 'x' in the input up to the i-th position for all i.,5.0,4.0,0.05,0.001,False,1.0,1.0,10.0,2000.0,gelu,0.1,,2.0,12.0,5.0,3.0,custom,4.0,48.0,gelu,6.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.22188007849009167,True,False,standard,False,1.0,False,,3456.0,False,False,True,torch.float32,,,False,10000.0,False,False |
|
33,Checks if each token's length is odd or even.,10.0,4.0,0.05,0.001,False,1.0,1.0,0.4,2000.0,gelu,0.1,,2.0,4.0,10.0,1.0,custom,4.0,16.0,gelu,10.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.17457431218879393,True,False,standard,False,2.0,False,,384.0,False,False,True,torch.float32,,,False,10000.0,False,False |
|
34,Calculate the ratio of vowels to consonants in each word.,10.0,4.0,0.05,0.001,False,1.0,1.0,0.4,2000.0,gelu,0.1,,2.0,4.0,10.0,1.0,custom,4.0,16.0,gelu,10.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.16329931618554522,True,False,standard,False,5.0,False,,384.0,False,False,True,torch.float32,,,False,10000.0,False,False |
|
35,Alternates capitalization of each character in words.,10.0,4.0,0.05,0.001,False,1.0,1.0,0.4,2000.0,gelu,0.1,,2.0,4.0,10.0,1.0,custom,4.0,16.0,gelu,10.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.1539600717839002,True,False,standard,False,8.0,False,,384.0,False,False,True,torch.float32,,,False,10000.0,False,False |
|
36,"Classifies each token as 'positive', 'negative', or 'neutral' based on emojis.",10.0,4.0,0.05,0.001,False,1.0,1.0,10.0,2000.0,gelu,0.1,,2.0,4.0,10.0,1.0,custom,4.0,16.0,gelu,5.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cuda,1.0,causal,False,0.0,0.19402850002906638,True,False,standard,False,3.0,False,,384.0,False,False,True,torch.float32,,,False,10000.0,False,False |
|
37,Reverses each word in the sequence except for specified exclusions.,10.0,4.0,0.05,0.001,False,1.0,1.0,0.4,2000.0,gelu,0.1,,2.0,4.0,10.0,1.0,custom,4.0,16.0,gelu,10.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.1539600717839002,True,False,standard,False,8.0,False,,384.0,False,False,True,torch.float32,,,False,10000.0,False,False |
|
38,Checks if tokens alternate between two types.,10.0,4.0,0.05,0.001,False,1.0,1.0,0.4,2000.0,gelu,0.1,,2.0,20.0,10.0,5.0,custom,4.0,80.0,gelu,5.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.1539600717839002,True,False,standard,False,2.0,False,,9600.0,False,False,True,torch.float32,,,False,10000.0,False,False |
|
4,Return fraction of previous open tokens minus the fraction of close tokens.,10.0,4.0,0.05,0.001,False,1.0,1.0,0.4,2000.0,gelu,0.1,,2.0,20.0,10.0,5.0,custom,4.0,80.0,gelu,7.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.17056057308448835,True,False,standard,False,1.0,False,,9600.0,False,False,True,torch.float32,,,False,10000.0,False,False |
|
8,Fills gaps between tokens with a specified filler.,10.0,4.0,0.05,0.01,False,1.0,1.0,0.4,500.0,gelu,1.0,,2.0,20.0,10.0,5.0,custom,4.0,80.0,gelu,10.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.13333333333333333,True,False,standard,False,8.0,False,,9600.0,False,False,True,torch.float32,,,False,10000.0,False,False |
|
ioi,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, |
|
ioi_next_token,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, |
|
|