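# Training config for sparse autoencoders (SAEs) on GPT-2 small, evidently for
# the sparsify train_tlens_saes script (see save_dir below). A single SAE at
# the layer-2 residual stream is trained end-to-end against the original
# model's downstream activations and output logits rather than purely on
# local reconstruction.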
act_frequency_n_tokens: 500000
batch_size: 4
collect_act_frequency_every_n_samples: 40000
collect_output_metrics_every_n_samples: 0
cooldown_samples: 0
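# effective_batch_size 16 with batch_size 4 implies 4 gradient-accumulation steps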
effective_batch_size: 16
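# Train and eval both stream a pre-tokenized OpenWebText (GPT-2 tokenizer,
# 1024-token contexts) from the Hugging Face Hub.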
eval_data:
  dataset_name: apollo-research/Skylion007-openwebtext-tokenizer-gpt2
  is_tokenized: true
  n_ctx: 1024
  split: train
  streaming: true
  tokenizer_name: gpt2
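# Evaluate on 200 samples every 20k training samples, i.e. roughly 20 evals
# over the 400k-sample run.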
eval_every_n_samples: 20000
eval_n_samples: 200
log_every_n_grad_steps: 20
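# Loss: an end-to-end objective with downstream reconstruction. in_to_orig
# reconstructs the original model's residual stream at every block downstream
# of the SAE (inputs to blocks 3-11, total coefficient 2.5); logits_kl matches
# the SAE-patched model's logits to the original's; out_to_in and out_to_orig
# are disabled; sparsity penalizes SAE activations (coefficient 10).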
loss:
  in_to_orig:
    hook_positions:
    - blocks.3.hook_resid_pre
    - blocks.4.hook_resid_pre
    - blocks.5.hook_resid_pre
    - blocks.6.hook_resid_pre
    - blocks.7.hook_resid_pre
    - blocks.8.hook_resid_pre
    - blocks.9.hook_resid_pre
    - blocks.10.hook_resid_pre
    - blocks.11.hook_resid_pre
    total_coeff: 2.5
  logits_kl:
    coeff: 0.5
  out_to_in:
    coeff: 0
  out_to_orig: null
  sparsity:
    coeff: 10
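# Cosine schedule decaying to min_lr_factor * lr = 5e-5, with 20k warmup
# samples (warmup_samples below) and no cooldown.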
lr: 0.0005
lr_schedule: cosine
max_grad_norm: 10
min_lr_factor: 0.1
n_samples: 400000
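# Dictionary size is 60x the input width: 60 * 768 = 46,080 latents for
# gpt2-small's residual stream.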
saes:
  dict_size_to_input_ratio: 60
  sae_positions: blocks.2.hook_resid_pre
save_dir: /data/jordan_tensor/sparsify/sparsify/scripts/train_tlens_saes/out
save_every_n_samples: null
seed: 0
tlens_model_name: gpt2-small
tlens_model_path: null
train_data:
  dataset_name: apollo-research/Skylion007-openwebtext-tokenizer-gpt2
  is_tokenized: true
  n_ctx: 1024
  split: train
  streaming: true
  tokenizer_name: gpt2
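# With wandb_run_name null, the run name is presumably auto-generated and
# prefixed with recon_.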
wandb_project: gpt2-e2e
wandb_run_name: null
wandb_run_name_prefix: recon_
warmup_samples: 20000