alg_name: "ROME"
model_name: "./hugging_cache/gpt2-xl"
stats_dir: "./data/stats"   # cache for precomputed second-moment (mom2) statistics
device: cpu
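
# Editing target: the transformer block(s) whose MLP weights receive ROME's
# rank-one update; "subject_last" reads the fact at the subject's final token.
# (The ROME reference setup for GPT-2 XL edits layer 17.)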
layers: [5]
fact_token: "subject_last"
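
# Optimization of the replacement value v*: gradient descent on the target
# token's probability, taken at the output of v_loss_layer, with weight decay
# and a KL penalty against drifting the model's other predictions.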
v_num_grad_steps: 20
v_lr: 5e-1
v_loss_layer: 47          # final transformer block of GPT-2 XL (48 blocks, indexed 0-47)
v_weight_decay: 0.5
clamp_norm_factor: 4      # clamp v* to at most 4x the norm of the original value vector
kl_factor: 0.0625         # weight of the KL regularization term
mom2_adjustment: false    # if true, precondition the update with the inverse mom2 matrix
context_template_length_params: [[5, 10], [10, 10]]   # [token length, count] pairs for sampled context prefixes
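
# Module-name templates; "{}" is filled with a layer index at runtime, e.g.
# rewrite_module_tmp.format(5) -> "transformer.h.5.mlp.c_proj", the MLP
# down-projection that the rank-one edit is written into.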
rewrite_module_tmp: "transformer.h.{}.mlp.c_proj"
layer_module_tmp: "transformer.h.{}"
mlp_module_tmp: "transformer.h.{}.mlp"
attn_module_tmp: "transformer.h.{}.attn"
ln_f_module: "transformer.ln_f"
lm_head_module: "transformer.wte"   # GPT-2 ties its LM head to the token embedding matrix
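
# Second-moment statistics of MLP inputs, used when mom2_adjustment is true;
# estimated from mom2_n_samples Wikipedia texts and cached under stats_dir.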
mom2_dataset: "wikipedia"
mom2_n_samples: 100000
mom2_dtype: "float32"

model_parallel: false
fp16: false
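
# Usage (a minimal sketch, assuming this file is consumed by an EasyEdit-style
# loader; the API below is illustrative, not confirmed by this file):
#   from easyeditor import BaseEditor, ROMEHyperParams
#   hparams = ROMEHyperParams.from_hparams("./hparams/ROME/gpt2-xl.yaml")
#   editor = BaseEditor.from_hparams(hparams)
#   metrics, edited_model, _ = editor.edit(
#       prompts=["The Eiffel Tower is located in"],
#       target_new=["Rome"],
#       subject=["Eiffel Tower"],
#   )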