File size: 726 Bytes
04314e4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
# Settings for a single training run. This file is pure key/value data; the
# program that reads it is not visible here, so notes about what a key *does*
# are marked as assumptions to confirm.
# NOTE(review): the key set (lora, num_microbatches, instruct_data,
# save_adapters) looks like a mistral-finetune training-args file - confirm.
batch_size: 1
# NOTE(review): presumably toggles activation (gradient) checkpointing rather
# than checkpoint saving, since saving has its own no_ckpt / ckpt_freq /
# num_ckpt_keep keys - confirm against the consumer.
checkpoint: true
# Same value as max_steps (100), so presumably a checkpoint is written only
# once, at the final step - verify.
ckpt_freq: 100
data:
  # The only non-empty data path in this file; a plain .txt file under
  # /content.
  # NOTE(review): if the consumer is mistral-finetune, its docs describe this
  # source as JSONL with a "text" field - verify this .txt file is in the
  # format the loader expects.
  data: /content/data/HansardSequences_250k.big.txt
  # Empty string: no evaluation data path is set (no_eval is true below).
  eval_instruct_data: ''
  instruct:
    dynamic_chunk_fn_call: true
    # Note this is true while the sibling data.shuffle below is false.
    shuffle: true
  # Empty string: no instruct data path is set.
  instruct_data: ''
  shuffle: false
# no_eval is true below, so this interval is presumably unused - verify.
eval_freq: 100
# Smallest possible interval; presumably one log record per step - confirm.
log_freq: 1
# LoRA adapter settings; enabled for this run.
lora:
  dropout: 0.0
  enable: true
  rank: 64
  scaling: 2.0
# NOTE(review): presumably the gradient-norm clipping threshold - confirm.
max_norm: 1.0
# Run length in steps; equals ckpt_freq and eval_freq above.
max_steps: 100
# Both fields are null: no MLflow experiment or tracking URI is configured.
mlflow:
  experiment_name: null
  tracking_uri: null
# Local filesystem path under /content (not a remote model id).
model_id_or_path: /content/mistral_models/7B-v0.3
# Negative flags: checkpoint saving is NOT disabled, evaluation IS disabled.
no_ckpt: false
no_eval: true
# With ckpt_freq == max_steps, this limit of 3 is presumably never reached
# within a single run - verify.
num_ckpt_keep: 3
# NOTE(review): presumably the number of gradient-accumulation micro-batches
# per optimizer step - confirm.
num_microbatches: 8
optim:
  lr: 0.0001
  # NOTE(review): presumably the fraction of steps spent in learning-rate
  # warm-up (0.05 of 100 steps = 5 steps) - confirm against the scheduler.
  pct_start: 0.05
  weight_decay: 0.1
# Output directory for the run; under /content like the data and model paths.
run_dir: /content/debategpt
# NOTE(review): with lora.enable true, presumably only the adapter weights
# are saved rather than a merged model - confirm.
save_adapters: true
seed: 0
seq_len: 8192
# key, project and run_name are all null: no Weights & Biases target is
# configured.
wandb:
  key: null
  offline: false
  project: null
  run_name: null
# Single process.
# NOTE(review): presumably this means one GPU - confirm.
world_size: 1