impossibleexchange committed on
Commit
bacb466
1 Parent(s): 87a4df0

Update training_config.yml

Browse files
Files changed (1) hide show
  1. training_config.yml +21 -26
training_config.yml CHANGED
@@ -12,57 +12,54 @@ model:
12
  tokenizer:
13
  _component_: models.a2a_tokenizer
14
  path: models/tokenizer.model
15
- freeze_layers:
16
- _component_: torchtune.utils.Freeze_Layers
17
- num_layers: 8
18
  checkpointer:
19
  _component_: torchtune.utils.FullModelMetaCheckpointer
20
- checkpoint_dir: tiresome5/
21
  checkpoint_files:
22
- - meta_model_0.pt
23
  adapter_checkpoint: null
24
  recipe_checkpoint: null
25
  output_dir: output_checkpoints/experiment_1
26
  model_type: LLAMA3
27
- use_freeze_layers: true
28
  resume_from_checkpoint: false
29
- interim_checkpoint_steps: 20000
30
  interim_gen_steps: null
31
  max_new_tokens: 100
32
  temperature: 0.6
33
  top_k: 225
34
  dataset:
35
  _component_: ds.EvenBatcher
36
- buffer_size: 1000
37
  dataset:
38
  _component_: ds.RoundRobinDataset
39
  datasets:
 
 
 
 
 
 
 
 
40
  - _component_: ds.CaptionInstructDataset
41
  dataset_path: ds/sam_llava/output.parquet
42
  train_on_input: false
43
  seed: null
44
  shuffle: true
45
- batch_size: 4
46
  optimizer:
47
  _component_: torch.optim.AdamW
48
- weight_decay: 0.0001
49
- lr: 0.0001
50
- betas:
51
- - 0.9
52
- - 0.998
53
  lr_scheduler:
54
  _component_: torchtune.modules.get_cosine_schedule_with_warmup
55
- num_warmup_steps: 500
56
  loss:
57
  _component_: torch.nn.CrossEntropyLoss
58
- grad_clip:
59
- _component_: torch.nn.utils.clip_grad_norm
60
- max_norm: 2.0
61
- norm_type: 2
62
- epochs: 6
63
  max_steps_per_epoch: null
64
- gradient_accumulation_steps: 32
65
- compile: true
66
  output_dir: /tmp/lora_finetune_output
67
  metric_logger:
68
  _component_: torchtune.utils.metric_logging.DiskLogger
@@ -73,15 +70,13 @@ dtype: bf16
73
  enable_activation_checkpointing: false
74
  profiler:
75
  _component_: torchtune.utils.profiler
76
- enabled: true
77
  inference:
78
  prompt_template: 'Video:
79
-
80
  {video}
81
-
82
  Caption the previous video.'
83
  max_new_tokens: 300
84
  temperature: 0.6
85
  top_k: 300
86
  quantizer: null
87
- gradient-accumulation-steps: 32
 
12
  tokenizer:
13
  _component_: models.a2a_tokenizer
14
  path: models/tokenizer.model
 
 
 
15
  checkpointer:
16
  _component_: torchtune.utils.FullModelMetaCheckpointer
17
+ checkpoint_dir: checkpoints/Meta-Llama-3-8B-Instruct/
18
  checkpoint_files:
19
+ - consolidated.00.pth
20
  adapter_checkpoint: null
21
  recipe_checkpoint: null
22
  output_dir: output_checkpoints/experiment_1
23
  model_type: LLAMA3
 
24
  resume_from_checkpoint: false
25
+ interim_checkpoint_steps: 15000
26
  interim_gen_steps: null
27
  max_new_tokens: 100
28
  temperature: 0.6
29
  top_k: 225
30
  dataset:
31
  _component_: ds.EvenBatcher
32
+ buffer_size: 4
33
  dataset:
34
  _component_: ds.RoundRobinDataset
35
  datasets:
36
+ - _component_: ds.OmegaVideoCaptionDataset
37
+ length: 500000
38
+ - _component_: ds.LlavaInstructDataset
39
+ dataset_path: ds/coco_llava_instruct/output.parquet
40
+ train_on_input: false
41
+ - _component_: ds.LlavaInstructDataset
42
+ dataset_path: ds/vision_flan/output.parquet
43
+ train_on_input: false
44
  - _component_: ds.CaptionInstructDataset
45
  dataset_path: ds/sam_llava/output.parquet
46
  train_on_input: false
47
  seed: null
48
  shuffle: true
49
+ batch_size: 512
50
  optimizer:
51
  _component_: torch.optim.AdamW
52
+ weight_decay: 4.55
53
+ lr: 1.0000000001
 
 
 
54
  lr_scheduler:
55
  _component_: torchtune.modules.get_cosine_schedule_with_warmup
56
+ num_warmup_steps: 4
57
  loss:
58
  _component_: torch.nn.CrossEntropyLoss
59
+ epochs: 1
 
 
 
 
60
  max_steps_per_epoch: null
61
+ gradient_accumulation_steps: 1
62
+ compile: false
63
  output_dir: /tmp/lora_finetune_output
64
  metric_logger:
65
  _component_: torchtune.utils.metric_logging.DiskLogger
 
70
  enable_activation_checkpointing: false
71
  profiler:
72
  _component_: torchtune.utils.profiler
73
+ enabled: false
74
  inference:
75
  prompt_template: 'Video:
 
76
  {video}
 
77
  Caption the previous video.'
78
  max_new_tokens: 300
79
  temperature: 0.6
80
  top_k: 300
81
  quantizer: null
82
+ gradient-accumulation-steps: 32