chaturyaganne committed on
Commit
150150f
·
verified ·
1 Parent(s): 7a3b9d1

Pushed the IDEFICS2 fine-tuned model.

Browse files
checkpoint-25/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:31d96b70c9a934c487d586616624ebe4e496d1cf08e9f856712046ae9c23d890
3
  size 93378688
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d18d31d2ab07850fc5106340e20eae20454e9eba33cbd5edad2a67f2611e7cba
3
  size 93378688
checkpoint-25/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1a6e52f560a2502a6c307967c86d7aafc307a7d8869089cca56e0ad5844bbf10
3
  size 47425352
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:29bf9fb840fa70a92bd578a839db045c72e8295b4a8902093016b8a283ae6808
3
  size 47425352
checkpoint-25/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:614fcea8e73d817edc55868e83a17dea664dce6af99624cd6b07cf229ea0cb30
3
- size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:72e70d63777ae67fd851081989e78bfd79a7d57689d0b1082faa1e5830f19194
3
+ size 14180
checkpoint-25/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6222650a495ea54e976d1599e55e1eec1dfa515e390e5f653027100568b23eb1
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:630fe59a784956405be1a950d9ce52e5bf6a2f1c12f3a8bd4f3869766a5850cd
3
  size 1064
checkpoint-25/trainer_state.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.4,
5
  "eval_steps": 10,
6
  "global_step": 25,
7
  "is_hyper_param_search": false,
@@ -9,54 +9,54 @@
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.08,
13
  "grad_norm": Infinity,
14
- "learning_rate": 0.00018400000000000003,
15
- "loss": 4.5662,
16
  "step": 5
17
  },
18
  {
19
- "epoch": 0.16,
20
- "grad_norm": 21.689138412475586,
21
- "learning_rate": 0.000144,
22
- "loss": 1.5225,
23
  "step": 10
24
  },
25
  {
26
- "epoch": 0.16,
27
- "eval_loss": 1.1455421447753906,
28
- "eval_runtime": 307.8066,
29
- "eval_samples_per_second": 0.65,
30
- "eval_steps_per_second": 0.325,
31
  "step": 10
32
  },
33
  {
34
- "epoch": 0.24,
35
- "grad_norm": 33.072635650634766,
36
- "learning_rate": 0.00010400000000000001,
37
- "loss": 1.0761,
38
  "step": 15
39
  },
40
  {
41
- "epoch": 0.32,
42
- "grad_norm": 17.61521339416504,
43
- "learning_rate": 6.400000000000001e-05,
44
- "loss": 1.0442,
45
  "step": 20
46
  },
47
  {
48
- "epoch": 0.32,
49
- "eval_loss": 1.078382134437561,
50
- "eval_runtime": 307.6568,
51
- "eval_samples_per_second": 0.65,
52
- "eval_steps_per_second": 0.325,
53
  "step": 20
54
  },
55
  {
56
- "epoch": 0.4,
57
- "grad_norm": 9.085550308227539,
58
- "learning_rate": 2.4e-05,
59
- "loss": 0.9102,
60
  "step": 25
61
  }
62
  ],
@@ -77,8 +77,8 @@
77
  "attributes": {}
78
  }
79
  },
80
- "total_flos": 1864510458066432.0,
81
- "train_batch_size": 2,
82
  "trial_name": null,
83
  "trial_params": null
84
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.1,
5
  "eval_steps": 10,
6
  "global_step": 25,
7
  "is_hyper_param_search": false,
 
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.02,
13
  "grad_norm": Infinity,
14
+ "learning_rate": 0.00017600000000000002,
15
+ "loss": 2.6469,
16
  "step": 5
17
  },
18
  {
19
+ "epoch": 0.04,
20
+ "grad_norm": 19.0399169921875,
21
+ "learning_rate": 0.00013600000000000003,
22
+ "loss": 1.2235,
23
  "step": 10
24
  },
25
  {
26
+ "epoch": 0.04,
27
+ "eval_loss": 1.2343159914016724,
28
+ "eval_runtime": 538.9051,
29
+ "eval_samples_per_second": 1.856,
30
+ "eval_steps_per_second": 1.856,
31
  "step": 10
32
  },
33
  {
34
+ "epoch": 0.06,
35
+ "grad_norm": 11.566468238830566,
36
+ "learning_rate": 9.6e-05,
37
+ "loss": 1.3862,
38
  "step": 15
39
  },
40
  {
41
+ "epoch": 0.08,
42
+ "grad_norm": 12.342832565307617,
43
+ "learning_rate": 5.6000000000000006e-05,
44
+ "loss": 1.1541,
45
  "step": 20
46
  },
47
  {
48
+ "epoch": 0.08,
49
+ "eval_loss": 1.179603934288025,
50
+ "eval_runtime": 540.1428,
51
+ "eval_samples_per_second": 1.851,
52
+ "eval_steps_per_second": 1.851,
53
  "step": 20
54
  },
55
  {
56
+ "epoch": 0.1,
57
+ "grad_norm": 28.799449920654297,
58
+ "learning_rate": 1.6000000000000003e-05,
59
+ "loss": 1.2611,
60
  "step": 25
61
  }
62
  ],
 
77
  "attributes": {}
78
  }
79
  },
80
+ "total_flos": 568385230334592.0,
81
+ "train_batch_size": 1,
82
  "trial_name": null,
83
  "trial_params": null
84
  }
checkpoint-25/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b6e46dcfc153cff4330c2978ba9f33a5fa568af92b143d92ea9c00cd30120da6
3
  size 5112
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bab3bbe7e71b3116bde27f637c54636924026eec9c15eb4352457999d2af31df
3
  size 5112