Mathoufle13
committed on
Commit
•
895ec39
1
Parent(s):
5c08f5c
Update README.md
Browse files
README.md
CHANGED
@@ -1,3 +1,234 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
---
|
2 |
base_model: unsloth/llama-3-8b-bnb-4bit
|
3 |
language:
|
|
|
1 |
+
1289.4068 seconds used for training.
|
2 |
+
21.49 minutes used for training.
|
3 |
+
Peak reserved memory = 9.545 GB.
|
4 |
+
Peak reserved memory for training = 4.018 GB.
|
5 |
+
Peak reserved memory % of max memory = 43.058 %.
|
6 |
+
Peak reserved memory for training % of max memory = 18.125 %.
|
7 |
+
|
8 |
+
args = TrainingArguments(
|
9 |
+
per_device_train_batch_size = 2,
|
10 |
+
gradient_accumulation_steps = 4,
|
11 |
+
warmup_steps = 10, # Augmenté le nombre de steps de warmup
|
12 |
+
max_steps = 200, # Augmenté le nombre total de steps
|
13 |
+
learning_rate = 1e-4, # Réduit le taux d'apprentissage
|
14 |
+
fp16 = not torch.cuda.is_bf16_supported(),
|
15 |
+
bf16 = torch.cuda.is_bf16_supported(),
|
16 |
+
logging_steps = 1,
|
17 |
+
optim = "adamw_8bit",
|
18 |
+
weight_decay = 0.01,
|
19 |
+
lr_scheduler_type = "linear",
|
20 |
+
seed = 42,
|
21 |
+
output_dir = "outputs",
|
22 |
+
|
23 |
+
|
24 |
+
==((====))== Unsloth - 2x faster free finetuning | Num GPUs = 1
|
25 |
+
\\ /| Num examples = 399 | Num Epochs = 4
|
26 |
+
O^O/ \_/ \ Batch size per device = 2 | Gradient Accumulation steps = 4
|
27 |
+
\ / Total batch size = 8 | Total steps = 200
|
28 |
+
"-____-" Number of trainable parameters = 20,971,520
|
29 |
+
[200/200 21:17, Epoch 4/4]
|
30 |
+
Step Training Loss
|
31 |
+
1 2.027900
|
32 |
+
2 2.008700
|
33 |
+
3 1.946100
|
34 |
+
4 1.924700
|
35 |
+
5 1.995000
|
36 |
+
6 1.999000
|
37 |
+
7 1.870100
|
38 |
+
8 1.891400
|
39 |
+
9 1.807600
|
40 |
+
10 1.723200
|
41 |
+
11 1.665100
|
42 |
+
12 1.541000
|
43 |
+
13 1.509100
|
44 |
+
14 1.416600
|
45 |
+
15 1.398600
|
46 |
+
16 1.233200
|
47 |
+
17 1.172100
|
48 |
+
18 1.272100
|
49 |
+
19 1.146000
|
50 |
+
20 1.179000
|
51 |
+
21 1.206400
|
52 |
+
22 1.095400
|
53 |
+
23 0.937300
|
54 |
+
24 1.214300
|
55 |
+
25 1.040200
|
56 |
+
26 1.183400
|
57 |
+
27 1.033900
|
58 |
+
28 0.953100
|
59 |
+
29 0.935700
|
60 |
+
30 0.962200
|
61 |
+
31 0.908900
|
62 |
+
32 0.924900
|
63 |
+
33 0.931000
|
64 |
+
34 1.011300
|
65 |
+
35 0.951900
|
66 |
+
36 0.936000
|
67 |
+
37 0.903000
|
68 |
+
38 0.906900
|
69 |
+
39 0.945700
|
70 |
+
40 0.827000
|
71 |
+
41 0.931800
|
72 |
+
42 0.919600
|
73 |
+
43 0.926900
|
74 |
+
44 0.932900
|
75 |
+
45 0.872700
|
76 |
+
46 0.795200
|
77 |
+
47 0.888700
|
78 |
+
48 0.956800
|
79 |
+
49 1.004200
|
80 |
+
50 0.859500
|
81 |
+
51 0.802500
|
82 |
+
52 0.855400
|
83 |
+
53 0.885500
|
84 |
+
54 1.026600
|
85 |
+
55 0.844100
|
86 |
+
56 0.879800
|
87 |
+
57 0.797400
|
88 |
+
58 0.885300
|
89 |
+
59 0.842800
|
90 |
+
60 0.861600
|
91 |
+
61 0.789100
|
92 |
+
62 0.861600
|
93 |
+
63 0.856700
|
94 |
+
64 0.929200
|
95 |
+
65 0.782500
|
96 |
+
66 0.713600
|
97 |
+
67 0.781000
|
98 |
+
68 0.765100
|
99 |
+
69 0.784700
|
100 |
+
70 0.869500
|
101 |
+
71 0.742900
|
102 |
+
72 0.787900
|
103 |
+
73 0.750800
|
104 |
+
74 0.931700
|
105 |
+
75 0.713000
|
106 |
+
76 0.832100
|
107 |
+
77 0.928300
|
108 |
+
78 0.777600
|
109 |
+
79 0.694000
|
110 |
+
80 0.835400
|
111 |
+
81 0.822000
|
112 |
+
82 0.754600
|
113 |
+
83 0.813400
|
114 |
+
84 0.868800
|
115 |
+
85 0.732400
|
116 |
+
86 0.803700
|
117 |
+
87 0.694400
|
118 |
+
88 0.771300
|
119 |
+
89 0.864400
|
120 |
+
90 0.646700
|
121 |
+
91 0.690800
|
122 |
+
92 0.695000
|
123 |
+
93 0.732300
|
124 |
+
94 0.766900
|
125 |
+
95 0.864100
|
126 |
+
96 0.867200
|
127 |
+
97 0.774300
|
128 |
+
98 0.797700
|
129 |
+
99 0.772100
|
130 |
+
100 0.906700
|
131 |
+
101 0.693400
|
132 |
+
102 0.685500
|
133 |
+
103 0.712200
|
134 |
+
104 0.678400
|
135 |
+
105 0.761900
|
136 |
+
106 0.705300
|
137 |
+
107 0.775700
|
138 |
+
108 0.627600
|
139 |
+
109 0.599300
|
140 |
+
110 0.615100
|
141 |
+
111 0.618200
|
142 |
+
112 0.668700
|
143 |
+
113 0.699900
|
144 |
+
114 0.577000
|
145 |
+
115 0.711600
|
146 |
+
116 0.692900
|
147 |
+
117 0.585400
|
148 |
+
118 0.646400
|
149 |
+
119 0.569200
|
150 |
+
120 0.752300
|
151 |
+
121 0.745000
|
152 |
+
122 0.690100
|
153 |
+
123 0.744700
|
154 |
+
124 0.665800
|
155 |
+
125 0.866100
|
156 |
+
126 0.707400
|
157 |
+
127 0.679300
|
158 |
+
128 0.591400
|
159 |
+
129 0.655100
|
160 |
+
130 0.734000
|
161 |
+
131 0.637900
|
162 |
+
132 0.733900
|
163 |
+
133 0.652500
|
164 |
+
134 0.685400
|
165 |
+
135 0.641300
|
166 |
+
136 0.608200
|
167 |
+
137 0.754100
|
168 |
+
138 0.753700
|
169 |
+
139 0.671000
|
170 |
+
140 0.767200
|
171 |
+
141 0.668700
|
172 |
+
142 0.630300
|
173 |
+
143 0.734700
|
174 |
+
144 0.767700
|
175 |
+
145 0.722200
|
176 |
+
146 0.694400
|
177 |
+
147 0.710100
|
178 |
+
148 0.696300
|
179 |
+
149 0.612600
|
180 |
+
150 0.670400
|
181 |
+
151 0.512900
|
182 |
+
152 0.675100
|
183 |
+
153 0.579900
|
184 |
+
154 0.622900
|
185 |
+
155 0.652500
|
186 |
+
156 0.649200
|
187 |
+
157 0.546700
|
188 |
+
158 0.521600
|
189 |
+
159 0.522200
|
190 |
+
160 0.589400
|
191 |
+
161 0.552600
|
192 |
+
162 0.630700
|
193 |
+
163 0.595600
|
194 |
+
164 0.614300
|
195 |
+
165 0.489400
|
196 |
+
166 0.634500
|
197 |
+
167 0.620800
|
198 |
+
168 0.618600
|
199 |
+
169 0.637900
|
200 |
+
170 0.553900
|
201 |
+
171 0.656000
|
202 |
+
172 0.644000
|
203 |
+
173 0.694300
|
204 |
+
174 0.608900
|
205 |
+
175 0.673000
|
206 |
+
176 0.612500
|
207 |
+
177 0.654200
|
208 |
+
178 0.639200
|
209 |
+
179 0.599100
|
210 |
+
180 0.642100
|
211 |
+
181 0.529700
|
212 |
+
182 0.614000
|
213 |
+
183 0.582900
|
214 |
+
184 0.765100
|
215 |
+
185 0.502700
|
216 |
+
186 0.564300
|
217 |
+
187 0.740200
|
218 |
+
188 0.636100
|
219 |
+
189 0.638800
|
220 |
+
190 0.560100
|
221 |
+
191 0.620000
|
222 |
+
192 0.712800
|
223 |
+
193 0.531000
|
224 |
+
194 0.591600
|
225 |
+
195 0.608600
|
226 |
+
196 0.671800
|
227 |
+
197 0.572900
|
228 |
+
198 0.600900
|
229 |
+
199 0.586800
|
230 |
+
200 0.545900
|
231 |
+
|
232 |
---
|
233 |
base_model: unsloth/llama-3-8b-bnb-4bit
|
234 |
language:
|