Training in progress, epoch 1
Browse files- eval_job_output.txt +6 -132
- logs/events.out.tfevents.1715391511.sphinx2 +3 -0
- model.safetensors +1 -1
- train_job_output.txt +37 -82
- training_args.bin +1 -1
eval_job_output.txt
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
slurm submission log: 2024-05-
|
2 |
created following sbatch script:
|
3 |
|
4 |
###############################
|
@@ -7,13 +7,13 @@ created following sbatch script:
|
|
7 |
|
8 |
#SBATCH --account=nlp
|
9 |
#SBATCH --cpus-per-task=16
|
10 |
-
#SBATCH --dependency=afterok:
|
11 |
#SBATCH --gres=gpu:1
|
12 |
-
#SBATCH --job-name=tthrush-job-
|
13 |
#SBATCH --mem=60G
|
14 |
#SBATCH --nodelist=sphinx2
|
15 |
#SBATCH --open-mode=append
|
16 |
-
#SBATCH --output=/juice5/scr5/tthrush/pretraining-coreset-selection/llm_pretraining/
|
17 |
#SBATCH --partition=sphinx
|
18 |
#SBATCH --time=14-0
|
19 |
|
@@ -24,7 +24,7 @@ created following sbatch script:
|
|
24 |
cd .
|
25 |
|
26 |
# launch commands
|
27 |
-
srun --unbuffered run_as_child_processes 'lm_eval --model hf --model_args pretrained=/juice5/scr5/tthrush/pretraining-coreset-selection/llm_pretraining/
|
28 |
|
29 |
###############################
|
30 |
|
@@ -34,133 +34,7 @@ submission to slurm complete!
|
|
34 |
###############################
|
35 |
slurm submission output
|
36 |
|
37 |
-
Submitted batch job
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
###############################
|
42 |
-
|
43 |
-
slurm submission log: 2024-05-09 23:03:14.163910
|
44 |
-
created following sbatch script:
|
45 |
-
|
46 |
-
###############################
|
47 |
-
|
48 |
-
#!/bin/bash
|
49 |
-
|
50 |
-
#SBATCH --account=nlp
|
51 |
-
#SBATCH --cpus-per-task=16
|
52 |
-
#SBATCH --dependency=afterok:7593150
|
53 |
-
#SBATCH --gres=gpu:1
|
54 |
-
#SBATCH --job-name=tthrush-job-4358983
|
55 |
-
#SBATCH --mem=60G
|
56 |
-
#SBATCH --nodelist=sphinx2
|
57 |
-
#SBATCH --open-mode=append
|
58 |
-
#SBATCH --output=/juice5/scr5/tthrush/pretraining-coreset-selection/llm_pretraining/llms_3/pythia-70m_sciq/eval_job_output.txt
|
59 |
-
#SBATCH --partition=sphinx
|
60 |
-
#SBATCH --time=14-0
|
61 |
-
|
62 |
-
# activate your desired anaconda environment
|
63 |
-
. /nlp/scr/tthrush/miniconda3/envs/pretraining-coreset-selection/etc/profile.d/conda.sh ; conda activate pretraining-coreset-selection
|
64 |
-
|
65 |
-
# cd to working directory
|
66 |
-
cd .
|
67 |
-
|
68 |
-
# launch commands
|
69 |
-
srun --unbuffered run_as_child_processes 'lm_eval --model hf --model_args pretrained=/juice5/scr5/tthrush/pretraining-coreset-selection/llm_pretraining/llms_3/pythia-70m_sciq,revision=main,dtype=float16,trust_remote_code=True --tasks xnli_en,xnli_fr,sciq,piqa,lambada,arc_easy --device cuda --output_path /juice5/scr5/tthrush/pretraining-coreset-selection/llm_pretraining/llms_3/pythia-70m_sciq/perf'
|
70 |
-
|
71 |
-
###############################
|
72 |
-
|
73 |
-
submission to slurm complete!
|
74 |
-
|
75 |
-
|
76 |
-
###############################
|
77 |
-
slurm submission output
|
78 |
-
|
79 |
-
Submitted batch job 7593151
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
###############################
|
84 |
-
|
85 |
-
slurm submission log: 2024-05-10 08:21:53.588187
|
86 |
-
created following sbatch script:
|
87 |
-
|
88 |
-
###############################
|
89 |
-
|
90 |
-
#!/bin/bash
|
91 |
-
|
92 |
-
#SBATCH --account=nlp
|
93 |
-
#SBATCH --cpus-per-task=16
|
94 |
-
#SBATCH --dependency=afterok:7593609
|
95 |
-
#SBATCH --gres=gpu:1
|
96 |
-
#SBATCH --job-name=tthrush-job-2971685
|
97 |
-
#SBATCH --mem=60G
|
98 |
-
#SBATCH --nodelist=sphinx1
|
99 |
-
#SBATCH --open-mode=append
|
100 |
-
#SBATCH --output=/juice5/scr5/tthrush/pretraining-coreset-selection/llm_pretraining/llms_3/pythia-70m_sciq/eval_job_output.txt
|
101 |
-
#SBATCH --partition=sphinx
|
102 |
-
#SBATCH --time=14-0
|
103 |
-
|
104 |
-
# activate your desired anaconda environment
|
105 |
-
. /nlp/scr/tthrush/miniconda3/etc/profile.d/conda.sh ; conda activate pretraining-coreset-selection
|
106 |
-
|
107 |
-
# cd to working directory
|
108 |
-
cd .
|
109 |
-
|
110 |
-
# launch commands
|
111 |
-
srun --unbuffered run_as_child_processes 'lm_eval --model hf --model_args pretrained=/juice5/scr5/tthrush/pretraining-coreset-selection/llm_pretraining/llms_3/pythia-70m_sciq,revision=main,dtype=float16,trust_remote_code=True --tasks xnli_en,xnli_fr,sciq,piqa,lambada,arc_easy --device cuda --output_path /juice5/scr5/tthrush/pretraining-coreset-selection/llm_pretraining/llms_3/pythia-70m_sciq/perf'
|
112 |
-
|
113 |
-
###############################
|
114 |
-
|
115 |
-
submission to slurm complete!
|
116 |
-
|
117 |
-
|
118 |
-
###############################
|
119 |
-
slurm submission output
|
120 |
-
|
121 |
-
Submitted batch job 7593610
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
###############################
|
126 |
-
|
127 |
-
slurm submission log: 2024-05-10 08:23:19.697943
|
128 |
-
created following sbatch script:
|
129 |
-
|
130 |
-
###############################
|
131 |
-
|
132 |
-
#!/bin/bash
|
133 |
-
|
134 |
-
#SBATCH --account=nlp
|
135 |
-
#SBATCH --cpus-per-task=16
|
136 |
-
#SBATCH --dependency=afterok:7593622
|
137 |
-
#SBATCH --gres=gpu:1
|
138 |
-
#SBATCH --job-name=tthrush-job-3736565
|
139 |
-
#SBATCH --mem=60G
|
140 |
-
#SBATCH --nodelist=sphinx2
|
141 |
-
#SBATCH --open-mode=append
|
142 |
-
#SBATCH --output=/juice5/scr5/tthrush/pretraining-coreset-selection/llm_pretraining/llms_3/pythia-70m_sciq/eval_job_output.txt
|
143 |
-
#SBATCH --partition=sphinx
|
144 |
-
#SBATCH --time=14-0
|
145 |
-
|
146 |
-
# activate your desired anaconda environment
|
147 |
-
. /nlp/scr/tthrush/miniconda3/etc/profile.d/conda.sh ; conda activate pretraining-coreset-selection
|
148 |
-
|
149 |
-
# cd to working directory
|
150 |
-
cd .
|
151 |
-
|
152 |
-
# launch commands
|
153 |
-
srun --unbuffered run_as_child_processes 'lm_eval --model hf --model_args pretrained=/juice5/scr5/tthrush/pretraining-coreset-selection/llm_pretraining/llms_3/pythia-70m_sciq,revision=main,dtype=float16,trust_remote_code=True --tasks xnli_en,xnli_fr,sciq,piqa,lambada,arc_easy --device cuda --output_path /juice5/scr5/tthrush/pretraining-coreset-selection/llm_pretraining/llms_3/pythia-70m_sciq/perf'
|
154 |
-
|
155 |
-
###############################
|
156 |
-
|
157 |
-
submission to slurm complete!
|
158 |
-
|
159 |
-
|
160 |
-
###############################
|
161 |
-
slurm submission output
|
162 |
-
|
163 |
-
Submitted batch job 7593623
|
164 |
|
165 |
|
166 |
|
|
|
1 |
+
slurm submission log: 2024-05-10 17:55:25.593370
|
2 |
created following sbatch script:
|
3 |
|
4 |
###############################
|
|
|
7 |
|
8 |
#SBATCH --account=nlp
|
9 |
#SBATCH --cpus-per-task=16
|
10 |
+
#SBATCH --dependency=afterok:7594445
|
11 |
#SBATCH --gres=gpu:1
|
12 |
+
#SBATCH --job-name=tthrush-job-3137191
|
13 |
#SBATCH --mem=60G
|
14 |
#SBATCH --nodelist=sphinx2
|
15 |
#SBATCH --open-mode=append
|
16 |
+
#SBATCH --output=/juice5/scr5/tthrush/pretraining-coreset-selection/llm_pretraining/llms_4/pythia-70m_sciq/eval_job_output.txt
|
17 |
#SBATCH --partition=sphinx
|
18 |
#SBATCH --time=14-0
|
19 |
|
|
|
24 |
cd .
|
25 |
|
26 |
# launch commands
|
27 |
+
srun --unbuffered run_as_child_processes 'lm_eval --model hf --model_args pretrained=/juice5/scr5/tthrush/pretraining-coreset-selection/llm_pretraining/llms_4/pythia-70m_sciq,revision=main,dtype=float16,trust_remote_code=True --tasks xnli_en,xnli_fr,sciq,piqa,lambada,arc_easy --device cuda --output_path /juice5/scr5/tthrush/pretraining-coreset-selection/llm_pretraining/llms_4/pythia-70m_sciq/perf'
|
28 |
|
29 |
###############################
|
30 |
|
|
|
34 |
###############################
|
35 |
slurm submission output
|
36 |
|
37 |
+
Submitted batch job 7594446
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
38 |
|
39 |
|
40 |
|
logs/events.out.tfevents.1715391511.sphinx2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9cf07c779e11eb67fd5705198c08cfdaf6d945068b69ce996e27e1a0d6deed3f
|
3 |
+
size 6234
|
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 281715176
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9678958d00a2302dca4e2d5af91226207f76a77e1d57585a36a335cba43f9253
|
3 |
size 281715176
|
train_job_output.txt
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
slurm submission log: 2024-05-10
|
2 |
created following sbatch script:
|
3 |
|
4 |
###############################
|
@@ -7,23 +7,24 @@ created following sbatch script:
|
|
7 |
|
8 |
#SBATCH --account=nlp
|
9 |
#SBATCH --cpus-per-task=16
|
|
|
10 |
#SBATCH --gres=gpu:2
|
11 |
-
#SBATCH --job-name=tthrush-job-
|
12 |
#SBATCH --mem=400G
|
13 |
-
#SBATCH --nodelist=
|
14 |
#SBATCH --open-mode=append
|
15 |
-
#SBATCH --output=/juice5/scr5/tthrush/pretraining-coreset-selection/llm_pretraining/
|
16 |
#SBATCH --partition=sphinx
|
17 |
#SBATCH --time=14-0
|
18 |
|
19 |
# activate your desired anaconda environment
|
20 |
-
. /nlp/scr/tthrush/miniconda3/etc/profile.d/conda.sh ; conda activate pretraining-coreset-selection
|
21 |
|
22 |
# cd to working directory
|
23 |
cd .
|
24 |
|
25 |
# launch commands
|
26 |
-
srun --unbuffered run_as_child_processes 'torchrun --master_port 29502 --nproc_per_node=2 train_llm.py --dataset_id /juice5/scr5/tthrush/pretraining-coreset-selection/llm_pretraining/
|
27 |
|
28 |
###############################
|
29 |
|
@@ -33,105 +34,59 @@ submission to slurm complete!
|
|
33 |
###############################
|
34 |
slurm submission output
|
35 |
|
36 |
-
Submitted batch job
|
37 |
|
38 |
|
39 |
|
40 |
###############################
|
41 |
|
42 |
-
slurm
|
43 |
-
created following sbatch script:
|
44 |
|
45 |
-
|
|
|
46 |
|
47 |
-
|
48 |
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
#SBATCH --output=/juice5/scr5/tthrush/pretraining-coreset-selection/llm_pretraining/llms_3/pythia-70m_sciq/train_job_output.txt
|
57 |
-
#SBATCH --partition=sphinx
|
58 |
-
#SBATCH --time=14-0
|
59 |
|
60 |
-
|
61 |
-
. /nlp/scr/tthrush/miniconda3/etc/profile.d/conda.sh ; conda activate pretraining-coreset-selection
|
62 |
|
63 |
-
|
64 |
-
cd .
|
65 |
|
66 |
-
# launch commands
|
67 |
-
srun --unbuffered run_as_child_processes 'torchrun --master_port 29502 --nproc_per_node=2 train_llm.py --dataset_id /juice5/scr5/tthrush/pretraining-coreset-selection/llm_pretraining/train_data_3/sciq --output_dir /juice5/scr5/tthrush/pretraining-coreset-selection/llm_pretraining/llms_3/pythia-70m_sciq --output_hub_id pythia-70m_sciq --model_id EleutherAI/pythia-70m --num_train_epochs 1 --learning_rate 1e-3 --warmup_ratio=0.1 --gradient_accumulation_steps 2'
|
68 |
-
|
69 |
-
###############################
|
70 |
-
|
71 |
-
submission to slurm complete!
|
72 |
-
|
73 |
-
|
74 |
-
###############################
|
75 |
-
slurm submission output
|
76 |
-
|
77 |
-
Submitted batch job 7593622
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
###############################
|
82 |
|
83 |
###############################
|
84 |
-
start time: 2024-05-10
|
85 |
machine: sphinx2
|
86 |
conda env: pretraining-coreset-selection
|
87 |
###############################
|
88 |
running following processes
|
89 |
|
90 |
-
torchrun --master_port 29502 --nproc_per_node=2 train_llm.py --dataset_id /juice5/scr5/tthrush/pretraining-coreset-selection/llm_pretraining/
|
91 |
|
92 |
|
93 |
###############################
|
94 |
command outputs:
|
95 |
|
96 |
|
97 |
-
[2024-05-10 15:02:45,277] torch.distributed.run: [WARNING]
|
98 |
-
[2024-05-10 15:02:45,277] torch.distributed.run: [WARNING] *****************************************
|
99 |
-
[2024-05-10 15:02:45,277] torch.distributed.run: [WARNING] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed.
|
100 |
-
[2024-05-10 15:02:45,277] torch.distributed.run: [WARNING] *****************************************
|
101 |
-
05/10/2024 15:02:52 - INFO - __main__ - Script parameters ScriptArguments(dataset_id='/juice5/scr5/tthrush/pretraining-coreset-selection/llm_pretraining/train_data_3/sciq', output_dir='/juice5/scr5/tthrush/pretraining-coreset-selection/llm_pretraining/llms_3/pythia-70m_sciq', output_hub_id='pythia-70m_sciq', hf_hub_token=True, model_id='EleutherAI/pythia-70m', per_device_train_batch_size=256, num_train_epochs=1, learning_rate=0.001, gradient_accumulation_steps=2, from_scratch=True, warmup_ratio=0.1, adam_beta1=0.9, adam_beta2=0.95, adam_epsilon=1e-08, weight_decay=0.01, lr_scheduler_type='cosine', local_rank=0, resume_from_checkpoint=False, deepspeed=None, peft=False)
|
102 |
-
05/10/2024 15:02:52 - INFO - __main__ - Script parameters ScriptArguments(dataset_id='/juice5/scr5/tthrush/pretraining-coreset-selection/llm_pretraining/train_data_3/sciq', output_dir='/juice5/scr5/tthrush/pretraining-coreset-selection/llm_pretraining/llms_3/pythia-70m_sciq', output_hub_id='pythia-70m_sciq', hf_hub_token=True, model_id='EleutherAI/pythia-70m', per_device_train_batch_size=256, num_train_epochs=1, learning_rate=0.001, gradient_accumulation_steps=2, from_scratch=True, warmup_ratio=0.1, adam_beta1=0.9, adam_beta2=0.95, adam_epsilon=1e-08, weight_decay=0.01, lr_scheduler_type='cosine', local_rank=0, resume_from_checkpoint=False, deepspeed=None, peft=False)
|
103 |
-
|
104 |
0%| | 0/763 [00:00<?, ?it/s][rank0]:[W reducer.cpp:1360] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator())
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
105 |
0%| | 0/143 [00:00<?, ?it/s][rank0]:[W reducer.cpp:1360] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator())
|
106 |
[rank1]:[W reducer.cpp:1360] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator())
|
107 |
-
|
108 |
0%| | 1/763 [00:45<9:35:14, 45.29s/it]
|
109 |
0%| | 2/763 [00:58<5:37:14, 26.59s/it]
|
110 |
0%| | 3/763 [01:09<4:02:06, 19.11s/it]
|
111 |
1%| | 4/763 [01:15<3:00:21, 14.26s/it]
|
112 |
1%| | 5/763 [01:20<2:18:15, 10.94s/it]
|
113 |
1%| | 6/763 [01:25<1:50:17, 8.74s/it]
|
114 |
1%| | 7/763 [01:28<1:27:09, 6.92s/it]
|
115 |
1%| | 8/763 [01:31<1:10:29, 5.60s/it]
|
116 |
1%| | 9/763 [01:33<56:45, 4.52s/it]
|
117 |
1%|β | 10/763 [01:35<45:43, 3.64s/it]
|
118 |
1%|β | 11/763 [01:36<37:31, 2.99s/it]
|
119 |
2%|β | 12/763 [01:38<31:52, 2.55s/it]
|
120 |
2%|β | 13/763 [01:39<27:34, 2.21s/it]
|
121 |
2%|β | 14/763 [01:40<23:46, 1.90s/it]
|
122 |
2%|β | 15/763 [01:41<20:46, 1.67s/it]
|
123 |
2%|β | 16/763 [01:42<17:33, 1.41s/it]
|
124 |
2%|β | 17/763 [01:43<14:55, 1.20s/it]
|
125 |
2%|β | 18/763 [01:44<13:45, 1.11s/it]
|
126 |
2%|β | 19/763 [01:45<12:10, 1.02it/s]
|
127 |
3%|β | 20/763 [01:45<11:06, 1.12it/s]
|
128 |
3%|β | 21/763 [01:46<10:04, 1.23it/s]
|
129 |
3%|β | 22/763 [01:46<09:17, 1.33it/s]
|
130 |
3%|β | 23/763 [01:47<08:41, 1.42it/s]
|
131 |
3%|β | 24/763 [01:48<08:17, 1.49it/s]
|
132 |
3%|β | 25/763 [01:48<08:02, 1.53it/s]
|
133 |
{'loss': 9.8343, 'grad_norm': 0.9369164705276489, 'learning_rate': 0.0003246753246753247, 'epoch': 0.03}
|
134 |
-
|
135 |
3%|β | 25/763 [01:48<08:02, 1.53it/s]
|
136 |
3%|β | 26/763 [01:49<08:00, 1.53it/s]
|
137 |
4%|β | 27/763 [01:49<07:33, 1.62it/s]
|
138 |
4%|β | 28/763 [01:50<07:18, 1.68it/s]
|
139 |
4%|β | 29/763 [01:51<07:21, 1.66it/s]
|
140 |
4%|β | 30/763 [01:51<07:10, 1.70it/s]
|
141 |
4%|β | 31/763 [01:52<06:59, 1.75it/s]
|
142 |
4%|β | 32/763 [01:52<06:53, 1.77it/s]
|
143 |
4%|β | 33/763 [01:53<06:42, 1.81it/s]
|
144 |
4%|β | 34/763 [01:53<06:35, 1.84it/s]
|
145 |
5%|β | 35/763 [01:54<06:28, 1.87it/s]
|
146 |
5%|β | 36/763 [01:54<06:28, 1.87it/s]
|
147 |
5%|β | 37/763 [01:55<06:24, 1.89it/s]
|
148 |
5%|β | 38/763 [01:55<06:21, 1.90it/s]
|
149 |
5%|β | 39/763 [01:56<06:18, 1.92it/s]
|
150 |
5%|β | 40/763 [01:56<06:14, 1.93it/s]
|
151 |
5%|β | 41/763 [01:57<06:11, 1.94it/s]
|
152 |
6%|β | 42/763 [01:57<06:10, 1.95it/s]
|
153 |
6%|β | 43/763 [01:58<06:09, 1.95it/s]
|
154 |
6%|β | 44/763 [01:58<06:05, 1.97it/s]
|
155 |
6%|β | 45/763 [01:59<06:07, 1.95it/s]
|
156 |
6%|β | 46/763 [01:59<06:06, 1.96it/s]
|
157 |
6%|β | 47/763 [02:00<06:03, 1.97it/s]
|
158 |
6%|β | 48/763 [02:00<06:01, 1.98it/s]
|
159 |
6%|β | 49/763 [02:01<06:00, 1.98it/s]
|
160 |
7%|β | 50/763 [02:01<05:58, 1.99it/s]
|
161 |
{'loss': 7.3748, 'grad_norm': 0.3545825183391571, 'learning_rate': 0.0006493506493506494, 'epoch': 0.07}
|
162 |
-
|
163 |
7%|β | 50/763 [02:01<05:58, 1.99it/s]
|
164 |
7%|β | 51/763 [02:02<05:58, 1.99it/s]
|
165 |
7%|β | 52/763 [02:02<05:57, 1.99it/s]
|
166 |
7%|β | 53/763 [02:03<05:57, 1.99it/s]
|
167 |
7%|β | 54/763 [02:03<05:55, 1.99it/s]
|
168 |
7%|β | 55/763 [02:04<05:54, 2.00it/s]
|
169 |
7%|β | 56/763 [02:04<05:54, 2.00it/s]
|
170 |
7%|β | 57/763 [02:05<05:53, 2.00it/s]
|
171 |
8%|β | 58/763 [02:05<05:53, 1.99it/s]
|
172 |
8%|β | 59/763 [02:06<05:53, 1.99it/s]
|
173 |
8%|β | 60/763 [02:06<05:52, 2.00it/s]
|
174 |
8%|β | 61/763 [02:07<05:52, 1.99it/s]
|
175 |
8%|β | 62/763 [02:07<05:51, 2.00it/s]
|
176 |
8%|β | 63/763 [02:08<05:50, 2.00it/s]
|
177 |
8%|β | 64/763 [02:08<05:48, 2.01it/s]
|
178 |
9%|β | 65/763 [02:09<05:47, 2.01it/s]
|
179 |
9%|β | 66/763 [02:09<05:57, 1.95it/s]
|
180 |
9%|β | 67/763 [02:10<05:53, 1.97it/s]
|
181 |
9%|β | 68/763 [02:10<05:51, 1.98it/s]
|
182 |
9%|β | 69/763 [02:11<05:49, 1.99it/s]
|
183 |
9%|β | 70/763 [02:11<05:47, 1.99it/s]
|
184 |
9%|β | 71/763 [02:12<05:46, 2.00it/s]
|
185 |
9%|β | 72/763 [02:12<05:45, 2.00it/s]
|
186 |
10%|β | 73/763 [02:13<05:43, 2.01it/s]
|
187 |
10%|β | 74/763 [02:13<05:42, 2.01it/s]
|
188 |
10%|β | 75/763 [02:14<05:42, 2.01it/s]
|
189 |
|
190 |
-
|
191 |
10%|β | 75/763 [02:14<05:42, 2.01it/s]
|
192 |
10%|β | 76/763 [02:14<05:41, 2.01it/s]
|
193 |
10%|β | 77/763 [02:15<05:41, 2.01it/s]
|
194 |
10%|β | 78/763 [02:15<05:40, 2.01it/s]
|
195 |
10%|β | 79/763 [02:16<05:39, 2.01it/s]
|
196 |
10%|β | 80/763 [02:16<05:39, 2.01it/s]
|
197 |
11%|β | 81/763 [02:17<05:38, 2.01it/s]
|
198 |
11%|β | 82/763 [02:17<05:38, 2.01it/s]
|
199 |
11%|β | 83/763 [02:18<05:37, 2.01it/s]
|
200 |
11%|β | 84/763 [02:18<05:37, 2.01it/s]
|
201 |
11%|β | 85/763 [02:19<05:37, 2.01it/s]
|
202 |
11%|ββ | 86/763 [02:19<05:36, 2.01it/s]
|
203 |
11%|ββ | 87/763 [02:20<05:34, 2.02it/s]
|
204 |
12%|ββ | 88/763 [02:20<05:34, 2.02it/s]
|
205 |
12%|ββ | 89/763 [02:21<05:34, 2.02it/s]
|
206 |
12%|ββ | 90/763 [02:21<05:33, 2.02it/s]
|
207 |
12%|ββ | 91/763 [02:22<05:32, 2.02it/s]
|
208 |
12%|ββ | 92/763 [02:22<05:31, 2.02it/s]
|
209 |
12%|ββ | 93/763 [02:23<05:32, 2.02it/s]
|
210 |
12%|ββ | 94/763 [02:23<05:31, 2.02it/s]
|
211 |
12%|ββ | 95/763 [02:24<05:30, 2.02it/s]
|
212 |
13%|ββ | 96/763 [02:24<05:30, 2.02it/s]
|
213 |
13%|ββ | 97/763 [02:25<05:30, 2.01it/s]
|
214 |
13%|ββ | 98/763 [02:25<05:29, 2.02it/s]
|
215 |
13%|ββ | 99/763 [02:26<05:29, 2.02it/s]
|
216 |
13%|ββ | 100/763 [02:26<05:32, 2.00it/s]{'loss': 5.0788, 'grad_norm': 0.4961775541305542, 'learning_rate': 0.0009972289418801728, 'epoch': 0.13}
|
217 |
-
|
218 |
|
219 |
13%|ββ | 100/763 [02:26<05:32, 2.00it/s]
|
220 |
13%|ββ | 101/763 [02:27<05:31, 2.00it/s]
|
221 |
13%|ββ | 102/763 [02:27<05:30, 2.00it/s]
|
222 |
13%|ββ | 103/763 [02:28<05:28, 2.01it/s]
|
223 |
14%|ββ | 104/763 [02:28<05:27, 2.01it/s]
|
224 |
14%|ββ | 105/763 [02:29<05:27, 2.01it/s]
|
225 |
14%|ββ | 106/763 [02:29<05:26, 2.01it/s]
|
226 |
14%|ββ | 107/763 [02:30<05:25, 2.01it/s]
|
227 |
14%|ββ | 108/763 [02:30<05:24, 2.02it/s]
|
228 |
14%|ββ | 109/763 [02:31<05:23, 2.02it/s]
|
229 |
14%|ββ | 110/763 [02:31<05:23, 2.02it/s]
|
230 |
15%|ββ | 111/763 [02:32<05:23, 2.02it/s]
|
231 |
15%|ββ | 112/763 [02:32<05:22, 2.02it/s]
|
232 |
15%|ββ | 113/763 [02:33<05:21, 2.02it/s]
|
233 |
15%|ββ | 114/763 [02:33<05:21, 2.02it/s]
|
234 |
15%|ββ | 115/763 [02:34<05:20, 2.02it/s]
|
235 |
15%|ββ | 116/763 [02:34<05:19, 2.02it/s]
|
236 |
15%|ββ | 117/763 [02:35<05:19, 2.02it/s]
|
237 |
15%|ββ | 118/763 [02:35<05:18, 2.02it/s]
|
238 |
16%|ββ | 119/763 [02:36<05:18, 2.02it/s]
|
239 |
16%|ββ | 120/763 [02:36<05:17, 2.02it/s]
|
240 |
16%|ββ | 121/763 [02:37<05:17, 2.02it/s]
|
241 |
16%|ββ | 122/763 [02:37<05:17, 2.02it/s]
|
242 |
16%|ββ | 123/763 [02:38<05:16, 2.02it/s]
|
243 |
16%|ββ | 124/763 [02:38<05:15, 2.02it/s]
|
244 |
16%|ββ | 125/763 [02:39<05:16, 2.02it/s]
|
245 |
{'loss': 4.6084, 'grad_norm': 0.2889716327190399, 'learning_rate': 0.0009879683689693263, 'epoch': 0.16}
|
246 |
-
|
247 |
16%|ββ | 125/763 [02:39<05:16, 2.02it/s]
|
248 |
17%|ββ | 126/763 [02:39<05:15, 2.02it/s]
|
249 |
17%|ββ | 127/763 [02:40<05:15, 2.01it/s]
|
250 |
17%|ββ | 128/763 [02:40<05:15, 2.02it/s]
|
251 |
17%|ββ | 129/763 [02:41<05:14, 2.02it/s]
|
252 |
17%|ββ | 130/763 [02:41<05:13, 2.02it/s]
|
253 |
17%|ββ | 131/763 [02:42<05:12, 2.02it/s]
|
254 |
17%|ββ | 132/763 [02:42<05:12, 2.02it/s]
|
255 |
17%|ββ | 133/763 [02:43<05:12, 2.02it/s]
|
256 |
18%|ββ | 134/763 [02:43<05:11, 2.02it/s]
|
257 |
18%|ββ | 135/763 [02:44<05:11, 2.02it/s]
|
258 |
18%|ββ | 136/763 [02:44<05:10, 2.02it/s]
|
259 |
18%|ββ | 137/763 [02:45<05:09, 2.02it/s]
|
260 |
18%|ββ | 138/763 [02:45<05:09, 2.02it/s]
|
261 |
18%|ββ | 139/763 [02:46<05:08, 2.02it/s]
|
262 |
18%|ββ | 140/763 [02:46<05:08, 2.02it/s]
|
263 |
18%|ββ | 141/763 [02:47<05:07, 2.02it/s]
|
264 |
19%|ββ | 142/763 [02:47<05:07, 2.02it/s]
|
265 |
19%|ββ | 143/763 [02:48<05:06, 2.02it/s]
|
266 |
19%|ββ | 144/763 [02:48<05:05, 2.02it/s]
|
267 |
19%|ββ | 145/763 [02:49<05:05, 2.02it/s]
|
268 |
19%|ββ | 146/763 [02:49<05:04, 2.02it/s]
|
269 |
19%|ββ | 147/763 [02:50<05:04, 2.02it/s]
|
270 |
19%|ββ | 148/763 [02:50<05:04, 2.02it/s]
|
271 |
20%|ββ | 149/763 [02:51<05:03, 2.02it/s]
|
272 |
20%|ββ | 150/763 [02:51<05:03, 2.02it/s]{'loss': 4.2413, 'grad_norm': 0.44144755601882935, 'learning_rate': 0.0009723185625357323, 'epoch': 0.2}
|
273 |
-
|
274 |
|
275 |
20%|ββ | 150/763 [02:51<05:03, 2.02it/s]
|
276 |
20%|ββ | 151/763 [02:52<05:03, 2.02it/s]
|
277 |
20%|ββ | 152/763 [02:52<05:02, 2.02it/s]
|
278 |
20%|ββ | 153/763 [02:53<05:01, 2.02it/s]
|
279 |
20%|ββ | 154/763 [02:53<05:01, 2.02it/s]
|
280 |
20%|ββ | 155/763 [02:54<05:00, 2.02it/s]
|
281 |
20%|ββ | 156/763 [02:54<05:00, 2.02it/s]
|
282 |
21%|ββ | 157/763 [02:55<04:59, 2.02it/s]
|
283 |
21%|ββ | 158/763 [02:55<04:59, 2.02it/s]
|
284 |
21%|ββ | 159/763 [02:56<04:58, 2.02it/s]
|
285 |
21%|ββ | 160/763 [02:56<04:58, 2.02it/s]
|
286 |
21%|ββ | 161/763 [02:57<04:57, 2.02it/s]
|
287 |
21%|ββ | 162/763 [02:57<04:57, 2.02it/s]
|
288 |
21%|βββ | 163/763 [02:58<04:57, 2.02it/s]
|
289 |
21%|βββ | 164/763 [02:58<04:56, 2.02it/s]
|
290 |
22%|βββ | 165/763 [02:59<04:55, 2.02it/s]
|
291 |
22%|βββ | 166/763 [02:59<04:55, 2.02it/s]
|
292 |
22%|βββ | 167/763 [03:00<04:54, 2.02it/s]
|
293 |
22%|βββ | 168/763 [03:00<04:54, 2.02it/s]
|
294 |
22%|βββ | 169/763 [03:01<04:53, 2.02it/s]
|
295 |
22%|βββ | 170/763 [03:01<04:52, 2.02it/s]
|
296 |
22%|βββ | 171/763 [03:02<04:52, 2.02it/s]
|
297 |
23%|βββ | 172/763 [03:02<04:51, 2.02it/s]
|
298 |
23%|βββ | 173/763 [03:03<04:51, 2.02it/s]
|
299 |
23%|βββ | 174/763 [03:03<04:51, 2.02it/s]
|
300 |
23%|βββ | 175/763 [03:04<04:50, 2.02it/s]{'loss': 3.9435, 'grad_norm': 0.39299631118774414, 'learning_rate': 0.0009504844339512095, 'epoch': 0.23}
|
301 |
-
|
302 |
|
303 |
23%|βββ | 175/763 [03:04<04:50, 2.02it/s]
|
304 |
23%|βββ | 176/763 [03:04<04:50, 2.02it/s]
|
305 |
23%|βββ | 177/763 [03:04<04:50, 2.02it/s]
|
306 |
23%|βββ | 178/763 [03:05<04:49, 2.02it/s]
|
307 |
23%|βββ | 179/763 [03:05<04:48, 2.02it/s]
|
308 |
24%|βββ | 180/763 [03:06<04:48, 2.02it/s]
|
309 |
24%|βββ | 181/763 [03:06<04:47, 2.02it/s]
|
310 |
24%|βββ | 182/763 [03:07<04:47, 2.02it/s]
|
311 |
24%|βββ | 183/763 [03:07<04:46, 2.02it/s]
|
312 |
24%|βββ | 184/763 [03:08<04:46, 2.02it/s]
|
313 |
24%|βββ | 185/763 [03:08<04:46, 2.02it/s]
|
314 |
24%|βββ | 186/763 [03:09<04:45, 2.02it/s]
|
315 |
25%|βββ | 187/763 [03:09<04:45, 2.02it/s]
|
316 |
25%|βββ | 188/763 [03:10<04:44, 2.02it/s]
|
317 |
25%|βββ | 189/763 [03:10<04:43, 2.02it/s]
|
318 |
25%|βββ | 190/763 [03:11<04:43, 2.02it/s]
|
319 |
25%|βββ | 191/763 [03:11<04:42, 2.02it/s]
|
320 |
25%|βββ | 192/763 [03:12<04:42, 2.02it/s]
|
321 |
25%|βββ | 193/763 [03:12<04:41, 2.02it/s]
|
322 |
25%|βββ | 194/763 [03:13<04:41, 2.02it/s]
|
323 |
26%|βββ | 195/763 [03:13<04:41, 2.02it/s]
|
324 |
26%|βββ | 196/763 [03:14<04:40, 2.02it/s]
|
325 |
26%|βββ | 197/763 [03:14<04:39, 2.02it/s]
|
326 |
26%|βββ | 198/763 [03:15<04:39, 2.02it/s]
|
327 |
26%|βββ | 199/763 [03:15<04:38, 2.02it/s]
|
328 |
26%|βββ | 200/763 [03:16<04:38, 2.02it/s]{'loss': 3.6901, 'grad_norm': 0.34031203389167786, 'learning_rate': 0.0009227518692591244, 'epoch': 0.26}
|
329 |
-
|
330 |
|
331 |
26%|βββ | 200/763 [03:16<04:38, 2.02it/s]
|
332 |
26%|βββ | 201/763 [03:16<04:38, 2.02it/s]
|
333 |
26%|βββ | 202/763 [03:17<04:37, 2.02it/s]
|
334 |
27%|βββ | 203/763 [03:17<04:36, 2.02it/s]
|
335 |
27%|βββ | 204/763 [03:18<04:36, 2.02it/s]
|
336 |
27%|βββ | 205/763 [03:18<04:35, 2.03it/s]
|
337 |
27%|βββ | 206/763 [03:19<04:35, 2.03it/s]
|
338 |
27%|βββ | 207/763 [03:19<04:34, 2.02it/s]
|
339 |
27%|βββ | 208/763 [03:20<04:34, 2.02it/s]
|
340 |
27%|βββ | 209/763 [03:20<04:33, 2.02it/s]
|
341 |
28%|βββ | 210/763 [03:21<04:33, 2.02it/s]
|
342 |
28%|βββ | 211/763 [03:21<04:32, 2.02it/s]
|
343 |
28%|βββ | 212/763 [03:22<04:32, 2.02it/s]
|
344 |
28%|βββ | 213/763 [03:22<04:31, 2.02it/s]
|
345 |
28%|βββ | 214/763 [03:23<04:31, 2.02it/s]
|
346 |
28%|βββ | 215/763 [03:23<04:30, 2.03it/s]
|
347 |
28%|βββ | 216/763 [03:24<04:30, 2.02it/s]
|
348 |
28%|βββ | 217/763 [03:24<04:29, 2.03it/s]
|
349 |
29%|βββ | 218/763 [03:25<04:29, 2.03it/s]
|
350 |
29%|βββ | 219/763 [03:25<04:28, 2.03it/s]
|
351 |
29%|βββ | 220/763 [03:26<04:28, 2.03it/s]
|
352 |
29%|βββ | 221/763 [03:26<04:27, 2.02it/s]
|
353 |
29%|βββ | 222/763 [03:27<04:26, 2.03it/s]
|
354 |
29%|βββ | 223/763 [03:27<04:26, 2.03it/s]
|
355 |
29%|βββ | 224/763 [03:28<04:26, 2.02it/s]
|
356 |
29%|βββ | 225/763 [03:28<04:25, 2.02it/s]{'loss': 3.4966, 'grad_norm': 0.37306490540504456, 'learning_rate': 0.0008894839859139472, 'epoch': 0.29}
|
357 |
-
|
358 |
|
359 |
29%|βββ | 225/763 [03:28<04:25, 2.02it/s]
|
360 |
30%|βββ | 226/763 [03:29<04:25, 2.02it/s]
|
361 |
30%|βββ | 227/763 [03:29<04:24, 2.02it/s]
|
362 |
30%|βββ | 228/763 [03:30<04:24, 2.02it/s]
|
363 |
30%|βββ | 229/763 [03:30<04:23, 2.02it/s]
|
364 |
30%|βββ | 230/763 [03:31<04:23, 2.02it/s]
|
365 |
30%|βββ | 231/763 [03:31<04:22, 2.03it/s]
|
366 |
30%|βββ | 232/763 [03:32<04:22, 2.02it/s]
|
367 |
31%|βββ | 233/763 [03:32<04:22, 2.02it/s]
|
368 |
31%|βββ | 234/763 [03:33<04:21, 2.02it/s]
|
369 |
31%|βββ | 235/763 [03:33<04:20, 2.02it/s]
|
370 |
31%|βββ | 236/763 [03:34<04:20, 2.02it/s]
|
371 |
31%|βββ | 237/763 [03:34<04:20, 2.02it/s]
|
372 |
31%|βββ | 238/763 [03:35<04:19, 2.02it/s]
|
373 |
31%|ββββ | 239/763 [03:35<04:18, 2.02it/s]
|
374 |
31%|ββββ | 240/763 [03:36<04:18, 2.02it/s]
|
375 |
32%|ββββ | 241/763 [03:36<04:17, 2.02it/s]
|
376 |
32%|ββββ | 242/763 [03:37<04:17, 2.02it/s]
|
377 |
32%|ββββ | 243/763 [03:37<04:17, 2.02it/s]
|
378 |
32%|ββββ | 244/763 [03:38<04:16, 2.02it/s]
|
379 |
32%|ββββ | 245/763 [03:38<04:16, 2.02it/s]
|
380 |
32%|ββββ | 246/763 [03:39<04:15, 2.02it/s]
|
381 |
32%|ββββ | 247/763 [03:39<04:15, 2.02it/s]
|
382 |
33%|ββββ | 248/763 [03:40<04:14, 2.02it/s]
|
383 |
33%|ββββ | 249/763 [03:40<04:14, 2.02it/s]
|
384 |
33%|ββββ | 250/763 [03:41<04:13, 2.03it/s]{'loss': 3.3449, 'grad_norm': 0.4239332973957062, 'learning_rate': 0.0008511163782882168, 'epoch': 0.33}
|
385 |
-
|
386 |
|
387 |
33%|ββββ | 250/763 [03:41<04:13, 2.03it/s]
|
388 |
33%|ββββ | 251/763 [03:41<04:13, 2.02it/s]
|
389 |
33%|ββββ | 252/763 [03:42<04:12, 2.02it/s]
|
390 |
33%|ββββ | 253/763 [03:42<04:12, 2.02it/s]
|
391 |
33%|ββββ | 254/763 [03:43<04:11, 2.02it/s]
|
392 |
33%|ββββ | 255/763 [03:43<04:11, 2.02it/s]
|
393 |
34%|ββββ | 256/763 [03:44<04:10, 2.02it/s]
|
394 |
34%|ββββ | 257/763 [03:44<04:09, 2.02it/s]
|
395 |
34%|ββββ | 258/763 [03:45<04:09, 2.03it/s]
|
396 |
34%|ββββ | 259/763 [03:45<04:08, 2.03it/s]
|
397 |
34%|ββββ | 260/763 [03:46<04:08, 2.02it/s]
|
398 |
34%|ββββ | 261/763 [03:46<04:08, 2.02it/s]
|
399 |
34%|ββββ | 262/763 [03:47<04:07, 2.02it/s]
|
400 |
34%|ββββ | 263/763 [03:47<04:07, 2.02it/s]
|
401 |
35%|ββββ | 264/763 [03:47<04:06, 2.02it/s]
|
402 |
35%|ββββ | 265/763 [03:48<04:06, 2.02it/s]
|
403 |
35%|ββββ | 266/763 [03:48<04:05, 2.02it/s]
|
404 |
35%|ββββ | 267/763 [03:49<04:05, 2.02it/s]
|
405 |
35%|ββββ | 268/763 [03:49<04:04, 2.02it/s]
|
406 |
35%|ββββ | 269/763 [03:50<04:04, 2.02it/s]
|
407 |
35%|ββββ | 270/763 [03:50<04:03, 2.02it/s]
|
408 |
36%|ββββ | 271/763 [03:51<04:03, 2.02it/s]
|
409 |
36%|ββββ | 272/763 [03:51<04:02, 2.02it/s]
|
410 |
36%|ββββ | 273/763 [03:52<04:02, 2.02it/s]
|
411 |
36%|ββββ | 274/763 [03:52<04:01, 2.02it/s]
|
412 |
36%|ββββ | 275/763 [03:53<04:01, 2.02it/s]
|
413 |
|
414 |
-
|
415 |
36%|ββββ | 275/763 [03:53<04:01, 2.02it/s]
|
416 |
36%|ββββ | 276/763 [03:53<04:00, 2.02it/s]
|
417 |
36%|ββββ | 277/763 [03:54<04:00, 2.02it/s]
|
418 |
36%|ββββ | 278/763 [03:54<03:59, 2.02it/s]
|
419 |
37%|ββββ | 279/763 [03:55<03:59, 2.02it/s]
|
420 |
37%|ββββ | 280/763 [03:55<03:58, 2.02it/s]
|
421 |
37%|ββββ | 281/763 [03:56<03:58, 2.02it/s]
|
422 |
37%|ββββ | 282/763 [03:56<03:57, 2.02it/s]
|
423 |
37%|ββββ | 283/763 [03:57<03:57, 2.02it/s]
|
424 |
37%|ββββ | 284/763 [03:57<03:56, 2.02it/s]
|
425 |
37%|ββββ | 285/763 [03:58<03:56, 2.02it/s]
|
426 |
37%|ββββ | 286/763 [03:58<03:55, 2.02it/s]
|
427 |
38%|ββββ | 287/763 [03:59<03:55, 2.02it/s]
|
428 |
38%|ββββ | 288/763 [03:59<03:55, 2.02it/s]
|
429 |
38%|ββββ | 289/763 [04:00<03:54, 2.02it/s]
|
430 |
38%|ββββ | 290/763 [04:00<03:54, 2.02it/s]
|
431 |
38%|ββββ | 291/763 [04:01<03:53, 2.02it/s]
|
432 |
38%|ββββ | 292/763 [04:01<03:53, 2.02it/s]
|
433 |
38%|ββββ | 293/763 [04:02<03:52, 2.02it/s]
|
434 |
39%|ββββ | 294/763 [04:02<03:51, 2.02it/s]
|
435 |
39%|ββββ | 295/763 [04:03<03:51, 2.02it/s]
|
436 |
39%|ββββ | 296/763 [04:03<03:50, 2.02it/s]
|
437 |
39%|ββββ | 297/763 [04:04<03:50, 2.02it/s]
|
438 |
39%|ββββ | 298/763 [04:04<03:49, 2.02it/s]
|
439 |
39%|ββββ | 299/763 [04:05<03:49, 2.02it/s]
|
440 |
39%|ββββ | 300/763 [04:05<03:48, 2.02it/s]
|
441 |
{'loss': 3.0883, 'grad_norm': 0.3450576961040497, 'learning_rate': 0.0007611516571398591, 'epoch': 0.39}
|
442 |
-
|
443 |
39%|ββββ | 300/763 [04:05<03:48, 2.02it/s]
|
444 |
39%|ββββ | 301/763 [04:06<03:48, 2.02it/s]
|
445 |
40%|ββββ | 302/763 [04:06<03:48, 2.02it/s]
|
446 |
40%|ββββ | 303/763 [04:07<03:47, 2.02it/s]
|
447 |
40%|ββββ | 304/763 [04:07<03:47, 2.02it/s]
|
448 |
40%|ββββ | 305/763 [04:08<03:46, 2.02it/s]
|
449 |
40%|ββββ | 306/763 [04:08<03:45, 2.02it/s]
|
450 |
40%|ββββ | 307/763 [04:09<03:45, 2.02it/s]
|
451 |
40%|ββββ | 308/763 [04:09<03:44, 2.02it/s]
|
452 |
40%|ββββ | 309/763 [04:10<03:44, 2.02it/s]
|
453 |
41%|ββββ | 310/763 [04:10<03:43, 2.02it/s]
|
454 |
41%|ββββ | 311/763 [04:11<03:43, 2.02it/s]
|
455 |
41%|ββββ | 312/763 [04:11<03:42, 2.02it/s]
|
456 |
41%|ββββ | 313/763 [04:12<03:42, 2.02it/s]
|
457 |
41%|ββββ | 314/763 [04:12<03:41, 2.02it/s]
|
458 |
41%|βββββ | 315/763 [04:13<03:41, 2.02it/s]
|
459 |
41%|βββββ | 316/763 [04:13<03:40, 2.02it/s]
|
460 |
42%|οΏ½οΏ½οΏ½ββββ | 317/763 [04:14<03:40, 2.02it/s]
|
461 |
42%|βββββ | 318/763 [04:14<03:39, 2.02it/s]
|
462 |
42%|βββββ | 319/763 [04:15<03:39, 2.02it/s]
|
463 |
42%|βββββ | 320/763 [04:15<03:38, 2.02it/s]
|
464 |
42%|βββββ | 321/763 [04:16<03:38, 2.02it/s]
|
465 |
42%|βββββ | 322/763 [04:16<03:38, 2.02it/s]
|
466 |
42%|βββββ | 323/763 [04:17<03:37, 2.02it/s]
|
467 |
42%|βββββ | 324/763 [04:17<03:36, 2.02it/s]
|
468 |
43%|βββββ | 325/763 [04:18<03:36, 2.03it/s]
|
469 |
|
470 |
-
|
471 |
43%|βββββ | 325/763 [04:18<03:36, 2.03it/s]
|
472 |
43%|βββββ | 326/763 [04:18<03:36, 2.02it/s]
|
473 |
43%|βββββ | 327/763 [04:19<03:35, 2.02it/s]
|
474 |
43%|βββββ | 328/763 [04:19<03:35, 2.02it/s]
|
475 |
43%|βββββ | 329/763 [04:20<03:34, 2.02it/s]
|
476 |
43%|βββββ | 330/763 [04:20<03:33, 2.02it/s]
|
477 |
43%|βββββ | 331/763 [04:21<03:33, 2.02it/s]
|
478 |
44%|βββββ | 332/763 [04:21<03:33, 2.02it/s]
|
479 |
44%|βββββ | 333/763 [04:22<03:32, 2.02it/s]
|
480 |
44%|βββββ | 334/763 [04:22<03:31, 2.02it/s]
|
481 |
44%|βββββ | 335/763 [04:23<03:31, 2.02it/s]
|
482 |
44%|βββββ | 336/763 [04:23<03:31, 2.02it/s]
|
483 |
44%|βββββ | 337/763 [04:24<03:30, 2.02it/s]
|
484 |
44%|βββββ | 338/763 [04:24<03:30, 2.02it/s]
|
485 |
44%|βββββ | 339/763 [04:25<03:29, 2.02it/s]
|
486 |
45%|βββββ | 340/763 [04:25<03:29, 2.02it/s]
|
487 |
45%|βββββ | 341/763 [04:26<03:28, 2.02it/s]
|
488 |
45%|βββββ | 342/763 [04:26<03:28, 2.02it/s]
|
489 |
45%|βββββ | 343/763 [04:27<03:27, 2.02it/s]
|
490 |
45%|βββββ | 344/763 [04:27<03:26, 2.02it/s]
|
491 |
45%|βββββ | 345/763 [04:28<03:26, 2.03it/s]
|
492 |
45%|βββββ | 346/763 [04:28<03:26, 2.02it/s]
|
493 |
45%|βββββ | 347/763 [04:29<03:25, 2.02it/s]
|
494 |
46%|βββββ | 348/763 [04:29<03:25, 2.02it/s]
|
495 |
46%|βββββ | 349/763 [04:30<03:24, 2.02it/s]
|
496 |
46%|βββββ | 350/763 [04:30<03:24, 2.02it/s]{'loss': 2.9083, 'grad_norm': 0.3510383367538452, 'learning_rate': 0.0006575541090118104, 'epoch': 0.46}
|
497 |
-
|
498 |
|
499 |
46%|βββββ | 350/763 [04:30<03:24, 2.02it/s]
|
500 |
46%|βββββ | 351/763 [04:31<03:24, 2.02it/s]
|
501 |
46%|βββββ | 352/763 [04:31<03:23, 2.02it/s]
|
502 |
46%|βββββ | 353/763 [04:32<03:22, 2.02it/s]
|
503 |
46%|βββββ | 354/763 [04:32<03:22, 2.02it/s]
|
504 |
47%|βββββ | 355/763 [04:32<03:21, 2.02it/s]
|
505 |
47%|βββββ | 356/763 [04:33<03:21, 2.02it/s]
|
506 |
47%|βββββ | 357/763 [04:33<03:20, 2.02it/s]
|
507 |
47%|βββββ | 358/763 [04:34<03:20, 2.02it/s]
|
508 |
47%|βββββ | 359/763 [04:34<03:19, 2.02it/s]
|
509 |
47%|βββββ | 360/763 [04:35<03:19, 2.02it/s]
|
510 |
47%|βββββ | 361/763 [04:35<03:18, 2.02it/s]
|
511 |
47%|βββββ | 362/763 [04:36<03:18, 2.02it/s]
|
512 |
48%|βββββ | 363/763 [04:36<03:17, 2.02it/s]
|
513 |
48%|βββββ | 364/763 [04:37<03:17, 2.02it/s]
|
514 |
48%|βββββ | 365/763 [04:37<03:16, 2.02it/s]
|
515 |
48%|βββββ | 366/763 [04:38<03:16, 2.02it/s]
|
516 |
48%|βββββ | 367/763 [04:38<03:15, 2.02it/s]
|
517 |
48%|βββββ | 368/763 [04:39<03:15, 2.02it/s]
|
518 |
48%|βββββ | 369/763 [04:39<03:14, 2.02it/s]
|
519 |
48%|βββββ | 370/763 [04:40<03:14, 2.02it/s]
|
520 |
49%|βββββ | 371/763 [04:40<03:13, 2.02it/s]
|
521 |
49%|βββββ | 372/763 [04:41<03:13, 2.02it/s]
|
522 |
49%|βββββ | 373/763 [04:41<03:12, 2.03it/s]
|
523 |
49%|βββββ | 374/763 [04:42<03:12, 2.03it/s]
|
524 |
49%|βββββ | 375/763 [04:42<03:11, 2.02it/s]{'loss': 2.8176, 'grad_norm': 0.3813249170780182, 'learning_rate': 0.0006023127766192824, 'epoch': 0.49}
|
525 |
-
|
526 |
|
527 |
49%|βββββ | 375/763 [04:42<03:11, 2.02it/s]
|
528 |
49%|βββββ | 376/763 [04:43<03:11, 2.02it/s]
|
529 |
49%|βββββ | 377/763 [04:43<03:10, 2.02it/s]
|
530 |
50%|βββββ | 378/763 [04:44<03:10, 2.02it/s]
|
531 |
50%|βββββ | 379/763 [04:44<03:09, 2.02it/s]
|
532 |
50%|βββββ | 380/763 [04:45<03:09, 2.03it/s]
|
533 |
50%|βββββ | 381/763 [04:45<03:08, 2.02it/s]
|
534 |
50%|βββββ | 382/763 [04:46<03:08, 2.02it/s]
|
535 |
50%|βββββ | 383/763 [04:46<03:07, 2.02it/s]
|
536 |
50%|βββββ | 384/763 [04:47<03:07, 2.02it/s]
|
537 |
50%|βββββ | 385/763 [04:47<03:07, 2.02it/s]
|
538 |
51%|βββββ | 386/763 [04:48<03:06, 2.02it/s]
|
539 |
51%|βββββ | 387/763 [04:48<03:05, 2.02it/s]
|
540 |
51%|βββββ | 388/763 [04:49<03:05, 2.02it/s]
|
541 |
51%|βββββ | 389/763 [04:49<03:04, 2.03it/s]
|
542 |
51%|βββββ | 390/763 [04:50<03:04, 2.02it/s]
|
543 |
51%|βββββ | 391/763 [04:50<03:03, 2.03it/s]
|
544 |
51%|ββββββ | 392/763 [04:51<03:03, 2.02it/s]
|
545 |
52%|ββββββ | 393/763 [04:51<03:02, 2.02it/s]
|
546 |
52%|ββββββ | 394/763 [04:52<03:02, 2.02it/s]
|
547 |
52%|ββββββ | 395/763 [04:52<03:02, 2.02it/s]
|
548 |
52%|ββββββ | 396/763 [04:53<03:01, 2.02it/s]
|
549 |
52%|ββββββ | 397/763 [04:53<03:00, 2.02it/s]
|
550 |
52%|ββββββ | 398/763 [04:54<03:00, 2.02it/s]
|
551 |
52%|ββββββ | 399/763 [04:54<02:59, 2.02it/s]
|
552 |
52%|ββββββ | 400/763 [04:55<02:59, 2.02it/s]{'loss': 2.7491, 'grad_norm': 0.34457528591156006, 'learning_rate': 0.0005457318077590012, 'epoch': 0.52}
|
553 |
-
|
554 |
|
555 |
52%|ββββββ | 400/763 [04:55<02:59, 2.02it/s]
|
556 |
53%|ββββββ | 401/763 [04:55<02:59, 2.02it/s]
|
557 |
53%|ββββββ | 402/763 [04:56<02:58, 2.02it/s]
|
558 |
53%|ββββββ | 403/763 [04:56<02:58, 2.02it/s]
|
559 |
53%|ββββββ | 404/763 [04:57<02:57, 2.02it/s]
|
560 |
53%|ββββββ | 405/763 [04:57<02:57, 2.02it/s]
|
561 |
53%|ββββββ | 406/763 [04:58<02:56, 2.02it/s]
|
562 |
53%|ββββββ | 407/763 [04:58<02:56, 2.02it/s]
|
563 |
53%|ββββββ | 408/763 [04:59<02:55, 2.02it/s]
|
564 |
54%|ββββββ | 409/763 [04:59<02:55, 2.02it/s]
|
565 |
54%|ββββββ | 410/763 [05:00<02:54, 2.02it/s]
|
566 |
54%|ββββββ | 411/763 [05:00<02:54, 2.02it/s]
|
567 |
54%|ββββββ | 412/763 [05:01<02:53, 2.02it/s]
|
568 |
54%|ββββββ | 413/763 [05:01<02:53, 2.02it/s]
|
569 |
54%|ββββββ | 414/763 [05:02<02:52, 2.02it/s]
|
570 |
54%|ββββββ | 415/763 [05:02<02:51, 2.02it/s]
|
571 |
55%|ββββββ | 416/763 [05:03<02:51, 2.02it/s]
|
572 |
55%|ββββββ | 417/763 [05:03<02:50, 2.03it/s]
|
573 |
55%|ββββββ | 418/763 [05:04<02:50, 2.03it/s]
|
574 |
55%|ββββββ | 419/763 [05:04<02:49, 2.03it/s]
|
575 |
55%|ββββββ | 420/763 [05:05<02:49, 2.03it/s]
|
576 |
55%|ββββββ | 421/763 [05:05<02:48, 2.03it/s]
|
577 |
55%|ββββββ | 422/763 [05:06<02:48, 2.03it/s]
|
578 |
55%|ββββββ | 423/763 [05:06<02:47, 2.02it/s]
|
579 |
56%|ββββββ | 424/763 [05:07<02:47, 2.03it/s]
|
580 |
56%|ββββββ | 425/763 [05:07<02:47, 2.02it/s]{'loss': 2.6858, 'grad_norm': 0.365237295627594, 'learning_rate': 0.0004885520476290998, 'epoch': 0.56}
|
581 |
-
|
582 |
|
583 |
56%|ββββββ | 425/763 [05:07<02:47, 2.02it/s]
|
584 |
56%|ββββββ | 426/763 [05:08<02:46, 2.02it/s]
|
585 |
56%|ββββββ | 427/763 [05:08<02:46, 2.02it/s]
|
586 |
56%|ββββββ | 428/763 [05:09<02:45, 2.02it/s]
|
587 |
56%|ββββββ | 429/763 [05:09<02:45, 2.02it/s]
|
588 |
56%|ββββββ | 430/763 [05:10<02:44, 2.02it/s]
|
589 |
56%|ββββββ | 431/763 [05:10<02:43, 2.03it/s]
|
590 |
57%|ββββββ | 432/763 [05:11<02:43, 2.03it/s]
|
591 |
57%|ββββββ | 433/763 [05:11<02:42, 2.02it/s]
|
592 |
57%|ββββββ | 434/763 [05:12<02:42, 2.02it/s]
|
593 |
57%|ββββββ | 435/763 [05:12<02:42, 2.02it/s]
|
594 |
57%|ββββββ | 436/763 [05:13<02:41, 2.02it/s]
|
595 |
57%|ββββββ | 437/763 [05:13<02:40, 2.03it/s]
|
596 |
57%|ββββββ | 438/763 [05:14<02:40, 2.02it/s]
|
597 |
58%|ββββββ | 439/763 [05:14<02:40, 2.02it/s]
|
598 |
58%|ββββββ | 440/763 [05:15<02:39, 2.02it/s]
|
599 |
58%|ββββββ | 441/763 [05:15<02:38, 2.03it/s]
|
600 |
58%|ββββββ | 442/763 [05:15<02:38, 2.02it/s]
|
601 |
58%|ββββββ | 443/763 [05:16<02:37, 2.03it/s]
|
602 |
58%|ββββββ | 444/763 [05:16<02:37, 2.02it/s]
|
603 |
58%|ββββββ | 445/763 [05:17<02:36, 2.03it/s]
|
604 |
58%|ββββββ | 446/763 [05:17<02:36, 2.03it/s]
|
605 |
59%|ββββββ | 447/763 [05:18<02:36, 2.03it/s]
|
606 |
59%|ββββββ | 448/763 [05:18<02:35, 2.03it/s]
|
607 |
59%|ββββββ | 449/763 [05:19<02:34, 2.03it/s]
|
608 |
59%|ββββββ | 450/763 [05:19<02:34, 2.03it/s]{'loss': 2.6339, 'grad_norm': 0.3599332869052887, 'learning_rate': 0.00043152218172535383, 'epoch': 0.59}
|
609 |
-
|
610 |
|
611 |
59%|ββββββ | 450/763 [05:19<02:34, 2.03it/s]
|
612 |
59%|ββββββ | 451/763 [05:20<02:34, 2.02it/s]
|
613 |
59%|ββββββ | 452/763 [05:20<02:33, 2.02it/s]
|
614 |
59%|ββββββ | 453/763 [05:21<02:33, 2.02it/s]
|
615 |
60%|ββββββ | 454/763 [05:21<02:32, 2.02it/s]
|
616 |
60%|ββββββ | 455/763 [05:22<02:32, 2.02it/s]
|
617 |
60%|ββββββ | 456/763 [05:22<02:31, 2.02it/s]
|
618 |
60%|ββββββ | 457/763 [05:23<02:31, 2.02it/s]
|
619 |
60%|ββββββ | 458/763 [05:23<02:30, 2.03it/s]
|
620 |
60%|ββββββ | 459/763 [05:24<02:30, 2.02it/s]
|
621 |
60%|ββββββ | 460/763 [05:24<02:29, 2.02it/s]
|
622 |
60%|ββββββ | 461/763 [05:25<02:29, 2.02it/s]
|
623 |
61%|ββββββ | 462/763 [05:25<02:28, 2.02it/s]
|
624 |
61%|ββββββ | 463/763 [05:26<02:28, 2.02it/s]
|
625 |
61%|ββββββ | 464/763 [05:26<02:27, 2.02it/s]
|
626 |
61%|ββββββ | 465/763 [05:27<02:27, 2.02it/s]
|
627 |
61%|ββββββ | 466/763 [05:27<02:26, 2.02it/s]
|
628 |
61%|ββββββ | 467/763 [05:28<02:26, 2.02it/s]
|
629 |
61%|βββββββ | 468/763 [05:28<02:25, 2.02it/s]
|
630 |
61%|βββββββ | 469/763 [05:29<02:25, 2.03it/s]
|
631 |
62%|βββββββ | 470/763 [05:29<02:24, 2.02it/s]
|
632 |
62%|βββββββ | 471/763 [05:30<02:24, 2.02it/s]
|
633 |
62%|βββββββ | 472/763 [05:30<02:23, 2.02it/s]
|
634 |
62%|βββββββ | 473/763 [05:31<02:23, 2.02it/s]
|
635 |
62%|βββββββ | 474/763 [05:31<02:22, 2.02it/s]
|
636 |
62%|βββββββ | 475/763 [05:32<02:22, 2.02it/s]{'loss': 2.5753, 'grad_norm': 0.36764204502105713, 'learning_rate': 0.0003753889328974423, 'epoch': 0.62}
|
637 |
-
|
638 |
|
639 |
62%|βββββββ | 475/763 [05:32<02:22, 2.02it/s]
|
640 |
62%|βββββββ | 476/763 [05:32<02:22, 2.01it/s]
|
641 |
63%|βββββββ | 477/763 [05:33<02:21, 2.02it/s]
|
642 |
63%|βββββββ | 478/763 [05:33<02:21, 2.02it/s]
|
643 |
63%|βββββββ | 479/763 [05:34<02:20, 2.02it/s]
|
644 |
63%|βββββββ | 480/763 [05:34<02:20, 2.02it/s]
|
645 |
63%|βββββββ | 481/763 [05:35<02:19, 2.02it/s]
|
646 |
63%|βββββββ | 482/763 [05:35<02:19, 2.02it/s]
|
647 |
63%|βββββββ | 483/763 [05:36<02:18, 2.02it/s]
|
648 |
63%|βββββββ | 484/763 [05:36<02:17, 2.02it/s]
|
649 |
64%|βββββββ | 485/763 [05:37<02:17, 2.02it/s]
|
650 |
64%|βββββββ | 486/763 [05:37<02:16, 2.02it/s]
|
651 |
64%|βββββββ | 487/763 [05:38<02:16, 2.02it/s]
|
652 |
64%|βββββββ | 488/763 [05:38<02:15, 2.02it/s]
|
653 |
64%|βββββββ | 489/763 [05:39<02:15, 2.02it/s]
|
654 |
64%|βββββββ | 490/763 [05:39<02:14, 2.03it/s]
|
655 |
64%|βββββββ | 491/763 [05:40<02:14, 2.02it/s]
|
656 |
64%|βββββββ | 492/763 [05:40<02:13, 2.03it/s]
|
657 |
65%|βββββββ | 493/763 [05:41<02:13, 2.02it/s]
|
658 |
65%|βββββββ | 494/763 [05:41<02:12, 2.03it/s]
|
659 |
65%|βββββββ | 495/763 [05:42<02:12, 2.03it/s]
|
660 |
65%|βββββββ | 496/763 [05:42<02:11, 2.02it/s]
|
661 |
65%|βββββββ | 497/763 [05:43<02:11, 2.03it/s]
|
662 |
65%|βββββββ | 498/763 [05:43<02:10, 2.03it/s]
|
663 |
65%|βββββββ | 499/763 [05:44<02:10, 2.02it/s]
|
664 |
66%|βββββββ | 500/763 [05:44<02:09, 2.02it/s]{'loss': 2.5207, 'grad_norm': 0.35472121834754944, 'learning_rate': 0.00032088728410319416, 'epoch': 0.66}
|
665 |
-
|
666 |
|
667 |
66%|βββββββ | 500/763 [05:44<02:09, 2.02it/s]
|
668 |
66%|βββββββ | 501/763 [05:45<02:09, 2.02it/s]
|
669 |
66%|βββββββ | 502/763 [05:45<02:09, 2.02it/s]
|
670 |
66%|βββββββ | 503/763 [05:46<02:08, 2.02it/s]
|
671 |
66%|βββββββ | 504/763 [05:46<02:08, 2.02it/s]
|
672 |
66%|βββββββ | 505/763 [05:47<02:07, 2.02it/s]
|
673 |
66%|βββββββ | 506/763 [05:47<02:07, 2.02it/s]
|
674 |
66%|βββββββ | 507/763 [05:48<02:06, 2.02it/s]
|
675 |
67%|βββββββ | 508/763 [05:48<02:05, 2.03it/s]
|
676 |
67%|βββββββ | 509/763 [05:49<02:05, 2.03it/s]
|
677 |
67%|βββββββ | 510/763 [05:49<02:04, 2.03it/s]
|
678 |
67%|βββββββ | 511/763 [05:50<02:04, 2.02it/s]
|
679 |
67%|βββββββ | 512/763 [05:50<02:03, 2.02it/s]
|
680 |
67%|βββββββ | 513/763 [05:51<02:03, 2.02it/s]
|
681 |
67%|βββββββ | 514/763 [05:51<02:03, 2.02it/s]
|
682 |
67%|βββββββ | 515/763 [05:52<02:02, 2.02it/s]
|
683 |
68%|βββββββ | 516/763 [05:52<02:01, 2.02it/s]
|
684 |
68%|βββββββ | 517/763 [05:53<02:01, 2.02it/s]
|
685 |
68%|βββββββ | 518/763 [05:53<02:00, 2.03it/s]
|
686 |
68%|βββββββ | 519/763 [05:54<02:00, 2.03it/s]
|
687 |
68%|βββββββ | 520/763 [05:54<01:59, 2.03it/s]
|
688 |
68%|βββββββ | 521/763 [05:55<01:59, 2.03it/s]
|
689 |
68%|βββββββ | 522/763 [05:55<01:59, 2.02it/s]
|
690 |
69%|βββββββ | 523/763 [05:56<01:58, 2.03it/s]
|
691 |
69%|βββββββ | 524/763 [05:56<01:58, 2.02it/s]
|
692 |
69%|βββββββ | 525/763 [05:57<01:57, 2.03it/s]{'loss': 2.4755, 'grad_norm': 0.342868447303772, 'learning_rate': 0.0002687308548795825, 'epoch': 0.69}
|
693 |
-
|
694 |
|
695 |
69%|βββββββ | 525/763 [05:57<01:57, 2.03it/s]
|
696 |
69%|βββββββ | 526/763 [05:57<01:57, 2.02it/s]
|
697 |
69%|βββββββ | 527/763 [05:57<01:56, 2.02it/s]
|
698 |
69%|βββββββ | 528/763 [05:58<01:56, 2.02it/s]
|
699 |
69%|βββββββ | 529/763 [05:58<01:55, 2.03it/s]
|
700 |
69%|βββββββ | 530/763 [05:59<01:55, 2.02it/s]
|
701 |
70%|βββββββ | 531/763 [05:59<01:54, 2.02it/s]
|
702 |
70%|βββββββ | 532/763 [06:00<01:54, 2.02it/s]
|
703 |
70%|βββββοΏ½οΏ½οΏ½β | 533/763 [06:00<01:53, 2.02it/s]
|
704 |
70%|βββββββ | 534/763 [06:01<01:53, 2.02it/s]
|
705 |
70%|βββββββ | 535/763 [06:01<01:52, 2.02it/s]
|
706 |
70%|βββββββ | 536/763 [06:02<01:52, 2.02it/s]
|
707 |
70%|βββββββ | 537/763 [06:02<01:51, 2.02it/s]
|
708 |
71%|βββββββ | 538/763 [06:03<01:51, 2.02it/s]
|
709 |
71%|βββββββ | 539/763 [06:03<01:50, 2.02it/s]
|
710 |
71%|βββββββ | 540/763 [06:04<01:50, 2.02it/s]
|
711 |
71%|βββββββ | 541/763 [06:04<01:49, 2.02it/s]
|
712 |
71%|βββββββ | 542/763 [06:05<01:49, 2.02it/s]
|
713 |
71%|βββββββ | 543/763 [06:05<01:48, 2.02it/s]
|
714 |
71%|ββββββββ | 544/763 [06:06<01:48, 2.02it/s]
|
715 |
71%|ββββββββ | 545/763 [06:06<01:47, 2.02it/s]
|
716 |
72%|ββββββββ | 546/763 [06:07<01:47, 2.02it/s]
|
717 |
72%|ββββββββ | 547/763 [06:07<01:46, 2.02it/s]
|
718 |
72%|ββββββββ | 548/763 [06:08<01:46, 2.02it/s]
|
719 |
72%|ββββββββ | 549/763 [06:08<01:45, 2.02it/s]
|
720 |
72%|ββββββββ | 550/763 [06:09<01:45, 2.02it/s]{'loss': 2.436, 'grad_norm': 0.36740800738334656, 'learning_rate': 0.00021960255753653008, 'epoch': 0.72}
|
721 |
-
|
722 |
|
723 |
72%|ββββββββ | 550/763 [06:09<01:45, 2.02it/s]
|
724 |
72%|ββββββββ | 551/763 [06:09<01:44, 2.02it/s]
|
725 |
72%|ββββββββ | 552/763 [06:10<01:44, 2.02it/s]
|
726 |
72%|ββββββββ | 553/763 [06:10<01:43, 2.02it/s]
|
727 |
73%|ββββββββ | 554/763 [06:11<01:43, 2.02it/s]
|
728 |
73%|ββββββββ | 555/763 [06:11<01:42, 2.02it/s]
|
729 |
73%|ββββββββ | 556/763 [06:12<01:42, 2.02it/s]
|
730 |
73%|ββββββββ | 557/763 [06:12<01:41, 2.02it/s]
|
731 |
73%|ββββββββ | 558/763 [06:13<01:41, 2.02it/s]
|
732 |
73%|ββββββββ | 559/763 [06:13<01:40, 2.02it/s]
|
733 |
73%|ββββββββ | 560/763 [06:14<01:40, 2.02it/s]
|
734 |
74%|ββββββββ | 561/763 [06:14<01:39, 2.02it/s]
|
735 |
74%|ββββββββ | 562/763 [06:15<01:39, 2.02it/s]
|
736 |
74%|ββββββββ | 563/763 [06:15<01:38, 2.02it/s]
|
737 |
74%|ββββββββ | 564/763 [06:16<01:38, 2.02it/s]
|
738 |
74%|ββββββββ | 565/763 [06:16<01:37, 2.02it/s]
|
739 |
74%|ββββββββ | 566/763 [06:17<01:37, 2.02it/s]
|
740 |
74%|ββββββββ | 567/763 [06:17<01:36, 2.02it/s]
|
741 |
74%|ββββββββ | 568/763 [06:18<01:36, 2.02it/s]
|
742 |
75%|ββββββββ | 569/763 [06:18<01:36, 2.02it/s]
|
743 |
75%|ββββββββ | 570/763 [06:19<01:35, 2.02it/s]
|
744 |
75%|ββββββββ | 571/763 [06:19<01:34, 2.02it/s]
|
745 |
75%|ββββββββ | 572/763 [06:20<01:34, 2.02it/s]
|
746 |
75%|ββββββββ | 573/763 [06:20<01:33, 2.02it/s]
|
747 |
75%|ββββββββ | 574/763 [06:21<01:33, 2.02it/s]
|
748 |
75%|ββββββββ | 575/763 [06:21<01:32, 2.02it/s]{'loss': 2.4095, 'grad_norm': 0.3562281131744385, 'learning_rate': 0.00017414565541703342, 'epoch': 0.75}
|
749 |
-
|
750 |
|
751 |
75%|ββββββββ | 575/763 [06:21<01:32, 2.02it/s]
|
752 |
75%|ββββββββ | 576/763 [06:22<01:32, 2.02it/s]
|
753 |
76%|ββββββββ | 577/763 [06:22<01:32, 2.02it/s]
|
754 |
76%|ββββββββ | 578/763 [06:23<01:31, 2.02it/s]
|
755 |
76%|ββββββββ | 579/763 [06:23<01:31, 2.02it/s]
|
756 |
76%|ββββββββ | 580/763 [06:24<01:30, 2.02it/s]
|
757 |
76%|ββββββββ | 581/763 [06:24<01:29, 2.02it/s]
|
758 |
76%|ββββββββ | 582/763 [06:25<01:29, 2.02it/s]
|
759 |
76%|ββββββββ | 583/763 [06:25<01:28, 2.02it/s]
|
760 |
77%|ββββββββ | 584/763 [06:26<01:28, 2.02it/s]
|
761 |
77%|ββββββββ | 585/763 [06:26<01:28, 2.02it/s]
|
762 |
77%|ββββββββ | 586/763 [06:27<01:27, 2.02it/s]
|
763 |
77%|ββββββββ | 587/763 [06:27<01:27, 2.02it/s]
|
764 |
77%|ββββββββ | 588/763 [06:28<01:26, 2.02it/s]
|
765 |
77%|ββββββββ | 589/763 [06:28<01:25, 2.02it/s]
|
766 |
77%|ββββββββ | 590/763 [06:29<01:25, 2.02it/s]
|
767 |
77%|ββββββββ | 591/763 [06:29<01:24, 2.02it/s]
|
768 |
78%|ββββββββ | 592/763 [06:30<01:24, 2.02it/s]
|
769 |
78%|ββββββββ | 593/763 [06:30<01:24, 2.02it/s]
|
770 |
78%|ββββββββ | 594/763 [06:31<01:23, 2.02it/s]
|
771 |
78%|ββββββββ | 595/763 [06:31<01:23, 2.02it/s]
|
772 |
78%|ββββββββ | 596/763 [06:32<01:22, 2.02it/s]
|
773 |
78%|ββββββββ | 597/763 [06:32<01:22, 2.02it/s]
|
774 |
78%|ββββββββ | 598/763 [06:33<01:21, 2.02it/s]
|
775 |
79%|ββββββββ | 599/763 [06:33<01:21, 2.02it/s]
|
776 |
79%|ββββββββ | 600/763 [06:34<01:20, 2.02it/s]
|
777 |
{'loss': 2.3795, 'grad_norm': 0.35245397686958313, 'learning_rate': 0.0001329553403026331, 'epoch': 0.79}
|
778 |
-
|
779 |
79%|ββββββββ | 600/763 [06:34<01:20, 2.02it/s]
|
780 |
79%|ββββββββ | 601/763 [06:34<01:20, 2.02it/s]
|
781 |
79%|ββββββββ | 602/763 [06:35<01:19, 2.02it/s]
|
782 |
79%|ββββββββ | 603/763 [06:35<01:18, 2.03it/s]
|
783 |
79%|ββββββββ | 604/763 [06:36<01:18, 2.02it/s]
|
784 |
79%|ββββββββ | 605/763 [06:36<01:17, 2.03it/s]
|
785 |
79%|ββββββββ | 606/763 [06:37<01:17, 2.03it/s]
|
786 |
80%|ββββββββ | 607/763 [06:37<01:17, 2.03it/s]
|
787 |
80%|ββββββββ | 608/763 [06:38<01:16, 2.02it/s]
|
788 |
80%|ββββββββ | 609/763 [06:38<01:22, 1.86it/s]
|
789 |
80%|ββββββββ | 610/763 [06:39<01:20, 1.91it/s]
|
790 |
80%|ββββββββ | 611/763 [06:39<01:18, 1.94it/s]
|
791 |
80%|ββββββββ | 612/763 [06:40<01:16, 1.97it/s]
|
792 |
80%|ββββββββ | 613/763 [06:40<01:15, 1.98it/s]
|
793 |
80%|ββββββββ | 614/763 [06:41<01:14, 2.00it/s]
|
794 |
81%|ββββββββ | 615/763 [06:41<01:13, 2.00it/s]
|
795 |
81%|ββββββββ | 616/763 [06:42<01:19, 1.85it/s]
|
796 |
81%|ββββββββ | 617/763 [06:42<01:16, 1.90it/s]
|
797 |
81%|ββββββββ | 618/763 [06:43<01:14, 1.94it/s]
|
798 |
81%|ββββββββ | 619/763 [06:43<01:13, 1.96it/s]
|
799 |
81%|βββββββββ | 620/763 [06:44<01:12, 1.98it/s]
|
800 |
81%|βββββββββ | 621/763 [06:44<01:11, 1.99it/s]
|
801 |
82%|βββββββββ | 622/763 [06:45<01:10, 2.00it/s]
|
802 |
82%|βββββββββ | 623/763 [06:45<01:09, 2.01it/s]
|
803 |
82%|βββββββββ | 624/763 [06:46<01:09, 2.01it/s]
|
804 |
82%|βββββββββ | 625/763 [06:46<01:08, 2.01it/s]{'loss': 2.3607, 'grad_norm': 0.3452637791633606, 'learning_rate': 9.657093924581261e-05, 'epoch': 0.82}
|
805 |
|
806 |
-
|
807 |
82%|βββββββββ | 625/763 [06:46<01:08, 2.01it/s]
|
808 |
82%|βββββββββ | 626/763 [06:47<01:07, 2.02it/s]
|
809 |
82%|βββββββββ | 627/763 [06:47<01:07, 2.02it/s]
|
810 |
82%|βββββββββ | 628/763 [06:48<01:06, 2.02it/s]
|
811 |
82%|βββββββββ | 629/763 [06:48<01:06, 2.02it/s]
|
812 |
83%|βββββββββ | 630/763 [06:49<01:05, 2.02it/s]
|
813 |
83%|βββββββββ | 631/763 [06:49<01:05, 2.02it/s]
|
814 |
83%|βββββββββ | 632/763 [06:50<01:04, 2.02it/s]
|
815 |
83%|βββββββββ | 633/763 [06:50<01:04, 2.03it/s]
|
816 |
83%|βββββββββ | 634/763 [06:51<01:03, 2.02it/s]
|
817 |
83%|βββββββββ | 635/763 [06:51<01:03, 2.03it/s]
|
818 |
83%|βββββββββ | 636/763 [06:52<01:02, 2.03it/s]
|
819 |
83%|βββββββββ | 637/763 [06:52<01:02, 2.02it/s]
|
820 |
84%|βββββββββ | 638/763 [06:53<01:01, 2.02it/s]
|
821 |
84%|βββββββββ | 639/763 [06:53<01:01, 2.02it/s]
|
822 |
84%|βββββββββ | 640/763 [06:54<01:00, 2.02it/s]
|
823 |
84%|βββββββββ | 641/763 [06:54<01:00, 2.02it/s]
|
824 |
84%|βββββββββ | 642/763 [06:55<00:59, 2.02it/s]
|
825 |
84%|βββββββββ | 643/763 [06:55<00:59, 2.02it/s]
|
826 |
84%|βββββββββ | 644/763 [06:56<00:58, 2.02it/s]
|
827 |
85%|βββββββββ | 645/763 [06:56<00:58, 2.02it/s]
|
828 |
85%|βββββββββ | 646/763 [06:57<00:57, 2.02it/s]
|
829 |
85%|βββββββββ | 647/763 [06:57<00:57, 2.02it/s]
|
830 |
85%|βββββββββ | 648/763 [06:58<00:56, 2.02it/s]
|
831 |
85%|βββββββββ | 649/763 [06:58<00:56, 2.02it/s]
|
832 |
85%|βββββββββ | 650/763 [06:59<00:55, 2.02it/s]{'loss': 2.3353, 'grad_norm': 0.34281203150749207, 'learning_rate': 6.546885286948184e-05, 'epoch': 0.85}
|
833 |
-
|
834 |
|
835 |
85%|βββββββββ | 650/763 [06:59<00:55, 2.02it/s]
|
836 |
85%|βββββββββ | 651/763 [06:59<00:55, 2.01it/s]
|
837 |
85%|βββββββββ | 652/763 [07:00<00:54, 2.02it/s]
|
838 |
86%|βββββββββ | 653/763 [07:00<00:54, 2.02it/s]
|
839 |
86%|βββββββββ | 654/763 [07:01<00:53, 2.02it/s]
|
840 |
86%|βββββββββ | 655/763 [07:01<00:53, 2.02it/s]
|
841 |
86%|βββββββββ | 656/763 [07:02<00:52, 2.03it/s]
|
842 |
86%|βββββββββ | 657/763 [07:02<00:52, 2.03it/s]
|
843 |
86%|βββββββββ | 658/763 [07:03<00:51, 2.03it/s]
|
844 |
86%|βββββββββ | 659/763 [07:03<00:51, 2.03it/s]
|
845 |
87%|βββββββββ | 660/763 [07:04<00:50, 2.03it/s]
|
846 |
87%|βββββββββ | 661/763 [07:04<00:50, 2.03it/s]
|
847 |
87%|βββββββββ | 662/763 [07:05<00:49, 2.03it/s]
|
848 |
87%|βββββββββ | 663/763 [07:05<00:49, 2.03it/s]
|
849 |
87%|βββββββββ | 664/763 [07:05<00:48, 2.03it/s]
|
850 |
87%|βββββββββ | 665/763 [07:06<00:48, 2.03it/s]
|
851 |
87%|βββββββββ | 666/763 [07:06<00:47, 2.03it/s]
|
852 |
87%|βββββββββ | 667/763 [07:07<00:47, 2.03it/s]
|
853 |
88%|βββββββββ | 668/763 [07:07<00:46, 2.03it/s]
|
854 |
88%|βββββββββ | 669/763 [07:08<00:46, 2.03it/s]
|
855 |
88%|ββββοΏ½οΏ½οΏ½ββββ | 670/763 [07:08<00:45, 2.03it/s]
|
856 |
88%|βββββββββ | 671/763 [07:09<00:45, 2.03it/s]
|
857 |
88%|βββββββββ | 672/763 [07:09<00:44, 2.03it/s]
|
858 |
88%|βββββββββ | 673/763 [07:10<00:44, 2.03it/s]
|
859 |
88%|βββββββββ | 674/763 [07:10<00:43, 2.03it/s]
|
860 |
88%|βββββββββ | 675/763 [07:11<00:43, 2.03it/s]{'loss': 2.3277, 'grad_norm': 0.33725234866142273, 'learning_rate': 4.0056317596204094e-05, 'epoch': 0.88}
|
861 |
-
|
862 |
|
863 |
88%|βββββββββ | 675/763 [07:11<00:43, 2.03it/s]
|
864 |
89%|βββββββββ | 676/763 [07:11<00:42, 2.02it/s]
|
865 |
89%|βββββββββ | 677/763 [07:12<00:42, 2.02it/s]
|
866 |
89%|βββββββββ | 678/763 [07:12<00:41, 2.03it/s]
|
867 |
89%|βββββββββ | 679/763 [07:13<00:41, 2.03it/s]
|
868 |
89%|βββββββββ | 680/763 [07:13<00:40, 2.03it/s]
|
869 |
89%|βββββββββ | 681/763 [07:14<00:40, 2.03it/s]
|
870 |
89%|βββββββββ | 682/763 [07:14<00:39, 2.03it/s]
|
871 |
90%|βββββββββ | 683/763 [07:15<00:39, 2.03it/s]
|
872 |
90%|βββββββββ | 684/763 [07:15<00:38, 2.03it/s]
|
873 |
90%|βββββββββ | 685/763 [07:16<00:38, 2.03it/s]
|
874 |
90%|βββββββββ | 686/763 [07:16<00:37, 2.03it/s]
|
875 |
90%|βββββββββ | 687/763 [07:17<00:37, 2.03it/s]
|
876 |
90%|βββββββββ | 688/763 [07:17<00:36, 2.03it/s]
|
877 |
90%|βββββββββ | 689/763 [07:18<00:36, 2.03it/s]
|
878 |
90%|βββββββββ | 690/763 [07:18<00:36, 2.03it/s]
|
879 |
91%|βββββββββ | 691/763 [07:19<00:35, 2.03it/s]
|
880 |
91%|βββββββββ | 692/763 [07:19<00:35, 2.03it/s]
|
881 |
91%|βββββββββ | 693/763 [07:20<00:34, 2.03it/s]
|
882 |
91%|βββββββββ | 694/763 [07:20<00:34, 2.03it/s]
|
883 |
91%|βββββββββ | 695/763 [07:21<00:33, 2.03it/s]
|
884 |
91%|βββββββββ | 696/763 [07:21<00:33, 2.03it/s]
|
885 |
91%|ββββββββββ| 697/763 [07:22<00:32, 2.03it/s]
|
886 |
91%|ββββββββββ| 698/763 [07:22<00:32, 2.03it/s]
|
887 |
92%|ββββββββββ| 699/763 [07:23<00:31, 2.03it/s]
|
888 |
92%|ββββββββββ| 700/763 [07:23<00:31, 2.03it/s]{'loss': 2.3087, 'grad_norm': 0.33684036135673523, 'learning_rate': 2.0666073481669712e-05, 'epoch': 0.92}
|
889 |
-
|
890 |
|
891 |
92%|ββββββββββ| 700/763 [07:23<00:31, 2.03it/s]
|
892 |
92%|ββββββββββ| 701/763 [07:24<00:30, 2.02it/s]
|
893 |
92%|ββββββββββ| 702/763 [07:24<00:30, 2.02it/s]
|
894 |
92%|ββββββββββ| 703/763 [07:25<00:29, 2.03it/s]
|
895 |
92%|ββββββββββ| 704/763 [07:25<00:29, 2.03it/s]
|
896 |
92%|ββββββββββ| 705/763 [07:26<00:28, 2.03it/s]
|
897 |
93%|ββββββββββ| 706/763 [07:26<00:28, 2.03it/s]
|
898 |
93%|ββββββββββ| 707/763 [07:27<00:27, 2.03it/s]
|
899 |
93%|ββββββββββ| 708/763 [07:27<00:27, 2.03it/s]
|
900 |
93%|ββββββββββ| 709/763 [07:28<00:26, 2.03it/s]
|
901 |
93%|ββββββββββ| 710/763 [07:28<00:26, 2.03it/s]
|
902 |
93%|ββββββββββ| 711/763 [07:29<00:25, 2.03it/s]
|
903 |
93%|ββββββββββ| 712/763 [07:29<00:25, 2.03it/s]
|
904 |
93%|ββββββββββ| 713/763 [07:30<00:24, 2.03it/s]
|
905 |
94%|ββββββββββ| 714/763 [07:30<00:24, 2.02it/s]
|
906 |
94%|ββββββββββ| 715/763 [07:31<00:23, 2.00it/s]
|
907 |
94%|ββββββββββ| 716/763 [07:31<00:23, 1.99it/s]
|
908 |
94%|ββββββββββ| 717/763 [07:32<00:23, 1.98it/s]
|
909 |
94%|ββββββββββ| 718/763 [07:32<00:22, 1.98it/s]
|
910 |
94%|ββββββββββ| 719/763 [07:33<00:22, 1.97it/s]
|
911 |
94%|ββββββββββ| 720/763 [07:33<00:21, 1.98it/s]
|
912 |
94%|ββββββββββ| 721/763 [07:34<00:21, 1.99it/s]
|
913 |
95%|ββββββββββ| 722/763 [07:34<00:20, 2.00it/s]
|
914 |
95%|ββββββββββ| 723/763 [07:35<00:19, 2.01it/s]
|
915 |
95%|ββββββββββ| 724/763 [07:35<00:19, 2.01it/s]
|
916 |
95%|ββββββββββ| 725/763 [07:36<00:18, 2.02it/s]
|
917 |
|
918 |
-
|
919 |
95%|ββββββββββ| 725/763 [07:36<00:18, 2.02it/s]
|
920 |
95%|ββββββββββ| 726/763 [07:36<00:18, 2.02it/s]
|
921 |
95%|ββββββββββ| 727/763 [07:37<00:17, 2.02it/s]
|
922 |
95%|ββββββββββ| 728/763 [07:37<00:17, 2.02it/s]
|
923 |
96%|ββββββββββ| 729/763 [07:38<00:16, 2.02it/s]
|
924 |
96%|ββββββββββ| 730/763 [07:38<00:16, 2.03it/s]
|
925 |
96%|ββββββββββ| 731/763 [07:39<00:15, 2.03it/s]
|
926 |
96%|ββββββββββ| 732/763 [07:39<00:15, 2.03it/s]
|
927 |
96%|ββββββββββ| 733/763 [07:40<00:14, 2.03it/s]
|
928 |
96%|βββββββοΏ½οΏ½ββ| 734/763 [07:40<00:14, 2.03it/s]
|
929 |
96%|ββββββββββ| 735/763 [07:41<00:13, 2.03it/s]
|
930 |
96%|ββββββββββ| 736/763 [07:41<00:13, 2.03it/s]
|
931 |
97%|ββββββββββ| 737/763 [07:42<00:12, 2.03it/s]
|
932 |
97%|ββββββββββ| 738/763 [07:42<00:12, 2.03it/s]
|
933 |
97%|ββββββββββ| 739/763 [07:43<00:11, 2.03it/s]
|
934 |
97%|ββββββββββ| 740/763 [07:43<00:11, 2.03it/s]
|
935 |
97%|ββββββββββ| 741/763 [07:44<00:10, 2.03it/s]
|
936 |
97%|ββββββββββ| 742/763 [07:44<00:10, 2.03it/s]
|
937 |
97%|ββββββββββ| 743/763 [07:45<00:09, 2.02it/s]
|
938 |
98%|ββββββββββ| 744/763 [07:45<00:09, 2.00it/s]
|
939 |
98%|ββββββββββ| 745/763 [07:46<00:09, 1.98it/s]
|
940 |
98%|ββββββββββ| 746/763 [07:46<00:08, 1.98it/s]
|
941 |
98%|ββββββββββ| 747/763 [07:47<00:08, 1.98it/s]
|
942 |
98%|ββββββββββ| 748/763 [07:47<00:07, 1.98it/s]
|
943 |
98%|ββββββββββ| 749/763 [07:48<00:07, 1.99it/s]
|
944 |
98%|ββββββββββ| 750/763 [07:48<00:06, 2.00it/s]{'loss': 2.2993, 'grad_norm': 0.3305748701095581, 'learning_rate': 8.858291115876327e-07, 'epoch': 0.98}
|
945 |
-
|
946 |
|
947 |
98%|ββββββββββ| 750/763 [07:48<00:06, 2.00it/s]
|
948 |
98%|ββββββββββ| 751/763 [07:49<00:05, 2.00it/s]
|
949 |
99%|ββββββββββ| 752/763 [07:49<00:05, 2.01it/s]
|
950 |
99%|ββββββββββ| 753/763 [07:50<00:04, 2.01it/s]
|
951 |
99%|ββββββββββ| 754/763 [07:50<00:04, 2.02it/s]
|
952 |
99%|ββββββββββ| 755/763 [07:51<00:03, 2.02it/s]
|
953 |
99%|ββββββββββ| 756/763 [07:51<00:03, 2.02it/s]
|
954 |
99%|ββββββββββ| 757/763 [07:52<00:02, 2.02it/s]
|
955 |
99%|ββββββββββ| 758/763 [07:52<00:02, 2.02it/s]
|
956 |
99%|ββββββββββ| 759/763 [07:53<00:01, 2.02it/s]
|
957 |
|
958 |
-
|
959 |
1%| | 1/143 [00:05<12:46, 5.40s/it]
|
960 |
1%|β | 2/143 [00:06<06:44, 2.87s/it]
|
961 |
2%|β | 3/143 [00:07<04:20, 1.86s/it]
|
962 |
3%|β | 4/143 [00:07<03:05, 1.34s/it]
|
963 |
3%|β | 5/143 [00:08<02:24, 1.05s/it]
|
964 |
4%|β | 6/143 [00:08<01:58, 1.16it/s]
|
965 |
5%|β | 7/143 [00:09<01:41, 1.35it/s]
|
966 |
6%|β | 8/143 [00:09<01:29, 1.50it/s]
|
967 |
6%|β | 9/143 [00:10<01:22, 1.63it/s]
|
968 |
7%|β | 10/143 [00:10<01:16, 1.73it/s]
|
969 |
8%|β | 11/143 [00:11<01:12, 1.81it/s]
|
970 |
8%|β | 12/143 [00:11<01:10, 1.87it/s]
|
971 |
9%|β | 13/143 [00:12<01:08, 1.91it/s]
|
972 |
10%|β | 14/143 [00:12<01:06, 1.94it/s]
|
973 |
10%|β | 15/143 [00:13<01:05, 1.96it/s]
|
974 |
11%|β | 16/143 [00:13<01:04, 1.98it/s]
|
975 |
12%|ββ | 17/143 [00:14<01:03, 1.99it/s]
|
976 |
13%|ββ | 18/143 [00:14<01:02, 2.00it/s]
|
977 |
13%|ββ | 19/143 [00:15<01:01, 2.00it/s]
|
978 |
14%|ββ | 20/143 [00:15<01:01, 2.01it/s]
|
979 |
15%|ββ | 21/143 [00:16<01:00, 2.01it/s]
|
980 |
15%|ββ | 22/143 [00:16<01:00, 2.01it/s]
|
981 |
16%|ββ | 23/143 [00:17<01:01, 1.96it/s]
|
982 |
17%|ββ | 24/143 [00:17<01:00, 1.98it/s]
|
983 |
17%|ββ | 25/143 [00:18<00:59, 1.99it/s]
|
984 |
|
|
|
985 |
17%|ββ | 25/143 [00:18<00:59, 1.99it/s]
|
986 |
18%|ββ | 26/143 [00:18<00:58, 1.99it/s]
|
987 |
19%|ββ | 27/143 [00:19<00:57, 2.00it/s]
|
988 |
20%|ββ | 28/143 [00:19<00:57, 2.01it/s]
|
989 |
20%|ββ | 29/143 [00:20<00:56, 2.01it/s]
|
990 |
21%|ββ | 30/143 [00:20<00:56, 2.02it/s]
|
991 |
22%|βββ | 31/143 [00:21<00:55, 2.02it/s]
|
992 |
22%|βββ | 32/143 [00:21<00:54, 2.02it/s]
|
993 |
23%|βββ | 33/143 [00:22<00:54, 2.02it/s]
|
994 |
24%|βββ | 34/143 [00:22<00:53, 2.02it/s]
|
995 |
24%|βββ | 35/143 [00:23<00:53, 2.02it/s]
|
996 |
25%|βββ | 36/143 [00:23<00:52, 2.02it/s]
|
997 |
26%|βββ | 37/143 [00:24<00:52, 2.03it/s]
|
998 |
27%|βββ | 38/143 [00:24<00:51, 2.03it/s]
|
999 |
27%|βββ | 39/143 [00:25<00:51, 2.02it/s]
|
1000 |
28%|βββ | 40/143 [00:25<00:50, 2.02it/s]
|
1001 |
29%|βββ | 41/143 [00:26<00:50, 2.01it/s]
|
1002 |
29%|βββ | 42/143 [00:26<00:50, 2.02it/s]
|
1003 |
30%|βββ | 43/143 [00:27<00:49, 2.01it/s]
|
1004 |
31%|βοΏ½οΏ½β | 44/143 [00:27<00:49, 2.02it/s]
|
1005 |
31%|ββββ | 45/143 [00:28<00:48, 2.02it/s]
|
1006 |
32%|ββββ | 46/143 [00:28<00:48, 2.01it/s]
|
1007 |
33%|ββββ | 47/143 [00:29<00:47, 2.00it/s]
|
1008 |
34%|ββββ | 48/143 [00:29<00:47, 2.00it/s]
|
1009 |
34%|ββββ | 49/143 [00:30<00:46, 2.01it/s]
|
1010 |
35%|ββββ | 50/143 [00:30<00:46, 2.01it/s]
|
1011 |
|
|
|
1012 |
35%|ββββ | 50/143 [00:30<00:46, 2.01it/s]
|
1013 |
36%|ββββ | 51/143 [00:31<00:45, 2.01it/s]
|
1014 |
36%|ββββ | 52/143 [00:31<00:45, 2.01it/s]
|
1015 |
37%|ββββ | 53/143 [00:32<00:44, 2.01it/s]
|
1016 |
38%|ββββ | 54/143 [00:32<00:44, 2.01it/s]
|
1017 |
38%|ββββ | 55/143 [00:33<00:43, 2.01it/s]
|
1018 |
39%|ββββ | 56/143 [00:33<00:43, 2.02it/s]
|
1019 |
40%|ββββ | 57/143 [00:34<00:42, 2.02it/s]
|
1020 |
41%|ββββ | 58/143 [00:34<00:42, 2.02it/s]
|
1021 |
41%|βββββ | 59/143 [00:35<00:41, 2.02it/s]
|
1022 |
42%|βββββ | 60/143 [00:35<00:41, 2.02it/s]
|
1023 |
43%|βββββ | 61/143 [00:36<00:40, 2.02it/s]
|
1024 |
43%|βββββ | 62/143 [00:36<00:40, 2.02it/s]
|
1025 |
44%|βββββ | 63/143 [00:37<00:39, 2.02it/s]
|
1026 |
45%|βββββ | 64/143 [00:37<00:39, 2.02it/s]
|
1027 |
45%|βββββ | 65/143 [00:38<00:38, 2.02it/s]
|
1028 |
46%|βββββ | 66/143 [00:38<00:38, 2.02it/s]
|
1029 |
47%|βββββ | 67/143 [00:39<00:37, 2.02it/s]
|
1030 |
48%|βββββ | 68/143 [00:39<00:37, 2.01it/s]
|
1031 |
48%|βββββ | 69/143 [00:40<00:36, 2.02it/s]
|
1032 |
49%|βββββ | 70/143 [00:40<00:36, 2.02it/s]
|
1033 |
50%|βββββ | 71/143 [00:41<00:35, 2.02it/s]
|
1034 |
50%|βββββ | 72/143 [00:41<00:35, 2.02it/s]
|
1035 |
51%|βββββ | 73/143 [00:41<00:34, 2.02it/s]
|
1036 |
52%|ββββββ | 74/143 [00:42<00:34, 2.02it/s]
|
1037 |
52%|ββββββ | 75/143 [00:42<00:33, 2.02it/s]
|
1038 |
|
|
|
1039 |
52%|ββββββ | 75/143 [00:42<00:33, 2.02it/s]
|
1040 |
53%|ββββββ | 76/143 [00:43<00:33, 2.02it/s]
|
1041 |
54%|ββββββ | 77/143 [00:43<00:32, 2.02it/s]
|
1042 |
55%|ββββββ | 78/143 [00:44<00:32, 2.02it/s]
|
1043 |
55%|ββββββ | 79/143 [00:44<00:31, 2.02it/s]
|
1044 |
56%|ββββββ | 80/143 [00:45<00:31, 2.02it/s]
|
1045 |
57%|ββββββ | 81/143 [00:45<00:30, 2.02it/s]
|
1046 |
57%|ββββββ | 82/143 [00:46<00:30, 2.02it/s]
|
1047 |
58%|ββββββ | 83/143 [00:46<00:29, 2.02it/s]
|
1048 |
59%|ββββββ | 84/143 [00:47<00:29, 2.02it/s]
|
1049 |
59%|ββββββ | 85/143 [00:47<00:28, 2.02it/s]
|
1050 |
60%|ββββββ | 86/143 [00:48<00:28, 2.02it/s]
|
1051 |
61%|ββββββ | 87/143 [00:48<00:27, 2.02it/s]
|
1052 |
62%|βββββββ | 88/143 [00:49<00:27, 2.02it/s]
|
1053 |
62%|βββββββ | 89/143 [00:49<00:26, 2.02it/s]
|
1054 |
63%|βββββββ | 90/143 [00:50<00:26, 2.03it/s]
|
1055 |
64%|βββββββ | 91/143 [00:50<00:25, 2.02it/s]
|
1056 |
64%|βββββββ | 92/143 [00:51<00:25, 2.03it/s]
|
1057 |
65%|βββββββ | 93/143 [00:51<00:24, 2.02it/s]
|
1058 |
66%|βββββββ | 94/143 [00:52<00:24, 2.02it/s]
|
1059 |
66%|βββββββ | 95/143 [00:52<00:23, 2.02it/s]
|
1060 |
67%|βββββββ | 96/143 [00:53<00:23, 2.02it/s]
|
1061 |
68%|βββββββ | 97/143 [00:53<00:22, 2.02it/s]
|
1062 |
69%|βββββββ | 98/143 [00:54<00:22, 2.02it/s]
|
1063 |
69%|βββββββ | 99/143 [00:54<00:21, 2.02it/s]
|
1064 |
70%|βββββββ | 100/143 [00:55<00:21, 2.02it/s]
|
1065 |
|
|
|
1066 |
70%|βββββββ | 100/143 [00:55<00:21, 2.02it/s]
|
1067 |
71%|βββββββ | 101/143 [00:55<00:21, 2.00it/s]
|
1068 |
71%|ββββββββ | 102/143 [00:56<00:20, 2.00it/s]
|
1069 |
72%|ββββββββ | 103/143 [00:56<00:20, 2.00it/s]
|
1070 |
73%|ββββββββ | 104/143 [00:57<00:19, 2.00it/s]
|
1071 |
73%|ββββββββ | 105/143 [00:57<00:18, 2.00it/s]
|
1072 |
74%|ββββββββ | 106/143 [00:58<00:18, 2.00it/s]
|
1073 |
75%|ββββββββ | 107/143 [00:58<00:17, 2.01it/s]
|
1074 |
76%|ββββββββ | 108/143 [00:59<00:17, 2.01it/s]
|
1075 |
76%|ββββββββ | 109/143 [00:59<00:16, 2.01it/s]
|
1076 |
77%|ββββββββ | 110/143 [01:00<00:16, 2.01it/s]
|
1077 |
78%|ββββββββ | 111/143 [01:00<00:15, 2.00it/s]
|
1078 |
78%|ββββββββ | 112/143 [01:01<00:15, 2.01it/s]
|
1079 |
79%|ββββββββ | 113/143 [01:01<00:14, 2.01it/s]
|
1080 |
80%|ββββββββ | 114/143 [01:02<00:14, 2.00it/s]
|
1081 |
80%|ββββββββ | 115/143 [01:02<00:13, 2.01it/s]
|
1082 |
81%|ββββββββ | 116/143 [01:03<00:13, 2.01it/s]
|
1083 |
82%|ββοΏ½οΏ½ββββββ | 117/143 [01:03<00:12, 2.01it/s]
|
1084 |
83%|βββββββββ | 118/143 [01:04<00:12, 2.01it/s]
|
1085 |
83%|βββββββββ | 119/143 [01:04<00:11, 2.01it/s]
|
1086 |
84%|βββββββββ | 120/143 [01:05<00:11, 2.01it/s]
|
1087 |
85%|βββββββββ | 121/143 [01:05<00:10, 2.01it/s]
|
1088 |
85%|βββββββββ | 122/143 [01:06<00:10, 2.01it/s]
|
1089 |
86%|βββββββββ | 123/143 [01:06<00:09, 2.01it/s]
|
1090 |
87%|βββββββββ | 124/143 [01:07<00:09, 2.01it/s]
|
1091 |
87%|βββββββββ | 125/143 [01:07<00:08, 2.01it/s]
|
1092 |
|
|
|
1093 |
87%|βββββββββ | 125/143 [01:07<00:08, 2.01it/s]
|
1094 |
88%|βββββββββ | 126/143 [01:08<00:08, 2.01it/s]
|
1095 |
89%|βββββββββ | 127/143 [01:08<00:07, 2.00it/s]
|
1096 |
90%|βββββββββ | 128/143 [01:09<00:07, 2.01it/s]
|
1097 |
90%|βββββββββ | 129/143 [01:09<00:06, 2.01it/s]
|
1098 |
91%|βββββββββ | 130/143 [01:10<00:06, 2.01it/s]
|
1099 |
92%|ββββββββββ| 131/143 [01:10<00:05, 2.01it/s]
|
1100 |
92%|ββββββββββ| 132/143 [01:11<00:05, 2.01it/s]
|
1101 |
93%|ββββββββββ| 133/143 [01:11<00:04, 2.01it/s]
|
1102 |
94%|ββββββββββ| 134/143 [01:12<00:04, 2.01it/s]
|
1103 |
94%|ββββββββββ| 135/143 [01:12<00:03, 2.01it/s]
|
1104 |
95%|ββββββββββ| 136/143 [01:13<00:03, 2.01it/s]
|
1105 |
96%|ββββββββββ| 137/143 [01:13<00:02, 2.01it/s]
|
1106 |
97%|ββββββββββ| 138/143 [01:14<00:02, 2.01it/s]
|
1107 |
97%|ββββββββββ| 139/143 [01:14<00:01, 2.01it/s]
|
1108 |
98%|ββββββββββ| 140/143 [01:15<00:01, 2.01it/s]
|
1109 |
99%|ββββββββββ| 141/143 [01:15<00:00, 2.01it/s]
|
1110 |
99%|ββββββββββ| 142/143 [01:16<00:00, 2.01it/s]
|
1111 |
|
|
|
1112 |
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
|
|
|
1 |
+
slurm submission log: 2024-05-10 17:55:25.295279
|
2 |
created following sbatch script:
|
3 |
|
4 |
###############################
|
|
|
7 |
|
8 |
#SBATCH --account=nlp
|
9 |
#SBATCH --cpus-per-task=16
|
10 |
+
#SBATCH --dependency=afterok:7594444
|
11 |
#SBATCH --gres=gpu:2
|
12 |
+
#SBATCH --job-name=tthrush-job-4654014
|
13 |
#SBATCH --mem=400G
|
14 |
+
#SBATCH --nodelist=sphinx2
|
15 |
#SBATCH --open-mode=append
|
16 |
+
#SBATCH --output=/juice5/scr5/tthrush/pretraining-coreset-selection/llm_pretraining/llms_4/pythia-70m_sciq/train_job_output.txt
|
17 |
#SBATCH --partition=sphinx
|
18 |
#SBATCH --time=14-0
|
19 |
|
20 |
# activate your desired anaconda environment
|
21 |
+
. /nlp/scr/tthrush/miniconda3/envs/pretraining-coreset-selection/etc/profile.d/conda.sh ; conda activate pretraining-coreset-selection
|
22 |
|
23 |
# cd to working directory
|
24 |
cd .
|
25 |
|
26 |
# launch commands
|
27 |
+
srun --unbuffered run_as_child_processes 'torchrun --master_port 29502 --nproc_per_node=2 train_llm.py --dataset_id /juice5/scr5/tthrush/pretraining-coreset-selection/llm_pretraining/train_data_4/sciq --output_dir /juice5/scr5/tthrush/pretraining-coreset-selection/llm_pretraining/llms_4/pythia-70m_sciq --output_hub_id pythia-70m_sciq --model_id EleutherAI/pythia-70m --num_train_epochs 1 --learning_rate 1e-3 --warmup_ratio=0.1 --gradient_accumulation_steps 2'
|
28 |
|
29 |
###############################
|
30 |
|
|
|
34 |
###############################
|
35 |
slurm submission output
|
36 |
|
37 |
+
Submitted batch job 7594445
|
38 |
|
39 |
|
40 |
|
41 |
###############################
|
42 |
|
43 |
+
/var/lib/slurm/slurmd/job7594445/slurm_script: line 16: /nlp/scr/tthrush/miniconda3/envs/pretraining-coreset-selection/etc/profile.d/conda.sh: No such file or directory
|
|
|
44 |
|
45 |
+
CommandNotFoundError: Your shell has not been properly configured to use 'conda activate'.
|
46 |
+
To initialize your shell, run
|
47 |
|
48 |
+
$ conda init <SHELL_NAME>
|
49 |
|
50 |
+
Currently supported shells are:
|
51 |
+
- bash
|
52 |
+
- fish
|
53 |
+
- tcsh
|
54 |
+
- xonsh
|
55 |
+
- zsh
|
56 |
+
- powershell
|
|
|
|
|
|
|
57 |
|
58 |
+
See 'conda init --help' for more information and options.
|
|
|
59 |
|
60 |
+
IMPORTANT: You may need to close and restart your shell after running 'conda init'.
|
|
|
61 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
62 |
|
63 |
###############################
|
64 |
+
start time: 2024-05-10 18:38:19.114921
|
65 |
machine: sphinx2
|
66 |
conda env: pretraining-coreset-selection
|
67 |
###############################
|
68 |
running following processes
|
69 |
|
70 |
+
torchrun --master_port 29502 --nproc_per_node=2 train_llm.py --dataset_id /juice5/scr5/tthrush/pretraining-coreset-selection/llm_pretraining/train_data_4/sciq --output_dir /juice5/scr5/tthrush/pretraining-coreset-selection/llm_pretraining/llms_4/pythia-70m_sciq --output_hub_id pythia-70m_sciq --model_id EleutherAI/pythia-70m --num_train_epochs 1 --learning_rate 1e-3 --warmup_ratio=0.1 --gradient_accumulation_steps 2
|
71 |
|
72 |
|
73 |
###############################
|
74 |
command outputs:
|
75 |
|
76 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
77 |
0%| | 0/763 [00:00<?, ?it/s][rank0]:[W reducer.cpp:1360] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator())
|
78 |
+
[2024-05-10 18:38:20,807] torch.distributed.run: [WARNING]
|
79 |
+
[2024-05-10 18:38:20,807] torch.distributed.run: [WARNING] *****************************************
|
80 |
+
[2024-05-10 18:38:20,807] torch.distributed.run: [WARNING] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed.
|
81 |
+
[2024-05-10 18:38:20,807] torch.distributed.run: [WARNING] *****************************************
|
82 |
+
05/10/2024 18:38:25 - INFO - __main__ - Script parameters ScriptArguments(dataset_id='/juice5/scr5/tthrush/pretraining-coreset-selection/llm_pretraining/train_data_4/sciq', output_dir='/juice5/scr5/tthrush/pretraining-coreset-selection/llm_pretraining/llms_4/pythia-70m_sciq', output_hub_id='pythia-70m_sciq', hf_hub_token=True, model_id='EleutherAI/pythia-70m', per_device_train_batch_size=256, num_train_epochs=1, learning_rate=0.001, gradient_accumulation_steps=2, from_scratch=True, warmup_ratio=0.1, adam_beta1=0.9, adam_beta2=0.95, adam_epsilon=1e-08, weight_decay=0.01, lr_scheduler_type='cosine', local_rank=0, resume_from_checkpoint=False, deepspeed=None, peft=False)
|
83 |
+
05/10/2024 18:38:25 - INFO - __main__ - Script parameters ScriptArguments(dataset_id='/juice5/scr5/tthrush/pretraining-coreset-selection/llm_pretraining/train_data_4/sciq', output_dir='/juice5/scr5/tthrush/pretraining-coreset-selection/llm_pretraining/llms_4/pythia-70m_sciq', output_hub_id='pythia-70m_sciq', hf_hub_token=True, model_id='EleutherAI/pythia-70m', per_device_train_batch_size=256, num_train_epochs=1, learning_rate=0.001, gradient_accumulation_steps=2, from_scratch=True, warmup_ratio=0.1, adam_beta1=0.9, adam_beta2=0.95, adam_epsilon=1e-08, weight_decay=0.01, lr_scheduler_type='cosine', local_rank=0, resume_from_checkpoint=False, deepspeed=None, peft=False)
|
84 |
+
|
85 |
0%| | 0/143 [00:00<?, ?it/s][rank0]:[W reducer.cpp:1360] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator())
|
86 |
[rank1]:[W reducer.cpp:1360] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator())
|
|
|
87 |
0%| | 1/763 [00:45<9:35:14, 45.29s/it]
|
88 |
0%| | 2/763 [00:58<5:37:14, 26.59s/it]
|
89 |
0%| | 3/763 [01:09<4:02:06, 19.11s/it]
|
90 |
1%| | 4/763 [01:15<3:00:21, 14.26s/it]
|
91 |
1%| | 5/763 [01:20<2:18:15, 10.94s/it]
|
92 |
1%| | 6/763 [01:25<1:50:17, 8.74s/it]
|
93 |
1%| | 7/763 [01:28<1:27:09, 6.92s/it]
|
94 |
1%| | 8/763 [01:31<1:10:29, 5.60s/it]
|
95 |
1%| | 9/763 [01:33<56:45, 4.52s/it]
|
96 |
1%|β | 10/763 [01:35<45:43, 3.64s/it]
|
97 |
1%|β | 11/763 [01:36<37:31, 2.99s/it]
|
98 |
2%|β | 12/763 [01:38<31:52, 2.55s/it]
|
99 |
2%|β | 13/763 [01:39<27:34, 2.21s/it]
|
100 |
2%|β | 14/763 [01:40<23:46, 1.90s/it]
|
101 |
2%|β | 15/763 [01:41<20:46, 1.67s/it]
|
102 |
2%|β | 16/763 [01:42<17:33, 1.41s/it]
|
103 |
2%|β | 17/763 [01:43<14:55, 1.20s/it]
|
104 |
2%|β | 18/763 [01:44<13:45, 1.11s/it]
|
105 |
2%|β | 19/763 [01:45<12:10, 1.02it/s]
|
106 |
3%|β | 20/763 [01:45<11:06, 1.12it/s]
|
107 |
3%|β | 21/763 [01:46<10:04, 1.23it/s]
|
108 |
3%|β | 22/763 [01:46<09:17, 1.33it/s]
|
109 |
3%|β | 23/763 [01:47<08:41, 1.42it/s]
|
110 |
3%|β | 24/763 [01:48<08:17, 1.49it/s]
|
111 |
3%|β | 25/763 [01:48<08:02, 1.53it/s]
|
112 |
{'loss': 9.8343, 'grad_norm': 0.9369164705276489, 'learning_rate': 0.0003246753246753247, 'epoch': 0.03}
|
|
|
113 |
3%|β | 25/763 [01:48<08:02, 1.53it/s]
|
114 |
3%|β | 26/763 [01:49<08:00, 1.53it/s]
|
115 |
4%|β | 27/763 [01:49<07:33, 1.62it/s]
|
116 |
4%|β | 28/763 [01:50<07:18, 1.68it/s]
|
117 |
4%|β | 29/763 [01:51<07:21, 1.66it/s]
|
118 |
4%|β | 30/763 [01:51<07:10, 1.70it/s]
|
119 |
4%|β | 31/763 [01:52<06:59, 1.75it/s]
|
120 |
4%|β | 32/763 [01:52<06:53, 1.77it/s]
|
121 |
4%|β | 33/763 [01:53<06:42, 1.81it/s]
|
122 |
4%|β | 34/763 [01:53<06:35, 1.84it/s]
|
123 |
5%|β | 35/763 [01:54<06:28, 1.87it/s]
|
124 |
5%|β | 36/763 [01:54<06:28, 1.87it/s]
|
125 |
5%|β | 37/763 [01:55<06:24, 1.89it/s]
|
126 |
5%|β | 38/763 [01:55<06:21, 1.90it/s]
|
127 |
5%|β | 39/763 [01:56<06:18, 1.92it/s]
|
128 |
5%|β | 40/763 [01:56<06:14, 1.93it/s]
|
129 |
5%|β | 41/763 [01:57<06:11, 1.94it/s]
|
130 |
6%|β | 42/763 [01:57<06:10, 1.95it/s]
|
131 |
6%|β | 43/763 [01:58<06:09, 1.95it/s]
|
132 |
6%|β | 44/763 [01:58<06:05, 1.97it/s]
|
133 |
6%|β | 45/763 [01:59<06:07, 1.95it/s]
|
134 |
6%|β | 46/763 [01:59<06:06, 1.96it/s]
|
135 |
6%|β | 47/763 [02:00<06:03, 1.97it/s]
|
136 |
6%|β | 48/763 [02:00<06:01, 1.98it/s]
|
137 |
6%|β | 49/763 [02:01<06:00, 1.98it/s]
|
138 |
7%|β | 50/763 [02:01<05:58, 1.99it/s]
|
139 |
{'loss': 7.3748, 'grad_norm': 0.3545825183391571, 'learning_rate': 0.0006493506493506494, 'epoch': 0.07}
|
|
|
140 |
7%|β | 50/763 [02:01<05:58, 1.99it/s]
|
141 |
7%|β | 51/763 [02:02<05:58, 1.99it/s]
|
142 |
7%|β | 52/763 [02:02<05:57, 1.99it/s]
|
143 |
7%|β | 53/763 [02:03<05:57, 1.99it/s]
|
144 |
7%|β | 54/763 [02:03<05:55, 1.99it/s]
|
145 |
7%|β | 55/763 [02:04<05:54, 2.00it/s]
|
146 |
7%|β | 56/763 [02:04<05:54, 2.00it/s]
|
147 |
7%|β | 57/763 [02:05<05:53, 2.00it/s]
|
148 |
8%|β | 58/763 [02:05<05:53, 1.99it/s]
|
149 |
8%|β | 59/763 [02:06<05:53, 1.99it/s]
|
150 |
8%|β | 60/763 [02:06<05:52, 2.00it/s]
|
151 |
8%|β | 61/763 [02:07<05:52, 1.99it/s]
|
152 |
8%|β | 62/763 [02:07<05:51, 2.00it/s]
|
153 |
8%|β | 63/763 [02:08<05:50, 2.00it/s]
|
154 |
8%|β | 64/763 [02:08<05:48, 2.01it/s]
|
155 |
9%|β | 65/763 [02:09<05:47, 2.01it/s]
|
156 |
9%|β | 66/763 [02:09<05:57, 1.95it/s]
|
157 |
9%|β | 67/763 [02:10<05:53, 1.97it/s]
|
158 |
9%|β | 68/763 [02:10<05:51, 1.98it/s]
|
159 |
9%|β | 69/763 [02:11<05:49, 1.99it/s]
|
160 |
9%|β | 70/763 [02:11<05:47, 1.99it/s]
|
161 |
9%|β | 71/763 [02:12<05:46, 2.00it/s]
|
162 |
9%|β | 72/763 [02:12<05:45, 2.00it/s]
|
163 |
10%|β | 73/763 [02:13<05:43, 2.01it/s]
|
164 |
10%|β | 74/763 [02:13<05:42, 2.01it/s]
|
165 |
10%|β | 75/763 [02:14<05:42, 2.01it/s]
|
166 |
|
|
|
167 |
10%|β | 75/763 [02:14<05:42, 2.01it/s]
|
168 |
10%|β | 76/763 [02:14<05:41, 2.01it/s]
|
169 |
10%|β | 77/763 [02:15<05:41, 2.01it/s]
|
170 |
10%|β | 78/763 [02:15<05:40, 2.01it/s]
|
171 |
10%|β | 79/763 [02:16<05:39, 2.01it/s]
|
172 |
10%|β | 80/763 [02:16<05:39, 2.01it/s]
|
173 |
11%|β | 81/763 [02:17<05:38, 2.01it/s]
|
174 |
11%|β | 82/763 [02:17<05:38, 2.01it/s]
|
175 |
11%|β | 83/763 [02:18<05:37, 2.01it/s]
|
176 |
11%|β | 84/763 [02:18<05:37, 2.01it/s]
|
177 |
11%|β | 85/763 [02:19<05:37, 2.01it/s]
|
178 |
11%|ββ | 86/763 [02:19<05:36, 2.01it/s]
|
179 |
11%|ββ | 87/763 [02:20<05:34, 2.02it/s]
|
180 |
12%|ββ | 88/763 [02:20<05:34, 2.02it/s]
|
181 |
12%|ββ | 89/763 [02:21<05:34, 2.02it/s]
|
182 |
12%|ββ | 90/763 [02:21<05:33, 2.02it/s]
|
183 |
12%|ββ | 91/763 [02:22<05:32, 2.02it/s]
|
184 |
12%|ββ | 92/763 [02:22<05:31, 2.02it/s]
|
185 |
12%|ββ | 93/763 [02:23<05:32, 2.02it/s]
|
186 |
12%|ββ | 94/763 [02:23<05:31, 2.02it/s]
|
187 |
12%|ββ | 95/763 [02:24<05:30, 2.02it/s]
|
188 |
13%|ββ | 96/763 [02:24<05:30, 2.02it/s]
|
189 |
13%|ββ | 97/763 [02:25<05:30, 2.01it/s]
|
190 |
13%|ββ | 98/763 [02:25<05:29, 2.02it/s]
|
191 |
13%|ββ | 99/763 [02:26<05:29, 2.02it/s]
|
192 |
13%|ββ | 100/763 [02:26<05:32, 2.00it/s]{'loss': 5.0788, 'grad_norm': 0.4961775541305542, 'learning_rate': 0.0009972289418801728, 'epoch': 0.13}
|
|
|
193 |
|
194 |
13%|ββ | 100/763 [02:26<05:32, 2.00it/s]
|
195 |
13%|ββ | 101/763 [02:27<05:31, 2.00it/s]
|
196 |
13%|ββ | 102/763 [02:27<05:30, 2.00it/s]
|
197 |
13%|ββ | 103/763 [02:28<05:28, 2.01it/s]
|
198 |
14%|ββ | 104/763 [02:28<05:27, 2.01it/s]
|
199 |
14%|ββ | 105/763 [02:29<05:27, 2.01it/s]
|
200 |
14%|ββ | 106/763 [02:29<05:26, 2.01it/s]
|
201 |
14%|ββ | 107/763 [02:30<05:25, 2.01it/s]
|
202 |
14%|ββ | 108/763 [02:30<05:24, 2.02it/s]
|
203 |
14%|ββ | 109/763 [02:31<05:23, 2.02it/s]
|
204 |
14%|ββ | 110/763 [02:31<05:23, 2.02it/s]
|
205 |
15%|ββ | 111/763 [02:32<05:23, 2.02it/s]
|
206 |
15%|ββ | 112/763 [02:32<05:22, 2.02it/s]
|
207 |
15%|ββ | 113/763 [02:33<05:21, 2.02it/s]
|
208 |
15%|ββ | 114/763 [02:33<05:21, 2.02it/s]
|
209 |
15%|ββ | 115/763 [02:34<05:20, 2.02it/s]
|
210 |
15%|ββ | 116/763 [02:34<05:19, 2.02it/s]
|
211 |
15%|ββ | 117/763 [02:35<05:19, 2.02it/s]
|
212 |
15%|ββ | 118/763 [02:35<05:18, 2.02it/s]
|
213 |
16%|ββ | 119/763 [02:36<05:18, 2.02it/s]
|
214 |
16%|ββ | 120/763 [02:36<05:17, 2.02it/s]
|
215 |
16%|ββ | 121/763 [02:37<05:17, 2.02it/s]
|
216 |
16%|ββ | 122/763 [02:37<05:17, 2.02it/s]
|
217 |
16%|ββ | 123/763 [02:38<05:16, 2.02it/s]
|
218 |
16%|ββ | 124/763 [02:38<05:15, 2.02it/s]
|
219 |
16%|ββ | 125/763 [02:39<05:16, 2.02it/s]
|
220 |
{'loss': 4.6084, 'grad_norm': 0.2889716327190399, 'learning_rate': 0.0009879683689693263, 'epoch': 0.16}
|
|
|
221 |
16%|ββ | 125/763 [02:39<05:16, 2.02it/s]
|
222 |
17%|ββ | 126/763 [02:39<05:15, 2.02it/s]
|
223 |
17%|ββ | 127/763 [02:40<05:15, 2.01it/s]
|
224 |
17%|ββ | 128/763 [02:40<05:15, 2.02it/s]
|
225 |
17%|ββ | 129/763 [02:41<05:14, 2.02it/s]
|
226 |
17%|ββ | 130/763 [02:41<05:13, 2.02it/s]
|
227 |
17%|ββ | 131/763 [02:42<05:12, 2.02it/s]
|
228 |
17%|ββ | 132/763 [02:42<05:12, 2.02it/s]
|
229 |
17%|ββ | 133/763 [02:43<05:12, 2.02it/s]
|
230 |
18%|ββ | 134/763 [02:43<05:11, 2.02it/s]
|
231 |
18%|ββ | 135/763 [02:44<05:11, 2.02it/s]
|
232 |
18%|ββ | 136/763 [02:44<05:10, 2.02it/s]
|
233 |
18%|ββ | 137/763 [02:45<05:09, 2.02it/s]
|
234 |
18%|ββ | 138/763 [02:45<05:09, 2.02it/s]
|
235 |
18%|ββ | 139/763 [02:46<05:08, 2.02it/s]
|
236 |
18%|ββ | 140/763 [02:46<05:08, 2.02it/s]
|
237 |
18%|ββ | 141/763 [02:47<05:07, 2.02it/s]
|
238 |
19%|ββ | 142/763 [02:47<05:07, 2.02it/s]
|
239 |
19%|ββ | 143/763 [02:48<05:06, 2.02it/s]
|
240 |
19%|ββ | 144/763 [02:48<05:05, 2.02it/s]
|
241 |
19%|ββ | 145/763 [02:49<05:05, 2.02it/s]
|
242 |
19%|ββ | 146/763 [02:49<05:04, 2.02it/s]
|
243 |
19%|ββ | 147/763 [02:50<05:04, 2.02it/s]
|
244 |
19%|ββ | 148/763 [02:50<05:04, 2.02it/s]
|
245 |
20%|ββ | 149/763 [02:51<05:03, 2.02it/s]
|
246 |
20%|ββ | 150/763 [02:51<05:03, 2.02it/s]{'loss': 4.2413, 'grad_norm': 0.44144755601882935, 'learning_rate': 0.0009723185625357323, 'epoch': 0.2}
|
|
|
247 |
|
248 |
20%|ββ | 150/763 [02:51<05:03, 2.02it/s]
|
249 |
20%|ββ | 151/763 [02:52<05:03, 2.02it/s]
|
250 |
20%|ββ | 152/763 [02:52<05:02, 2.02it/s]
|
251 |
20%|ββ | 153/763 [02:53<05:01, 2.02it/s]
|
252 |
20%|ββ | 154/763 [02:53<05:01, 2.02it/s]
|
253 |
20%|ββ | 155/763 [02:54<05:00, 2.02it/s]
|
254 |
20%|ββ | 156/763 [02:54<05:00, 2.02it/s]
|
255 |
21%|ββ | 157/763 [02:55<04:59, 2.02it/s]
|
256 |
21%|ββ | 158/763 [02:55<04:59, 2.02it/s]
|
257 |
21%|ββ | 159/763 [02:56<04:58, 2.02it/s]
|
258 |
21%|ββ | 160/763 [02:56<04:58, 2.02it/s]
|
259 |
21%|ββ | 161/763 [02:57<04:57, 2.02it/s]
|
260 |
21%|ββ | 162/763 [02:57<04:57, 2.02it/s]
|
261 |
21%|βββ | 163/763 [02:58<04:57, 2.02it/s]
|
262 |
21%|βββ | 164/763 [02:58<04:56, 2.02it/s]
|
263 |
22%|βββ | 165/763 [02:59<04:55, 2.02it/s]
|
264 |
22%|βββ | 166/763 [02:59<04:55, 2.02it/s]
|
265 |
22%|βββ | 167/763 [03:00<04:54, 2.02it/s]
|
266 |
22%|βββ | 168/763 [03:00<04:54, 2.02it/s]
|
267 |
22%|βββ | 169/763 [03:01<04:53, 2.02it/s]
|
268 |
22%|βββ | 170/763 [03:01<04:52, 2.02it/s]
|
269 |
22%|βββ | 171/763 [03:02<04:52, 2.02it/s]
|
270 |
23%|βββ | 172/763 [03:02<04:51, 2.02it/s]
|
271 |
23%|βββ | 173/763 [03:03<04:51, 2.02it/s]
|
272 |
23%|βββ | 174/763 [03:03<04:51, 2.02it/s]
|
273 |
23%|βββ | 175/763 [03:04<04:50, 2.02it/s]{'loss': 3.9435, 'grad_norm': 0.39299631118774414, 'learning_rate': 0.0009504844339512095, 'epoch': 0.23}
|
|
|
274 |
|
275 |
23%|βββ | 175/763 [03:04<04:50, 2.02it/s]
|
276 |
23%|βββ | 176/763 [03:04<04:50, 2.02it/s]
|
277 |
23%|βββ | 177/763 [03:04<04:50, 2.02it/s]
|
278 |
23%|βββ | 178/763 [03:05<04:49, 2.02it/s]
|
279 |
23%|βββ | 179/763 [03:05<04:48, 2.02it/s]
|
280 |
24%|βββ | 180/763 [03:06<04:48, 2.02it/s]
|
281 |
24%|βββ | 181/763 [03:06<04:47, 2.02it/s]
|
282 |
24%|βββ | 182/763 [03:07<04:47, 2.02it/s]
|
283 |
24%|βββ | 183/763 [03:07<04:46, 2.02it/s]
|
284 |
24%|βββ | 184/763 [03:08<04:46, 2.02it/s]
|
285 |
24%|βββ | 185/763 [03:08<04:46, 2.02it/s]
|
286 |
24%|βββ | 186/763 [03:09<04:45, 2.02it/s]
|
287 |
25%|βββ | 187/763 [03:09<04:45, 2.02it/s]
|
288 |
25%|βββ | 188/763 [03:10<04:44, 2.02it/s]
|
289 |
25%|βββ | 189/763 [03:10<04:43, 2.02it/s]
|
290 |
25%|βββ | 190/763 [03:11<04:43, 2.02it/s]
|
291 |
25%|βββ | 191/763 [03:11<04:42, 2.02it/s]
|
292 |
25%|βββ | 192/763 [03:12<04:42, 2.02it/s]
|
293 |
25%|βββ | 193/763 [03:12<04:41, 2.02it/s]
|
294 |
25%|βββ | 194/763 [03:13<04:41, 2.02it/s]
|
295 |
26%|βββ | 195/763 [03:13<04:41, 2.02it/s]
|
296 |
26%|βββ | 196/763 [03:14<04:40, 2.02it/s]
|
297 |
26%|βββ | 197/763 [03:14<04:39, 2.02it/s]
|
298 |
26%|βββ | 198/763 [03:15<04:39, 2.02it/s]
|
299 |
26%|βββ | 199/763 [03:15<04:38, 2.02it/s]
|
300 |
26%|βββ | 200/763 [03:16<04:38, 2.02it/s]{'loss': 3.6901, 'grad_norm': 0.34031203389167786, 'learning_rate': 0.0009227518692591244, 'epoch': 0.26}
|
|
|
301 |
|
302 |
26%|βββ | 200/763 [03:16<04:38, 2.02it/s]
|
303 |
26%|βββ | 201/763 [03:16<04:38, 2.02it/s]
|
304 |
26%|βββ | 202/763 [03:17<04:37, 2.02it/s]
|
305 |
27%|βββ | 203/763 [03:17<04:36, 2.02it/s]
|
306 |
27%|βββ | 204/763 [03:18<04:36, 2.02it/s]
|
307 |
27%|βββ | 205/763 [03:18<04:35, 2.03it/s]
|
308 |
27%|βββ | 206/763 [03:19<04:35, 2.03it/s]
|
309 |
27%|βββ | 207/763 [03:19<04:34, 2.02it/s]
|
310 |
27%|βββ | 208/763 [03:20<04:34, 2.02it/s]
|
311 |
27%|βββ | 209/763 [03:20<04:33, 2.02it/s]
|
312 |
28%|βββ | 210/763 [03:21<04:33, 2.02it/s]
|
313 |
28%|βββ | 211/763 [03:21<04:32, 2.02it/s]
|
314 |
28%|βββ | 212/763 [03:22<04:32, 2.02it/s]
|
315 |
28%|βββ | 213/763 [03:22<04:31, 2.02it/s]
|
316 |
28%|βββ | 214/763 [03:23<04:31, 2.02it/s]
|
317 |
28%|βββ | 215/763 [03:23<04:30, 2.03it/s]
|
318 |
28%|βββ | 216/763 [03:24<04:30, 2.02it/s]
|
319 |
28%|βββ | 217/763 [03:24<04:29, 2.03it/s]
|
320 |
29%|βββ | 218/763 [03:25<04:29, 2.03it/s]
|
321 |
29%|βββ | 219/763 [03:25<04:28, 2.03it/s]
|
322 |
29%|βββ | 220/763 [03:26<04:28, 2.03it/s]
|
323 |
29%|βββ | 221/763 [03:26<04:27, 2.02it/s]
|
324 |
29%|βββ | 222/763 [03:27<04:26, 2.03it/s]
|
325 |
29%|βββ | 223/763 [03:27<04:26, 2.03it/s]
|
326 |
29%|βββ | 224/763 [03:28<04:26, 2.02it/s]
|
327 |
29%|βββ | 225/763 [03:28<04:25, 2.02it/s]{'loss': 3.4966, 'grad_norm': 0.37306490540504456, 'learning_rate': 0.0008894839859139472, 'epoch': 0.29}
|
|
|
328 |
|
329 |
29%|βββ | 225/763 [03:28<04:25, 2.02it/s]
|
330 |
30%|βββ | 226/763 [03:29<04:25, 2.02it/s]
|
331 |
30%|βββ | 227/763 [03:29<04:24, 2.02it/s]
|
332 |
30%|βββ | 228/763 [03:30<04:24, 2.02it/s]
|
333 |
30%|βββ | 229/763 [03:30<04:23, 2.02it/s]
|
334 |
30%|βββ | 230/763 [03:31<04:23, 2.02it/s]
|
335 |
30%|βββ | 231/763 [03:31<04:22, 2.03it/s]
|
336 |
30%|βββ | 232/763 [03:32<04:22, 2.02it/s]
|
337 |
31%|βββ | 233/763 [03:32<04:22, 2.02it/s]
|
338 |
31%|βββ | 234/763 [03:33<04:21, 2.02it/s]
|
339 |
31%|βββ | 235/763 [03:33<04:20, 2.02it/s]
|
340 |
31%|βββ | 236/763 [03:34<04:20, 2.02it/s]
|
341 |
31%|βββ | 237/763 [03:34<04:20, 2.02it/s]
|
342 |
31%|βββ | 238/763 [03:35<04:19, 2.02it/s]
|
343 |
31%|ββββ | 239/763 [03:35<04:18, 2.02it/s]
|
344 |
31%|ββββ | 240/763 [03:36<04:18, 2.02it/s]
|
345 |
32%|ββββ | 241/763 [03:36<04:17, 2.02it/s]
|
346 |
32%|ββββ | 242/763 [03:37<04:17, 2.02it/s]
|
347 |
32%|ββββ | 243/763 [03:37<04:17, 2.02it/s]
|
348 |
32%|ββββ | 244/763 [03:38<04:16, 2.02it/s]
|
349 |
32%|ββββ | 245/763 [03:38<04:16, 2.02it/s]
|
350 |
32%|ββββ | 246/763 [03:39<04:15, 2.02it/s]
|
351 |
32%|ββββ | 247/763 [03:39<04:15, 2.02it/s]
|
352 |
33%|ββββ | 248/763 [03:40<04:14, 2.02it/s]
|
353 |
33%|ββββ | 249/763 [03:40<04:14, 2.02it/s]
|
354 |
33%|ββββ | 250/763 [03:41<04:13, 2.03it/s]{'loss': 3.3449, 'grad_norm': 0.4239332973957062, 'learning_rate': 0.0008511163782882168, 'epoch': 0.33}
|
|
|
355 |
|
356 |
33%|ββββ | 250/763 [03:41<04:13, 2.03it/s]
|
357 |
33%|ββββ | 251/763 [03:41<04:13, 2.02it/s]
|
358 |
33%|ββββ | 252/763 [03:42<04:12, 2.02it/s]
|
359 |
33%|ββββ | 253/763 [03:42<04:12, 2.02it/s]
|
360 |
33%|ββββ | 254/763 [03:43<04:11, 2.02it/s]
|
361 |
33%|ββββ | 255/763 [03:43<04:11, 2.02it/s]
|
362 |
34%|ββββ | 256/763 [03:44<04:10, 2.02it/s]
|
363 |
34%|ββββ | 257/763 [03:44<04:09, 2.02it/s]
|
364 |
34%|ββββ | 258/763 [03:45<04:09, 2.03it/s]
|
365 |
34%|ββββ | 259/763 [03:45<04:08, 2.03it/s]
|
366 |
34%|ββββ | 260/763 [03:46<04:08, 2.02it/s]
|
367 |
34%|ββββ | 261/763 [03:46<04:08, 2.02it/s]
|
368 |
34%|ββββ | 262/763 [03:47<04:07, 2.02it/s]
|
369 |
34%|ββββ | 263/763 [03:47<04:07, 2.02it/s]
|
370 |
35%|ββββ | 264/763 [03:47<04:06, 2.02it/s]
|
371 |
35%|ββββ | 265/763 [03:48<04:06, 2.02it/s]
|
372 |
35%|ββββ | 266/763 [03:48<04:05, 2.02it/s]
|
373 |
35%|ββββ | 267/763 [03:49<04:05, 2.02it/s]
|
374 |
35%|ββββ | 268/763 [03:49<04:04, 2.02it/s]
|
375 |
35%|ββββ | 269/763 [03:50<04:04, 2.02it/s]
|
376 |
35%|ββββ | 270/763 [03:50<04:03, 2.02it/s]
|
377 |
36%|ββββ | 271/763 [03:51<04:03, 2.02it/s]
|
378 |
36%|ββββ | 272/763 [03:51<04:02, 2.02it/s]
|
379 |
36%|ββββ | 273/763 [03:52<04:02, 2.02it/s]
|
380 |
36%|ββββ | 274/763 [03:52<04:01, 2.02it/s]
|
381 |
36%|ββββ | 275/763 [03:53<04:01, 2.02it/s]
|
382 |
|
|
|
383 |
36%|ββββ | 275/763 [03:53<04:01, 2.02it/s]
|
384 |
36%|ββββ | 276/763 [03:53<04:00, 2.02it/s]
|
385 |
36%|ββββ | 277/763 [03:54<04:00, 2.02it/s]
|
386 |
36%|ββββ | 278/763 [03:54<03:59, 2.02it/s]
|
387 |
37%|ββββ | 279/763 [03:55<03:59, 2.02it/s]
|
388 |
37%|ββββ | 280/763 [03:55<03:58, 2.02it/s]
|
389 |
37%|ββββ | 281/763 [03:56<03:58, 2.02it/s]
|
390 |
37%|ββββ | 282/763 [03:56<03:57, 2.02it/s]
|
391 |
37%|ββββ | 283/763 [03:57<03:57, 2.02it/s]
|
392 |
37%|ββββ | 284/763 [03:57<03:56, 2.02it/s]
|
393 |
37%|ββββ | 285/763 [03:58<03:56, 2.02it/s]
|
394 |
37%|ββββ | 286/763 [03:58<03:55, 2.02it/s]
|
395 |
38%|ββββ | 287/763 [03:59<03:55, 2.02it/s]
|
396 |
38%|ββββ | 288/763 [03:59<03:55, 2.02it/s]
|
397 |
38%|ββββ | 289/763 [04:00<03:54, 2.02it/s]
|
398 |
38%|ββββ | 290/763 [04:00<03:54, 2.02it/s]
|
399 |
38%|ββββ | 291/763 [04:01<03:53, 2.02it/s]
|
400 |
38%|ββββ | 292/763 [04:01<03:53, 2.02it/s]
|
401 |
38%|ββββ | 293/763 [04:02<03:52, 2.02it/s]
|
402 |
39%|ββββ | 294/763 [04:02<03:51, 2.02it/s]
|
403 |
39%|ββββ | 295/763 [04:03<03:51, 2.02it/s]
|
404 |
39%|ββββ | 296/763 [04:03<03:50, 2.02it/s]
|
405 |
39%|ββββ | 297/763 [04:04<03:50, 2.02it/s]
|
406 |
39%|ββββ | 298/763 [04:04<03:49, 2.02it/s]
|
407 |
39%|ββββ | 299/763 [04:05<03:49, 2.02it/s]
|
408 |
39%|ββββ | 300/763 [04:05<03:48, 2.02it/s]
|
409 |
{'loss': 3.0883, 'grad_norm': 0.3450576961040497, 'learning_rate': 0.0007611516571398591, 'epoch': 0.39}
|
|
|
410 |
39%|ββββ | 300/763 [04:05<03:48, 2.02it/s]
|
411 |
39%|ββββ | 301/763 [04:06<03:48, 2.02it/s]
|
412 |
40%|ββββ | 302/763 [04:06<03:48, 2.02it/s]
|
413 |
40%|ββββ | 303/763 [04:07<03:47, 2.02it/s]
|
414 |
40%|ββββ | 304/763 [04:07<03:47, 2.02it/s]
|
415 |
40%|ββββ | 305/763 [04:08<03:46, 2.02it/s]
|
416 |
40%|ββββ | 306/763 [04:08<03:45, 2.02it/s]
|
417 |
40%|ββββ | 307/763 [04:09<03:45, 2.02it/s]
|
418 |
40%|ββββ | 308/763 [04:09<03:44, 2.02it/s]
|
419 |
40%|ββββ | 309/763 [04:10<03:44, 2.02it/s]
|
420 |
41%|ββββ | 310/763 [04:10<03:43, 2.02it/s]
|
421 |
41%|ββββ | 311/763 [04:11<03:43, 2.02it/s]
|
422 |
41%|ββββ | 312/763 [04:11<03:42, 2.02it/s]
|
423 |
41%|ββββ | 313/763 [04:12<03:42, 2.02it/s]
|
424 |
41%|ββββ | 314/763 [04:12<03:41, 2.02it/s]
|
425 |
41%|βββββ | 315/763 [04:13<03:41, 2.02it/s]
|
426 |
41%|βββββ | 316/763 [04:13<03:40, 2.02it/s]
|
427 |
42%|οΏ½οΏ½οΏ½ββββ | 317/763 [04:14<03:40, 2.02it/s]
|
428 |
42%|βββββ | 318/763 [04:14<03:39, 2.02it/s]
|
429 |
42%|βββββ | 319/763 [04:15<03:39, 2.02it/s]
|
430 |
42%|βββββ | 320/763 [04:15<03:38, 2.02it/s]
|
431 |
42%|βββββ | 321/763 [04:16<03:38, 2.02it/s]
|
432 |
42%|βββββ | 322/763 [04:16<03:38, 2.02it/s]
|
433 |
42%|βββββ | 323/763 [04:17<03:37, 2.02it/s]
|
434 |
42%|βββββ | 324/763 [04:17<03:36, 2.02it/s]
|
435 |
43%|βββββ | 325/763 [04:18<03:36, 2.03it/s]
|
436 |
|
|
|
437 |
43%|βββββ | 325/763 [04:18<03:36, 2.03it/s]
|
438 |
43%|βββββ | 326/763 [04:18<03:36, 2.02it/s]
|
439 |
43%|βββββ | 327/763 [04:19<03:35, 2.02it/s]
|
440 |
43%|βββββ | 328/763 [04:19<03:35, 2.02it/s]
|
441 |
43%|βββββ | 329/763 [04:20<03:34, 2.02it/s]
|
442 |
43%|βββββ | 330/763 [04:20<03:33, 2.02it/s]
|
443 |
43%|βββββ | 331/763 [04:21<03:33, 2.02it/s]
|
444 |
44%|βββββ | 332/763 [04:21<03:33, 2.02it/s]
|
445 |
44%|βββββ | 333/763 [04:22<03:32, 2.02it/s]
|
446 |
44%|βββββ | 334/763 [04:22<03:31, 2.02it/s]
|
447 |
44%|βββββ | 335/763 [04:23<03:31, 2.02it/s]
|
448 |
44%|βββββ | 336/763 [04:23<03:31, 2.02it/s]
|
449 |
44%|βββββ | 337/763 [04:24<03:30, 2.02it/s]
|
450 |
44%|βββββ | 338/763 [04:24<03:30, 2.02it/s]
|
451 |
44%|βββββ | 339/763 [04:25<03:29, 2.02it/s]
|
452 |
45%|βββββ | 340/763 [04:25<03:29, 2.02it/s]
|
453 |
45%|βββββ | 341/763 [04:26<03:28, 2.02it/s]
|
454 |
45%|βββββ | 342/763 [04:26<03:28, 2.02it/s]
|
455 |
45%|βββββ | 343/763 [04:27<03:27, 2.02it/s]
|
456 |
45%|βββββ | 344/763 [04:27<03:26, 2.02it/s]
|
457 |
45%|βββββ | 345/763 [04:28<03:26, 2.03it/s]
|
458 |
45%|βββββ | 346/763 [04:28<03:26, 2.02it/s]
|
459 |
45%|βββββ | 347/763 [04:29<03:25, 2.02it/s]
|
460 |
46%|βββββ | 348/763 [04:29<03:25, 2.02it/s]
|
461 |
46%|βββββ | 349/763 [04:30<03:24, 2.02it/s]
|
462 |
46%|βββββ | 350/763 [04:30<03:24, 2.02it/s]{'loss': 2.9083, 'grad_norm': 0.3510383367538452, 'learning_rate': 0.0006575541090118104, 'epoch': 0.46}
|
|
|
463 |
|
464 |
46%|βββββ | 350/763 [04:30<03:24, 2.02it/s]
|
465 |
46%|βββββ | 351/763 [04:31<03:24, 2.02it/s]
|
466 |
46%|βββββ | 352/763 [04:31<03:23, 2.02it/s]
|
467 |
46%|βββββ | 353/763 [04:32<03:22, 2.02it/s]
|
468 |
46%|βββββ | 354/763 [04:32<03:22, 2.02it/s]
|
469 |
47%|βββββ | 355/763 [04:32<03:21, 2.02it/s]
|
470 |
47%|βββββ | 356/763 [04:33<03:21, 2.02it/s]
|
471 |
47%|βββββ | 357/763 [04:33<03:20, 2.02it/s]
|
472 |
47%|βββββ | 358/763 [04:34<03:20, 2.02it/s]
|
473 |
47%|βββββ | 359/763 [04:34<03:19, 2.02it/s]
|
474 |
47%|βββββ | 360/763 [04:35<03:19, 2.02it/s]
|
475 |
47%|βββββ | 361/763 [04:35<03:18, 2.02it/s]
|
476 |
47%|βββββ | 362/763 [04:36<03:18, 2.02it/s]
|
477 |
48%|βββββ | 363/763 [04:36<03:17, 2.02it/s]
|
478 |
48%|βββββ | 364/763 [04:37<03:17, 2.02it/s]
|
479 |
48%|βββββ | 365/763 [04:37<03:16, 2.02it/s]
|
480 |
48%|βββββ | 366/763 [04:38<03:16, 2.02it/s]
|
481 |
48%|βββββ | 367/763 [04:38<03:15, 2.02it/s]
|
482 |
48%|βββββ | 368/763 [04:39<03:15, 2.02it/s]
|
483 |
48%|βββββ | 369/763 [04:39<03:14, 2.02it/s]
|
484 |
48%|βββββ | 370/763 [04:40<03:14, 2.02it/s]
|
485 |
49%|βββββ | 371/763 [04:40<03:13, 2.02it/s]
|
486 |
49%|βββββ | 372/763 [04:41<03:13, 2.02it/s]
|
487 |
49%|βββββ | 373/763 [04:41<03:12, 2.03it/s]
|
488 |
49%|βββββ | 374/763 [04:42<03:12, 2.03it/s]
|
489 |
49%|βββββ | 375/763 [04:42<03:11, 2.02it/s]{'loss': 2.8176, 'grad_norm': 0.3813249170780182, 'learning_rate': 0.0006023127766192824, 'epoch': 0.49}
|
|
|
490 |
|
491 |
49%|βββββ | 375/763 [04:42<03:11, 2.02it/s]
|
492 |
49%|βββββ | 376/763 [04:43<03:11, 2.02it/s]
|
493 |
49%|βββββ | 377/763 [04:43<03:10, 2.02it/s]
|
494 |
50%|βββββ | 378/763 [04:44<03:10, 2.02it/s]
|
495 |
50%|βββββ | 379/763 [04:44<03:09, 2.02it/s]
|
496 |
50%|βββββ | 380/763 [04:45<03:09, 2.03it/s]
|
497 |
50%|βββββ | 381/763 [04:45<03:08, 2.02it/s]
|
498 |
50%|βββββ | 382/763 [04:46<03:08, 2.02it/s]
|
499 |
50%|βββββ | 383/763 [04:46<03:07, 2.02it/s]
|
500 |
50%|βββββ | 384/763 [04:47<03:07, 2.02it/s]
|
501 |
50%|βββββ | 385/763 [04:47<03:07, 2.02it/s]
|
502 |
51%|βββββ | 386/763 [04:48<03:06, 2.02it/s]
|
503 |
51%|βββββ | 387/763 [04:48<03:05, 2.02it/s]
|
504 |
51%|βββββ | 388/763 [04:49<03:05, 2.02it/s]
|
505 |
51%|βββββ | 389/763 [04:49<03:04, 2.03it/s]
|
506 |
51%|βββββ | 390/763 [04:50<03:04, 2.02it/s]
|
507 |
51%|βββββ | 391/763 [04:50<03:03, 2.03it/s]
|
508 |
51%|ββββββ | 392/763 [04:51<03:03, 2.02it/s]
|
509 |
52%|ββββββ | 393/763 [04:51<03:02, 2.02it/s]
|
510 |
52%|ββββββ | 394/763 [04:52<03:02, 2.02it/s]
|
511 |
52%|ββββββ | 395/763 [04:52<03:02, 2.02it/s]
|
512 |
52%|ββββββ | 396/763 [04:53<03:01, 2.02it/s]
|
513 |
52%|ββββββ | 397/763 [04:53<03:00, 2.02it/s]
|
514 |
52%|ββββββ | 398/763 [04:54<03:00, 2.02it/s]
|
515 |
52%|ββββββ | 399/763 [04:54<02:59, 2.02it/s]
|
516 |
52%|ββββββ | 400/763 [04:55<02:59, 2.02it/s]{'loss': 2.7491, 'grad_norm': 0.34457528591156006, 'learning_rate': 0.0005457318077590012, 'epoch': 0.52}
|
|
|
517 |
|
518 |
52%|ββββββ | 400/763 [04:55<02:59, 2.02it/s]
|
519 |
53%|ββββββ | 401/763 [04:55<02:59, 2.02it/s]
|
520 |
53%|ββββββ | 402/763 [04:56<02:58, 2.02it/s]
|
521 |
53%|ββββββ | 403/763 [04:56<02:58, 2.02it/s]
|
522 |
53%|ββββββ | 404/763 [04:57<02:57, 2.02it/s]
|
523 |
53%|ββββββ | 405/763 [04:57<02:57, 2.02it/s]
|
524 |
53%|ββββββ | 406/763 [04:58<02:56, 2.02it/s]
|
525 |
53%|ββββββ | 407/763 [04:58<02:56, 2.02it/s]
|
526 |
53%|ββββββ | 408/763 [04:59<02:55, 2.02it/s]
|
527 |
54%|ββββββ | 409/763 [04:59<02:55, 2.02it/s]
|
528 |
54%|ββββββ | 410/763 [05:00<02:54, 2.02it/s]
|
529 |
54%|ββββββ | 411/763 [05:00<02:54, 2.02it/s]
|
530 |
54%|ββββββ | 412/763 [05:01<02:53, 2.02it/s]
|
531 |
54%|ββββββ | 413/763 [05:01<02:53, 2.02it/s]
|
532 |
54%|ββββββ | 414/763 [05:02<02:52, 2.02it/s]
|
533 |
54%|ββββββ | 415/763 [05:02<02:51, 2.02it/s]
|
534 |
55%|ββββββ | 416/763 [05:03<02:51, 2.02it/s]
|
535 |
55%|ββββββ | 417/763 [05:03<02:50, 2.03it/s]
|
536 |
55%|ββββββ | 418/763 [05:04<02:50, 2.03it/s]
|
537 |
55%|ββββββ | 419/763 [05:04<02:49, 2.03it/s]
|
538 |
55%|ββββββ | 420/763 [05:05<02:49, 2.03it/s]
|
539 |
55%|ββββββ | 421/763 [05:05<02:48, 2.03it/s]
|
540 |
55%|ββββββ | 422/763 [05:06<02:48, 2.03it/s]
|
541 |
55%|ββββββ | 423/763 [05:06<02:47, 2.02it/s]
|
542 |
56%|ββββββ | 424/763 [05:07<02:47, 2.03it/s]
|
543 |
56%|ββββββ | 425/763 [05:07<02:47, 2.02it/s]{'loss': 2.6858, 'grad_norm': 0.365237295627594, 'learning_rate': 0.0004885520476290998, 'epoch': 0.56}
|
|
|
544 |
|
545 |
56%|ββββββ | 425/763 [05:07<02:47, 2.02it/s]
|
546 |
56%|ββββββ | 426/763 [05:08<02:46, 2.02it/s]
|
547 |
56%|ββββββ | 427/763 [05:08<02:46, 2.02it/s]
|
548 |
56%|ββββββ | 428/763 [05:09<02:45, 2.02it/s]
|
549 |
56%|ββββββ | 429/763 [05:09<02:45, 2.02it/s]
|
550 |
56%|ββββββ | 430/763 [05:10<02:44, 2.02it/s]
|
551 |
56%|ββββββ | 431/763 [05:10<02:43, 2.03it/s]
|
552 |
57%|ββββββ | 432/763 [05:11<02:43, 2.03it/s]
|
553 |
57%|ββββββ | 433/763 [05:11<02:42, 2.02it/s]
|
554 |
57%|ββββββ | 434/763 [05:12<02:42, 2.02it/s]
|
555 |
57%|ββββββ | 435/763 [05:12<02:42, 2.02it/s]
|
556 |
57%|ββββββ | 436/763 [05:13<02:41, 2.02it/s]
|
557 |
57%|ββββββ | 437/763 [05:13<02:40, 2.03it/s]
|
558 |
57%|ββββββ | 438/763 [05:14<02:40, 2.02it/s]
|
559 |
58%|ββββββ | 439/763 [05:14<02:40, 2.02it/s]
|
560 |
58%|ββββββ | 440/763 [05:15<02:39, 2.02it/s]
|
561 |
58%|ββββββ | 441/763 [05:15<02:38, 2.03it/s]
|
562 |
58%|ββββββ | 442/763 [05:15<02:38, 2.02it/s]
|
563 |
58%|ββββββ | 443/763 [05:16<02:37, 2.03it/s]
|
564 |
58%|ββββββ | 444/763 [05:16<02:37, 2.02it/s]
|
565 |
58%|ββββββ | 445/763 [05:17<02:36, 2.03it/s]
|
566 |
58%|ββββββ | 446/763 [05:17<02:36, 2.03it/s]
|
567 |
59%|ββββββ | 447/763 [05:18<02:36, 2.03it/s]
|
568 |
59%|ββββββ | 448/763 [05:18<02:35, 2.03it/s]
|
569 |
59%|ββββββ | 449/763 [05:19<02:34, 2.03it/s]
|
570 |
59%|ββββββ | 450/763 [05:19<02:34, 2.03it/s]{'loss': 2.6339, 'grad_norm': 0.3599332869052887, 'learning_rate': 0.00043152218172535383, 'epoch': 0.59}
|
|
|
571 |
|
572 |
59%|ββββββ | 450/763 [05:19<02:34, 2.03it/s]
|
573 |
59%|ββββββ | 451/763 [05:20<02:34, 2.02it/s]
|
574 |
59%|ββββββ | 452/763 [05:20<02:33, 2.02it/s]
|
575 |
59%|ββββββ | 453/763 [05:21<02:33, 2.02it/s]
|
576 |
60%|ββββββ | 454/763 [05:21<02:32, 2.02it/s]
|
577 |
60%|ββββββ | 455/763 [05:22<02:32, 2.02it/s]
|
578 |
60%|ββββββ | 456/763 [05:22<02:31, 2.02it/s]
|
579 |
60%|ββββββ | 457/763 [05:23<02:31, 2.02it/s]
|
580 |
60%|ββββββ | 458/763 [05:23<02:30, 2.03it/s]
|
581 |
60%|ββββββ | 459/763 [05:24<02:30, 2.02it/s]
|
582 |
60%|ββββββ | 460/763 [05:24<02:29, 2.02it/s]
|
583 |
60%|ββββββ | 461/763 [05:25<02:29, 2.02it/s]
|
584 |
61%|ββββββ | 462/763 [05:25<02:28, 2.02it/s]
|
585 |
61%|ββββββ | 463/763 [05:26<02:28, 2.02it/s]
|
586 |
61%|ββββββ | 464/763 [05:26<02:27, 2.02it/s]
|
587 |
61%|ββββββ | 465/763 [05:27<02:27, 2.02it/s]
|
588 |
61%|ββββββ | 466/763 [05:27<02:26, 2.02it/s]
|
589 |
61%|ββββββ | 467/763 [05:28<02:26, 2.02it/s]
|
590 |
61%|βββββββ | 468/763 [05:28<02:25, 2.02it/s]
|
591 |
61%|βββββββ | 469/763 [05:29<02:25, 2.03it/s]
|
592 |
62%|βββββββ | 470/763 [05:29<02:24, 2.02it/s]
|
593 |
62%|βββββββ | 471/763 [05:30<02:24, 2.02it/s]
|
594 |
62%|βββββββ | 472/763 [05:30<02:23, 2.02it/s]
|
595 |
62%|βββββββ | 473/763 [05:31<02:23, 2.02it/s]
|
596 |
62%|βββββββ | 474/763 [05:31<02:22, 2.02it/s]
|
597 |
62%|βββββββ | 475/763 [05:32<02:22, 2.02it/s]{'loss': 2.5753, 'grad_norm': 0.36764204502105713, 'learning_rate': 0.0003753889328974423, 'epoch': 0.62}
|
|
|
598 |
|
599 |
62%|βββββββ | 475/763 [05:32<02:22, 2.02it/s]
|
600 |
62%|βββββββ | 476/763 [05:32<02:22, 2.01it/s]
|
601 |
63%|βββββββ | 477/763 [05:33<02:21, 2.02it/s]
|
602 |
63%|βββββββ | 478/763 [05:33<02:21, 2.02it/s]
|
603 |
63%|βββββββ | 479/763 [05:34<02:20, 2.02it/s]
|
604 |
63%|βββββββ | 480/763 [05:34<02:20, 2.02it/s]
|
605 |
63%|βββββββ | 481/763 [05:35<02:19, 2.02it/s]
|
606 |
63%|βββββββ | 482/763 [05:35<02:19, 2.02it/s]
|
607 |
63%|βββββββ | 483/763 [05:36<02:18, 2.02it/s]
|
608 |
63%|βββββββ | 484/763 [05:36<02:17, 2.02it/s]
|
609 |
64%|βββββββ | 485/763 [05:37<02:17, 2.02it/s]
|
610 |
64%|βββββββ | 486/763 [05:37<02:16, 2.02it/s]
|
611 |
64%|βββββββ | 487/763 [05:38<02:16, 2.02it/s]
|
612 |
64%|βββββββ | 488/763 [05:38<02:15, 2.02it/s]
|
613 |
64%|βββββββ | 489/763 [05:39<02:15, 2.02it/s]
|
614 |
64%|βββββββ | 490/763 [05:39<02:14, 2.03it/s]
|
615 |
64%|βββββββ | 491/763 [05:40<02:14, 2.02it/s]
|
616 |
64%|βββββββ | 492/763 [05:40<02:13, 2.03it/s]
|
617 |
65%|βββββββ | 493/763 [05:41<02:13, 2.02it/s]
|
618 |
65%|βββββββ | 494/763 [05:41<02:12, 2.03it/s]
|
619 |
65%|βββββββ | 495/763 [05:42<02:12, 2.03it/s]
|
620 |
65%|βββββββ | 496/763 [05:42<02:11, 2.02it/s]
|
621 |
65%|βββββββ | 497/763 [05:43<02:11, 2.03it/s]
|
622 |
65%|βββββββ | 498/763 [05:43<02:10, 2.03it/s]
|
623 |
65%|βββββββ | 499/763 [05:44<02:10, 2.02it/s]
|
624 |
66%|βββββββ | 500/763 [05:44<02:09, 2.02it/s]{'loss': 2.5207, 'grad_norm': 0.35472121834754944, 'learning_rate': 0.00032088728410319416, 'epoch': 0.66}
|
|
|
625 |
|
626 |
66%|βββββββ | 500/763 [05:44<02:09, 2.02it/s]
|
627 |
66%|βββββββ | 501/763 [05:45<02:09, 2.02it/s]
|
628 |
66%|βββββββ | 502/763 [05:45<02:09, 2.02it/s]
|
629 |
66%|βββββββ | 503/763 [05:46<02:08, 2.02it/s]
|
630 |
66%|βββββββ | 504/763 [05:46<02:08, 2.02it/s]
|
631 |
66%|βββββββ | 505/763 [05:47<02:07, 2.02it/s]
|
632 |
66%|βββββββ | 506/763 [05:47<02:07, 2.02it/s]
|
633 |
66%|βββββββ | 507/763 [05:48<02:06, 2.02it/s]
|
634 |
67%|βββββββ | 508/763 [05:48<02:05, 2.03it/s]
|
635 |
67%|βββββββ | 509/763 [05:49<02:05, 2.03it/s]
|
636 |
67%|βββββββ | 510/763 [05:49<02:04, 2.03it/s]
|
637 |
67%|βββββββ | 511/763 [05:50<02:04, 2.02it/s]
|
638 |
67%|βββββββ | 512/763 [05:50<02:03, 2.02it/s]
|
639 |
67%|βββββββ | 513/763 [05:51<02:03, 2.02it/s]
|
640 |
67%|βββββββ | 514/763 [05:51<02:03, 2.02it/s]
|
641 |
67%|βββββββ | 515/763 [05:52<02:02, 2.02it/s]
|
642 |
68%|βββββββ | 516/763 [05:52<02:01, 2.02it/s]
|
643 |
68%|βββββββ | 517/763 [05:53<02:01, 2.02it/s]
|
644 |
68%|βββββββ | 518/763 [05:53<02:00, 2.03it/s]
|
645 |
68%|βββββββ | 519/763 [05:54<02:00, 2.03it/s]
|
646 |
68%|βββββββ | 520/763 [05:54<01:59, 2.03it/s]
|
647 |
68%|βββββββ | 521/763 [05:55<01:59, 2.03it/s]
|
648 |
68%|βββββββ | 522/763 [05:55<01:59, 2.02it/s]
|
649 |
69%|βββββββ | 523/763 [05:56<01:58, 2.03it/s]
|
650 |
69%|βββββββ | 524/763 [05:56<01:58, 2.02it/s]
|
651 |
69%|βββββββ | 525/763 [05:57<01:57, 2.03it/s]{'loss': 2.4755, 'grad_norm': 0.342868447303772, 'learning_rate': 0.0002687308548795825, 'epoch': 0.69}
|
|
|
652 |
|
653 |
69%|βββββββ | 525/763 [05:57<01:57, 2.03it/s]
|
654 |
69%|βββββββ | 526/763 [05:57<01:57, 2.02it/s]
|
655 |
69%|βββββββ | 527/763 [05:57<01:56, 2.02it/s]
|
656 |
69%|βββββββ | 528/763 [05:58<01:56, 2.02it/s]
|
657 |
69%|βββββββ | 529/763 [05:58<01:55, 2.03it/s]
|
658 |
69%|βββββββ | 530/763 [05:59<01:55, 2.02it/s]
|
659 |
70%|βββββββ | 531/763 [05:59<01:54, 2.02it/s]
|
660 |
70%|βββββββ | 532/763 [06:00<01:54, 2.02it/s]
|
661 |
70%|βββββοΏ½οΏ½οΏ½β | 533/763 [06:00<01:53, 2.02it/s]
|
662 |
70%|βββββββ | 534/763 [06:01<01:53, 2.02it/s]
|
663 |
70%|βββββββ | 535/763 [06:01<01:52, 2.02it/s]
|
664 |
70%|βββββββ | 536/763 [06:02<01:52, 2.02it/s]
|
665 |
70%|βββββββ | 537/763 [06:02<01:51, 2.02it/s]
|
666 |
71%|βββββββ | 538/763 [06:03<01:51, 2.02it/s]
|
667 |
71%|βββββββ | 539/763 [06:03<01:50, 2.02it/s]
|
668 |
71%|βββββββ | 540/763 [06:04<01:50, 2.02it/s]
|
669 |
71%|βββββββ | 541/763 [06:04<01:49, 2.02it/s]
|
670 |
71%|βββββββ | 542/763 [06:05<01:49, 2.02it/s]
|
671 |
71%|βββββββ | 543/763 [06:05<01:48, 2.02it/s]
|
672 |
71%|ββββββββ | 544/763 [06:06<01:48, 2.02it/s]
|
673 |
71%|ββββββββ | 545/763 [06:06<01:47, 2.02it/s]
|
674 |
72%|ββββββββ | 546/763 [06:07<01:47, 2.02it/s]
|
675 |
72%|ββββββββ | 547/763 [06:07<01:46, 2.02it/s]
|
676 |
72%|ββββββββ | 548/763 [06:08<01:46, 2.02it/s]
|
677 |
72%|ββββββββ | 549/763 [06:08<01:45, 2.02it/s]
|
678 |
72%|ββββββββ | 550/763 [06:09<01:45, 2.02it/s]{'loss': 2.436, 'grad_norm': 0.36740800738334656, 'learning_rate': 0.00021960255753653008, 'epoch': 0.72}
|
|
|
679 |
|
680 |
72%|ββββββββ | 550/763 [06:09<01:45, 2.02it/s]
|
681 |
72%|ββββββββ | 551/763 [06:09<01:44, 2.02it/s]
|
682 |
72%|ββββββββ | 552/763 [06:10<01:44, 2.02it/s]
|
683 |
72%|ββββββββ | 553/763 [06:10<01:43, 2.02it/s]
|
684 |
73%|ββββββββ | 554/763 [06:11<01:43, 2.02it/s]
|
685 |
73%|ββββββββ | 555/763 [06:11<01:42, 2.02it/s]
|
686 |
73%|ββββββββ | 556/763 [06:12<01:42, 2.02it/s]
|
687 |
73%|ββββββββ | 557/763 [06:12<01:41, 2.02it/s]
|
688 |
73%|ββββββββ | 558/763 [06:13<01:41, 2.02it/s]
|
689 |
73%|ββββββββ | 559/763 [06:13<01:40, 2.02it/s]
|
690 |
73%|ββββββββ | 560/763 [06:14<01:40, 2.02it/s]
|
691 |
74%|ββββββββ | 561/763 [06:14<01:39, 2.02it/s]
|
692 |
74%|ββββββββ | 562/763 [06:15<01:39, 2.02it/s]
|
693 |
74%|ββββββββ | 563/763 [06:15<01:38, 2.02it/s]
|
694 |
74%|ββββββββ | 564/763 [06:16<01:38, 2.02it/s]
|
695 |
74%|ββββββββ | 565/763 [06:16<01:37, 2.02it/s]
|
696 |
74%|ββββββββ | 566/763 [06:17<01:37, 2.02it/s]
|
697 |
74%|ββββββββ | 567/763 [06:17<01:36, 2.02it/s]
|
698 |
74%|ββββββββ | 568/763 [06:18<01:36, 2.02it/s]
|
699 |
75%|ββββββββ | 569/763 [06:18<01:36, 2.02it/s]
|
700 |
75%|ββββββββ | 570/763 [06:19<01:35, 2.02it/s]
|
701 |
75%|ββββββββ | 571/763 [06:19<01:34, 2.02it/s]
|
702 |
75%|ββββββββ | 572/763 [06:20<01:34, 2.02it/s]
|
703 |
75%|ββββββββ | 573/763 [06:20<01:33, 2.02it/s]
|
704 |
75%|ββββββββ | 574/763 [06:21<01:33, 2.02it/s]
|
705 |
75%|ββββββββ | 575/763 [06:21<01:32, 2.02it/s]{'loss': 2.4095, 'grad_norm': 0.3562281131744385, 'learning_rate': 0.00017414565541703342, 'epoch': 0.75}
|
|
|
706 |
|
707 |
75%|ββββββββ | 575/763 [06:21<01:32, 2.02it/s]
|
708 |
75%|ββββββββ | 576/763 [06:22<01:32, 2.02it/s]
|
709 |
76%|ββββββββ | 577/763 [06:22<01:32, 2.02it/s]
|
710 |
76%|ββββββββ | 578/763 [06:23<01:31, 2.02it/s]
|
711 |
76%|ββββββββ | 579/763 [06:23<01:31, 2.02it/s]
|
712 |
76%|ββββββββ | 580/763 [06:24<01:30, 2.02it/s]
|
713 |
76%|ββββββββ | 581/763 [06:24<01:29, 2.02it/s]
|
714 |
76%|ββββββββ | 582/763 [06:25<01:29, 2.02it/s]
|
715 |
76%|ββββββββ | 583/763 [06:25<01:28, 2.02it/s]
|
716 |
77%|ββββββββ | 584/763 [06:26<01:28, 2.02it/s]
|
717 |
77%|ββββββββ | 585/763 [06:26<01:28, 2.02it/s]
|
718 |
77%|ββββββββ | 586/763 [06:27<01:27, 2.02it/s]
|
719 |
77%|ββββββββ | 587/763 [06:27<01:27, 2.02it/s]
|
720 |
77%|ββββββββ | 588/763 [06:28<01:26, 2.02it/s]
|
721 |
77%|ββββββββ | 589/763 [06:28<01:25, 2.02it/s]
|
722 |
77%|ββββββββ | 590/763 [06:29<01:25, 2.02it/s]
|
723 |
77%|ββββββββ | 591/763 [06:29<01:24, 2.02it/s]
|
724 |
78%|ββββββββ | 592/763 [06:30<01:24, 2.02it/s]
|
725 |
78%|ββββββββ | 593/763 [06:30<01:24, 2.02it/s]
|
726 |
78%|ββββββββ | 594/763 [06:31<01:23, 2.02it/s]
|
727 |
78%|ββββββββ | 595/763 [06:31<01:23, 2.02it/s]
|
728 |
78%|ββββββββ | 596/763 [06:32<01:22, 2.02it/s]
|
729 |
78%|ββββββββ | 597/763 [06:32<01:22, 2.02it/s]
|
730 |
78%|ββββββββ | 598/763 [06:33<01:21, 2.02it/s]
|
731 |
79%|ββββββββ | 599/763 [06:33<01:21, 2.02it/s]
|
732 |
79%|ββββββββ | 600/763 [06:34<01:20, 2.02it/s]
|
733 |
{'loss': 2.3795, 'grad_norm': 0.35245397686958313, 'learning_rate': 0.0001329553403026331, 'epoch': 0.79}
|
|
|
734 |
79%|ββββββββ | 600/763 [06:34<01:20, 2.02it/s]
|
735 |
79%|ββββββββ | 601/763 [06:34<01:20, 2.02it/s]
|
736 |
79%|ββββββββ | 602/763 [06:35<01:19, 2.02it/s]
|
737 |
79%|ββββββββ | 603/763 [06:35<01:18, 2.03it/s]
|
738 |
79%|ββββββββ | 604/763 [06:36<01:18, 2.02it/s]
|
739 |
79%|ββββββββ | 605/763 [06:36<01:17, 2.03it/s]
|
740 |
79%|ββββββββ | 606/763 [06:37<01:17, 2.03it/s]
|
741 |
80%|ββββββββ | 607/763 [06:37<01:17, 2.03it/s]
|
742 |
80%|ββββββββ | 608/763 [06:38<01:16, 2.02it/s]
|
743 |
80%|ββββββββ | 609/763 [06:38<01:22, 1.86it/s]
|
744 |
80%|ββββββββ | 610/763 [06:39<01:20, 1.91it/s]
|
745 |
80%|ββββββββ | 611/763 [06:39<01:18, 1.94it/s]
|
746 |
80%|ββββββββ | 612/763 [06:40<01:16, 1.97it/s]
|
747 |
80%|ββββββββ | 613/763 [06:40<01:15, 1.98it/s]
|
748 |
80%|ββββββββ | 614/763 [06:41<01:14, 2.00it/s]
|
749 |
81%|ββββββββ | 615/763 [06:41<01:13, 2.00it/s]
|
750 |
81%|ββββββββ | 616/763 [06:42<01:19, 1.85it/s]
|
751 |
81%|ββββββββ | 617/763 [06:42<01:16, 1.90it/s]
|
752 |
81%|ββββββββ | 618/763 [06:43<01:14, 1.94it/s]
|
753 |
81%|ββββββββ | 619/763 [06:43<01:13, 1.96it/s]
|
754 |
81%|βββββββββ | 620/763 [06:44<01:12, 1.98it/s]
|
755 |
81%|βββββββββ | 621/763 [06:44<01:11, 1.99it/s]
|
756 |
82%|βββββββββ | 622/763 [06:45<01:10, 2.00it/s]
|
757 |
82%|βββββββββ | 623/763 [06:45<01:09, 2.01it/s]
|
758 |
82%|βββββββββ | 624/763 [06:46<01:09, 2.01it/s]
|
759 |
82%|βββββββββ | 625/763 [06:46<01:08, 2.01it/s]{'loss': 2.3607, 'grad_norm': 0.3452637791633606, 'learning_rate': 9.657093924581261e-05, 'epoch': 0.82}
|
760 |
|
|
|
761 |
82%|βββββββββ | 625/763 [06:46<01:08, 2.01it/s]
|
762 |
82%|βββββββββ | 626/763 [06:47<01:07, 2.02it/s]
|
763 |
82%|βββββββββ | 627/763 [06:47<01:07, 2.02it/s]
|
764 |
82%|βββββββββ | 628/763 [06:48<01:06, 2.02it/s]
|
765 |
82%|βββββββββ | 629/763 [06:48<01:06, 2.02it/s]
|
766 |
83%|βββββββββ | 630/763 [06:49<01:05, 2.02it/s]
|
767 |
83%|βββββββββ | 631/763 [06:49<01:05, 2.02it/s]
|
768 |
83%|βββββββββ | 632/763 [06:50<01:04, 2.02it/s]
|
769 |
83%|βββββββββ | 633/763 [06:50<01:04, 2.03it/s]
|
770 |
83%|βββββββββ | 634/763 [06:51<01:03, 2.02it/s]
|
771 |
83%|βββββββββ | 635/763 [06:51<01:03, 2.03it/s]
|
772 |
83%|βββββββββ | 636/763 [06:52<01:02, 2.03it/s]
|
773 |
83%|βββββββββ | 637/763 [06:52<01:02, 2.02it/s]
|
774 |
84%|βββββββββ | 638/763 [06:53<01:01, 2.02it/s]
|
775 |
84%|βββββββββ | 639/763 [06:53<01:01, 2.02it/s]
|
776 |
84%|βββββββββ | 640/763 [06:54<01:00, 2.02it/s]
|
777 |
84%|βββββββββ | 641/763 [06:54<01:00, 2.02it/s]
|
778 |
84%|βββββββββ | 642/763 [06:55<00:59, 2.02it/s]
|
779 |
84%|βββββββββ | 643/763 [06:55<00:59, 2.02it/s]
|
780 |
84%|βββββββββ | 644/763 [06:56<00:58, 2.02it/s]
|
781 |
85%|βββββββββ | 645/763 [06:56<00:58, 2.02it/s]
|
782 |
85%|βββββββββ | 646/763 [06:57<00:57, 2.02it/s]
|
783 |
85%|βββββββββ | 647/763 [06:57<00:57, 2.02it/s]
|
784 |
85%|βββββββββ | 648/763 [06:58<00:56, 2.02it/s]
|
785 |
85%|βββββββββ | 649/763 [06:58<00:56, 2.02it/s]
|
786 |
85%|βββββββββ | 650/763 [06:59<00:55, 2.02it/s]{'loss': 2.3353, 'grad_norm': 0.34281203150749207, 'learning_rate': 6.546885286948184e-05, 'epoch': 0.85}
|
|
|
787 |
|
788 |
85%|βββββββββ | 650/763 [06:59<00:55, 2.02it/s]
|
789 |
85%|βββββββββ | 651/763 [06:59<00:55, 2.01it/s]
|
790 |
85%|βββββββββ | 652/763 [07:00<00:54, 2.02it/s]
|
791 |
86%|βββββββββ | 653/763 [07:00<00:54, 2.02it/s]
|
792 |
86%|βββββββββ | 654/763 [07:01<00:53, 2.02it/s]
|
793 |
86%|βββββββββ | 655/763 [07:01<00:53, 2.02it/s]
|
794 |
86%|βββββββββ | 656/763 [07:02<00:52, 2.03it/s]
|
795 |
86%|βββββββββ | 657/763 [07:02<00:52, 2.03it/s]
|
796 |
86%|βββββββββ | 658/763 [07:03<00:51, 2.03it/s]
|
797 |
86%|βββββββββ | 659/763 [07:03<00:51, 2.03it/s]
|
798 |
87%|βββββββββ | 660/763 [07:04<00:50, 2.03it/s]
|
799 |
87%|βββββββββ | 661/763 [07:04<00:50, 2.03it/s]
|
800 |
87%|βββββββββ | 662/763 [07:05<00:49, 2.03it/s]
|
801 |
87%|βββββββββ | 663/763 [07:05<00:49, 2.03it/s]
|
802 |
87%|βββββββββ | 664/763 [07:05<00:48, 2.03it/s]
|
803 |
87%|βββββββββ | 665/763 [07:06<00:48, 2.03it/s]
|
804 |
87%|βββββββββ | 666/763 [07:06<00:47, 2.03it/s]
|
805 |
87%|βββββββββ | 667/763 [07:07<00:47, 2.03it/s]
|
806 |
88%|βββββββββ | 668/763 [07:07<00:46, 2.03it/s]
|
807 |
88%|βββββββββ | 669/763 [07:08<00:46, 2.03it/s]
|
808 |
88%|ββββοΏ½οΏ½οΏ½ββββ | 670/763 [07:08<00:45, 2.03it/s]
|
809 |
88%|βββββββββ | 671/763 [07:09<00:45, 2.03it/s]
|
810 |
88%|βββββββββ | 672/763 [07:09<00:44, 2.03it/s]
|
811 |
88%|βββββββββ | 673/763 [07:10<00:44, 2.03it/s]
|
812 |
88%|βββββββββ | 674/763 [07:10<00:43, 2.03it/s]
|
813 |
88%|βββββββββ | 675/763 [07:11<00:43, 2.03it/s]{'loss': 2.3277, 'grad_norm': 0.33725234866142273, 'learning_rate': 4.0056317596204094e-05, 'epoch': 0.88}
|
|
|
814 |
|
815 |
88%|βββββββββ | 675/763 [07:11<00:43, 2.03it/s]
|
816 |
89%|βββββββββ | 676/763 [07:11<00:42, 2.02it/s]
|
817 |
89%|βββββββββ | 677/763 [07:12<00:42, 2.02it/s]
|
818 |
89%|βββββββββ | 678/763 [07:12<00:41, 2.03it/s]
|
819 |
89%|βββββββββ | 679/763 [07:13<00:41, 2.03it/s]
|
820 |
89%|βββββββββ | 680/763 [07:13<00:40, 2.03it/s]
|
821 |
89%|βββββββββ | 681/763 [07:14<00:40, 2.03it/s]
|
822 |
89%|βββββββββ | 682/763 [07:14<00:39, 2.03it/s]
|
823 |
90%|βββββββββ | 683/763 [07:15<00:39, 2.03it/s]
|
824 |
90%|βββββββββ | 684/763 [07:15<00:38, 2.03it/s]
|
825 |
90%|βββββββββ | 685/763 [07:16<00:38, 2.03it/s]
|
826 |
90%|βββββββββ | 686/763 [07:16<00:37, 2.03it/s]
|
827 |
90%|βββββββββ | 687/763 [07:17<00:37, 2.03it/s]
|
828 |
90%|βββββββββ | 688/763 [07:17<00:36, 2.03it/s]
|
829 |
90%|βββββββββ | 689/763 [07:18<00:36, 2.03it/s]
|
830 |
90%|βββββββββ | 690/763 [07:18<00:36, 2.03it/s]
|
831 |
91%|βββββββββ | 691/763 [07:19<00:35, 2.03it/s]
|
832 |
91%|βββββββββ | 692/763 [07:19<00:35, 2.03it/s]
|
833 |
91%|βββββββββ | 693/763 [07:20<00:34, 2.03it/s]
|
834 |
91%|βββββββββ | 694/763 [07:20<00:34, 2.03it/s]
|
835 |
91%|βββββββββ | 695/763 [07:21<00:33, 2.03it/s]
|
836 |
91%|βββββββββ | 696/763 [07:21<00:33, 2.03it/s]
|
837 |
91%|ββββββββββ| 697/763 [07:22<00:32, 2.03it/s]
|
838 |
91%|ββββββββββ| 698/763 [07:22<00:32, 2.03it/s]
|
839 |
92%|ββββββββββ| 699/763 [07:23<00:31, 2.03it/s]
|
840 |
92%|ββββββββββ| 700/763 [07:23<00:31, 2.03it/s]{'loss': 2.3087, 'grad_norm': 0.33684036135673523, 'learning_rate': 2.0666073481669712e-05, 'epoch': 0.92}
|
|
|
841 |
|
842 |
92%|ββββββββββ| 700/763 [07:23<00:31, 2.03it/s]
|
843 |
92%|ββββββββββ| 701/763 [07:24<00:30, 2.02it/s]
|
844 |
92%|ββββββββββ| 702/763 [07:24<00:30, 2.02it/s]
|
845 |
92%|ββββββββββ| 703/763 [07:25<00:29, 2.03it/s]
|
846 |
92%|ββββββββββ| 704/763 [07:25<00:29, 2.03it/s]
|
847 |
92%|ββββββββββ| 705/763 [07:26<00:28, 2.03it/s]
|
848 |
93%|ββββββββββ| 706/763 [07:26<00:28, 2.03it/s]
|
849 |
93%|ββββββββββ| 707/763 [07:27<00:27, 2.03it/s]
|
850 |
93%|ββββββββββ| 708/763 [07:27<00:27, 2.03it/s]
|
851 |
93%|ββββββββββ| 709/763 [07:28<00:26, 2.03it/s]
|
852 |
93%|ββββββββββ| 710/763 [07:28<00:26, 2.03it/s]
|
853 |
93%|ββββββββββ| 711/763 [07:29<00:25, 2.03it/s]
|
854 |
93%|ββββββββββ| 712/763 [07:29<00:25, 2.03it/s]
|
855 |
93%|ββββββββββ| 713/763 [07:30<00:24, 2.03it/s]
|
856 |
94%|ββββββββββ| 714/763 [07:30<00:24, 2.02it/s]
|
857 |
94%|ββββββββββ| 715/763 [07:31<00:23, 2.00it/s]
|
858 |
94%|ββββββββββ| 716/763 [07:31<00:23, 1.99it/s]
|
859 |
94%|ββββββββββ| 717/763 [07:32<00:23, 1.98it/s]
|
860 |
94%|ββββββββββ| 718/763 [07:32<00:22, 1.98it/s]
|
861 |
94%|ββββββββββ| 719/763 [07:33<00:22, 1.97it/s]
|
862 |
94%|ββββββββββ| 720/763 [07:33<00:21, 1.98it/s]
|
863 |
94%|ββββββββββ| 721/763 [07:34<00:21, 1.99it/s]
|
864 |
95%|ββββββββββ| 722/763 [07:34<00:20, 2.00it/s]
|
865 |
95%|ββββββββββ| 723/763 [07:35<00:19, 2.01it/s]
|
866 |
95%|ββββββββββ| 724/763 [07:35<00:19, 2.01it/s]
|
867 |
95%|ββββββββββ| 725/763 [07:36<00:18, 2.02it/s]
|
868 |
|
|
|
869 |
95%|ββββββββββ| 725/763 [07:36<00:18, 2.02it/s]
|
870 |
95%|ββββββββββ| 726/763 [07:36<00:18, 2.02it/s]
|
871 |
95%|ββββββββββ| 727/763 [07:37<00:17, 2.02it/s]
|
872 |
95%|ββββββββββ| 728/763 [07:37<00:17, 2.02it/s]
|
873 |
96%|ββββββββββ| 729/763 [07:38<00:16, 2.02it/s]
|
874 |
96%|ββββββββββ| 730/763 [07:38<00:16, 2.03it/s]
|
875 |
96%|ββββββββββ| 731/763 [07:39<00:15, 2.03it/s]
|
876 |
96%|ββββββββββ| 732/763 [07:39<00:15, 2.03it/s]
|
877 |
96%|ββββββββββ| 733/763 [07:40<00:14, 2.03it/s]
|
878 |
96%|βββββββοΏ½οΏ½ββ| 734/763 [07:40<00:14, 2.03it/s]
|
879 |
96%|ββββββββββ| 735/763 [07:41<00:13, 2.03it/s]
|
880 |
96%|ββββββββββ| 736/763 [07:41<00:13, 2.03it/s]
|
881 |
97%|ββββββββββ| 737/763 [07:42<00:12, 2.03it/s]
|
882 |
97%|ββββββββββ| 738/763 [07:42<00:12, 2.03it/s]
|
883 |
97%|ββββββββββ| 739/763 [07:43<00:11, 2.03it/s]
|
884 |
97%|ββββββββββ| 740/763 [07:43<00:11, 2.03it/s]
|
885 |
97%|ββββββββββ| 741/763 [07:44<00:10, 2.03it/s]
|
886 |
97%|ββββββββββ| 742/763 [07:44<00:10, 2.03it/s]
|
887 |
97%|ββββββββββ| 743/763 [07:45<00:09, 2.02it/s]
|
888 |
98%|ββββββββββ| 744/763 [07:45<00:09, 2.00it/s]
|
889 |
98%|ββββββββββ| 745/763 [07:46<00:09, 1.98it/s]
|
890 |
98%|ββββββββββ| 746/763 [07:46<00:08, 1.98it/s]
|
891 |
98%|ββββββββββ| 747/763 [07:47<00:08, 1.98it/s]
|
892 |
98%|ββββββββββ| 748/763 [07:47<00:07, 1.98it/s]
|
893 |
98%|ββββββββββ| 749/763 [07:48<00:07, 1.99it/s]
|
894 |
98%|ββββββββββ| 750/763 [07:48<00:06, 2.00it/s]{'loss': 2.2993, 'grad_norm': 0.3305748701095581, 'learning_rate': 8.858291115876327e-07, 'epoch': 0.98}
|
|
|
895 |
|
896 |
98%|ββββββββββ| 750/763 [07:48<00:06, 2.00it/s]
|
897 |
98%|ββββββββββ| 751/763 [07:49<00:05, 2.00it/s]
|
898 |
99%|ββββββββββ| 752/763 [07:49<00:05, 2.01it/s]
|
899 |
99%|ββββββββββ| 753/763 [07:50<00:04, 2.01it/s]
|
900 |
99%|ββββββββββ| 754/763 [07:50<00:04, 2.02it/s]
|
901 |
99%|ββββββββββ| 755/763 [07:51<00:03, 2.02it/s]
|
902 |
99%|ββββββββββ| 756/763 [07:51<00:03, 2.02it/s]
|
903 |
99%|ββββββββββ| 757/763 [07:52<00:02, 2.02it/s]
|
904 |
99%|ββββββββββ| 758/763 [07:52<00:02, 2.02it/s]
|
905 |
99%|ββββββββββ| 759/763 [07:53<00:01, 2.02it/s]
|
906 |
|
907 |
+
|
908 |
1%| | 1/143 [00:05<12:46, 5.40s/it]
|
909 |
1%|β | 2/143 [00:06<06:44, 2.87s/it]
|
910 |
2%|β | 3/143 [00:07<04:20, 1.86s/it]
|
911 |
3%|β | 4/143 [00:07<03:05, 1.34s/it]
|
912 |
3%|β | 5/143 [00:08<02:24, 1.05s/it]
|
913 |
4%|β | 6/143 [00:08<01:58, 1.16it/s]
|
914 |
5%|β | 7/143 [00:09<01:41, 1.35it/s]
|
915 |
6%|β | 8/143 [00:09<01:29, 1.50it/s]
|
916 |
6%|β | 9/143 [00:10<01:22, 1.63it/s]
|
917 |
7%|β | 10/143 [00:10<01:16, 1.73it/s]
|
918 |
8%|β | 11/143 [00:11<01:12, 1.81it/s]
|
919 |
8%|β | 12/143 [00:11<01:10, 1.87it/s]
|
920 |
9%|β | 13/143 [00:12<01:08, 1.91it/s]
|
921 |
10%|β | 14/143 [00:12<01:06, 1.94it/s]
|
922 |
10%|β | 15/143 [00:13<01:05, 1.96it/s]
|
923 |
11%|β | 16/143 [00:13<01:04, 1.98it/s]
|
924 |
12%|ββ | 17/143 [00:14<01:03, 1.99it/s]
|
925 |
13%|ββ | 18/143 [00:14<01:02, 2.00it/s]
|
926 |
13%|ββ | 19/143 [00:15<01:01, 2.00it/s]
|
927 |
14%|ββ | 20/143 [00:15<01:01, 2.01it/s]
|
928 |
15%|ββ | 21/143 [00:16<01:00, 2.01it/s]
|
929 |
15%|ββ | 22/143 [00:16<01:00, 2.01it/s]
|
930 |
16%|ββ | 23/143 [00:17<01:01, 1.96it/s]
|
931 |
17%|ββ | 24/143 [00:17<01:00, 1.98it/s]
|
932 |
17%|ββ | 25/143 [00:18<00:59, 1.99it/s]
|
933 |
|
934 |
+
|
935 |
17%|ββ | 25/143 [00:18<00:59, 1.99it/s]
|
936 |
18%|ββ | 26/143 [00:18<00:58, 1.99it/s]
|
937 |
19%|ββ | 27/143 [00:19<00:57, 2.00it/s]
|
938 |
20%|ββ | 28/143 [00:19<00:57, 2.01it/s]
|
939 |
20%|ββ | 29/143 [00:20<00:56, 2.01it/s]
|
940 |
21%|ββ | 30/143 [00:20<00:56, 2.02it/s]
|
941 |
22%|βββ | 31/143 [00:21<00:55, 2.02it/s]
|
942 |
22%|βββ | 32/143 [00:21<00:54, 2.02it/s]
|
943 |
23%|βββ | 33/143 [00:22<00:54, 2.02it/s]
|
944 |
24%|βββ | 34/143 [00:22<00:53, 2.02it/s]
|
945 |
24%|βββ | 35/143 [00:23<00:53, 2.02it/s]
|
946 |
25%|βββ | 36/143 [00:23<00:52, 2.02it/s]
|
947 |
26%|βββ | 37/143 [00:24<00:52, 2.03it/s]
|
948 |
27%|βββ | 38/143 [00:24<00:51, 2.03it/s]
|
949 |
27%|βββ | 39/143 [00:25<00:51, 2.02it/s]
|
950 |
28%|βββ | 40/143 [00:25<00:50, 2.02it/s]
|
951 |
29%|βββ | 41/143 [00:26<00:50, 2.01it/s]
|
952 |
29%|βββ | 42/143 [00:26<00:50, 2.02it/s]
|
953 |
30%|βββ | 43/143 [00:27<00:49, 2.01it/s]
|
954 |
31%|βοΏ½οΏ½β | 44/143 [00:27<00:49, 2.02it/s]
|
955 |
31%|ββββ | 45/143 [00:28<00:48, 2.02it/s]
|
956 |
32%|ββββ | 46/143 [00:28<00:48, 2.01it/s]
|
957 |
33%|ββββ | 47/143 [00:29<00:47, 2.00it/s]
|
958 |
34%|ββββ | 48/143 [00:29<00:47, 2.00it/s]
|
959 |
34%|ββββ | 49/143 [00:30<00:46, 2.01it/s]
|
960 |
35%|ββββ | 50/143 [00:30<00:46, 2.01it/s]
|
961 |
|
962 |
+
|
963 |
35%|ββββ | 50/143 [00:30<00:46, 2.01it/s]
|
964 |
36%|ββββ | 51/143 [00:31<00:45, 2.01it/s]
|
965 |
36%|ββββ | 52/143 [00:31<00:45, 2.01it/s]
|
966 |
37%|ββββ | 53/143 [00:32<00:44, 2.01it/s]
|
967 |
38%|ββββ | 54/143 [00:32<00:44, 2.01it/s]
|
968 |
38%|ββββ | 55/143 [00:33<00:43, 2.01it/s]
|
969 |
39%|ββββ | 56/143 [00:33<00:43, 2.02it/s]
|
970 |
40%|ββββ | 57/143 [00:34<00:42, 2.02it/s]
|
971 |
41%|ββββ | 58/143 [00:34<00:42, 2.02it/s]
|
972 |
41%|βββββ | 59/143 [00:35<00:41, 2.02it/s]
|
973 |
42%|βββββ | 60/143 [00:35<00:41, 2.02it/s]
|
974 |
43%|βββββ | 61/143 [00:36<00:40, 2.02it/s]
|
975 |
43%|βββββ | 62/143 [00:36<00:40, 2.02it/s]
|
976 |
44%|βββββ | 63/143 [00:37<00:39, 2.02it/s]
|
977 |
45%|βββββ | 64/143 [00:37<00:39, 2.02it/s]
|
978 |
45%|βββββ | 65/143 [00:38<00:38, 2.02it/s]
|
979 |
46%|βββββ | 66/143 [00:38<00:38, 2.02it/s]
|
980 |
47%|βββββ | 67/143 [00:39<00:37, 2.02it/s]
|
981 |
48%|βββββ | 68/143 [00:39<00:37, 2.01it/s]
|
982 |
48%|βββββ | 69/143 [00:40<00:36, 2.02it/s]
|
983 |
49%|βββββ | 70/143 [00:40<00:36, 2.02it/s]
|
984 |
50%|βββββ | 71/143 [00:41<00:35, 2.02it/s]
|
985 |
50%|βββββ | 72/143 [00:41<00:35, 2.02it/s]
|
986 |
51%|βββββ | 73/143 [00:41<00:34, 2.02it/s]
|
987 |
52%|ββββββ | 74/143 [00:42<00:34, 2.02it/s]
|
988 |
52%|ββββββ | 75/143 [00:42<00:33, 2.02it/s]
|
989 |
|
990 |
+
|
991 |
52%|ββββββ | 75/143 [00:42<00:33, 2.02it/s]
|
992 |
53%|ββββββ | 76/143 [00:43<00:33, 2.02it/s]
|
993 |
54%|ββββββ | 77/143 [00:43<00:32, 2.02it/s]
|
994 |
55%|ββββββ | 78/143 [00:44<00:32, 2.02it/s]
|
995 |
55%|ββββββ | 79/143 [00:44<00:31, 2.02it/s]
|
996 |
56%|ββββββ | 80/143 [00:45<00:31, 2.02it/s]
|
997 |
57%|ββββββ | 81/143 [00:45<00:30, 2.02it/s]
|
998 |
57%|ββββββ | 82/143 [00:46<00:30, 2.02it/s]
|
999 |
58%|ββββββ | 83/143 [00:46<00:29, 2.02it/s]
|
1000 |
59%|ββββββ | 84/143 [00:47<00:29, 2.02it/s]
|
1001 |
59%|ββββββ | 85/143 [00:47<00:28, 2.02it/s]
|
1002 |
60%|ββββββ | 86/143 [00:48<00:28, 2.02it/s]
|
1003 |
61%|ββββββ | 87/143 [00:48<00:27, 2.02it/s]
|
1004 |
62%|βββββββ | 88/143 [00:49<00:27, 2.02it/s]
|
1005 |
62%|βββββββ | 89/143 [00:49<00:26, 2.02it/s]
|
1006 |
63%|βββββββ | 90/143 [00:50<00:26, 2.03it/s]
|
1007 |
64%|βββββββ | 91/143 [00:50<00:25, 2.02it/s]
|
1008 |
64%|βββββββ | 92/143 [00:51<00:25, 2.03it/s]
|
1009 |
65%|βββββββ | 93/143 [00:51<00:24, 2.02it/s]
|
1010 |
66%|βββββββ | 94/143 [00:52<00:24, 2.02it/s]
|
1011 |
66%|βββββββ | 95/143 [00:52<00:23, 2.02it/s]
|
1012 |
67%|βββββββ | 96/143 [00:53<00:23, 2.02it/s]
|
1013 |
68%|βββββββ | 97/143 [00:53<00:22, 2.02it/s]
|
1014 |
69%|βββββββ | 98/143 [00:54<00:22, 2.02it/s]
|
1015 |
69%|βββββββ | 99/143 [00:54<00:21, 2.02it/s]
|
1016 |
70%|βββββββ | 100/143 [00:55<00:21, 2.02it/s]
|
1017 |
|
1018 |
+
|
1019 |
70%|βββββββ | 100/143 [00:55<00:21, 2.02it/s]
|
1020 |
71%|βββββββ | 101/143 [00:55<00:21, 2.00it/s]
|
1021 |
71%|ββββββββ | 102/143 [00:56<00:20, 2.00it/s]
|
1022 |
72%|ββββββββ | 103/143 [00:56<00:20, 2.00it/s]
|
1023 |
73%|ββββββββ | 104/143 [00:57<00:19, 2.00it/s]
|
1024 |
73%|ββββββββ | 105/143 [00:57<00:18, 2.00it/s]
|
1025 |
74%|ββββββββ | 106/143 [00:58<00:18, 2.00it/s]
|
1026 |
75%|ββββββββ | 107/143 [00:58<00:17, 2.01it/s]
|
1027 |
76%|ββββββββ | 108/143 [00:59<00:17, 2.01it/s]
|
1028 |
76%|ββββββββ | 109/143 [00:59<00:16, 2.01it/s]
|
1029 |
77%|ββββββββ | 110/143 [01:00<00:16, 2.01it/s]
|
1030 |
78%|ββββββββ | 111/143 [01:00<00:15, 2.00it/s]
|
1031 |
78%|ββββββββ | 112/143 [01:01<00:15, 2.01it/s]
|
1032 |
79%|ββββββββ | 113/143 [01:01<00:14, 2.01it/s]
|
1033 |
80%|ββββββββ | 114/143 [01:02<00:14, 2.00it/s]
|
1034 |
80%|ββββββββ | 115/143 [01:02<00:13, 2.01it/s]
|
1035 |
81%|ββββββββ | 116/143 [01:03<00:13, 2.01it/s]
|
1036 |
82%|ββοΏ½οΏ½ββββββ | 117/143 [01:03<00:12, 2.01it/s]
|
1037 |
83%|βββββββββ | 118/143 [01:04<00:12, 2.01it/s]
|
1038 |
83%|βββββββββ | 119/143 [01:04<00:11, 2.01it/s]
|
1039 |
84%|βββββββββ | 120/143 [01:05<00:11, 2.01it/s]
|
1040 |
85%|βββββββββ | 121/143 [01:05<00:10, 2.01it/s]
|
1041 |
85%|βββββββββ | 122/143 [01:06<00:10, 2.01it/s]
|
1042 |
86%|βββββββββ | 123/143 [01:06<00:09, 2.01it/s]
|
1043 |
87%|βββββββββ | 124/143 [01:07<00:09, 2.01it/s]
|
1044 |
87%|βββββββββ | 125/143 [01:07<00:08, 2.01it/s]
|
1045 |
|
1046 |
+
|
1047 |
87%|βββββββββ | 125/143 [01:07<00:08, 2.01it/s]
|
1048 |
88%|βββββββββ | 126/143 [01:08<00:08, 2.01it/s]
|
1049 |
89%|βββββββββ | 127/143 [01:08<00:07, 2.00it/s]
|
1050 |
90%|βββββββββ | 128/143 [01:09<00:07, 2.01it/s]
|
1051 |
90%|βββββββββ | 129/143 [01:09<00:06, 2.01it/s]
|
1052 |
91%|βββββββββ | 130/143 [01:10<00:06, 2.01it/s]
|
1053 |
92%|ββββββββββ| 131/143 [01:10<00:05, 2.01it/s]
|
1054 |
92%|ββββββββββ| 132/143 [01:11<00:05, 2.01it/s]
|
1055 |
93%|ββββββββββ| 133/143 [01:11<00:04, 2.01it/s]
|
1056 |
94%|ββββββββββ| 134/143 [01:12<00:04, 2.01it/s]
|
1057 |
94%|ββββββββββ| 135/143 [01:12<00:03, 2.01it/s]
|
1058 |
95%|ββββββββββ| 136/143 [01:13<00:03, 2.01it/s]
|
1059 |
96%|ββββββββββ| 137/143 [01:13<00:02, 2.01it/s]
|
1060 |
97%|ββββββββββ| 138/143 [01:14<00:02, 2.01it/s]
|
1061 |
97%|ββββββββββ| 139/143 [01:14<00:01, 2.01it/s]
|
1062 |
98%|ββββββββββ| 140/143 [01:15<00:01, 2.01it/s]
|
1063 |
99%|ββββββββββ| 141/143 [01:15<00:00, 2.01it/s]
|
1064 |
99%|ββββββββββ| 142/143 [01:16<00:00, 2.01it/s]
|
1065 |
|
1066 |
+
|
1067 |
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 5048
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f24c861d8e2a8e382ffd63335113b0a64b19c4f97d3e52926300fdc72cf223fe
|
3 |
size 5048
|