diff --git "a/nohup.out" "b/nohup.out" --- "a/nohup.out" +++ "b/nohup.out" @@ -1,6 +1,6 @@ -2021-07-06 15:10:54.627071: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory -[15:10:55] - WARNING - __main__ - Process rank: -1, device: cpu, n_gpu: 0distributed training: False, 16-bits training: False -[15:10:55] - INFO - __main__ - Training/evaluation parameters TrainingArguments( +2021-07-07 09:18:29.533336: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory +[09:18:30] - WARNING - __main__ - Process rank: -1, device: cpu, n_gpu: 0distributed training: False, 16-bits training: False +[09:18:30] - INFO - __main__ - Training/evaluation parameters TrainingArguments( _n_gpu=0, adafactor=False, adam_beta1=0.9, @@ -36,7 +36,7 @@ local_rank=-1, log_level=-1, log_level_replica=-1, log_on_each_node=True, -logging_dir=./runs/Jul06_15-10-55_t1v-n-b95d739e-w-0, +logging_dir=./runs/Jul07_09-18-30_t1v-n-b95d739e-w-0, logging_first_step=False, logging_steps=500, logging_strategy=IntervalStrategy.STEPS, @@ -75,73616 +75,24472 @@ warmup_ratio=0.0, warmup_steps=1000, weight_decay=0.01, ) -[15:10:55] - DEBUG - urllib3.connectionpool - Starting new HTTPS connection (1): s3.amazonaws.com:443 -[15:10:56] - DEBUG - urllib3.connectionpool - https://s3.amazonaws.com:443 "HEAD /datasets.huggingface.co/datasets/datasets/oscar/oscar.py HTTP/1.1" 404 0 -[15:10:56] - DEBUG - urllib3.connectionpool - Starting new HTTPS connection (1): raw.githubusercontent.com:443 -[15:10:56] - DEBUG - urllib3.connectionpool - https://raw.githubusercontent.com:443 "HEAD /huggingface/datasets/master/datasets/oscar/oscar.py HTTP/1.1" 200 0 -[15:10:56] - DEBUG - urllib3.connectionpool - Starting new HTTPS connection (1): raw.githubusercontent.com:443 -[15:10:56] - DEBUG - urllib3.connectionpool - https://raw.githubusercontent.com:443 "HEAD /huggingface/datasets/master/datasets/oscar/dataset_infos.json HTTP/1.1" 200 0 -[15:10:56] - WARNING - datasets.builder - Reusing dataset oscar (/home/wilso/.cache/huggingface/datasets/oscar/unshuffled_deduplicated_id/1.0.0/84838bd49d2295f62008383b05620571535451d84545037bb94d6f3501651df2) -[15:10:56] - DEBUG - urllib3.connectionpool - Starting new HTTPS connection (1): s3.amazonaws.com:443 -[15:10:56] - DEBUG - urllib3.connectionpool - https://s3.amazonaws.com:443 "HEAD /datasets.huggingface.co/datasets/datasets/oscar/oscar.py HTTP/1.1" 404 0 -[15:10:56] - DEBUG - urllib3.connectionpool - Starting new HTTPS connection (1): raw.githubusercontent.com:443 -[15:10:56] - DEBUG - urllib3.connectionpool - https://raw.githubusercontent.com:443 "HEAD /huggingface/datasets/master/datasets/oscar/oscar.py HTTP/1.1" 200 0 -[15:10:56] - DEBUG - urllib3.connectionpool - Starting new HTTPS connection (1): raw.githubusercontent.com:443 -[15:10:57] - DEBUG - urllib3.connectionpool - https://raw.githubusercontent.com:443 "HEAD /huggingface/datasets/master/datasets/oscar/dataset_infos.json HTTP/1.1" 200 0 -[15:10:57] - WARNING - datasets.builder - Reusing dataset oscar (/home/wilso/.cache/huggingface/datasets/oscar/unshuffled_deduplicated_id/1.0.0/84838bd49d2295f62008383b05620571535451d84545037bb94d6f3501651df2) -[15:10:57] - DEBUG - urllib3.connectionpool - Starting new HTTPS connection (1): s3.amazonaws.com:443 -[15:10:57] - DEBUG - urllib3.connectionpool - https://s3.amazonaws.com:443 "HEAD /datasets.huggingface.co/datasets/datasets/oscar/oscar.py HTTP/1.1" 404 0 -[15:10:57] - DEBUG - urllib3.connectionpool - Starting new HTTPS connection (1): raw.githubusercontent.com:443 -[15:10:57] - DEBUG - urllib3.connectionpool - https://raw.githubusercontent.com:443 "HEAD /huggingface/datasets/master/datasets/oscar/oscar.py HTTP/1.1" 200 0 -[15:10:57] - DEBUG - urllib3.connectionpool - Starting new HTTPS connection (1): raw.githubusercontent.com:443 -[15:10:57] - DEBUG - urllib3.connectionpool - https://raw.githubusercontent.com:443 "HEAD /huggingface/datasets/master/datasets/oscar/dataset_infos.json HTTP/1.1" 200 0 -[15:10:57] - WARNING - datasets.builder - Reusing dataset oscar (/home/wilso/.cache/huggingface/datasets/oscar/unshuffled_deduplicated_id/1.0.0/84838bd49d2295f62008383b05620571535451d84545037bb94d6f3501651df2) -[15:10:57] - WARNING - datasets.arrow_dataset - Loading cached processed dataset at /home/wilso/.cache/huggingface/datasets/oscar/unshuffled_deduplicated_id/1.0.0/84838bd49d2295f62008383b05620571535451d84545037bb94d6f3501651df2/cache-484e12c5eef7e8e7.arrow -[15:10:58] - WARNING - datasets.arrow_dataset - Loading cached processed dataset at /home/wilso/.cache/huggingface/datasets/oscar/unshuffled_deduplicated_id/1.0.0/84838bd49d2295f62008383b05620571535451d84545037bb94d6f3501651df2/cache-c71dccca2ce1349d.arrow -[15:10:58] - WARNING - datasets.arrow_dataset - Loading cached processed dataset at /home/wilso/.cache/huggingface/datasets/oscar/unshuffled_deduplicated_id/1.0.0/84838bd49d2295f62008383b05620571535451d84545037bb94d6f3501651df2/cache-cddb40395f5104e7.arrow -[15:10:59] - WARNING - datasets.arrow_dataset - Loading cached processed dataset at /home/wilso/.cache/huggingface/datasets/oscar/unshuffled_deduplicated_id/1.0.0/84838bd49d2295f62008383b05620571535451d84545037bb94d6f3501651df2/cache-e6293c8aef77ae9a.arrow -[15:10:59] - INFO - absl - Starting the local TPU driver. -[15:10:59] - INFO - absl - Unable to initialize backend 'tpu_driver': Not found: Unable to find driver in registry given worker: local:// -[15:10:59] - INFO - absl - Unable to initialize backend 'gpu': Not found: Could not find registered platform with name: "cuda". Available platform names are: TPU Interpreter Host -2021-07-06 15:11:02.669720: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory -2021-07-06 15:11:02.669764: W tensorflow/stream_executor/cuda/cuda_driver.cc:326] failed call to cuInit: UNKNOWN ERROR (303) +[09:18:30] - DEBUG - urllib3.connectionpool - Starting new HTTPS connection (1): s3.amazonaws.com:443 +[09:18:30] - DEBUG - urllib3.connectionpool - https://s3.amazonaws.com:443 "HEAD /datasets.huggingface.co/datasets/datasets/oscar/oscar.py HTTP/1.1" 404 0 +[09:18:30] - DEBUG - urllib3.connectionpool - Starting new HTTPS connection (1): raw.githubusercontent.com:443 +[09:18:31] - DEBUG - urllib3.connectionpool - https://raw.githubusercontent.com:443 "HEAD /huggingface/datasets/master/datasets/oscar/oscar.py HTTP/1.1" 200 0 +[09:18:31] - DEBUG - urllib3.connectionpool - Starting new HTTPS connection (1): raw.githubusercontent.com:443 +[09:18:31] - DEBUG - urllib3.connectionpool - https://raw.githubusercontent.com:443 "HEAD /huggingface/datasets/master/datasets/oscar/dataset_infos.json HTTP/1.1" 200 0 +[09:18:31] - WARNING - datasets.builder - Reusing dataset oscar (/home/wilso/.cache/huggingface/datasets/oscar/unshuffled_deduplicated_id/1.0.0/84838bd49d2295f62008383b05620571535451d84545037bb94d6f3501651df2) +[09:18:31] - DEBUG - urllib3.connectionpool - Starting new HTTPS connection (1): s3.amazonaws.com:443 +[09:18:31] - DEBUG - urllib3.connectionpool - https://s3.amazonaws.com:443 "HEAD /datasets.huggingface.co/datasets/datasets/oscar/oscar.py HTTP/1.1" 404 0 +[09:18:31] - DEBUG - urllib3.connectionpool - Starting new HTTPS connection (1): raw.githubusercontent.com:443 +[09:18:31] - DEBUG - urllib3.connectionpool - https://raw.githubusercontent.com:443 "HEAD /huggingface/datasets/master/datasets/oscar/oscar.py HTTP/1.1" 200 0 +[09:18:31] - DEBUG - urllib3.connectionpool - Starting new HTTPS connection (1): raw.githubusercontent.com:443 +[09:18:31] - DEBUG - urllib3.connectionpool - https://raw.githubusercontent.com:443 "HEAD /huggingface/datasets/master/datasets/oscar/dataset_infos.json HTTP/1.1" 200 0 +[09:18:31] - WARNING - datasets.builder - Reusing dataset oscar (/home/wilso/.cache/huggingface/datasets/oscar/unshuffled_deduplicated_id/1.0.0/84838bd49d2295f62008383b05620571535451d84545037bb94d6f3501651df2) +[09:18:32] - DEBUG - urllib3.connectionpool - Starting new HTTPS connection (1): s3.amazonaws.com:443 +[09:18:32] - DEBUG - urllib3.connectionpool - https://s3.amazonaws.com:443 "HEAD /datasets.huggingface.co/datasets/datasets/oscar/oscar.py HTTP/1.1" 404 0 +[09:18:32] - DEBUG - urllib3.connectionpool - Starting new HTTPS connection (1): raw.githubusercontent.com:443 +[09:18:32] - DEBUG - urllib3.connectionpool - https://raw.githubusercontent.com:443 "HEAD /huggingface/datasets/master/datasets/oscar/oscar.py HTTP/1.1" 200 0 +[09:18:32] - DEBUG - urllib3.connectionpool - Starting new HTTPS connection (1): raw.githubusercontent.com:443 +[09:18:32] - DEBUG - urllib3.connectionpool - https://raw.githubusercontent.com:443 "HEAD /huggingface/datasets/master/datasets/oscar/dataset_infos.json HTTP/1.1" 200 0 +[09:18:32] - WARNING - datasets.builder - Reusing dataset oscar (/home/wilso/.cache/huggingface/datasets/oscar/unshuffled_deduplicated_id/1.0.0/84838bd49d2295f62008383b05620571535451d84545037bb94d6f3501651df2) +[09:18:32] - WARNING - datasets.arrow_dataset - Loading cached processed dataset at /home/wilso/.cache/huggingface/datasets/oscar/unshuffled_deduplicated_id/1.0.0/84838bd49d2295f62008383b05620571535451d84545037bb94d6f3501651df2/cache-484e12c5eef7e8e7.arrow +[09:18:33] - WARNING - datasets.arrow_dataset - Loading cached processed dataset at /home/wilso/.cache/huggingface/datasets/oscar/unshuffled_deduplicated_id/1.0.0/84838bd49d2295f62008383b05620571535451d84545037bb94d6f3501651df2/cache-c71dccca2ce1349d.arrow +[09:18:33] - WARNING - datasets.arrow_dataset - Loading cached processed dataset at /home/wilso/.cache/huggingface/datasets/oscar/unshuffled_deduplicated_id/1.0.0/84838bd49d2295f62008383b05620571535451d84545037bb94d6f3501651df2/cache-cddb40395f5104e7.arrow +[09:18:33] - WARNING - datasets.arrow_dataset - Loading cached processed dataset at /home/wilso/.cache/huggingface/datasets/oscar/unshuffled_deduplicated_id/1.0.0/84838bd49d2295f62008383b05620571535451d84545037bb94d6f3501651df2/cache-e6293c8aef77ae9a.arrow +[09:18:33] - INFO - absl - Starting the local TPU driver. +[09:18:33] - INFO - absl - Unable to initialize backend 'tpu_driver': Not found: Unable to find driver in registry given worker: local:// +[09:18:33] - INFO - absl - Unable to initialize backend 'gpu': Not found: Could not find registered platform with name: "cuda". Available platform names are: TPU Interpreter Host +2021-07-07 09:18:37.315233: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory +2021-07-07 09:18:37.315272: W tensorflow/stream_executor/cuda/cuda_driver.cc:326] failed call to cuInit: UNKNOWN ERROR (303) /home/wilso/hf/lib/python3.8/site-packages/jax/lib/xla_bridge.py:382: UserWarning: jax.host_count has been renamed to jax.process_count. This alias will eventually be removed; please update your code. warnings.warn( /home/wilso/hf/lib/python3.8/site-packages/jax/lib/xla_bridge.py:369: UserWarning: jax.host_id has been renamed to jax.process_index. This alias will eventually be removed; please update your code. warnings.warn( Epoch ... (1/18): 0%| | 0/18 [00:00 - model.save_pretrained( - File "/home/wilso/transformers/src/transformers/modeling_flax_utils.py", line 456, in save_pretrained - url = self._push_to_hub(repo, commit_message=commit_message) - File "/home/wilso/transformers/src/transformers/file_utils.py", line 2107, in _push_to_hub - return repo.push_to_hub(commit_message=commit_message) - File "/home/wilso/hf/lib/python3.8/site-packages/huggingface_hub/repository.py", line 434, in push_to_hub - return self.git_push() - File "/home/wilso/hf/lib/python3.8/site-packages/huggingface_hub/repository.py", line 422, in git_push - raise EnvironmentError(exc.stderr) -OSError: fatal: could not read Username for 'https://huggingface.co': No such device or address - -2021-07-06 18:03:23.641412: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory -[18:03:24] - WARNING - __main__ - Process rank: -1, device: cpu, n_gpu: 0distributed training: False, 16-bits training: False -[18:03:24] - INFO - __main__ - Training/evaluation parameters TrainingArguments( -_n_gpu=0, -adafactor=False, -adam_beta1=0.9, -adam_beta2=0.98, -adam_epsilon=1e-08, -dataloader_drop_last=False, -dataloader_num_workers=0, -dataloader_pin_memory=True, -ddp_find_unused_parameters=None, -debug=[], -deepspeed=None, -disable_tqdm=False, -do_eval=False, -do_predict=False, -do_train=False, -eval_accumulation_steps=None, -eval_steps=500, -evaluation_strategy=IntervalStrategy.NO, -fp16=False, -fp16_backend=auto, -fp16_full_eval=False, -fp16_opt_level=O1, -gradient_accumulation_steps=1, -greater_is_better=None, -group_by_length=False, -ignore_data_skip=False, -label_names=None, -label_smoothing_factor=0.0, -learning_rate=0.0003, -length_column_name=length, -load_best_model_at_end=False, -local_rank=-1, -log_level=-1, -log_level_replica=-1, -log_on_each_node=True, -logging_dir=./runs/Jul06_18-03-24_t1v-n-b95d739e-w-0, -logging_first_step=False, -logging_steps=500, -logging_strategy=IntervalStrategy.STEPS, -lr_scheduler_type=SchedulerType.LINEAR, -max_grad_norm=1.0, -max_steps=-1, -metric_for_best_model=None, -mp_parameters=, -no_cuda=False, -num_train_epochs=18.0, -output_dir=./, -overwrite_output_dir=True, -past_index=-1, -per_device_eval_batch_size=128, -per_device_train_batch_size=128, -prediction_loss_only=False, -push_to_hub=True, -push_to_hub_model_id=, -push_to_hub_organization=None, -push_to_hub_token=None, -remove_unused_columns=True, -report_to=['tensorboard'], -resume_from_checkpoint=None, -run_name=./, -save_on_each_node=False, -save_steps=500, -save_strategy=IntervalStrategy.STEPS, -save_total_limit=None, -seed=42, -sharded_ddp=[], -skip_memory_metrics=True, -tpu_metrics_debug=False, -tpu_num_cores=None, -use_legacy_prediction_loop=False, -warmup_ratio=0.0, -warmup_steps=1000, -weight_decay=0.01, -) -[18:03:24] - DEBUG - urllib3.connectionpool - Starting new HTTPS connection (1): s3.amazonaws.com:443 -[18:03:25] - DEBUG - urllib3.connectionpool - https://s3.amazonaws.com:443 "HEAD /datasets.huggingface.co/datasets/datasets/oscar/oscar.py HTTP/1.1" 404 0 -[18:03:25] - DEBUG - urllib3.connectionpool - Starting new HTTPS connection (1): raw.githubusercontent.com:443 -[18:03:25] - DEBUG - urllib3.connectionpool - https://raw.githubusercontent.com:443 "HEAD /huggingface/datasets/master/datasets/oscar/oscar.py HTTP/1.1" 200 0 -[18:03:25] - DEBUG - urllib3.connectionpool - Starting new HTTPS connection (1): raw.githubusercontent.com:443 -[18:03:25] - DEBUG - urllib3.connectionpool - https://raw.githubusercontent.com:443 "HEAD /huggingface/datasets/master/datasets/oscar/dataset_infos.json HTTP/1.1" 200 0 -[18:03:25] - WARNING - datasets.builder - Reusing dataset oscar (/home/wilso/.cache/huggingface/datasets/oscar/unshuffled_deduplicated_id/1.0.0/84838bd49d2295f62008383b05620571535451d84545037bb94d6f3501651df2) -[18:03:25] - DEBUG - urllib3.connectionpool - Starting new HTTPS connection (1): s3.amazonaws.com:443 -[18:03:26] - DEBUG - urllib3.connectionpool - https://s3.amazonaws.com:443 "HEAD /datasets.huggingface.co/datasets/datasets/oscar/oscar.py HTTP/1.1" 404 0 -[18:03:26] - DEBUG - urllib3.connectionpool - Starting new HTTPS connection (1): raw.githubusercontent.com:443 -[18:03:26] - DEBUG - urllib3.connectionpool - https://raw.githubusercontent.com:443 "HEAD /huggingface/datasets/master/datasets/oscar/oscar.py HTTP/1.1" 200 0 -[18:03:26] - DEBUG - urllib3.connectionpool - Starting new HTTPS connection (1): raw.githubusercontent.com:443 -[18:03:26] - DEBUG - urllib3.connectionpool - https://raw.githubusercontent.com:443 "HEAD /huggingface/datasets/master/datasets/oscar/dataset_infos.json HTTP/1.1" 200 0 -[18:03:26] - WARNING - datasets.builder - Reusing dataset oscar (/home/wilso/.cache/huggingface/datasets/oscar/unshuffled_deduplicated_id/1.0.0/84838bd49d2295f62008383b05620571535451d84545037bb94d6f3501651df2) -[18:03:26] - DEBUG - urllib3.connectionpool - Starting new HTTPS connection (1): s3.amazonaws.com:443 -[18:03:26] - DEBUG - urllib3.connectionpool - https://s3.amazonaws.com:443 "HEAD /datasets.huggingface.co/datasets/datasets/oscar/oscar.py HTTP/1.1" 404 0 -[18:03:26] - DEBUG - urllib3.connectionpool - Starting new HTTPS connection (1): raw.githubusercontent.com:443 -[18:03:26] - DEBUG - urllib3.connectionpool - https://raw.githubusercontent.com:443 "HEAD /huggingface/datasets/master/datasets/oscar/oscar.py HTTP/1.1" 200 0 -[18:03:26] - DEBUG - urllib3.connectionpool - Starting new HTTPS connection (1): raw.githubusercontent.com:443 -[18:03:26] - DEBUG - urllib3.connectionpool - https://raw.githubusercontent.com:443 "HEAD /huggingface/datasets/master/datasets/oscar/dataset_infos.json HTTP/1.1" 200 0 -[18:03:26] - WARNING - datasets.builder - Reusing dataset oscar (/home/wilso/.cache/huggingface/datasets/oscar/unshuffled_deduplicated_id/1.0.0/84838bd49d2295f62008383b05620571535451d84545037bb94d6f3501651df2) -[18:03:26] - WARNING - datasets.arrow_dataset - Loading cached processed dataset at /home/wilso/.cache/huggingface/datasets/oscar/unshuffled_deduplicated_id/1.0.0/84838bd49d2295f62008383b05620571535451d84545037bb94d6f3501651df2/cache-484e12c5eef7e8e7.arrow -[18:03:27] - WARNING - datasets.arrow_dataset - Loading cached processed dataset at /home/wilso/.cache/huggingface/datasets/oscar/unshuffled_deduplicated_id/1.0.0/84838bd49d2295f62008383b05620571535451d84545037bb94d6f3501651df2/cache-c71dccca2ce1349d.arrow -[18:03:27] - WARNING - datasets.arrow_dataset - Loading cached processed dataset at /home/wilso/.cache/huggingface/datasets/oscar/unshuffled_deduplicated_id/1.0.0/84838bd49d2295f62008383b05620571535451d84545037bb94d6f3501651df2/cache-cddb40395f5104e7.arrow -[18:03:28] - WARNING - datasets.arrow_dataset - Loading cached processed dataset at /home/wilso/.cache/huggingface/datasets/oscar/unshuffled_deduplicated_id/1.0.0/84838bd49d2295f62008383b05620571535451d84545037bb94d6f3501651df2/cache-e6293c8aef77ae9a.arrow -[18:03:28] - INFO - absl - Starting the local TPU driver. -[18:03:28] - INFO - absl - Unable to initialize backend 'tpu_driver': Not found: Unable to find driver in registry given worker: local:// -[18:03:28] - INFO - absl - Unable to initialize backend 'gpu': Not found: Could not find registered platform with name: "cuda". Available platform names are: TPU Interpreter Host -2021-07-06 18:03:31.086563: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory -2021-07-06 18:03:31.086633: W tensorflow/stream_executor/cuda/cuda_driver.cc:326] failed call to cuInit: UNKNOWN ERROR (303) -/home/wilso/hf/lib/python3.8/site-packages/jax/lib/xla_bridge.py:382: UserWarning: jax.host_count has been renamed to jax.process_count. This alias will eventually be removed; please update your code. - warnings.warn( -/home/wilso/hf/lib/python3.8/site-packages/jax/lib/xla_bridge.py:369: UserWarning: jax.host_id has been renamed to jax.process_index. This alias will eventually be removed; please update your code. - warnings.warn( - Epoch ... (1/18): 0%| | 0/18 [00:00 - model.save_pretrained( - File "/home/wilso/transformers/src/transformers/modeling_flax_utils.py", line 456, in save_pretrained - url = self._push_to_hub(repo, commit_message=commit_message) - File "/home/wilso/transformers/src/transformers/file_utils.py", line 2107, in _push_to_hub - return repo.push_to_hub(commit_message=commit_message) - File "/home/wilso/hf/lib/python3.8/site-packages/huggingface_hub/repository.py", line 434, in push_to_hub - return self.git_push() - File "/home/wilso/hf/lib/python3.8/site-packages/huggingface_hub/repository.py", line 422, in git_push - raise EnvironmentError(exc.stderr) -OSError: fatal: could not read Username for 'https://huggingface.co': No such device or address - -2021-07-07 03:56:49.927723: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory -[03:56:50] - WARNING - __main__ - Process rank: -1, device: cpu, n_gpu: 0distributed training: False, 16-bits training: False -[03:56:50] - INFO - __main__ - Training/evaluation parameters TrainingArguments( -_n_gpu=0, -adafactor=False, -adam_beta1=0.9, -adam_beta2=0.98, -adam_epsilon=1e-08, -dataloader_drop_last=False, -dataloader_num_workers=0, -dataloader_pin_memory=True, -ddp_find_unused_parameters=None, -debug=[], -deepspeed=None, -disable_tqdm=False, -do_eval=False, -do_predict=False, -do_train=False, -eval_accumulation_steps=None, -eval_steps=500, -evaluation_strategy=IntervalStrategy.NO, -fp16=False, -fp16_backend=auto, -fp16_full_eval=False, -fp16_opt_level=O1, -gradient_accumulation_steps=1, -greater_is_better=None, -group_by_length=False, -ignore_data_skip=False, -label_names=None, -label_smoothing_factor=0.0, -learning_rate=0.0003, -length_column_name=length, -load_best_model_at_end=False, -local_rank=-1, -log_level=-1, -log_level_replica=-1, -log_on_each_node=True, -logging_dir=./runs/Jul07_03-56-50_t1v-n-b95d739e-w-0, -logging_first_step=False, -logging_steps=500, -logging_strategy=IntervalStrategy.STEPS, -lr_scheduler_type=SchedulerType.LINEAR, -max_grad_norm=1.0, -max_steps=-1, -metric_for_best_model=None, -mp_parameters=, -no_cuda=False, -num_train_epochs=18.0, -output_dir=./, -overwrite_output_dir=True, -past_index=-1, -per_device_eval_batch_size=128, -per_device_train_batch_size=128, -prediction_loss_only=False, -push_to_hub=True, -push_to_hub_model_id=, -push_to_hub_organization=None, -push_to_hub_token=None, -remove_unused_columns=True, -report_to=['tensorboard'], -resume_from_checkpoint=None, -run_name=./, -save_on_each_node=False, -save_steps=500, -save_strategy=IntervalStrategy.STEPS, -save_total_limit=None, -seed=42, -sharded_ddp=[], -skip_memory_metrics=True, -tpu_metrics_debug=False, -tpu_num_cores=None, -use_legacy_prediction_loop=False, -warmup_ratio=0.0, -warmup_steps=1000, -weight_decay=0.01, -) -[03:56:51] - DEBUG - urllib3.connectionpool - Starting new HTTPS connection (1): s3.amazonaws.com:443 -[03:56:51] - DEBUG - urllib3.connectionpool - https://s3.amazonaws.com:443 "HEAD /datasets.huggingface.co/datasets/datasets/oscar/oscar.py HTTP/1.1" 404 0 -[03:56:51] - DEBUG - urllib3.connectionpool - Starting new HTTPS connection (1): raw.githubusercontent.com:443 -[03:56:51] - DEBUG - urllib3.connectionpool - https://raw.githubusercontent.com:443 "HEAD /huggingface/datasets/master/datasets/oscar/oscar.py HTTP/1.1" 200 0 -[03:56:51] - DEBUG - urllib3.connectionpool - Starting new HTTPS connection (1): raw.githubusercontent.com:443 -[03:56:51] - DEBUG - urllib3.connectionpool - https://raw.githubusercontent.com:443 "HEAD /huggingface/datasets/master/datasets/oscar/dataset_infos.json HTTP/1.1" 200 0 -[03:56:51] - WARNING - datasets.builder - Reusing dataset oscar (/home/wilso/.cache/huggingface/datasets/oscar/unshuffled_deduplicated_id/1.0.0/84838bd49d2295f62008383b05620571535451d84545037bb94d6f3501651df2) -[03:56:51] - DEBUG - urllib3.connectionpool - Starting new HTTPS connection (1): s3.amazonaws.com:443 -[03:56:52] - DEBUG - urllib3.connectionpool - https://s3.amazonaws.com:443 "HEAD /datasets.huggingface.co/datasets/datasets/oscar/oscar.py HTTP/1.1" 404 0 -[03:56:52] - DEBUG - urllib3.connectionpool - Starting new HTTPS connection (1): raw.githubusercontent.com:443 -[03:56:52] - DEBUG - urllib3.connectionpool - https://raw.githubusercontent.com:443 "HEAD /huggingface/datasets/master/datasets/oscar/oscar.py HTTP/1.1" 200 0 -[03:56:52] - DEBUG - urllib3.connectionpool - Starting new HTTPS connection (1): raw.githubusercontent.com:443 -[03:56:52] - DEBUG - urllib3.connectionpool - https://raw.githubusercontent.com:443 "HEAD /huggingface/datasets/master/datasets/oscar/dataset_infos.json HTTP/1.1" 200 0 -[03:56:52] - WARNING - datasets.builder - Reusing dataset oscar (/home/wilso/.cache/huggingface/datasets/oscar/unshuffled_deduplicated_id/1.0.0/84838bd49d2295f62008383b05620571535451d84545037bb94d6f3501651df2) -[03:56:52] - DEBUG - urllib3.connectionpool - Starting new HTTPS connection (1): s3.amazonaws.com:443 -[03:56:52] - DEBUG - urllib3.connectionpool - https://s3.amazonaws.com:443 "HEAD /datasets.huggingface.co/datasets/datasets/oscar/oscar.py HTTP/1.1" 404 0 -[03:56:52] - DEBUG - urllib3.connectionpool - Starting new HTTPS connection (1): raw.githubusercontent.com:443 -[03:56:52] - DEBUG - urllib3.connectionpool - https://raw.githubusercontent.com:443 "HEAD /huggingface/datasets/master/datasets/oscar/oscar.py HTTP/1.1" 200 0 -[03:56:52] - DEBUG - urllib3.connectionpool - Starting new HTTPS connection (1): raw.githubusercontent.com:443 -[03:56:52] - DEBUG - urllib3.connectionpool - https://raw.githubusercontent.com:443 "HEAD /huggingface/datasets/master/datasets/oscar/dataset_infos.json HTTP/1.1" 200 0 -[03:56:52] - WARNING - datasets.builder - Reusing dataset oscar (/home/wilso/.cache/huggingface/datasets/oscar/unshuffled_deduplicated_id/1.0.0/84838bd49d2295f62008383b05620571535451d84545037bb94d6f3501651df2) -[03:56:52] - WARNING - datasets.arrow_dataset - Loading cached processed dataset at /home/wilso/.cache/huggingface/datasets/oscar/unshuffled_deduplicated_id/1.0.0/84838bd49d2295f62008383b05620571535451d84545037bb94d6f3501651df2/cache-484e12c5eef7e8e7.arrow -[03:56:53] - WARNING - datasets.arrow_dataset - Loading cached processed dataset at /home/wilso/.cache/huggingface/datasets/oscar/unshuffled_deduplicated_id/1.0.0/84838bd49d2295f62008383b05620571535451d84545037bb94d6f3501651df2/cache-c71dccca2ce1349d.arrow -[03:56:53] - WARNING - datasets.arrow_dataset - Loading cached processed dataset at /home/wilso/.cache/huggingface/datasets/oscar/unshuffled_deduplicated_id/1.0.0/84838bd49d2295f62008383b05620571535451d84545037bb94d6f3501651df2/cache-cddb40395f5104e7.arrow -[03:56:54] - WARNING - datasets.arrow_dataset - Loading cached processed dataset at /home/wilso/.cache/huggingface/datasets/oscar/unshuffled_deduplicated_id/1.0.0/84838bd49d2295f62008383b05620571535451d84545037bb94d6f3501651df2/cache-e6293c8aef77ae9a.arrow -[03:56:54] - INFO - absl - Starting the local TPU driver. -[03:56:54] - INFO - absl - Unable to initialize backend 'tpu_driver': Not found: Unable to find driver in registry given worker: local:// -[03:56:54] - INFO - absl - Unable to initialize backend 'gpu': Not found: Could not find registered platform with name: "cuda". Available platform names are: Host Interpreter TPU -2021-07-07 03:56:57.424057: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory -2021-07-07 03:56:57.424162: W tensorflow/stream_executor/cuda/cuda_driver.cc:326] failed call to cuInit: UNKNOWN ERROR (303) -/home/wilso/hf/lib/python3.8/site-packages/jax/lib/xla_bridge.py:382: UserWarning: jax.host_count has been renamed to jax.process_count. This alias will eventually be removed; please update your code. - warnings.warn( -/home/wilso/hf/lib/python3.8/site-packages/jax/lib/xla_bridge.py:369: UserWarning: jax.host_id has been renamed to jax.process_index. This alias will eventually be removed; please update your code. - warnings.warn( - Epoch ... (1/18): 0%| | 0/18 [00:00