diff --git "a/nohup.out" "b/nohup.out" --- "a/nohup.out" +++ "b/nohup.out" @@ -1,122293 +1,3 @@ -2021-07-07 09:18:29.533336: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory -[09:18:30] - WARNING - __main__ - Process rank: -1, device: cpu, n_gpu: 0distributed training: False, 16-bits training: False -[09:18:30] - INFO - __main__ - Training/evaluation parameters TrainingArguments( -_n_gpu=0, -adafactor=False, -adam_beta1=0.9, -adam_beta2=0.98, -adam_epsilon=1e-08, -dataloader_drop_last=False, -dataloader_num_workers=0, -dataloader_pin_memory=True, -ddp_find_unused_parameters=None, -debug=[], -deepspeed=None, -disable_tqdm=False, -do_eval=False, -do_predict=False, -do_train=False, -eval_accumulation_steps=None, -eval_steps=500, -evaluation_strategy=IntervalStrategy.NO, -fp16=False, -fp16_backend=auto, -fp16_full_eval=False, -fp16_opt_level=O1, -gradient_accumulation_steps=1, -greater_is_better=None, -group_by_length=False, -ignore_data_skip=False, -label_names=None, -label_smoothing_factor=0.0, -learning_rate=0.0003, -length_column_name=length, -load_best_model_at_end=False, -local_rank=-1, -log_level=-1, -log_level_replica=-1, -log_on_each_node=True, -logging_dir=./runs/Jul07_09-18-30_t1v-n-b95d739e-w-0, -logging_first_step=False, -logging_steps=500, -logging_strategy=IntervalStrategy.STEPS, -lr_scheduler_type=SchedulerType.LINEAR, -max_grad_norm=1.0, -max_steps=-1, -metric_for_best_model=None, -mp_parameters=, -no_cuda=False, -num_train_epochs=18.0, -output_dir=./, -overwrite_output_dir=True, -past_index=-1, -per_device_eval_batch_size=128, -per_device_train_batch_size=128, -prediction_loss_only=False, -push_to_hub=True, -push_to_hub_model_id=, -push_to_hub_organization=None, -push_to_hub_token=None, -remove_unused_columns=True, -report_to=['tensorboard'], -resume_from_checkpoint=None, -run_name=./, -save_on_each_node=False, -save_steps=500, -save_strategy=IntervalStrategy.STEPS, -save_total_limit=None, -seed=42, -sharded_ddp=[], -skip_memory_metrics=True, -tpu_metrics_debug=False, -tpu_num_cores=None, -use_legacy_prediction_loop=False, -warmup_ratio=0.0, -warmup_steps=1000, -weight_decay=0.01, -) -[09:18:30] - DEBUG - urllib3.connectionpool - Starting new HTTPS connection (1): s3.amazonaws.com:443 -[09:18:30] - DEBUG - urllib3.connectionpool - https://s3.amazonaws.com:443 "HEAD /datasets.huggingface.co/datasets/datasets/oscar/oscar.py HTTP/1.1" 404 0 -[09:18:30] - DEBUG - urllib3.connectionpool - Starting new HTTPS connection (1): raw.githubusercontent.com:443 -[09:18:31] - DEBUG - urllib3.connectionpool - https://raw.githubusercontent.com:443 "HEAD /huggingface/datasets/master/datasets/oscar/oscar.py HTTP/1.1" 200 0 -[09:18:31] - DEBUG - urllib3.connectionpool - Starting new HTTPS connection (1): raw.githubusercontent.com:443 -[09:18:31] - DEBUG - urllib3.connectionpool - https://raw.githubusercontent.com:443 "HEAD /huggingface/datasets/master/datasets/oscar/dataset_infos.json HTTP/1.1" 200 0 -[09:18:31] - WARNING - datasets.builder - Reusing dataset oscar (/home/wilso/.cache/huggingface/datasets/oscar/unshuffled_deduplicated_id/1.0.0/84838bd49d2295f62008383b05620571535451d84545037bb94d6f3501651df2) -[09:18:31] - DEBUG - urllib3.connectionpool - Starting new HTTPS connection (1): s3.amazonaws.com:443 -[09:18:31] - DEBUG - urllib3.connectionpool - https://s3.amazonaws.com:443 "HEAD /datasets.huggingface.co/datasets/datasets/oscar/oscar.py HTTP/1.1" 404 0 -[09:18:31] - DEBUG - urllib3.connectionpool - Starting new HTTPS connection (1): raw.githubusercontent.com:443 -[09:18:31] - DEBUG - urllib3.connectionpool - https://raw.githubusercontent.com:443 "HEAD /huggingface/datasets/master/datasets/oscar/oscar.py HTTP/1.1" 200 0 -[09:18:31] - DEBUG - urllib3.connectionpool - Starting new HTTPS connection (1): raw.githubusercontent.com:443 -[09:18:31] - DEBUG - urllib3.connectionpool - https://raw.githubusercontent.com:443 "HEAD /huggingface/datasets/master/datasets/oscar/dataset_infos.json HTTP/1.1" 200 0 -[09:18:31] - WARNING - datasets.builder - Reusing dataset oscar (/home/wilso/.cache/huggingface/datasets/oscar/unshuffled_deduplicated_id/1.0.0/84838bd49d2295f62008383b05620571535451d84545037bb94d6f3501651df2) -[09:18:32] - DEBUG - urllib3.connectionpool - Starting new HTTPS connection (1): s3.amazonaws.com:443 -[09:18:32] - DEBUG - urllib3.connectionpool - https://s3.amazonaws.com:443 "HEAD /datasets.huggingface.co/datasets/datasets/oscar/oscar.py HTTP/1.1" 404 0 -[09:18:32] - DEBUG - urllib3.connectionpool - Starting new HTTPS connection (1): raw.githubusercontent.com:443 -[09:18:32] - DEBUG - urllib3.connectionpool - https://raw.githubusercontent.com:443 "HEAD /huggingface/datasets/master/datasets/oscar/oscar.py HTTP/1.1" 200 0 -[09:18:32] - DEBUG - urllib3.connectionpool - Starting new HTTPS connection (1): raw.githubusercontent.com:443 -[09:18:32] - DEBUG - urllib3.connectionpool - https://raw.githubusercontent.com:443 "HEAD /huggingface/datasets/master/datasets/oscar/dataset_infos.json HTTP/1.1" 200 0 -[09:18:32] - WARNING - datasets.builder - Reusing dataset oscar (/home/wilso/.cache/huggingface/datasets/oscar/unshuffled_deduplicated_id/1.0.0/84838bd49d2295f62008383b05620571535451d84545037bb94d6f3501651df2) -[09:18:32] - WARNING - datasets.arrow_dataset - Loading cached processed dataset at /home/wilso/.cache/huggingface/datasets/oscar/unshuffled_deduplicated_id/1.0.0/84838bd49d2295f62008383b05620571535451d84545037bb94d6f3501651df2/cache-484e12c5eef7e8e7.arrow -[09:18:33] - WARNING - datasets.arrow_dataset - Loading cached processed dataset at /home/wilso/.cache/huggingface/datasets/oscar/unshuffled_deduplicated_id/1.0.0/84838bd49d2295f62008383b05620571535451d84545037bb94d6f3501651df2/cache-c71dccca2ce1349d.arrow -[09:18:33] - WARNING - datasets.arrow_dataset - Loading cached processed dataset at /home/wilso/.cache/huggingface/datasets/oscar/unshuffled_deduplicated_id/1.0.0/84838bd49d2295f62008383b05620571535451d84545037bb94d6f3501651df2/cache-cddb40395f5104e7.arrow -[09:18:33] - WARNING - datasets.arrow_dataset - Loading cached processed dataset at /home/wilso/.cache/huggingface/datasets/oscar/unshuffled_deduplicated_id/1.0.0/84838bd49d2295f62008383b05620571535451d84545037bb94d6f3501651df2/cache-e6293c8aef77ae9a.arrow -[09:18:33] - INFO - absl - Starting the local TPU driver. -[09:18:33] - INFO - absl - Unable to initialize backend 'tpu_driver': Not found: Unable to find driver in registry given worker: local:// -[09:18:33] - INFO - absl - Unable to initialize backend 'gpu': Not found: Could not find registered platform with name: "cuda". Available platform names are: TPU Interpreter Host -2021-07-07 09:18:37.315233: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory -2021-07-07 09:18:37.315272: W tensorflow/stream_executor/cuda/cuda_driver.cc:326] failed call to cuInit: UNKNOWN ERROR (303) -/home/wilso/hf/lib/python3.8/site-packages/jax/lib/xla_bridge.py:382: UserWarning: jax.host_count has been renamed to jax.process_count. This alias will eventually be removed; please update your code. - warnings.warn( -/home/wilso/hf/lib/python3.8/site-packages/jax/lib/xla_bridge.py:369: UserWarning: jax.host_id has been renamed to jax.process_index. This alias will eventually be removed; please update your code. - warnings.warn( - Epoch ... (1/18): 0%| | 0/18 [00:00