diff --git a/.gitattributes b/.gitattributes deleted file mode 100644 index be0424d9c1da1d5ebed05a465b6bf7e422b1fa72..0000000000000000000000000000000000000000 --- a/.gitattributes +++ /dev/null @@ -1,41 +0,0 @@ -*.7z filter=lfs diff=lfs merge=lfs -text -*.arrow filter=lfs diff=lfs merge=lfs -text -*.bin filter=lfs diff=lfs merge=lfs -text -*.bz2 filter=lfs diff=lfs merge=lfs -text -*.ftz filter=lfs diff=lfs merge=lfs -text -*.gz filter=lfs diff=lfs merge=lfs -text -*.h5 filter=lfs diff=lfs merge=lfs -text -*.joblib filter=lfs diff=lfs merge=lfs -text -*.lfs.* filter=lfs diff=lfs merge=lfs -text -*.mlmodel filter=lfs diff=lfs merge=lfs -text -*.model filter=lfs diff=lfs merge=lfs -text -*.msgpack filter=lfs diff=lfs merge=lfs -text -*.npy filter=lfs diff=lfs merge=lfs -text -*.npz filter=lfs diff=lfs merge=lfs -text -*.onnx filter=lfs diff=lfs merge=lfs -text -*.ot filter=lfs diff=lfs merge=lfs -text -*.parquet filter=lfs diff=lfs merge=lfs -text -*.pb filter=lfs diff=lfs merge=lfs -text -*.pickle filter=lfs diff=lfs merge=lfs -text -*.pkl filter=lfs diff=lfs merge=lfs -text -*.pt filter=lfs diff=lfs merge=lfs -text -*.pth filter=lfs diff=lfs merge=lfs -text -*.rar filter=lfs diff=lfs merge=lfs -text -saved_model/**/* filter=lfs diff=lfs merge=lfs -text -*.tar.* filter=lfs diff=lfs merge=lfs -text -*.tflite filter=lfs diff=lfs merge=lfs -text -*.tgz filter=lfs diff=lfs merge=lfs -text -*.wasm filter=lfs diff=lfs merge=lfs -text -*.xz filter=lfs diff=lfs merge=lfs -text -*.zip filter=lfs diff=lfs merge=lfs -text -*.zst filter=lfs diff=lfs merge=lfs -text -*tfevents* filter=lfs diff=lfs merge=lfs -text -checkpoint-10000/tokenizer.json filter=lfs diff=lfs merge=lfs -text -checkpoint-12500/tokenizer.json filter=lfs diff=lfs merge=lfs -text -checkpoint-15000/tokenizer.json filter=lfs diff=lfs merge=lfs -text -checkpoint-20000/tokenizer.json filter=lfs diff=lfs merge=lfs -text -checkpoint-2500/tokenizer.json filter=lfs diff=lfs merge=lfs -text -checkpoint-25000/tokenizer.json filter=lfs diff=lfs merge=lfs -text -checkpoint-5000/tokenizer.json filter=lfs diff=lfs merge=lfs -text -checkpoint-7500/tokenizer.json filter=lfs diff=lfs merge=lfs -text -tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/.gitignore b/.gitignore deleted file mode 100644 index d6a0667dae0c5dc6b4271cddafa30d834e8bab98..0000000000000000000000000000000000000000 --- a/.gitignore +++ /dev/null @@ -1,11 +0,0 @@ -*/pilot_*/ -pilot_*/ -checkpoint-*/ -*/pilot_*/ -pilot_*/ -checkpoint-*/ -*/pilot_*/ -pilot_*/ -checkpoint-*/ -*/pilot_*/ -pilot_*/ diff --git a/all_results.json b/all_results.json deleted file mode 100644 index e2453e211bde83c5e4508f666c8c4463a8d42da0..0000000000000000000000000000000000000000 --- a/all_results.json +++ /dev/null @@ -1,14 +0,0 @@ -{ - "epoch": 2.13, - "eval_loss": 3.5801477432250977, - "eval_runtime": 484.8289, - "eval_samples": 4608, - "eval_samples_per_second": 9.504, - "eval_steps_per_second": 4.752, - "perplexity": 35.87884131122737, - "train_loss": 3.7238579296875, - "train_runtime": 52437.7197, - "train_samples": 94080, - "train_samples_per_second": 3.814, - "train_steps_per_second": 0.477 -} \ No newline at end of file diff --git a/checkpoint-10000/config.json b/checkpoint-10000/config.json deleted file mode 100644 index 01102e03e4779d2c43cb80a51c8459e256eba4ce..0000000000000000000000000000000000000000 --- a/checkpoint-10000/config.json +++ /dev/null @@ -1,39 +0,0 @@ -{ - "_name_or_path": "bigscience/bloom-350m", - "adapters": { - "adapters": {}, - "config_map": {}, - "fusion_config_map": {}, - "fusions": {} - }, - "apply_residual_connection_post_layernorm": false, - "architectures": [ - "BloomForCausalLM" - ], - "attention_dropout": 0.0, - "attention_softmax_in_fp32": true, - "bias_dropout_fusion": true, - "bos_token_id": 1, - "eos_token_id": 2, - "hidden_dropout": 0.0, - "hidden_size": 1024, - "initializer_range": 0.02, - "layer_norm_epsilon": 1e-05, - "masked_softmax_fusion": true, - "model_type": "bloom", - "n_head": 16, - "n_inner": null, - "n_layer": 24, - "offset_alibi": 100, - "pad_token_id": 3, - "pretraining_tp": 1, - "seq_length": 2048, - "skip_bias_add": true, - "skip_bias_add_qkv": false, - "slow_but_exact": false, - "torch_dtype": "float32", - "transformers_version": "4.20.0.dev0", - "unk_token_id": 0, - "use_cache": true, - "vocab_size": 250880 -} diff --git a/checkpoint-10000/optimizer.pt b/checkpoint-10000/optimizer.pt deleted file mode 100644 index dc889fc23150671430530227893ac8e5cd43fcd7..0000000000000000000000000000000000000000 --- a/checkpoint-10000/optimizer.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:265ff09d0d7e02defabfebb992d27553e6608e9f187ed59c40d682194d9182de -size 2254269 diff --git a/checkpoint-10000/pytorch_model.bin b/checkpoint-10000/pytorch_model.bin deleted file mode 100644 index 4149cfe86275bfe2f178000910aab8a7ea0dfbee..0000000000000000000000000000000000000000 --- a/checkpoint-10000/pytorch_model.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c5d0e81d3ced31978dd8a1717bf280d51acf1b2e014447b159c53fb5c1abeaa8 -size 2236955191 diff --git a/checkpoint-10000/rng_state.pth b/checkpoint-10000/rng_state.pth deleted file mode 100644 index 0e16a98c64ebc66b52ccb831ca38ca1d8b8a6933..0000000000000000000000000000000000000000 --- a/checkpoint-10000/rng_state.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:674676e662eeb93778c2b153ffad13aa90b43355da1956ce0b1e01e72f48c8d7 -size 14503 diff --git a/checkpoint-10000/scheduler.pt b/checkpoint-10000/scheduler.pt deleted file mode 100644 index 0278cf8d1585ae197bad514a213741000bcf63c6..0000000000000000000000000000000000000000 --- a/checkpoint-10000/scheduler.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ebae5cf74f470a9dc57b090feb9de29d57aa2d381061d1a61fd32b3c3221556b -size 623 diff --git a/checkpoint-10000/special_tokens_map.json b/checkpoint-10000/special_tokens_map.json deleted file mode 100644 index 25bc39604f72700b3b8e10bd69bb2f227157edd1..0000000000000000000000000000000000000000 --- a/checkpoint-10000/special_tokens_map.json +++ /dev/null @@ -1 +0,0 @@ -{"bos_token": "", "eos_token": "", "unk_token": "", "pad_token": ""} \ No newline at end of file diff --git a/checkpoint-10000/tokenizer.json b/checkpoint-10000/tokenizer.json deleted file mode 100644 index a4fa803e0e0e614382d27635cc90df38f96f41f0..0000000000000000000000000000000000000000 --- a/checkpoint-10000/tokenizer.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8f6efc66e73f1fd69da4f436e48befb519fdff3fe18910850c1d41bd862293a5 -size 14500443 diff --git a/checkpoint-10000/tokenizer_config.json b/checkpoint-10000/tokenizer_config.json deleted file mode 100644 index 149b7da28b1ab9fe6defc5878e01cdb16f445e11..0000000000000000000000000000000000000000 --- a/checkpoint-10000/tokenizer_config.json +++ /dev/null @@ -1 +0,0 @@ -{"unk_token": "", "bos_token": "", "eos_token": "", "pad_token": "", "add_prefix_space": false, "name_or_path": "bigscience/bloom-350m", "special_tokens_map_file": null, "padding_side": "left", "tokenizer_class": "BloomTokenizer"} \ No newline at end of file diff --git a/checkpoint-10000/trainer_state.json b/checkpoint-10000/trainer_state.json deleted file mode 100644 index 227537a6893582d06c9cd39eb64d42bbf9ac0c18..0000000000000000000000000000000000000000 --- a/checkpoint-10000/trainer_state.json +++ /dev/null @@ -1,56 +0,0 @@ -{ - "best_metric": 3.7074594497680664, - "best_model_checkpoint": "/users/zyong2/data/zyong2/bigscience/data/processed/024/bloom-350m_az_bitfit_100000samples_-1vocab_original-frozen/checkpoint-10000", - "epoch": 0.8503401360544217, - "global_step": 10000, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 0.21, - "learning_rate": 9e-05, - "loss": 4.2702, - "step": 2500 - }, - { - "epoch": 0.43, - "learning_rate": 8e-05, - "loss": 3.9326, - "step": 5000 - }, - { - "epoch": 0.43, - "eval_loss": 3.8811988830566406, - "eval_runtime": 485.7533, - "eval_samples_per_second": 9.486, - "eval_steps_per_second": 4.743, - "step": 5000 - }, - { - "epoch": 0.64, - "learning_rate": 7e-05, - "loss": 3.7946, - "step": 7500 - }, - { - "epoch": 0.85, - "learning_rate": 6e-05, - "loss": 3.7099, - "step": 10000 - }, - { - "epoch": 0.85, - "eval_loss": 3.7074594497680664, - "eval_runtime": 487.18, - "eval_samples_per_second": 9.459, - "eval_steps_per_second": 4.729, - "step": 10000 - } - ], - "max_steps": 25000, - "num_train_epochs": 3, - "total_flos": 1.4859311775744e+17, - "trial_name": null, - "trial_params": null -} diff --git a/checkpoint-10000/training_args.bin b/checkpoint-10000/training_args.bin deleted file mode 100644 index 6a78b5d574e7567392ad355276053f142e65e136..0000000000000000000000000000000000000000 --- a/checkpoint-10000/training_args.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d14f6f41cf21f30f5eb683a0b9414800094d15f898e0254a9c31393958aed209 -size 3375 diff --git a/checkpoint-10000/wikiann-az-results.txt b/checkpoint-10000/wikiann-az-results.txt deleted file mode 100644 index bd75ec427a08988c156b7b9d40529da62b004cfa..0000000000000000000000000000000000000000 --- a/checkpoint-10000/wikiann-az-results.txt +++ /dev/null @@ -1,8 +0,0 @@ -================================================== -Results -================================================== -Model: /users/zyong2/data/zyong2/bigscience/data/processed/024/bloom-350m_az_bitfit_100000samples_-1vocab_original-frozen/checkpoint-10000 -[0.3887915936952715, 0.36485532815808047, 0.36906854130052724, 0.3748698368622006, 0.39708939708939706, 0.39721739130434774, 0.3788668752172402, 0.37992831541218636, 0.361812521618817, 0.3837612768910479] -37.96 -1.18 -================================================== \ No newline at end of file diff --git a/checkpoint-12500/config.json b/checkpoint-12500/config.json deleted file mode 100644 index 01102e03e4779d2c43cb80a51c8459e256eba4ce..0000000000000000000000000000000000000000 --- a/checkpoint-12500/config.json +++ /dev/null @@ -1,39 +0,0 @@ -{ - "_name_or_path": "bigscience/bloom-350m", - "adapters": { - "adapters": {}, - "config_map": {}, - "fusion_config_map": {}, - "fusions": {} - }, - "apply_residual_connection_post_layernorm": false, - "architectures": [ - "BloomForCausalLM" - ], - "attention_dropout": 0.0, - "attention_softmax_in_fp32": true, - "bias_dropout_fusion": true, - "bos_token_id": 1, - "eos_token_id": 2, - "hidden_dropout": 0.0, - "hidden_size": 1024, - "initializer_range": 0.02, - "layer_norm_epsilon": 1e-05, - "masked_softmax_fusion": true, - "model_type": "bloom", - "n_head": 16, - "n_inner": null, - "n_layer": 24, - "offset_alibi": 100, - "pad_token_id": 3, - "pretraining_tp": 1, - "seq_length": 2048, - "skip_bias_add": true, - "skip_bias_add_qkv": false, - "slow_but_exact": false, - "torch_dtype": "float32", - "transformers_version": "4.20.0.dev0", - "unk_token_id": 0, - "use_cache": true, - "vocab_size": 250880 -} diff --git a/checkpoint-12500/optimizer.pt b/checkpoint-12500/optimizer.pt deleted file mode 100644 index 13756f0316492c9ce7419ebef36f76a7d8aced29..0000000000000000000000000000000000000000 --- a/checkpoint-12500/optimizer.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:07596e05ce7be5596893cfe284dcd6e75b3e1b0cc9c8e73fdce06c61be54c644 -size 2254269 diff --git a/checkpoint-12500/pytorch_model.bin b/checkpoint-12500/pytorch_model.bin deleted file mode 100644 index d495ebb74f9d0d2f07801c90c0f6a226e1411705..0000000000000000000000000000000000000000 --- a/checkpoint-12500/pytorch_model.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0ba9d8972e149d1195aa73c6870d8922e3881565ca843eb16ab4f2f87f626f74 -size 2236955191 diff --git a/checkpoint-12500/rng_state.pth b/checkpoint-12500/rng_state.pth deleted file mode 100644 index 9851569ccdad39d1c7a1bbe646199272e46807db..0000000000000000000000000000000000000000 --- a/checkpoint-12500/rng_state.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:84838ccd45951bbe1e5f964cffc9f60572ee4f5e111f59de86b1a56f3d7a6bd8 -size 14503 diff --git a/checkpoint-12500/scheduler.pt b/checkpoint-12500/scheduler.pt deleted file mode 100644 index 20fe61e31c8ad93792e2966e7a5aadf8fdfeb769..0000000000000000000000000000000000000000 --- a/checkpoint-12500/scheduler.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d270c6e3000cbdb534f7db7e774ca17393c2523690c8058754d752dd5b11a93a -size 623 diff --git a/checkpoint-12500/special_tokens_map.json b/checkpoint-12500/special_tokens_map.json deleted file mode 100644 index 25bc39604f72700b3b8e10bd69bb2f227157edd1..0000000000000000000000000000000000000000 --- a/checkpoint-12500/special_tokens_map.json +++ /dev/null @@ -1 +0,0 @@ -{"bos_token": "", "eos_token": "", "unk_token": "", "pad_token": ""} \ No newline at end of file diff --git a/checkpoint-12500/tokenizer.json b/checkpoint-12500/tokenizer.json deleted file mode 100644 index a4fa803e0e0e614382d27635cc90df38f96f41f0..0000000000000000000000000000000000000000 --- a/checkpoint-12500/tokenizer.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8f6efc66e73f1fd69da4f436e48befb519fdff3fe18910850c1d41bd862293a5 -size 14500443 diff --git a/checkpoint-12500/tokenizer_config.json b/checkpoint-12500/tokenizer_config.json deleted file mode 100644 index 149b7da28b1ab9fe6defc5878e01cdb16f445e11..0000000000000000000000000000000000000000 --- a/checkpoint-12500/tokenizer_config.json +++ /dev/null @@ -1 +0,0 @@ -{"unk_token": "", "bos_token": "", "eos_token": "", "pad_token": "", "add_prefix_space": false, "name_or_path": "bigscience/bloom-350m", "special_tokens_map_file": null, "padding_side": "left", "tokenizer_class": "BloomTokenizer"} \ No newline at end of file diff --git a/checkpoint-12500/trainer_state.json b/checkpoint-12500/trainer_state.json deleted file mode 100644 index 43518b15ef6e6c50b032a9dfee8b78cc5f72cf21..0000000000000000000000000000000000000000 --- a/checkpoint-12500/trainer_state.json +++ /dev/null @@ -1,86 +0,0 @@ -{ - "best_metric": 3.255730152130127, - "best_model_checkpoint": "/users/zyong2/data/zyong2/bigscience/data/processed/024/bloom-350m_az_bitfit_100000samples_-1vocab_original-frozen/checkpoint-12500", - "epoch": 1.0629251700680271, - "global_step": 12500, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 0.21, - "learning_rate": 0.0008, - "loss": 3.7096, - "step": 2500 - }, - { - "epoch": 0.21, - "eval_loss": 3.477477550506592, - "eval_runtime": 486.1756, - "eval_samples_per_second": 9.478, - "eval_steps_per_second": 4.739, - "step": 2500 - }, - { - "epoch": 0.43, - "learning_rate": 0.0006, - "loss": 3.3667, - "step": 5000 - }, - { - "epoch": 0.43, - "eval_loss": 3.3522424697875977, - "eval_runtime": 486.8305, - "eval_samples_per_second": 9.465, - "eval_steps_per_second": 4.733, - "step": 5000 - }, - { - "epoch": 0.64, - "learning_rate": 0.0004, - "loss": 3.29, - "step": 7500 - }, - { - "epoch": 0.64, - "eval_loss": 3.3035173416137695, - "eval_runtime": 485.9329, - "eval_samples_per_second": 9.483, - "eval_steps_per_second": 4.741, - "step": 7500 - }, - { - "epoch": 0.85, - "learning_rate": 0.0002, - "loss": 3.2496, - "step": 10000 - }, - { - "epoch": 0.85, - "eval_loss": 3.269397497177124, - "eval_runtime": 485.4366, - "eval_samples_per_second": 9.492, - "eval_steps_per_second": 4.746, - "step": 10000 - }, - { - "epoch": 1.06, - "learning_rate": 0.0, - "loss": 3.2319, - "step": 12500 - }, - { - "epoch": 1.06, - "eval_loss": 3.255730152130127, - "eval_runtime": 485.5726, - "eval_samples_per_second": 9.49, - "eval_steps_per_second": 4.745, - "step": 12500 - } - ], - "max_steps": 12500, - "num_train_epochs": 2, - "total_flos": 1.857413971968e+17, - "trial_name": null, - "trial_params": null -} diff --git a/checkpoint-12500/training_args.bin b/checkpoint-12500/training_args.bin deleted file mode 100644 index b756f4c53d69d220b82fd6ca639d45c2fbd66683..0000000000000000000000000000000000000000 --- a/checkpoint-12500/training_args.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3803a6357491b3295e37b75071a44122ab2d037bb565a91e83c16087b12fef18 -size 3375 diff --git a/checkpoint-12500/wikiann-az-results.txt b/checkpoint-12500/wikiann-az-results.txt deleted file mode 100644 index 6365db8f2ff523eacc21a5ce752eb02bc7d31bba..0000000000000000000000000000000000000000 --- a/checkpoint-12500/wikiann-az-results.txt +++ /dev/null @@ -1,8 +0,0 @@ -================================================== -Results -================================================== -Model: /users/zyong2/data/zyong2/bigscience/data/processed/024/bloom-350m_az_bitfit_100000samples_-1vocab_original-frozen/checkpoint-12500 -[0.3867132867132867, 0.35957921581128466, 0.3694581280788177, 0.37612768910478833, 0.39779005524861877, 0.3986135181975736, 0.36903939184519696, 0.3745907007203667, 0.36721991701244816, 0.38253638253638256] -37.82 -1.24 -================================================== \ No newline at end of file diff --git a/checkpoint-15000/config.json b/checkpoint-15000/config.json deleted file mode 100644 index 01102e03e4779d2c43cb80a51c8459e256eba4ce..0000000000000000000000000000000000000000 --- a/checkpoint-15000/config.json +++ /dev/null @@ -1,39 +0,0 @@ -{ - "_name_or_path": "bigscience/bloom-350m", - "adapters": { - "adapters": {}, - "config_map": {}, - "fusion_config_map": {}, - "fusions": {} - }, - "apply_residual_connection_post_layernorm": false, - "architectures": [ - "BloomForCausalLM" - ], - "attention_dropout": 0.0, - "attention_softmax_in_fp32": true, - "bias_dropout_fusion": true, - "bos_token_id": 1, - "eos_token_id": 2, - "hidden_dropout": 0.0, - "hidden_size": 1024, - "initializer_range": 0.02, - "layer_norm_epsilon": 1e-05, - "masked_softmax_fusion": true, - "model_type": "bloom", - "n_head": 16, - "n_inner": null, - "n_layer": 24, - "offset_alibi": 100, - "pad_token_id": 3, - "pretraining_tp": 1, - "seq_length": 2048, - "skip_bias_add": true, - "skip_bias_add_qkv": false, - "slow_but_exact": false, - "torch_dtype": "float32", - "transformers_version": "4.20.0.dev0", - "unk_token_id": 0, - "use_cache": true, - "vocab_size": 250880 -} diff --git a/checkpoint-15000/optimizer.pt b/checkpoint-15000/optimizer.pt deleted file mode 100644 index 6ffca2ee8a81412d5903d12e336ff1f9a528165d..0000000000000000000000000000000000000000 --- a/checkpoint-15000/optimizer.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a395cb43c406f8705960f0663d5fd2c4c092932797966f26d35e97952836ca33 -size 2254269 diff --git a/checkpoint-15000/pytorch_model.bin b/checkpoint-15000/pytorch_model.bin deleted file mode 100644 index 4a701bee29ad46da26164ed9a241fe6b03ea6e3a..0000000000000000000000000000000000000000 --- a/checkpoint-15000/pytorch_model.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:062822f9e2819dc1e9393a95d333476433854c3dcf75ada844ef378ee9204148 -size 2236955191 diff --git a/checkpoint-15000/rng_state.pth b/checkpoint-15000/rng_state.pth deleted file mode 100644 index 6a0bae8e3547586750ea7ecba200270d8296068b..0000000000000000000000000000000000000000 --- a/checkpoint-15000/rng_state.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:69a15e581e7afd6fd12d6dddef3da31e19b3dd058003b5c5d00781b54e093f7c -size 14503 diff --git a/checkpoint-15000/scheduler.pt b/checkpoint-15000/scheduler.pt deleted file mode 100644 index 8d3d39ad4b2183a90165fa1731ec456fda1ee5c9..0000000000000000000000000000000000000000 --- a/checkpoint-15000/scheduler.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:301727affc0c0a4c1f25106f7fd12c059ede0526ba52733c25be949ad3bc04d7 -size 623 diff --git a/checkpoint-15000/special_tokens_map.json b/checkpoint-15000/special_tokens_map.json deleted file mode 100644 index 25bc39604f72700b3b8e10bd69bb2f227157edd1..0000000000000000000000000000000000000000 --- a/checkpoint-15000/special_tokens_map.json +++ /dev/null @@ -1 +0,0 @@ -{"bos_token": "", "eos_token": "", "unk_token": "", "pad_token": ""} \ No newline at end of file diff --git a/checkpoint-15000/tokenizer.json b/checkpoint-15000/tokenizer.json deleted file mode 100644 index a4fa803e0e0e614382d27635cc90df38f96f41f0..0000000000000000000000000000000000000000 --- a/checkpoint-15000/tokenizer.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8f6efc66e73f1fd69da4f436e48befb519fdff3fe18910850c1d41bd862293a5 -size 14500443 diff --git a/checkpoint-15000/tokenizer_config.json b/checkpoint-15000/tokenizer_config.json deleted file mode 100644 index 149b7da28b1ab9fe6defc5878e01cdb16f445e11..0000000000000000000000000000000000000000 --- a/checkpoint-15000/tokenizer_config.json +++ /dev/null @@ -1 +0,0 @@ -{"unk_token": "", "bos_token": "", "eos_token": "", "pad_token": "", "add_prefix_space": false, "name_or_path": "bigscience/bloom-350m", "special_tokens_map_file": null, "padding_side": "left", "tokenizer_class": "BloomTokenizer"} \ No newline at end of file diff --git a/checkpoint-15000/trainer_state.json b/checkpoint-15000/trainer_state.json deleted file mode 100644 index 46f5fc96715f7d9b72d4c9dc79e17c72ba9c7537..0000000000000000000000000000000000000000 --- a/checkpoint-15000/trainer_state.json +++ /dev/null @@ -1,76 +0,0 @@ -{ - "best_metric": 3.6284306049346924, - "best_model_checkpoint": "/users/zyong2/data/zyong2/bigscience/data/processed/024/bloom-350m_az_bitfit_100000samples_-1vocab_original-frozen/checkpoint-15000", - "epoch": 1.2755102040816326, - "global_step": 15000, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 0.21, - "learning_rate": 9e-05, - "loss": 4.2702, - "step": 2500 - }, - { - "epoch": 0.43, - "learning_rate": 8e-05, - "loss": 3.9326, - "step": 5000 - }, - { - "epoch": 0.43, - "eval_loss": 3.8811988830566406, - "eval_runtime": 485.7533, - "eval_samples_per_second": 9.486, - "eval_steps_per_second": 4.743, - "step": 5000 - }, - { - "epoch": 0.64, - "learning_rate": 7e-05, - "loss": 3.7946, - "step": 7500 - }, - { - "epoch": 0.85, - "learning_rate": 6e-05, - "loss": 3.7099, - "step": 10000 - }, - { - "epoch": 0.85, - "eval_loss": 3.7074594497680664, - "eval_runtime": 487.18, - "eval_samples_per_second": 9.459, - "eval_steps_per_second": 4.729, - "step": 10000 - }, - { - "epoch": 1.06, - "learning_rate": 5e-05, - "loss": 3.657, - "step": 12500 - }, - { - "epoch": 1.28, - "learning_rate": 4e-05, - "loss": 3.6085, - "step": 15000 - }, - { - "epoch": 1.28, - "eval_loss": 3.6284306049346924, - "eval_runtime": 484.984, - "eval_samples_per_second": 9.501, - "eval_steps_per_second": 4.751, - "step": 15000 - } - ], - "max_steps": 25000, - "num_train_epochs": 3, - "total_flos": 2.2288967663616e+17, - "trial_name": null, - "trial_params": null -} diff --git a/checkpoint-15000/training_args.bin b/checkpoint-15000/training_args.bin deleted file mode 100644 index 6a78b5d574e7567392ad355276053f142e65e136..0000000000000000000000000000000000000000 --- a/checkpoint-15000/training_args.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d14f6f41cf21f30f5eb683a0b9414800094d15f898e0254a9c31393958aed209 -size 3375 diff --git a/checkpoint-20000/config.json b/checkpoint-20000/config.json deleted file mode 100644 index 01102e03e4779d2c43cb80a51c8459e256eba4ce..0000000000000000000000000000000000000000 --- a/checkpoint-20000/config.json +++ /dev/null @@ -1,39 +0,0 @@ -{ - "_name_or_path": "bigscience/bloom-350m", - "adapters": { - "adapters": {}, - "config_map": {}, - "fusion_config_map": {}, - "fusions": {} - }, - "apply_residual_connection_post_layernorm": false, - "architectures": [ - "BloomForCausalLM" - ], - "attention_dropout": 0.0, - "attention_softmax_in_fp32": true, - "bias_dropout_fusion": true, - "bos_token_id": 1, - "eos_token_id": 2, - "hidden_dropout": 0.0, - "hidden_size": 1024, - "initializer_range": 0.02, - "layer_norm_epsilon": 1e-05, - "masked_softmax_fusion": true, - "model_type": "bloom", - "n_head": 16, - "n_inner": null, - "n_layer": 24, - "offset_alibi": 100, - "pad_token_id": 3, - "pretraining_tp": 1, - "seq_length": 2048, - "skip_bias_add": true, - "skip_bias_add_qkv": false, - "slow_but_exact": false, - "torch_dtype": "float32", - "transformers_version": "4.20.0.dev0", - "unk_token_id": 0, - "use_cache": true, - "vocab_size": 250880 -} diff --git a/checkpoint-20000/optimizer.pt b/checkpoint-20000/optimizer.pt deleted file mode 100644 index 9f12a3e275949b03703219106f9c3cb563842d39..0000000000000000000000000000000000000000 --- a/checkpoint-20000/optimizer.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:38839006d5b75b018803308ec39f56f6a164acda29a0f2fede066fe4236ef866 -size 2254269 diff --git a/checkpoint-20000/pytorch_model.bin b/checkpoint-20000/pytorch_model.bin deleted file mode 100644 index 465a597226473f8f9d1e97240dbf6b69632fa8b2..0000000000000000000000000000000000000000 --- a/checkpoint-20000/pytorch_model.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ac58a7747254733f3a05bd54e93799e165c95d1762f7c9bb25d3f6f384b24ce6 -size 2236955191 diff --git a/checkpoint-20000/rng_state.pth b/checkpoint-20000/rng_state.pth deleted file mode 100644 index 3d890aa3229c86cd01884d1a2db43f99eea0f032..0000000000000000000000000000000000000000 --- a/checkpoint-20000/rng_state.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c2ab9b52e364e49a6593a765f851264b1dd5e9b0207961295e70ed4788ef648c -size 14503 diff --git a/checkpoint-20000/scheduler.pt b/checkpoint-20000/scheduler.pt deleted file mode 100644 index 0fce858fc59e1c04346ec17a91eea84ca7634ec2..0000000000000000000000000000000000000000 --- a/checkpoint-20000/scheduler.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:69341a1831197b0345e31eaac56abf9ad4527cc56eba4b526818b4ffb6ef6dad -size 623 diff --git a/checkpoint-20000/special_tokens_map.json b/checkpoint-20000/special_tokens_map.json deleted file mode 100644 index 25bc39604f72700b3b8e10bd69bb2f227157edd1..0000000000000000000000000000000000000000 --- a/checkpoint-20000/special_tokens_map.json +++ /dev/null @@ -1 +0,0 @@ -{"bos_token": "", "eos_token": "", "unk_token": "", "pad_token": ""} \ No newline at end of file diff --git a/checkpoint-20000/tokenizer.json b/checkpoint-20000/tokenizer.json deleted file mode 100644 index a4fa803e0e0e614382d27635cc90df38f96f41f0..0000000000000000000000000000000000000000 --- a/checkpoint-20000/tokenizer.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8f6efc66e73f1fd69da4f436e48befb519fdff3fe18910850c1d41bd862293a5 -size 14500443 diff --git a/checkpoint-20000/tokenizer_config.json b/checkpoint-20000/tokenizer_config.json deleted file mode 100644 index 149b7da28b1ab9fe6defc5878e01cdb16f445e11..0000000000000000000000000000000000000000 --- a/checkpoint-20000/tokenizer_config.json +++ /dev/null @@ -1 +0,0 @@ -{"unk_token": "", "bos_token": "", "eos_token": "", "pad_token": "", "add_prefix_space": false, "name_or_path": "bigscience/bloom-350m", "special_tokens_map_file": null, "padding_side": "left", "tokenizer_class": "BloomTokenizer"} \ No newline at end of file diff --git a/checkpoint-20000/trainer_state.json b/checkpoint-20000/trainer_state.json deleted file mode 100644 index 5510c88438043f0a64d7f8a19722832a67b4a9b7..0000000000000000000000000000000000000000 --- a/checkpoint-20000/trainer_state.json +++ /dev/null @@ -1,96 +0,0 @@ -{ - "best_metric": 3.5915658473968506, - "best_model_checkpoint": "/users/zyong2/data/zyong2/bigscience/data/processed/024/bloom-350m_az_bitfit_100000samples_-1vocab_original-frozen/checkpoint-20000", - "epoch": 1.7006802721088436, - "global_step": 20000, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 0.21, - "learning_rate": 9e-05, - "loss": 4.2702, - "step": 2500 - }, - { - "epoch": 0.43, - "learning_rate": 8e-05, - "loss": 3.9326, - "step": 5000 - }, - { - "epoch": 0.43, - "eval_loss": 3.8811988830566406, - "eval_runtime": 485.7533, - "eval_samples_per_second": 9.486, - "eval_steps_per_second": 4.743, - "step": 5000 - }, - { - "epoch": 0.64, - "learning_rate": 7e-05, - "loss": 3.7946, - "step": 7500 - }, - { - "epoch": 0.85, - "learning_rate": 6e-05, - "loss": 3.7099, - "step": 10000 - }, - { - "epoch": 0.85, - "eval_loss": 3.7074594497680664, - "eval_runtime": 487.18, - "eval_samples_per_second": 9.459, - "eval_steps_per_second": 4.729, - "step": 10000 - }, - { - "epoch": 1.06, - "learning_rate": 5e-05, - "loss": 3.657, - "step": 12500 - }, - { - "epoch": 1.28, - "learning_rate": 4e-05, - "loss": 3.6085, - "step": 15000 - }, - { - "epoch": 1.28, - "eval_loss": 3.6284306049346924, - "eval_runtime": 484.984, - "eval_samples_per_second": 9.501, - "eval_steps_per_second": 4.751, - "step": 15000 - }, - { - "epoch": 1.49, - "learning_rate": 3e-05, - "loss": 3.5817, - "step": 17500 - }, - { - "epoch": 1.7, - "learning_rate": 2e-05, - "loss": 3.5696, - "step": 20000 - }, - { - "epoch": 1.7, - "eval_loss": 3.5915658473968506, - "eval_runtime": 485.8978, - "eval_samples_per_second": 9.483, - "eval_steps_per_second": 4.742, - "step": 20000 - } - ], - "max_steps": 25000, - "num_train_epochs": 3, - "total_flos": 2.9718623551488e+17, - "trial_name": null, - "trial_params": null -} diff --git a/checkpoint-20000/training_args.bin b/checkpoint-20000/training_args.bin deleted file mode 100644 index 6a78b5d574e7567392ad355276053f142e65e136..0000000000000000000000000000000000000000 --- a/checkpoint-20000/training_args.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d14f6f41cf21f30f5eb683a0b9414800094d15f898e0254a9c31393958aed209 -size 3375 diff --git a/checkpoint-2500/config.json b/checkpoint-2500/config.json deleted file mode 100644 index 01102e03e4779d2c43cb80a51c8459e256eba4ce..0000000000000000000000000000000000000000 --- a/checkpoint-2500/config.json +++ /dev/null @@ -1,39 +0,0 @@ -{ - "_name_or_path": "bigscience/bloom-350m", - "adapters": { - "adapters": {}, - "config_map": {}, - "fusion_config_map": {}, - "fusions": {} - }, - "apply_residual_connection_post_layernorm": false, - "architectures": [ - "BloomForCausalLM" - ], - "attention_dropout": 0.0, - "attention_softmax_in_fp32": true, - "bias_dropout_fusion": true, - "bos_token_id": 1, - "eos_token_id": 2, - "hidden_dropout": 0.0, - "hidden_size": 1024, - "initializer_range": 0.02, - "layer_norm_epsilon": 1e-05, - "masked_softmax_fusion": true, - "model_type": "bloom", - "n_head": 16, - "n_inner": null, - "n_layer": 24, - "offset_alibi": 100, - "pad_token_id": 3, - "pretraining_tp": 1, - "seq_length": 2048, - "skip_bias_add": true, - "skip_bias_add_qkv": false, - "slow_but_exact": false, - "torch_dtype": "float32", - "transformers_version": "4.20.0.dev0", - "unk_token_id": 0, - "use_cache": true, - "vocab_size": 250880 -} diff --git a/checkpoint-2500/optimizer.pt b/checkpoint-2500/optimizer.pt deleted file mode 100644 index 6b07e58d08586de81ab57b13f4848ba9458ef2bd..0000000000000000000000000000000000000000 --- a/checkpoint-2500/optimizer.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:802b6e19dbabf54c1fc21cbda7ea52049caf2243dceb34c4b67b12148177fac0 -size 2254269 diff --git a/checkpoint-2500/pytorch_model.bin b/checkpoint-2500/pytorch_model.bin deleted file mode 100644 index 92dbb59ccee11740fa6022f5f15230facba4a56f..0000000000000000000000000000000000000000 --- a/checkpoint-2500/pytorch_model.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:86d1f7b456cd8b5855dbd53324a63b25a21cd79f0d13da5e27de78a46f098406 -size 2236955191 diff --git a/checkpoint-2500/rng_state.pth b/checkpoint-2500/rng_state.pth deleted file mode 100644 index 5157722a3d73b9477272bc1662b7333826cc1ef9..0000000000000000000000000000000000000000 --- a/checkpoint-2500/rng_state.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:03a3c1141f8bebf59967bea65fa021fcc2ad8a3d7753ae183d1d946d3f5b6d8e -size 14503 diff --git a/checkpoint-2500/scheduler.pt b/checkpoint-2500/scheduler.pt deleted file mode 100644 index 39f754e4e40932a231da06fd74e846d4e0c1c2a3..0000000000000000000000000000000000000000 --- a/checkpoint-2500/scheduler.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8848be77d5e16f8ad560a7262091b3d3fcd8d0f3fa50682054480c93bc684fe6 -size 623 diff --git a/checkpoint-2500/special_tokens_map.json b/checkpoint-2500/special_tokens_map.json deleted file mode 100644 index 25bc39604f72700b3b8e10bd69bb2f227157edd1..0000000000000000000000000000000000000000 --- a/checkpoint-2500/special_tokens_map.json +++ /dev/null @@ -1 +0,0 @@ -{"bos_token": "", "eos_token": "", "unk_token": "", "pad_token": ""} \ No newline at end of file diff --git a/checkpoint-2500/tokenizer.json b/checkpoint-2500/tokenizer.json deleted file mode 100644 index a4fa803e0e0e614382d27635cc90df38f96f41f0..0000000000000000000000000000000000000000 --- a/checkpoint-2500/tokenizer.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8f6efc66e73f1fd69da4f436e48befb519fdff3fe18910850c1d41bd862293a5 -size 14500443 diff --git a/checkpoint-2500/tokenizer_config.json b/checkpoint-2500/tokenizer_config.json deleted file mode 100644 index 149b7da28b1ab9fe6defc5878e01cdb16f445e11..0000000000000000000000000000000000000000 --- a/checkpoint-2500/tokenizer_config.json +++ /dev/null @@ -1 +0,0 @@ -{"unk_token": "", "bos_token": "", "eos_token": "", "pad_token": "", "add_prefix_space": false, "name_or_path": "bigscience/bloom-350m", "special_tokens_map_file": null, "padding_side": "left", "tokenizer_class": "BloomTokenizer"} \ No newline at end of file diff --git a/checkpoint-2500/trainer_state.json b/checkpoint-2500/trainer_state.json deleted file mode 100644 index 4beee85ef1a95f6cdc3287e5a8e1c99fbc9188fe..0000000000000000000000000000000000000000 --- a/checkpoint-2500/trainer_state.json +++ /dev/null @@ -1,30 +0,0 @@ -{ - "best_metric": 3.477477550506592, - "best_model_checkpoint": "/users/zyong2/data/zyong2/bigscience/data/processed/024/bloom-350m_az_bitfit_100000samples_-1vocab_original-frozen/checkpoint-2500", - "epoch": 0.21258503401360543, - "global_step": 2500, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 0.21, - "learning_rate": 0.0008, - "loss": 3.7096, - "step": 2500 - }, - { - "epoch": 0.21, - "eval_loss": 3.477477550506592, - "eval_runtime": 486.1756, - "eval_samples_per_second": 9.478, - "eval_steps_per_second": 4.739, - "step": 2500 - } - ], - "max_steps": 12500, - "num_train_epochs": 2, - "total_flos": 3.714827943936e+16, - "trial_name": null, - "trial_params": null -} diff --git a/checkpoint-2500/training_args.bin b/checkpoint-2500/training_args.bin deleted file mode 100644 index b756f4c53d69d220b82fd6ca639d45c2fbd66683..0000000000000000000000000000000000000000 --- a/checkpoint-2500/training_args.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3803a6357491b3295e37b75071a44122ab2d037bb565a91e83c16087b12fef18 -size 3375 diff --git a/checkpoint-2500/wikiann-az-results.txt b/checkpoint-2500/wikiann-az-results.txt deleted file mode 100644 index 9d6c1d16a78fdc6c6c6996e883a3bca94aa020b4..0000000000000000000000000000000000000000 --- a/checkpoint-2500/wikiann-az-results.txt +++ /dev/null @@ -1,8 +0,0 @@ -================================================== -Results -================================================== -Model: /users/zyong2/data/zyong2/bigscience/data/processed/024/bloom-350m_az_bitfit_100000samples_-1vocab_original-frozen/checkpoint-2500 -[0.3898840885142255, 0.35679099225897265, 0.36694677871148457, 0.38952879581151834, 0.38456189151599446, 0.39958158995815896, 0.36963484945547725, 0.3749565217391304, 0.36276193747852975, 0.38151494093120225] -37.76 -1.30 -================================================== \ No newline at end of file diff --git a/checkpoint-25000/config.json b/checkpoint-25000/config.json deleted file mode 100644 index 01102e03e4779d2c43cb80a51c8459e256eba4ce..0000000000000000000000000000000000000000 --- a/checkpoint-25000/config.json +++ /dev/null @@ -1,39 +0,0 @@ -{ - "_name_or_path": "bigscience/bloom-350m", - "adapters": { - "adapters": {}, - "config_map": {}, - "fusion_config_map": {}, - "fusions": {} - }, - "apply_residual_connection_post_layernorm": false, - "architectures": [ - "BloomForCausalLM" - ], - "attention_dropout": 0.0, - "attention_softmax_in_fp32": true, - "bias_dropout_fusion": true, - "bos_token_id": 1, - "eos_token_id": 2, - "hidden_dropout": 0.0, - "hidden_size": 1024, - "initializer_range": 0.02, - "layer_norm_epsilon": 1e-05, - "masked_softmax_fusion": true, - "model_type": "bloom", - "n_head": 16, - "n_inner": null, - "n_layer": 24, - "offset_alibi": 100, - "pad_token_id": 3, - "pretraining_tp": 1, - "seq_length": 2048, - "skip_bias_add": true, - "skip_bias_add_qkv": false, - "slow_but_exact": false, - "torch_dtype": "float32", - "transformers_version": "4.20.0.dev0", - "unk_token_id": 0, - "use_cache": true, - "vocab_size": 250880 -} diff --git a/checkpoint-25000/optimizer.pt b/checkpoint-25000/optimizer.pt deleted file mode 100644 index ac57645dc07accf88dec7563627857135b18f63f..0000000000000000000000000000000000000000 --- a/checkpoint-25000/optimizer.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:1ccc2a402173ce09109c844c54e6567e441a78307e697295c100643fb287eadb -size 2254269 diff --git a/checkpoint-25000/pytorch_model.bin b/checkpoint-25000/pytorch_model.bin deleted file mode 100644 index 35ca0a6b9d95fa2d3ea320d4c76eacb7f579a9bb..0000000000000000000000000000000000000000 --- a/checkpoint-25000/pytorch_model.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5a88af31593d9a67c1c620eb8db4bf37161db7447cf27709c0edec31c4dd7a73 -size 2236955191 diff --git a/checkpoint-25000/rng_state.pth b/checkpoint-25000/rng_state.pth deleted file mode 100644 index 968c4beab115410a386c306620bb113cb38161a1..0000000000000000000000000000000000000000 --- a/checkpoint-25000/rng_state.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:cc81a4ed07eaae97e601e6dad9894c368270413e9040807580143d45f03df338 -size 14503 diff --git a/checkpoint-25000/scheduler.pt b/checkpoint-25000/scheduler.pt deleted file mode 100644 index 7ad3b44dd75ce7d8d5e7e1e604001842c0cc94ff..0000000000000000000000000000000000000000 --- a/checkpoint-25000/scheduler.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2d68cbb33fa1a2e013758e6ff8a1c4cb984da09f9cb40986c80de7fb857fd18f -size 623 diff --git a/checkpoint-25000/special_tokens_map.json b/checkpoint-25000/special_tokens_map.json deleted file mode 100644 index 25bc39604f72700b3b8e10bd69bb2f227157edd1..0000000000000000000000000000000000000000 --- a/checkpoint-25000/special_tokens_map.json +++ /dev/null @@ -1 +0,0 @@ -{"bos_token": "", "eos_token": "", "unk_token": "", "pad_token": ""} \ No newline at end of file diff --git a/checkpoint-25000/tokenizer.json b/checkpoint-25000/tokenizer.json deleted file mode 100644 index a4fa803e0e0e614382d27635cc90df38f96f41f0..0000000000000000000000000000000000000000 --- a/checkpoint-25000/tokenizer.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8f6efc66e73f1fd69da4f436e48befb519fdff3fe18910850c1d41bd862293a5 -size 14500443 diff --git a/checkpoint-25000/tokenizer_config.json b/checkpoint-25000/tokenizer_config.json deleted file mode 100644 index 149b7da28b1ab9fe6defc5878e01cdb16f445e11..0000000000000000000000000000000000000000 --- a/checkpoint-25000/tokenizer_config.json +++ /dev/null @@ -1 +0,0 @@ -{"unk_token": "", "bos_token": "", "eos_token": "", "pad_token": "", "add_prefix_space": false, "name_or_path": "bigscience/bloom-350m", "special_tokens_map_file": null, "padding_side": "left", "tokenizer_class": "BloomTokenizer"} \ No newline at end of file diff --git a/checkpoint-25000/trainer_state.json b/checkpoint-25000/trainer_state.json deleted file mode 100644 index 25678acc973d80bed135d8eb4f85288bb9a7bcaa..0000000000000000000000000000000000000000 --- a/checkpoint-25000/trainer_state.json +++ /dev/null @@ -1,116 +0,0 @@ -{ - "best_metric": 3.5801477432250977, - "best_model_checkpoint": "/users/zyong2/data/zyong2/bigscience/data/processed/024/bloom-350m_az_bitfit_100000samples_-1vocab_original-frozen/checkpoint-25000", - "epoch": 2.1258503401360542, - "global_step": 25000, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 0.21, - "learning_rate": 9e-05, - "loss": 4.2702, - "step": 2500 - }, - { - "epoch": 0.43, - "learning_rate": 8e-05, - "loss": 3.9326, - "step": 5000 - }, - { - "epoch": 0.43, - "eval_loss": 3.8811988830566406, - "eval_runtime": 485.7533, - "eval_samples_per_second": 9.486, - "eval_steps_per_second": 4.743, - "step": 5000 - }, - { - "epoch": 0.64, - "learning_rate": 7e-05, - "loss": 3.7946, - "step": 7500 - }, - { - "epoch": 0.85, - "learning_rate": 6e-05, - "loss": 3.7099, - "step": 10000 - }, - { - "epoch": 0.85, - "eval_loss": 3.7074594497680664, - "eval_runtime": 487.18, - "eval_samples_per_second": 9.459, - "eval_steps_per_second": 4.729, - "step": 10000 - }, - { - "epoch": 1.06, - "learning_rate": 5e-05, - "loss": 3.657, - "step": 12500 - }, - { - "epoch": 1.28, - "learning_rate": 4e-05, - "loss": 3.6085, - "step": 15000 - }, - { - "epoch": 1.28, - "eval_loss": 3.6284306049346924, - "eval_runtime": 484.984, - "eval_samples_per_second": 9.501, - "eval_steps_per_second": 4.751, - "step": 15000 - }, - { - "epoch": 1.49, - "learning_rate": 3e-05, - "loss": 3.5817, - "step": 17500 - }, - { - "epoch": 1.7, - "learning_rate": 2e-05, - "loss": 3.5696, - "step": 20000 - }, - { - "epoch": 1.7, - "eval_loss": 3.5915658473968506, - "eval_runtime": 485.8978, - "eval_samples_per_second": 9.483, - "eval_steps_per_second": 4.742, - "step": 20000 - }, - { - "epoch": 1.91, - "learning_rate": 1e-05, - "loss": 3.5612, - "step": 22500 - }, - { - "epoch": 2.13, - "learning_rate": 0.0, - "loss": 3.5533, - "step": 25000 - }, - { - "epoch": 2.13, - "eval_loss": 3.5801477432250977, - "eval_runtime": 485.0169, - "eval_samples_per_second": 9.501, - "eval_steps_per_second": 4.75, - "step": 25000 - } - ], - "max_steps": 25000, - "num_train_epochs": 3, - "total_flos": 3.714827943936e+17, - "trial_name": null, - "trial_params": null -} diff --git a/checkpoint-25000/training_args.bin b/checkpoint-25000/training_args.bin deleted file mode 100644 index 6a78b5d574e7567392ad355276053f142e65e136..0000000000000000000000000000000000000000 --- a/checkpoint-25000/training_args.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d14f6f41cf21f30f5eb683a0b9414800094d15f898e0254a9c31393958aed209 -size 3375 diff --git a/checkpoint-5000/config.json b/checkpoint-5000/config.json deleted file mode 100644 index 01102e03e4779d2c43cb80a51c8459e256eba4ce..0000000000000000000000000000000000000000 --- a/checkpoint-5000/config.json +++ /dev/null @@ -1,39 +0,0 @@ -{ - "_name_or_path": "bigscience/bloom-350m", - "adapters": { - "adapters": {}, - "config_map": {}, - "fusion_config_map": {}, - "fusions": {} - }, - "apply_residual_connection_post_layernorm": false, - "architectures": [ - "BloomForCausalLM" - ], - "attention_dropout": 0.0, - "attention_softmax_in_fp32": true, - "bias_dropout_fusion": true, - "bos_token_id": 1, - "eos_token_id": 2, - "hidden_dropout": 0.0, - "hidden_size": 1024, - "initializer_range": 0.02, - "layer_norm_epsilon": 1e-05, - "masked_softmax_fusion": true, - "model_type": "bloom", - "n_head": 16, - "n_inner": null, - "n_layer": 24, - "offset_alibi": 100, - "pad_token_id": 3, - "pretraining_tp": 1, - "seq_length": 2048, - "skip_bias_add": true, - "skip_bias_add_qkv": false, - "slow_but_exact": false, - "torch_dtype": "float32", - "transformers_version": "4.20.0.dev0", - "unk_token_id": 0, - "use_cache": true, - "vocab_size": 250880 -} diff --git a/checkpoint-5000/optimizer.pt b/checkpoint-5000/optimizer.pt deleted file mode 100644 index b854e53c63ec6dfc6f85f39b04777dc692aa141f..0000000000000000000000000000000000000000 --- a/checkpoint-5000/optimizer.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:76e74c2a4cf3abb85ce5013e84f00d499475c7ee6d194a38ac63b54a4bc86aa9 -size 2254269 diff --git a/checkpoint-5000/pytorch_model.bin b/checkpoint-5000/pytorch_model.bin deleted file mode 100644 index 497be7c14072d1833ef1312e2515606b186bd59c..0000000000000000000000000000000000000000 --- a/checkpoint-5000/pytorch_model.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:18d517d8bad7860a81f6e95bc931cd7bc8ae0ad52ec0e69600f4e27f8a6dcc27 -size 2236955191 diff --git a/checkpoint-5000/rng_state.pth b/checkpoint-5000/rng_state.pth deleted file mode 100644 index 5157722a3d73b9477272bc1662b7333826cc1ef9..0000000000000000000000000000000000000000 --- a/checkpoint-5000/rng_state.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:03a3c1141f8bebf59967bea65fa021fcc2ad8a3d7753ae183d1d946d3f5b6d8e -size 14503 diff --git a/checkpoint-5000/scheduler.pt b/checkpoint-5000/scheduler.pt deleted file mode 100644 index 90bfa33aa2e57caff6083bf68c3b38db47518ccd..0000000000000000000000000000000000000000 --- a/checkpoint-5000/scheduler.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d4841973343260138ab6da195f6b9590db0a8465a2275f311ddcc72346193a56 -size 623 diff --git a/checkpoint-5000/special_tokens_map.json b/checkpoint-5000/special_tokens_map.json deleted file mode 100644 index 25bc39604f72700b3b8e10bd69bb2f227157edd1..0000000000000000000000000000000000000000 --- a/checkpoint-5000/special_tokens_map.json +++ /dev/null @@ -1 +0,0 @@ -{"bos_token": "", "eos_token": "", "unk_token": "", "pad_token": ""} \ No newline at end of file diff --git a/checkpoint-5000/tokenizer.json b/checkpoint-5000/tokenizer.json deleted file mode 100644 index a4fa803e0e0e614382d27635cc90df38f96f41f0..0000000000000000000000000000000000000000 --- a/checkpoint-5000/tokenizer.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8f6efc66e73f1fd69da4f436e48befb519fdff3fe18910850c1d41bd862293a5 -size 14500443 diff --git a/checkpoint-5000/tokenizer_config.json b/checkpoint-5000/tokenizer_config.json deleted file mode 100644 index 149b7da28b1ab9fe6defc5878e01cdb16f445e11..0000000000000000000000000000000000000000 --- a/checkpoint-5000/tokenizer_config.json +++ /dev/null @@ -1 +0,0 @@ -{"unk_token": "", "bos_token": "", "eos_token": "", "pad_token": "", "add_prefix_space": false, "name_or_path": "bigscience/bloom-350m", "special_tokens_map_file": null, "padding_side": "left", "tokenizer_class": "BloomTokenizer"} \ No newline at end of file diff --git a/checkpoint-5000/trainer_state.json b/checkpoint-5000/trainer_state.json deleted file mode 100644 index 4102acbe8e4331647f9e73674164221c2379e7ed..0000000000000000000000000000000000000000 --- a/checkpoint-5000/trainer_state.json +++ /dev/null @@ -1,36 +0,0 @@ -{ - "best_metric": 3.8811988830566406, - "best_model_checkpoint": "/users/zyong2/data/zyong2/bigscience/data/processed/024/bloom-350m_az_bitfit_100000samples_-1vocab_original-frozen/checkpoint-5000", - "epoch": 0.42517006802721086, - "global_step": 5000, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 0.21, - "learning_rate": 9e-05, - "loss": 4.2702, - "step": 2500 - }, - { - "epoch": 0.43, - "learning_rate": 8e-05, - "loss": 3.9326, - "step": 5000 - }, - { - "epoch": 0.43, - "eval_loss": 3.8811988830566406, - "eval_runtime": 485.7533, - "eval_samples_per_second": 9.486, - "eval_steps_per_second": 4.743, - "step": 5000 - } - ], - "max_steps": 25000, - "num_train_epochs": 3, - "total_flos": 7.429655887872e+16, - "trial_name": null, - "trial_params": null -} diff --git a/checkpoint-5000/training_args.bin b/checkpoint-5000/training_args.bin deleted file mode 100644 index 6a78b5d574e7567392ad355276053f142e65e136..0000000000000000000000000000000000000000 --- a/checkpoint-5000/training_args.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d14f6f41cf21f30f5eb683a0b9414800094d15f898e0254a9c31393958aed209 -size 3375 diff --git a/checkpoint-5000/wikiann-az-results.txt b/checkpoint-5000/wikiann-az-results.txt deleted file mode 100644 index 0aef5c7d84226401d9be14907e692cb0bbe4ab6e..0000000000000000000000000000000000000000 --- a/checkpoint-5000/wikiann-az-results.txt +++ /dev/null @@ -1,8 +0,0 @@ -================================================== -Results -================================================== -Model: /users/zyong2/data/zyong2/bigscience/data/processed/024/bloom-350m_az_bitfit_100000samples_-1vocab_original-frozen/checkpoint-5000 -[0.38218793828892006, 0.3579985905567301, 0.3697183098591549, 0.3871193559677984, 0.3948467966573816, 0.3963901423116973, 0.37625824366539395, 0.3749124036440084, 0.3687305430646835, 0.37491337491337495] -37.83 -1.14 -================================================== \ No newline at end of file diff --git a/checkpoint-7500/config.json b/checkpoint-7500/config.json deleted file mode 100644 index 01102e03e4779d2c43cb80a51c8459e256eba4ce..0000000000000000000000000000000000000000 --- a/checkpoint-7500/config.json +++ /dev/null @@ -1,39 +0,0 @@ -{ - "_name_or_path": "bigscience/bloom-350m", - "adapters": { - "adapters": {}, - "config_map": {}, - "fusion_config_map": {}, - "fusions": {} - }, - "apply_residual_connection_post_layernorm": false, - "architectures": [ - "BloomForCausalLM" - ], - "attention_dropout": 0.0, - "attention_softmax_in_fp32": true, - "bias_dropout_fusion": true, - "bos_token_id": 1, - "eos_token_id": 2, - "hidden_dropout": 0.0, - "hidden_size": 1024, - "initializer_range": 0.02, - "layer_norm_epsilon": 1e-05, - "masked_softmax_fusion": true, - "model_type": "bloom", - "n_head": 16, - "n_inner": null, - "n_layer": 24, - "offset_alibi": 100, - "pad_token_id": 3, - "pretraining_tp": 1, - "seq_length": 2048, - "skip_bias_add": true, - "skip_bias_add_qkv": false, - "slow_but_exact": false, - "torch_dtype": "float32", - "transformers_version": "4.20.0.dev0", - "unk_token_id": 0, - "use_cache": true, - "vocab_size": 250880 -} diff --git a/checkpoint-7500/optimizer.pt b/checkpoint-7500/optimizer.pt deleted file mode 100644 index 48d2bce367370a8c48c8aac155df2b255e362962..0000000000000000000000000000000000000000 --- a/checkpoint-7500/optimizer.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f055cd2bf6df4026e63d2f99f9b1f45b751fcc253244b2844bf2b4c838946bff -size 2254269 diff --git a/checkpoint-7500/pytorch_model.bin b/checkpoint-7500/pytorch_model.bin deleted file mode 100644 index 1034916817f811a52bbd3980927b40f8bbf9ff2d..0000000000000000000000000000000000000000 --- a/checkpoint-7500/pytorch_model.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:cebe5b8bfdb3893e54dbb049657d44d1b94c8187b22a74529a7378c7ca325c94 -size 2236955191 diff --git a/checkpoint-7500/rng_state.pth b/checkpoint-7500/rng_state.pth deleted file mode 100644 index 916464268cda9acd1b743ea2e7d4186638891878..0000000000000000000000000000000000000000 --- a/checkpoint-7500/rng_state.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ae1d1585ab878b7ae0fe1ecbd43fce472401533271aa49f074210172000bb383 -size 14503 diff --git a/checkpoint-7500/scheduler.pt b/checkpoint-7500/scheduler.pt deleted file mode 100644 index 92a7351ba15122d445557f2ca342b5c2a8f66242..0000000000000000000000000000000000000000 --- a/checkpoint-7500/scheduler.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3645d67727f305ed9f285de92859169b9ced76729a97a8143c6ece6d98a161d3 -size 623 diff --git a/checkpoint-7500/special_tokens_map.json b/checkpoint-7500/special_tokens_map.json deleted file mode 100644 index 25bc39604f72700b3b8e10bd69bb2f227157edd1..0000000000000000000000000000000000000000 --- a/checkpoint-7500/special_tokens_map.json +++ /dev/null @@ -1 +0,0 @@ -{"bos_token": "", "eos_token": "", "unk_token": "", "pad_token": ""} \ No newline at end of file diff --git a/checkpoint-7500/tokenizer.json b/checkpoint-7500/tokenizer.json deleted file mode 100644 index a4fa803e0e0e614382d27635cc90df38f96f41f0..0000000000000000000000000000000000000000 --- a/checkpoint-7500/tokenizer.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8f6efc66e73f1fd69da4f436e48befb519fdff3fe18910850c1d41bd862293a5 -size 14500443 diff --git a/checkpoint-7500/tokenizer_config.json b/checkpoint-7500/tokenizer_config.json deleted file mode 100644 index 149b7da28b1ab9fe6defc5878e01cdb16f445e11..0000000000000000000000000000000000000000 --- a/checkpoint-7500/tokenizer_config.json +++ /dev/null @@ -1 +0,0 @@ -{"unk_token": "", "bos_token": "", "eos_token": "", "pad_token": "", "add_prefix_space": false, "name_or_path": "bigscience/bloom-350m", "special_tokens_map_file": null, "padding_side": "left", "tokenizer_class": "BloomTokenizer"} \ No newline at end of file diff --git a/checkpoint-7500/trainer_state.json b/checkpoint-7500/trainer_state.json deleted file mode 100644 index 25fb65b4be6dc9d09d36b1970cdafb9651fe1290..0000000000000000000000000000000000000000 --- a/checkpoint-7500/trainer_state.json +++ /dev/null @@ -1,58 +0,0 @@ -{ - "best_metric": 3.3035173416137695, - "best_model_checkpoint": "/users/zyong2/data/zyong2/bigscience/data/processed/024/bloom-350m_az_bitfit_100000samples_-1vocab_original-frozen/checkpoint-7500", - "epoch": 0.6377551020408163, - "global_step": 7500, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 0.21, - "learning_rate": 0.0008, - "loss": 3.7096, - "step": 2500 - }, - { - "epoch": 0.21, - "eval_loss": 3.477477550506592, - "eval_runtime": 486.1756, - "eval_samples_per_second": 9.478, - "eval_steps_per_second": 4.739, - "step": 2500 - }, - { - "epoch": 0.43, - "learning_rate": 0.0006, - "loss": 3.3667, - "step": 5000 - }, - { - "epoch": 0.43, - "eval_loss": 3.3522424697875977, - "eval_runtime": 486.8305, - "eval_samples_per_second": 9.465, - "eval_steps_per_second": 4.733, - "step": 5000 - }, - { - "epoch": 0.64, - "learning_rate": 0.0004, - "loss": 3.29, - "step": 7500 - }, - { - "epoch": 0.64, - "eval_loss": 3.3035173416137695, - "eval_runtime": 485.9329, - "eval_samples_per_second": 9.483, - "eval_steps_per_second": 4.741, - "step": 7500 - } - ], - "max_steps": 12500, - "num_train_epochs": 2, - "total_flos": 1.1144483831808e+17, - "trial_name": null, - "trial_params": null -} diff --git a/checkpoint-7500/training_args.bin b/checkpoint-7500/training_args.bin deleted file mode 100644 index b756f4c53d69d220b82fd6ca639d45c2fbd66683..0000000000000000000000000000000000000000 --- a/checkpoint-7500/training_args.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3803a6357491b3295e37b75071a44122ab2d037bb565a91e83c16087b12fef18 -size 3375 diff --git a/checkpoint-7500/wikiann-az-results.txt b/checkpoint-7500/wikiann-az-results.txt deleted file mode 100644 index affcf7627eadd1771e83c80a5b0006c6d484507b..0000000000000000000000000000000000000000 --- a/checkpoint-7500/wikiann-az-results.txt +++ /dev/null @@ -1,8 +0,0 @@ -================================================== -Results -================================================== -Model: /users/zyong2/data/zyong2/bigscience/data/processed/024/bloom-350m_az_bitfit_100000samples_-1vocab_original-frozen/checkpoint-7500 -[0.39019264448336255, 0.3659394792399719, 0.3655536028119509, 0.3738317757009345, 0.3984806629834254, 0.39290681502086233, 0.37873523280055593, 0.3785112359550562, 0.3681318681318681, 0.3688581314878893] -37.81 -1.14 -================================================== \ No newline at end of file diff --git a/config.json b/config.json deleted file mode 100644 index 01102e03e4779d2c43cb80a51c8459e256eba4ce..0000000000000000000000000000000000000000 --- a/config.json +++ /dev/null @@ -1,39 +0,0 @@ -{ - "_name_or_path": "bigscience/bloom-350m", - "adapters": { - "adapters": {}, - "config_map": {}, - "fusion_config_map": {}, - "fusions": {} - }, - "apply_residual_connection_post_layernorm": false, - "architectures": [ - "BloomForCausalLM" - ], - "attention_dropout": 0.0, - "attention_softmax_in_fp32": true, - "bias_dropout_fusion": true, - "bos_token_id": 1, - "eos_token_id": 2, - "hidden_dropout": 0.0, - "hidden_size": 1024, - "initializer_range": 0.02, - "layer_norm_epsilon": 1e-05, - "masked_softmax_fusion": true, - "model_type": "bloom", - "n_head": 16, - "n_inner": null, - "n_layer": 24, - "offset_alibi": 100, - "pad_token_id": 3, - "pretraining_tp": 1, - "seq_length": 2048, - "skip_bias_add": true, - "skip_bias_add_qkv": false, - "slow_but_exact": false, - "torch_dtype": "float32", - "transformers_version": "4.20.0.dev0", - "unk_token_id": 0, - "use_cache": true, - "vocab_size": 250880 -} diff --git a/eval_results.json b/eval_results.json deleted file mode 100644 index f587834b9132311bc5f53267d293963877319e77..0000000000000000000000000000000000000000 --- a/eval_results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "epoch": 2.13, - "eval_loss": 3.5801477432250977, - "eval_runtime": 484.8289, - "eval_samples": 4608, - "eval_samples_per_second": 9.504, - "eval_steps_per_second": 4.752, - "perplexity": 35.87884131122737 -} \ No newline at end of file diff --git a/pytorch_model.bin b/pytorch_model.bin deleted file mode 100644 index 35ca0a6b9d95fa2d3ea320d4c76eacb7f579a9bb..0000000000000000000000000000000000000000 --- a/pytorch_model.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5a88af31593d9a67c1c620eb8db4bf37161db7447cf27709c0edec31c4dd7a73 -size 2236955191 diff --git a/special_tokens_map.json b/special_tokens_map.json deleted file mode 100644 index 25bc39604f72700b3b8e10bd69bb2f227157edd1..0000000000000000000000000000000000000000 --- a/special_tokens_map.json +++ /dev/null @@ -1 +0,0 @@ -{"bos_token": "", "eos_token": "", "unk_token": "", "pad_token": ""} \ No newline at end of file diff --git a/tatoeba-az-en-results.txt b/tatoeba-az-en-results.txt deleted file mode 100644 index d3459994afce019af366efcb2e287ca9040ba2d5..0000000000000000000000000000000000000000 --- a/tatoeba-az-en-results.txt +++ /dev/null @@ -1,7 +0,0 @@ -================================================== -Tatoeba Results (200 pairs of az-en) -================================================== -Model: /users/zyong2/data/zyong2/bigscience/data/processed/024/bloom-350m_az_bitfit_100000samples_-1vocab_original-frozen -[tensor(0.0150)] -1.50 ± 0.00 -================================================== \ No newline at end of file diff --git a/tokenizer.json b/tokenizer.json deleted file mode 100644 index a4fa803e0e0e614382d27635cc90df38f96f41f0..0000000000000000000000000000000000000000 --- a/tokenizer.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8f6efc66e73f1fd69da4f436e48befb519fdff3fe18910850c1d41bd862293a5 -size 14500443 diff --git a/tokenizer_config.json b/tokenizer_config.json deleted file mode 100644 index 149b7da28b1ab9fe6defc5878e01cdb16f445e11..0000000000000000000000000000000000000000 --- a/tokenizer_config.json +++ /dev/null @@ -1 +0,0 @@ -{"unk_token": "", "bos_token": "", "eos_token": "", "pad_token": "", "add_prefix_space": false, "name_or_path": "bigscience/bloom-350m", "special_tokens_map_file": null, "padding_side": "left", "tokenizer_class": "BloomTokenizer"} \ No newline at end of file diff --git a/train_results.json b/train_results.json deleted file mode 100644 index 28e1d0c22f12fd1f14fe5f5e36e5e514a5c49381..0000000000000000000000000000000000000000 --- a/train_results.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "epoch": 2.13, - "train_loss": 3.7238579296875, - "train_runtime": 52437.7197, - "train_samples": 94080, - "train_samples_per_second": 3.814, - "train_steps_per_second": 0.477 -} \ No newline at end of file diff --git a/trainer_state.json b/trainer_state.json deleted file mode 100644 index 1b0538c830e1d2686c62f21a88f803163582203b..0000000000000000000000000000000000000000 --- a/trainer_state.json +++ /dev/null @@ -1,125 +0,0 @@ -{ - "best_metric": 3.5801477432250977, - "best_model_checkpoint": "/users/zyong2/data/zyong2/bigscience/data/processed/024/bloom-350m_az_bitfit_100000samples_-1vocab_original-frozen/checkpoint-25000", - "epoch": 2.1258503401360542, - "global_step": 25000, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 0.21, - "learning_rate": 9e-05, - "loss": 4.2702, - "step": 2500 - }, - { - "epoch": 0.43, - "learning_rate": 8e-05, - "loss": 3.9326, - "step": 5000 - }, - { - "epoch": 0.43, - "eval_loss": 3.8811988830566406, - "eval_runtime": 485.7533, - "eval_samples_per_second": 9.486, - "eval_steps_per_second": 4.743, - "step": 5000 - }, - { - "epoch": 0.64, - "learning_rate": 7e-05, - "loss": 3.7946, - "step": 7500 - }, - { - "epoch": 0.85, - "learning_rate": 6e-05, - "loss": 3.7099, - "step": 10000 - }, - { - "epoch": 0.85, - "eval_loss": 3.7074594497680664, - "eval_runtime": 487.18, - "eval_samples_per_second": 9.459, - "eval_steps_per_second": 4.729, - "step": 10000 - }, - { - "epoch": 1.06, - "learning_rate": 5e-05, - "loss": 3.657, - "step": 12500 - }, - { - "epoch": 1.28, - "learning_rate": 4e-05, - "loss": 3.6085, - "step": 15000 - }, - { - "epoch": 1.28, - "eval_loss": 3.6284306049346924, - "eval_runtime": 484.984, - "eval_samples_per_second": 9.501, - "eval_steps_per_second": 4.751, - "step": 15000 - }, - { - "epoch": 1.49, - "learning_rate": 3e-05, - "loss": 3.5817, - "step": 17500 - }, - { - "epoch": 1.7, - "learning_rate": 2e-05, - "loss": 3.5696, - "step": 20000 - }, - { - "epoch": 1.7, - "eval_loss": 3.5915658473968506, - "eval_runtime": 485.8978, - "eval_samples_per_second": 9.483, - "eval_steps_per_second": 4.742, - "step": 20000 - }, - { - "epoch": 1.91, - "learning_rate": 1e-05, - "loss": 3.5612, - "step": 22500 - }, - { - "epoch": 2.13, - "learning_rate": 0.0, - "loss": 3.5533, - "step": 25000 - }, - { - "epoch": 2.13, - "eval_loss": 3.5801477432250977, - "eval_runtime": 485.0169, - "eval_samples_per_second": 9.501, - "eval_steps_per_second": 4.75, - "step": 25000 - }, - { - "epoch": 2.13, - "step": 25000, - "total_flos": 3.714827943936e+17, - "train_loss": 3.7238579296875, - "train_runtime": 52437.7197, - "train_samples_per_second": 3.814, - "train_steps_per_second": 0.477 - } - ], - "max_steps": 25000, - "num_train_epochs": 3, - "total_flos": 3.714827943936e+17, - "trial_name": null, - "trial_params": null -} diff --git a/training_args.bin b/training_args.bin deleted file mode 100644 index 6a78b5d574e7567392ad355276053f142e65e136..0000000000000000000000000000000000000000 --- a/training_args.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d14f6f41cf21f30f5eb683a0b9414800094d15f898e0254a9c31393958aed209 -size 3375 diff --git a/wikiann-az-results.txt b/wikiann-az-results.txt deleted file mode 100644 index c808e247415f2fbf72bde0057f0550c9de322b2c..0000000000000000000000000000000000000000 --- a/wikiann-az-results.txt +++ /dev/null @@ -1,8 +0,0 @@ -================================================== -Results WikiANN-az -================================================== -Model: /users/zyong2/data/zyong2/bigscience/data/processed/024/bloom-350m_az_bitfit_100000samples_-1vocab_original-frozen/ -[0.088, 0.09881847475832439, 0.09090909090909091, 0.11284046692607004, 0.09696376101860922] -9.75 -0.86 -================================================== \ No newline at end of file diff --git a/word_embeddings.pt b/word_embeddings.pt deleted file mode 100644 index 2a4cbda005e5d2ade008b1110876ca6a87706b17..0000000000000000000000000000000000000000 --- a/word_embeddings.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:174ed618237771e5906be0e8d70c568de63633f3bb5e8a1e303bbdbaeaedc1ca -size 1027605867 diff --git a/word_embeddings_layernorm.pt b/word_embeddings_layernorm.pt deleted file mode 100644 index 1b8759b2a378472c0f17a4292a2a6276b8a3e07e..0000000000000000000000000000000000000000 --- a/word_embeddings_layernorm.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:10917f86841a4f322406bd72ba2e4ae8e4780aaf462c98a76eca01e0c5fbc893 -size 9703