diff --git a/README.md b/README.md index a68ca72bb173428ede2bfefa9b15497bf7950426..1a8a4180bcf769a642aab7021b9cad81bdbf26d5 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,204 @@ --- -license: bigcode-openrail-m +library_name: peft +base_model: bigcode/starcoder --- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +### Framework versions + +- PEFT 0.7.2.dev0 \ No newline at end of file diff --git a/adapter_config.json b/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..6098288fad4bbcdb74ef084a3768b87e7417e0eb --- /dev/null +++ b/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "bigcode/starcoder", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.1, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "c_fc", + "c_proj", + "c_attn", + "q_attn" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} \ No newline at end of file diff --git a/adapter_model.safetensors b/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d98fe66714affaa8f1014cba78cba4c7eab4d2e3 --- /dev/null +++ b/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:495e43d13098fd20e268cc0b7e29b7ce202bd78c87b46b411c81898af04d9b90 +size 55255584 diff --git a/checkpoint-100/README.md b/checkpoint-100/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1a8a4180bcf769a642aab7021b9cad81bdbf26d5 --- /dev/null +++ b/checkpoint-100/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +base_model: bigcode/starcoder +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +### Framework versions + +- PEFT 0.7.2.dev0 \ No newline at end of file diff --git a/checkpoint-100/adapter_config.json b/checkpoint-100/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..6098288fad4bbcdb74ef084a3768b87e7417e0eb --- /dev/null +++ b/checkpoint-100/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "bigcode/starcoder", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.1, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "c_fc", + "c_proj", + "c_attn", + "q_attn" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-100/adapter_model.safetensors b/checkpoint-100/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c2f43330d9ea550d303471a7a5737775eff445ba --- /dev/null +++ b/checkpoint-100/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb4472b9d8d18b86805c351acc203625198bbf35920baa841afdc6ee12f5240f +size 55255584 diff --git a/checkpoint-100/optimizer.pt b/checkpoint-100/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..6ccfc46f6168104f81b51e27fbb10be789580e83 --- /dev/null +++ b/checkpoint-100/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82c7ac539ee39d3cb5e740b4054e9ef614187aa4557fff8989ac9bcfc8006a47 +size 110696954 diff --git a/checkpoint-100/rng_state.pth b/checkpoint-100/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..a774faeaf089ab95129d3906ede82432cf83f1a1 --- /dev/null +++ b/checkpoint-100/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f903165bb36368f47dd9f2d97c529373babf7977e621de0dc0c839044562d263 +size 14180 diff --git a/checkpoint-100/scheduler.pt b/checkpoint-100/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..8f98483cf62f8cf7c1eb1872901fa62de73660e0 --- /dev/null +++ b/checkpoint-100/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf139b2cd869934a6a802450d748b65f070f2fe1250b16bce2934376e88f03de +size 1064 diff --git a/checkpoint-100/trainer_state.json b/checkpoint-100/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..c900a82efc451643b241c709b76778b83756bb61 --- /dev/null +++ b/checkpoint-100/trainer_state.json @@ -0,0 +1,53 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.1, + "eval_steps": 100, + "global_step": 100, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.03, + "learning_rate": 0.0001666666666666667, + "loss": 0.8745, + "step": 25 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019979028262377118, + "loss": 0.8093, + "step": 50 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019893981312363562, + "loss": 0.7357, + "step": 75 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019744105246469263, + "loss": 0.7535, + "step": 100 + }, + { + "epoch": 0.1, + "eval_loss": 0.4003306031227112, + "eval_runtime": 1.7839, + "eval_samples_per_second": 2.242, + "eval_steps_per_second": 0.561, + "step": 100 + } + ], + "logging_steps": 25, + "max_steps": 1000, + "num_input_tokens_seen": 0, + "num_train_epochs": 9223372036854775807, + "save_steps": 100, + "total_flos": 1.493507298557952e+17, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-100/training_args.bin b/checkpoint-100/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..557c39f09d92e07be295a9a915c67bb7ca437307 --- /dev/null +++ b/checkpoint-100/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fcc93ccb3f87a3311bb3baa8290949012608d857aa7e1f8e40c50e3c4f99548 +size 4792 diff --git a/checkpoint-1000/README.md b/checkpoint-1000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1a8a4180bcf769a642aab7021b9cad81bdbf26d5 --- /dev/null +++ b/checkpoint-1000/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +base_model: bigcode/starcoder +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +### Framework versions + +- PEFT 0.7.2.dev0 \ No newline at end of file diff --git a/checkpoint-1000/adapter_config.json b/checkpoint-1000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..6098288fad4bbcdb74ef084a3768b87e7417e0eb --- /dev/null +++ b/checkpoint-1000/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "bigcode/starcoder", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.1, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "c_fc", + "c_proj", + "c_attn", + "q_attn" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-1000/adapter_model.safetensors b/checkpoint-1000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d98fe66714affaa8f1014cba78cba4c7eab4d2e3 --- /dev/null +++ b/checkpoint-1000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:495e43d13098fd20e268cc0b7e29b7ce202bd78c87b46b411c81898af04d9b90 +size 55255584 diff --git a/checkpoint-1000/optimizer.pt b/checkpoint-1000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..28058adad9033f662cf2cbb29492c441e6f76925 --- /dev/null +++ b/checkpoint-1000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbc36e1c878784099b46f5b9a718b335165f7613c64f61d059016e0cedfaa033 +size 110696954 diff --git a/checkpoint-1000/rng_state.pth b/checkpoint-1000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..f5314951bacbb24ef52ef6af64118ecbdb46677f --- /dev/null +++ b/checkpoint-1000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1dc61da6425213a4ed0c6718da9534c16f880b198a814db7fecbd699176650b +size 14244 diff --git a/checkpoint-1000/scheduler.pt b/checkpoint-1000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..72348d78bd4d31a6338b619b3f40d676886b8aa9 --- /dev/null +++ b/checkpoint-1000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7b5bf190dc871967c45091d9f1ab233b2d2ed62baca21fee5dfedb5718ffa5d +size 1064 diff --git a/checkpoint-1000/trainer_state.json b/checkpoint-1000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..c80c1f2c8a3ea659eb441331fb4842383630d8cc --- /dev/null +++ b/checkpoint-1000/trainer_state.json @@ -0,0 +1,341 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 100, + "global_step": 1000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.03, + "learning_rate": 0.0001666666666666667, + "loss": 0.8745, + "step": 25 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019979028262377118, + "loss": 0.8093, + "step": 50 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019893981312363562, + "loss": 0.7357, + "step": 75 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019744105246469263, + "loss": 0.7535, + "step": 100 + }, + { + "epoch": 0.1, + "eval_loss": 0.4003306031227112, + "eval_runtime": 1.7839, + "eval_samples_per_second": 2.242, + "eval_steps_per_second": 0.561, + "step": 100 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001953038210948861, + "loss": 0.7249, + "step": 125 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019254212296427044, + "loss": 0.7118, + "step": 150 + }, + { + "epoch": 0.17, + "learning_rate": 0.00018917405376582145, + "loss": 0.7467, + "step": 175 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018522168236559695, + "loss": 0.6714, + "step": 200 + }, + { + "epoch": 0.2, + "eval_loss": 0.3684937059879303, + "eval_runtime": 1.7853, + "eval_samples_per_second": 2.24, + "eval_steps_per_second": 0.56, + "step": 200 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018071090619916093, + "loss": 0.654, + "step": 225 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017567128158176953, + "loss": 0.6392, + "step": 250 + }, + { + "epoch": 0.28, + "learning_rate": 0.00017013583004418993, + "loss": 0.5745, + "step": 275 + }, + { + "epoch": 0.3, + "learning_rate": 0.000164140821963114, + "loss": 0.5364, + "step": 300 + }, + { + "epoch": 0.3, + "eval_loss": 0.3665352761745453, + "eval_runtime": 1.7852, + "eval_samples_per_second": 2.241, + "eval_steps_per_second": 0.56, + "step": 300 + }, + { + "epoch": 0.33, + "learning_rate": 0.00015772553890390197, + "loss": 0.5693, + "step": 325 + }, + { + "epoch": 0.35, + "learning_rate": 0.00015093201623287631, + "loss": 0.563, + "step": 350 + }, + { + "epoch": 0.38, + "learning_rate": 0.00014380476768566824, + "loss": 0.5478, + "step": 375 + }, + { + "epoch": 0.4, + "learning_rate": 0.00013639049369634876, + "loss": 0.5763, + "step": 400 + }, + { + "epoch": 0.4, + "eval_loss": 0.3363753855228424, + "eval_runtime": 1.7851, + "eval_samples_per_second": 2.241, + "eval_steps_per_second": 0.56, + "step": 400 + }, + { + "epoch": 0.42, + "learning_rate": 0.00012873777539848283, + "loss": 0.4891, + "step": 425 + }, + { + "epoch": 0.45, + "learning_rate": 0.00012089675630312754, + "loss": 0.5331, + "step": 450 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011291881373954065, + "loss": 0.5679, + "step": 475 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010485622221144484, + "loss": 0.5982, + "step": 500 + }, + { + "epoch": 0.5, + "eval_loss": 0.3186224400997162, + "eval_runtime": 1.7828, + "eval_samples_per_second": 2.244, + "eval_steps_per_second": 0.561, + "step": 500 + }, + { + "epoch": 0.53, + "learning_rate": 9.676181087466444e-05, + "loss": 0.5467, + "step": 525 + }, + { + "epoch": 0.55, + "learning_rate": 8.868861738047158e-05, + "loss": 0.5706, + "step": 550 + }, + { + "epoch": 0.57, + "learning_rate": 8.068954035279121e-05, + "loss": 0.504, + "step": 575 + }, + { + "epoch": 0.6, + "learning_rate": 7.281699277636572e-05, + "loss": 0.5267, + "step": 600 + }, + { + "epoch": 0.6, + "eval_loss": 0.32175499200820923, + "eval_runtime": 1.7846, + "eval_samples_per_second": 2.241, + "eval_steps_per_second": 0.56, + "step": 600 + }, + { + "epoch": 0.62, + "learning_rate": 6.512255856701177e-05, + "loss": 0.5414, + "step": 625 + }, + { + "epoch": 0.65, + "learning_rate": 5.765665457425102e-05, + "loss": 0.5412, + "step": 650 + }, + { + "epoch": 0.68, + "learning_rate": 5.0468200231001286e-05, + "loss": 0.4611, + "step": 675 + }, + { + "epoch": 0.7, + "learning_rate": 4.360429701490934e-05, + "loss": 0.5073, + "step": 700 + }, + { + "epoch": 0.7, + "eval_loss": 0.31275978684425354, + "eval_runtime": 1.7833, + "eval_samples_per_second": 2.243, + "eval_steps_per_second": 0.561, + "step": 700 + }, + { + "epoch": 0.72, + "learning_rate": 3.710991982161555e-05, + "loss": 0.4778, + "step": 725 + }, + { + "epoch": 0.75, + "learning_rate": 3.102762227218957e-05, + "loss": 0.5454, + "step": 750 + }, + { + "epoch": 0.78, + "learning_rate": 2.5397257885675397e-05, + "loss": 0.5612, + "step": 775 + }, + { + "epoch": 0.8, + "learning_rate": 2.025571894372794e-05, + "loss": 0.4983, + "step": 800 + }, + { + "epoch": 0.8, + "eval_loss": 0.31457942724227905, + "eval_runtime": 1.7821, + "eval_samples_per_second": 2.245, + "eval_steps_per_second": 0.561, + "step": 800 + }, + { + "epoch": 0.82, + "learning_rate": 1.563669475839956e-05, + "loss": 0.4941, + "step": 825 + }, + { + "epoch": 0.85, + "learning_rate": 1.1570450926997655e-05, + "loss": 0.4926, + "step": 850 + }, + { + "epoch": 0.88, + "learning_rate": 8.083631020418791e-06, + "loss": 0.5094, + "step": 875 + }, + { + "epoch": 0.9, + "learning_rate": 5.199082004372957e-06, + "loss": 0.5116, + "step": 900 + }, + { + "epoch": 0.9, + "eval_loss": 0.31357938051223755, + "eval_runtime": 1.7811, + "eval_samples_per_second": 2.246, + "eval_steps_per_second": 0.561, + "step": 900 + }, + { + "epoch": 0.93, + "learning_rate": 2.9357045374040825e-06, + "loss": 0.4244, + "step": 925 + }, + { + "epoch": 0.95, + "learning_rate": 1.30832912661093e-06, + "loss": 0.4511, + "step": 950 + }, + { + "epoch": 0.97, + "learning_rate": 3.2761895254306287e-07, + "loss": 0.4179, + "step": 975 + }, + { + "epoch": 1.0, + "learning_rate": 0.0, + "loss": 0.4662, + "step": 1000 + }, + { + "epoch": 1.0, + "eval_loss": 0.3136279284954071, + "eval_runtime": 1.7836, + "eval_samples_per_second": 2.243, + "eval_steps_per_second": 0.561, + "step": 1000 + } + ], + "logging_steps": 25, + "max_steps": 1000, + "num_input_tokens_seen": 0, + "num_train_epochs": 9223372036854775807, + "save_steps": 100, + "total_flos": 1.493507298557952e+18, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1000/training_args.bin b/checkpoint-1000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..557c39f09d92e07be295a9a915c67bb7ca437307 --- /dev/null +++ b/checkpoint-1000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fcc93ccb3f87a3311bb3baa8290949012608d857aa7e1f8e40c50e3c4f99548 +size 4792 diff --git a/checkpoint-200/README.md b/checkpoint-200/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1a8a4180bcf769a642aab7021b9cad81bdbf26d5 --- /dev/null +++ b/checkpoint-200/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +base_model: bigcode/starcoder +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +### Framework versions + +- PEFT 0.7.2.dev0 \ No newline at end of file diff --git a/checkpoint-200/adapter_config.json b/checkpoint-200/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..6098288fad4bbcdb74ef084a3768b87e7417e0eb --- /dev/null +++ b/checkpoint-200/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "bigcode/starcoder", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.1, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "c_fc", + "c_proj", + "c_attn", + "q_attn" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-200/adapter_model.safetensors b/checkpoint-200/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fb02fb3a8f12cf59fc754b2a0ce8b3c3fbf9ffd0 --- /dev/null +++ b/checkpoint-200/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f05f803645127df3c355c2f4280d7691d4ce9deb1fbc56f61247b55c7e5b719 +size 55255584 diff --git a/checkpoint-200/optimizer.pt b/checkpoint-200/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..745f764cc79f699002ee2a5964b913fed046aba4 --- /dev/null +++ b/checkpoint-200/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee69d8793a2d6932e774ee12256f5321b4b714280b2d4d70141459f210dccc26 +size 110696954 diff --git a/checkpoint-200/rng_state.pth b/checkpoint-200/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..00180635a03155b2c4178261fddd39f7e7e01951 --- /dev/null +++ b/checkpoint-200/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:332b5f59525e9daeab689d4a9878243dd81ca2310bbe6cd23fa9e3060f182362 +size 14244 diff --git a/checkpoint-200/scheduler.pt b/checkpoint-200/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..173c1c98178d848af5e6242d79c72f38075e8fa4 --- /dev/null +++ b/checkpoint-200/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0a17e229e361808d0d69c75ecdc1ef9a97dcbcbf9ffe72c26d29d2aceaec1f9 +size 1064 diff --git a/checkpoint-200/trainer_state.json b/checkpoint-200/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..79dac770733a89d5e6eeaae8e7ed868ca8467e81 --- /dev/null +++ b/checkpoint-200/trainer_state.json @@ -0,0 +1,85 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.2, + "eval_steps": 100, + "global_step": 200, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.03, + "learning_rate": 0.0001666666666666667, + "loss": 0.8745, + "step": 25 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019979028262377118, + "loss": 0.8093, + "step": 50 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019893981312363562, + "loss": 0.7357, + "step": 75 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019744105246469263, + "loss": 0.7535, + "step": 100 + }, + { + "epoch": 0.1, + "eval_loss": 0.4003306031227112, + "eval_runtime": 1.7839, + "eval_samples_per_second": 2.242, + "eval_steps_per_second": 0.561, + "step": 100 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001953038210948861, + "loss": 0.7249, + "step": 125 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019254212296427044, + "loss": 0.7118, + "step": 150 + }, + { + "epoch": 0.17, + "learning_rate": 0.00018917405376582145, + "loss": 0.7467, + "step": 175 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018522168236559695, + "loss": 0.6714, + "step": 200 + }, + { + "epoch": 0.2, + "eval_loss": 0.3684937059879303, + "eval_runtime": 1.7853, + "eval_samples_per_second": 2.24, + "eval_steps_per_second": 0.56, + "step": 200 + } + ], + "logging_steps": 25, + "max_steps": 1000, + "num_input_tokens_seen": 0, + "num_train_epochs": 9223372036854775807, + "save_steps": 100, + "total_flos": 2.987014597115904e+17, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-200/training_args.bin b/checkpoint-200/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..557c39f09d92e07be295a9a915c67bb7ca437307 --- /dev/null +++ b/checkpoint-200/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fcc93ccb3f87a3311bb3baa8290949012608d857aa7e1f8e40c50e3c4f99548 +size 4792 diff --git a/checkpoint-300/README.md b/checkpoint-300/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1a8a4180bcf769a642aab7021b9cad81bdbf26d5 --- /dev/null +++ b/checkpoint-300/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +base_model: bigcode/starcoder +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +### Framework versions + +- PEFT 0.7.2.dev0 \ No newline at end of file diff --git a/checkpoint-300/adapter_config.json b/checkpoint-300/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..6098288fad4bbcdb74ef084a3768b87e7417e0eb --- /dev/null +++ b/checkpoint-300/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "bigcode/starcoder", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.1, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "c_fc", + "c_proj", + "c_attn", + "q_attn" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-300/adapter_model.safetensors b/checkpoint-300/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..19bca716f3c3c5b0131fbbea2156d93234440e51 --- /dev/null +++ b/checkpoint-300/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2a461b4f667ec91e2a4a0f8ebb1a0beac3e7b119c3c7bd46e011e114d66acbe +size 55255584 diff --git a/checkpoint-300/optimizer.pt b/checkpoint-300/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..017744ab9eada74a7521ccd39c84235d8b1578f4 --- /dev/null +++ b/checkpoint-300/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:490271b6f994cc5d0c62ef67f2239acfb833aef87a116a9b9be155f14dfd79f2 +size 110696954 diff --git a/checkpoint-300/rng_state.pth b/checkpoint-300/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..cabda9172fb94b27b87b397a6885021046e2f1f8 --- /dev/null +++ b/checkpoint-300/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ebe5ecb60a57db217626fc48593a1343b259e1412ab2cc0ce66d958d2f58062 +size 14180 diff --git a/checkpoint-300/scheduler.pt b/checkpoint-300/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..8fe178e003e381f81d080e79e5b55125339bbe1d --- /dev/null +++ b/checkpoint-300/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62db676ea589f2e897f3ed22ee3133a534ed12d0dd978bfaec8bc59572ea976b +size 1064 diff --git a/checkpoint-300/trainer_state.json b/checkpoint-300/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..6089c9c793dcb80af5815b3f07a7c1eb36eeb5f1 --- /dev/null +++ b/checkpoint-300/trainer_state.json @@ -0,0 +1,117 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.3, + "eval_steps": 100, + "global_step": 300, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.03, + "learning_rate": 0.0001666666666666667, + "loss": 0.8745, + "step": 25 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019979028262377118, + "loss": 0.8093, + "step": 50 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019893981312363562, + "loss": 0.7357, + "step": 75 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019744105246469263, + "loss": 0.7535, + "step": 100 + }, + { + "epoch": 0.1, + "eval_loss": 0.4003306031227112, + "eval_runtime": 1.7839, + "eval_samples_per_second": 2.242, + "eval_steps_per_second": 0.561, + "step": 100 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001953038210948861, + "loss": 0.7249, + "step": 125 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019254212296427044, + "loss": 0.7118, + "step": 150 + }, + { + "epoch": 0.17, + "learning_rate": 0.00018917405376582145, + "loss": 0.7467, + "step": 175 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018522168236559695, + "loss": 0.6714, + "step": 200 + }, + { + "epoch": 0.2, + "eval_loss": 0.3684937059879303, + "eval_runtime": 1.7853, + "eval_samples_per_second": 2.24, + "eval_steps_per_second": 0.56, + "step": 200 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018071090619916093, + "loss": 0.654, + "step": 225 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017567128158176953, + "loss": 0.6392, + "step": 250 + }, + { + "epoch": 0.28, + "learning_rate": 0.00017013583004418993, + "loss": 0.5745, + "step": 275 + }, + { + "epoch": 0.3, + "learning_rate": 0.000164140821963114, + "loss": 0.5364, + "step": 300 + }, + { + "epoch": 0.3, + "eval_loss": 0.3665352761745453, + "eval_runtime": 1.7852, + "eval_samples_per_second": 2.241, + "eval_steps_per_second": 0.56, + "step": 300 + } + ], + "logging_steps": 25, + "max_steps": 1000, + "num_input_tokens_seen": 0, + "num_train_epochs": 9223372036854775807, + "save_steps": 100, + "total_flos": 4.480521895673856e+17, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-300/training_args.bin b/checkpoint-300/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..557c39f09d92e07be295a9a915c67bb7ca437307 --- /dev/null +++ b/checkpoint-300/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fcc93ccb3f87a3311bb3baa8290949012608d857aa7e1f8e40c50e3c4f99548 +size 4792 diff --git a/checkpoint-400/README.md b/checkpoint-400/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1a8a4180bcf769a642aab7021b9cad81bdbf26d5 --- /dev/null +++ b/checkpoint-400/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +base_model: bigcode/starcoder +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +### Framework versions + +- PEFT 0.7.2.dev0 \ No newline at end of file diff --git a/checkpoint-400/adapter_config.json b/checkpoint-400/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..6098288fad4bbcdb74ef084a3768b87e7417e0eb --- /dev/null +++ b/checkpoint-400/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "bigcode/starcoder", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.1, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "c_fc", + "c_proj", + "c_attn", + "q_attn" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-400/adapter_model.safetensors b/checkpoint-400/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..91c029774e2143722295321ff5222d8b9163a5d8 --- /dev/null +++ b/checkpoint-400/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:edaca769743a175eff4e0ef6bc5d4e4446f21774430a20fc43aa3c3670419ba9 +size 55255584 diff --git a/checkpoint-400/optimizer.pt b/checkpoint-400/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..f0836d2bbbecc1f92f771deca100028a57c17f38 --- /dev/null +++ b/checkpoint-400/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e62dc266a27e92f541dc5d52ccba7508e44b1ad73a79786b9a027debc324590 +size 110696954 diff --git a/checkpoint-400/rng_state.pth b/checkpoint-400/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..d3f7e3211099cdf977d7e9f4fdd962ef78c19a55 --- /dev/null +++ b/checkpoint-400/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71832fb3990c1059e14ec9109d9bf125f682c118937e9e5b1a3310b3e8be05ec +size 14244 diff --git a/checkpoint-400/scheduler.pt b/checkpoint-400/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..3ad0056e5a40f74ac00fdca712d00ff6acc08a16 --- /dev/null +++ b/checkpoint-400/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73179501dc4bcc1b0d3ff711880af909a9b84bb7d003a900c122d08331d45bfb +size 1064 diff --git a/checkpoint-400/trainer_state.json b/checkpoint-400/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..54845698a70956c7585b008ff514aa7c35bef820 --- /dev/null +++ b/checkpoint-400/trainer_state.json @@ -0,0 +1,149 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.4, + "eval_steps": 100, + "global_step": 400, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.03, + "learning_rate": 0.0001666666666666667, + "loss": 0.8745, + "step": 25 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019979028262377118, + "loss": 0.8093, + "step": 50 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019893981312363562, + "loss": 0.7357, + "step": 75 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019744105246469263, + "loss": 0.7535, + "step": 100 + }, + { + "epoch": 0.1, + "eval_loss": 0.4003306031227112, + "eval_runtime": 1.7839, + "eval_samples_per_second": 2.242, + "eval_steps_per_second": 0.561, + "step": 100 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001953038210948861, + "loss": 0.7249, + "step": 125 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019254212296427044, + "loss": 0.7118, + "step": 150 + }, + { + "epoch": 0.17, + "learning_rate": 0.00018917405376582145, + "loss": 0.7467, + "step": 175 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018522168236559695, + "loss": 0.6714, + "step": 200 + }, + { + "epoch": 0.2, + "eval_loss": 0.3684937059879303, + "eval_runtime": 1.7853, + "eval_samples_per_second": 2.24, + "eval_steps_per_second": 0.56, + "step": 200 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018071090619916093, + "loss": 0.654, + "step": 225 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017567128158176953, + "loss": 0.6392, + "step": 250 + }, + { + "epoch": 0.28, + "learning_rate": 0.00017013583004418993, + "loss": 0.5745, + "step": 275 + }, + { + "epoch": 0.3, + "learning_rate": 0.000164140821963114, + "loss": 0.5364, + "step": 300 + }, + { + "epoch": 0.3, + "eval_loss": 0.3665352761745453, + "eval_runtime": 1.7852, + "eval_samples_per_second": 2.241, + "eval_steps_per_second": 0.56, + "step": 300 + }, + { + "epoch": 0.33, + "learning_rate": 0.00015772553890390197, + "loss": 0.5693, + "step": 325 + }, + { + "epoch": 0.35, + "learning_rate": 0.00015093201623287631, + "loss": 0.563, + "step": 350 + }, + { + "epoch": 0.38, + "learning_rate": 0.00014380476768566824, + "loss": 0.5478, + "step": 375 + }, + { + "epoch": 0.4, + "learning_rate": 0.00013639049369634876, + "loss": 0.5763, + "step": 400 + }, + { + "epoch": 0.4, + "eval_loss": 0.3363753855228424, + "eval_runtime": 1.7851, + "eval_samples_per_second": 2.241, + "eval_steps_per_second": 0.56, + "step": 400 + } + ], + "logging_steps": 25, + "max_steps": 1000, + "num_input_tokens_seen": 0, + "num_train_epochs": 9223372036854775807, + "save_steps": 100, + "total_flos": 5.974029194231808e+17, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-400/training_args.bin b/checkpoint-400/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..557c39f09d92e07be295a9a915c67bb7ca437307 --- /dev/null +++ b/checkpoint-400/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fcc93ccb3f87a3311bb3baa8290949012608d857aa7e1f8e40c50e3c4f99548 +size 4792 diff --git a/checkpoint-500/README.md b/checkpoint-500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1a8a4180bcf769a642aab7021b9cad81bdbf26d5 --- /dev/null +++ b/checkpoint-500/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +base_model: bigcode/starcoder +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +### Framework versions + +- PEFT 0.7.2.dev0 \ No newline at end of file diff --git a/checkpoint-500/adapter_config.json b/checkpoint-500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..6098288fad4bbcdb74ef084a3768b87e7417e0eb --- /dev/null +++ b/checkpoint-500/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "bigcode/starcoder", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.1, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "c_fc", + "c_proj", + "c_attn", + "q_attn" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-500/adapter_model.safetensors b/checkpoint-500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1218a1b8743f94751d777eb3442560a3dc5ffd1f --- /dev/null +++ b/checkpoint-500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3da466246316f1d6c831d2bca7fe024dc139f59f71a0bb8a12a79bf885467db4 +size 55255584 diff --git a/checkpoint-500/optimizer.pt b/checkpoint-500/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..fd8a455183720b0639e1ea805d90f76307b063fd --- /dev/null +++ b/checkpoint-500/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d1dd841350fff6cb5403d07db622784417e503895aa7c470dc0bb46de7e37d6 +size 110696954 diff --git a/checkpoint-500/rng_state.pth b/checkpoint-500/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..7392b708e7078eff436d42ec64b06199047cc73a --- /dev/null +++ b/checkpoint-500/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f08f4c8427f0bcf80eabd18dc74ad53a2ae6e85f6226bc2a3da12c0c80968b99 +size 14244 diff --git a/checkpoint-500/scheduler.pt b/checkpoint-500/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..9f4d2be0a4fc6d85c5a622bc714619ecc88082ef --- /dev/null +++ b/checkpoint-500/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f51b657e3d38a2589f1fc9606eb9bdf1d6b09dd6934a23956cba0003ba32ad8 +size 1064 diff --git a/checkpoint-500/trainer_state.json b/checkpoint-500/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..33c225bc5834b4dea752fce088c70cfc58716871 --- /dev/null +++ b/checkpoint-500/trainer_state.json @@ -0,0 +1,181 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.5, + "eval_steps": 100, + "global_step": 500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.03, + "learning_rate": 0.0001666666666666667, + "loss": 0.8745, + "step": 25 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019979028262377118, + "loss": 0.8093, + "step": 50 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019893981312363562, + "loss": 0.7357, + "step": 75 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019744105246469263, + "loss": 0.7535, + "step": 100 + }, + { + "epoch": 0.1, + "eval_loss": 0.4003306031227112, + "eval_runtime": 1.7839, + "eval_samples_per_second": 2.242, + "eval_steps_per_second": 0.561, + "step": 100 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001953038210948861, + "loss": 0.7249, + "step": 125 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019254212296427044, + "loss": 0.7118, + "step": 150 + }, + { + "epoch": 0.17, + "learning_rate": 0.00018917405376582145, + "loss": 0.7467, + "step": 175 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018522168236559695, + "loss": 0.6714, + "step": 200 + }, + { + "epoch": 0.2, + "eval_loss": 0.3684937059879303, + "eval_runtime": 1.7853, + "eval_samples_per_second": 2.24, + "eval_steps_per_second": 0.56, + "step": 200 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018071090619916093, + "loss": 0.654, + "step": 225 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017567128158176953, + "loss": 0.6392, + "step": 250 + }, + { + "epoch": 0.28, + "learning_rate": 0.00017013583004418993, + "loss": 0.5745, + "step": 275 + }, + { + "epoch": 0.3, + "learning_rate": 0.000164140821963114, + "loss": 0.5364, + "step": 300 + }, + { + "epoch": 0.3, + "eval_loss": 0.3665352761745453, + "eval_runtime": 1.7852, + "eval_samples_per_second": 2.241, + "eval_steps_per_second": 0.56, + "step": 300 + }, + { + "epoch": 0.33, + "learning_rate": 0.00015772553890390197, + "loss": 0.5693, + "step": 325 + }, + { + "epoch": 0.35, + "learning_rate": 0.00015093201623287631, + "loss": 0.563, + "step": 350 + }, + { + "epoch": 0.38, + "learning_rate": 0.00014380476768566824, + "loss": 0.5478, + "step": 375 + }, + { + "epoch": 0.4, + "learning_rate": 0.00013639049369634876, + "loss": 0.5763, + "step": 400 + }, + { + "epoch": 0.4, + "eval_loss": 0.3363753855228424, + "eval_runtime": 1.7851, + "eval_samples_per_second": 2.241, + "eval_steps_per_second": 0.56, + "step": 400 + }, + { + "epoch": 0.42, + "learning_rate": 0.00012873777539848283, + "loss": 0.4891, + "step": 425 + }, + { + "epoch": 0.45, + "learning_rate": 0.00012089675630312754, + "loss": 0.5331, + "step": 450 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011291881373954065, + "loss": 0.5679, + "step": 475 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010485622221144484, + "loss": 0.5982, + "step": 500 + }, + { + "epoch": 0.5, + "eval_loss": 0.3186224400997162, + "eval_runtime": 1.7828, + "eval_samples_per_second": 2.244, + "eval_steps_per_second": 0.561, + "step": 500 + } + ], + "logging_steps": 25, + "max_steps": 1000, + "num_input_tokens_seen": 0, + "num_train_epochs": 9223372036854775807, + "save_steps": 100, + "total_flos": 7.46753649278976e+17, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-500/training_args.bin b/checkpoint-500/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..557c39f09d92e07be295a9a915c67bb7ca437307 --- /dev/null +++ b/checkpoint-500/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fcc93ccb3f87a3311bb3baa8290949012608d857aa7e1f8e40c50e3c4f99548 +size 4792 diff --git a/checkpoint-600/README.md b/checkpoint-600/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1a8a4180bcf769a642aab7021b9cad81bdbf26d5 --- /dev/null +++ b/checkpoint-600/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +base_model: bigcode/starcoder +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +### Framework versions + +- PEFT 0.7.2.dev0 \ No newline at end of file diff --git a/checkpoint-600/adapter_config.json b/checkpoint-600/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..6098288fad4bbcdb74ef084a3768b87e7417e0eb --- /dev/null +++ b/checkpoint-600/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "bigcode/starcoder", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.1, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "c_fc", + "c_proj", + "c_attn", + "q_attn" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-600/adapter_model.safetensors b/checkpoint-600/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f72d3b8c1e3517f299de399762c91f014dc51695 --- /dev/null +++ b/checkpoint-600/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3a1177b48df34a07dd54a78cbe57b71eb36b1cdb9e4b877cc6e15528adc53b8 +size 55255584 diff --git a/checkpoint-600/optimizer.pt b/checkpoint-600/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..3f167c4c99dffda93b018a54b8564b9f6449867f --- /dev/null +++ b/checkpoint-600/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5d8aa45b9c69ae988bc1fbf425ee83fbd79364b606318429cecb7dfac74001e +size 110696954 diff --git a/checkpoint-600/rng_state.pth b/checkpoint-600/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..b1f3069834495acb59f0aa16edc1ede4a883becb --- /dev/null +++ b/checkpoint-600/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a91f13809dea95708f045b5041b33713e1a48a67c5472668cf0efd76f8b445e +size 14308 diff --git a/checkpoint-600/scheduler.pt b/checkpoint-600/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..cd6986bfc373b9de3981d8f233bee7f74c15e46f --- /dev/null +++ b/checkpoint-600/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bcefd21a679e412181f45d593adc9f913ee65c9d794bb1327c60d32db161f9ba +size 1064 diff --git a/checkpoint-600/trainer_state.json b/checkpoint-600/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..441a1bd8aca23256591d24981a270ba9bf4a7728 --- /dev/null +++ b/checkpoint-600/trainer_state.json @@ -0,0 +1,213 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.6, + "eval_steps": 100, + "global_step": 600, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.03, + "learning_rate": 0.0001666666666666667, + "loss": 0.8745, + "step": 25 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019979028262377118, + "loss": 0.8093, + "step": 50 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019893981312363562, + "loss": 0.7357, + "step": 75 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019744105246469263, + "loss": 0.7535, + "step": 100 + }, + { + "epoch": 0.1, + "eval_loss": 0.4003306031227112, + "eval_runtime": 1.7839, + "eval_samples_per_second": 2.242, + "eval_steps_per_second": 0.561, + "step": 100 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001953038210948861, + "loss": 0.7249, + "step": 125 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019254212296427044, + "loss": 0.7118, + "step": 150 + }, + { + "epoch": 0.17, + "learning_rate": 0.00018917405376582145, + "loss": 0.7467, + "step": 175 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018522168236559695, + "loss": 0.6714, + "step": 200 + }, + { + "epoch": 0.2, + "eval_loss": 0.3684937059879303, + "eval_runtime": 1.7853, + "eval_samples_per_second": 2.24, + "eval_steps_per_second": 0.56, + "step": 200 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018071090619916093, + "loss": 0.654, + "step": 225 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017567128158176953, + "loss": 0.6392, + "step": 250 + }, + { + "epoch": 0.28, + "learning_rate": 0.00017013583004418993, + "loss": 0.5745, + "step": 275 + }, + { + "epoch": 0.3, + "learning_rate": 0.000164140821963114, + "loss": 0.5364, + "step": 300 + }, + { + "epoch": 0.3, + "eval_loss": 0.3665352761745453, + "eval_runtime": 1.7852, + "eval_samples_per_second": 2.241, + "eval_steps_per_second": 0.56, + "step": 300 + }, + { + "epoch": 0.33, + "learning_rate": 0.00015772553890390197, + "loss": 0.5693, + "step": 325 + }, + { + "epoch": 0.35, + "learning_rate": 0.00015093201623287631, + "loss": 0.563, + "step": 350 + }, + { + "epoch": 0.38, + "learning_rate": 0.00014380476768566824, + "loss": 0.5478, + "step": 375 + }, + { + "epoch": 0.4, + "learning_rate": 0.00013639049369634876, + "loss": 0.5763, + "step": 400 + }, + { + "epoch": 0.4, + "eval_loss": 0.3363753855228424, + "eval_runtime": 1.7851, + "eval_samples_per_second": 2.241, + "eval_steps_per_second": 0.56, + "step": 400 + }, + { + "epoch": 0.42, + "learning_rate": 0.00012873777539848283, + "loss": 0.4891, + "step": 425 + }, + { + "epoch": 0.45, + "learning_rate": 0.00012089675630312754, + "loss": 0.5331, + "step": 450 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011291881373954065, + "loss": 0.5679, + "step": 475 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010485622221144484, + "loss": 0.5982, + "step": 500 + }, + { + "epoch": 0.5, + "eval_loss": 0.3186224400997162, + "eval_runtime": 1.7828, + "eval_samples_per_second": 2.244, + "eval_steps_per_second": 0.561, + "step": 500 + }, + { + "epoch": 0.53, + "learning_rate": 9.676181087466444e-05, + "loss": 0.5467, + "step": 525 + }, + { + "epoch": 0.55, + "learning_rate": 8.868861738047158e-05, + "loss": 0.5706, + "step": 550 + }, + { + "epoch": 0.57, + "learning_rate": 8.068954035279121e-05, + "loss": 0.504, + "step": 575 + }, + { + "epoch": 0.6, + "learning_rate": 7.281699277636572e-05, + "loss": 0.5267, + "step": 600 + }, + { + "epoch": 0.6, + "eval_loss": 0.32175499200820923, + "eval_runtime": 1.7846, + "eval_samples_per_second": 2.241, + "eval_steps_per_second": 0.56, + "step": 600 + } + ], + "logging_steps": 25, + "max_steps": 1000, + "num_input_tokens_seen": 0, + "num_train_epochs": 9223372036854775807, + "save_steps": 100, + "total_flos": 8.961043791347712e+17, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-600/training_args.bin b/checkpoint-600/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..557c39f09d92e07be295a9a915c67bb7ca437307 --- /dev/null +++ b/checkpoint-600/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fcc93ccb3f87a3311bb3baa8290949012608d857aa7e1f8e40c50e3c4f99548 +size 4792 diff --git a/checkpoint-700/README.md b/checkpoint-700/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1a8a4180bcf769a642aab7021b9cad81bdbf26d5 --- /dev/null +++ b/checkpoint-700/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +base_model: bigcode/starcoder +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +### Framework versions + +- PEFT 0.7.2.dev0 \ No newline at end of file diff --git a/checkpoint-700/adapter_config.json b/checkpoint-700/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..6098288fad4bbcdb74ef084a3768b87e7417e0eb --- /dev/null +++ b/checkpoint-700/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "bigcode/starcoder", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.1, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "c_fc", + "c_proj", + "c_attn", + "q_attn" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-700/adapter_model.safetensors b/checkpoint-700/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f65fe1a4fb2ac3a8b3cbd156b4c993e634f0f4a8 --- /dev/null +++ b/checkpoint-700/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0fca3c456994a1d356f67d8acec8917d07e4f03f07240a46d95cd2b0eff1dc58 +size 55255584 diff --git a/checkpoint-700/optimizer.pt b/checkpoint-700/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..fcc682f99e407cc969b4a9a15ad0c9703d342984 --- /dev/null +++ b/checkpoint-700/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd7d3e2a072899b2988c4c4373e993cd098462d00d30112a0ca1bdfb199c91a8 +size 110696954 diff --git a/checkpoint-700/rng_state.pth b/checkpoint-700/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..40cc38bea20ffef2b85d317c514d5b30fbd4272e --- /dev/null +++ b/checkpoint-700/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b44a0756396f467936c750a761224e7658358a341eb50049ed8a2999527c41ba +size 14244 diff --git a/checkpoint-700/scheduler.pt b/checkpoint-700/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..bdd63abab74435409716d98392c4cc31673d25b0 --- /dev/null +++ b/checkpoint-700/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee7d0f7d485b5526e2ee6f622f3b0d08799620da7edfb8e1eba12c1be5768802 +size 1064 diff --git a/checkpoint-700/trainer_state.json b/checkpoint-700/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..f82138261e1c621adcca5ae59717ced1fa660878 --- /dev/null +++ b/checkpoint-700/trainer_state.json @@ -0,0 +1,245 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.7, + "eval_steps": 100, + "global_step": 700, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.03, + "learning_rate": 0.0001666666666666667, + "loss": 0.8745, + "step": 25 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019979028262377118, + "loss": 0.8093, + "step": 50 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019893981312363562, + "loss": 0.7357, + "step": 75 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019744105246469263, + "loss": 0.7535, + "step": 100 + }, + { + "epoch": 0.1, + "eval_loss": 0.4003306031227112, + "eval_runtime": 1.7839, + "eval_samples_per_second": 2.242, + "eval_steps_per_second": 0.561, + "step": 100 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001953038210948861, + "loss": 0.7249, + "step": 125 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019254212296427044, + "loss": 0.7118, + "step": 150 + }, + { + "epoch": 0.17, + "learning_rate": 0.00018917405376582145, + "loss": 0.7467, + "step": 175 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018522168236559695, + "loss": 0.6714, + "step": 200 + }, + { + "epoch": 0.2, + "eval_loss": 0.3684937059879303, + "eval_runtime": 1.7853, + "eval_samples_per_second": 2.24, + "eval_steps_per_second": 0.56, + "step": 200 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018071090619916093, + "loss": 0.654, + "step": 225 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017567128158176953, + "loss": 0.6392, + "step": 250 + }, + { + "epoch": 0.28, + "learning_rate": 0.00017013583004418993, + "loss": 0.5745, + "step": 275 + }, + { + "epoch": 0.3, + "learning_rate": 0.000164140821963114, + "loss": 0.5364, + "step": 300 + }, + { + "epoch": 0.3, + "eval_loss": 0.3665352761745453, + "eval_runtime": 1.7852, + "eval_samples_per_second": 2.241, + "eval_steps_per_second": 0.56, + "step": 300 + }, + { + "epoch": 0.33, + "learning_rate": 0.00015772553890390197, + "loss": 0.5693, + "step": 325 + }, + { + "epoch": 0.35, + "learning_rate": 0.00015093201623287631, + "loss": 0.563, + "step": 350 + }, + { + "epoch": 0.38, + "learning_rate": 0.00014380476768566824, + "loss": 0.5478, + "step": 375 + }, + { + "epoch": 0.4, + "learning_rate": 0.00013639049369634876, + "loss": 0.5763, + "step": 400 + }, + { + "epoch": 0.4, + "eval_loss": 0.3363753855228424, + "eval_runtime": 1.7851, + "eval_samples_per_second": 2.241, + "eval_steps_per_second": 0.56, + "step": 400 + }, + { + "epoch": 0.42, + "learning_rate": 0.00012873777539848283, + "loss": 0.4891, + "step": 425 + }, + { + "epoch": 0.45, + "learning_rate": 0.00012089675630312754, + "loss": 0.5331, + "step": 450 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011291881373954065, + "loss": 0.5679, + "step": 475 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010485622221144484, + "loss": 0.5982, + "step": 500 + }, + { + "epoch": 0.5, + "eval_loss": 0.3186224400997162, + "eval_runtime": 1.7828, + "eval_samples_per_second": 2.244, + "eval_steps_per_second": 0.561, + "step": 500 + }, + { + "epoch": 0.53, + "learning_rate": 9.676181087466444e-05, + "loss": 0.5467, + "step": 525 + }, + { + "epoch": 0.55, + "learning_rate": 8.868861738047158e-05, + "loss": 0.5706, + "step": 550 + }, + { + "epoch": 0.57, + "learning_rate": 8.068954035279121e-05, + "loss": 0.504, + "step": 575 + }, + { + "epoch": 0.6, + "learning_rate": 7.281699277636572e-05, + "loss": 0.5267, + "step": 600 + }, + { + "epoch": 0.6, + "eval_loss": 0.32175499200820923, + "eval_runtime": 1.7846, + "eval_samples_per_second": 2.241, + "eval_steps_per_second": 0.56, + "step": 600 + }, + { + "epoch": 0.62, + "learning_rate": 6.512255856701177e-05, + "loss": 0.5414, + "step": 625 + }, + { + "epoch": 0.65, + "learning_rate": 5.765665457425102e-05, + "loss": 0.5412, + "step": 650 + }, + { + "epoch": 0.68, + "learning_rate": 5.0468200231001286e-05, + "loss": 0.4611, + "step": 675 + }, + { + "epoch": 0.7, + "learning_rate": 4.360429701490934e-05, + "loss": 0.5073, + "step": 700 + }, + { + "epoch": 0.7, + "eval_loss": 0.31275978684425354, + "eval_runtime": 1.7833, + "eval_samples_per_second": 2.243, + "eval_steps_per_second": 0.561, + "step": 700 + } + ], + "logging_steps": 25, + "max_steps": 1000, + "num_input_tokens_seen": 0, + "num_train_epochs": 9223372036854775807, + "save_steps": 100, + "total_flos": 1.0454551089905664e+18, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-700/training_args.bin b/checkpoint-700/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..557c39f09d92e07be295a9a915c67bb7ca437307 --- /dev/null +++ b/checkpoint-700/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fcc93ccb3f87a3311bb3baa8290949012608d857aa7e1f8e40c50e3c4f99548 +size 4792 diff --git a/checkpoint-800/README.md b/checkpoint-800/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1a8a4180bcf769a642aab7021b9cad81bdbf26d5 --- /dev/null +++ b/checkpoint-800/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +base_model: bigcode/starcoder +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +### Framework versions + +- PEFT 0.7.2.dev0 \ No newline at end of file diff --git a/checkpoint-800/adapter_config.json b/checkpoint-800/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..6098288fad4bbcdb74ef084a3768b87e7417e0eb --- /dev/null +++ b/checkpoint-800/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "bigcode/starcoder", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.1, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "c_fc", + "c_proj", + "c_attn", + "q_attn" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-800/adapter_model.safetensors b/checkpoint-800/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5b38436bf4dded5ced5ed9ee7d9de0dc09d44ff5 --- /dev/null +++ b/checkpoint-800/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f62b7453de1221b078dadbe54158bc1fd6c24b4fd1a45d320c690a9267973507 +size 55255584 diff --git a/checkpoint-800/optimizer.pt b/checkpoint-800/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..c151f228aa7f5ba634b9f53210bdb03700f1e648 --- /dev/null +++ b/checkpoint-800/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b216dca75198c06cba335f8c8b5dcedeaa6b90bdcb9eb1e76c6b3486c2e9ec75 +size 110696954 diff --git a/checkpoint-800/rng_state.pth b/checkpoint-800/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..968b40abb974f47902413164162bb1009c10bd2a --- /dev/null +++ b/checkpoint-800/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca034349ae452ed48fcd28a3ba282ee09c3539476408af294847ea4085a8b179 +size 14244 diff --git a/checkpoint-800/scheduler.pt b/checkpoint-800/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..c499990657c7c065d2df8f1de5a9c8d2f12204fc --- /dev/null +++ b/checkpoint-800/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ec37bdebbc5b8b19beedf58ee58aa153fde95046ddecd1bc6ab6f5a2dda54ea +size 1064 diff --git a/checkpoint-800/trainer_state.json b/checkpoint-800/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..ab3ec709e2dd54f649439cbbd038c818b829ea3b --- /dev/null +++ b/checkpoint-800/trainer_state.json @@ -0,0 +1,277 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.8, + "eval_steps": 100, + "global_step": 800, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.03, + "learning_rate": 0.0001666666666666667, + "loss": 0.8745, + "step": 25 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019979028262377118, + "loss": 0.8093, + "step": 50 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019893981312363562, + "loss": 0.7357, + "step": 75 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019744105246469263, + "loss": 0.7535, + "step": 100 + }, + { + "epoch": 0.1, + "eval_loss": 0.4003306031227112, + "eval_runtime": 1.7839, + "eval_samples_per_second": 2.242, + "eval_steps_per_second": 0.561, + "step": 100 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001953038210948861, + "loss": 0.7249, + "step": 125 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019254212296427044, + "loss": 0.7118, + "step": 150 + }, + { + "epoch": 0.17, + "learning_rate": 0.00018917405376582145, + "loss": 0.7467, + "step": 175 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018522168236559695, + "loss": 0.6714, + "step": 200 + }, + { + "epoch": 0.2, + "eval_loss": 0.3684937059879303, + "eval_runtime": 1.7853, + "eval_samples_per_second": 2.24, + "eval_steps_per_second": 0.56, + "step": 200 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018071090619916093, + "loss": 0.654, + "step": 225 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017567128158176953, + "loss": 0.6392, + "step": 250 + }, + { + "epoch": 0.28, + "learning_rate": 0.00017013583004418993, + "loss": 0.5745, + "step": 275 + }, + { + "epoch": 0.3, + "learning_rate": 0.000164140821963114, + "loss": 0.5364, + "step": 300 + }, + { + "epoch": 0.3, + "eval_loss": 0.3665352761745453, + "eval_runtime": 1.7852, + "eval_samples_per_second": 2.241, + "eval_steps_per_second": 0.56, + "step": 300 + }, + { + "epoch": 0.33, + "learning_rate": 0.00015772553890390197, + "loss": 0.5693, + "step": 325 + }, + { + "epoch": 0.35, + "learning_rate": 0.00015093201623287631, + "loss": 0.563, + "step": 350 + }, + { + "epoch": 0.38, + "learning_rate": 0.00014380476768566824, + "loss": 0.5478, + "step": 375 + }, + { + "epoch": 0.4, + "learning_rate": 0.00013639049369634876, + "loss": 0.5763, + "step": 400 + }, + { + "epoch": 0.4, + "eval_loss": 0.3363753855228424, + "eval_runtime": 1.7851, + "eval_samples_per_second": 2.241, + "eval_steps_per_second": 0.56, + "step": 400 + }, + { + "epoch": 0.42, + "learning_rate": 0.00012873777539848283, + "loss": 0.4891, + "step": 425 + }, + { + "epoch": 0.45, + "learning_rate": 0.00012089675630312754, + "loss": 0.5331, + "step": 450 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011291881373954065, + "loss": 0.5679, + "step": 475 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010485622221144484, + "loss": 0.5982, + "step": 500 + }, + { + "epoch": 0.5, + "eval_loss": 0.3186224400997162, + "eval_runtime": 1.7828, + "eval_samples_per_second": 2.244, + "eval_steps_per_second": 0.561, + "step": 500 + }, + { + "epoch": 0.53, + "learning_rate": 9.676181087466444e-05, + "loss": 0.5467, + "step": 525 + }, + { + "epoch": 0.55, + "learning_rate": 8.868861738047158e-05, + "loss": 0.5706, + "step": 550 + }, + { + "epoch": 0.57, + "learning_rate": 8.068954035279121e-05, + "loss": 0.504, + "step": 575 + }, + { + "epoch": 0.6, + "learning_rate": 7.281699277636572e-05, + "loss": 0.5267, + "step": 600 + }, + { + "epoch": 0.6, + "eval_loss": 0.32175499200820923, + "eval_runtime": 1.7846, + "eval_samples_per_second": 2.241, + "eval_steps_per_second": 0.56, + "step": 600 + }, + { + "epoch": 0.62, + "learning_rate": 6.512255856701177e-05, + "loss": 0.5414, + "step": 625 + }, + { + "epoch": 0.65, + "learning_rate": 5.765665457425102e-05, + "loss": 0.5412, + "step": 650 + }, + { + "epoch": 0.68, + "learning_rate": 5.0468200231001286e-05, + "loss": 0.4611, + "step": 675 + }, + { + "epoch": 0.7, + "learning_rate": 4.360429701490934e-05, + "loss": 0.5073, + "step": 700 + }, + { + "epoch": 0.7, + "eval_loss": 0.31275978684425354, + "eval_runtime": 1.7833, + "eval_samples_per_second": 2.243, + "eval_steps_per_second": 0.561, + "step": 700 + }, + { + "epoch": 0.72, + "learning_rate": 3.710991982161555e-05, + "loss": 0.4778, + "step": 725 + }, + { + "epoch": 0.75, + "learning_rate": 3.102762227218957e-05, + "loss": 0.5454, + "step": 750 + }, + { + "epoch": 0.78, + "learning_rate": 2.5397257885675397e-05, + "loss": 0.5612, + "step": 775 + }, + { + "epoch": 0.8, + "learning_rate": 2.025571894372794e-05, + "loss": 0.4983, + "step": 800 + }, + { + "epoch": 0.8, + "eval_loss": 0.31457942724227905, + "eval_runtime": 1.7821, + "eval_samples_per_second": 2.245, + "eval_steps_per_second": 0.561, + "step": 800 + } + ], + "logging_steps": 25, + "max_steps": 1000, + "num_input_tokens_seen": 0, + "num_train_epochs": 9223372036854775807, + "save_steps": 100, + "total_flos": 1.1948058388463616e+18, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-800/training_args.bin b/checkpoint-800/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..557c39f09d92e07be295a9a915c67bb7ca437307 --- /dev/null +++ b/checkpoint-800/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fcc93ccb3f87a3311bb3baa8290949012608d857aa7e1f8e40c50e3c4f99548 +size 4792 diff --git a/checkpoint-900/README.md b/checkpoint-900/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1a8a4180bcf769a642aab7021b9cad81bdbf26d5 --- /dev/null +++ b/checkpoint-900/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +base_model: bigcode/starcoder +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +### Framework versions + +- PEFT 0.7.2.dev0 \ No newline at end of file diff --git a/checkpoint-900/adapter_config.json b/checkpoint-900/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..6098288fad4bbcdb74ef084a3768b87e7417e0eb --- /dev/null +++ b/checkpoint-900/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "bigcode/starcoder", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.1, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "c_fc", + "c_proj", + "c_attn", + "q_attn" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-900/adapter_model.safetensors b/checkpoint-900/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dfdec1a4c6f6367ec6c38eaed1eb3cd15edbd8ff --- /dev/null +++ b/checkpoint-900/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3358051ec125a923ae4cf1d319917b15a2cac4df5e35e8f64ecdc51d4de79fab +size 55255584 diff --git a/checkpoint-900/optimizer.pt b/checkpoint-900/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..1532f9f29ff0d9e5409b1c49ff03df72bbb1b600 --- /dev/null +++ b/checkpoint-900/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8c5e0398b978b8f43e7ac6f02b0571571c7800b39920f9d8388ceea530b82cd +size 110696954 diff --git a/checkpoint-900/rng_state.pth b/checkpoint-900/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..6158a337eda612f5f2e9eb368e7ba509b3f264c2 --- /dev/null +++ b/checkpoint-900/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:245dfa928d7d3e07de5668b5b5b92a91ef5ab419006e01f6d0bee47af42c87a5 +size 14244 diff --git a/checkpoint-900/scheduler.pt b/checkpoint-900/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..7592de3fb221fa21b6347beafc65882981886cc7 --- /dev/null +++ b/checkpoint-900/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6d225a71bb72ad95a478c83641b7619a08e29cd6c6abe740bf8160f226a9bce +size 1064 diff --git a/checkpoint-900/trainer_state.json b/checkpoint-900/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..1d465406c7cfa5e710a370ade0f1157011b95df3 --- /dev/null +++ b/checkpoint-900/trainer_state.json @@ -0,0 +1,309 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.9, + "eval_steps": 100, + "global_step": 900, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.03, + "learning_rate": 0.0001666666666666667, + "loss": 0.8745, + "step": 25 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019979028262377118, + "loss": 0.8093, + "step": 50 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019893981312363562, + "loss": 0.7357, + "step": 75 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019744105246469263, + "loss": 0.7535, + "step": 100 + }, + { + "epoch": 0.1, + "eval_loss": 0.4003306031227112, + "eval_runtime": 1.7839, + "eval_samples_per_second": 2.242, + "eval_steps_per_second": 0.561, + "step": 100 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001953038210948861, + "loss": 0.7249, + "step": 125 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019254212296427044, + "loss": 0.7118, + "step": 150 + }, + { + "epoch": 0.17, + "learning_rate": 0.00018917405376582145, + "loss": 0.7467, + "step": 175 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018522168236559695, + "loss": 0.6714, + "step": 200 + }, + { + "epoch": 0.2, + "eval_loss": 0.3684937059879303, + "eval_runtime": 1.7853, + "eval_samples_per_second": 2.24, + "eval_steps_per_second": 0.56, + "step": 200 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018071090619916093, + "loss": 0.654, + "step": 225 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017567128158176953, + "loss": 0.6392, + "step": 250 + }, + { + "epoch": 0.28, + "learning_rate": 0.00017013583004418993, + "loss": 0.5745, + "step": 275 + }, + { + "epoch": 0.3, + "learning_rate": 0.000164140821963114, + "loss": 0.5364, + "step": 300 + }, + { + "epoch": 0.3, + "eval_loss": 0.3665352761745453, + "eval_runtime": 1.7852, + "eval_samples_per_second": 2.241, + "eval_steps_per_second": 0.56, + "step": 300 + }, + { + "epoch": 0.33, + "learning_rate": 0.00015772553890390197, + "loss": 0.5693, + "step": 325 + }, + { + "epoch": 0.35, + "learning_rate": 0.00015093201623287631, + "loss": 0.563, + "step": 350 + }, + { + "epoch": 0.38, + "learning_rate": 0.00014380476768566824, + "loss": 0.5478, + "step": 375 + }, + { + "epoch": 0.4, + "learning_rate": 0.00013639049369634876, + "loss": 0.5763, + "step": 400 + }, + { + "epoch": 0.4, + "eval_loss": 0.3363753855228424, + "eval_runtime": 1.7851, + "eval_samples_per_second": 2.241, + "eval_steps_per_second": 0.56, + "step": 400 + }, + { + "epoch": 0.42, + "learning_rate": 0.00012873777539848283, + "loss": 0.4891, + "step": 425 + }, + { + "epoch": 0.45, + "learning_rate": 0.00012089675630312754, + "loss": 0.5331, + "step": 450 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011291881373954065, + "loss": 0.5679, + "step": 475 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010485622221144484, + "loss": 0.5982, + "step": 500 + }, + { + "epoch": 0.5, + "eval_loss": 0.3186224400997162, + "eval_runtime": 1.7828, + "eval_samples_per_second": 2.244, + "eval_steps_per_second": 0.561, + "step": 500 + }, + { + "epoch": 0.53, + "learning_rate": 9.676181087466444e-05, + "loss": 0.5467, + "step": 525 + }, + { + "epoch": 0.55, + "learning_rate": 8.868861738047158e-05, + "loss": 0.5706, + "step": 550 + }, + { + "epoch": 0.57, + "learning_rate": 8.068954035279121e-05, + "loss": 0.504, + "step": 575 + }, + { + "epoch": 0.6, + "learning_rate": 7.281699277636572e-05, + "loss": 0.5267, + "step": 600 + }, + { + "epoch": 0.6, + "eval_loss": 0.32175499200820923, + "eval_runtime": 1.7846, + "eval_samples_per_second": 2.241, + "eval_steps_per_second": 0.56, + "step": 600 + }, + { + "epoch": 0.62, + "learning_rate": 6.512255856701177e-05, + "loss": 0.5414, + "step": 625 + }, + { + "epoch": 0.65, + "learning_rate": 5.765665457425102e-05, + "loss": 0.5412, + "step": 650 + }, + { + "epoch": 0.68, + "learning_rate": 5.0468200231001286e-05, + "loss": 0.4611, + "step": 675 + }, + { + "epoch": 0.7, + "learning_rate": 4.360429701490934e-05, + "loss": 0.5073, + "step": 700 + }, + { + "epoch": 0.7, + "eval_loss": 0.31275978684425354, + "eval_runtime": 1.7833, + "eval_samples_per_second": 2.243, + "eval_steps_per_second": 0.561, + "step": 700 + }, + { + "epoch": 0.72, + "learning_rate": 3.710991982161555e-05, + "loss": 0.4778, + "step": 725 + }, + { + "epoch": 0.75, + "learning_rate": 3.102762227218957e-05, + "loss": 0.5454, + "step": 750 + }, + { + "epoch": 0.78, + "learning_rate": 2.5397257885675397e-05, + "loss": 0.5612, + "step": 775 + }, + { + "epoch": 0.8, + "learning_rate": 2.025571894372794e-05, + "loss": 0.4983, + "step": 800 + }, + { + "epoch": 0.8, + "eval_loss": 0.31457942724227905, + "eval_runtime": 1.7821, + "eval_samples_per_second": 2.245, + "eval_steps_per_second": 0.561, + "step": 800 + }, + { + "epoch": 0.82, + "learning_rate": 1.563669475839956e-05, + "loss": 0.4941, + "step": 825 + }, + { + "epoch": 0.85, + "learning_rate": 1.1570450926997655e-05, + "loss": 0.4926, + "step": 850 + }, + { + "epoch": 0.88, + "learning_rate": 8.083631020418791e-06, + "loss": 0.5094, + "step": 875 + }, + { + "epoch": 0.9, + "learning_rate": 5.199082004372957e-06, + "loss": 0.5116, + "step": 900 + }, + { + "epoch": 0.9, + "eval_loss": 0.31357938051223755, + "eval_runtime": 1.7811, + "eval_samples_per_second": 2.246, + "eval_steps_per_second": 0.561, + "step": 900 + } + ], + "logging_steps": 25, + "max_steps": 1000, + "num_input_tokens_seen": 0, + "num_train_epochs": 9223372036854775807, + "save_steps": 100, + "total_flos": 1.3441565687021568e+18, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-900/training_args.bin b/checkpoint-900/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..557c39f09d92e07be295a9a915c67bb7ca437307 --- /dev/null +++ b/checkpoint-900/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fcc93ccb3f87a3311bb3baa8290949012608d857aa7e1f8e40c50e3c4f99548 +size 4792 diff --git a/final_checkpoint/README.md b/final_checkpoint/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1a8a4180bcf769a642aab7021b9cad81bdbf26d5 --- /dev/null +++ b/final_checkpoint/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +base_model: bigcode/starcoder +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +### Framework versions + +- PEFT 0.7.2.dev0 \ No newline at end of file diff --git a/final_checkpoint/adapter_config.json b/final_checkpoint/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..6098288fad4bbcdb74ef084a3768b87e7417e0eb --- /dev/null +++ b/final_checkpoint/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "bigcode/starcoder", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.1, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "c_fc", + "c_proj", + "c_attn", + "q_attn" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} \ No newline at end of file diff --git a/final_checkpoint/adapter_model.safetensors b/final_checkpoint/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d98fe66714affaa8f1014cba78cba4c7eab4d2e3 --- /dev/null +++ b/final_checkpoint/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:495e43d13098fd20e268cc0b7e29b7ce202bd78c87b46b411c81898af04d9b90 +size 55255584 diff --git a/training_args.bin b/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..557c39f09d92e07be295a9a915c67bb7ca437307 --- /dev/null +++ b/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fcc93ccb3f87a3311bb3baa8290949012608d857aa7e1f8e40c50e3c4f99548 +size 4792