diff --git a/README.md b/README.md index 9f2c57866664b8ddbbfa3a4b9eecbed05d7a3ba3..8c65a9ab0201e1b488bf55ad92a0b5cff9f04be6 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,202 @@ --- -title: Ig Caption -emoji: 🦀 -colorFrom: pink -colorTo: gray -sdk: gradio -sdk_version: 4.44.1 -app_file: app.py -pinned: false +base_model: microsoft/git-base +library_name: peft --- -Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/adapter_config.json b/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b6d44ec7670bb505bd8432352092989b1a15c296 --- /dev/null +++ b/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": { + "base_model_class": "GitForCausalLM", + "parent_library": "transformers.models.git.modeling_git" + }, + "base_model_name_or_path": "microsoft/git-base", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": [ + "classifier" + ], + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "value", + "query" + ], + "task_type": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/adapter_model.safetensors b/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..27aff4dac84f5b704dcd0e79178b30037ba14aa5 --- /dev/null +++ b/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67c052972788561e17cc1efd70127f9ca5f55d2b24e26a9754de413f5d31b108 +size 593144 diff --git a/checkpoint-105/README.md b/checkpoint-105/README.md new file mode 100644 index 0000000000000000000000000000000000000000..8c65a9ab0201e1b488bf55ad92a0b5cff9f04be6 --- /dev/null +++ b/checkpoint-105/README.md @@ -0,0 +1,202 @@ +--- +base_model: microsoft/git-base +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/checkpoint-105/adapter_config.json b/checkpoint-105/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b6d44ec7670bb505bd8432352092989b1a15c296 --- /dev/null +++ b/checkpoint-105/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": { + "base_model_class": "GitForCausalLM", + "parent_library": "transformers.models.git.modeling_git" + }, + "base_model_name_or_path": "microsoft/git-base", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": [ + "classifier" + ], + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "value", + "query" + ], + "task_type": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-105/adapter_model.safetensors b/checkpoint-105/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a056641bb92581fe2e038ec6f726238dc915f1ce --- /dev/null +++ b/checkpoint-105/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:099481d6993847df0d26e29266868b60b9576cffcd1ad03254ea681178066463 +size 593144 diff --git a/checkpoint-105/optimizer.pt b/checkpoint-105/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..b57b613a29e116c79aa3c8194ae8c1a9d928607f --- /dev/null +++ b/checkpoint-105/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b130461c911b6689f8fad694710452934398c557396bffbcd6537d445912509e +size 1200278 diff --git a/checkpoint-105/rng_state.pth b/checkpoint-105/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..e77f748d5f14738c54df10afb826f32ef227b19f --- /dev/null +++ b/checkpoint-105/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4321a5f55c24ed1c3aff9552d7e499e6834a973711c55ae49611427f0d738131 +size 14244 diff --git a/checkpoint-105/scheduler.pt b/checkpoint-105/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..48091b4876936ad88639bb4bfbc3ae706288e213 --- /dev/null +++ b/checkpoint-105/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e0786f84ffe6f2142798a9b6f65187c8b2533ce71afd2f74fff7bb85226d5af +size 1064 diff --git a/checkpoint-105/trainer_state.json b/checkpoint-105/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..3ce7209206ce813adf59bdab214c98f61c5742ad --- /dev/null +++ b/checkpoint-105/trainer_state.json @@ -0,0 +1,68 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 5.0, + "eval_steps": 500, + "global_step": 105, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 1.0, + "eval_runtime": 11.809, + "eval_samples_per_second": 3.811, + "eval_steps_per_second": 0.423, + "step": 21 + }, + { + "epoch": 2.0, + "eval_runtime": 6.8145, + "eval_samples_per_second": 6.604, + "eval_steps_per_second": 0.734, + "step": 42 + }, + { + "epoch": 3.0, + "eval_runtime": 6.0474, + "eval_samples_per_second": 7.441, + "eval_steps_per_second": 0.827, + "step": 63 + }, + { + "epoch": 4.0, + "eval_runtime": 6.3718, + "eval_samples_per_second": 7.062, + "eval_steps_per_second": 0.785, + "step": 84 + }, + { + "epoch": 5.0, + "eval_runtime": 6.5487, + "eval_samples_per_second": 6.872, + "eval_steps_per_second": 0.764, + "step": 105 + } + ], + "logging_steps": 500, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 491542403788800.0, + "train_batch_size": 10, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-105/training_args.bin b/checkpoint-105/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b601bb06afbbd0f62027bb49993f43301f9ebdb5 --- /dev/null +++ b/checkpoint-105/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:870e1ff24d52f571f9b66eecb81300ba7b620de79860cca05110db11bcc791fa +size 5176 diff --git a/checkpoint-126/README.md b/checkpoint-126/README.md new file mode 100644 index 0000000000000000000000000000000000000000..8c65a9ab0201e1b488bf55ad92a0b5cff9f04be6 --- /dev/null +++ b/checkpoint-126/README.md @@ -0,0 +1,202 @@ +--- +base_model: microsoft/git-base +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/checkpoint-126/adapter_config.json b/checkpoint-126/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b6d44ec7670bb505bd8432352092989b1a15c296 --- /dev/null +++ b/checkpoint-126/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": { + "base_model_class": "GitForCausalLM", + "parent_library": "transformers.models.git.modeling_git" + }, + "base_model_name_or_path": "microsoft/git-base", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": [ + "classifier" + ], + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "value", + "query" + ], + "task_type": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-126/adapter_model.safetensors b/checkpoint-126/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6eab7dffca77399a100ff54640dc3165337ee8c8 --- /dev/null +++ b/checkpoint-126/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78fa0f5980919bbd37c12ba6262c241488d9bb724956b208afe32f25a041f486 +size 593144 diff --git a/checkpoint-126/optimizer.pt b/checkpoint-126/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..6ff1fa59aa8d85259f8ce0faccd608b2236884d6 --- /dev/null +++ b/checkpoint-126/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d500f8d786ccc8ced8a2da8785e4c3de84eddb3741e87f74b2e40a42375a8d91 +size 1200278 diff --git a/checkpoint-126/rng_state.pth b/checkpoint-126/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..465103fe25ff842ebd38dfe993432adffbadae06 --- /dev/null +++ b/checkpoint-126/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c91e3944249ba54477cb166467c28248c38c5a729bee7a37f56d02f52c1a21d +size 14244 diff --git a/checkpoint-126/scheduler.pt b/checkpoint-126/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..c7a0082201748f152efc4f0ff717bad1272c9e22 --- /dev/null +++ b/checkpoint-126/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7365cc31cabe5de45b00e7ce5e540edd5a47c5b318b0206f964b44d0e3763b4 +size 1064 diff --git a/checkpoint-126/trainer_state.json b/checkpoint-126/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..3d0e21d7324c695f79e3a79a65e3349dfbda71aa --- /dev/null +++ b/checkpoint-126/trainer_state.json @@ -0,0 +1,75 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 6.0, + "eval_steps": 500, + "global_step": 126, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 1.0, + "eval_runtime": 11.809, + "eval_samples_per_second": 3.811, + "eval_steps_per_second": 0.423, + "step": 21 + }, + { + "epoch": 2.0, + "eval_runtime": 6.8145, + "eval_samples_per_second": 6.604, + "eval_steps_per_second": 0.734, + "step": 42 + }, + { + "epoch": 3.0, + "eval_runtime": 6.0474, + "eval_samples_per_second": 7.441, + "eval_steps_per_second": 0.827, + "step": 63 + }, + { + "epoch": 4.0, + "eval_runtime": 6.3718, + "eval_samples_per_second": 7.062, + "eval_steps_per_second": 0.785, + "step": 84 + }, + { + "epoch": 5.0, + "eval_runtime": 6.5487, + "eval_samples_per_second": 6.872, + "eval_steps_per_second": 0.764, + "step": 105 + }, + { + "epoch": 6.0, + "eval_runtime": 6.0733, + "eval_samples_per_second": 7.409, + "eval_steps_per_second": 0.823, + "step": 126 + } + ], + "logging_steps": 500, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 589850884546560.0, + "train_batch_size": 10, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-126/training_args.bin b/checkpoint-126/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b601bb06afbbd0f62027bb49993f43301f9ebdb5 --- /dev/null +++ b/checkpoint-126/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:870e1ff24d52f571f9b66eecb81300ba7b620de79860cca05110db11bcc791fa +size 5176 diff --git a/checkpoint-147/README.md b/checkpoint-147/README.md new file mode 100644 index 0000000000000000000000000000000000000000..8c65a9ab0201e1b488bf55ad92a0b5cff9f04be6 --- /dev/null +++ b/checkpoint-147/README.md @@ -0,0 +1,202 @@ +--- +base_model: microsoft/git-base +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/checkpoint-147/adapter_config.json b/checkpoint-147/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b6d44ec7670bb505bd8432352092989b1a15c296 --- /dev/null +++ b/checkpoint-147/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": { + "base_model_class": "GitForCausalLM", + "parent_library": "transformers.models.git.modeling_git" + }, + "base_model_name_or_path": "microsoft/git-base", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": [ + "classifier" + ], + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "value", + "query" + ], + "task_type": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-147/adapter_model.safetensors b/checkpoint-147/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..82f7c54f4fe53ee37adcce73862e50b898e6a8e7 --- /dev/null +++ b/checkpoint-147/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47a20d6ea103c1e2690a3f11ee74125b912165667f7145c96f3492c5479aa1cd +size 593144 diff --git a/checkpoint-147/optimizer.pt b/checkpoint-147/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..3712c00e63a25867100c715548752972760a65c2 --- /dev/null +++ b/checkpoint-147/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad14f01c616447670dac16c09a3a9dfb3b88c76c3540ed765d5b1032cd642017 +size 1200278 diff --git a/checkpoint-147/rng_state.pth b/checkpoint-147/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..2bfba8469b1ab47c1eb0ec0ef8a67f8f44e6c88b --- /dev/null +++ b/checkpoint-147/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f28e82c05d3747c92a64c6548519a9e6e3213799e756b65c3496bfc0cbf2c8d7 +size 14244 diff --git a/checkpoint-147/scheduler.pt b/checkpoint-147/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..4b06d745388ab9284bd8b0fbcd844b5488d099eb --- /dev/null +++ b/checkpoint-147/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f04d04dd6d2e93caf23ae6a529acfc114ecc6172087652f4429c9fc124b5439c +size 1064 diff --git a/checkpoint-147/trainer_state.json b/checkpoint-147/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..d9b00f0f2594f1bede26f5478b306dfd835e7bed --- /dev/null +++ b/checkpoint-147/trainer_state.json @@ -0,0 +1,82 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 7.0, + "eval_steps": 500, + "global_step": 147, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 1.0, + "eval_runtime": 11.809, + "eval_samples_per_second": 3.811, + "eval_steps_per_second": 0.423, + "step": 21 + }, + { + "epoch": 2.0, + "eval_runtime": 6.8145, + "eval_samples_per_second": 6.604, + "eval_steps_per_second": 0.734, + "step": 42 + }, + { + "epoch": 3.0, + "eval_runtime": 6.0474, + "eval_samples_per_second": 7.441, + "eval_steps_per_second": 0.827, + "step": 63 + }, + { + "epoch": 4.0, + "eval_runtime": 6.3718, + "eval_samples_per_second": 7.062, + "eval_steps_per_second": 0.785, + "step": 84 + }, + { + "epoch": 5.0, + "eval_runtime": 6.5487, + "eval_samples_per_second": 6.872, + "eval_steps_per_second": 0.764, + "step": 105 + }, + { + "epoch": 6.0, + "eval_runtime": 6.0733, + "eval_samples_per_second": 7.409, + "eval_steps_per_second": 0.823, + "step": 126 + }, + { + "epoch": 7.0, + "eval_runtime": 6.0993, + "eval_samples_per_second": 7.378, + "eval_steps_per_second": 0.82, + "step": 147 + } + ], + "logging_steps": 500, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 688159365304320.0, + "train_batch_size": 10, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-147/training_args.bin b/checkpoint-147/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b601bb06afbbd0f62027bb49993f43301f9ebdb5 --- /dev/null +++ b/checkpoint-147/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:870e1ff24d52f571f9b66eecb81300ba7b620de79860cca05110db11bcc791fa +size 5176 diff --git a/checkpoint-168/README.md b/checkpoint-168/README.md new file mode 100644 index 0000000000000000000000000000000000000000..8c65a9ab0201e1b488bf55ad92a0b5cff9f04be6 --- /dev/null +++ b/checkpoint-168/README.md @@ -0,0 +1,202 @@ +--- +base_model: microsoft/git-base +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/checkpoint-168/adapter_config.json b/checkpoint-168/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b6d44ec7670bb505bd8432352092989b1a15c296 --- /dev/null +++ b/checkpoint-168/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": { + "base_model_class": "GitForCausalLM", + "parent_library": "transformers.models.git.modeling_git" + }, + "base_model_name_or_path": "microsoft/git-base", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": [ + "classifier" + ], + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "value", + "query" + ], + "task_type": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-168/adapter_model.safetensors b/checkpoint-168/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..49b84acbdcc7dd2e926d273b3b8b8340a9fdd97e --- /dev/null +++ b/checkpoint-168/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1f79468b098838bdda31dc4cfd002301a6dcecc491e4912a192525b29ffdac3 +size 593144 diff --git a/checkpoint-168/optimizer.pt b/checkpoint-168/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..24422cd8003270a041eb642a60fab48f807f8d17 --- /dev/null +++ b/checkpoint-168/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:335fbbe0cad6230af54b61b6c29beb7ecc39fbc478e8fa7d8467331eed4f497d +size 1200278 diff --git a/checkpoint-168/rng_state.pth b/checkpoint-168/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..dc2af56008add56a44de2a241d7acc2f8339a047 --- /dev/null +++ b/checkpoint-168/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5faa98b74feac1a7680576cbe4f3d4bd533afe7fee450bdef9a57486e6cec72 +size 14244 diff --git a/checkpoint-168/scheduler.pt b/checkpoint-168/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..0346523a11a7fea96828583da11634357a14efd3 --- /dev/null +++ b/checkpoint-168/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0efea91adcda6ee4a3609817a506c59180006bda83dd3be6d91d85c66a757fcd +size 1064 diff --git a/checkpoint-168/trainer_state.json b/checkpoint-168/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..3a590bfe2ee7f8a6dc504a91c6f4ff948ea28030 --- /dev/null +++ b/checkpoint-168/trainer_state.json @@ -0,0 +1,89 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 8.0, + "eval_steps": 500, + "global_step": 168, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 1.0, + "eval_runtime": 11.809, + "eval_samples_per_second": 3.811, + "eval_steps_per_second": 0.423, + "step": 21 + }, + { + "epoch": 2.0, + "eval_runtime": 6.8145, + "eval_samples_per_second": 6.604, + "eval_steps_per_second": 0.734, + "step": 42 + }, + { + "epoch": 3.0, + "eval_runtime": 6.0474, + "eval_samples_per_second": 7.441, + "eval_steps_per_second": 0.827, + "step": 63 + }, + { + "epoch": 4.0, + "eval_runtime": 6.3718, + "eval_samples_per_second": 7.062, + "eval_steps_per_second": 0.785, + "step": 84 + }, + { + "epoch": 5.0, + "eval_runtime": 6.5487, + "eval_samples_per_second": 6.872, + "eval_steps_per_second": 0.764, + "step": 105 + }, + { + "epoch": 6.0, + "eval_runtime": 6.0733, + "eval_samples_per_second": 7.409, + "eval_steps_per_second": 0.823, + "step": 126 + }, + { + "epoch": 7.0, + "eval_runtime": 6.0993, + "eval_samples_per_second": 7.378, + "eval_steps_per_second": 0.82, + "step": 147 + }, + { + "epoch": 8.0, + "eval_runtime": 6.6473, + "eval_samples_per_second": 6.77, + "eval_steps_per_second": 0.752, + "step": 168 + } + ], + "logging_steps": 500, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 786467846062080.0, + "train_batch_size": 10, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-168/training_args.bin b/checkpoint-168/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b601bb06afbbd0f62027bb49993f43301f9ebdb5 --- /dev/null +++ b/checkpoint-168/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:870e1ff24d52f571f9b66eecb81300ba7b620de79860cca05110db11bcc791fa +size 5176 diff --git a/checkpoint-189/README.md b/checkpoint-189/README.md new file mode 100644 index 0000000000000000000000000000000000000000..8c65a9ab0201e1b488bf55ad92a0b5cff9f04be6 --- /dev/null +++ b/checkpoint-189/README.md @@ -0,0 +1,202 @@ +--- +base_model: microsoft/git-base +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/checkpoint-189/adapter_config.json b/checkpoint-189/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b6d44ec7670bb505bd8432352092989b1a15c296 --- /dev/null +++ b/checkpoint-189/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": { + "base_model_class": "GitForCausalLM", + "parent_library": "transformers.models.git.modeling_git" + }, + "base_model_name_or_path": "microsoft/git-base", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": [ + "classifier" + ], + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "value", + "query" + ], + "task_type": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-189/adapter_model.safetensors b/checkpoint-189/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..40224f750a55743cf2ceeaf19372331acae2e221 --- /dev/null +++ b/checkpoint-189/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d66a4a1c854705a10166110e5f804fc1218c93d86a60edd7c69099dc9a8f4b2 +size 593144 diff --git a/checkpoint-189/optimizer.pt b/checkpoint-189/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..6cf7098b4330e765bafcd815d0ad984bca0f0008 --- /dev/null +++ b/checkpoint-189/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bdd9bc7df8dfe56fe64f3ffcadf95ee3337cccba87da25a97dd17559b80bd34e +size 1200278 diff --git a/checkpoint-189/rng_state.pth b/checkpoint-189/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..084bd82f70a7d34c7e7de267bad80e26f138a717 --- /dev/null +++ b/checkpoint-189/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45319e774bd2c4b061078f791731a02fd3ea29d0cfe78fad6561eff86b487576 +size 14244 diff --git a/checkpoint-189/scheduler.pt b/checkpoint-189/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..82024c59fa216cab6320a89e7299e8eea882e4bc --- /dev/null +++ b/checkpoint-189/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d902382eed472b6e877903aa22caf6cb87bb4ab6c48baa5ba4bde6c939ed2dfe +size 1064 diff --git a/checkpoint-189/trainer_state.json b/checkpoint-189/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..aa58a085061cebe578c9aef7692338ef9605515b --- /dev/null +++ b/checkpoint-189/trainer_state.json @@ -0,0 +1,96 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 9.0, + "eval_steps": 500, + "global_step": 189, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 1.0, + "eval_runtime": 11.809, + "eval_samples_per_second": 3.811, + "eval_steps_per_second": 0.423, + "step": 21 + }, + { + "epoch": 2.0, + "eval_runtime": 6.8145, + "eval_samples_per_second": 6.604, + "eval_steps_per_second": 0.734, + "step": 42 + }, + { + "epoch": 3.0, + "eval_runtime": 6.0474, + "eval_samples_per_second": 7.441, + "eval_steps_per_second": 0.827, + "step": 63 + }, + { + "epoch": 4.0, + "eval_runtime": 6.3718, + "eval_samples_per_second": 7.062, + "eval_steps_per_second": 0.785, + "step": 84 + }, + { + "epoch": 5.0, + "eval_runtime": 6.5487, + "eval_samples_per_second": 6.872, + "eval_steps_per_second": 0.764, + "step": 105 + }, + { + "epoch": 6.0, + "eval_runtime": 6.0733, + "eval_samples_per_second": 7.409, + "eval_steps_per_second": 0.823, + "step": 126 + }, + { + "epoch": 7.0, + "eval_runtime": 6.0993, + "eval_samples_per_second": 7.378, + "eval_steps_per_second": 0.82, + "step": 147 + }, + { + "epoch": 8.0, + "eval_runtime": 6.6473, + "eval_samples_per_second": 6.77, + "eval_steps_per_second": 0.752, + "step": 168 + }, + { + "epoch": 9.0, + "eval_runtime": 6.3262, + "eval_samples_per_second": 7.113, + "eval_steps_per_second": 0.79, + "step": 189 + } + ], + "logging_steps": 500, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 884776326819840.0, + "train_batch_size": 10, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-189/training_args.bin b/checkpoint-189/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b601bb06afbbd0f62027bb49993f43301f9ebdb5 --- /dev/null +++ b/checkpoint-189/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:870e1ff24d52f571f9b66eecb81300ba7b620de79860cca05110db11bcc791fa +size 5176 diff --git a/checkpoint-21/README.md b/checkpoint-21/README.md new file mode 100644 index 0000000000000000000000000000000000000000..8c65a9ab0201e1b488bf55ad92a0b5cff9f04be6 --- /dev/null +++ b/checkpoint-21/README.md @@ -0,0 +1,202 @@ +--- +base_model: microsoft/git-base +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/checkpoint-21/adapter_config.json b/checkpoint-21/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b6d44ec7670bb505bd8432352092989b1a15c296 --- /dev/null +++ b/checkpoint-21/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": { + "base_model_class": "GitForCausalLM", + "parent_library": "transformers.models.git.modeling_git" + }, + "base_model_name_or_path": "microsoft/git-base", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": [ + "classifier" + ], + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "value", + "query" + ], + "task_type": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-21/adapter_model.safetensors b/checkpoint-21/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..05561e2b5ad4fa7597a8bd232ac4186b1ec70244 --- /dev/null +++ b/checkpoint-21/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d07df82a3590e84acd3b59922bfb84212274edc94a786f0d79407eca71cf761 +size 593144 diff --git a/checkpoint-21/optimizer.pt b/checkpoint-21/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..a172789639c2ff80ef9c0e9ee7e58639d671aa26 --- /dev/null +++ b/checkpoint-21/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:667c119dd087fae3755f218bfa474f861a39cf8de5d13679d1699b4c8e060cc5 +size 1200278 diff --git a/checkpoint-21/rng_state.pth b/checkpoint-21/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..588d0caf4e110ed76d958f0c994d3c6ddc88d23b --- /dev/null +++ b/checkpoint-21/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:487b0c415e54e215325b0d1d905731a0599e9682d67059c0b0834f18c7bd47bd +size 14244 diff --git a/checkpoint-21/scheduler.pt b/checkpoint-21/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..023995cb04e5f60d552ed4ee52bd0b9ac1c785aa --- /dev/null +++ b/checkpoint-21/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad23765a900241a4b35a188eb39ba78d21ce8b6e79986f62ccc7d014b47a8ad2 +size 1064 diff --git a/checkpoint-21/trainer_state.json b/checkpoint-21/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..e346c9269e0918fba97e609efa3c3fcbde8a20b9 --- /dev/null +++ b/checkpoint-21/trainer_state.json @@ -0,0 +1,40 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 21, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 1.0, + "eval_runtime": 11.809, + "eval_samples_per_second": 3.811, + "eval_steps_per_second": 0.423, + "step": 21 + } + ], + "logging_steps": 500, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 98308480757760.0, + "train_batch_size": 10, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-21/training_args.bin b/checkpoint-21/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b601bb06afbbd0f62027bb49993f43301f9ebdb5 --- /dev/null +++ b/checkpoint-21/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:870e1ff24d52f571f9b66eecb81300ba7b620de79860cca05110db11bcc791fa +size 5176 diff --git a/checkpoint-210/README.md b/checkpoint-210/README.md new file mode 100644 index 0000000000000000000000000000000000000000..8c65a9ab0201e1b488bf55ad92a0b5cff9f04be6 --- /dev/null +++ b/checkpoint-210/README.md @@ -0,0 +1,202 @@ +--- +base_model: microsoft/git-base +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/checkpoint-210/adapter_config.json b/checkpoint-210/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b6d44ec7670bb505bd8432352092989b1a15c296 --- /dev/null +++ b/checkpoint-210/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": { + "base_model_class": "GitForCausalLM", + "parent_library": "transformers.models.git.modeling_git" + }, + "base_model_name_or_path": "microsoft/git-base", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": [ + "classifier" + ], + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "value", + "query" + ], + "task_type": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-210/adapter_model.safetensors b/checkpoint-210/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2ddd37a9e418f8f053bb39a92b9981a7f30f284e --- /dev/null +++ b/checkpoint-210/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bada40c7a27d3fca72f09dc2f3bd19ae8a7042a929fb1cc731f43955d754021a +size 593144 diff --git a/checkpoint-210/optimizer.pt b/checkpoint-210/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..112d588052cdd833a707c019c13c4b655117067e --- /dev/null +++ b/checkpoint-210/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33f5609b59caa8e8a3196a2a776609bc4106e12a6a0816b19409fe70a0967f04 +size 1200278 diff --git a/checkpoint-210/rng_state.pth b/checkpoint-210/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..f28ac98de5c5a215197b77c33e7c47ced0540c07 --- /dev/null +++ b/checkpoint-210/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf8e83d33f0270a73a688ce972bad23e989ad0bd3b9f0693756dd06a901d1963 +size 14244 diff --git a/checkpoint-210/scheduler.pt b/checkpoint-210/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..ea852db77120ea2a7324360ddd7cb67832ec570e --- /dev/null +++ b/checkpoint-210/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:068333b42a807264b437c0d1174d781fb3f59454fa67f9884e34b54c798d7287 +size 1064 diff --git a/checkpoint-210/trainer_state.json b/checkpoint-210/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..1dd94b79188ae486acfe823c5015475e67b2ef5b --- /dev/null +++ b/checkpoint-210/trainer_state.json @@ -0,0 +1,103 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 10.0, + "eval_steps": 500, + "global_step": 210, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 1.0, + "eval_runtime": 11.809, + "eval_samples_per_second": 3.811, + "eval_steps_per_second": 0.423, + "step": 21 + }, + { + "epoch": 2.0, + "eval_runtime": 6.8145, + "eval_samples_per_second": 6.604, + "eval_steps_per_second": 0.734, + "step": 42 + }, + { + "epoch": 3.0, + "eval_runtime": 6.0474, + "eval_samples_per_second": 7.441, + "eval_steps_per_second": 0.827, + "step": 63 + }, + { + "epoch": 4.0, + "eval_runtime": 6.3718, + "eval_samples_per_second": 7.062, + "eval_steps_per_second": 0.785, + "step": 84 + }, + { + "epoch": 5.0, + "eval_runtime": 6.5487, + "eval_samples_per_second": 6.872, + "eval_steps_per_second": 0.764, + "step": 105 + }, + { + "epoch": 6.0, + "eval_runtime": 6.0733, + "eval_samples_per_second": 7.409, + "eval_steps_per_second": 0.823, + "step": 126 + }, + { + "epoch": 7.0, + "eval_runtime": 6.0993, + "eval_samples_per_second": 7.378, + "eval_steps_per_second": 0.82, + "step": 147 + }, + { + "epoch": 8.0, + "eval_runtime": 6.6473, + "eval_samples_per_second": 6.77, + "eval_steps_per_second": 0.752, + "step": 168 + }, + { + "epoch": 9.0, + "eval_runtime": 6.3262, + "eval_samples_per_second": 7.113, + "eval_steps_per_second": 0.79, + "step": 189 + }, + { + "epoch": 10.0, + "eval_runtime": 6.0277, + "eval_samples_per_second": 7.465, + "eval_steps_per_second": 0.829, + "step": 210 + } + ], + "logging_steps": 500, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 983084807577600.0, + "train_batch_size": 10, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-210/training_args.bin b/checkpoint-210/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b601bb06afbbd0f62027bb49993f43301f9ebdb5 --- /dev/null +++ b/checkpoint-210/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:870e1ff24d52f571f9b66eecb81300ba7b620de79860cca05110db11bcc791fa +size 5176 diff --git a/checkpoint-231/README.md b/checkpoint-231/README.md new file mode 100644 index 0000000000000000000000000000000000000000..8c65a9ab0201e1b488bf55ad92a0b5cff9f04be6 --- /dev/null +++ b/checkpoint-231/README.md @@ -0,0 +1,202 @@ +--- +base_model: microsoft/git-base +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/checkpoint-231/adapter_config.json b/checkpoint-231/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b6d44ec7670bb505bd8432352092989b1a15c296 --- /dev/null +++ b/checkpoint-231/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": { + "base_model_class": "GitForCausalLM", + "parent_library": "transformers.models.git.modeling_git" + }, + "base_model_name_or_path": "microsoft/git-base", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": [ + "classifier" + ], + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "value", + "query" + ], + "task_type": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-231/adapter_model.safetensors b/checkpoint-231/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..082d1260193fa8f576bb966cdddbddd3e297a398 --- /dev/null +++ b/checkpoint-231/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3945467e26da0763979d7f27391a0b5dad02925f049d57d21cd2b1e37667157 +size 593144 diff --git a/checkpoint-231/optimizer.pt b/checkpoint-231/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..777c72907807c48a56ef81139dfaa6b5dd101fca --- /dev/null +++ b/checkpoint-231/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f52310fd4c7de5cf45ed46f8c64cb8aad37a9c0174372acd76b53515f55d68b4 +size 1200278 diff --git a/checkpoint-231/rng_state.pth b/checkpoint-231/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..f57c9af98d4921a8e55dd7b4171a68804ddac809 --- /dev/null +++ b/checkpoint-231/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a56db2e1c3ebe0775a97d88ef37cd0101d4a82ab068b80c114d0ed6ba3ac41b0 +size 14244 diff --git a/checkpoint-231/scheduler.pt b/checkpoint-231/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..f7c3a4389c5aed5c728fb13c85ae251ddb68bd13 --- /dev/null +++ b/checkpoint-231/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3bb8e215e956b5ff701ee343c0f52b9787c7ebb5531f1c9addd4dc0b9f5b7111 +size 1064 diff --git a/checkpoint-231/trainer_state.json b/checkpoint-231/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..543990597e9e9d90ad39f1c129a4e2c275a4a4d3 --- /dev/null +++ b/checkpoint-231/trainer_state.json @@ -0,0 +1,110 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 11.0, + "eval_steps": 500, + "global_step": 231, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 1.0, + "eval_runtime": 11.809, + "eval_samples_per_second": 3.811, + "eval_steps_per_second": 0.423, + "step": 21 + }, + { + "epoch": 2.0, + "eval_runtime": 6.8145, + "eval_samples_per_second": 6.604, + "eval_steps_per_second": 0.734, + "step": 42 + }, + { + "epoch": 3.0, + "eval_runtime": 6.0474, + "eval_samples_per_second": 7.441, + "eval_steps_per_second": 0.827, + "step": 63 + }, + { + "epoch": 4.0, + "eval_runtime": 6.3718, + "eval_samples_per_second": 7.062, + "eval_steps_per_second": 0.785, + "step": 84 + }, + { + "epoch": 5.0, + "eval_runtime": 6.5487, + "eval_samples_per_second": 6.872, + "eval_steps_per_second": 0.764, + "step": 105 + }, + { + "epoch": 6.0, + "eval_runtime": 6.0733, + "eval_samples_per_second": 7.409, + "eval_steps_per_second": 0.823, + "step": 126 + }, + { + "epoch": 7.0, + "eval_runtime": 6.0993, + "eval_samples_per_second": 7.378, + "eval_steps_per_second": 0.82, + "step": 147 + }, + { + "epoch": 8.0, + "eval_runtime": 6.6473, + "eval_samples_per_second": 6.77, + "eval_steps_per_second": 0.752, + "step": 168 + }, + { + "epoch": 9.0, + "eval_runtime": 6.3262, + "eval_samples_per_second": 7.113, + "eval_steps_per_second": 0.79, + "step": 189 + }, + { + "epoch": 10.0, + "eval_runtime": 6.0277, + "eval_samples_per_second": 7.465, + "eval_steps_per_second": 0.829, + "step": 210 + }, + { + "epoch": 11.0, + "eval_runtime": 6.6821, + "eval_samples_per_second": 6.734, + "eval_steps_per_second": 0.748, + "step": 231 + } + ], + "logging_steps": 500, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1081393288335360.0, + "train_batch_size": 10, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-231/training_args.bin b/checkpoint-231/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b601bb06afbbd0f62027bb49993f43301f9ebdb5 --- /dev/null +++ b/checkpoint-231/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:870e1ff24d52f571f9b66eecb81300ba7b620de79860cca05110db11bcc791fa +size 5176 diff --git a/checkpoint-252/README.md b/checkpoint-252/README.md new file mode 100644 index 0000000000000000000000000000000000000000..8c65a9ab0201e1b488bf55ad92a0b5cff9f04be6 --- /dev/null +++ b/checkpoint-252/README.md @@ -0,0 +1,202 @@ +--- +base_model: microsoft/git-base +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/checkpoint-252/adapter_config.json b/checkpoint-252/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b6d44ec7670bb505bd8432352092989b1a15c296 --- /dev/null +++ b/checkpoint-252/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": { + "base_model_class": "GitForCausalLM", + "parent_library": "transformers.models.git.modeling_git" + }, + "base_model_name_or_path": "microsoft/git-base", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": [ + "classifier" + ], + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "value", + "query" + ], + "task_type": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-252/adapter_model.safetensors b/checkpoint-252/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2c2791e02c1bf5de88c8648e1f262daff74e7eb4 --- /dev/null +++ b/checkpoint-252/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b885a1e2868764df884b4ee31512bd7646802501429777e0ab4ce477661a3b44 +size 593144 diff --git a/checkpoint-252/optimizer.pt b/checkpoint-252/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..0ff87f104f0b0f908f5a7fb66d70d8062f1bda9d --- /dev/null +++ b/checkpoint-252/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20b2bf86fc024f3d1e9756cd433154330095a88718b3133be10efee038e81592 +size 1200278 diff --git a/checkpoint-252/rng_state.pth b/checkpoint-252/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..59fb4df07f3c52fea651f280fea2ce3a2c1861fe --- /dev/null +++ b/checkpoint-252/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:adbbe5b08e02a4d3a6c17fc71db2f69ecf33633b62e1bf2c3ef3b713758e20eb +size 14244 diff --git a/checkpoint-252/scheduler.pt b/checkpoint-252/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..c83d1d801f738766b3b80334964ba322f30d8bfc --- /dev/null +++ b/checkpoint-252/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a587e46f14ab1d00002dfb7aa0f0930c2e27e9beae687be102d7ef47b4e6003c +size 1064 diff --git a/checkpoint-252/trainer_state.json b/checkpoint-252/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..ef89830a197e0ec8441470f9170c39249b80ab17 --- /dev/null +++ b/checkpoint-252/trainer_state.json @@ -0,0 +1,117 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 12.0, + "eval_steps": 500, + "global_step": 252, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 1.0, + "eval_runtime": 11.809, + "eval_samples_per_second": 3.811, + "eval_steps_per_second": 0.423, + "step": 21 + }, + { + "epoch": 2.0, + "eval_runtime": 6.8145, + "eval_samples_per_second": 6.604, + "eval_steps_per_second": 0.734, + "step": 42 + }, + { + "epoch": 3.0, + "eval_runtime": 6.0474, + "eval_samples_per_second": 7.441, + "eval_steps_per_second": 0.827, + "step": 63 + }, + { + "epoch": 4.0, + "eval_runtime": 6.3718, + "eval_samples_per_second": 7.062, + "eval_steps_per_second": 0.785, + "step": 84 + }, + { + "epoch": 5.0, + "eval_runtime": 6.5487, + "eval_samples_per_second": 6.872, + "eval_steps_per_second": 0.764, + "step": 105 + }, + { + "epoch": 6.0, + "eval_runtime": 6.0733, + "eval_samples_per_second": 7.409, + "eval_steps_per_second": 0.823, + "step": 126 + }, + { + "epoch": 7.0, + "eval_runtime": 6.0993, + "eval_samples_per_second": 7.378, + "eval_steps_per_second": 0.82, + "step": 147 + }, + { + "epoch": 8.0, + "eval_runtime": 6.6473, + "eval_samples_per_second": 6.77, + "eval_steps_per_second": 0.752, + "step": 168 + }, + { + "epoch": 9.0, + "eval_runtime": 6.3262, + "eval_samples_per_second": 7.113, + "eval_steps_per_second": 0.79, + "step": 189 + }, + { + "epoch": 10.0, + "eval_runtime": 6.0277, + "eval_samples_per_second": 7.465, + "eval_steps_per_second": 0.829, + "step": 210 + }, + { + "epoch": 11.0, + "eval_runtime": 6.6821, + "eval_samples_per_second": 6.734, + "eval_steps_per_second": 0.748, + "step": 231 + }, + { + "epoch": 12.0, + "eval_runtime": 6.5331, + "eval_samples_per_second": 6.888, + "eval_steps_per_second": 0.765, + "step": 252 + } + ], + "logging_steps": 500, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1179701769093120.0, + "train_batch_size": 10, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-252/training_args.bin b/checkpoint-252/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b601bb06afbbd0f62027bb49993f43301f9ebdb5 --- /dev/null +++ b/checkpoint-252/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:870e1ff24d52f571f9b66eecb81300ba7b620de79860cca05110db11bcc791fa +size 5176 diff --git a/checkpoint-273/README.md b/checkpoint-273/README.md new file mode 100644 index 0000000000000000000000000000000000000000..8c65a9ab0201e1b488bf55ad92a0b5cff9f04be6 --- /dev/null +++ b/checkpoint-273/README.md @@ -0,0 +1,202 @@ +--- +base_model: microsoft/git-base +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/checkpoint-273/adapter_config.json b/checkpoint-273/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b6d44ec7670bb505bd8432352092989b1a15c296 --- /dev/null +++ b/checkpoint-273/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": { + "base_model_class": "GitForCausalLM", + "parent_library": "transformers.models.git.modeling_git" + }, + "base_model_name_or_path": "microsoft/git-base", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": [ + "classifier" + ], + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "value", + "query" + ], + "task_type": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-273/adapter_model.safetensors b/checkpoint-273/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..47aa84ac2c22a06401ebaff5035166bca03aef64 --- /dev/null +++ b/checkpoint-273/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b812dab085803514ee5cfec8786e4c85dd38743cf53a67aae5ecdd4a30cab1a4 +size 593144 diff --git a/checkpoint-273/optimizer.pt b/checkpoint-273/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..61aee247425a185c7fdea5a5abde70ea70fb5752 --- /dev/null +++ b/checkpoint-273/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4ca1a01400ee71c5ea48fa2db7f3ffb706b1ac186b0c96965eeb37b344c1f6d +size 1200278 diff --git a/checkpoint-273/rng_state.pth b/checkpoint-273/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..e9e2d44376d0bf12ab6d092907f71f9017fdd829 --- /dev/null +++ b/checkpoint-273/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e056d87bd1ba6269f19b166b6619dc0bc4ccdc5bce35ce0d9f3eb2bfb0b1a483 +size 14244 diff --git a/checkpoint-273/scheduler.pt b/checkpoint-273/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..6452d3b2e36190348a2b6a135347574196436811 --- /dev/null +++ b/checkpoint-273/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a0f8bd1983d7521211731bbdf129e193f7cc976afb7cf81cd862661693e802b +size 1064 diff --git a/checkpoint-273/trainer_state.json b/checkpoint-273/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..741bcd773f3b6a392fda89d2d95567e009d4027e --- /dev/null +++ b/checkpoint-273/trainer_state.json @@ -0,0 +1,124 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 13.0, + "eval_steps": 500, + "global_step": 273, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 1.0, + "eval_runtime": 11.809, + "eval_samples_per_second": 3.811, + "eval_steps_per_second": 0.423, + "step": 21 + }, + { + "epoch": 2.0, + "eval_runtime": 6.8145, + "eval_samples_per_second": 6.604, + "eval_steps_per_second": 0.734, + "step": 42 + }, + { + "epoch": 3.0, + "eval_runtime": 6.0474, + "eval_samples_per_second": 7.441, + "eval_steps_per_second": 0.827, + "step": 63 + }, + { + "epoch": 4.0, + "eval_runtime": 6.3718, + "eval_samples_per_second": 7.062, + "eval_steps_per_second": 0.785, + "step": 84 + }, + { + "epoch": 5.0, + "eval_runtime": 6.5487, + "eval_samples_per_second": 6.872, + "eval_steps_per_second": 0.764, + "step": 105 + }, + { + "epoch": 6.0, + "eval_runtime": 6.0733, + "eval_samples_per_second": 7.409, + "eval_steps_per_second": 0.823, + "step": 126 + }, + { + "epoch": 7.0, + "eval_runtime": 6.0993, + "eval_samples_per_second": 7.378, + "eval_steps_per_second": 0.82, + "step": 147 + }, + { + "epoch": 8.0, + "eval_runtime": 6.6473, + "eval_samples_per_second": 6.77, + "eval_steps_per_second": 0.752, + "step": 168 + }, + { + "epoch": 9.0, + "eval_runtime": 6.3262, + "eval_samples_per_second": 7.113, + "eval_steps_per_second": 0.79, + "step": 189 + }, + { + "epoch": 10.0, + "eval_runtime": 6.0277, + "eval_samples_per_second": 7.465, + "eval_steps_per_second": 0.829, + "step": 210 + }, + { + "epoch": 11.0, + "eval_runtime": 6.6821, + "eval_samples_per_second": 6.734, + "eval_steps_per_second": 0.748, + "step": 231 + }, + { + "epoch": 12.0, + "eval_runtime": 6.5331, + "eval_samples_per_second": 6.888, + "eval_steps_per_second": 0.765, + "step": 252 + }, + { + "epoch": 13.0, + "eval_runtime": 6.0417, + "eval_samples_per_second": 7.448, + "eval_steps_per_second": 0.828, + "step": 273 + } + ], + "logging_steps": 500, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1278010249850880.0, + "train_batch_size": 10, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-273/training_args.bin b/checkpoint-273/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b601bb06afbbd0f62027bb49993f43301f9ebdb5 --- /dev/null +++ b/checkpoint-273/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:870e1ff24d52f571f9b66eecb81300ba7b620de79860cca05110db11bcc791fa +size 5176 diff --git a/checkpoint-294/README.md b/checkpoint-294/README.md new file mode 100644 index 0000000000000000000000000000000000000000..8c65a9ab0201e1b488bf55ad92a0b5cff9f04be6 --- /dev/null +++ b/checkpoint-294/README.md @@ -0,0 +1,202 @@ +--- +base_model: microsoft/git-base +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/checkpoint-294/adapter_config.json b/checkpoint-294/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b6d44ec7670bb505bd8432352092989b1a15c296 --- /dev/null +++ b/checkpoint-294/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": { + "base_model_class": "GitForCausalLM", + "parent_library": "transformers.models.git.modeling_git" + }, + "base_model_name_or_path": "microsoft/git-base", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": [ + "classifier" + ], + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "value", + "query" + ], + "task_type": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-294/adapter_model.safetensors b/checkpoint-294/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dcc9acb2f0abcf4cb0e86b4074aff0c436d0f28a --- /dev/null +++ b/checkpoint-294/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6730e85299e2d379e895f6f655567696ecf4d06d426969bcb235f95d6029977 +size 593144 diff --git a/checkpoint-294/optimizer.pt b/checkpoint-294/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..a893baee5419acc929b3082915892ae1c96903e4 --- /dev/null +++ b/checkpoint-294/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:636df25e106011c97b68cba997fb0f284cf79dfbc1a6c6060ed4a037ab2df9d9 +size 1200278 diff --git a/checkpoint-294/rng_state.pth b/checkpoint-294/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..da969a2d9c34a4f48b2fe0dccc8b1fafd06e863f --- /dev/null +++ b/checkpoint-294/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:850f9cd3f493b98c0b9d1c386c9918c6d0acc3294d339b8625261880902cb27d +size 14244 diff --git a/checkpoint-294/scheduler.pt b/checkpoint-294/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..b358a1757d3b2d0b6131d351079650bd7533ecec --- /dev/null +++ b/checkpoint-294/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95ffa6febcc892ebf4f256d40a0076e01a3a3112e9d50156801de4af956dcfb0 +size 1064 diff --git a/checkpoint-294/trainer_state.json b/checkpoint-294/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..148930ac8642fecf6298ec06f1047e6a42618d89 --- /dev/null +++ b/checkpoint-294/trainer_state.json @@ -0,0 +1,131 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 14.0, + "eval_steps": 500, + "global_step": 294, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 1.0, + "eval_runtime": 11.809, + "eval_samples_per_second": 3.811, + "eval_steps_per_second": 0.423, + "step": 21 + }, + { + "epoch": 2.0, + "eval_runtime": 6.8145, + "eval_samples_per_second": 6.604, + "eval_steps_per_second": 0.734, + "step": 42 + }, + { + "epoch": 3.0, + "eval_runtime": 6.0474, + "eval_samples_per_second": 7.441, + "eval_steps_per_second": 0.827, + "step": 63 + }, + { + "epoch": 4.0, + "eval_runtime": 6.3718, + "eval_samples_per_second": 7.062, + "eval_steps_per_second": 0.785, + "step": 84 + }, + { + "epoch": 5.0, + "eval_runtime": 6.5487, + "eval_samples_per_second": 6.872, + "eval_steps_per_second": 0.764, + "step": 105 + }, + { + "epoch": 6.0, + "eval_runtime": 6.0733, + "eval_samples_per_second": 7.409, + "eval_steps_per_second": 0.823, + "step": 126 + }, + { + "epoch": 7.0, + "eval_runtime": 6.0993, + "eval_samples_per_second": 7.378, + "eval_steps_per_second": 0.82, + "step": 147 + }, + { + "epoch": 8.0, + "eval_runtime": 6.6473, + "eval_samples_per_second": 6.77, + "eval_steps_per_second": 0.752, + "step": 168 + }, + { + "epoch": 9.0, + "eval_runtime": 6.3262, + "eval_samples_per_second": 7.113, + "eval_steps_per_second": 0.79, + "step": 189 + }, + { + "epoch": 10.0, + "eval_runtime": 6.0277, + "eval_samples_per_second": 7.465, + "eval_steps_per_second": 0.829, + "step": 210 + }, + { + "epoch": 11.0, + "eval_runtime": 6.6821, + "eval_samples_per_second": 6.734, + "eval_steps_per_second": 0.748, + "step": 231 + }, + { + "epoch": 12.0, + "eval_runtime": 6.5331, + "eval_samples_per_second": 6.888, + "eval_steps_per_second": 0.765, + "step": 252 + }, + { + "epoch": 13.0, + "eval_runtime": 6.0417, + "eval_samples_per_second": 7.448, + "eval_steps_per_second": 0.828, + "step": 273 + }, + { + "epoch": 14.0, + "eval_runtime": 6.0373, + "eval_samples_per_second": 7.454, + "eval_steps_per_second": 0.828, + "step": 294 + } + ], + "logging_steps": 500, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1376318730608640.0, + "train_batch_size": 10, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-294/training_args.bin b/checkpoint-294/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b601bb06afbbd0f62027bb49993f43301f9ebdb5 --- /dev/null +++ b/checkpoint-294/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:870e1ff24d52f571f9b66eecb81300ba7b620de79860cca05110db11bcc791fa +size 5176 diff --git a/checkpoint-315/README.md b/checkpoint-315/README.md new file mode 100644 index 0000000000000000000000000000000000000000..8c65a9ab0201e1b488bf55ad92a0b5cff9f04be6 --- /dev/null +++ b/checkpoint-315/README.md @@ -0,0 +1,202 @@ +--- +base_model: microsoft/git-base +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/checkpoint-315/adapter_config.json b/checkpoint-315/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b6d44ec7670bb505bd8432352092989b1a15c296 --- /dev/null +++ b/checkpoint-315/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": { + "base_model_class": "GitForCausalLM", + "parent_library": "transformers.models.git.modeling_git" + }, + "base_model_name_or_path": "microsoft/git-base", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": [ + "classifier" + ], + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "value", + "query" + ], + "task_type": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-315/adapter_model.safetensors b/checkpoint-315/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c424d70832428ed4b23f1846300daa8b5008f3c5 --- /dev/null +++ b/checkpoint-315/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5c49fccecf48df4b3b35b3fffea9a43a29fedc52928c5536ae14d715a2a5438 +size 593144 diff --git a/checkpoint-315/optimizer.pt b/checkpoint-315/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..0976439366db6a6c774146375b9115ef09c3e14e --- /dev/null +++ b/checkpoint-315/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99c44746239fd297f0ad9e38f3784a0a852592da05f454cb0ad098165d9b3f2d +size 1200278 diff --git a/checkpoint-315/rng_state.pth b/checkpoint-315/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..8bdbcc9bd116bc8184ba94e2d8f46d103de75682 --- /dev/null +++ b/checkpoint-315/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c8324c862a05f09e44ea56d5b7eeb0439d021d2430402361d830565fecb555a +size 14244 diff --git a/checkpoint-315/scheduler.pt b/checkpoint-315/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..f5d03b5e15bbe64219fc100ac5216cca57cc8fa7 --- /dev/null +++ b/checkpoint-315/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28fb4bce215c56fe6f5a1be6d8e21401005788203ecf29f97f99ebbb7005ba43 +size 1064 diff --git a/checkpoint-315/trainer_state.json b/checkpoint-315/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..b907a5c9b47dc22b3a849e4ef62440d3d0fe28cf --- /dev/null +++ b/checkpoint-315/trainer_state.json @@ -0,0 +1,138 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 15.0, + "eval_steps": 500, + "global_step": 315, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 1.0, + "eval_runtime": 11.809, + "eval_samples_per_second": 3.811, + "eval_steps_per_second": 0.423, + "step": 21 + }, + { + "epoch": 2.0, + "eval_runtime": 6.8145, + "eval_samples_per_second": 6.604, + "eval_steps_per_second": 0.734, + "step": 42 + }, + { + "epoch": 3.0, + "eval_runtime": 6.0474, + "eval_samples_per_second": 7.441, + "eval_steps_per_second": 0.827, + "step": 63 + }, + { + "epoch": 4.0, + "eval_runtime": 6.3718, + "eval_samples_per_second": 7.062, + "eval_steps_per_second": 0.785, + "step": 84 + }, + { + "epoch": 5.0, + "eval_runtime": 6.5487, + "eval_samples_per_second": 6.872, + "eval_steps_per_second": 0.764, + "step": 105 + }, + { + "epoch": 6.0, + "eval_runtime": 6.0733, + "eval_samples_per_second": 7.409, + "eval_steps_per_second": 0.823, + "step": 126 + }, + { + "epoch": 7.0, + "eval_runtime": 6.0993, + "eval_samples_per_second": 7.378, + "eval_steps_per_second": 0.82, + "step": 147 + }, + { + "epoch": 8.0, + "eval_runtime": 6.6473, + "eval_samples_per_second": 6.77, + "eval_steps_per_second": 0.752, + "step": 168 + }, + { + "epoch": 9.0, + "eval_runtime": 6.3262, + "eval_samples_per_second": 7.113, + "eval_steps_per_second": 0.79, + "step": 189 + }, + { + "epoch": 10.0, + "eval_runtime": 6.0277, + "eval_samples_per_second": 7.465, + "eval_steps_per_second": 0.829, + "step": 210 + }, + { + "epoch": 11.0, + "eval_runtime": 6.6821, + "eval_samples_per_second": 6.734, + "eval_steps_per_second": 0.748, + "step": 231 + }, + { + "epoch": 12.0, + "eval_runtime": 6.5331, + "eval_samples_per_second": 6.888, + "eval_steps_per_second": 0.765, + "step": 252 + }, + { + "epoch": 13.0, + "eval_runtime": 6.0417, + "eval_samples_per_second": 7.448, + "eval_steps_per_second": 0.828, + "step": 273 + }, + { + "epoch": 14.0, + "eval_runtime": 6.0373, + "eval_samples_per_second": 7.454, + "eval_steps_per_second": 0.828, + "step": 294 + }, + { + "epoch": 15.0, + "eval_runtime": 6.6679, + "eval_samples_per_second": 6.749, + "eval_steps_per_second": 0.75, + "step": 315 + } + ], + "logging_steps": 500, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1474627211366400.0, + "train_batch_size": 10, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-315/training_args.bin b/checkpoint-315/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b601bb06afbbd0f62027bb49993f43301f9ebdb5 --- /dev/null +++ b/checkpoint-315/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:870e1ff24d52f571f9b66eecb81300ba7b620de79860cca05110db11bcc791fa +size 5176 diff --git a/checkpoint-336/README.md b/checkpoint-336/README.md new file mode 100644 index 0000000000000000000000000000000000000000..8c65a9ab0201e1b488bf55ad92a0b5cff9f04be6 --- /dev/null +++ b/checkpoint-336/README.md @@ -0,0 +1,202 @@ +--- +base_model: microsoft/git-base +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/checkpoint-336/adapter_config.json b/checkpoint-336/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b6d44ec7670bb505bd8432352092989b1a15c296 --- /dev/null +++ b/checkpoint-336/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": { + "base_model_class": "GitForCausalLM", + "parent_library": "transformers.models.git.modeling_git" + }, + "base_model_name_or_path": "microsoft/git-base", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": [ + "classifier" + ], + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "value", + "query" + ], + "task_type": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-336/adapter_model.safetensors b/checkpoint-336/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0f9684881ce77835ae7b8d26708c09e9813bca4b --- /dev/null +++ b/checkpoint-336/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d00250a28cd49c9aef52d051b2f958b2f090c331c27b276be30757eff49b0c8 +size 593144 diff --git a/checkpoint-336/optimizer.pt b/checkpoint-336/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..c19cdfa73636b99ea1d7024c5b2221c169c92dd5 --- /dev/null +++ b/checkpoint-336/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0b6f4f0625c3818957b373863bc23be937a1fd2715cd54fbabc65543e77f1f8 +size 1200278 diff --git a/checkpoint-336/rng_state.pth b/checkpoint-336/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..952d92d307e7f83b399e75f0e8a8286ee7c7d628 --- /dev/null +++ b/checkpoint-336/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7fae434b96a6510bb1a89f1f23e890d319046a826eb0a2aed872daaa81385bb +size 14244 diff --git a/checkpoint-336/scheduler.pt b/checkpoint-336/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..bf01e4900d0b673a7ac43d457f4f3c508ed93c9f --- /dev/null +++ b/checkpoint-336/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d90cfb4dedb748b5f30ed9ab73f49873d108f6e822a28b85bee9790e2e055a3 +size 1064 diff --git a/checkpoint-336/trainer_state.json b/checkpoint-336/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..0a8bcf165aed7e56975ad43d546f859cc91b56c3 --- /dev/null +++ b/checkpoint-336/trainer_state.json @@ -0,0 +1,145 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 16.0, + "eval_steps": 500, + "global_step": 336, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 1.0, + "eval_runtime": 11.809, + "eval_samples_per_second": 3.811, + "eval_steps_per_second": 0.423, + "step": 21 + }, + { + "epoch": 2.0, + "eval_runtime": 6.8145, + "eval_samples_per_second": 6.604, + "eval_steps_per_second": 0.734, + "step": 42 + }, + { + "epoch": 3.0, + "eval_runtime": 6.0474, + "eval_samples_per_second": 7.441, + "eval_steps_per_second": 0.827, + "step": 63 + }, + { + "epoch": 4.0, + "eval_runtime": 6.3718, + "eval_samples_per_second": 7.062, + "eval_steps_per_second": 0.785, + "step": 84 + }, + { + "epoch": 5.0, + "eval_runtime": 6.5487, + "eval_samples_per_second": 6.872, + "eval_steps_per_second": 0.764, + "step": 105 + }, + { + "epoch": 6.0, + "eval_runtime": 6.0733, + "eval_samples_per_second": 7.409, + "eval_steps_per_second": 0.823, + "step": 126 + }, + { + "epoch": 7.0, + "eval_runtime": 6.0993, + "eval_samples_per_second": 7.378, + "eval_steps_per_second": 0.82, + "step": 147 + }, + { + "epoch": 8.0, + "eval_runtime": 6.6473, + "eval_samples_per_second": 6.77, + "eval_steps_per_second": 0.752, + "step": 168 + }, + { + "epoch": 9.0, + "eval_runtime": 6.3262, + "eval_samples_per_second": 7.113, + "eval_steps_per_second": 0.79, + "step": 189 + }, + { + "epoch": 10.0, + "eval_runtime": 6.0277, + "eval_samples_per_second": 7.465, + "eval_steps_per_second": 0.829, + "step": 210 + }, + { + "epoch": 11.0, + "eval_runtime": 6.6821, + "eval_samples_per_second": 6.734, + "eval_steps_per_second": 0.748, + "step": 231 + }, + { + "epoch": 12.0, + "eval_runtime": 6.5331, + "eval_samples_per_second": 6.888, + "eval_steps_per_second": 0.765, + "step": 252 + }, + { + "epoch": 13.0, + "eval_runtime": 6.0417, + "eval_samples_per_second": 7.448, + "eval_steps_per_second": 0.828, + "step": 273 + }, + { + "epoch": 14.0, + "eval_runtime": 6.0373, + "eval_samples_per_second": 7.454, + "eval_steps_per_second": 0.828, + "step": 294 + }, + { + "epoch": 15.0, + "eval_runtime": 6.6679, + "eval_samples_per_second": 6.749, + "eval_steps_per_second": 0.75, + "step": 315 + }, + { + "epoch": 16.0, + "eval_runtime": 6.4137, + "eval_samples_per_second": 7.016, + "eval_steps_per_second": 0.78, + "step": 336 + } + ], + "logging_steps": 500, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1572935692124160.0, + "train_batch_size": 10, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-336/training_args.bin b/checkpoint-336/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b601bb06afbbd0f62027bb49993f43301f9ebdb5 --- /dev/null +++ b/checkpoint-336/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:870e1ff24d52f571f9b66eecb81300ba7b620de79860cca05110db11bcc791fa +size 5176 diff --git a/checkpoint-357/README.md b/checkpoint-357/README.md new file mode 100644 index 0000000000000000000000000000000000000000..8c65a9ab0201e1b488bf55ad92a0b5cff9f04be6 --- /dev/null +++ b/checkpoint-357/README.md @@ -0,0 +1,202 @@ +--- +base_model: microsoft/git-base +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/checkpoint-357/adapter_config.json b/checkpoint-357/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b6d44ec7670bb505bd8432352092989b1a15c296 --- /dev/null +++ b/checkpoint-357/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": { + "base_model_class": "GitForCausalLM", + "parent_library": "transformers.models.git.modeling_git" + }, + "base_model_name_or_path": "microsoft/git-base", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": [ + "classifier" + ], + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "value", + "query" + ], + "task_type": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-357/adapter_model.safetensors b/checkpoint-357/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..200bfd37c09922a74c25c44e793c294fcd8d5036 --- /dev/null +++ b/checkpoint-357/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4382cdd0aefc59ae1fef8b69ed726c2955f795f99022be1f20a6eef1deceb64c +size 593144 diff --git a/checkpoint-357/optimizer.pt b/checkpoint-357/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..3018d20df66c339548de66424a4b068b2f02b2ef --- /dev/null +++ b/checkpoint-357/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9744b42cc282604bfc25899ccf9351ad8831e2d526111bcc1ea493360fe3602 +size 1200278 diff --git a/checkpoint-357/rng_state.pth b/checkpoint-357/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..f7facb885063c585f1665ab189bc49546805da2d --- /dev/null +++ b/checkpoint-357/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:269cd9b66021e6b8628b191e2ce641028775724024cbdbc15b77856e8fc6c966 +size 14244 diff --git a/checkpoint-357/scheduler.pt b/checkpoint-357/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..18afe1f5b6a77dfab270cd47be4cdf62e26f0bb7 --- /dev/null +++ b/checkpoint-357/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21fd9ca2388d68c453c9238449f333f2863613383b27e79d14cb1289da4744f3 +size 1064 diff --git a/checkpoint-357/trainer_state.json b/checkpoint-357/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..d48bfd1dac0732646c1638daf67e012866096eb9 --- /dev/null +++ b/checkpoint-357/trainer_state.json @@ -0,0 +1,152 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 17.0, + "eval_steps": 500, + "global_step": 357, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 1.0, + "eval_runtime": 11.809, + "eval_samples_per_second": 3.811, + "eval_steps_per_second": 0.423, + "step": 21 + }, + { + "epoch": 2.0, + "eval_runtime": 6.8145, + "eval_samples_per_second": 6.604, + "eval_steps_per_second": 0.734, + "step": 42 + }, + { + "epoch": 3.0, + "eval_runtime": 6.0474, + "eval_samples_per_second": 7.441, + "eval_steps_per_second": 0.827, + "step": 63 + }, + { + "epoch": 4.0, + "eval_runtime": 6.3718, + "eval_samples_per_second": 7.062, + "eval_steps_per_second": 0.785, + "step": 84 + }, + { + "epoch": 5.0, + "eval_runtime": 6.5487, + "eval_samples_per_second": 6.872, + "eval_steps_per_second": 0.764, + "step": 105 + }, + { + "epoch": 6.0, + "eval_runtime": 6.0733, + "eval_samples_per_second": 7.409, + "eval_steps_per_second": 0.823, + "step": 126 + }, + { + "epoch": 7.0, + "eval_runtime": 6.0993, + "eval_samples_per_second": 7.378, + "eval_steps_per_second": 0.82, + "step": 147 + }, + { + "epoch": 8.0, + "eval_runtime": 6.6473, + "eval_samples_per_second": 6.77, + "eval_steps_per_second": 0.752, + "step": 168 + }, + { + "epoch": 9.0, + "eval_runtime": 6.3262, + "eval_samples_per_second": 7.113, + "eval_steps_per_second": 0.79, + "step": 189 + }, + { + "epoch": 10.0, + "eval_runtime": 6.0277, + "eval_samples_per_second": 7.465, + "eval_steps_per_second": 0.829, + "step": 210 + }, + { + "epoch": 11.0, + "eval_runtime": 6.6821, + "eval_samples_per_second": 6.734, + "eval_steps_per_second": 0.748, + "step": 231 + }, + { + "epoch": 12.0, + "eval_runtime": 6.5331, + "eval_samples_per_second": 6.888, + "eval_steps_per_second": 0.765, + "step": 252 + }, + { + "epoch": 13.0, + "eval_runtime": 6.0417, + "eval_samples_per_second": 7.448, + "eval_steps_per_second": 0.828, + "step": 273 + }, + { + "epoch": 14.0, + "eval_runtime": 6.0373, + "eval_samples_per_second": 7.454, + "eval_steps_per_second": 0.828, + "step": 294 + }, + { + "epoch": 15.0, + "eval_runtime": 6.6679, + "eval_samples_per_second": 6.749, + "eval_steps_per_second": 0.75, + "step": 315 + }, + { + "epoch": 16.0, + "eval_runtime": 6.4137, + "eval_samples_per_second": 7.016, + "eval_steps_per_second": 0.78, + "step": 336 + }, + { + "epoch": 17.0, + "eval_runtime": 6.106, + "eval_samples_per_second": 7.37, + "eval_steps_per_second": 0.819, + "step": 357 + } + ], + "logging_steps": 500, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1671244172881920.0, + "train_batch_size": 10, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-357/training_args.bin b/checkpoint-357/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b601bb06afbbd0f62027bb49993f43301f9ebdb5 --- /dev/null +++ b/checkpoint-357/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:870e1ff24d52f571f9b66eecb81300ba7b620de79860cca05110db11bcc791fa +size 5176 diff --git a/checkpoint-378/README.md b/checkpoint-378/README.md new file mode 100644 index 0000000000000000000000000000000000000000..8c65a9ab0201e1b488bf55ad92a0b5cff9f04be6 --- /dev/null +++ b/checkpoint-378/README.md @@ -0,0 +1,202 @@ +--- +base_model: microsoft/git-base +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/checkpoint-378/adapter_config.json b/checkpoint-378/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b6d44ec7670bb505bd8432352092989b1a15c296 --- /dev/null +++ b/checkpoint-378/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": { + "base_model_class": "GitForCausalLM", + "parent_library": "transformers.models.git.modeling_git" + }, + "base_model_name_or_path": "microsoft/git-base", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": [ + "classifier" + ], + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "value", + "query" + ], + "task_type": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-378/adapter_model.safetensors b/checkpoint-378/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..85fb407f7af20d4a24c98045d3af54aee28d74c0 --- /dev/null +++ b/checkpoint-378/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db33d7b96bc240608c7953e0aaa779ea3ce6d177dd98e235372c1b778766335b +size 593144 diff --git a/checkpoint-378/optimizer.pt b/checkpoint-378/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..29a6ce9c84d24519ced276ac9414a2fdfeffa129 --- /dev/null +++ b/checkpoint-378/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e467c7c81bc84a3aebb216144c978ea28bbd0728b5b09f3017145a3c58afe545 +size 1200278 diff --git a/checkpoint-378/rng_state.pth b/checkpoint-378/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..7b3020b8937faff16ac454de8b15041ac5555a90 --- /dev/null +++ b/checkpoint-378/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ae94711cf5cb87c89c2f67686b10c1b58c8c590a6013b8c19f81ee9625c0e56 +size 14244 diff --git a/checkpoint-378/scheduler.pt b/checkpoint-378/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..e99a64ae5b041b56664cf213d3f889b5a4583e78 --- /dev/null +++ b/checkpoint-378/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be3a25f974b7829e9e1a053625b5248c3ba9891d5ce2a0777f46e6cf27e1c9e7 +size 1064 diff --git a/checkpoint-378/trainer_state.json b/checkpoint-378/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..46110cd08c9c4acdf06293ce7c42505338892fed --- /dev/null +++ b/checkpoint-378/trainer_state.json @@ -0,0 +1,159 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 18.0, + "eval_steps": 500, + "global_step": 378, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 1.0, + "eval_runtime": 11.809, + "eval_samples_per_second": 3.811, + "eval_steps_per_second": 0.423, + "step": 21 + }, + { + "epoch": 2.0, + "eval_runtime": 6.8145, + "eval_samples_per_second": 6.604, + "eval_steps_per_second": 0.734, + "step": 42 + }, + { + "epoch": 3.0, + "eval_runtime": 6.0474, + "eval_samples_per_second": 7.441, + "eval_steps_per_second": 0.827, + "step": 63 + }, + { + "epoch": 4.0, + "eval_runtime": 6.3718, + "eval_samples_per_second": 7.062, + "eval_steps_per_second": 0.785, + "step": 84 + }, + { + "epoch": 5.0, + "eval_runtime": 6.5487, + "eval_samples_per_second": 6.872, + "eval_steps_per_second": 0.764, + "step": 105 + }, + { + "epoch": 6.0, + "eval_runtime": 6.0733, + "eval_samples_per_second": 7.409, + "eval_steps_per_second": 0.823, + "step": 126 + }, + { + "epoch": 7.0, + "eval_runtime": 6.0993, + "eval_samples_per_second": 7.378, + "eval_steps_per_second": 0.82, + "step": 147 + }, + { + "epoch": 8.0, + "eval_runtime": 6.6473, + "eval_samples_per_second": 6.77, + "eval_steps_per_second": 0.752, + "step": 168 + }, + { + "epoch": 9.0, + "eval_runtime": 6.3262, + "eval_samples_per_second": 7.113, + "eval_steps_per_second": 0.79, + "step": 189 + }, + { + "epoch": 10.0, + "eval_runtime": 6.0277, + "eval_samples_per_second": 7.465, + "eval_steps_per_second": 0.829, + "step": 210 + }, + { + "epoch": 11.0, + "eval_runtime": 6.6821, + "eval_samples_per_second": 6.734, + "eval_steps_per_second": 0.748, + "step": 231 + }, + { + "epoch": 12.0, + "eval_runtime": 6.5331, + "eval_samples_per_second": 6.888, + "eval_steps_per_second": 0.765, + "step": 252 + }, + { + "epoch": 13.0, + "eval_runtime": 6.0417, + "eval_samples_per_second": 7.448, + "eval_steps_per_second": 0.828, + "step": 273 + }, + { + "epoch": 14.0, + "eval_runtime": 6.0373, + "eval_samples_per_second": 7.454, + "eval_steps_per_second": 0.828, + "step": 294 + }, + { + "epoch": 15.0, + "eval_runtime": 6.6679, + "eval_samples_per_second": 6.749, + "eval_steps_per_second": 0.75, + "step": 315 + }, + { + "epoch": 16.0, + "eval_runtime": 6.4137, + "eval_samples_per_second": 7.016, + "eval_steps_per_second": 0.78, + "step": 336 + }, + { + "epoch": 17.0, + "eval_runtime": 6.106, + "eval_samples_per_second": 7.37, + "eval_steps_per_second": 0.819, + "step": 357 + }, + { + "epoch": 18.0, + "eval_runtime": 6.6353, + "eval_samples_per_second": 6.782, + "eval_steps_per_second": 0.754, + "step": 378 + } + ], + "logging_steps": 500, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1769552653639680.0, + "train_batch_size": 10, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-378/training_args.bin b/checkpoint-378/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b601bb06afbbd0f62027bb49993f43301f9ebdb5 --- /dev/null +++ b/checkpoint-378/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:870e1ff24d52f571f9b66eecb81300ba7b620de79860cca05110db11bcc791fa +size 5176 diff --git a/checkpoint-399/README.md b/checkpoint-399/README.md new file mode 100644 index 0000000000000000000000000000000000000000..8c65a9ab0201e1b488bf55ad92a0b5cff9f04be6 --- /dev/null +++ b/checkpoint-399/README.md @@ -0,0 +1,202 @@ +--- +base_model: microsoft/git-base +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/checkpoint-399/adapter_config.json b/checkpoint-399/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b6d44ec7670bb505bd8432352092989b1a15c296 --- /dev/null +++ b/checkpoint-399/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": { + "base_model_class": "GitForCausalLM", + "parent_library": "transformers.models.git.modeling_git" + }, + "base_model_name_or_path": "microsoft/git-base", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": [ + "classifier" + ], + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "value", + "query" + ], + "task_type": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-399/adapter_model.safetensors b/checkpoint-399/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7938f3fadd557c9bf84f93841762bd41a354a32d --- /dev/null +++ b/checkpoint-399/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4642065a49b7db3a3db0d62369c59f2e80e5b63533a08b88188be544eb875e7 +size 593144 diff --git a/checkpoint-399/optimizer.pt b/checkpoint-399/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..040fce704394aa00f1f8507886bb1279ea43e9a4 --- /dev/null +++ b/checkpoint-399/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49693123a2f792254e87b7504f2384becedc513bf1093c6f49223701a6b7a419 +size 1200278 diff --git a/checkpoint-399/rng_state.pth b/checkpoint-399/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..cc284c26b305216a4cfc667343821543d43660ae --- /dev/null +++ b/checkpoint-399/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57f111323e77ac9119c481614cdd89d68a6758e9d6d46bb6e1c8603d5c43c87c +size 14244 diff --git a/checkpoint-399/scheduler.pt b/checkpoint-399/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..3fbfa6ed04b3986d7fd0283d7f29ba90deb5c944 --- /dev/null +++ b/checkpoint-399/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c1835df6be0711bd01d5f283c48c7720bfe0b644cc3a8b18fb71a86296f6cfa +size 1064 diff --git a/checkpoint-399/trainer_state.json b/checkpoint-399/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..9e911d621ca2ae9743a47bbf34e50f4d5955ac0b --- /dev/null +++ b/checkpoint-399/trainer_state.json @@ -0,0 +1,166 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 19.0, + "eval_steps": 500, + "global_step": 399, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 1.0, + "eval_runtime": 11.809, + "eval_samples_per_second": 3.811, + "eval_steps_per_second": 0.423, + "step": 21 + }, + { + "epoch": 2.0, + "eval_runtime": 6.8145, + "eval_samples_per_second": 6.604, + "eval_steps_per_second": 0.734, + "step": 42 + }, + { + "epoch": 3.0, + "eval_runtime": 6.0474, + "eval_samples_per_second": 7.441, + "eval_steps_per_second": 0.827, + "step": 63 + }, + { + "epoch": 4.0, + "eval_runtime": 6.3718, + "eval_samples_per_second": 7.062, + "eval_steps_per_second": 0.785, + "step": 84 + }, + { + "epoch": 5.0, + "eval_runtime": 6.5487, + "eval_samples_per_second": 6.872, + "eval_steps_per_second": 0.764, + "step": 105 + }, + { + "epoch": 6.0, + "eval_runtime": 6.0733, + "eval_samples_per_second": 7.409, + "eval_steps_per_second": 0.823, + "step": 126 + }, + { + "epoch": 7.0, + "eval_runtime": 6.0993, + "eval_samples_per_second": 7.378, + "eval_steps_per_second": 0.82, + "step": 147 + }, + { + "epoch": 8.0, + "eval_runtime": 6.6473, + "eval_samples_per_second": 6.77, + "eval_steps_per_second": 0.752, + "step": 168 + }, + { + "epoch": 9.0, + "eval_runtime": 6.3262, + "eval_samples_per_second": 7.113, + "eval_steps_per_second": 0.79, + "step": 189 + }, + { + "epoch": 10.0, + "eval_runtime": 6.0277, + "eval_samples_per_second": 7.465, + "eval_steps_per_second": 0.829, + "step": 210 + }, + { + "epoch": 11.0, + "eval_runtime": 6.6821, + "eval_samples_per_second": 6.734, + "eval_steps_per_second": 0.748, + "step": 231 + }, + { + "epoch": 12.0, + "eval_runtime": 6.5331, + "eval_samples_per_second": 6.888, + "eval_steps_per_second": 0.765, + "step": 252 + }, + { + "epoch": 13.0, + "eval_runtime": 6.0417, + "eval_samples_per_second": 7.448, + "eval_steps_per_second": 0.828, + "step": 273 + }, + { + "epoch": 14.0, + "eval_runtime": 6.0373, + "eval_samples_per_second": 7.454, + "eval_steps_per_second": 0.828, + "step": 294 + }, + { + "epoch": 15.0, + "eval_runtime": 6.6679, + "eval_samples_per_second": 6.749, + "eval_steps_per_second": 0.75, + "step": 315 + }, + { + "epoch": 16.0, + "eval_runtime": 6.4137, + "eval_samples_per_second": 7.016, + "eval_steps_per_second": 0.78, + "step": 336 + }, + { + "epoch": 17.0, + "eval_runtime": 6.106, + "eval_samples_per_second": 7.37, + "eval_steps_per_second": 0.819, + "step": 357 + }, + { + "epoch": 18.0, + "eval_runtime": 6.6353, + "eval_samples_per_second": 6.782, + "eval_steps_per_second": 0.754, + "step": 378 + }, + { + "epoch": 19.0, + "eval_runtime": 6.5796, + "eval_samples_per_second": 6.839, + "eval_steps_per_second": 0.76, + "step": 399 + } + ], + "logging_steps": 500, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1867861134397440.0, + "train_batch_size": 10, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-399/training_args.bin b/checkpoint-399/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b601bb06afbbd0f62027bb49993f43301f9ebdb5 --- /dev/null +++ b/checkpoint-399/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:870e1ff24d52f571f9b66eecb81300ba7b620de79860cca05110db11bcc791fa +size 5176 diff --git a/checkpoint-42/README.md b/checkpoint-42/README.md new file mode 100644 index 0000000000000000000000000000000000000000..8c65a9ab0201e1b488bf55ad92a0b5cff9f04be6 --- /dev/null +++ b/checkpoint-42/README.md @@ -0,0 +1,202 @@ +--- +base_model: microsoft/git-base +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/checkpoint-42/adapter_config.json b/checkpoint-42/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b6d44ec7670bb505bd8432352092989b1a15c296 --- /dev/null +++ b/checkpoint-42/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": { + "base_model_class": "GitForCausalLM", + "parent_library": "transformers.models.git.modeling_git" + }, + "base_model_name_or_path": "microsoft/git-base", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": [ + "classifier" + ], + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "value", + "query" + ], + "task_type": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-42/adapter_model.safetensors b/checkpoint-42/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6e0bbc285deffa9ca8527aa06342ef20b55db8b0 --- /dev/null +++ b/checkpoint-42/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53fbdb7b91a2f6dc67fe45eb4d2f23c16d1bd8c8ea1aff6bca6b3692998c8f0e +size 593144 diff --git a/checkpoint-42/optimizer.pt b/checkpoint-42/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..b704b69ce5b9504dde98bb5d49a63d777b8df3b9 --- /dev/null +++ b/checkpoint-42/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77c1844d18646e571c275336d3f57f2b581842a0343f283c49012e8742b06a5f +size 1200278 diff --git a/checkpoint-42/rng_state.pth b/checkpoint-42/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..204c5fbd29de7360fe4b957091ad3bbb6e59dcd7 --- /dev/null +++ b/checkpoint-42/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a703c5c7c0a8ef6dc87c3443993dac1e3eb51cef0cf1111dc3b59ddf867f1e07 +size 14244 diff --git a/checkpoint-42/scheduler.pt b/checkpoint-42/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..65a5eaa70cdaea70ca26084dde49f5e2bd1cdd18 --- /dev/null +++ b/checkpoint-42/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a534a23eabc80ab81b7bc9a88650bd1648745b5c0ab4ff7393d138d66c7fc8b1 +size 1064 diff --git a/checkpoint-42/trainer_state.json b/checkpoint-42/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..877027ec31e3c4b634da46f8c55a7cde652f46c3 --- /dev/null +++ b/checkpoint-42/trainer_state.json @@ -0,0 +1,47 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.0, + "eval_steps": 500, + "global_step": 42, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 1.0, + "eval_runtime": 11.809, + "eval_samples_per_second": 3.811, + "eval_steps_per_second": 0.423, + "step": 21 + }, + { + "epoch": 2.0, + "eval_runtime": 6.8145, + "eval_samples_per_second": 6.604, + "eval_steps_per_second": 0.734, + "step": 42 + } + ], + "logging_steps": 500, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 196616961515520.0, + "train_batch_size": 10, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-42/training_args.bin b/checkpoint-42/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b601bb06afbbd0f62027bb49993f43301f9ebdb5 --- /dev/null +++ b/checkpoint-42/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:870e1ff24d52f571f9b66eecb81300ba7b620de79860cca05110db11bcc791fa +size 5176 diff --git a/checkpoint-420/README.md b/checkpoint-420/README.md new file mode 100644 index 0000000000000000000000000000000000000000..8c65a9ab0201e1b488bf55ad92a0b5cff9f04be6 --- /dev/null +++ b/checkpoint-420/README.md @@ -0,0 +1,202 @@ +--- +base_model: microsoft/git-base +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/checkpoint-420/adapter_config.json b/checkpoint-420/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b6d44ec7670bb505bd8432352092989b1a15c296 --- /dev/null +++ b/checkpoint-420/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": { + "base_model_class": "GitForCausalLM", + "parent_library": "transformers.models.git.modeling_git" + }, + "base_model_name_or_path": "microsoft/git-base", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": [ + "classifier" + ], + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "value", + "query" + ], + "task_type": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-420/adapter_model.safetensors b/checkpoint-420/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a7f2f366a320a34a19ded51256c6aec7996214ee --- /dev/null +++ b/checkpoint-420/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d89a854dcbf95802a76998215c7a7a4cbd51c1259d44d19d5def31da985e286 +size 593144 diff --git a/checkpoint-420/optimizer.pt b/checkpoint-420/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..bb376458a0eddf8fb26b9ce01592cf721b5252f7 --- /dev/null +++ b/checkpoint-420/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e0911eb870564534d5143593de6fe0aa278726cce9ee89cbec5a199d911ffbc +size 1200278 diff --git a/checkpoint-420/rng_state.pth b/checkpoint-420/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..01259b97446e6f0193bee36bb7d8a99aaae5e2e2 --- /dev/null +++ b/checkpoint-420/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b79e21ffdc70a0d56a005b53679c02a0d278807a9519ba605a807195d0ca276 +size 14244 diff --git a/checkpoint-420/scheduler.pt b/checkpoint-420/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..e814786646b068a0932211d828f5b84cb569356e --- /dev/null +++ b/checkpoint-420/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:110163bc994170e6f0329d40df6091e9391944f9f5bb315bf1f1fd68aa8504cb +size 1064 diff --git a/checkpoint-420/trainer_state.json b/checkpoint-420/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..966d896e7e664e4abfcc5aec78ba0e445ba22969 --- /dev/null +++ b/checkpoint-420/trainer_state.json @@ -0,0 +1,173 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 20.0, + "eval_steps": 500, + "global_step": 420, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 1.0, + "eval_runtime": 11.809, + "eval_samples_per_second": 3.811, + "eval_steps_per_second": 0.423, + "step": 21 + }, + { + "epoch": 2.0, + "eval_runtime": 6.8145, + "eval_samples_per_second": 6.604, + "eval_steps_per_second": 0.734, + "step": 42 + }, + { + "epoch": 3.0, + "eval_runtime": 6.0474, + "eval_samples_per_second": 7.441, + "eval_steps_per_second": 0.827, + "step": 63 + }, + { + "epoch": 4.0, + "eval_runtime": 6.3718, + "eval_samples_per_second": 7.062, + "eval_steps_per_second": 0.785, + "step": 84 + }, + { + "epoch": 5.0, + "eval_runtime": 6.5487, + "eval_samples_per_second": 6.872, + "eval_steps_per_second": 0.764, + "step": 105 + }, + { + "epoch": 6.0, + "eval_runtime": 6.0733, + "eval_samples_per_second": 7.409, + "eval_steps_per_second": 0.823, + "step": 126 + }, + { + "epoch": 7.0, + "eval_runtime": 6.0993, + "eval_samples_per_second": 7.378, + "eval_steps_per_second": 0.82, + "step": 147 + }, + { + "epoch": 8.0, + "eval_runtime": 6.6473, + "eval_samples_per_second": 6.77, + "eval_steps_per_second": 0.752, + "step": 168 + }, + { + "epoch": 9.0, + "eval_runtime": 6.3262, + "eval_samples_per_second": 7.113, + "eval_steps_per_second": 0.79, + "step": 189 + }, + { + "epoch": 10.0, + "eval_runtime": 6.0277, + "eval_samples_per_second": 7.465, + "eval_steps_per_second": 0.829, + "step": 210 + }, + { + "epoch": 11.0, + "eval_runtime": 6.6821, + "eval_samples_per_second": 6.734, + "eval_steps_per_second": 0.748, + "step": 231 + }, + { + "epoch": 12.0, + "eval_runtime": 6.5331, + "eval_samples_per_second": 6.888, + "eval_steps_per_second": 0.765, + "step": 252 + }, + { + "epoch": 13.0, + "eval_runtime": 6.0417, + "eval_samples_per_second": 7.448, + "eval_steps_per_second": 0.828, + "step": 273 + }, + { + "epoch": 14.0, + "eval_runtime": 6.0373, + "eval_samples_per_second": 7.454, + "eval_steps_per_second": 0.828, + "step": 294 + }, + { + "epoch": 15.0, + "eval_runtime": 6.6679, + "eval_samples_per_second": 6.749, + "eval_steps_per_second": 0.75, + "step": 315 + }, + { + "epoch": 16.0, + "eval_runtime": 6.4137, + "eval_samples_per_second": 7.016, + "eval_steps_per_second": 0.78, + "step": 336 + }, + { + "epoch": 17.0, + "eval_runtime": 6.106, + "eval_samples_per_second": 7.37, + "eval_steps_per_second": 0.819, + "step": 357 + }, + { + "epoch": 18.0, + "eval_runtime": 6.6353, + "eval_samples_per_second": 6.782, + "eval_steps_per_second": 0.754, + "step": 378 + }, + { + "epoch": 19.0, + "eval_runtime": 6.5796, + "eval_samples_per_second": 6.839, + "eval_steps_per_second": 0.76, + "step": 399 + }, + { + "epoch": 20.0, + "eval_runtime": 6.0621, + "eval_samples_per_second": 7.423, + "eval_steps_per_second": 0.825, + "step": 420 + } + ], + "logging_steps": 500, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1966169615155200.0, + "train_batch_size": 10, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-420/training_args.bin b/checkpoint-420/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b601bb06afbbd0f62027bb49993f43301f9ebdb5 --- /dev/null +++ b/checkpoint-420/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:870e1ff24d52f571f9b66eecb81300ba7b620de79860cca05110db11bcc791fa +size 5176 diff --git a/checkpoint-441/README.md b/checkpoint-441/README.md new file mode 100644 index 0000000000000000000000000000000000000000..8c65a9ab0201e1b488bf55ad92a0b5cff9f04be6 --- /dev/null +++ b/checkpoint-441/README.md @@ -0,0 +1,202 @@ +--- +base_model: microsoft/git-base +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/checkpoint-441/adapter_config.json b/checkpoint-441/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b6d44ec7670bb505bd8432352092989b1a15c296 --- /dev/null +++ b/checkpoint-441/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": { + "base_model_class": "GitForCausalLM", + "parent_library": "transformers.models.git.modeling_git" + }, + "base_model_name_or_path": "microsoft/git-base", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": [ + "classifier" + ], + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "value", + "query" + ], + "task_type": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-441/adapter_model.safetensors b/checkpoint-441/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6eff3087ab74fb1eb6c01c648094057830996d85 --- /dev/null +++ b/checkpoint-441/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ff409360c7e7b52abccb4bf6d0f2ddb1c8c55876aaeba39cf09aa0cdedfb2d1 +size 593144 diff --git a/checkpoint-441/optimizer.pt b/checkpoint-441/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..5e35788e691b45605e133ff34f0a76a57e8ba39c --- /dev/null +++ b/checkpoint-441/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f27868ba170721b4eaff1a62e175e0142d876a1b5a35673461296798e4f6ef1 +size 1200278 diff --git a/checkpoint-441/rng_state.pth b/checkpoint-441/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..c0d2471e5a72833ef4694b04c7c1df51d42c6290 --- /dev/null +++ b/checkpoint-441/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb919b191db140e4accc9ee946cdcdc1c71314a777c56955e4ebcc76486ac09a +size 14244 diff --git a/checkpoint-441/scheduler.pt b/checkpoint-441/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..80f1e124b2105f3e39debe428c9763c1db44448b --- /dev/null +++ b/checkpoint-441/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5389becf390a1cf844edb75075ccdc06735004d501ed62d6caade757ba5dc251 +size 1064 diff --git a/checkpoint-441/trainer_state.json b/checkpoint-441/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..0610588353fb2a449b3df2ed2f0fbd3584ea4773 --- /dev/null +++ b/checkpoint-441/trainer_state.json @@ -0,0 +1,180 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 21.0, + "eval_steps": 500, + "global_step": 441, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 1.0, + "eval_runtime": 11.809, + "eval_samples_per_second": 3.811, + "eval_steps_per_second": 0.423, + "step": 21 + }, + { + "epoch": 2.0, + "eval_runtime": 6.8145, + "eval_samples_per_second": 6.604, + "eval_steps_per_second": 0.734, + "step": 42 + }, + { + "epoch": 3.0, + "eval_runtime": 6.0474, + "eval_samples_per_second": 7.441, + "eval_steps_per_second": 0.827, + "step": 63 + }, + { + "epoch": 4.0, + "eval_runtime": 6.3718, + "eval_samples_per_second": 7.062, + "eval_steps_per_second": 0.785, + "step": 84 + }, + { + "epoch": 5.0, + "eval_runtime": 6.5487, + "eval_samples_per_second": 6.872, + "eval_steps_per_second": 0.764, + "step": 105 + }, + { + "epoch": 6.0, + "eval_runtime": 6.0733, + "eval_samples_per_second": 7.409, + "eval_steps_per_second": 0.823, + "step": 126 + }, + { + "epoch": 7.0, + "eval_runtime": 6.0993, + "eval_samples_per_second": 7.378, + "eval_steps_per_second": 0.82, + "step": 147 + }, + { + "epoch": 8.0, + "eval_runtime": 6.6473, + "eval_samples_per_second": 6.77, + "eval_steps_per_second": 0.752, + "step": 168 + }, + { + "epoch": 9.0, + "eval_runtime": 6.3262, + "eval_samples_per_second": 7.113, + "eval_steps_per_second": 0.79, + "step": 189 + }, + { + "epoch": 10.0, + "eval_runtime": 6.0277, + "eval_samples_per_second": 7.465, + "eval_steps_per_second": 0.829, + "step": 210 + }, + { + "epoch": 11.0, + "eval_runtime": 6.6821, + "eval_samples_per_second": 6.734, + "eval_steps_per_second": 0.748, + "step": 231 + }, + { + "epoch": 12.0, + "eval_runtime": 6.5331, + "eval_samples_per_second": 6.888, + "eval_steps_per_second": 0.765, + "step": 252 + }, + { + "epoch": 13.0, + "eval_runtime": 6.0417, + "eval_samples_per_second": 7.448, + "eval_steps_per_second": 0.828, + "step": 273 + }, + { + "epoch": 14.0, + "eval_runtime": 6.0373, + "eval_samples_per_second": 7.454, + "eval_steps_per_second": 0.828, + "step": 294 + }, + { + "epoch": 15.0, + "eval_runtime": 6.6679, + "eval_samples_per_second": 6.749, + "eval_steps_per_second": 0.75, + "step": 315 + }, + { + "epoch": 16.0, + "eval_runtime": 6.4137, + "eval_samples_per_second": 7.016, + "eval_steps_per_second": 0.78, + "step": 336 + }, + { + "epoch": 17.0, + "eval_runtime": 6.106, + "eval_samples_per_second": 7.37, + "eval_steps_per_second": 0.819, + "step": 357 + }, + { + "epoch": 18.0, + "eval_runtime": 6.6353, + "eval_samples_per_second": 6.782, + "eval_steps_per_second": 0.754, + "step": 378 + }, + { + "epoch": 19.0, + "eval_runtime": 6.5796, + "eval_samples_per_second": 6.839, + "eval_steps_per_second": 0.76, + "step": 399 + }, + { + "epoch": 20.0, + "eval_runtime": 6.0621, + "eval_samples_per_second": 7.423, + "eval_steps_per_second": 0.825, + "step": 420 + }, + { + "epoch": 21.0, + "eval_runtime": 6.0837, + "eval_samples_per_second": 7.397, + "eval_steps_per_second": 0.822, + "step": 441 + } + ], + "logging_steps": 500, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2064478095912960.0, + "train_batch_size": 10, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-441/training_args.bin b/checkpoint-441/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b601bb06afbbd0f62027bb49993f43301f9ebdb5 --- /dev/null +++ b/checkpoint-441/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:870e1ff24d52f571f9b66eecb81300ba7b620de79860cca05110db11bcc791fa +size 5176 diff --git a/checkpoint-462/README.md b/checkpoint-462/README.md new file mode 100644 index 0000000000000000000000000000000000000000..8c65a9ab0201e1b488bf55ad92a0b5cff9f04be6 --- /dev/null +++ b/checkpoint-462/README.md @@ -0,0 +1,202 @@ +--- +base_model: microsoft/git-base +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/checkpoint-462/adapter_config.json b/checkpoint-462/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b6d44ec7670bb505bd8432352092989b1a15c296 --- /dev/null +++ b/checkpoint-462/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": { + "base_model_class": "GitForCausalLM", + "parent_library": "transformers.models.git.modeling_git" + }, + "base_model_name_or_path": "microsoft/git-base", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": [ + "classifier" + ], + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "value", + "query" + ], + "task_type": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-462/adapter_model.safetensors b/checkpoint-462/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3ca075d6c545cbdd7e61c624f1f5a443c99fe523 --- /dev/null +++ b/checkpoint-462/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a68221a11eb25dbc2d14539d63a63c9cd88984291adfd1f29233ff7aa15c3fec +size 593144 diff --git a/checkpoint-462/optimizer.pt b/checkpoint-462/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..67d9432cffc25c00dbd18093b200a72b5de2b0fa --- /dev/null +++ b/checkpoint-462/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:124b8931974458cce5245b5eee8aec2f2235735e650b8a74cd1ffb3ffa52fc6d +size 1200278 diff --git a/checkpoint-462/rng_state.pth b/checkpoint-462/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..b5473b7bb0e848a4c5a2786b5d38c92afe52d216 --- /dev/null +++ b/checkpoint-462/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41737cced89f7e0e7a9adefc19cd1d778a0f7baa73a0ef1102b76d481768dc6e +size 14244 diff --git a/checkpoint-462/scheduler.pt b/checkpoint-462/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..3d61ae118a7f63f439e8b901acd7771ff59290d9 --- /dev/null +++ b/checkpoint-462/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74156940342a430ace3ecc39111cb591e51fd05b60224c69338af5bc076f0065 +size 1064 diff --git a/checkpoint-462/trainer_state.json b/checkpoint-462/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..e5e2d5a4bf606172a03d408dee53009994851ec7 --- /dev/null +++ b/checkpoint-462/trainer_state.json @@ -0,0 +1,187 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 22.0, + "eval_steps": 500, + "global_step": 462, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 1.0, + "eval_runtime": 11.809, + "eval_samples_per_second": 3.811, + "eval_steps_per_second": 0.423, + "step": 21 + }, + { + "epoch": 2.0, + "eval_runtime": 6.8145, + "eval_samples_per_second": 6.604, + "eval_steps_per_second": 0.734, + "step": 42 + }, + { + "epoch": 3.0, + "eval_runtime": 6.0474, + "eval_samples_per_second": 7.441, + "eval_steps_per_second": 0.827, + "step": 63 + }, + { + "epoch": 4.0, + "eval_runtime": 6.3718, + "eval_samples_per_second": 7.062, + "eval_steps_per_second": 0.785, + "step": 84 + }, + { + "epoch": 5.0, + "eval_runtime": 6.5487, + "eval_samples_per_second": 6.872, + "eval_steps_per_second": 0.764, + "step": 105 + }, + { + "epoch": 6.0, + "eval_runtime": 6.0733, + "eval_samples_per_second": 7.409, + "eval_steps_per_second": 0.823, + "step": 126 + }, + { + "epoch": 7.0, + "eval_runtime": 6.0993, + "eval_samples_per_second": 7.378, + "eval_steps_per_second": 0.82, + "step": 147 + }, + { + "epoch": 8.0, + "eval_runtime": 6.6473, + "eval_samples_per_second": 6.77, + "eval_steps_per_second": 0.752, + "step": 168 + }, + { + "epoch": 9.0, + "eval_runtime": 6.3262, + "eval_samples_per_second": 7.113, + "eval_steps_per_second": 0.79, + "step": 189 + }, + { + "epoch": 10.0, + "eval_runtime": 6.0277, + "eval_samples_per_second": 7.465, + "eval_steps_per_second": 0.829, + "step": 210 + }, + { + "epoch": 11.0, + "eval_runtime": 6.6821, + "eval_samples_per_second": 6.734, + "eval_steps_per_second": 0.748, + "step": 231 + }, + { + "epoch": 12.0, + "eval_runtime": 6.5331, + "eval_samples_per_second": 6.888, + "eval_steps_per_second": 0.765, + "step": 252 + }, + { + "epoch": 13.0, + "eval_runtime": 6.0417, + "eval_samples_per_second": 7.448, + "eval_steps_per_second": 0.828, + "step": 273 + }, + { + "epoch": 14.0, + "eval_runtime": 6.0373, + "eval_samples_per_second": 7.454, + "eval_steps_per_second": 0.828, + "step": 294 + }, + { + "epoch": 15.0, + "eval_runtime": 6.6679, + "eval_samples_per_second": 6.749, + "eval_steps_per_second": 0.75, + "step": 315 + }, + { + "epoch": 16.0, + "eval_runtime": 6.4137, + "eval_samples_per_second": 7.016, + "eval_steps_per_second": 0.78, + "step": 336 + }, + { + "epoch": 17.0, + "eval_runtime": 6.106, + "eval_samples_per_second": 7.37, + "eval_steps_per_second": 0.819, + "step": 357 + }, + { + "epoch": 18.0, + "eval_runtime": 6.6353, + "eval_samples_per_second": 6.782, + "eval_steps_per_second": 0.754, + "step": 378 + }, + { + "epoch": 19.0, + "eval_runtime": 6.5796, + "eval_samples_per_second": 6.839, + "eval_steps_per_second": 0.76, + "step": 399 + }, + { + "epoch": 20.0, + "eval_runtime": 6.0621, + "eval_samples_per_second": 7.423, + "eval_steps_per_second": 0.825, + "step": 420 + }, + { + "epoch": 21.0, + "eval_runtime": 6.0837, + "eval_samples_per_second": 7.397, + "eval_steps_per_second": 0.822, + "step": 441 + }, + { + "epoch": 22.0, + "eval_runtime": 6.7734, + "eval_samples_per_second": 6.644, + "eval_steps_per_second": 0.738, + "step": 462 + } + ], + "logging_steps": 500, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2162786576670720.0, + "train_batch_size": 10, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-462/training_args.bin b/checkpoint-462/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b601bb06afbbd0f62027bb49993f43301f9ebdb5 --- /dev/null +++ b/checkpoint-462/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:870e1ff24d52f571f9b66eecb81300ba7b620de79860cca05110db11bcc791fa +size 5176 diff --git a/checkpoint-483/README.md b/checkpoint-483/README.md new file mode 100644 index 0000000000000000000000000000000000000000..8c65a9ab0201e1b488bf55ad92a0b5cff9f04be6 --- /dev/null +++ b/checkpoint-483/README.md @@ -0,0 +1,202 @@ +--- +base_model: microsoft/git-base +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/checkpoint-483/adapter_config.json b/checkpoint-483/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b6d44ec7670bb505bd8432352092989b1a15c296 --- /dev/null +++ b/checkpoint-483/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": { + "base_model_class": "GitForCausalLM", + "parent_library": "transformers.models.git.modeling_git" + }, + "base_model_name_or_path": "microsoft/git-base", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": [ + "classifier" + ], + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "value", + "query" + ], + "task_type": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-483/adapter_model.safetensors b/checkpoint-483/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1c5c06c34585e39e4e65ae2d4a8afe7e97fbb34a --- /dev/null +++ b/checkpoint-483/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e18eb5f6199289e671c8f67c31ba90d035089f79fd09296bb484834ca0231f6d +size 593144 diff --git a/checkpoint-483/optimizer.pt b/checkpoint-483/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..74c4743338165457c101fad2494ea4f9cc54b0d4 --- /dev/null +++ b/checkpoint-483/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d949dcfb8516ebf5993898bc2961c1835b8e4ab71bc5fa4e5689cda68adbab1 +size 1200278 diff --git a/checkpoint-483/rng_state.pth b/checkpoint-483/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..d90e803335dcabe52cf0438d97780a404409c711 --- /dev/null +++ b/checkpoint-483/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60b17fc167d1fcbdc6c6edb67220bb837f7080d05429afddec134757cf7de499 +size 14244 diff --git a/checkpoint-483/scheduler.pt b/checkpoint-483/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..e5fa0188bb08a614f6d189ed5bb0cb8e5be94ae5 --- /dev/null +++ b/checkpoint-483/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b541dda1d8f42964aa93e7fcb94da05d4601d89d5a4a3ff69f247d01377602f1 +size 1064 diff --git a/checkpoint-483/trainer_state.json b/checkpoint-483/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..334d2232d62daa4cc0e14bd7c7ed337a5be1496a --- /dev/null +++ b/checkpoint-483/trainer_state.json @@ -0,0 +1,194 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 23.0, + "eval_steps": 500, + "global_step": 483, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 1.0, + "eval_runtime": 11.809, + "eval_samples_per_second": 3.811, + "eval_steps_per_second": 0.423, + "step": 21 + }, + { + "epoch": 2.0, + "eval_runtime": 6.8145, + "eval_samples_per_second": 6.604, + "eval_steps_per_second": 0.734, + "step": 42 + }, + { + "epoch": 3.0, + "eval_runtime": 6.0474, + "eval_samples_per_second": 7.441, + "eval_steps_per_second": 0.827, + "step": 63 + }, + { + "epoch": 4.0, + "eval_runtime": 6.3718, + "eval_samples_per_second": 7.062, + "eval_steps_per_second": 0.785, + "step": 84 + }, + { + "epoch": 5.0, + "eval_runtime": 6.5487, + "eval_samples_per_second": 6.872, + "eval_steps_per_second": 0.764, + "step": 105 + }, + { + "epoch": 6.0, + "eval_runtime": 6.0733, + "eval_samples_per_second": 7.409, + "eval_steps_per_second": 0.823, + "step": 126 + }, + { + "epoch": 7.0, + "eval_runtime": 6.0993, + "eval_samples_per_second": 7.378, + "eval_steps_per_second": 0.82, + "step": 147 + }, + { + "epoch": 8.0, + "eval_runtime": 6.6473, + "eval_samples_per_second": 6.77, + "eval_steps_per_second": 0.752, + "step": 168 + }, + { + "epoch": 9.0, + "eval_runtime": 6.3262, + "eval_samples_per_second": 7.113, + "eval_steps_per_second": 0.79, + "step": 189 + }, + { + "epoch": 10.0, + "eval_runtime": 6.0277, + "eval_samples_per_second": 7.465, + "eval_steps_per_second": 0.829, + "step": 210 + }, + { + "epoch": 11.0, + "eval_runtime": 6.6821, + "eval_samples_per_second": 6.734, + "eval_steps_per_second": 0.748, + "step": 231 + }, + { + "epoch": 12.0, + "eval_runtime": 6.5331, + "eval_samples_per_second": 6.888, + "eval_steps_per_second": 0.765, + "step": 252 + }, + { + "epoch": 13.0, + "eval_runtime": 6.0417, + "eval_samples_per_second": 7.448, + "eval_steps_per_second": 0.828, + "step": 273 + }, + { + "epoch": 14.0, + "eval_runtime": 6.0373, + "eval_samples_per_second": 7.454, + "eval_steps_per_second": 0.828, + "step": 294 + }, + { + "epoch": 15.0, + "eval_runtime": 6.6679, + "eval_samples_per_second": 6.749, + "eval_steps_per_second": 0.75, + "step": 315 + }, + { + "epoch": 16.0, + "eval_runtime": 6.4137, + "eval_samples_per_second": 7.016, + "eval_steps_per_second": 0.78, + "step": 336 + }, + { + "epoch": 17.0, + "eval_runtime": 6.106, + "eval_samples_per_second": 7.37, + "eval_steps_per_second": 0.819, + "step": 357 + }, + { + "epoch": 18.0, + "eval_runtime": 6.6353, + "eval_samples_per_second": 6.782, + "eval_steps_per_second": 0.754, + "step": 378 + }, + { + "epoch": 19.0, + "eval_runtime": 6.5796, + "eval_samples_per_second": 6.839, + "eval_steps_per_second": 0.76, + "step": 399 + }, + { + "epoch": 20.0, + "eval_runtime": 6.0621, + "eval_samples_per_second": 7.423, + "eval_steps_per_second": 0.825, + "step": 420 + }, + { + "epoch": 21.0, + "eval_runtime": 6.0837, + "eval_samples_per_second": 7.397, + "eval_steps_per_second": 0.822, + "step": 441 + }, + { + "epoch": 22.0, + "eval_runtime": 6.7734, + "eval_samples_per_second": 6.644, + "eval_steps_per_second": 0.738, + "step": 462 + }, + { + "epoch": 23.0, + "eval_runtime": 6.3665, + "eval_samples_per_second": 7.068, + "eval_steps_per_second": 0.785, + "step": 483 + } + ], + "logging_steps": 500, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2261095057428480.0, + "train_batch_size": 10, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-483/training_args.bin b/checkpoint-483/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b601bb06afbbd0f62027bb49993f43301f9ebdb5 --- /dev/null +++ b/checkpoint-483/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:870e1ff24d52f571f9b66eecb81300ba7b620de79860cca05110db11bcc791fa +size 5176 diff --git a/checkpoint-504/README.md b/checkpoint-504/README.md new file mode 100644 index 0000000000000000000000000000000000000000..8c65a9ab0201e1b488bf55ad92a0b5cff9f04be6 --- /dev/null +++ b/checkpoint-504/README.md @@ -0,0 +1,202 @@ +--- +base_model: microsoft/git-base +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/checkpoint-504/adapter_config.json b/checkpoint-504/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b6d44ec7670bb505bd8432352092989b1a15c296 --- /dev/null +++ b/checkpoint-504/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": { + "base_model_class": "GitForCausalLM", + "parent_library": "transformers.models.git.modeling_git" + }, + "base_model_name_or_path": "microsoft/git-base", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": [ + "classifier" + ], + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "value", + "query" + ], + "task_type": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-504/adapter_model.safetensors b/checkpoint-504/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6401650b051718173a622eae128616af57ade4d5 --- /dev/null +++ b/checkpoint-504/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0b9d040ae31eb718a11ead83a0661393a950fb1ec588a86f129ab94900c2f94 +size 593144 diff --git a/checkpoint-504/optimizer.pt b/checkpoint-504/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..504f74532c1c6305571aaaa0f5e7f989fa090fa4 --- /dev/null +++ b/checkpoint-504/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a621e4a92e7ca3198c8f826053c66b4c3e4a24f8e6dcac314bbf1c736ada2283 +size 1200278 diff --git a/checkpoint-504/rng_state.pth b/checkpoint-504/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..8b563ea0a0906a4b4b8bba573f03450c6bcc0dec --- /dev/null +++ b/checkpoint-504/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62ef698d9fe153b42d4dba2f41c1dd744614ac8cda42f476337e734084288edb +size 14244 diff --git a/checkpoint-504/scheduler.pt b/checkpoint-504/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..8587f81196c14e44c96325c34911fff01ec03fad --- /dev/null +++ b/checkpoint-504/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0fc097ad02ad8c74981c6dc04b706ce628a6de9d0538c54df055f57ea9db76b9 +size 1064 diff --git a/checkpoint-504/trainer_state.json b/checkpoint-504/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..83a29677bac53ded88ec0dea94ff22e1a6681321 --- /dev/null +++ b/checkpoint-504/trainer_state.json @@ -0,0 +1,208 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 24.0, + "eval_steps": 500, + "global_step": 504, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 1.0, + "eval_runtime": 11.809, + "eval_samples_per_second": 3.811, + "eval_steps_per_second": 0.423, + "step": 21 + }, + { + "epoch": 2.0, + "eval_runtime": 6.8145, + "eval_samples_per_second": 6.604, + "eval_steps_per_second": 0.734, + "step": 42 + }, + { + "epoch": 3.0, + "eval_runtime": 6.0474, + "eval_samples_per_second": 7.441, + "eval_steps_per_second": 0.827, + "step": 63 + }, + { + "epoch": 4.0, + "eval_runtime": 6.3718, + "eval_samples_per_second": 7.062, + "eval_steps_per_second": 0.785, + "step": 84 + }, + { + "epoch": 5.0, + "eval_runtime": 6.5487, + "eval_samples_per_second": 6.872, + "eval_steps_per_second": 0.764, + "step": 105 + }, + { + "epoch": 6.0, + "eval_runtime": 6.0733, + "eval_samples_per_second": 7.409, + "eval_steps_per_second": 0.823, + "step": 126 + }, + { + "epoch": 7.0, + "eval_runtime": 6.0993, + "eval_samples_per_second": 7.378, + "eval_steps_per_second": 0.82, + "step": 147 + }, + { + "epoch": 8.0, + "eval_runtime": 6.6473, + "eval_samples_per_second": 6.77, + "eval_steps_per_second": 0.752, + "step": 168 + }, + { + "epoch": 9.0, + "eval_runtime": 6.3262, + "eval_samples_per_second": 7.113, + "eval_steps_per_second": 0.79, + "step": 189 + }, + { + "epoch": 10.0, + "eval_runtime": 6.0277, + "eval_samples_per_second": 7.465, + "eval_steps_per_second": 0.829, + "step": 210 + }, + { + "epoch": 11.0, + "eval_runtime": 6.6821, + "eval_samples_per_second": 6.734, + "eval_steps_per_second": 0.748, + "step": 231 + }, + { + "epoch": 12.0, + "eval_runtime": 6.5331, + "eval_samples_per_second": 6.888, + "eval_steps_per_second": 0.765, + "step": 252 + }, + { + "epoch": 13.0, + "eval_runtime": 6.0417, + "eval_samples_per_second": 7.448, + "eval_steps_per_second": 0.828, + "step": 273 + }, + { + "epoch": 14.0, + "eval_runtime": 6.0373, + "eval_samples_per_second": 7.454, + "eval_steps_per_second": 0.828, + "step": 294 + }, + { + "epoch": 15.0, + "eval_runtime": 6.6679, + "eval_samples_per_second": 6.749, + "eval_steps_per_second": 0.75, + "step": 315 + }, + { + "epoch": 16.0, + "eval_runtime": 6.4137, + "eval_samples_per_second": 7.016, + "eval_steps_per_second": 0.78, + "step": 336 + }, + { + "epoch": 17.0, + "eval_runtime": 6.106, + "eval_samples_per_second": 7.37, + "eval_steps_per_second": 0.819, + "step": 357 + }, + { + "epoch": 18.0, + "eval_runtime": 6.6353, + "eval_samples_per_second": 6.782, + "eval_steps_per_second": 0.754, + "step": 378 + }, + { + "epoch": 19.0, + "eval_runtime": 6.5796, + "eval_samples_per_second": 6.839, + "eval_steps_per_second": 0.76, + "step": 399 + }, + { + "epoch": 20.0, + "eval_runtime": 6.0621, + "eval_samples_per_second": 7.423, + "eval_steps_per_second": 0.825, + "step": 420 + }, + { + "epoch": 21.0, + "eval_runtime": 6.0837, + "eval_samples_per_second": 7.397, + "eval_steps_per_second": 0.822, + "step": 441 + }, + { + "epoch": 22.0, + "eval_runtime": 6.7734, + "eval_samples_per_second": 6.644, + "eval_steps_per_second": 0.738, + "step": 462 + }, + { + "epoch": 23.0, + "eval_runtime": 6.3665, + "eval_samples_per_second": 7.068, + "eval_steps_per_second": 0.785, + "step": 483 + }, + { + "epoch": 23.80952380952381, + "grad_norm": 0.07010962069034576, + "learning_rate": 6.878306878306877e-05, + "loss": 9.2521, + "step": 500 + }, + { + "epoch": 24.0, + "eval_runtime": 6.0991, + "eval_samples_per_second": 7.378, + "eval_steps_per_second": 0.82, + "step": 504 + } + ], + "logging_steps": 500, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2359403538186240.0, + "train_batch_size": 10, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-504/training_args.bin b/checkpoint-504/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b601bb06afbbd0f62027bb49993f43301f9ebdb5 --- /dev/null +++ b/checkpoint-504/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:870e1ff24d52f571f9b66eecb81300ba7b620de79860cca05110db11bcc791fa +size 5176 diff --git a/checkpoint-525/README.md b/checkpoint-525/README.md new file mode 100644 index 0000000000000000000000000000000000000000..8c65a9ab0201e1b488bf55ad92a0b5cff9f04be6 --- /dev/null +++ b/checkpoint-525/README.md @@ -0,0 +1,202 @@ +--- +base_model: microsoft/git-base +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/checkpoint-525/adapter_config.json b/checkpoint-525/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b6d44ec7670bb505bd8432352092989b1a15c296 --- /dev/null +++ b/checkpoint-525/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": { + "base_model_class": "GitForCausalLM", + "parent_library": "transformers.models.git.modeling_git" + }, + "base_model_name_or_path": "microsoft/git-base", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": [ + "classifier" + ], + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "value", + "query" + ], + "task_type": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-525/adapter_model.safetensors b/checkpoint-525/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9a924b2ddd3f551dd287efa2e985d6ac770d6ee1 --- /dev/null +++ b/checkpoint-525/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71b408a9662a5334d282769206eabb7df3cd3437a7f3a0e00838aff2e929d28d +size 593144 diff --git a/checkpoint-525/optimizer.pt b/checkpoint-525/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..28b312b1d2567bcdcfa3c9b8308fc4da4b3ff71e --- /dev/null +++ b/checkpoint-525/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce9e19b47642d9c4b0b384cba268306cc85f71aef246b4d840bf87d9d92438ba +size 1200278 diff --git a/checkpoint-525/rng_state.pth b/checkpoint-525/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..09d135ee75b47f7d174f3730f6d48c089dd46e5c --- /dev/null +++ b/checkpoint-525/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:655e1356e2736b3f0f7bd518be459cb742c1dd8cd848b00005864ba5e6c3fe59 +size 14244 diff --git a/checkpoint-525/scheduler.pt b/checkpoint-525/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..5c5eedade1dd9271e7443523144802a9d5ed5b94 --- /dev/null +++ b/checkpoint-525/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e7e39be1c80038f9739fddabed9069c218b730654aeaa96caa125090e4459c4 +size 1064 diff --git a/checkpoint-525/trainer_state.json b/checkpoint-525/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..990c38dae4736562f6565c6fbef971981d0e4e83 --- /dev/null +++ b/checkpoint-525/trainer_state.json @@ -0,0 +1,215 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 25.0, + "eval_steps": 500, + "global_step": 525, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 1.0, + "eval_runtime": 11.809, + "eval_samples_per_second": 3.811, + "eval_steps_per_second": 0.423, + "step": 21 + }, + { + "epoch": 2.0, + "eval_runtime": 6.8145, + "eval_samples_per_second": 6.604, + "eval_steps_per_second": 0.734, + "step": 42 + }, + { + "epoch": 3.0, + "eval_runtime": 6.0474, + "eval_samples_per_second": 7.441, + "eval_steps_per_second": 0.827, + "step": 63 + }, + { + "epoch": 4.0, + "eval_runtime": 6.3718, + "eval_samples_per_second": 7.062, + "eval_steps_per_second": 0.785, + "step": 84 + }, + { + "epoch": 5.0, + "eval_runtime": 6.5487, + "eval_samples_per_second": 6.872, + "eval_steps_per_second": 0.764, + "step": 105 + }, + { + "epoch": 6.0, + "eval_runtime": 6.0733, + "eval_samples_per_second": 7.409, + "eval_steps_per_second": 0.823, + "step": 126 + }, + { + "epoch": 7.0, + "eval_runtime": 6.0993, + "eval_samples_per_second": 7.378, + "eval_steps_per_second": 0.82, + "step": 147 + }, + { + "epoch": 8.0, + "eval_runtime": 6.6473, + "eval_samples_per_second": 6.77, + "eval_steps_per_second": 0.752, + "step": 168 + }, + { + "epoch": 9.0, + "eval_runtime": 6.3262, + "eval_samples_per_second": 7.113, + "eval_steps_per_second": 0.79, + "step": 189 + }, + { + "epoch": 10.0, + "eval_runtime": 6.0277, + "eval_samples_per_second": 7.465, + "eval_steps_per_second": 0.829, + "step": 210 + }, + { + "epoch": 11.0, + "eval_runtime": 6.6821, + "eval_samples_per_second": 6.734, + "eval_steps_per_second": 0.748, + "step": 231 + }, + { + "epoch": 12.0, + "eval_runtime": 6.5331, + "eval_samples_per_second": 6.888, + "eval_steps_per_second": 0.765, + "step": 252 + }, + { + "epoch": 13.0, + "eval_runtime": 6.0417, + "eval_samples_per_second": 7.448, + "eval_steps_per_second": 0.828, + "step": 273 + }, + { + "epoch": 14.0, + "eval_runtime": 6.0373, + "eval_samples_per_second": 7.454, + "eval_steps_per_second": 0.828, + "step": 294 + }, + { + "epoch": 15.0, + "eval_runtime": 6.6679, + "eval_samples_per_second": 6.749, + "eval_steps_per_second": 0.75, + "step": 315 + }, + { + "epoch": 16.0, + "eval_runtime": 6.4137, + "eval_samples_per_second": 7.016, + "eval_steps_per_second": 0.78, + "step": 336 + }, + { + "epoch": 17.0, + "eval_runtime": 6.106, + "eval_samples_per_second": 7.37, + "eval_steps_per_second": 0.819, + "step": 357 + }, + { + "epoch": 18.0, + "eval_runtime": 6.6353, + "eval_samples_per_second": 6.782, + "eval_steps_per_second": 0.754, + "step": 378 + }, + { + "epoch": 19.0, + "eval_runtime": 6.5796, + "eval_samples_per_second": 6.839, + "eval_steps_per_second": 0.76, + "step": 399 + }, + { + "epoch": 20.0, + "eval_runtime": 6.0621, + "eval_samples_per_second": 7.423, + "eval_steps_per_second": 0.825, + "step": 420 + }, + { + "epoch": 21.0, + "eval_runtime": 6.0837, + "eval_samples_per_second": 7.397, + "eval_steps_per_second": 0.822, + "step": 441 + }, + { + "epoch": 22.0, + "eval_runtime": 6.7734, + "eval_samples_per_second": 6.644, + "eval_steps_per_second": 0.738, + "step": 462 + }, + { + "epoch": 23.0, + "eval_runtime": 6.3665, + "eval_samples_per_second": 7.068, + "eval_steps_per_second": 0.785, + "step": 483 + }, + { + "epoch": 23.80952380952381, + "grad_norm": 0.07010962069034576, + "learning_rate": 6.878306878306877e-05, + "loss": 9.2521, + "step": 500 + }, + { + "epoch": 24.0, + "eval_runtime": 6.0991, + "eval_samples_per_second": 7.378, + "eval_steps_per_second": 0.82, + "step": 504 + }, + { + "epoch": 25.0, + "eval_runtime": 6.7598, + "eval_samples_per_second": 6.657, + "eval_steps_per_second": 0.74, + "step": 525 + } + ], + "logging_steps": 500, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2457712018944000.0, + "train_batch_size": 10, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-525/training_args.bin b/checkpoint-525/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b601bb06afbbd0f62027bb49993f43301f9ebdb5 --- /dev/null +++ b/checkpoint-525/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:870e1ff24d52f571f9b66eecb81300ba7b620de79860cca05110db11bcc791fa +size 5176 diff --git a/checkpoint-546/README.md b/checkpoint-546/README.md new file mode 100644 index 0000000000000000000000000000000000000000..8c65a9ab0201e1b488bf55ad92a0b5cff9f04be6 --- /dev/null +++ b/checkpoint-546/README.md @@ -0,0 +1,202 @@ +--- +base_model: microsoft/git-base +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/checkpoint-546/adapter_config.json b/checkpoint-546/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b6d44ec7670bb505bd8432352092989b1a15c296 --- /dev/null +++ b/checkpoint-546/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": { + "base_model_class": "GitForCausalLM", + "parent_library": "transformers.models.git.modeling_git" + }, + "base_model_name_or_path": "microsoft/git-base", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": [ + "classifier" + ], + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "value", + "query" + ], + "task_type": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-546/adapter_model.safetensors b/checkpoint-546/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..10c29c31352f082a9633c52df0bd55f941281686 --- /dev/null +++ b/checkpoint-546/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f75958b8a8479aaca65c3253106ffd9e76ffba8a261dc2a373fe31ad69e2d55b +size 593144 diff --git a/checkpoint-546/optimizer.pt b/checkpoint-546/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..7b89fe69c4d60c776071d2ebe0771f1930efcb11 --- /dev/null +++ b/checkpoint-546/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3ead039f47440f8164c10b33ba625be4a44ef4ab77c305b1e7157578f144b94 +size 1200278 diff --git a/checkpoint-546/rng_state.pth b/checkpoint-546/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..ce30e0cbfc6268393a3844f3b4f512b31b928235 --- /dev/null +++ b/checkpoint-546/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3546b3c6fb9453db85975725f65854837862dc75327bfc8180f181514ca0fb22 +size 14244 diff --git a/checkpoint-546/scheduler.pt b/checkpoint-546/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..dbeaea9efec8161a85367b699c1957b3d7af4878 --- /dev/null +++ b/checkpoint-546/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d951e8275416848e8b4d290d12323ea6d26c276cd7a67a90322fbde8def1c70a +size 1064 diff --git a/checkpoint-546/trainer_state.json b/checkpoint-546/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..d3bfaf4b650c20ee3abc9aa251c24ffc64aee424 --- /dev/null +++ b/checkpoint-546/trainer_state.json @@ -0,0 +1,222 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 26.0, + "eval_steps": 500, + "global_step": 546, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 1.0, + "eval_runtime": 11.809, + "eval_samples_per_second": 3.811, + "eval_steps_per_second": 0.423, + "step": 21 + }, + { + "epoch": 2.0, + "eval_runtime": 6.8145, + "eval_samples_per_second": 6.604, + "eval_steps_per_second": 0.734, + "step": 42 + }, + { + "epoch": 3.0, + "eval_runtime": 6.0474, + "eval_samples_per_second": 7.441, + "eval_steps_per_second": 0.827, + "step": 63 + }, + { + "epoch": 4.0, + "eval_runtime": 6.3718, + "eval_samples_per_second": 7.062, + "eval_steps_per_second": 0.785, + "step": 84 + }, + { + "epoch": 5.0, + "eval_runtime": 6.5487, + "eval_samples_per_second": 6.872, + "eval_steps_per_second": 0.764, + "step": 105 + }, + { + "epoch": 6.0, + "eval_runtime": 6.0733, + "eval_samples_per_second": 7.409, + "eval_steps_per_second": 0.823, + "step": 126 + }, + { + "epoch": 7.0, + "eval_runtime": 6.0993, + "eval_samples_per_second": 7.378, + "eval_steps_per_second": 0.82, + "step": 147 + }, + { + "epoch": 8.0, + "eval_runtime": 6.6473, + "eval_samples_per_second": 6.77, + "eval_steps_per_second": 0.752, + "step": 168 + }, + { + "epoch": 9.0, + "eval_runtime": 6.3262, + "eval_samples_per_second": 7.113, + "eval_steps_per_second": 0.79, + "step": 189 + }, + { + "epoch": 10.0, + "eval_runtime": 6.0277, + "eval_samples_per_second": 7.465, + "eval_steps_per_second": 0.829, + "step": 210 + }, + { + "epoch": 11.0, + "eval_runtime": 6.6821, + "eval_samples_per_second": 6.734, + "eval_steps_per_second": 0.748, + "step": 231 + }, + { + "epoch": 12.0, + "eval_runtime": 6.5331, + "eval_samples_per_second": 6.888, + "eval_steps_per_second": 0.765, + "step": 252 + }, + { + "epoch": 13.0, + "eval_runtime": 6.0417, + "eval_samples_per_second": 7.448, + "eval_steps_per_second": 0.828, + "step": 273 + }, + { + "epoch": 14.0, + "eval_runtime": 6.0373, + "eval_samples_per_second": 7.454, + "eval_steps_per_second": 0.828, + "step": 294 + }, + { + "epoch": 15.0, + "eval_runtime": 6.6679, + "eval_samples_per_second": 6.749, + "eval_steps_per_second": 0.75, + "step": 315 + }, + { + "epoch": 16.0, + "eval_runtime": 6.4137, + "eval_samples_per_second": 7.016, + "eval_steps_per_second": 0.78, + "step": 336 + }, + { + "epoch": 17.0, + "eval_runtime": 6.106, + "eval_samples_per_second": 7.37, + "eval_steps_per_second": 0.819, + "step": 357 + }, + { + "epoch": 18.0, + "eval_runtime": 6.6353, + "eval_samples_per_second": 6.782, + "eval_steps_per_second": 0.754, + "step": 378 + }, + { + "epoch": 19.0, + "eval_runtime": 6.5796, + "eval_samples_per_second": 6.839, + "eval_steps_per_second": 0.76, + "step": 399 + }, + { + "epoch": 20.0, + "eval_runtime": 6.0621, + "eval_samples_per_second": 7.423, + "eval_steps_per_second": 0.825, + "step": 420 + }, + { + "epoch": 21.0, + "eval_runtime": 6.0837, + "eval_samples_per_second": 7.397, + "eval_steps_per_second": 0.822, + "step": 441 + }, + { + "epoch": 22.0, + "eval_runtime": 6.7734, + "eval_samples_per_second": 6.644, + "eval_steps_per_second": 0.738, + "step": 462 + }, + { + "epoch": 23.0, + "eval_runtime": 6.3665, + "eval_samples_per_second": 7.068, + "eval_steps_per_second": 0.785, + "step": 483 + }, + { + "epoch": 23.80952380952381, + "grad_norm": 0.07010962069034576, + "learning_rate": 6.878306878306877e-05, + "loss": 9.2521, + "step": 500 + }, + { + "epoch": 24.0, + "eval_runtime": 6.0991, + "eval_samples_per_second": 7.378, + "eval_steps_per_second": 0.82, + "step": 504 + }, + { + "epoch": 25.0, + "eval_runtime": 6.7598, + "eval_samples_per_second": 6.657, + "eval_steps_per_second": 0.74, + "step": 525 + }, + { + "epoch": 26.0, + "eval_runtime": 6.5824, + "eval_samples_per_second": 6.836, + "eval_steps_per_second": 0.76, + "step": 546 + } + ], + "logging_steps": 500, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2556020499701760.0, + "train_batch_size": 10, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-546/training_args.bin b/checkpoint-546/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b601bb06afbbd0f62027bb49993f43301f9ebdb5 --- /dev/null +++ b/checkpoint-546/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:870e1ff24d52f571f9b66eecb81300ba7b620de79860cca05110db11bcc791fa +size 5176 diff --git a/checkpoint-567/README.md b/checkpoint-567/README.md new file mode 100644 index 0000000000000000000000000000000000000000..8c65a9ab0201e1b488bf55ad92a0b5cff9f04be6 --- /dev/null +++ b/checkpoint-567/README.md @@ -0,0 +1,202 @@ +--- +base_model: microsoft/git-base +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/checkpoint-567/adapter_config.json b/checkpoint-567/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b6d44ec7670bb505bd8432352092989b1a15c296 --- /dev/null +++ b/checkpoint-567/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": { + "base_model_class": "GitForCausalLM", + "parent_library": "transformers.models.git.modeling_git" + }, + "base_model_name_or_path": "microsoft/git-base", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": [ + "classifier" + ], + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "value", + "query" + ], + "task_type": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-567/adapter_model.safetensors b/checkpoint-567/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..be84be8faa389ca1f9025be842d8a47e11177618 --- /dev/null +++ b/checkpoint-567/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22b65346e7aa975f98c21fc3292d8ac76382b058a59a818edfa4e848ed08f531 +size 593144 diff --git a/checkpoint-567/optimizer.pt b/checkpoint-567/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..935056c01ee29983d2b97616ab98ea20a96ef6b1 --- /dev/null +++ b/checkpoint-567/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7eb1c8ba2c00819e864c14a60907bb7588ac66b6e42719a923c8de22ac2eac59 +size 1200278 diff --git a/checkpoint-567/rng_state.pth b/checkpoint-567/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..15be3e1397fb2429e5e2aa44a64a222904bec480 --- /dev/null +++ b/checkpoint-567/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dac01f33d135214b0589bd21da12e4becdcf24d79c66464e8464f0139e741dc7 +size 14244 diff --git a/checkpoint-567/scheduler.pt b/checkpoint-567/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..980176611d55ddeb84b385903c25e288fe0f4e6f --- /dev/null +++ b/checkpoint-567/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78354c55849f7feddcb9e7b13df4aa3f7a3eb7997b7b6a480c4bf7db8733edb3 +size 1064 diff --git a/checkpoint-567/trainer_state.json b/checkpoint-567/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..1fabcc2f2ef19490a7e029794eee8f659cbc3cb9 --- /dev/null +++ b/checkpoint-567/trainer_state.json @@ -0,0 +1,229 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 27.0, + "eval_steps": 500, + "global_step": 567, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 1.0, + "eval_runtime": 11.809, + "eval_samples_per_second": 3.811, + "eval_steps_per_second": 0.423, + "step": 21 + }, + { + "epoch": 2.0, + "eval_runtime": 6.8145, + "eval_samples_per_second": 6.604, + "eval_steps_per_second": 0.734, + "step": 42 + }, + { + "epoch": 3.0, + "eval_runtime": 6.0474, + "eval_samples_per_second": 7.441, + "eval_steps_per_second": 0.827, + "step": 63 + }, + { + "epoch": 4.0, + "eval_runtime": 6.3718, + "eval_samples_per_second": 7.062, + "eval_steps_per_second": 0.785, + "step": 84 + }, + { + "epoch": 5.0, + "eval_runtime": 6.5487, + "eval_samples_per_second": 6.872, + "eval_steps_per_second": 0.764, + "step": 105 + }, + { + "epoch": 6.0, + "eval_runtime": 6.0733, + "eval_samples_per_second": 7.409, + "eval_steps_per_second": 0.823, + "step": 126 + }, + { + "epoch": 7.0, + "eval_runtime": 6.0993, + "eval_samples_per_second": 7.378, + "eval_steps_per_second": 0.82, + "step": 147 + }, + { + "epoch": 8.0, + "eval_runtime": 6.6473, + "eval_samples_per_second": 6.77, + "eval_steps_per_second": 0.752, + "step": 168 + }, + { + "epoch": 9.0, + "eval_runtime": 6.3262, + "eval_samples_per_second": 7.113, + "eval_steps_per_second": 0.79, + "step": 189 + }, + { + "epoch": 10.0, + "eval_runtime": 6.0277, + "eval_samples_per_second": 7.465, + "eval_steps_per_second": 0.829, + "step": 210 + }, + { + "epoch": 11.0, + "eval_runtime": 6.6821, + "eval_samples_per_second": 6.734, + "eval_steps_per_second": 0.748, + "step": 231 + }, + { + "epoch": 12.0, + "eval_runtime": 6.5331, + "eval_samples_per_second": 6.888, + "eval_steps_per_second": 0.765, + "step": 252 + }, + { + "epoch": 13.0, + "eval_runtime": 6.0417, + "eval_samples_per_second": 7.448, + "eval_steps_per_second": 0.828, + "step": 273 + }, + { + "epoch": 14.0, + "eval_runtime": 6.0373, + "eval_samples_per_second": 7.454, + "eval_steps_per_second": 0.828, + "step": 294 + }, + { + "epoch": 15.0, + "eval_runtime": 6.6679, + "eval_samples_per_second": 6.749, + "eval_steps_per_second": 0.75, + "step": 315 + }, + { + "epoch": 16.0, + "eval_runtime": 6.4137, + "eval_samples_per_second": 7.016, + "eval_steps_per_second": 0.78, + "step": 336 + }, + { + "epoch": 17.0, + "eval_runtime": 6.106, + "eval_samples_per_second": 7.37, + "eval_steps_per_second": 0.819, + "step": 357 + }, + { + "epoch": 18.0, + "eval_runtime": 6.6353, + "eval_samples_per_second": 6.782, + "eval_steps_per_second": 0.754, + "step": 378 + }, + { + "epoch": 19.0, + "eval_runtime": 6.5796, + "eval_samples_per_second": 6.839, + "eval_steps_per_second": 0.76, + "step": 399 + }, + { + "epoch": 20.0, + "eval_runtime": 6.0621, + "eval_samples_per_second": 7.423, + "eval_steps_per_second": 0.825, + "step": 420 + }, + { + "epoch": 21.0, + "eval_runtime": 6.0837, + "eval_samples_per_second": 7.397, + "eval_steps_per_second": 0.822, + "step": 441 + }, + { + "epoch": 22.0, + "eval_runtime": 6.7734, + "eval_samples_per_second": 6.644, + "eval_steps_per_second": 0.738, + "step": 462 + }, + { + "epoch": 23.0, + "eval_runtime": 6.3665, + "eval_samples_per_second": 7.068, + "eval_steps_per_second": 0.785, + "step": 483 + }, + { + "epoch": 23.80952380952381, + "grad_norm": 0.07010962069034576, + "learning_rate": 6.878306878306877e-05, + "loss": 9.2521, + "step": 500 + }, + { + "epoch": 24.0, + "eval_runtime": 6.0991, + "eval_samples_per_second": 7.378, + "eval_steps_per_second": 0.82, + "step": 504 + }, + { + "epoch": 25.0, + "eval_runtime": 6.7598, + "eval_samples_per_second": 6.657, + "eval_steps_per_second": 0.74, + "step": 525 + }, + { + "epoch": 26.0, + "eval_runtime": 6.5824, + "eval_samples_per_second": 6.836, + "eval_steps_per_second": 0.76, + "step": 546 + }, + { + "epoch": 27.0, + "eval_runtime": 6.1001, + "eval_samples_per_second": 7.377, + "eval_steps_per_second": 0.82, + "step": 567 + } + ], + "logging_steps": 500, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2654328980459520.0, + "train_batch_size": 10, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-567/training_args.bin b/checkpoint-567/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b601bb06afbbd0f62027bb49993f43301f9ebdb5 --- /dev/null +++ b/checkpoint-567/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:870e1ff24d52f571f9b66eecb81300ba7b620de79860cca05110db11bcc791fa +size 5176 diff --git a/checkpoint-588/README.md b/checkpoint-588/README.md new file mode 100644 index 0000000000000000000000000000000000000000..8c65a9ab0201e1b488bf55ad92a0b5cff9f04be6 --- /dev/null +++ b/checkpoint-588/README.md @@ -0,0 +1,202 @@ +--- +base_model: microsoft/git-base +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/checkpoint-588/adapter_config.json b/checkpoint-588/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b6d44ec7670bb505bd8432352092989b1a15c296 --- /dev/null +++ b/checkpoint-588/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": { + "base_model_class": "GitForCausalLM", + "parent_library": "transformers.models.git.modeling_git" + }, + "base_model_name_or_path": "microsoft/git-base", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": [ + "classifier" + ], + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "value", + "query" + ], + "task_type": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-588/adapter_model.safetensors b/checkpoint-588/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..263775ab6165040ccfb66a91939cd0fd66dfdc1f --- /dev/null +++ b/checkpoint-588/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16281d0b4a0bad898d5f7bf0573a01e5114989c0329e4f918ea237b66178076f +size 593144 diff --git a/checkpoint-588/optimizer.pt b/checkpoint-588/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..7effa569975d90f49f75a857b92ec3fe8d5a0ec4 --- /dev/null +++ b/checkpoint-588/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39d75e4e9cf06828e5c47884b6e9e340640afa76a609db3dd268d53cebf9483f +size 1200278 diff --git a/checkpoint-588/rng_state.pth b/checkpoint-588/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..61992bc4211cc0150be53a9e9c2d843ac9b7e1c9 --- /dev/null +++ b/checkpoint-588/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:616a691b0982e11c392d74b2d5ead7cc21586ccbd5f1ec969548f6752722cf96 +size 14244 diff --git a/checkpoint-588/scheduler.pt b/checkpoint-588/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..ab349384ab6d4ef0f7437ace3a9e152b0db56b6d --- /dev/null +++ b/checkpoint-588/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b83dc95ff46bdb3bdb55d743a5228b0a109b0536939acb3906b75ee62fa60a7 +size 1064 diff --git a/checkpoint-588/trainer_state.json b/checkpoint-588/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..218f0dca7057c962e190f3e55e001d701e72f881 --- /dev/null +++ b/checkpoint-588/trainer_state.json @@ -0,0 +1,236 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 28.0, + "eval_steps": 500, + "global_step": 588, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 1.0, + "eval_runtime": 11.809, + "eval_samples_per_second": 3.811, + "eval_steps_per_second": 0.423, + "step": 21 + }, + { + "epoch": 2.0, + "eval_runtime": 6.8145, + "eval_samples_per_second": 6.604, + "eval_steps_per_second": 0.734, + "step": 42 + }, + { + "epoch": 3.0, + "eval_runtime": 6.0474, + "eval_samples_per_second": 7.441, + "eval_steps_per_second": 0.827, + "step": 63 + }, + { + "epoch": 4.0, + "eval_runtime": 6.3718, + "eval_samples_per_second": 7.062, + "eval_steps_per_second": 0.785, + "step": 84 + }, + { + "epoch": 5.0, + "eval_runtime": 6.5487, + "eval_samples_per_second": 6.872, + "eval_steps_per_second": 0.764, + "step": 105 + }, + { + "epoch": 6.0, + "eval_runtime": 6.0733, + "eval_samples_per_second": 7.409, + "eval_steps_per_second": 0.823, + "step": 126 + }, + { + "epoch": 7.0, + "eval_runtime": 6.0993, + "eval_samples_per_second": 7.378, + "eval_steps_per_second": 0.82, + "step": 147 + }, + { + "epoch": 8.0, + "eval_runtime": 6.6473, + "eval_samples_per_second": 6.77, + "eval_steps_per_second": 0.752, + "step": 168 + }, + { + "epoch": 9.0, + "eval_runtime": 6.3262, + "eval_samples_per_second": 7.113, + "eval_steps_per_second": 0.79, + "step": 189 + }, + { + "epoch": 10.0, + "eval_runtime": 6.0277, + "eval_samples_per_second": 7.465, + "eval_steps_per_second": 0.829, + "step": 210 + }, + { + "epoch": 11.0, + "eval_runtime": 6.6821, + "eval_samples_per_second": 6.734, + "eval_steps_per_second": 0.748, + "step": 231 + }, + { + "epoch": 12.0, + "eval_runtime": 6.5331, + "eval_samples_per_second": 6.888, + "eval_steps_per_second": 0.765, + "step": 252 + }, + { + "epoch": 13.0, + "eval_runtime": 6.0417, + "eval_samples_per_second": 7.448, + "eval_steps_per_second": 0.828, + "step": 273 + }, + { + "epoch": 14.0, + "eval_runtime": 6.0373, + "eval_samples_per_second": 7.454, + "eval_steps_per_second": 0.828, + "step": 294 + }, + { + "epoch": 15.0, + "eval_runtime": 6.6679, + "eval_samples_per_second": 6.749, + "eval_steps_per_second": 0.75, + "step": 315 + }, + { + "epoch": 16.0, + "eval_runtime": 6.4137, + "eval_samples_per_second": 7.016, + "eval_steps_per_second": 0.78, + "step": 336 + }, + { + "epoch": 17.0, + "eval_runtime": 6.106, + "eval_samples_per_second": 7.37, + "eval_steps_per_second": 0.819, + "step": 357 + }, + { + "epoch": 18.0, + "eval_runtime": 6.6353, + "eval_samples_per_second": 6.782, + "eval_steps_per_second": 0.754, + "step": 378 + }, + { + "epoch": 19.0, + "eval_runtime": 6.5796, + "eval_samples_per_second": 6.839, + "eval_steps_per_second": 0.76, + "step": 399 + }, + { + "epoch": 20.0, + "eval_runtime": 6.0621, + "eval_samples_per_second": 7.423, + "eval_steps_per_second": 0.825, + "step": 420 + }, + { + "epoch": 21.0, + "eval_runtime": 6.0837, + "eval_samples_per_second": 7.397, + "eval_steps_per_second": 0.822, + "step": 441 + }, + { + "epoch": 22.0, + "eval_runtime": 6.7734, + "eval_samples_per_second": 6.644, + "eval_steps_per_second": 0.738, + "step": 462 + }, + { + "epoch": 23.0, + "eval_runtime": 6.3665, + "eval_samples_per_second": 7.068, + "eval_steps_per_second": 0.785, + "step": 483 + }, + { + "epoch": 23.80952380952381, + "grad_norm": 0.07010962069034576, + "learning_rate": 6.878306878306877e-05, + "loss": 9.2521, + "step": 500 + }, + { + "epoch": 24.0, + "eval_runtime": 6.0991, + "eval_samples_per_second": 7.378, + "eval_steps_per_second": 0.82, + "step": 504 + }, + { + "epoch": 25.0, + "eval_runtime": 6.7598, + "eval_samples_per_second": 6.657, + "eval_steps_per_second": 0.74, + "step": 525 + }, + { + "epoch": 26.0, + "eval_runtime": 6.5824, + "eval_samples_per_second": 6.836, + "eval_steps_per_second": 0.76, + "step": 546 + }, + { + "epoch": 27.0, + "eval_runtime": 6.1001, + "eval_samples_per_second": 7.377, + "eval_steps_per_second": 0.82, + "step": 567 + }, + { + "epoch": 28.0, + "eval_runtime": 6.4201, + "eval_samples_per_second": 7.009, + "eval_steps_per_second": 0.779, + "step": 588 + } + ], + "logging_steps": 500, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2752637461217280.0, + "train_batch_size": 10, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-588/training_args.bin b/checkpoint-588/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b601bb06afbbd0f62027bb49993f43301f9ebdb5 --- /dev/null +++ b/checkpoint-588/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:870e1ff24d52f571f9b66eecb81300ba7b620de79860cca05110db11bcc791fa +size 5176 diff --git a/checkpoint-609/README.md b/checkpoint-609/README.md new file mode 100644 index 0000000000000000000000000000000000000000..8c65a9ab0201e1b488bf55ad92a0b5cff9f04be6 --- /dev/null +++ b/checkpoint-609/README.md @@ -0,0 +1,202 @@ +--- +base_model: microsoft/git-base +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/checkpoint-609/adapter_config.json b/checkpoint-609/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b6d44ec7670bb505bd8432352092989b1a15c296 --- /dev/null +++ b/checkpoint-609/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": { + "base_model_class": "GitForCausalLM", + "parent_library": "transformers.models.git.modeling_git" + }, + "base_model_name_or_path": "microsoft/git-base", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": [ + "classifier" + ], + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "value", + "query" + ], + "task_type": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-609/adapter_model.safetensors b/checkpoint-609/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2b2b5b53d60969ad4ec017a8981da9e6f89622a3 --- /dev/null +++ b/checkpoint-609/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f87180aef28add97cb6316cf8b8c158a646824dd5e874681e0e42b97d5660b63 +size 593144 diff --git a/checkpoint-609/optimizer.pt b/checkpoint-609/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..d504ceaf0db69d2a8361e6b6375368054b5345bc --- /dev/null +++ b/checkpoint-609/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9fed2026357665c1afdb62f367c3ddf07a91f670947c8e22943c99ce68fe6f7 +size 1200278 diff --git a/checkpoint-609/rng_state.pth b/checkpoint-609/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..13875de2a25d9fcc4e6e227e30b64d1af4222cb5 --- /dev/null +++ b/checkpoint-609/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06b42a81d90bf891e0c644ad16b75858e04c376cdb96b8be1614fb116a7544f4 +size 14244 diff --git a/checkpoint-609/scheduler.pt b/checkpoint-609/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..168dd27a20fff78aa1ccc7bce259978518aa8262 --- /dev/null +++ b/checkpoint-609/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad8f64a98e8d347ccadf07e46c52069a3f737c34a469ae28b30a5fafcb789ed6 +size 1064 diff --git a/checkpoint-609/trainer_state.json b/checkpoint-609/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..40b1acf858eca39570c74fa2c7599315288b436f --- /dev/null +++ b/checkpoint-609/trainer_state.json @@ -0,0 +1,243 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 29.0, + "eval_steps": 500, + "global_step": 609, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 1.0, + "eval_runtime": 11.809, + "eval_samples_per_second": 3.811, + "eval_steps_per_second": 0.423, + "step": 21 + }, + { + "epoch": 2.0, + "eval_runtime": 6.8145, + "eval_samples_per_second": 6.604, + "eval_steps_per_second": 0.734, + "step": 42 + }, + { + "epoch": 3.0, + "eval_runtime": 6.0474, + "eval_samples_per_second": 7.441, + "eval_steps_per_second": 0.827, + "step": 63 + }, + { + "epoch": 4.0, + "eval_runtime": 6.3718, + "eval_samples_per_second": 7.062, + "eval_steps_per_second": 0.785, + "step": 84 + }, + { + "epoch": 5.0, + "eval_runtime": 6.5487, + "eval_samples_per_second": 6.872, + "eval_steps_per_second": 0.764, + "step": 105 + }, + { + "epoch": 6.0, + "eval_runtime": 6.0733, + "eval_samples_per_second": 7.409, + "eval_steps_per_second": 0.823, + "step": 126 + }, + { + "epoch": 7.0, + "eval_runtime": 6.0993, + "eval_samples_per_second": 7.378, + "eval_steps_per_second": 0.82, + "step": 147 + }, + { + "epoch": 8.0, + "eval_runtime": 6.6473, + "eval_samples_per_second": 6.77, + "eval_steps_per_second": 0.752, + "step": 168 + }, + { + "epoch": 9.0, + "eval_runtime": 6.3262, + "eval_samples_per_second": 7.113, + "eval_steps_per_second": 0.79, + "step": 189 + }, + { + "epoch": 10.0, + "eval_runtime": 6.0277, + "eval_samples_per_second": 7.465, + "eval_steps_per_second": 0.829, + "step": 210 + }, + { + "epoch": 11.0, + "eval_runtime": 6.6821, + "eval_samples_per_second": 6.734, + "eval_steps_per_second": 0.748, + "step": 231 + }, + { + "epoch": 12.0, + "eval_runtime": 6.5331, + "eval_samples_per_second": 6.888, + "eval_steps_per_second": 0.765, + "step": 252 + }, + { + "epoch": 13.0, + "eval_runtime": 6.0417, + "eval_samples_per_second": 7.448, + "eval_steps_per_second": 0.828, + "step": 273 + }, + { + "epoch": 14.0, + "eval_runtime": 6.0373, + "eval_samples_per_second": 7.454, + "eval_steps_per_second": 0.828, + "step": 294 + }, + { + "epoch": 15.0, + "eval_runtime": 6.6679, + "eval_samples_per_second": 6.749, + "eval_steps_per_second": 0.75, + "step": 315 + }, + { + "epoch": 16.0, + "eval_runtime": 6.4137, + "eval_samples_per_second": 7.016, + "eval_steps_per_second": 0.78, + "step": 336 + }, + { + "epoch": 17.0, + "eval_runtime": 6.106, + "eval_samples_per_second": 7.37, + "eval_steps_per_second": 0.819, + "step": 357 + }, + { + "epoch": 18.0, + "eval_runtime": 6.6353, + "eval_samples_per_second": 6.782, + "eval_steps_per_second": 0.754, + "step": 378 + }, + { + "epoch": 19.0, + "eval_runtime": 6.5796, + "eval_samples_per_second": 6.839, + "eval_steps_per_second": 0.76, + "step": 399 + }, + { + "epoch": 20.0, + "eval_runtime": 6.0621, + "eval_samples_per_second": 7.423, + "eval_steps_per_second": 0.825, + "step": 420 + }, + { + "epoch": 21.0, + "eval_runtime": 6.0837, + "eval_samples_per_second": 7.397, + "eval_steps_per_second": 0.822, + "step": 441 + }, + { + "epoch": 22.0, + "eval_runtime": 6.7734, + "eval_samples_per_second": 6.644, + "eval_steps_per_second": 0.738, + "step": 462 + }, + { + "epoch": 23.0, + "eval_runtime": 6.3665, + "eval_samples_per_second": 7.068, + "eval_steps_per_second": 0.785, + "step": 483 + }, + { + "epoch": 23.80952380952381, + "grad_norm": 0.07010962069034576, + "learning_rate": 6.878306878306877e-05, + "loss": 9.2521, + "step": 500 + }, + { + "epoch": 24.0, + "eval_runtime": 6.0991, + "eval_samples_per_second": 7.378, + "eval_steps_per_second": 0.82, + "step": 504 + }, + { + "epoch": 25.0, + "eval_runtime": 6.7598, + "eval_samples_per_second": 6.657, + "eval_steps_per_second": 0.74, + "step": 525 + }, + { + "epoch": 26.0, + "eval_runtime": 6.5824, + "eval_samples_per_second": 6.836, + "eval_steps_per_second": 0.76, + "step": 546 + }, + { + "epoch": 27.0, + "eval_runtime": 6.1001, + "eval_samples_per_second": 7.377, + "eval_steps_per_second": 0.82, + "step": 567 + }, + { + "epoch": 28.0, + "eval_runtime": 6.4201, + "eval_samples_per_second": 7.009, + "eval_steps_per_second": 0.779, + "step": 588 + }, + { + "epoch": 29.0, + "eval_runtime": 6.5591, + "eval_samples_per_second": 6.861, + "eval_steps_per_second": 0.762, + "step": 609 + } + ], + "logging_steps": 500, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2850945941975040.0, + "train_batch_size": 10, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-609/training_args.bin b/checkpoint-609/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b601bb06afbbd0f62027bb49993f43301f9ebdb5 --- /dev/null +++ b/checkpoint-609/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:870e1ff24d52f571f9b66eecb81300ba7b620de79860cca05110db11bcc791fa +size 5176 diff --git a/checkpoint-63/README.md b/checkpoint-63/README.md new file mode 100644 index 0000000000000000000000000000000000000000..8c65a9ab0201e1b488bf55ad92a0b5cff9f04be6 --- /dev/null +++ b/checkpoint-63/README.md @@ -0,0 +1,202 @@ +--- +base_model: microsoft/git-base +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/checkpoint-63/adapter_config.json b/checkpoint-63/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b6d44ec7670bb505bd8432352092989b1a15c296 --- /dev/null +++ b/checkpoint-63/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": { + "base_model_class": "GitForCausalLM", + "parent_library": "transformers.models.git.modeling_git" + }, + "base_model_name_or_path": "microsoft/git-base", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": [ + "classifier" + ], + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "value", + "query" + ], + "task_type": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-63/adapter_model.safetensors b/checkpoint-63/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d7041d53c361dbcf830c7e28a6e1f312cafdf347 --- /dev/null +++ b/checkpoint-63/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:439b748a0b22cf7184119625ce0c94e11026203dad59af34ea1fa58b911465d3 +size 593144 diff --git a/checkpoint-63/optimizer.pt b/checkpoint-63/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..6b12ad4b8b1ab7d4d5c2908d127a8b759c1b8e34 --- /dev/null +++ b/checkpoint-63/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b205553ec57e06781ebeb57b949b27a99fedf2af4318a275ada8bd844e18afc +size 1200278 diff --git a/checkpoint-63/rng_state.pth b/checkpoint-63/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..1d356519fe8eceb710696dfbc724cceac0f19bbb --- /dev/null +++ b/checkpoint-63/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec34efd45621aabd9e81de9ff41eb08f889164a0d951929ff0526846676815cd +size 14244 diff --git a/checkpoint-63/scheduler.pt b/checkpoint-63/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..34bd5003013cfb1cb5d2cd2685528312c057900e --- /dev/null +++ b/checkpoint-63/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5085ab123d90ac3fb271f2c67e47b55077f9766bbd8a1c8ca19b3205b349867c +size 1064 diff --git a/checkpoint-63/trainer_state.json b/checkpoint-63/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..98f1cfd38286a7c3850b33e82d3443db31bb17bf --- /dev/null +++ b/checkpoint-63/trainer_state.json @@ -0,0 +1,54 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 63, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 1.0, + "eval_runtime": 11.809, + "eval_samples_per_second": 3.811, + "eval_steps_per_second": 0.423, + "step": 21 + }, + { + "epoch": 2.0, + "eval_runtime": 6.8145, + "eval_samples_per_second": 6.604, + "eval_steps_per_second": 0.734, + "step": 42 + }, + { + "epoch": 3.0, + "eval_runtime": 6.0474, + "eval_samples_per_second": 7.441, + "eval_steps_per_second": 0.827, + "step": 63 + } + ], + "logging_steps": 500, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 294925442273280.0, + "train_batch_size": 10, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-63/training_args.bin b/checkpoint-63/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b601bb06afbbd0f62027bb49993f43301f9ebdb5 --- /dev/null +++ b/checkpoint-63/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:870e1ff24d52f571f9b66eecb81300ba7b620de79860cca05110db11bcc791fa +size 5176 diff --git a/checkpoint-630/README.md b/checkpoint-630/README.md new file mode 100644 index 0000000000000000000000000000000000000000..8c65a9ab0201e1b488bf55ad92a0b5cff9f04be6 --- /dev/null +++ b/checkpoint-630/README.md @@ -0,0 +1,202 @@ +--- +base_model: microsoft/git-base +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/checkpoint-630/adapter_config.json b/checkpoint-630/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b6d44ec7670bb505bd8432352092989b1a15c296 --- /dev/null +++ b/checkpoint-630/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": { + "base_model_class": "GitForCausalLM", + "parent_library": "transformers.models.git.modeling_git" + }, + "base_model_name_or_path": "microsoft/git-base", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": [ + "classifier" + ], + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "value", + "query" + ], + "task_type": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-630/adapter_model.safetensors b/checkpoint-630/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..27aff4dac84f5b704dcd0e79178b30037ba14aa5 --- /dev/null +++ b/checkpoint-630/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67c052972788561e17cc1efd70127f9ca5f55d2b24e26a9754de413f5d31b108 +size 593144 diff --git a/checkpoint-630/optimizer.pt b/checkpoint-630/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..422dea65708730ea74197f397645d1fad99a110c --- /dev/null +++ b/checkpoint-630/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b211fb6749f75e4791210644b944105b2ea43971845d61a92ce498cee439a25 +size 1200278 diff --git a/checkpoint-630/rng_state.pth b/checkpoint-630/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..a4d755fefb1b7412f4b3dba2dec42a0f2ec504bf --- /dev/null +++ b/checkpoint-630/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5580fadfe0970583de05f315ddfbd81bfe79cccb526ed25401f0f953a7810ea +size 14244 diff --git a/checkpoint-630/scheduler.pt b/checkpoint-630/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..701b6bc569d33f59a8083a477669a7e47fccd445 --- /dev/null +++ b/checkpoint-630/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:280e6b3100b3aaf78ea167aadecfd45c7f5561ee85f3d76feb22608c01201412 +size 1064 diff --git a/checkpoint-630/trainer_state.json b/checkpoint-630/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..bde5975f0ad82e58391fc5771d2f7a6419ac5165 --- /dev/null +++ b/checkpoint-630/trainer_state.json @@ -0,0 +1,250 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 30.0, + "eval_steps": 500, + "global_step": 630, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 1.0, + "eval_runtime": 11.809, + "eval_samples_per_second": 3.811, + "eval_steps_per_second": 0.423, + "step": 21 + }, + { + "epoch": 2.0, + "eval_runtime": 6.8145, + "eval_samples_per_second": 6.604, + "eval_steps_per_second": 0.734, + "step": 42 + }, + { + "epoch": 3.0, + "eval_runtime": 6.0474, + "eval_samples_per_second": 7.441, + "eval_steps_per_second": 0.827, + "step": 63 + }, + { + "epoch": 4.0, + "eval_runtime": 6.3718, + "eval_samples_per_second": 7.062, + "eval_steps_per_second": 0.785, + "step": 84 + }, + { + "epoch": 5.0, + "eval_runtime": 6.5487, + "eval_samples_per_second": 6.872, + "eval_steps_per_second": 0.764, + "step": 105 + }, + { + "epoch": 6.0, + "eval_runtime": 6.0733, + "eval_samples_per_second": 7.409, + "eval_steps_per_second": 0.823, + "step": 126 + }, + { + "epoch": 7.0, + "eval_runtime": 6.0993, + "eval_samples_per_second": 7.378, + "eval_steps_per_second": 0.82, + "step": 147 + }, + { + "epoch": 8.0, + "eval_runtime": 6.6473, + "eval_samples_per_second": 6.77, + "eval_steps_per_second": 0.752, + "step": 168 + }, + { + "epoch": 9.0, + "eval_runtime": 6.3262, + "eval_samples_per_second": 7.113, + "eval_steps_per_second": 0.79, + "step": 189 + }, + { + "epoch": 10.0, + "eval_runtime": 6.0277, + "eval_samples_per_second": 7.465, + "eval_steps_per_second": 0.829, + "step": 210 + }, + { + "epoch": 11.0, + "eval_runtime": 6.6821, + "eval_samples_per_second": 6.734, + "eval_steps_per_second": 0.748, + "step": 231 + }, + { + "epoch": 12.0, + "eval_runtime": 6.5331, + "eval_samples_per_second": 6.888, + "eval_steps_per_second": 0.765, + "step": 252 + }, + { + "epoch": 13.0, + "eval_runtime": 6.0417, + "eval_samples_per_second": 7.448, + "eval_steps_per_second": 0.828, + "step": 273 + }, + { + "epoch": 14.0, + "eval_runtime": 6.0373, + "eval_samples_per_second": 7.454, + "eval_steps_per_second": 0.828, + "step": 294 + }, + { + "epoch": 15.0, + "eval_runtime": 6.6679, + "eval_samples_per_second": 6.749, + "eval_steps_per_second": 0.75, + "step": 315 + }, + { + "epoch": 16.0, + "eval_runtime": 6.4137, + "eval_samples_per_second": 7.016, + "eval_steps_per_second": 0.78, + "step": 336 + }, + { + "epoch": 17.0, + "eval_runtime": 6.106, + "eval_samples_per_second": 7.37, + "eval_steps_per_second": 0.819, + "step": 357 + }, + { + "epoch": 18.0, + "eval_runtime": 6.6353, + "eval_samples_per_second": 6.782, + "eval_steps_per_second": 0.754, + "step": 378 + }, + { + "epoch": 19.0, + "eval_runtime": 6.5796, + "eval_samples_per_second": 6.839, + "eval_steps_per_second": 0.76, + "step": 399 + }, + { + "epoch": 20.0, + "eval_runtime": 6.0621, + "eval_samples_per_second": 7.423, + "eval_steps_per_second": 0.825, + "step": 420 + }, + { + "epoch": 21.0, + "eval_runtime": 6.0837, + "eval_samples_per_second": 7.397, + "eval_steps_per_second": 0.822, + "step": 441 + }, + { + "epoch": 22.0, + "eval_runtime": 6.7734, + "eval_samples_per_second": 6.644, + "eval_steps_per_second": 0.738, + "step": 462 + }, + { + "epoch": 23.0, + "eval_runtime": 6.3665, + "eval_samples_per_second": 7.068, + "eval_steps_per_second": 0.785, + "step": 483 + }, + { + "epoch": 23.80952380952381, + "grad_norm": 0.07010962069034576, + "learning_rate": 6.878306878306877e-05, + "loss": 9.2521, + "step": 500 + }, + { + "epoch": 24.0, + "eval_runtime": 6.0991, + "eval_samples_per_second": 7.378, + "eval_steps_per_second": 0.82, + "step": 504 + }, + { + "epoch": 25.0, + "eval_runtime": 6.7598, + "eval_samples_per_second": 6.657, + "eval_steps_per_second": 0.74, + "step": 525 + }, + { + "epoch": 26.0, + "eval_runtime": 6.5824, + "eval_samples_per_second": 6.836, + "eval_steps_per_second": 0.76, + "step": 546 + }, + { + "epoch": 27.0, + "eval_runtime": 6.1001, + "eval_samples_per_second": 7.377, + "eval_steps_per_second": 0.82, + "step": 567 + }, + { + "epoch": 28.0, + "eval_runtime": 6.4201, + "eval_samples_per_second": 7.009, + "eval_steps_per_second": 0.779, + "step": 588 + }, + { + "epoch": 29.0, + "eval_runtime": 6.5591, + "eval_samples_per_second": 6.861, + "eval_steps_per_second": 0.762, + "step": 609 + }, + { + "epoch": 30.0, + "eval_runtime": 6.0655, + "eval_samples_per_second": 7.419, + "eval_steps_per_second": 0.824, + "step": 630 + } + ], + "logging_steps": 500, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 2949254422732800.0, + "train_batch_size": 10, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-630/training_args.bin b/checkpoint-630/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b601bb06afbbd0f62027bb49993f43301f9ebdb5 --- /dev/null +++ b/checkpoint-630/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:870e1ff24d52f571f9b66eecb81300ba7b620de79860cca05110db11bcc791fa +size 5176 diff --git a/checkpoint-84/README.md b/checkpoint-84/README.md new file mode 100644 index 0000000000000000000000000000000000000000..8c65a9ab0201e1b488bf55ad92a0b5cff9f04be6 --- /dev/null +++ b/checkpoint-84/README.md @@ -0,0 +1,202 @@ +--- +base_model: microsoft/git-base +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/checkpoint-84/adapter_config.json b/checkpoint-84/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b6d44ec7670bb505bd8432352092989b1a15c296 --- /dev/null +++ b/checkpoint-84/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha_pattern": {}, + "auto_mapping": { + "base_model_class": "GitForCausalLM", + "parent_library": "transformers.models.git.modeling_git" + }, + "base_model_name_or_path": "microsoft/git-base", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": [ + "classifier" + ], + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "value", + "query" + ], + "task_type": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-84/adapter_model.safetensors b/checkpoint-84/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..71a56c5b6c80c968db322d76a5e1eb02e37b18a6 --- /dev/null +++ b/checkpoint-84/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:884623567b2aecb431ff989846fef7e6e98e00bf1638fe9a188ecd17309c6884 +size 593144 diff --git a/checkpoint-84/optimizer.pt b/checkpoint-84/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..1703006bd7b76b0fe092e72d6135187fa3c40811 --- /dev/null +++ b/checkpoint-84/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:202960485f0f38453259014f6a2b5f7cc7fc32de635e104326f9b5a0c320a6ac +size 1200278 diff --git a/checkpoint-84/rng_state.pth b/checkpoint-84/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..287d4afbccdf4cbed80e1ef3739b1d0549f09f61 --- /dev/null +++ b/checkpoint-84/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2fbc7c13818bfa0295b540aa7c11a203173689d0d999c83989bc7ec3da8131f4 +size 14244 diff --git a/checkpoint-84/scheduler.pt b/checkpoint-84/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..118d7177cec4880e6d5ce3a998c859f7db2c0777 --- /dev/null +++ b/checkpoint-84/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8fe6ffd0dfa5cdc2d988b2c274f10349bf2c3f4b009ed31ea92cfd473d08d86 +size 1064 diff --git a/checkpoint-84/trainer_state.json b/checkpoint-84/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..b31270cbc187d42234b7f0e0cd55fcfc39af1162 --- /dev/null +++ b/checkpoint-84/trainer_state.json @@ -0,0 +1,61 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 4.0, + "eval_steps": 500, + "global_step": 84, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 1.0, + "eval_runtime": 11.809, + "eval_samples_per_second": 3.811, + "eval_steps_per_second": 0.423, + "step": 21 + }, + { + "epoch": 2.0, + "eval_runtime": 6.8145, + "eval_samples_per_second": 6.604, + "eval_steps_per_second": 0.734, + "step": 42 + }, + { + "epoch": 3.0, + "eval_runtime": 6.0474, + "eval_samples_per_second": 7.441, + "eval_steps_per_second": 0.827, + "step": 63 + }, + { + "epoch": 4.0, + "eval_runtime": 6.3718, + "eval_samples_per_second": 7.062, + "eval_steps_per_second": 0.785, + "step": 84 + } + ], + "logging_steps": 500, + "max_steps": 630, + "num_input_tokens_seen": 0, + "num_train_epochs": 30, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 393233923031040.0, + "train_batch_size": 10, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-84/training_args.bin b/checkpoint-84/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b601bb06afbbd0f62027bb49993f43301f9ebdb5 --- /dev/null +++ b/checkpoint-84/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:870e1ff24d52f571f9b66eecb81300ba7b620de79860cca05110db11bcc791fa +size 5176 diff --git a/ig.py b/ig.py new file mode 100644 index 0000000000000000000000000000000000000000..38f91af16037d741d0b9fd5b26737c4370032609 --- /dev/null +++ b/ig.py @@ -0,0 +1,45 @@ +from transformers import AutoProcessor, AutoModelForCausalLM +import gradio as gr +import torch + +# Load the processor and model +processor = AutoProcessor.from_pretrained("microsoft/git-base") +model = AutoModelForCausalLM.from_pretrained("./ig_caption") + +def predict(image): + try: + # Prepare the image using the processor + inputs = processor(images=image, return_tensors="pt") + + # Move inputs to the appropriate device + device = "cuda" if torch.cuda.is_available() else "cpu" + inputs = {key: value.to(device) for key, value in inputs.items()} + model.to(device) + + # Generate the caption + outputs = model.generate(**inputs) + + # Decode the generated caption + caption = processor.batch_decode(outputs, skip_special_tokens=True)[0] + + return caption + + except Exception as e: + print("Error during prediction:", str(e)) + return "Error: " + str(e) + +# https://www.gradio.app/guides +with gr.Blocks() as demo: + image = gr.Image(type="pil") + predict_btn = gr.Button("Predict", variant="primary") + output = gr.Label(label="Generated Caption") + + inputs = [image] + outputs = [output] + + predict_btn.click(predict, inputs=inputs, outputs=outputs) + +if __name__ == "__main__": + demo.launch() # Local machine only + # demo.launch(server_name="0.0.0.0") # LAN access to local machine + # demo.launch(share=True) # Public access to local machine diff --git a/runs/Oct08_15-21-38_528d319373fc/events.out.tfevents.1728400921.528d319373fc.2135.0 b/runs/Oct08_15-21-38_528d319373fc/events.out.tfevents.1728400921.528d319373fc.2135.0 new file mode 100644 index 0000000000000000000000000000000000000000..90882c1bd9e314464fcd4f77b3cd846fe6eb9518 --- /dev/null +++ b/runs/Oct08_15-21-38_528d319373fc/events.out.tfevents.1728400921.528d319373fc.2135.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25e0ff360af5246f4cb5be65907be2137310ad904b4a68623dc9cee5d613be8f +size 12333 diff --git a/training_args.bin b/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b601bb06afbbd0f62027bb49993f43301f9ebdb5 --- /dev/null +++ b/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:870e1ff24d52f571f9b66eecb81300ba7b620de79860cca05110db11bcc791fa +size 5176