---
datasets:
- TEAMGORANI/gorani-100k
language:
- en
library_name: transformers
pipeline_tag: text-generation
---

- model = TEAMGORANI/gorani-100k-llama2-13b-instruct
- dataset_name = [TEAMGORANI/gorani-100k](https://huggingface.co/datasets/TEAMGORANI/gorani-100k)

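A minimal usage sketch (not part of the original training code): loading this checkpoint with the transformers library. The model id comes from this card; the prompt handling and generation length are illustrative assumptions.

```python
# Hypothetical usage sketch for this checkpoint. The model id is taken from
# this card; everything else (generation length, decoding) is illustrative.

def generate(prompt: str, max_new_tokens: int = 128) -> str:
    # Deferred imports: transformers/torch are heavy optional dependencies.
    from transformers import AutoModelForCausalLM, AutoTokenizer

    model_id = "TEAMGORANI/gorani-100k-llama2-13b-instruct"
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    # device_map={"": 0} matches the GPU configuration listed on this card.
    model = AutoModelForCausalLM.from_pretrained(model_id, device_map={"": 0})
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    output_ids = model.generate(**inputs, max_new_tokens=max_new_tokens)
    return tokenizer.decode(output_ids[0], skip_special_tokens=True)
```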
| **Training Process** | |
|------------------------------|--------------------|
| Tokenizer Used | LlamaTokenizerFast |
| Training Progress | Epoch 3.15/16 |
| Step | 19740/100000 |
| Google Colab Resource Usage | 150 tokens used |

| **System Information** | **Used** | **Total** |
|------------------------|----------|-----------|
| System RAM | 5.8 GB | 83.5 GB |
| GPU RAM | 26.6 GB | 40.0 GB |
| Disk | 74.0 GB | 166.8 GB |

| **Basic Training Settings** | |
|-----------------------------|--------|
| local_rank | -1 |
| per_device_train_batch_size | 4 |
| per_device_eval_batch_size | 1 |
| gradient_accumulation_steps | 4 |
| learning_rate | 2e-4 |
| max_grad_norm | 0.3 |
| weight_decay | 0.001 |
| max_seq_length | 2048 |
| num_train_epochs | 1 |
| max_steps | 100000 |
| warmup_ratio | 0.03 |
| save_steps | 500000 |
| logging_steps | 10000 |

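As a quick sanity check on the settings above, the effective batch size and warmup length follow directly from the listed values:

```python
# Values copied from the Basic Training Settings table above.
per_device_train_batch_size = 4
gradient_accumulation_steps = 4
max_steps = 100_000
warmup_ratio = 0.03

# Effective examples per optimizer step on a single GPU:
effective_batch_size = per_device_train_batch_size * gradient_accumulation_steps
# Number of warmup steps implied by the ratio:
warmup_steps = int(max_steps * warmup_ratio)

print(effective_batch_size)  # 16
print(warmup_steps)          # 3000
```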
| **4-bit Precision Settings** | |
|------------------------------|------------|
| use_4bit | True |
| use_nested_quant | False |
| bnb_4bit_compute_dtype | "bfloat16" |
| bnb_4bit_quant_type | "nf4" |

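For reference, the 4-bit settings above map onto the transformers/bitsandbytes quantization config roughly as follows. This is a sketch of the correspondence, not the exact training code:

```python
import torch
from transformers import BitsAndBytesConfig

# Sketch: the 4-bit table above expressed as a quantization config.
# Field names follow the transformers bitsandbytes integration.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,                      # use_4bit
    bnb_4bit_use_double_quant=False,        # use_nested_quant
    bnb_4bit_compute_dtype=torch.bfloat16,  # bnb_4bit_compute_dtype
    bnb_4bit_quant_type="nf4",              # bnb_4bit_quant_type
)
```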
| **LoRA Settings** | |
|-------------------|-----|
| lora_alpha | 16 |
| lora_dropout | 0.1 |
| lora_r | 64 |

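These LoRA hyperparameters correspond to a peft `LoraConfig` along these lines. The `bias` and `task_type` values are assumed defaults for a causal-LM QLoRA run and are not stated on this card:

```python
from peft import LoraConfig

lora_config = LoraConfig(
    r=64,               # lora_r
    lora_alpha=16,      # lora_alpha
    lora_dropout=0.1,   # lora_dropout
    bias="none",            # assumption: common QLoRA default, not stated here
    task_type="CAUSAL_LM",  # assumption: causal language modeling fine-tune
)
# Note: lora_alpha / lora_r = 16 / 64 gives an effective LoRA scaling of 0.25.
```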
| **Advanced Training Flags** | |
|-----------------------------|---------------------|
| fp16 | False |
| bf16 | False |
| packing | False |
| gradient_checkpointing | True |
| optim | "paged_adamw_32bit" |
| lr_scheduler_type | "constant" |
| group_by_length | True |

| **GPU Configuration** | |
|-----------------------|---------|
| device_map | {"": 0} |
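Taken together, the tables above would translate into a transformers `TrainingArguments` roughly like the following sketch. `output_dir` is a placeholder (the card does not state one); `max_seq_length` and `packing` are omitted because they belong to the trl `SFTTrainer`, not `TrainingArguments`:

```python
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="./results",  # placeholder: not stated on this card
    per_device_train_batch_size=4,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=4,
    learning_rate=2e-4,
    max_grad_norm=0.3,
    weight_decay=0.001,
    num_train_epochs=1,
    max_steps=100_000,
    warmup_ratio=0.03,
    save_steps=500_000,
    logging_steps=10_000,
    fp16=False,
    bf16=False,
    gradient_checkpointing=True,
    optim="paged_adamw_32bit",
    lr_scheduler_type="constant",
    group_by_length=True,
)
```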