{
"async_checkpointing": false,
"async_eval_ngpus": -1,
"batch_size": 4,
"data": "",
"disable_logging": false,
"disable_workers_print": false,
"dtype": "bf16",
"dump_after_steps": 0,
"dump_dir": "/fsx-onellm/rpasunuru/SFT/v2.1_textpp_30b_730k_sftv1.4_exp1/v2.1_textpp_30b_730k_sftv1.4_exp1_run000",
"dump_freq": 400,
"dump_profile_traces": false,
"enable_loss_tracker": false,
"epochs": -1,
"eval_freq": 100000,
"exp_id": "",
"exp_name": "",
"finetuning_dir": "/fsx-onellm/shared/from_rsc//v2.1_30b_qk_zloss_snorm_Nov_26_3_run000_checkpoint_0730000",
"fp32_reduce_scatter": "all",
"gpu_check_level": 3,
"image_loss_weight": 1.0,
"image_text_rotation_prob": 0.0,
"instruct": {
"no_loss_prompt": true,
"no_loss_truncated": false,
"use_eot": true
},
"instruct_data": "/fsx-onellm/shared/finetuning/sft_v1.4_data/splits/train/long_caption:2.92,/fsx-onellm/shared/finetuning/sft_v1.4_data/splits/train/vqa:4.59,/fsx-onellm/shared/finetuning/sft_v1.4_data/splits/train/text2image:10.44,/fsx-onellm/shared/finetuning/sft_v1.4_data/splits/train/llama2_rjv6_helpful:43.27,/fsx-onellm/shared/finetuning/sft_v1.4_data/splits/train/code_llama:0.51,/fsx-onellm/shared/finetuning/sft_v1.4_data/splits/train/interleaved_batch1-17:27.45,/fsx-onellm/shared/finetuning/sft_v1.4_data/splits/train/image_dialogue:7.46,/fsx-onellm/shared/finetuning/sft_v1.4_data/splits/train/llama2_rjv6_harmless:0.97,/fsx-onellm/shared/finetuning/sft_v1.4_data/splits/train/cybersec_safety:0.33,/fsx-onellm/shared/finetuning/sft_v1.4_data/splits/train/onellm_multimodal_safety:0.86,/fsx-onellm/shared/finetuning/sft_v1.4_data/splits/train/autosafety:0.51,/fsx-onellm/shared/finetuning/sft_v1.4_data/splits/train/rainbow_safety:0.10,/fsx-onellm/shared/finetuning/sft_v1.4_data/splits/train/genai_safety:0.58",
"iter_gopher": {
"buffer_size": 16,
"max_precompute": 10,
"n_chars_by_tok": 15,
"n_seqs_to_concat": 10,
"num_processes": 1
},
"iter_jsonl": {
"buffer_size": 64,
"same_data": false
},
"iter_multi": {
"buffer_size": 512,
"ignore_extra_chunks": true,
"max_precompute": 20,
"multiprocess": true
},
"iter_type": "multi",
"keep_checkpoints_every_steps": 400,
"keep_eval_checkpoints": true,
"keep_n_last_checkpoints": 2,
"log_all_steps": false,
"log_freq": 10,
"log_updates": true,
"log_wandb": false,
"loss_rescaling": false,
"model": {
"add_extra_toks": "0",
"alpha_depth": "disabled",
"attn_dropout": 0,
"attn_to_keep": "all",
"custom_bwd": false,
"dim": 8192,
"dropout": 0.05,
"efficient_attn": "flash",
"emb_dropout": 0,
"ffn_dim_multiplier": 1.0,
"ffn_dropout": 0,
"full_logging_n_layers": 4,
"fuse_sequence_parallel": false,
"init": {
"coeff_std": null,
"depth_last": false,
"fixed_std": null,
"no_init": false,
"pos_init_scalar": null,
"use_depth": "current",
"use_gaussian": true
},
"layer_ckpt": "0::2",
"linear_residual_dropout": false,
"loss_parallel": true,
"max_length": 2048,
"multiple_of": 256,
"n_heads": 64,
"n_kv_heads": 8,
"n_layers": 48,
"non_linearity": "swiglu",
"norm_affine": true,
"norm_eps": 1e-05,
"norm_type": "rmsnorm",
"output_dropout": 0,
"output_size": -1,
"pre_norm": true,
"qk_normalization": true,
"recompute_attn": true,
"recompute_fc1_out": true,
"recompute_fc3_out": true,
"residual_dropout": 0.0,
"rope_theta": 10000.0,
"sequence_parallel": false,
"swin_norm": true,
"turn_eos_token": "<eos>",
"use_rope": true,
"vocab_size": 65536
},
"model_parallel_size": 4,
"no_final_ckpt": false,
"num_retrieved_docs": 0,
"old_mp": -1,
"old_world_size": -1,
"optim": {
"beta1": 0.9,
"beta2": 0.95,
"clip": 1.0,
"cosine_theta": 1.0,
"cycle_length": 1.0,
"epsilon": 1e-08,
"exp_factor": 0.5,
"lr": 1e-05,
"lr_min_ratio": 0.1,
"scheduler": "cosine",
"use_deprecated_optim": false,
"warmup": 100,
"weight_decay": 0.1
},
"periodic_gpu_check": true,
"profile_freq": -1,
"reshard_after_forward": true,
"restore_dataloader_position": false,
"retrieval_prob": 0.0,
"rlhf": null,
"root_dump_dir": "",
"save_optimizer_states": true,
"seq_len": 4096,
"slurm": {
"global_rank": 0,
"is_slurm_job": true,
"world_size": 128
},
"steps": 1200,
"tokenizer": "/fsx-onellm/rpasunuru/models/cm3z/cm3v2_7b_placeholder/gpt2-unified-image-sentinel.json",
"tokenizer_dir": "/fsx/guismay/data/large_experiments/fair_llm/datasets/tokenizers",
"torch_seed": -1,
"unlimited_steps": false,
"use_hf_tokenizer": true,
"valid": {
"batch_size": 1,
"debug": false,
"majority_voting": 0,
"n_batches": 100,
"onellm_eval": false,
"onellm_eval_media_storage": "",
"ppl_files_str": "",
"prompt_path": "",
"prompt_templates": "{}",
"random_fewshots": false,
"seq_len": 4096,
"tasks_root_dir": "",
"tasks_str": "",
"temperature": 1.0,
"top_k": 0,
"top_p": 0.0,
"use_sampling": false,
"write_eval": false
},
"wandb_entity": "violet-zct",
"wandb_project": "instruct_sft",
"water_marking_codes_str": null,
"z_loss_weight": 0.0001
}