YAML Metadata Warning: empty or missing YAML metadata in repo card (https://huggingface.co/docs/hub/model-cards#model-card-metadata)
culturax-ar-spbpe32k-focus-embs-anneal-bf16-mixed-xassyy15
W&B run URL: https://wandb.ai/konstantinjdobler/tv/runs/xassyy15
W&B run ID: xassyy15
Metadata
{
"data_dir": "/raid/konstantin.dobler/culturax/ar/ar/tokenized/tokenizers_ar_sp-bpe-ar-32kauto",
"model_path": "/raid/konstantin.dobler/checkpoints/culturax-ar-spbpe32k-smart-heuristics-attn-fix-infini-just-embs/ruxr78xn/step-0000100-ckpt",
"from_scratch": false,
"saved_checkpoint_path": null,
"resume": false,
"train_file": "train.txt",
"val_file": "val.txt",
"tokenizer_path": "/raid/konstantin.dobler/checkpoints/culturax-ar-spbpe32k-smart-heuristics-attn-fix-infini-just-embs/ruxr78xn/step-0000100-ckpt",
"base_unit": "optimizer-steps",
"training_goal": 7680,
"eval_interval": 384,
"eval_samples": 10000,
"save_interval": 768,
"log_interval": 1,
"model_profiling_interval": 10,
"warmup_period": 76,
"lr_decay_period": 4608,
"lr_final_annealing_period": 1075,
"block_size": 4096,
"decontaminated_packing": true,
"max_lr": 3e-05,
"batch_size": 256,
"weight_decay": 0.05,
"beta1": 0.9,
"beta2": 0.95,
"grad_clip": 1.0,
"min_lr": 2e-06,
"infinite_lr": 1.5e-05,
"accelerator": "cuda",
"num_devices": 4,
"activation_checkpointing": false,
"distributed_strategy": "auto",
"use_fsdp": true,
"fsdp_sharding_strategy": "SHARD_GRAD_OP",
"fsdp_limit_all_gathers": false,
"fsdp_cpu_offload": false,
"fsdp_ram_friendly_checkpointing": false,
"fsdp_backward_prefetch_post": false,
"smart_cuda_alloc": false,
"fast_model_loading": true,
"micro_batch_size": 1,
"eval_micro_batch_size": 10,
"gradient_accumulation_steps": 64,
"precision": "bf16-true",
"use_anyprecision_adamw": false,
"adamw_foreach": true,
"compile": false,
"use_additional_flash_attn_kernels": true,
"workers": 8,
"preprocessing_workers": 224,
"run_name": "culturax-ar-spbpe32k-focus+justembs-attn-fix-infini",
"seed": 42,
"only_val": false,
"val_before_training": true,
"out_dir": "/raid/konstantin.dobler/checkpoints/culturax-ar-spbpe32k-focus+justembs-attn-fix-infini",
"wandb_tags": [],
"offline": false,
"debug": false,
"model_profiling": true,
"force_deterministic": false,
"fast_dev_run": false,
"cross_tokenizer_val": false,
"optimized_activation_checkpointing_policy": false,
"train_embeddings": false,
"train_only_embeddings": false,
"focus_init": false,
"refocus_init": false,
"mean_init": false,
"random_init": false,
"zipf_init": false,
"smart_heuristic_init": false,
"wechsel_init": false,
"deepfocus_init": false,
"zett_init": false,
"focus_fasttext_dim": 300,
"focus_fasttext_epochs": 3,
"focus_fasttext_min_count": 10,
"focus_auxiliary_mode": "fasttext-tokenlevel",
"focus_fasttext_model_path": null,
"focus_exact_match_all": false,
"focus_match_symbols": false,
"focus_bilingual_dict": null,
"focus_bilingual_dict_mode": "mean",
"focus_fuzzy_match_all": false,
"focus_random_init_source": "source",
"lora_r": 8,
"lora_alpha": 16,
"lora_dropout": 0.05,
"lora_query": false,
"lora_key": false,
"lora_value": false,
"lora_projection": false,
"lora_mlp": false,
"lora_head": false,
"perf_benchmark": false
}
Downloads last month: 19
This model does not have enough activity to be deployed to the Inference API (serverless) yet. Increase its social visibility and check back later, or deploy to Inference Endpoints (dedicated) instead.