samikhan121 committed on Apr 3, 2024

Commit

b46fbf4

verified ·

1 Parent(s): 2fb8d8e

trained on indicmarco for 2 epochs

Browse files

Files changed (19) hide show

checkpoint-4000/config.json +29 -0
checkpoint-4000/optimizer.pt +3 -0
checkpoint-4000/pytorch_model.bin +3 -0
checkpoint-4000/rng_state.pth +3 -0
checkpoint-4000/scaler.pt +3 -0
checkpoint-4000/scheduler.pt +3 -0
checkpoint-4000/special_tokens_map.json +1 -0
checkpoint-4000/spiece.model +3 -0
checkpoint-4000/tokenizer.json +0 -0
checkpoint-4000/tokenizer_config.json +1 -0
checkpoint-4000/trainer_state.json +264 -0
checkpoint-4000/training_args.bin +3 -0
config.json +29 -0
pytorch_model.bin +3 -0
special_tokens_map.json +1 -0
spiece.model +3 -0
tokenizer.json +0 -0
tokenizer_config.json +1 -0
training_args.bin +3 -0

checkpoint-4000/config.json ADDED Viewed

	@@ -0,0 +1,29 @@

+{
+  "_name_or_path": "csebuetnlp/banglat5",
+  "architectures": [
+    "T5ForConditionalGeneration"
+  ],
+  "d_ff": 2048,
+  "d_kv": 64,
+  "d_model": 768,
+  "decoder_start_token_id": 0,
+  "dropout_rate": 0.1,
+  "eos_token_id": 1,
+  "feed_forward_proj": "gated-gelu",
+  "gradient_checkpointing": false,
+  "initializer_factor": 1.0,
+  "is_encoder_decoder": true,
+  "layer_norm_epsilon": 1e-06,
+  "model_type": "t5",
+  "num_decoder_layers": 12,
+  "num_heads": 12,
+  "num_layers": 12,
+  "output_past": true,
+  "pad_token_id": 0,
+  "relative_attention_num_buckets": 32,
+  "tie_word_embeddings": false,
+  "torch_dtype": "float32",
+  "transformers_version": "4.16.0",
+  "use_cache": true,
+  "vocab_size": 32128
+}

checkpoint-4000/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3fcfb6be105945579dfc4dc3f59f6a8855baf8a1cd6c686a8534db9ea254db5c
+size 1980788441

checkpoint-4000/pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8f7459856ae487651345537ffc64109198fbba2eb58b1a21b1e785478cc80586
+size 990438349

checkpoint-4000/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a956747b430cb60db8a0fb636f8cdadaf3a232a106344911fff66a6d543d067d
+size 14503

checkpoint-4000/scaler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:065f8f26cceeb1f27fd509c28355e76259811bd58ce56cd862d4f918dd8697b9
+size 559

checkpoint-4000/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3a3235bd7b0de3314aa893f2f2f39d61f825435aadbdb8dbe0bc333f10ce1beb
+size 623

checkpoint-4000/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1 @@

+ {"eos_token": "</s>", "unk_token": "<unk>", "pad_token": "<pad>", "additional_special_tokens": ["<extra_id_0>", "<extra_id_1>", "<extra_id_2>", "<extra_id_3>", "<extra_id_4>", "<extra_id_5>", "<extra_id_6>", "<extra_id_7>", "<extra_id_8>", "<extra_id_9>", "<extra_id_10>", "<extra_id_11>", "<extra_id_12>", "<extra_id_13>", "<extra_id_14>", "<extra_id_15>", "<extra_id_16>", "<extra_id_17>", "<extra_id_18>", "<extra_id_19>", "<extra_id_20>", "<extra_id_21>", "<extra_id_22>", "<extra_id_23>", "<extra_id_24>", "<extra_id_25>", "<extra_id_26>", "<extra_id_27>", "<extra_id_28>", "<extra_id_29>", "<extra_id_30>", "<extra_id_31>", "<extra_id_32>", "<extra_id_33>", "<extra_id_34>", "<extra_id_35>", "<extra_id_36>", "<extra_id_37>", "<extra_id_38>", "<extra_id_39>", "<extra_id_40>", "<extra_id_41>", "<extra_id_42>", "<extra_id_43>", "<extra_id_44>", "<extra_id_45>", "<extra_id_46>", "<extra_id_47>", "<extra_id_48>", "<extra_id_49>", "<extra_id_50>", "<extra_id_51>", "<extra_id_52>", "<extra_id_53>", "<extra_id_54>", "<extra_id_55>", "<extra_id_56>", "<extra_id_57>", "<extra_id_58>", "<extra_id_59>", "<extra_id_60>", "<extra_id_61>", "<extra_id_62>", "<extra_id_63>", "<extra_id_64>", "<extra_id_65>", "<extra_id_66>", "<extra_id_67>", "<extra_id_68>", "<extra_id_69>", "<extra_id_70>", "<extra_id_71>", "<extra_id_72>", "<extra_id_73>", "<extra_id_74>", "<extra_id_75>", "<extra_id_76>", "<extra_id_77>", "<extra_id_78>", "<extra_id_79>", "<extra_id_80>", "<extra_id_81>", "<extra_id_82>", "<extra_id_83>", "<extra_id_84>", "<extra_id_85>", "<extra_id_86>", "<extra_id_87>", "<extra_id_88>", "<extra_id_89>", "<extra_id_90>", "<extra_id_91>", "<extra_id_92>", "<extra_id_93>", "<extra_id_94>", "<extra_id_95>", "<extra_id_96>", "<extra_id_97>", "<extra_id_98>", "<extra_id_99>"]}

checkpoint-4000/spiece.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7dcab96935a2a51b1461c84e44c952ea8a3640c8bc3e2c6ae7a21d855454ae27
+size 1111492

checkpoint-4000/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

checkpoint-4000/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1 @@

+ {"eos_token": "</s>", "unk_token": "<unk>", "pad_token": "<pad>", "extra_ids": 100, "additional_special_tokens": ["<extra_id_0>", "<extra_id_1>", "<extra_id_2>", "<extra_id_3>", "<extra_id_4>", "<extra_id_5>", "<extra_id_6>", "<extra_id_7>", "<extra_id_8>", "<extra_id_9>", "<extra_id_10>", "<extra_id_11>", "<extra_id_12>", "<extra_id_13>", "<extra_id_14>", "<extra_id_15>", "<extra_id_16>", "<extra_id_17>", "<extra_id_18>", "<extra_id_19>", "<extra_id_20>", "<extra_id_21>", "<extra_id_22>", "<extra_id_23>", "<extra_id_24>", "<extra_id_25>", "<extra_id_26>", "<extra_id_27>", "<extra_id_28>", "<extra_id_29>", "<extra_id_30>", "<extra_id_31>", "<extra_id_32>", "<extra_id_33>", "<extra_id_34>", "<extra_id_35>", "<extra_id_36>", "<extra_id_37>", "<extra_id_38>", "<extra_id_39>", "<extra_id_40>", "<extra_id_41>", "<extra_id_42>", "<extra_id_43>", "<extra_id_44>", "<extra_id_45>", "<extra_id_46>", "<extra_id_47>", "<extra_id_48>", "<extra_id_49>", "<extra_id_50>", "<extra_id_51>", "<extra_id_52>", "<extra_id_53>", "<extra_id_54>", "<extra_id_55>", "<extra_id_56>", "<extra_id_57>", "<extra_id_58>", "<extra_id_59>", "<extra_id_60>", "<extra_id_61>", "<extra_id_62>", "<extra_id_63>", "<extra_id_64>", "<extra_id_65>", "<extra_id_66>", "<extra_id_67>", "<extra_id_68>", "<extra_id_69>", "<extra_id_70>", "<extra_id_71>", "<extra_id_72>", "<extra_id_73>", "<extra_id_74>", "<extra_id_75>", "<extra_id_76>", "<extra_id_77>", "<extra_id_78>", "<extra_id_79>", "<extra_id_80>", "<extra_id_81>", "<extra_id_82>", "<extra_id_83>", "<extra_id_84>", "<extra_id_85>", "<extra_id_86>", "<extra_id_87>", "<extra_id_88>", "<extra_id_89>", "<extra_id_90>", "<extra_id_91>", "<extra_id_92>", "<extra_id_93>", "<extra_id_94>", "<extra_id_95>", "<extra_id_96>", "<extra_id_97>", "<extra_id_98>", "<extra_id_99>"], "model_max_length": 512, "special_tokens_map_file": 
"/home/cse/.cache/huggingface/transformers/ff0ed476d41a6f336fa52bd906c6c8f0a8684fe67bec634b201ed2d24331c915.c94798918c92ded6aeef2d2f0e666d2cc4145eca1aa6e1336fde07f2e13e2f46", "name_or_path": "csebuetnlp/banglat5", "sp_model_kwargs": {}, "tokenizer_class": "T5Tokenizer"}

checkpoint-4000/trainer_state.json ADDED Viewed

	@@ -0,0 +1,264 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 1.925743170056565,
+  "global_step": 4000,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.05,
+      "learning_rate": 9.759268175252769e-05,
+      "loss": 9.8702,
+      "step": 100
+    },
+    {
+      "epoch": 0.1,
+      "learning_rate": 9.518536350505537e-05,
+      "loss": 5.0407,
+      "step": 200
+    },
+    {
+      "epoch": 0.14,
+      "learning_rate": 9.277804525758305e-05,
+      "loss": 3.9664,
+      "step": 300
+    },
+    {
+      "epoch": 0.19,
+      "learning_rate": 9.037072701011074e-05,
+      "loss": 3.6556,
+      "step": 400
+    },
+    {
+      "epoch": 0.24,
+      "learning_rate": 8.796340876263843e-05,
+      "loss": 3.4608,
+      "step": 500
+    },
+    {
+      "epoch": 0.29,
+      "learning_rate": 8.555609051516611e-05,
+      "loss": 3.3129,
+      "step": 600
+    },
+    {
+      "epoch": 0.34,
+      "learning_rate": 8.31487722676938e-05,
+      "loss": 3.2072,
+      "step": 700
+    },
+    {
+      "epoch": 0.39,
+      "learning_rate": 8.074145402022148e-05,
+      "loss": 3.1383,
+      "step": 800
+    },
+    {
+      "epoch": 0.43,
+      "learning_rate": 7.833413577274916e-05,
+      "loss": 3.0606,
+      "step": 900
+    },
+    {
+      "epoch": 0.48,
+      "learning_rate": 7.592681752527685e-05,
+      "loss": 2.9994,
+      "step": 1000
+    },
+    {
+      "epoch": 0.53,
+      "learning_rate": 7.351949927780452e-05,
+      "loss": 2.9714,
+      "step": 1100
+    },
+    {
+      "epoch": 0.58,
+      "learning_rate": 7.111218103033221e-05,
+      "loss": 2.9432,
+      "step": 1200
+    },
+    {
+      "epoch": 0.63,
+      "learning_rate": 6.87048627828599e-05,
+      "loss": 2.8978,
+      "step": 1300
+    },
+    {
+      "epoch": 0.67,
+      "learning_rate": 6.629754453538758e-05,
+      "loss": 2.8998,
+      "step": 1400
+    },
+    {
+      "epoch": 0.72,
+      "learning_rate": 6.389022628791527e-05,
+      "loss": 2.8704,
+      "step": 1500
+    },
+    {
+      "epoch": 0.77,
+      "learning_rate": 6.148290804044296e-05,
+      "loss": 2.8642,
+      "step": 1600
+    },
+    {
+      "epoch": 0.82,
+      "learning_rate": 5.907558979297063e-05,
+      "loss": 2.832,
+      "step": 1700
+    },
+    {
+      "epoch": 0.87,
+      "learning_rate": 5.666827154549832e-05,
+      "loss": 2.8027,
+      "step": 1800
+    },
+    {
+      "epoch": 0.91,
+      "learning_rate": 5.426095329802601e-05,
+      "loss": 2.7968,
+      "step": 1900
+    },
+    {
+      "epoch": 0.96,
+      "learning_rate": 5.185363505055368e-05,
+      "loss": 2.7954,
+      "step": 2000
+    },
+    {
+      "epoch": 1.0,
+      "eval_loss": 2.46354079246521,
+      "eval_runtime": 2.8271,
+      "eval_samples_per_second": 353.714,
+      "eval_steps_per_second": 44.214,
+      "step": 2077
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.9446316803081375e-05,
+      "loss": 2.7673,
+      "step": 2100
+    },
+    {
+      "epoch": 1.06,
+      "learning_rate": 4.7038998555609055e-05,
+      "loss": 2.7401,
+      "step": 2200
+    },
+    {
+      "epoch": 1.11,
+      "learning_rate": 4.4631680308136736e-05,
+      "loss": 2.7456,
+      "step": 2300
+    },
+    {
+      "epoch": 1.16,
+      "learning_rate": 4.222436206066442e-05,
+      "loss": 2.742,
+      "step": 2400
+    },
+    {
+      "epoch": 1.2,
+      "learning_rate": 3.98170438131921e-05,
+      "loss": 2.7218,
+      "step": 2500
+    },
+    {
+      "epoch": 1.25,
+      "learning_rate": 3.740972556571979e-05,
+      "loss": 2.7248,
+      "step": 2600
+    },
+    {
+      "epoch": 1.3,
+      "learning_rate": 3.500240731824748e-05,
+      "loss": 2.7145,
+      "step": 2700
+    },
+    {
+      "epoch": 1.35,
+      "learning_rate": 3.259508907077516e-05,
+      "loss": 2.7046,
+      "step": 2800
+    },
+    {
+      "epoch": 1.4,
+      "learning_rate": 3.0187770823302842e-05,
+      "loss": 2.7095,
+      "step": 2900
+    },
+    {
+      "epoch": 1.44,
+      "learning_rate": 2.7780452575830522e-05,
+      "loss": 2.7005,
+      "step": 3000
+    },
+    {
+      "epoch": 1.49,
+      "learning_rate": 2.537313432835821e-05,
+      "loss": 2.6941,
+      "step": 3100
+    },
+    {
+      "epoch": 1.54,
+      "learning_rate": 2.2965816080885893e-05,
+      "loss": 2.6727,
+      "step": 3200
+    },
+    {
+      "epoch": 1.59,
+      "learning_rate": 2.0558497833413577e-05,
+      "loss": 2.6959,
+      "step": 3300
+    },
+    {
+      "epoch": 1.64,
+      "learning_rate": 1.8151179585941264e-05,
+      "loss": 2.6839,
+      "step": 3400
+    },
+    {
+      "epoch": 1.69,
+      "learning_rate": 1.5743861338468945e-05,
+      "loss": 2.6781,
+      "step": 3500
+    },
+    {
+      "epoch": 1.73,
+      "learning_rate": 1.333654309099663e-05,
+      "loss": 2.6732,
+      "step": 3600
+    },
+    {
+      "epoch": 1.78,
+      "learning_rate": 1.0929224843524314e-05,
+      "loss": 2.6776,
+      "step": 3700
+    },
+    {
+      "epoch": 1.83,
+      "learning_rate": 8.521906596051998e-06,
+      "loss": 2.673,
+      "step": 3800
+    },
+    {
+      "epoch": 1.88,
+      "learning_rate": 6.114588348579683e-06,
+      "loss": 2.6832,
+      "step": 3900
+    },
+    {
+      "epoch": 1.93,
+      "learning_rate": 3.7072701011073664e-06,
+      "loss": 2.6659,
+      "step": 4000
+    }
+  ],
+  "max_steps": 4154,
+  "num_train_epochs": 2,
+  "total_flos": 2.1297995361473126e+17,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-4000/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:af64407701951f621066c4162ac83b5a15d5e53e88b725b8747fc592fe12d5b0
+size 3183

config.json ADDED Viewed

	@@ -0,0 +1,29 @@

+{
+  "_name_or_path": "csebuetnlp/banglat5",
+  "architectures": [
+    "T5ForConditionalGeneration"
+  ],
+  "d_ff": 2048,
+  "d_kv": 64,
+  "d_model": 768,
+  "decoder_start_token_id": 0,
+  "dropout_rate": 0.1,
+  "eos_token_id": 1,
+  "feed_forward_proj": "gated-gelu",
+  "gradient_checkpointing": false,
+  "initializer_factor": 1.0,
+  "is_encoder_decoder": true,
+  "layer_norm_epsilon": 1e-06,
+  "model_type": "t5",
+  "num_decoder_layers": 12,
+  "num_heads": 12,
+  "num_layers": 12,
+  "output_past": true,
+  "pad_token_id": 0,
+  "relative_attention_num_buckets": 32,
+  "tie_word_embeddings": false,
+  "torch_dtype": "float32",
+  "transformers_version": "4.16.0",
+  "use_cache": true,
+  "vocab_size": 32128
+}

pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0bda1dd771e10a900459c5bf7507b689da168d85cbf0c303643ec3d0b79611e7
+size 990438349

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1 @@

spiece.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7dcab96935a2a51b1461c84e44c952ea8a3640c8bc3e2c6ae7a21d855454ae27
+size 1111492

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1 @@

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:af64407701951f621066c4162ac83b5a15d5e53e88b725b8747fc592fe12d5b0
+size 3183