Upload folder using huggingface_hub

Browse files

Files changed (9) hide show

README.md +7 -4
adapter_config.json +2 -2
adapter_model.safetensors +1 -1
optimizer.pt +3 -0
rng_state.pth +3 -0
scheduler.pt +3 -0
tokenizer_config.json +1 -1
trainer_state.json +1142 -0
training_args.bin +3 -0

README.md CHANGED Viewed

@@ -1,6 +1,6 @@
 ---
-library_name: transformers
-tags: []
 ---
 # Model Card for Model ID
@@ -15,7 +15,7 @@ tags: []
 <!-- Provide a longer summary of what this model is. -->
-This is the model card of a 🤗 transformers model that has been pushed on the Hub. This model card has been automatically generated.
 - **Developed by:** [More Information Needed]
 - **Funded by [optional]:** [More Information Needed]
@@ -196,4 +196,7 @@ Carbon emissions can be estimated using the [Machine Learning Impact calculator]
 ## Model Card Contact
-[More Information Needed]

 ---
+base_model: meta-llama/Llama-2-7b-hf
+library_name: peft
 ---
 # Model Card for Model ID
 <!-- Provide a longer summary of what this model is. -->
 - **Developed by:** [More Information Needed]
 - **Funded by [optional]:** [More Information Needed]
 ## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.14.0

adapter_config.json CHANGED Viewed

@@ -16,8 +16,8 @@
   "revision": null,
   "scaling": 64.0,
   "target_modules": [
-    "q_proj",
-    "v_proj"
   ],
   "task_type": "CAUSAL_LM"
 }

   "revision": null,
   "scaling": 64.0,
   "target_modules": [
+    "v_proj",
+    "q_proj"
   ],
   "task_type": "CAUSAL_LM"
 }

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:74416b783a361c9e37331d56c58c581071c3d44b5f7fe6b6c629b0ea2a18d0c0
 size 38408880

 version https://git-lfs.github.com/spec/v1
+oid sha256:6d69bc4c97b1766bc7558b33bbac997e4df8578d70bfcb47f300c56ee94ab771
 size 38408880

optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5d3058638990c9146c6a757db139f5e8ca3ab6d1f6d59d3e4741ebcbc65fe68a
+size 76854010

rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4dc99f71a36fe65bf6a5e136aabd4a8cacdb3328a64f5d463f6de79f65a09eba
+size 14180

scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c27ec41164777d5a60d46ff725d04fa16ace6e4584569a815a0c88eecdd1c934
+size 1064

tokenizer_config.json CHANGED Viewed

@@ -33,7 +33,7 @@
   "eos_token": "</s>",
   "extra_special_tokens": {},
   "legacy": false,
-  "model_max_length": 4096,
   "pad_token": "<unk>",
   "padding_side": "right",
   "sp_model_kwargs": {},

   "eos_token": "</s>",
   "extra_special_tokens": {},
   "legacy": false,
+  "model_max_length": 2048,
   "pad_token": "<unk>",
   "padding_side": "right",
   "sp_model_kwargs": {},

trainer_state.json ADDED Viewed

	@@ -0,0 +1,1142 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 3.0,
+  "eval_steps": 5,
+  "global_step": 7764,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.019319938176197836,
+      "grad_norm": 0.0015910037327557802,
+      "learning_rate": 0.000993560020607934,
+      "loss": 0.9793,
+      "step": 50
+    },
+    {
+      "epoch": 0.03863987635239567,
+      "grad_norm": 0.0015677690971642733,
+      "learning_rate": 0.0009871200412158681,
+      "loss": 0.9245,
+      "step": 100
+    },
+    {
+      "epoch": 0.05795981452859351,
+      "grad_norm": 0.0019664347637444735,
+      "learning_rate": 0.0009806800618238022,
+      "loss": 0.9161,
+      "step": 150
+    },
+    {
+      "epoch": 0.07727975270479134,
+      "grad_norm": 0.0017669295193627477,
+      "learning_rate": 0.0009742400824317363,
+      "loss": 0.8992,
+      "step": 200
+    },
+    {
+      "epoch": 0.09659969088098919,
+      "grad_norm": 0.0014395161997526884,
+      "learning_rate": 0.0009678001030396702,
+      "loss": 0.903,
+      "step": 250
+    },
+    {
+      "epoch": 0.11591962905718702,
+      "grad_norm": 0.0019409521482884884,
+      "learning_rate": 0.0009613601236476044,
+      "loss": 0.9156,
+      "step": 300
+    },
+    {
+      "epoch": 0.13523956723338484,
+      "grad_norm": 0.0017209590878337622,
+      "learning_rate": 0.0009549201442555384,
+      "loss": 0.8962,
+      "step": 350
+    },
+    {
+      "epoch": 0.1545595054095827,
+      "grad_norm": 0.0018070234218612313,
+      "learning_rate": 0.0009484801648634724,
+      "loss": 0.901,
+      "step": 400
+    },
+    {
+      "epoch": 0.17387944358578053,
+      "grad_norm": 0.0015913312090560794,
+      "learning_rate": 0.0009420401854714065,
+      "loss": 0.8757,
+      "step": 450
+    },
+    {
+      "epoch": 0.19319938176197837,
+      "grad_norm": 0.001516812015324831,
+      "learning_rate": 0.0009356002060793406,
+      "loss": 0.9066,
+      "step": 500
+    },
+    {
+      "epoch": 0.2125193199381762,
+      "grad_norm": 0.0024989296216517687,
+      "learning_rate": 0.0009291602266872746,
+      "loss": 0.9071,
+      "step": 550
+    },
+    {
+      "epoch": 0.23183925811437403,
+      "grad_norm": 0.0012714399490505457,
+      "learning_rate": 0.0009227202472952086,
+      "loss": 0.9258,
+      "step": 600
+    },
+    {
+      "epoch": 0.2511591962905719,
+      "grad_norm": 0.0011651200475171208,
+      "learning_rate": 0.0009162802679031428,
+      "loss": 0.8831,
+      "step": 650
+    },
+    {
+      "epoch": 0.2704791344667697,
+      "grad_norm": 0.001427617622539401,
+      "learning_rate": 0.0009098402885110768,
+      "loss": 0.8902,
+      "step": 700
+    },
+    {
+      "epoch": 0.28979907264296756,
+      "grad_norm": 0.0013865921646356583,
+      "learning_rate": 0.0009034003091190108,
+      "loss": 0.8841,
+      "step": 750
+    },
+    {
+      "epoch": 0.3091190108191654,
+      "grad_norm": 0.0013833673438057303,
+      "learning_rate": 0.0008969603297269449,
+      "loss": 0.8972,
+      "step": 800
+    },
+    {
+      "epoch": 0.3284389489953632,
+      "grad_norm": 0.0014730911934748292,
+      "learning_rate": 0.000890520350334879,
+      "loss": 0.9071,
+      "step": 850
+    },
+    {
+      "epoch": 0.34775888717156106,
+      "grad_norm": 0.0017960412660613656,
+      "learning_rate": 0.000884080370942813,
+      "loss": 0.8876,
+      "step": 900
+    },
+    {
+      "epoch": 0.3670788253477589,
+      "grad_norm": 0.0018052643863484263,
+      "learning_rate": 0.0008776403915507471,
+      "loss": 0.8858,
+      "step": 950
+    },
+    {
+      "epoch": 0.38639876352395675,
+      "grad_norm": 0.0014563511358574033,
+      "learning_rate": 0.000871200412158681,
+      "loss": 0.8958,
+      "step": 1000
+    },
+    {
+      "epoch": 0.40571870170015456,
+      "grad_norm": 0.0019182608230039477,
+      "learning_rate": 0.0008647604327666152,
+      "loss": 0.8933,
+      "step": 1050
+    },
+    {
+      "epoch": 0.4250386398763524,
+      "grad_norm": 0.001481884391978383,
+      "learning_rate": 0.0008583204533745493,
+      "loss": 0.868,
+      "step": 1100
+    },
+    {
+      "epoch": 0.44435857805255025,
+      "grad_norm": 0.0013063091319054365,
+      "learning_rate": 0.0008518804739824832,
+      "loss": 0.8674,
+      "step": 1150
+    },
+    {
+      "epoch": 0.46367851622874806,
+      "grad_norm": 0.0016475298907607794,
+      "learning_rate": 0.0008454404945904173,
+      "loss": 0.8878,
+      "step": 1200
+    },
+    {
+      "epoch": 0.48299845440494593,
+      "grad_norm": 0.001497351098805666,
+      "learning_rate": 0.0008390005151983514,
+      "loss": 0.8949,
+      "step": 1250
+    },
+    {
+      "epoch": 0.5023183925811437,
+      "grad_norm": 0.0014308547833934426,
+      "learning_rate": 0.0008325605358062854,
+      "loss": 0.8982,
+      "step": 1300
+    },
+    {
+      "epoch": 0.5216383307573416,
+      "grad_norm": 0.0016038663452491164,
+      "learning_rate": 0.0008261205564142195,
+      "loss": 0.9057,
+      "step": 1350
+    },
+    {
+      "epoch": 0.5409582689335394,
+      "grad_norm": 0.001774997217580676,
+      "learning_rate": 0.0008196805770221536,
+      "loss": 0.8695,
+      "step": 1400
+    },
+    {
+      "epoch": 0.5602782071097373,
+      "grad_norm": 0.0019299176055938005,
+      "learning_rate": 0.0008132405976300876,
+      "loss": 0.8832,
+      "step": 1450
+    },
+    {
+      "epoch": 0.5795981452859351,
+      "grad_norm": 0.0020758837927132845,
+      "learning_rate": 0.0008068006182380216,
+      "loss": 0.892,
+      "step": 1500
+    },
+    {
+      "epoch": 0.5989180834621329,
+      "grad_norm": 0.0016486513195559382,
+      "learning_rate": 0.0008003606388459557,
+      "loss": 0.8869,
+      "step": 1550
+    },
+    {
+      "epoch": 0.6182380216383307,
+      "grad_norm": 0.0013964808313176036,
+      "learning_rate": 0.0007939206594538898,
+      "loss": 0.8946,
+      "step": 1600
+    },
+    {
+      "epoch": 0.6375579598145286,
+      "grad_norm": 0.0016684457659721375,
+      "learning_rate": 0.0007874806800618238,
+      "loss": 0.8884,
+      "step": 1650
+    },
+    {
+      "epoch": 0.6568778979907264,
+      "grad_norm": 0.0016556017799302936,
+      "learning_rate": 0.0007810407006697579,
+      "loss": 0.8877,
+      "step": 1700
+    },
+    {
+      "epoch": 0.6761978361669243,
+      "grad_norm": 0.002399856923148036,
+      "learning_rate": 0.0007746007212776918,
+      "loss": 0.9077,
+      "step": 1750
+    },
+    {
+      "epoch": 0.6955177743431221,
+      "grad_norm": 0.0016722239088267088,
+      "learning_rate": 0.000768160741885626,
+      "loss": 0.8986,
+      "step": 1800
+    },
+    {
+      "epoch": 0.7148377125193199,
+      "grad_norm": 0.001298186951316893,
+      "learning_rate": 0.0007617207624935601,
+      "loss": 0.8959,
+      "step": 1850
+    },
+    {
+      "epoch": 0.7341576506955177,
+      "grad_norm": 0.0012781182304024696,
+      "learning_rate": 0.000755280783101494,
+      "loss": 0.8788,
+      "step": 1900
+    },
+    {
+      "epoch": 0.7534775888717156,
+      "grad_norm": 0.001566325663588941,
+      "learning_rate": 0.0007488408037094282,
+      "loss": 0.8982,
+      "step": 1950
+    },
+    {
+      "epoch": 0.7727975270479135,
+      "grad_norm": 0.0017126682214438915,
+      "learning_rate": 0.0007424008243173622,
+      "loss": 0.8983,
+      "step": 2000
+    },
+    {
+      "epoch": 0.7921174652241113,
+      "grad_norm": 0.0013752984814345837,
+      "learning_rate": 0.0007359608449252962,
+      "loss": 0.8922,
+      "step": 2050
+    },
+    {
+      "epoch": 0.8114374034003091,
+      "grad_norm": 0.0020329777617007494,
+      "learning_rate": 0.0007295208655332303,
+      "loss": 0.8923,
+      "step": 2100
+    },
+    {
+      "epoch": 0.8307573415765069,
+      "grad_norm": 0.0018424971494823694,
+      "learning_rate": 0.0007230808861411644,
+      "loss": 0.8749,
+      "step": 2150
+    },
+    {
+      "epoch": 0.8500772797527048,
+      "grad_norm": 0.0019076282624155283,
+      "learning_rate": 0.0007166409067490984,
+      "loss": 0.8986,
+      "step": 2200
+    },
+    {
+      "epoch": 0.8693972179289027,
+      "grad_norm": 0.0014463101979345083,
+      "learning_rate": 0.0007102009273570324,
+      "loss": 0.8691,
+      "step": 2250
+    },
+    {
+      "epoch": 0.8887171561051005,
+      "grad_norm": 0.0014440215891227126,
+      "learning_rate": 0.0007037609479649665,
+      "loss": 0.8827,
+      "step": 2300
+    },
+    {
+      "epoch": 0.9080370942812983,
+      "grad_norm": 0.0016820535529404879,
+      "learning_rate": 0.0006973209685729006,
+      "loss": 0.8973,
+      "step": 2350
+    },
+    {
+      "epoch": 0.9273570324574961,
+      "grad_norm": 0.0013854255666956306,
+      "learning_rate": 0.0006908809891808346,
+      "loss": 0.8859,
+      "step": 2400
+    },
+    {
+      "epoch": 0.9466769706336939,
+      "grad_norm": 0.002300039865076542,
+      "learning_rate": 0.0006844410097887687,
+      "loss": 0.8829,
+      "step": 2450
+    },
+    {
+      "epoch": 0.9659969088098919,
+      "grad_norm": 0.0015681901713833213,
+      "learning_rate": 0.0006780010303967026,
+      "loss": 0.8909,
+      "step": 2500
+    },
+    {
+      "epoch": 0.9853168469860897,
+      "grad_norm": 0.0021285219117999077,
+      "learning_rate": 0.0006715610510046368,
+      "loss": 0.8852,
+      "step": 2550
+    },
+    {
+      "epoch": 1.0,
+      "eval_loss": 0.8796888589859009,
+      "eval_runtime": 3690.6011,
+      "eval_samples_per_second": 2.805,
+      "eval_steps_per_second": 0.351,
+      "step": 2588
+    },
+    {
+      "epoch": 1.0046367851622875,
+      "grad_norm": 0.0018459343118593097,
+      "learning_rate": 0.0006651210716125709,
+      "loss": 0.9123,
+      "step": 2600
+    },
+    {
+      "epoch": 1.0239567233384854,
+      "grad_norm": 0.0020025072153657675,
+      "learning_rate": 0.0006586810922205048,
+      "loss": 0.8976,
+      "step": 2650
+    },
+    {
+      "epoch": 1.0432766615146831,
+      "grad_norm": 0.002290483098477125,
+      "learning_rate": 0.000652241112828439,
+      "loss": 0.8649,
+      "step": 2700
+    },
+    {
+      "epoch": 1.062596599690881,
+      "grad_norm": 0.0015446515753865242,
+      "learning_rate": 0.0006458011334363731,
+      "loss": 0.9013,
+      "step": 2750
+    },
+    {
+      "epoch": 1.0819165378670788,
+      "grad_norm": 0.0015362042468041182,
+      "learning_rate": 0.000639361154044307,
+      "loss": 0.881,
+      "step": 2800
+    },
+    {
+      "epoch": 1.1012364760432767,
+      "grad_norm": 0.0013964643003419042,
+      "learning_rate": 0.0006329211746522411,
+      "loss": 0.893,
+      "step": 2850
+    },
+    {
+      "epoch": 1.1205564142194744,
+      "grad_norm": 0.0014078960521146655,
+      "learning_rate": 0.0006264811952601752,
+      "loss": 0.8959,
+      "step": 2900
+    },
+    {
+      "epoch": 1.1398763523956723,
+      "grad_norm": 0.0012278002686798573,
+      "learning_rate": 0.0006200412158681092,
+      "loss": 0.8737,
+      "step": 2950
+    },
+    {
+      "epoch": 1.1591962905718702,
+      "grad_norm": 0.0016700943233445287,
+      "learning_rate": 0.0006136012364760433,
+      "loss": 0.8926,
+      "step": 3000
+    },
+    {
+      "epoch": 1.178516228748068,
+      "grad_norm": 0.0013488128315657377,
+      "learning_rate": 0.0006071612570839773,
+      "loss": 0.8822,
+      "step": 3050
+    },
+    {
+      "epoch": 1.1978361669242659,
+      "grad_norm": 0.0013352310052141547,
+      "learning_rate": 0.0006007212776919114,
+      "loss": 0.9086,
+      "step": 3100
+    },
+    {
+      "epoch": 1.2171561051004636,
+      "grad_norm": 0.0015964731574058533,
+      "learning_rate": 0.0005942812982998454,
+      "loss": 0.8726,
+      "step": 3150
+    },
+    {
+      "epoch": 1.2364760432766615,
+      "grad_norm": 0.0015036857221275568,
+      "learning_rate": 0.0005878413189077795,
+      "loss": 0.8858,
+      "step": 3200
+    },
+    {
+      "epoch": 1.2557959814528594,
+      "grad_norm": 0.0014933788916096091,
+      "learning_rate": 0.0005814013395157137,
+      "loss": 0.8669,
+      "step": 3250
+    },
+    {
+      "epoch": 1.2751159196290571,
+      "grad_norm": 0.0014979959232732654,
+      "learning_rate": 0.0005749613601236476,
+      "loss": 0.8581,
+      "step": 3300
+    },
+    {
+      "epoch": 1.294435857805255,
+      "grad_norm": 0.001521074096672237,
+      "learning_rate": 0.0005685213807315817,
+      "loss": 0.8876,
+      "step": 3350
+    },
+    {
+      "epoch": 1.3137557959814528,
+      "grad_norm": 0.0013812105171382427,
+      "learning_rate": 0.0005620814013395156,
+      "loss": 0.8759,
+      "step": 3400
+    },
+    {
+      "epoch": 1.3330757341576507,
+      "grad_norm": 0.001809781650081277,
+      "learning_rate": 0.0005556414219474498,
+      "loss": 0.8637,
+      "step": 3450
+    },
+    {
+      "epoch": 1.3523956723338486,
+      "grad_norm": 0.001580111333169043,
+      "learning_rate": 0.0005492014425553839,
+      "loss": 0.8653,
+      "step": 3500
+    },
+    {
+      "epoch": 1.3717156105100463,
+      "grad_norm": 0.002373141935095191,
+      "learning_rate": 0.0005427614631633179,
+      "loss": 0.8998,
+      "step": 3550
+    },
+    {
+      "epoch": 1.3910355486862442,
+      "grad_norm": 0.0015024031745269895,
+      "learning_rate": 0.0005363214837712519,
+      "loss": 0.876,
+      "step": 3600
+    },
+    {
+      "epoch": 1.410355486862442,
+      "grad_norm": 0.0019381038146093488,
+      "learning_rate": 0.000529881504379186,
+      "loss": 0.8792,
+      "step": 3650
+    },
+    {
+      "epoch": 1.4296754250386399,
+      "grad_norm": 0.0015559702878817916,
+      "learning_rate": 0.0005234415249871201,
+      "loss": 0.8655,
+      "step": 3700
+    },
+    {
+      "epoch": 1.4489953632148378,
+      "grad_norm": 0.0015072495443746448,
+      "learning_rate": 0.0005170015455950541,
+      "loss": 0.8691,
+      "step": 3750
+    },
+    {
+      "epoch": 1.4683153013910355,
+      "grad_norm": 0.0016321117291226983,
+      "learning_rate": 0.0005105615662029882,
+      "loss": 0.8899,
+      "step": 3800
+    },
+    {
+      "epoch": 1.4876352395672334,
+      "grad_norm": 0.001877523958683014,
+      "learning_rate": 0.0005041215868109223,
+      "loss": 0.8605,
+      "step": 3850
+    },
+    {
+      "epoch": 1.5069551777434311,
+      "grad_norm": 0.0015456199180334806,
+      "learning_rate": 0.0004976816074188562,
+      "loss": 0.8619,
+      "step": 3900
+    },
+    {
+      "epoch": 1.526275115919629,
+      "grad_norm": 0.001545245642773807,
+      "learning_rate": 0.0004912416280267903,
+      "loss": 0.8807,
+      "step": 3950
+    },
+    {
+      "epoch": 1.545595054095827,
+      "grad_norm": 0.0014278549933806062,
+      "learning_rate": 0.00048480164863472436,
+      "loss": 0.8929,
+      "step": 4000
+    },
+    {
+      "epoch": 1.5649149922720247,
+      "grad_norm": 0.0016315317479893565,
+      "learning_rate": 0.0004783616692426584,
+      "loss": 0.8707,
+      "step": 4050
+    },
+    {
+      "epoch": 1.5842349304482226,
+      "grad_norm": 0.0015835491940379143,
+      "learning_rate": 0.0004719216898505925,
+      "loss": 0.8852,
+      "step": 4100
+    },
+    {
+      "epoch": 1.6035548686244203,
+      "grad_norm": 0.001612671185284853,
+      "learning_rate": 0.00046548171045852655,
+      "loss": 0.8796,
+      "step": 4150
+    },
+    {
+      "epoch": 1.6228748068006182,
+      "grad_norm": 0.0013706408208236098,
+      "learning_rate": 0.00045904173106646055,
+      "loss": 0.8708,
+      "step": 4200
+    },
+    {
+      "epoch": 1.6421947449768162,
+      "grad_norm": 0.0015082815662026405,
+      "learning_rate": 0.00045260175167439467,
+      "loss": 0.8963,
+      "step": 4250
+    },
+    {
+      "epoch": 1.6615146831530139,
+      "grad_norm": 0.0013959509087726474,
+      "learning_rate": 0.0004461617722823287,
+      "loss": 0.8721,
+      "step": 4300
+    },
+    {
+      "epoch": 1.6808346213292118,
+      "grad_norm": 0.0012363146524876356,
+      "learning_rate": 0.00043972179289026274,
+      "loss": 0.8722,
+      "step": 4350
+    },
+    {
+      "epoch": 1.7001545595054095,
+      "grad_norm": 0.0014632450183853507,
+      "learning_rate": 0.00043328181349819685,
+      "loss": 0.8818,
+      "step": 4400
+    },
+    {
+      "epoch": 1.7194744976816074,
+      "grad_norm": 0.0014374173479154706,
+      "learning_rate": 0.00042684183410613086,
+      "loss": 0.8841,
+      "step": 4450
+    },
+    {
+      "epoch": 1.7387944358578054,
+      "grad_norm": 0.0019015870057046413,
+      "learning_rate": 0.0004204018547140649,
+      "loss": 0.8885,
+      "step": 4500
+    },
+    {
+      "epoch": 1.758114374034003,
+      "grad_norm": 0.0018965965136885643,
+      "learning_rate": 0.000413961875321999,
+      "loss": 0.875,
+      "step": 4550
+    },
+    {
+      "epoch": 1.7774343122102008,
+      "grad_norm": 0.001437367289327085,
+      "learning_rate": 0.00040752189592993304,
+      "loss": 0.8618,
+      "step": 4600
+    },
+    {
+      "epoch": 1.7967542503863987,
+      "grad_norm": 0.0015649065608158708,
+      "learning_rate": 0.00040108191653786705,
+      "loss": 0.8914,
+      "step": 4650
+    },
+    {
+      "epoch": 1.8160741885625966,
+      "grad_norm": 0.001809162669815123,
+      "learning_rate": 0.00039464193714580116,
+      "loss": 0.8769,
+      "step": 4700
+    },
+    {
+      "epoch": 1.8353941267387945,
+      "grad_norm": 0.001575763919390738,
+      "learning_rate": 0.00038820195775373517,
+      "loss": 0.8873,
+      "step": 4750
+    },
+    {
+      "epoch": 1.8547140649149922,
+      "grad_norm": 0.0015713346656411886,
+      "learning_rate": 0.00038176197836166923,
+      "loss": 0.8689,
+      "step": 4800
+    },
+    {
+      "epoch": 1.87403400309119,
+      "grad_norm": 0.001345846801996231,
+      "learning_rate": 0.0003753219989696033,
+      "loss": 0.8759,
+      "step": 4850
+    },
+    {
+      "epoch": 1.8933539412673879,
+      "grad_norm": 0.001627634628675878,
+      "learning_rate": 0.00036888201957753735,
+      "loss": 0.8908,
+      "step": 4900
+    },
+    {
+      "epoch": 1.9126738794435858,
+      "grad_norm": 0.001344445045106113,
+      "learning_rate": 0.00036244204018547136,
+      "loss": 0.8815,
+      "step": 4950
+    },
+    {
+      "epoch": 1.9319938176197837,
+      "grad_norm": 0.0016686639282852411,
+      "learning_rate": 0.0003560020607934055,
+      "loss": 0.8668,
+      "step": 5000
+    },
+    {
+      "epoch": 1.9513137557959814,
+      "grad_norm": 0.0016382921021431684,
+      "learning_rate": 0.00034956208140133954,
+      "loss": 0.8905,
+      "step": 5050
+    },
+    {
+      "epoch": 1.9706336939721791,
+      "grad_norm": 0.001502548111602664,
+      "learning_rate": 0.0003431221020092736,
+      "loss": 0.8867,
+      "step": 5100
+    },
+    {
+      "epoch": 1.989953632148377,
+      "grad_norm": 0.001673164777457714,
+      "learning_rate": 0.00033668212261720766,
+      "loss": 0.8597,
+      "step": 5150
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 0.8747333288192749,
+      "eval_runtime": 3876.6431,
+      "eval_samples_per_second": 2.67,
+      "eval_steps_per_second": 0.334,
+      "step": 5176
+    },
+    {
+      "epoch": 2.009273570324575,
+      "grad_norm": 0.0014372485456988215,
+      "learning_rate": 0.00033024214322514167,
+      "loss": 0.8778,
+      "step": 5200
+    },
+    {
+      "epoch": 2.028593508500773,
+      "grad_norm": 0.0013220058754086494,
+      "learning_rate": 0.0003238021638330758,
+      "loss": 0.8707,
+      "step": 5250
+    },
+    {
+      "epoch": 2.047913446676971,
+      "grad_norm": 0.0018379129469394684,
+      "learning_rate": 0.0003173621844410098,
+      "loss": 0.8866,
+      "step": 5300
+    },
+    {
+      "epoch": 2.0672333848531683,
+      "grad_norm": 0.0014872249448671937,
+      "learning_rate": 0.00031092220504894385,
+      "loss": 0.8757,
+      "step": 5350
+    },
+    {
+      "epoch": 2.0865533230293662,
+      "grad_norm": 0.0017716609872877598,
+      "learning_rate": 0.0003044822256568779,
+      "loss": 0.8647,
+      "step": 5400
+    },
+    {
+      "epoch": 2.105873261205564,
+      "grad_norm": 0.0014926039148122072,
+      "learning_rate": 0.00029804224626481197,
+      "loss": 0.8656,
+      "step": 5450
+    },
+    {
+      "epoch": 2.125193199381762,
+      "grad_norm": 0.0015732580795884132,
+      "learning_rate": 0.000291602266872746,
+      "loss": 0.8848,
+      "step": 5500
+    },
+    {
+      "epoch": 2.1445131375579596,
+      "grad_norm": 0.0016066147945821285,
+      "learning_rate": 0.0002851622874806801,
+      "loss": 0.8922,
+      "step": 5550
+    },
+    {
+      "epoch": 2.1638330757341575,
+      "grad_norm": 0.0017319379840046167,
+      "learning_rate": 0.0002787223080886141,
+      "loss": 0.8686,
+      "step": 5600
+    },
+    {
+      "epoch": 2.1831530139103554,
+      "grad_norm": 0.001960605848580599,
+      "learning_rate": 0.00027228232869654816,
+      "loss": 0.8896,
+      "step": 5650
+    },
+    {
+      "epoch": 2.2024729520865534,
+      "grad_norm": 0.0017535175429657102,
+      "learning_rate": 0.0002658423493044823,
+      "loss": 0.8898,
+      "step": 5700
+    },
+    {
+      "epoch": 2.2217928902627513,
+      "grad_norm": 0.0013692132197320461,
+      "learning_rate": 0.0002594023699124163,
+      "loss": 0.87,
+      "step": 5750
+    },
+    {
+      "epoch": 2.2411128284389488,
+      "grad_norm": 0.0012175439624115825,
+      "learning_rate": 0.00025296239052035035,
+      "loss": 0.8833,
+      "step": 5800
+    },
+    {
+      "epoch": 2.2604327666151467,
+      "grad_norm": 0.0014353194274008274,
+      "learning_rate": 0.0002465224111282844,
+      "loss": 0.8785,
+      "step": 5850
+    },
+    {
+      "epoch": 2.2797527047913446,
+      "grad_norm": 0.0014904884155839682,
+      "learning_rate": 0.00024008243173621844,
+      "loss": 0.8827,
+      "step": 5900
+    },
+    {
+      "epoch": 2.2990726429675425,
+      "grad_norm": 0.0015136194415390491,
+      "learning_rate": 0.00023364245234415253,
+      "loss": 0.8813,
+      "step": 5950
+    },
+    {
+      "epoch": 2.3183925811437405,
+      "grad_norm": 0.0021759923547506332,
+      "learning_rate": 0.00022720247295208656,
+      "loss": 0.8712,
+      "step": 6000
+    },
+    {
+      "epoch": 2.337712519319938,
+      "grad_norm": 0.0012809019535779953,
+      "learning_rate": 0.00022076249356002062,
+      "loss": 0.8643,
+      "step": 6050
+    },
+    {
+      "epoch": 2.357032457496136,
+      "grad_norm": 0.0013298860285431147,
+      "learning_rate": 0.00021432251416795469,
+      "loss": 0.8854,
+      "step": 6100
+    },
+    {
+      "epoch": 2.376352395672334,
+      "grad_norm": 0.0017576662357896566,
+      "learning_rate": 0.00020788253477588872,
+      "loss": 0.8719,
+      "step": 6150
+    },
+    {
+      "epoch": 2.3956723338485317,
+      "grad_norm": 0.0016253705834969878,
+      "learning_rate": 0.00020144255538382278,
+      "loss": 0.8708,
+      "step": 6200
+    },
+    {
+      "epoch": 2.4149922720247297,
+      "grad_norm": 0.0015435911482200027,
+      "learning_rate": 0.00019500257599175684,
+      "loss": 0.854,
+      "step": 6250
+    },
+    {
+      "epoch": 2.434312210200927,
+      "grad_norm": 0.0013563215034082532,
+      "learning_rate": 0.00018856259659969088,
+      "loss": 0.8565,
+      "step": 6300
+    },
+    {
+      "epoch": 2.453632148377125,
+      "grad_norm": 0.0019695733208209276,
+      "learning_rate": 0.00018212261720762494,
+      "loss": 0.8587,
+      "step": 6350
+    },
+    {
+      "epoch": 2.472952086553323,
+      "grad_norm": 0.001503878622315824,
+      "learning_rate": 0.000175682637815559,
+      "loss": 0.8847,
+      "step": 6400
+    },
+    {
+      "epoch": 2.492272024729521,
+      "grad_norm": 0.0018463142914697528,
+      "learning_rate": 0.00016924265842349303,
+      "loss": 0.8865,
+      "step": 6450
+    },
+    {
+      "epoch": 2.511591962905719,
+      "grad_norm": 0.0013368335785344243,
+      "learning_rate": 0.0001628026790314271,
+      "loss": 0.8658,
+      "step": 6500
+    },
+    {
+      "epoch": 2.5309119010819163,
+      "grad_norm": 0.001461292733438313,
+      "learning_rate": 0.00015636269963936115,
+      "loss": 0.8719,
+      "step": 6550
+    },
+    {
+      "epoch": 2.5502318392581143,
+      "grad_norm": 0.0017376808682456613,
+      "learning_rate": 0.00014992272024729521,
+      "loss": 0.8645,
+      "step": 6600
+    },
+    {
+      "epoch": 2.569551777434312,
+      "grad_norm": 0.001536277704872191,
+      "learning_rate": 0.00014348274085522928,
+      "loss": 0.8839,
+      "step": 6650
+    },
+    {
+      "epoch": 2.58887171561051,
+      "grad_norm": 0.0020096334628760815,
+      "learning_rate": 0.00013704276146316334,
+      "loss": 0.8809,
+      "step": 6700
+    },
+    {
+      "epoch": 2.608191653786708,
+      "grad_norm": 0.002817530184984207,
+      "learning_rate": 0.00013060278207109737,
+      "loss": 0.8643,
+      "step": 6750
+    },
+    {
+      "epoch": 2.6275115919629055,
+      "grad_norm": 0.002030319534242153,
+      "learning_rate": 0.00012416280267903143,
+      "loss": 0.8713,
+      "step": 6800
+    },
+    {
+      "epoch": 2.6468315301391034,
+      "grad_norm": 0.0015792109770700336,
+      "learning_rate": 0.00011772282328696548,
+      "loss": 0.874,
+      "step": 6850
+    },
+    {
+      "epoch": 2.6661514683153014,
+      "grad_norm": 0.0016106871189549565,
+      "learning_rate": 0.00011128284389489954,
+      "loss": 0.8904,
+      "step": 6900
+    },
+    {
+      "epoch": 2.6854714064914993,
+      "grad_norm": 0.001518415636382997,
+      "learning_rate": 0.00010484286450283359,
+      "loss": 0.8889,
+      "step": 6950
+    },
+    {
+      "epoch": 2.704791344667697,
+      "grad_norm": 0.0014989189803600311,
+      "learning_rate": 9.840288511076765e-05,
+      "loss": 0.8765,
+      "step": 7000
+    },
+    {
+      "epoch": 2.7241112828438947,
+      "grad_norm": 0.0017810104181990027,
+      "learning_rate": 9.196290571870171e-05,
+      "loss": 0.8768,
+      "step": 7050
+    },
+    {
+      "epoch": 2.7434312210200926,
+      "grad_norm": 0.0017641150625422597,
+      "learning_rate": 8.552292632663576e-05,
+      "loss": 0.8839,
+      "step": 7100
+    },
+    {
+      "epoch": 2.7627511591962906,
+      "grad_norm": 0.001232657814398408,
+      "learning_rate": 7.908294693456982e-05,
+      "loss": 0.8833,
+      "step": 7150
+    },
+    {
+      "epoch": 2.7820710973724885,
+      "grad_norm": 0.0017207327764481306,
+      "learning_rate": 7.264296754250387e-05,
+      "loss": 0.869,
+      "step": 7200
+    },
+    {
+      "epoch": 2.8013910355486864,
+      "grad_norm": 0.0015914075775071979,
+      "learning_rate": 6.620298815043791e-05,
+      "loss": 0.8774,
+      "step": 7250
+    },
+    {
+      "epoch": 2.820710973724884,
+      "grad_norm": 0.0015985453501343727,
+      "learning_rate": 5.9763008758371975e-05,
+      "loss": 0.8753,
+      "step": 7300
+    },
+    {
+      "epoch": 2.840030911901082,
+      "grad_norm": 0.0014812527224421501,
+      "learning_rate": 5.332302936630603e-05,
+      "loss": 0.8882,
+      "step": 7350
+    },
+    {
+      "epoch": 2.8593508500772797,
+      "grad_norm": 0.0014649786753579974,
+      "learning_rate": 4.688304997424008e-05,
+      "loss": 0.8787,
+      "step": 7400
+    },
+    {
+      "epoch": 2.8786707882534777,
+      "grad_norm": 0.0014871322782710195,
+      "learning_rate": 4.044307058217414e-05,
+      "loss": 0.8795,
+      "step": 7450
+    },
+    {
+      "epoch": 2.8979907264296756,
+      "grad_norm": 0.0015188547549769282,
+      "learning_rate": 3.400309119010819e-05,
+      "loss": 0.88,
+      "step": 7500
+    },
+    {
+      "epoch": 2.917310664605873,
+      "grad_norm": 0.001243911450728774,
+      "learning_rate": 2.7563111798042246e-05,
+      "loss": 0.8755,
+      "step": 7550
+    },
+    {
+      "epoch": 2.936630602782071,
+      "grad_norm": 0.0015768723096698523,
+      "learning_rate": 2.11231324059763e-05,
+      "loss": 0.864,
+      "step": 7600
+    },
+    {
+      "epoch": 2.955950540958269,
+      "grad_norm": 0.0015496944542974234,
+      "learning_rate": 1.4683153013910355e-05,
+      "loss": 0.8697,
+      "step": 7650
+    },
+    {
+      "epoch": 2.975270479134467,
+      "grad_norm": 0.0015128754312172532,
+      "learning_rate": 8.24317362184441e-06,
+      "loss": 0.8702,
+      "step": 7700
+    },
+    {
+      "epoch": 2.9945904173106648,
+      "grad_norm": 0.0014746102970093489,
+      "learning_rate": 1.8031942297784647e-06,
+      "loss": 0.8747,
+      "step": 7750
+    },
+    {
+      "epoch": 3.0,
+      "eval_loss": 0.8734365105628967,
+      "eval_runtime": 3266.5567,
+      "eval_samples_per_second": 3.169,
+      "eval_steps_per_second": 0.396,
+      "step": 7764
+    }
+  ],
+  "logging_steps": 50,
+  "max_steps": 7764,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 50,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1.570388270726652e+18,
+  "train_batch_size": 2,
+  "trial_name": null,
+  "trial_params": null
+}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1fc87615fe03c7c0b66573781d8506f56007b5374b1204db84fc867a42df5272
+size 5560