Update model files

Browse files

Files changed (14) hide show

arc_challenge.json +25 -0
config.json +2 -1
generation_config.json +6 -0
gsm8k.json +23 -0
hellaswag.json +25 -0
mmlu.json +417 -0
model-orig.onnx +2 -2
model.data +2 -2
model.onnx +2 -2
recipe.yaml +5 -5
tokenizer.json +1 -6
tokenizer_config.json +2 -0
truthfulqa_mc.json +25 -0
winogrande.json +23 -0

arc_challenge.json ADDED Viewed

	@@ -0,0 +1,25 @@

+{
+  "results": {
+    "arc_challenge": {
+      "acc": 0.4351535836177474,
+      "acc_stderr": 0.014487986197186047,
+      "acc_norm": 0.46757679180887374,
+      "acc_norm_stderr": 0.014580637569995421
+    }
+  },
+  "versions": {
+    "arc_challenge": 0
+  },
+  "config": {
+    "model": "sparseml",
+    "model_args": "pretrained=/cache/shubhra/models/platypus_dolphin/cerebras/spft-cerebras_llama2_sparse70_platypus_dolphin_KDFalse_GCTrue_LR1e-4_E4_quant_smooth8,trust_remote_code=True,dtype=bfloat16",
+    "num_fewshot": 25,
+    "batch_size": "16",
+    "batch_sizes": [],
+    "device": "cuda:0",
+    "no_cache": true,
+    "limit": null,
+    "bootstrap_iters": 100000,
+    "description_dict": {}
+  }
+}

config.json CHANGED Viewed

@@ -1,4 +1,5 @@
 {
   "architectures": [
     "LlamaForCausalLM"
   ],
@@ -22,7 +23,7 @@
   "tie_word_embeddings": false,
   "tokenizer_class": "LlamaTokenizerFast",
   "torch_dtype": "float32",
-  "transformers_version": "1.7.0.43401",
   "use_cache": true,
   "vocab_size": 32000
 }

 {
+  "_name_or_path": "/cache/shubhra/models/platypus_dolphin/cerebras/spft-cerebras_llama2_sparse70_platypus_dolphin_KDFalse_GCTrue_LR1e-4_E4/combined/",
   "architectures": [
     "LlamaForCausalLM"
   ],
   "tie_word_embeddings": false,
   "tokenizer_class": "LlamaTokenizerFast",
   "torch_dtype": "float32",
+  "transformers_version": "4.39.3",
   "use_cache": true,
   "vocab_size": 32000
 }

generation_config.json ADDED Viewed

	@@ -0,0 +1,6 @@

+{
+  "_from_model_config": true,
+  "bos_token_id": 1,
+  "eos_token_id": 2,
+  "transformers_version": "4.39.3"
+}

gsm8k.json ADDED Viewed

	@@ -0,0 +1,23 @@

+{
+  "results": {
+    "gsm8k": {
+      "acc": 0.15238817285822592,
+      "acc_stderr": 0.009899572254794198
+    }
+  },
+  "versions": {
+    "gsm8k": 0
+  },
+  "config": {
+    "model": "sparseml",
+    "model_args": "pretrained=/cache/shubhra/models/platypus_dolphin/cerebras/spft-cerebras_llama2_sparse70_platypus_dolphin_KDFalse_GCTrue_LR1e-4_E4_quant_smooth8,trust_remote_code=True,dtype=bfloat16",
+    "num_fewshot": 5,
+    "batch_size": "16",
+    "batch_sizes": [],
+    "device": "cuda:0",
+    "no_cache": true,
+    "limit": null,
+    "bootstrap_iters": 100000,
+    "description_dict": {}
+  }
+}

hellaswag.json ADDED Viewed

	@@ -0,0 +1,25 @@

+{
+  "results": {
+    "hellaswag": {
+      "acc": 0.5596494722166899,
+      "acc_stderr": 0.00495414628651335,
+      "acc_norm": 0.753734315873332,
+      "acc_norm_stderr": 0.004299546103761425
+    }
+  },
+  "versions": {
+    "hellaswag": 0
+  },
+  "config": {
+    "model": "sparseml",
+    "model_args": "pretrained=/cache/shubhra/models/platypus_dolphin/cerebras/spft-cerebras_llama2_sparse70_platypus_dolphin_KDFalse_GCTrue_LR1e-4_E4_quant_smooth8,trust_remote_code=True,dtype=bfloat16",
+    "num_fewshot": 10,
+    "batch_size": "16",
+    "batch_sizes": [],
+    "device": "cuda:0",
+    "no_cache": true,
+    "limit": null,
+    "bootstrap_iters": 100000,
+    "description_dict": {}
+  }
+}

mmlu.json ADDED Viewed

	@@ -0,0 +1,417 @@

+{
+  "results": {
+    "hendrycksTest-abstract_algebra": {
+      "acc": 0.28,
+      "acc_stderr": 0.04512608598542129,
+      "acc_norm": 0.28,
+      "acc_norm_stderr": 0.04512608598542129
+    },
+    "hendrycksTest-anatomy": {
+      "acc": 0.42962962962962964,
+      "acc_stderr": 0.04276349494376599,
+      "acc_norm": 0.42962962962962964,
+      "acc_norm_stderr": 0.04276349494376599
+    },
+    "hendrycksTest-astronomy": {
+      "acc": 0.47368421052631576,
+      "acc_stderr": 0.04063302731486671,
+      "acc_norm": 0.47368421052631576,
+      "acc_norm_stderr": 0.04063302731486671
+    },
+    "hendrycksTest-business_ethics": {
+      "acc": 0.46,
+      "acc_stderr": 0.05009082659620332,
+      "acc_norm": 0.46,
+      "acc_norm_stderr": 0.05009082659620332
+    },
+    "hendrycksTest-clinical_knowledge": {
+      "acc": 0.47547169811320755,
+      "acc_stderr": 0.030735822206205615,
+      "acc_norm": 0.47547169811320755,
+      "acc_norm_stderr": 0.030735822206205615
+    },
+    "hendrycksTest-college_biology": {
+      "acc": 0.375,
+      "acc_stderr": 0.04048439222695598,
+      "acc_norm": 0.375,
+      "acc_norm_stderr": 0.04048439222695598
+    },
+    "hendrycksTest-college_chemistry": {
+      "acc": 0.42,
+      "acc_stderr": 0.04960449637488584,
+      "acc_norm": 0.42,
+      "acc_norm_stderr": 0.04960449637488584
+    },
+    "hendrycksTest-college_computer_science": {
+      "acc": 0.36,
+      "acc_stderr": 0.048241815132442176,
+      "acc_norm": 0.36,
+      "acc_norm_stderr": 0.048241815132442176
+    },
+    "hendrycksTest-college_mathematics": {
+      "acc": 0.26,
+      "acc_stderr": 0.044084400227680794,
+      "acc_norm": 0.26,
+      "acc_norm_stderr": 0.044084400227680794
+    },
+    "hendrycksTest-college_medicine": {
+      "acc": 0.4046242774566474,
+      "acc_stderr": 0.03742461193887248,
+      "acc_norm": 0.4046242774566474,
+      "acc_norm_stderr": 0.03742461193887248
+    },
+    "hendrycksTest-college_physics": {
+      "acc": 0.21568627450980393,
+      "acc_stderr": 0.04092563958237654,
+      "acc_norm": 0.21568627450980393,
+      "acc_norm_stderr": 0.04092563958237654
+    },
+    "hendrycksTest-computer_security": {
+      "acc": 0.53,
+      "acc_stderr": 0.050161355804659205,
+      "acc_norm": 0.53,
+      "acc_norm_stderr": 0.050161355804659205
+    },
+    "hendrycksTest-conceptual_physics": {
+      "acc": 0.34893617021276596,
+      "acc_stderr": 0.031158522131357787,
+      "acc_norm": 0.34893617021276596,
+      "acc_norm_stderr": 0.031158522131357787
+    },
+    "hendrycksTest-econometrics": {
+      "acc": 0.2807017543859649,
+      "acc_stderr": 0.042270544512322,
+      "acc_norm": 0.2807017543859649,
+      "acc_norm_stderr": 0.042270544512322
+    },
+    "hendrycksTest-electrical_engineering": {
+      "acc": 0.4482758620689655,
+      "acc_stderr": 0.04144311810878152,
+      "acc_norm": 0.4482758620689655,
+      "acc_norm_stderr": 0.04144311810878152
+    },
+    "hendrycksTest-elementary_mathematics": {
+      "acc": 0.291005291005291,
+      "acc_stderr": 0.02339382650048487,
+      "acc_norm": 0.291005291005291,
+      "acc_norm_stderr": 0.02339382650048487
+    },
+    "hendrycksTest-formal_logic": {
+      "acc": 0.23015873015873015,
+      "acc_stderr": 0.037649508797906045,
+      "acc_norm": 0.23015873015873015,
+      "acc_norm_stderr": 0.037649508797906045
+    },
+    "hendrycksTest-global_facts": {
+      "acc": 0.38,
+      "acc_stderr": 0.048783173121456316,
+      "acc_norm": 0.38,
+      "acc_norm_stderr": 0.048783173121456316
+    },
+    "hendrycksTest-high_school_biology": {
+      "acc": 0.432258064516129,
+      "acc_stderr": 0.028181739720019413,
+      "acc_norm": 0.432258064516129,
+      "acc_norm_stderr": 0.028181739720019413
+    },
+    "hendrycksTest-high_school_chemistry": {
+      "acc": 0.3054187192118227,
+      "acc_stderr": 0.03240661565868408,
+      "acc_norm": 0.3054187192118227,
+      "acc_norm_stderr": 0.03240661565868408
+    },
+    "hendrycksTest-high_school_computer_science": {
+      "acc": 0.43,
+      "acc_stderr": 0.04975698519562428,
+      "acc_norm": 0.43,
+      "acc_norm_stderr": 0.04975698519562428
+    },
+    "hendrycksTest-high_school_european_history": {
+      "acc": 0.5757575757575758,
+      "acc_stderr": 0.03859268142070265,
+      "acc_norm": 0.5757575757575758,
+      "acc_norm_stderr": 0.03859268142070265
+    },
+    "hendrycksTest-high_school_geography": {
+      "acc": 0.4797979797979798,
+      "acc_stderr": 0.035594435655639196,
+      "acc_norm": 0.4797979797979798,
+      "acc_norm_stderr": 0.035594435655639196
+    },
+    "hendrycksTest-high_school_government_and_politics": {
+      "acc": 0.6010362694300518,
+      "acc_stderr": 0.03533999094065696,
+      "acc_norm": 0.6010362694300518,
+      "acc_norm_stderr": 0.03533999094065696
+    },
+    "hendrycksTest-high_school_macroeconomics": {
+      "acc": 0.4128205128205128,
+      "acc_stderr": 0.024962683564331803,
+      "acc_norm": 0.4128205128205128,
+      "acc_norm_stderr": 0.024962683564331803
+    },
+    "hendrycksTest-high_school_mathematics": {
+      "acc": 0.26666666666666666,
+      "acc_stderr": 0.02696242432507384,
+      "acc_norm": 0.26666666666666666,
+      "acc_norm_stderr": 0.02696242432507384
+    },
+    "hendrycksTest-high_school_microeconomics": {
+      "acc": 0.40756302521008403,
+      "acc_stderr": 0.03191863374478465,
+      "acc_norm": 0.40756302521008403,
+      "acc_norm_stderr": 0.03191863374478465
+    },
+    "hendrycksTest-high_school_physics": {
+      "acc": 0.33112582781456956,
+      "acc_stderr": 0.038425817186598696,
+      "acc_norm": 0.33112582781456956,
+      "acc_norm_stderr": 0.038425817186598696
+    },
+    "hendrycksTest-high_school_psychology": {
+      "acc": 0.5082568807339449,
+      "acc_stderr": 0.021434399918214327,
+      "acc_norm": 0.5082568807339449,
+      "acc_norm_stderr": 0.021434399918214327
+    },
+    "hendrycksTest-high_school_statistics": {
+      "acc": 0.32407407407407407,
+      "acc_stderr": 0.03191923445686186,
+      "acc_norm": 0.32407407407407407,
+      "acc_norm_stderr": 0.03191923445686186
+    },
+    "hendrycksTest-high_school_us_history": {
+      "acc": 0.5588235294117647,
+      "acc_stderr": 0.034849415144292316,
+      "acc_norm": 0.5588235294117647,
+      "acc_norm_stderr": 0.034849415144292316
+    },
+    "hendrycksTest-high_school_world_history": {
+      "acc": 0.6455696202531646,
+      "acc_stderr": 0.031137304297185805,
+      "acc_norm": 0.6455696202531646,
+      "acc_norm_stderr": 0.031137304297185805
+    },
+    "hendrycksTest-human_aging": {
+      "acc": 0.4080717488789238,
+      "acc_stderr": 0.03298574607842822,
+      "acc_norm": 0.4080717488789238,
+      "acc_norm_stderr": 0.03298574607842822
+    },
+    "hendrycksTest-human_sexuality": {
+      "acc": 0.45038167938931295,
+      "acc_stderr": 0.04363643698524779,
+      "acc_norm": 0.45038167938931295,
+      "acc_norm_stderr": 0.04363643698524779
+    },
+    "hendrycksTest-international_law": {
+      "acc": 0.5867768595041323,
+      "acc_stderr": 0.04495087843548408,
+      "acc_norm": 0.5867768595041323,
+      "acc_norm_stderr": 0.04495087843548408
+    },
+    "hendrycksTest-jurisprudence": {
+      "acc": 0.42592592592592593,
+      "acc_stderr": 0.0478034362693679,
+      "acc_norm": 0.42592592592592593,
+      "acc_norm_stderr": 0.0478034362693679
+    },
+    "hendrycksTest-logical_fallacies": {
+      "acc": 0.4294478527607362,
+      "acc_stderr": 0.03889066619112722,
+      "acc_norm": 0.4294478527607362,
+      "acc_norm_stderr": 0.03889066619112722
+    },
+    "hendrycksTest-machine_learning": {
+      "acc": 0.25892857142857145,
+      "acc_stderr": 0.04157751539865629,
+      "acc_norm": 0.25892857142857145,
+      "acc_norm_stderr": 0.04157751539865629
+    },
+    "hendrycksTest-management": {
+      "acc": 0.5631067961165048,
+      "acc_stderr": 0.04911147107365777,
+      "acc_norm": 0.5631067961165048,
+      "acc_norm_stderr": 0.04911147107365777
+    },
+    "hendrycksTest-marketing": {
+      "acc": 0.5470085470085471,
+      "acc_stderr": 0.03261099873098618,
+      "acc_norm": 0.5470085470085471,
+      "acc_norm_stderr": 0.03261099873098618
+    },
+    "hendrycksTest-medical_genetics": {
+      "acc": 0.38,
+      "acc_stderr": 0.04878317312145633,
+      "acc_norm": 0.38,
+      "acc_norm_stderr": 0.04878317312145633
+    },
+    "hendrycksTest-miscellaneous": {
+      "acc": 0.5696040868454662,
+      "acc_stderr": 0.01770586877629239,
+      "acc_norm": 0.5696040868454662,
+      "acc_norm_stderr": 0.01770586877629239
+    },
+    "hendrycksTest-moral_disputes": {
+      "acc": 0.43641618497109824,
+      "acc_stderr": 0.02670054542494368,
+      "acc_norm": 0.43641618497109824,
+      "acc_norm_stderr": 0.02670054542494368
+    },
+    "hendrycksTest-moral_scenarios": {
+      "acc": 0.2581005586592179,
+      "acc_stderr": 0.014635185616527836,
+      "acc_norm": 0.2581005586592179,
+      "acc_norm_stderr": 0.014635185616527836
+    },
+    "hendrycksTest-nutrition": {
+      "acc": 0.5065359477124183,
+      "acc_stderr": 0.028627470550556054,
+      "acc_norm": 0.5065359477124183,
+      "acc_norm_stderr": 0.028627470550556054
+    },
+    "hendrycksTest-philosophy": {
+      "acc": 0.4887459807073955,
+      "acc_stderr": 0.028390897396863533,
+      "acc_norm": 0.4887459807073955,
+      "acc_norm_stderr": 0.028390897396863533
+    },
+    "hendrycksTest-prehistory": {
+      "acc": 0.47530864197530864,
+      "acc_stderr": 0.027786800931427436,
+      "acc_norm": 0.47530864197530864,
+      "acc_norm_stderr": 0.027786800931427436
+    },
+    "hendrycksTest-professional_accounting": {
+      "acc": 0.3333333333333333,
+      "acc_stderr": 0.028121636040639893,
+      "acc_norm": 0.3333333333333333,
+      "acc_norm_stderr": 0.028121636040639893
+    },
+    "hendrycksTest-professional_law": {
+      "acc": 0.333116036505867,
+      "acc_stderr": 0.012037930451512052,
+      "acc_norm": 0.333116036505867,
+      "acc_norm_stderr": 0.012037930451512052
+    },
+    "hendrycksTest-professional_medicine": {
+      "acc": 0.3492647058823529,
+      "acc_stderr": 0.028959755196824852,
+      "acc_norm": 0.3492647058823529,
+      "acc_norm_stderr": 0.028959755196824852
+    },
+    "hendrycksTest-professional_psychology": {
+      "acc": 0.4068627450980392,
+      "acc_stderr": 0.019873802005061177,
+      "acc_norm": 0.4068627450980392,
+      "acc_norm_stderr": 0.019873802005061177
+    },
+    "hendrycksTest-public_relations": {
+      "acc": 0.4818181818181818,
+      "acc_stderr": 0.04785964010794916,
+      "acc_norm": 0.4818181818181818,
+      "acc_norm_stderr": 0.04785964010794916
+    },
+    "hendrycksTest-security_studies": {
+      "acc": 0.4775510204081633,
+      "acc_stderr": 0.03197694118713672,
+      "acc_norm": 0.4775510204081633,
+      "acc_norm_stderr": 0.03197694118713672
+    },
+    "hendrycksTest-sociology": {
+      "acc": 0.5771144278606966,
+      "acc_stderr": 0.034932317774212816,
+      "acc_norm": 0.5771144278606966,
+      "acc_norm_stderr": 0.034932317774212816
+    },
+    "hendrycksTest-us_foreign_policy": {
+      "acc": 0.64,
+      "acc_stderr": 0.048241815132442176,
+      "acc_norm": 0.64,
+      "acc_norm_stderr": 0.048241815132442176
+    },
+    "hendrycksTest-virology": {
+      "acc": 0.42168674698795183,
+      "acc_stderr": 0.03844453181770917,
+      "acc_norm": 0.42168674698795183,
+      "acc_norm_stderr": 0.03844453181770917
+    },
+    "hendrycksTest-world_religions": {
+      "acc": 0.5847953216374269,
+      "acc_stderr": 0.03779275945503201,
+      "acc_norm": 0.5847953216374269,
+      "acc_norm_stderr": 0.03779275945503201
+    }
+  },
+  "versions": {
+    "hendrycksTest-abstract_algebra": 1,
+    "hendrycksTest-anatomy": 1,
+    "hendrycksTest-astronomy": 1,
+    "hendrycksTest-business_ethics": 1,
+    "hendrycksTest-clinical_knowledge": 1,
+    "hendrycksTest-college_biology": 1,
+    "hendrycksTest-college_chemistry": 1,
+    "hendrycksTest-college_computer_science": 1,
+    "hendrycksTest-college_mathematics": 1,
+    "hendrycksTest-college_medicine": 1,
+    "hendrycksTest-college_physics": 1,
+    "hendrycksTest-computer_security": 1,
+    "hendrycksTest-conceptual_physics": 1,
+    "hendrycksTest-econometrics": 1,
+    "hendrycksTest-electrical_engineering": 1,
+    "hendrycksTest-elementary_mathematics": 1,
+    "hendrycksTest-formal_logic": 1,
+    "hendrycksTest-global_facts": 1,
+    "hendrycksTest-high_school_biology": 1,
+    "hendrycksTest-high_school_chemistry": 1,
+    "hendrycksTest-high_school_computer_science": 1,
+    "hendrycksTest-high_school_european_history": 1,
+    "hendrycksTest-high_school_geography": 1,
+    "hendrycksTest-high_school_government_and_politics": 1,
+    "hendrycksTest-high_school_macroeconomics": 1,
+    "hendrycksTest-high_school_mathematics": 1,
+    "hendrycksTest-high_school_microeconomics": 1,
+    "hendrycksTest-high_school_physics": 1,
+    "hendrycksTest-high_school_psychology": 1,
+    "hendrycksTest-high_school_statistics": 1,
+    "hendrycksTest-high_school_us_history": 1,
+    "hendrycksTest-high_school_world_history": 1,
+    "hendrycksTest-human_aging": 1,
+    "hendrycksTest-human_sexuality": 1,
+    "hendrycksTest-international_law": 1,
+    "hendrycksTest-jurisprudence": 1,
+    "hendrycksTest-logical_fallacies": 1,
+    "hendrycksTest-machine_learning": 1,
+    "hendrycksTest-management": 1,
+    "hendrycksTest-marketing": 1,
+    "hendrycksTest-medical_genetics": 1,
+    "hendrycksTest-miscellaneous": 1,
+    "hendrycksTest-moral_disputes": 1,
+    "hendrycksTest-moral_scenarios": 1,
+    "hendrycksTest-nutrition": 1,
+    "hendrycksTest-philosophy": 1,
+    "hendrycksTest-prehistory": 1,
+    "hendrycksTest-professional_accounting": 1,
+    "hendrycksTest-professional_law": 1,
+    "hendrycksTest-professional_medicine": 1,
+    "hendrycksTest-professional_psychology": 1,
+    "hendrycksTest-public_relations": 1,
+    "hendrycksTest-security_studies": 1,
+    "hendrycksTest-sociology": 1,
+    "hendrycksTest-us_foreign_policy": 1,
+    "hendrycksTest-virology": 1,
+    "hendrycksTest-world_religions": 1
+  },
+  "config": {
+    "model": "sparseml",
+    "model_args": "pretrained=/cache/shubhra/models/platypus_dolphin/cerebras/spft-cerebras_llama2_sparse70_platypus_dolphin_KDFalse_GCTrue_LR1e-4_E4_quant_smooth8,trust_remote_code=True,dtype=bfloat16",
+    "num_fewshot": 5,
+    "batch_size": "8",
+    "batch_sizes": [],
+    "device": "cuda:0",
+    "no_cache": true,
+    "limit": null,
+    "bootstrap_iters": 100000,
+    "description_dict": {}
+  }
+}

model-orig.onnx CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3e5e661b6d51ee379551157c22127ed36aecf7333389b43f9f7093d1fb498246
-size 1049663

 version https://git-lfs.github.com/spec/v1
+oid sha256:82a5192b20dc2eaaa3f89e0333c688ec9d77549add91f4afbb21f22fbced447d
+size 1047380

model.data CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:43758729d4bfddcc029b637aba79dcaebd358fd610b9d253bc27799c6a266a40
-size 7154772992

 version https://git-lfs.github.com/spec/v1
+oid sha256:3aa6517696cbe012a0651daead0b5809e8b9856311b5f02cffc8f965344f6b15
+size 7425272832

model.onnx CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:90e427d59cf0bf29dd98c99188e85d8c3f5c7f93c1ecda48868f00d147c94c81
-size 1034608

 version https://git-lfs.github.com/spec/v1
+oid sha256:9091e1dce25260ddbe379fba87728d10d150636c173e099eebb7756ba9c24595
+size 1032325

recipe.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 test_stage:
   obcq_modifiers:
     SmoothQuantModifier:
-      smoothing_strength: 0.9
       mappings:
       - - ['re:.*q_proj', 're:.*k_proj', 're:.*v_proj']
         - re:.*input_layernorm
@@ -11,9 +11,9 @@ test_stage:
         - re:.*up_proj
     QuantizationModifier:
       ignore: [LlamaRotaryEmbedding, LlamaRMSNorm, SiLUActivation, model.layers.30.mlp.down_proj,
-        model.layers.1.mlp.down_proj, model.layers.0.mlp.down_proj, MatMulOutput_QK, MatMulOutput_PV,
-        MatMulLeftInput_QK, MatMulLeftInput_PV, MatMulRightInput_QK, MatMulRightInput_PV,
-        QuantizableMatMul]
       post_oneshot_calibration: true
       scheme_overrides:
         Linear:
@@ -35,5 +35,5 @@ test_stage:
         model.layers.16, model.layers.17, model.layers.18, model.layers.19, model.layers.20,
         model.layers.21, model.layers.22, model.layers.23, model.layers.24, model.layers.25,
         model.layers.26, model.layers.27, model.layers.28, model.layers.29, model.layers.30,
-        model.layers.31]
       target_ids: [attention_mask, position_ids]

 test_stage:
   obcq_modifiers:
     SmoothQuantModifier:
+      smoothing_strength: 0.8
       mappings:
       - - ['re:.*q_proj', 're:.*k_proj', 're:.*v_proj']
         - re:.*input_layernorm
         - re:.*up_proj
     QuantizationModifier:
       ignore: [LlamaRotaryEmbedding, LlamaRMSNorm, SiLUActivation, model.layers.30.mlp.down_proj,
+        model.layers.1.mlp.down_proj, model.layers.0.mlp.down_proj, model.layers.4.mlp.down_proj,
+        model.layers.8.mlp.down_proj, MatMulOutput_QK, MatMulOutput_PV, MatMulLeftInput_QK,
+        MatMulLeftInput_PV, MatMulRightInput_QK, MatMulRightInput_PV, QuantizableMatMul]
       post_oneshot_calibration: true
       scheme_overrides:
         Linear:
         model.layers.16, model.layers.17, model.layers.18, model.layers.19, model.layers.20,
         model.layers.21, model.layers.22, model.layers.23, model.layers.24, model.layers.25,
         model.layers.26, model.layers.27, model.layers.28, model.layers.29, model.layers.30,
+        model.layers.31, lm_head]
       target_ids: [attention_mask, position_ids]

tokenizer.json CHANGED Viewed

@@ -1,11 +1,6 @@
 {
   "version": "1.0",
-  "truncation": {
-    "direction": "Right",
-    "max_length": 4096,
-    "strategy": "LongestFirst",
-    "stride": 0
-  },
   "padding": null,
   "added_tokens": [
     {

 {
   "version": "1.0",
+  "truncation": null,
   "padding": null,
   "added_tokens": [
     {

tokenizer_config.json CHANGED Viewed

@@ -1,4 +1,6 @@
 {
   "added_tokens_decoder": {
     "0": {
       "content": "<unk>",

 {
+  "add_bos_token": true,
+  "add_eos_token": false,
   "added_tokens_decoder": {
     "0": {
       "content": "<unk>",

truthfulqa_mc.json ADDED Viewed

	@@ -0,0 +1,25 @@

+{
+  "results": {
+    "truthfulqa_mc": {
+      "mc1": 0.2729498164014688,
+      "mc1_stderr": 0.015594753632006518,
+      "mc2": 0.4190461960683527,
+      "mc2_stderr": 0.01451417258125535
+    }
+  },
+  "versions": {
+    "truthfulqa_mc": 1
+  },
+  "config": {
+    "model": "sparseml",
+    "model_args": "pretrained=/cache/shubhra/models/platypus_dolphin/cerebras/spft-cerebras_llama2_sparse70_platypus_dolphin_KDFalse_GCTrue_LR1e-4_E4_quant_smooth8,trust_remote_code=True,dtype=bfloat16",
+    "num_fewshot": 0,
+    "batch_size": "16",
+    "batch_sizes": [],
+    "device": "cuda:0",
+    "no_cache": true,
+    "limit": null,
+    "bootstrap_iters": 100000,
+    "description_dict": {}
+  }
+}

winogrande.json ADDED Viewed

	@@ -0,0 +1,23 @@

+{
+  "results": {
+    "winogrande": {
+      "acc": 0.6866614048934491,
+      "acc_stderr": 0.013036512096747983
+    }
+  },
+  "versions": {
+    "winogrande": 0
+  },
+  "config": {
+    "model": "sparseml",
+    "model_args": "pretrained=/cache/shubhra/models/platypus_dolphin/cerebras/spft-cerebras_llama2_sparse70_platypus_dolphin_KDFalse_GCTrue_LR1e-4_E4_quant_smooth8,trust_remote_code=True,dtype=bfloat16",
+    "num_fewshot": 5,
+    "batch_size": "16",
+    "batch_sizes": [],
+    "device": "cuda:0",
+    "no_cache": true,
+    "limit": null,
+    "bootstrap_iters": 100000,
+    "description_dict": {}
+  }
+}