Upload model in auto_gptq format

Signed-off-by: n1ck-guo <heng.guo@intel.com>

Files changed (5)

config.json CHANGED Viewed

@@ -20,23 +20,28 @@
   "partial_rotary_factor": 0.4,
   "qk_layernorm": false,
   "quantization_config": {
-    "autoround_version": "0.2.0.dev",
     "bits": 4,
     "damp_percent": 0.01,
     "desc_act": false,
     "enable_minmax_tuning": true,
     "enable_quanted_input": true,
     "group_size": 128,
-    "is_marlin_format": false,
     "iters": 1000,
     "lr": 0.001,
     "minmax_lr": 0.001,
-    "model_file_base_name": "model",
-    "model_name_or_path": null,
     "quant_method": "gptq",
-    "scale_dtype": "float16",
-    "static_groups": false,
     "sym": true,
     "true_sequential": false
   },
   "resid_pdrop": 0.1,
@@ -44,7 +49,7 @@
   "rope_theta": 10000.0,
   "tie_word_embeddings": false,
   "torch_dtype": "float16",
-  "transformers_version": "4.40.2",
   "use_cache": true,
   "vocab_size": 51200
 }

   "partial_rotary_factor": 0.4,
   "qk_layernorm": false,
   "quantization_config": {
+    "amp": true,
+    "autoround_version": "0.3.1.dev",
     "bits": 4,
     "damp_percent": 0.01,
+    "data_type": "int",
     "desc_act": false,
     "enable_minmax_tuning": true,
+    "enable_norm_bias_tuning": false,
     "enable_quanted_input": true,
+    "gradient_accumulate_steps": 1,
     "group_size": 128,
     "iters": 1000,
+    "low_gpu_mem_usage": false,
     "lr": 0.001,
     "minmax_lr": 0.001,
+    "nsamples": 512,
+    "quant_block_list": null,
     "quant_method": "gptq",
+    "scale_dtype": "torch.float16",
+    "seqlen": 2048,
     "sym": true,
+    "train_bs": 8,
     "true_sequential": false
   },
   "resid_pdrop": 0.1,
   "rope_theta": 10000.0,
   "tie_word_embeddings": false,
   "torch_dtype": "float16",
+  "transformers_version": "4.44.2",
   "use_cache": true,
   "vocab_size": 51200
 }

generation_config.json ADDED Viewed

+{
+  "_from_model_config": true,
+  "bos_token_id": 50256,
+  "eos_token_id": 50256,
+  "transformers_version": "4.44.2"
+}

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:14c3ff2501ea2449bd14376ffe6ed545a3e6b04c0e273686acfc6f9cbe14cf22
-size 1836707656

 version https://git-lfs.github.com/spec/v1
+oid sha256:bb0bce8149e52d9bc6a535eae55b31f7f14d40341e3e0770380f4e2b2f8bb0cd
+size 1833738008

quantize_config.json CHANGED Viewed

@@ -1,20 +1,25 @@
 {
   "bits": 4,
   "group_size": 128,
-  "damp_percent": 0.01,
-  "desc_act": false,
-  "static_groups": false,
   "sym": true,
-  "true_sequential": false,
-  "model_name_or_path": null,
-  "model_file_base_name": "model",
-  "is_marlin_format": false,
-  "quant_method": "intel/auto-round",
-  "autoround_version": "0.2.0.dev",
-  "iters": 1000,
   "lr": 0.001,
   "minmax_lr": 0.001,
-  "enable_minmax_tuning": true,
-  "enable_quanted_input": true,
-  "scale_dtype": "float16"
 }

 {
   "bits": 4,
   "group_size": 128,
   "sym": true,
+  "data_type": "int",
+  "enable_quanted_input": true,
+  "enable_minmax_tuning": true,
+  "seqlen": 2048,
+  "train_bs": 8,
+  "scale_dtype": "torch.float16",
   "lr": 0.001,
   "minmax_lr": 0.001,
+  "gradient_accumulate_steps": 1,
+  "iters": 1000,
+  "amp": true,
+  "nsamples": 512,
+  "low_gpu_mem_usage": false,
+  "quant_block_list": null,
+  "enable_norm_bias_tuning": false,
+  "autoround_version": "0.3.1.dev",
+  "quant_method": "gptq",
+  "desc_act": false,
+  "true_sequential": false,
+  "damp_percent": 0.01
 }

tokenizer.json CHANGED Viewed

@@ -1,6 +1,11 @@
 {
   "version": "1.0",
-  "truncation": null,
   "padding": null,
   "added_tokens": [
     {

 {
   "version": "1.0",
+  "truncation": {
+    "direction": "Right",
+    "max_length": 2048,
+    "strategy": "LongestFirst",
+    "stride": 0
+  },
   "padding": null,
   "added_tokens": [
     {