nm-testing
/

tinyllama-oneshot-w8a8-static-v3

Text Generation

text-generation-inference

Inference Endpoints

8-bit precision

compressed-tensors

Model card Files Files and versions Community

sadkins65 committed on Jun 17

Commit

af4eea2

•

1 Parent(s): a8fce7b

Upload folder using huggingface_hub

Files changed (4) hide show

config.json +3 -3
generation_config.json +1 -1
model.safetensors +2 -2
recipe.yaml +2 -2

config.json CHANGED Viewed

@@ -46,7 +46,7 @@
     "quantization_status": "frozen",
     "sparsity_config": {
       "format": "dense",
-      "global_sparsity": 7.8259900429979625,
       "registry_requires_subclass": false,
       "sparsity_structure": "unstructured"
     }
@@ -66,8 +66,8 @@
   "rope_scaling": null,
   "rope_theta": 10000.0,
   "tie_word_embeddings": false,
-  "torch_dtype": "float16",
-  "transformers_version": "4.40.0",
   "use_cache": true,
   "vocab_size": 32000
 }

     "quantization_status": "frozen",
     "sparsity_config": {
       "format": "dense",
+      "global_sparsity": 7.819616777874382,
       "registry_requires_subclass": false,
       "sparsity_structure": "unstructured"
     }
   "rope_scaling": null,
   "rope_theta": 10000.0,
   "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.40.2",
   "use_cache": true,
   "vocab_size": 32000
 }

generation_config.json CHANGED Viewed

@@ -3,5 +3,5 @@
   "eos_token_id": 2,
   "max_length": 2048,
   "pad_token_id": 0,
-  "transformers_version": "4.40.0"
 }

   "eos_token_id": 2,
   "max_length": 2048,
   "pad_token_id": 0,
+  "transformers_version": "4.40.2"
 }

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:85e1306ddfc0991bfe20892458b4caecd4605f2f02faabee21a71e211c177a28
-size 1231269448

 version https://git-lfs.github.com/spec/v1
+oid sha256:e0b88c21c6ee2080c22ed7ee689c5f0f4dfa89b598ec702953d950c0638512c6
+size 1231270112

recipe.yaml CHANGED Viewed

@@ -5,6 +5,6 @@ quant_stage:
       ignore: [lm_head]
       config_groups:
         group_0:
-          weights: {num_bits: 8, type: int, symmetric: true, strategy: tensor}
-          input_activations: {num_bits: 8, type: int, symmetric: true, strategy: tensor}
           targets: [Linear]

       ignore: [lm_head]
       config_groups:
         group_0:
+          weights: {num_bits: 8, type: int, symmetric: true}
+          input_activations: {num_bits: 8, type: int, symmetric: true}
           targets: [Linear]