nisten
/

grokked-biggie-164m-gguf

Inference Endpoints

Model card Files Files and versions Community

nisten committed on Aug 12

Commit

bd80303

•

1 Parent(s): d526eac

Update preset.json

Files changed (1) hide show

preset.json +25 -22

preset.json CHANGED Viewed

@@ -1,35 +1,38 @@
 {
-  "name": "Biggie SmoLlm Preset",
   "load_params": {
-    "n_ctx": 1024,
     "n_batch": 512,
     "n_gpu_layers": 0,
-    "use_mlock": false,
-    "main_gpu": 0,
-    "tensor_split": [0],
-    "seed": -1,
     "f16_kv": false,
-    "use_mmap": true,
-    "quantize_kv_cache": {
-      "key_type": "q8_0",
-      "value_type": "q8_0"
-    }
   },
   "inference_params": {
     "n_threads": 1,
     "n_predict": 1024,
-    "top_k": 0,
     "top_p": 0.85,
     "temperature": 1.5,
-    "repeat_penalty": 1.0,
     "min_p": 0.3,
-    "input_prefix": "<|im_start|>Human:",
-    "input_suffix": "",
-    "antiprompt": [
-      "Human:"
-    ],
-    "pre_prompt": "You are a cracked NASA JPL Scientist.",
-    "pre_prompt_suffix": "",
-    "pre_prompt_prefix": ""
   }
-}

 {
+  "name": "Biggie SmoLlm Q8_0",
+  "model_path": "biggie_groked_int8_q8_0.gguf",
   "load_params": {
+    "n_ctx": 2048,
     "n_batch": 512,
     "n_gpu_layers": 0,
+    "use_mlock": true,
+    "rope_freq_base": 10000,
+    "rope_freq_scale": 1.0,
     "f16_kv": false,
+    "cache_type_k": "q8_0",
+    "cache_type_v": "q8_0"
   },
   "inference_params": {
     "n_threads": 1,
     "n_predict": 1024,
+    "top_k": 40,
     "top_p": 0.85,
     "temperature": 1.5,
+    "repeat_penalty": 1.1,
     "min_p": 0.3,
+    "mirostat": 0,
+    "mirostat_tau": 5,
+    "mirostat_eta": 0.1,
+    "tfs_z": 1,
+    "typical_p": 1,
+    "presence_penalty": 0,
+    "frequency_penalty": 0,
+    "pre_prompt": "You are a NASA JPL Scientist.",
+    "pre_prompt_suffix": "\n",
+    "pre_prompt_prefix": "",
+    "input_prefix": "<|im_start|>Human: ",
+    "input_suffix": "\n",
+    "antiprompt": ["Human:"],
+    "stop_sequences": []
   }
+}