use deepthought-8b-llama-v0.01-alpha-Q4_0_8_8.gguf
app.py CHANGED

@@ -5,7 +5,7 @@ from typing import Tuple, List
 import time
 
 DESCRIPTION = f"""
-# Chat with
+# Chat with Deepthought 8B as GGUF on CPU
 """
 
 MAX_MAX_NEW_TOKENS = 1024
@@ -13,8 +13,8 @@ DEFAULT_MAX_NEW_TOKENS = 200
 
 # Download the GGUF file
 model_path = hf_hub_download(
-    repo_id="
-    filename="
+    repo_id="bartowski/deepthought-8b-llama-v0.01-alpha-GGUF",
+    filename="deepthought-8b-llama-v0.01-alpha-Q4_0_8_8.gguf",
     repo_type="model"
 )
 # Load the GGUF model
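The diff cuts off right after the `# Load the GGUF model` comment, so the loading code itself isn't shown. A minimal sketch of the full download-and-load flow, assuming llama-cpp-python as the runtime (the `Llama` constructor, `create_chat_completion` call, and the `n_ctx`/`n_threads` values are assumptions for illustration, not taken from the Space; only the `repo_id` and `filename` come from the diff):

```python
# Hypothetical end-to-end flow: only repo_id and filename are from the
# commit; the llama-cpp-python usage below is an assumed runtime, not
# the Space's actual loading code.
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# Download the GGUF file (as in the diff)
model_path = hf_hub_download(
    repo_id="bartowski/deepthought-8b-llama-v0.01-alpha-GGUF",
    filename="deepthought-8b-llama-v0.01-alpha-Q4_0_8_8.gguf",
    repo_type="model",
)

# Load the GGUF model on CPU; context size and thread count are
# illustrative values, not from the commit.
llm = Llama(model_path=model_path, n_ctx=2048, n_threads=4)

# One chat turn, matching the "Chat with ..." description in app.py;
# max_tokens mirrors DEFAULT_MAX_NEW_TOKENS = 200 from the diff context.
output = llm.create_chat_completion(
    messages=[{"role": "user", "content": "Hello!"}],
    max_tokens=200,
)
print(output["choices"][0]["message"]["content"])
```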