Spaces:
Runtime error
Runtime error
Federico Galatolo
committed on
Commit
•
fafd74a
1
Parent(s):
7b3a47b
Q4_K quantization
Browse files
- .gitignore +1 -0
- app.py +2 -2
.gitignore
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
/env
|
app.py
CHANGED
@@ -9,7 +9,7 @@ from huggingface_hub import hf_hub_download
|
|
9 |
llm = Llama(
|
10 |
model_path=hf_hub_download(
|
11 |
repo_id="galatolo/cerbero-7b-gguf",
|
12 |
-
filename="ggml-model-
|
13 |
),
|
14 |
n_ctx=4086,
|
15 |
)
|
@@ -51,7 +51,7 @@ def generate_text(message, history):
|
|
51 |
|
52 |
demo = gr.ChatInterface(
|
53 |
generate_text,
|
54 |
-
title="cerbero-7b running on CPU (quantized)",
|
55 |
description="This is a quantized version of cerbero-7b running on CPU. It is less powerful than the original version, but it is much faster and it can even run on a Raspberry Pi 4.",
|
56 |
examples=[
|
57 |
"Dammi 3 idee di ricette che posso fare con i pistacchi",
|
|
|
9 |
llm = Llama(
|
10 |
model_path=hf_hub_download(
|
11 |
repo_id="galatolo/cerbero-7b-gguf",
|
12 |
+
filename="ggml-model-Q4_K.gguf",
|
13 |
),
|
14 |
n_ctx=4086,
|
15 |
)
|
|
|
51 |
|
52 |
demo = gr.ChatInterface(
|
53 |
generate_text,
|
54 |
+
title="cerbero-7b running on CPU (quantized Q4_K)",
|
55 |
description="This is a quantized version of cerbero-7b running on CPU. It is less powerful than the original version, but it is much faster and it can even run on a Raspberry Pi 4.",
|
56 |
examples=[
|
57 |
"Dammi 3 idee di ricette che posso fare con i pistacchi",
|