Commit aaa2105 by SpaceGhost (parent: 66357bc): Update app.py
app.py CHANGED
@@ -6,8 +6,15 @@ from huggingface_hub import InferenceClient
 For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
 """
 #client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
-client = InferenceClient("unsloth/Llama-3.2-
-
+client = InferenceClient("unsloth/Llama-3.2-3B-Instruct")
+
+model, tokenizer = FastLanguageModel.from_pretrained(
+    model_name = "unsloth/mistral-7b-bnb-4bit", # Choose ANY! eg teknium/OpenHermes-2.5-Mistral-7B
+    max_seq_length = max_seq_length,
+    dtype = dtype,
+    load_in_4bit = load_in_4bit,
+    # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
+)
 
 def respond(
     message,
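
Note: as committed, the added hunk calls FastLanguageModel.from_pretrained without importing FastLanguageModel, and it references max_seq_length, dtype, and load_in_4bit, none of which are defined in the visible context. A minimal sketch of the definitions these lines assume, following unsloth's usual quickstart values (the import, the variable values, and the comments below are assumptions, not part of this commit):

# Sketch of the definitions the new hunk relies on (assumed values).
from unsloth import FastLanguageModel

max_seq_length = 2048   # maximum context length to load the model with (assumed)
dtype = None            # None lets unsloth auto-detect float16 / bfloat16
load_in_4bit = True     # load 4-bit quantized weights to reduce GPU memory use

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/mistral-7b-bnb-4bit",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
)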