Spaces: Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -17,15 +17,19 @@ print("Embedding model loaded...")
|
|
17 |
|
18 |
# Loading the LLM
|
19 |
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
|
20 |
-
|
21 |
llm = AutoModelForCausalLM.from_pretrained(
|
22 |
-
"
|
23 |
model_file="llama-2-7b-chat.Q3_K_S.gguf",
|
24 |
model_type="llama",
|
25 |
temperature=0.2,
|
26 |
repetition_penalty=1.5,
|
27 |
max_new_tokens=300,
|
28 |
)
|
|
|
|
|
|
|
|
|
29 |
|
30 |
print("LLM loaded...")
|
31 |
|
|
|
17 |
|
18 |
# Loading the LLM
|
19 |
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
|
20 |
+
'''
|
21 |
llm = AutoModelForCausalLM.from_pretrained(
|
22 |
+
"refuelai/Llama-3-Refueled",
|
23 |
model_file="llama-2-7b-chat.Q3_K_S.gguf",
|
24 |
model_type="llama",
|
25 |
temperature=0.2,
|
26 |
repetition_penalty=1.5,
|
27 |
max_new_tokens=300,
|
28 |
)
|
29 |
+
'''
|
30 |
+
model_id = "refuelai/Llama-3-Refueled"
|
31 |
+
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
32 |
+
llm = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16, device_map="auto")
|
33 |
|
34 |
print("LLM loaded...")
|
35 |
|