Mattral committed on
Commit
11a69e0
·
verified ·
1 Parent(s): d367cd8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -2
app.py CHANGED
@@ -17,15 +17,19 @@ print("Embedding model loaded...")
17
 
18
  # Loading the LLM
19
  callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
20
-
21
  llm = AutoModelForCausalLM.from_pretrained(
22
- "TheBloke/Llama-2-7B-Chat-GGUF",
23
  model_file="llama-2-7b-chat.Q3_K_S.gguf",
24
  model_type="llama",
25
  temperature=0.2,
26
  repetition_penalty=1.5,
27
  max_new_tokens=300,
28
  )
 
 
 
 
29
 
30
  print("LLM loaded...")
31
 
 
17
 
18
  # Loading the LLM
19
  callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
20
+ '''
21
  llm = AutoModelForCausalLM.from_pretrained(
22
+ "refuelai/Llama-3-Refueled",
23
  model_file="llama-2-7b-chat.Q3_K_S.gguf",
24
  model_type="llama",
25
  temperature=0.2,
26
  repetition_penalty=1.5,
27
  max_new_tokens=300,
28
  )
29
+ '''
30
+ model_id = "refuelai/Llama-3-Refueled"
31
+ tokenizer = AutoTokenizer.from_pretrained(model_id)
32
+ llm = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16, device_map="auto")
33
 
34
  print("LLM loaded...")
35