chat-with-docs

Runtime error

Mattral committed on May 14, 2024

Commit

11a69e0

verified ·

1 Parent(s): d367cd8

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -17,15 +17,19 @@ print("Embedding model loaded...")
 # Loading the LLM
 callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
 llm = AutoModelForCausalLM.from_pretrained(
-    "TheBloke/Llama-2-7B-Chat-GGUF",
     model_file="llama-2-7b-chat.Q3_K_S.gguf",
     model_type="llama",
     temperature=0.2,
     repetition_penalty=1.5,
     max_new_tokens=300,
 )
 print("LLM loaded...")

 # Loading the LLM
 callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
+'''
 llm = AutoModelForCausalLM.from_pretrained(
+    "refuelai/Llama-3-Refueled",
     model_file="llama-2-7b-chat.Q3_K_S.gguf",
     model_type="llama",
     temperature=0.2,
     repetition_penalty=1.5,
     max_new_tokens=300,
 )
+'''
+model_id = "refuelai/Llama-3-Refueled"
+tokenizer = AutoTokenizer.from_pretrained(model_id)
+llm = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16, device_map="auto")
 print("LLM loaded...")