Sean-Case committed
Commit: e4df9f2
1 Parent(s): d2ddc62
Added support for Mistral Orca
Files changed: chatfuncs/chatfuncs.py (+15 -14)
chatfuncs/chatfuncs.py
CHANGED
@@ -47,7 +47,7 @@ import gradio as gr
 
 if torch.cuda.is_available():
     torch_device = "cuda"
-    gpu_layers =
+    gpu_layers = 5
 else: torch_device =  "cpu"
 
 print("Running on device:", torch_device)
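For context, this hunk only enables GPU offload when CUDA is present. A minimal standalone sketch of the same check (not part of the commit; the gpu_layers = 0 fallback in the else branch is an assumption, the diff leaves it unset on CPU):

import torch

# Pick the torch device and how many transformer layers to offload to the GPU.
if torch.cuda.is_available():
    torch_device = "cuda"
    gpu_layers = 5    # offload a few layers to the GPU, as in this commit
else:
    torch_device = "cpu"
    gpu_layers = 0    # assumption: keep everything on the CPU when CUDA is absent

print("Running on device:", torch_device)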
@@ -76,8 +76,8 @@ reset: bool = False
     stream: bool = True
     threads: int = threads
     batch_size:int = 512
-    context_length:int =
-    gpu_layers:int = 0#
+    context_length:int = 4096
+    gpu_layers:int = 0#5#gpu_layers
     sample = True
 
 @dataclass
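Pieced together from the context lines above, the edited block appears to be a config dataclass whose fields are later splatted into the model loader via asdict(). A rough reconstruction, assuming the class is the GenerationConfig referenced further down and filling in a placeholder for the module-level threads value:

from dataclasses import dataclass

threads = 8  # placeholder; the real module sets this elsewhere

@dataclass
class GenerationConfig:          # name assumed from the asdict(GenerationConfig()) call below
    stream: bool = True
    threads: int = threads
    batch_size: int = 512
    context_length: int = 4096   # raised to 4096 in this commit
    gpu_layers: int = 0          # 0 = CPU only; 5 was tried for GPU offload
    sample: bool = True          # annotated here so asdict() picks it up; the diff has `sample = True`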
@@ -114,13 +114,13 @@ kw_model = pipeline("feature-extraction", model="sentence-transformers/all-MiniL
 
 ## Chat models ##
 ctrans_llm = [] # Not leaded by default
-#ctrans_llm = AutoModelForCausalLM.from_pretrained('TheBloke/orca_mini_3B-GGML', model_type='llama', model_file='orca-mini-3b.ggmlv3.q4_0.bin')
 ctrans_llm = AutoModelForCausalLM.from_pretrained('juanjgit/orca_mini_3B-GGUF', model_type='llama', model_file='orca-mini-3b.q4_0.gguf', **asdict(GenerationConfig()))
 #ctrans_llm = AutoModelForCausalLM.from_pretrained('TheBloke/vicuna-13B-v1.5-16K-GGUF', model_type='llama', model_file='vicuna-13b-v1.5-16k.Q4_K_M.gguf')
 #ctrans_llm = AutoModelForCausalLM.from_pretrained('TheBloke/CodeUp-Llama-2-13B-Chat-HF-GGUF', model_type='llama', model_file='codeup-llama-2-13b-chat-hf.Q4_K_M.gguf')
 #ctrans_llm = AutoModelForCausalLM.from_pretrained('TheBloke/CodeLlama-13B-Instruct-GGUF', model_type='llama', model_file='codellama-13b-instruct.Q4_K_M.gguf')
 #ctrans_llm = AutoModelForCausalLM.from_pretrained('TheBloke/Mistral-7B-Instruct-v0.1-GGUF', model_type='mistral', model_file='mistral-7b-instruct-v0.1.Q4_K_M.gguf')
-#ctrans_llm = AutoModelForCausalLM.from_pretrained('TheBloke/Mistral-7B-OpenOrca-GGUF', model_type='mistral', model_file='mistral-7b-openorca.Q4_K_M.gguf')
+#ctrans_llm = AutoModelForCausalLM.from_pretrained('TheBloke/Mistral-7B-OpenOrca-GGUF', model_type='mistral', model_file='mistral-7b-openorca.Q4_K_M.gguf', **asdict(GenerationConfig()))
+#ctrans_llm = AutoModelForCausalLM.from_pretrained('TheBloke/Mistral-7B-OpenOrca-GGUF', model_type='mistral', model_file='mistral-7b-openorca.Q2_K.gguf', **asdict(GenerationConfig()))
 
 
 #ctokenizer = AutoTokenizer.from_pretrained(ctrans_llm)
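A sketch of how these GGUF checkpoints are loaded through ctransformers, with the config dataclass expanded into keyword arguments; repo and file names are taken verbatim from the lines above, and whether ctransformers accepts every field of the dataclass is not verified here:

from dataclasses import asdict
from ctransformers import AutoModelForCausalLM

# Active model in this commit: Orca Mini 3B, GGUF, q4_0 quantisation.
ctrans_llm = AutoModelForCausalLM.from_pretrained(
    'juanjgit/orca_mini_3B-GGUF',
    model_type='llama',
    model_file='orca-mini-3b.q4_0.gguf',
    **asdict(GenerationConfig()),  # stream, threads, batch_size, context_length, gpu_layers, ...
)

# Switching to the Mistral OpenOrca checkpoint would be a one-line swap, e.g.:
# ctrans_llm = AutoModelForCausalLM.from_pretrained(
#     'TheBloke/Mistral-7B-OpenOrca-GGUF',
#     model_type='mistral',
#     model_file='mistral-7b-openorca.Q4_K_M.gguf',
#     **asdict(GenerationConfig()),
# )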
@@ -222,16 +222,14 @@ def create_prompt_templates():
 
     ### Response:"""
 
-    instruction_prompt_template_orca_input = """
-    ### System:
-    You are an AI assistant that follows instruction extremely well. Help as much as you can.
-    ### User:
-    Answer the QUESTION using information from the following input.
-    ### Input:
-    {summaries}
-    QUESTION: {question}
 
-
+    instruction_prompt_mistral_orca = """<|im_start|>system\n
+    You are an AI assistant that follows instruction extremely well. Help as much as you can.
+    <|im_start|>user\n
+    Answer the QUESTION using information from the following CONTENT.
+    CONTENT: {summaries}
+    QUESTION: {question}\n
+    <|im_end|>"""
 
 
 
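The replacement template follows the ChatML-style layout that Mistral OpenOrca expects (<|im_start|> / <|im_end|> markers) instead of the old ### System / ### User headings. A minimal usage sketch, assuming the placeholders are filled with plain str.format (the application may instead wrap this in a prompt-template object); the template body is repeated here without the literal \n escapes:

instruction_prompt_mistral_orca = """<|im_start|>system
You are an AI assistant that follows instruction extremely well. Help as much as you can.
<|im_start|>user
Answer the QUESTION using information from the following CONTENT.
CONTENT: {summaries}
QUESTION: {question}
<|im_end|>"""

prompt = instruction_prompt_mistral_orca.format(
    summaries="Relevant document extracts would be inserted here.",
    question="What is the main topic of the document?",
)
print(prompt)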
@@ -986,6 +984,9 @@ def _get_chat_history(chat_history: List[Tuple[str, str]], max_memory_length:int
 
 def add_inputs_answer_to_history(user_message, history, current_topic):
 
+    if history is None:
+        history = [("","")]
+
     #history.append((user_message, [-1]))
 
     chat_history_str, chat_history_first_q, chat_history_first_ans, max_memory_length = _get_chat_history(history)
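The final hunk guards against an empty chat session: Gradio can hand the function history=None before any turns exist, which would otherwise break _get_chat_history. A small illustration of the guard on its own (the rest of the function body is elided):

def add_inputs_answer_to_history(user_message, history, current_topic):
    # New in this commit: fall back to a single empty exchange when no history exists yet.
    if history is None:
        history = [("", "")]
    # ... chat_history_str, chat_history_first_q, ... = _get_chat_history(history) follows ...
    return history

print(add_inputs_answer_to_history("Hello", None, "general"))  # -> [('', '')]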