Konect-U-AI

Runtime error

App Files Files Community

Sean-Case committed on Oct 16, 2023

Commit

0a7a8db

•

1 Parent(s): 8249fd3

Build fails when gpu_layers > 0, so gpu_layers is set to 0 at start and can be modified in the app.

Browse files

Files changed (2) hide show

app.py +4 -4
chatfuncs/chatfuncs.py +2 -2

app.py CHANGED Viewed

@@ -81,7 +81,7 @@ def load_model(model_type, gpu_layers, gpu_config=None, cpu_config=None, torch_d
     if model_type == "Mistral Open Orca (larger, slow)":
         if torch_device == "cuda":
-            gpu_config.update_gpu(12)
         else:
             gpu_config.update_gpu(gpu_layers)
             cpu_config.update_gpu(gpu_layers)
@@ -94,12 +94,12 @@ def load_model(model_type, gpu_layers, gpu_config=None, cpu_config=None, torch_d
         try:
             #model = AutoModelForCausalLM.from_pretrained('Aryanne/Orca-Mini-3B-gguf', model_type='llama', model_file='q5_0-orca-mini-3b.gguf', **vars(gpu_config)) # **asdict(CtransRunConfig_cpu())
             #model = AutoModelForCausalLM.from_pretrained('Aryanne/Wizard-Orca-3B-gguf', model_type='llama', model_file='q4_1-wizard-orca-3b.gguf', **vars(gpu_config)) # **asdict(CtransRunConfig_cpu())
-            model = AutoModelForCausalLM.from_pretrained('TheBloke/Mistral-7B-OpenOrca-GGUF', model_type='llama', model_file='mistral-7b-openorca.Q4_K_M.gguf', **vars(gpu_config)) # **asdict(CtransRunConfig_cpu())
         except:
             #model = AutoModelForCausalLM.from_pretrained('Aryanne/Orca-Mini-3B-gguf', model_type='llama', model_file='q5_0-orca-mini-3b.gguf', **vars(cpu_config)) #**asdict(CtransRunConfig_gpu())
             #model = AutoModelForCausalLM.from_pretrained('Aryanne/Wizard-Orca-3B-gguf', model_type='llama', model_file='q4_1-wizard-orca-3b.gguf', **vars(cpu_config)) # **asdict(CtransRunConfig_cpu())
-            model = AutoModelForCausalLM.from_pretrained('TheBloke/Mistral-7B-OpenOrca-GGUF', model_type='llama', model_file='mistral-7b-openorca.Q4_K_M.gguf', **vars(cpu_config)) # **asdict(CtransRunConfig_cpu())
         tokenizer = []
@@ -233,7 +233,7 @@ with block:
     with gr.Tab("Advanced features"):
         model_choice = gr.Radio(label="Choose a chat model", value="Flan Alpaca (small, fast)", choices = ["Flan Alpaca (small, fast)", "Mistral Open Orca (larger, slow)"])
         with gr.Row():
-            gpu_layer_choice = gr.Slider(label="Choose number of model layers to send to GPU (WARNING: please don't modify unless you have a GPU).", value=0, minimum=0, maximum=5, step = 1, visible=False)
             change_model_button = gr.Button(value="Load model", scale=0)
         load_text = gr.Text(label="Load status")

     if model_type == "Mistral Open Orca (larger, slow)":
         if torch_device == "cuda":
+            gpu_config.update_gpu(gpu_layers)
         else:
             gpu_config.update_gpu(gpu_layers)
             cpu_config.update_gpu(gpu_layers)
         try:
             #model = AutoModelForCausalLM.from_pretrained('Aryanne/Orca-Mini-3B-gguf', model_type='llama', model_file='q5_0-orca-mini-3b.gguf', **vars(gpu_config)) # **asdict(CtransRunConfig_cpu())
             #model = AutoModelForCausalLM.from_pretrained('Aryanne/Wizard-Orca-3B-gguf', model_type='llama', model_file='q4_1-wizard-orca-3b.gguf', **vars(gpu_config)) # **asdict(CtransRunConfig_cpu())
+            model = AutoModelForCausalLM.from_pretrained('TheBloke/Mistral-7B-OpenOrca-GGUF', model_type='mistral', model_file='mistral-7b-openorca.Q4_K_M.gguf', **vars(gpu_config)) # **asdict(CtransRunConfig_cpu())
         except:
             #model = AutoModelForCausalLM.from_pretrained('Aryanne/Orca-Mini-3B-gguf', model_type='llama', model_file='q5_0-orca-mini-3b.gguf', **vars(cpu_config)) #**asdict(CtransRunConfig_gpu())
             #model = AutoModelForCausalLM.from_pretrained('Aryanne/Wizard-Orca-3B-gguf', model_type='llama', model_file='q4_1-wizard-orca-3b.gguf', **vars(cpu_config)) # **asdict(CtransRunConfig_cpu())
+            model = AutoModelForCausalLM.from_pretrained('TheBloke/Mistral-7B-OpenOrca-GGUF', model_type='mistral', model_file='mistral-7b-openorca.Q4_K_M.gguf', **vars(cpu_config)) # **asdict(CtransRunConfig_cpu())
         tokenizer = []
     with gr.Tab("Advanced features"):
         model_choice = gr.Radio(label="Choose a chat model", value="Flan Alpaca (small, fast)", choices = ["Flan Alpaca (small, fast)", "Mistral Open Orca (larger, slow)"])
         with gr.Row():
+            gpu_layer_choice = gr.Slider(label="Choose number of model layers to send to GPU (WARNING: please don't modify unless you have a GPU).", value=0, minimum=0, maximum=20, step = 1, visible=True)
             change_model_button = gr.Button(value="Load model", scale=0)
         load_text = gr.Text(label="Load status")

chatfuncs/chatfuncs.py CHANGED Viewed

@@ -69,7 +69,7 @@ kw_model = pipeline("feature-extraction", model="sentence-transformers/all-MiniL
 if torch.cuda.is_available():
     torch_device = "cuda"
-    gpu_layers = 12
 else:
     torch_device =  "cpu"
     gpu_layers = 0
@@ -92,7 +92,7 @@ reset: bool = False
 stream: bool = True
 threads: int = threads
 batch_size:int = 256
-context_length:int = 2048
 sample = True

 if torch.cuda.is_available():
     torch_device = "cuda"
+    gpu_layers = 0
 else:
     torch_device =  "cpu"
     gpu_layers = 0
 stream: bool = True
 threads: int = threads
 batch_size:int = 256
+context_length:int = 4096
 sample = True