GGUF
Inference Endpoints
Files changed (2)
  1. metadata.yml +0 -4
  2. model.yml +1 -1
metadata.yml DELETED
@@ -1,4 +0,0 @@
1
- # metadata.yml
2
- version: 1
3
- name: mixtral
4
- default: 7x8b-gguf
 
 
 
 
 
model.yml CHANGED
@@ -15,5 +15,5 @@ stream: true # true | false
15
  # Engine / Model Settings
16
  ngl: 33 # GPU-offloaded layers; infer from base config.json -> num_hidden_layers + 1
17
  ctx_len: 32768 # Infer from base config.json -> max_position_embeddings
18
- engine: llama-cpp
19
  prompt_template: "[INST] {prompt} [/INST]"
 
15
  # Engine / Model Settings
16
  ngl: 33 # GPU-offloaded layers; infer from base config.json -> num_hidden_layers + 1
17
  ctx_len: 32768 # Infer from base config.json -> max_position_embeddings
18
+ engine: cortex.llamacpp
19
  prompt_template: "[INST] {prompt} [/INST]"