---
hub:
  repo_id: TheBloke/vicuna-13b-v1.3.0-GGML
  filename: vicuna-13b-v1.3.0.ggmlv3.q2_K.bin
  # repo_id: TheBloke/Wizard-Vicuna-13B-Uncensored-GGML
  # filename: Wizard-Vicuna-13B-Uncensored.ggmlv3.q4_1.bin

llama_cpp:
  n_ctx: 2048
  # n_gpu_layers: 40  # llama 13b has 40 layers

chat:
  stop:
    - ""
    - ""
    - "### USER:"
    - "USER:"

queue:
  max_size: 16
  concurrency_count: 1  # leave this at 1; llama-cpp-python doesn't handle concurrent requests and will crash the entire app
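
# For reference only: a minimal sketch of how these values might be consumed with
# huggingface_hub and llama-cpp-python. This is an assumption about the app's loader,
# not its actual code; the config filename and variable names below are hypothetical.
#
#   import yaml
#   from huggingface_hub import hf_hub_download
#   from llama_cpp import Llama
#
#   with open("config.yaml") as f:                    # hypothetical config path
#       config = yaml.safe_load(f)
#
#   # download the GGML weights from the hub entry above
#   model_path = hf_hub_download(repo_id=config["hub"]["repo_id"],
#                                filename=config["hub"]["filename"])
#
#   # pass the llama_cpp section straight through as constructor kwargs
#   llm = Llama(model_path=model_path, **config["llama_cpp"])
#
#   # generate with the configured stop sequences
#   output = llm("### USER: Hello\n### ASSISTANT:",
#                stop=config["chat"]["stop"], max_tokens=256)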