---
# Configuration for the LLM API service.
# NOTE(review): this file was recovered from a whitespace-mangled (single-line)
# copy; the key/value pairs are preserved verbatim, but the nesting of
# `generation`, `defaults`, and `cors` is reconstructed — confirm against the
# consuming code's config loader.

# HTTP bind address and port for the API server.
server:
  host: "0.0.0.0"
  port: 8001

# Model storage location (relative to the working directory).
model:
  base_path: "."

# Text-generation sampling parameters passed to the model.
generation:
  max_new_tokens: 256
  do_sample: true
  temperature: 0.7
  repetition_penalty: 1.1

# Default model selection.
defaults:
  # model_name: "huihui-ai/Llama-3.2-3B-Instruct-abliterated"
  model_name: "huihui-ai/Qwen2.5-Coder-32B-Instruct-abliterated"

# Filesystem layout for model weights, caches, and logs.
folders:
  models: "main/models"
  cache: "main/.cache"
  logs: "main/logs"

# Python-logging style configuration (format uses %-style placeholders).
logging:
  level: "INFO"
  format: "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
  file: "llm_api.log"

# API routing and CORS policy.
api:
  version: "v1"
  prefix: "/api"
  cors:
    origins: ["*"]  # allow all origins
    credentials: true