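# NOTE(review): the text "Spaces: / Paused / Paused" appeared here. It looks like
# page-status residue rather than configuration, so it is kept only as a comment.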
# Service URLs Configuration
# Base URLs for the LLM engine and RAG engine services.
# The ports here match LLM_ENGINE_PORT / RAG_ENGINE_PORT in this file.
LLM_ENGINE_URL=http://localhost:8001
RAG_ENGINE_URL=http://localhost:8002
# LLM Engine Server Configuration
# Host and port for the LLM engine server (presumably its listen address; verify
# against the server start-up code). Port matches the one in LLM_ENGINE_URL.
LLM_ENGINE_HOST=0.0.0.0
LLM_ENGINE_PORT=8001
# RAG Engine Server Configuration (if running locally)
# Host and port for the RAG engine server (presumably its listen address; verify
# against the server start-up code). Port matches the one in RAG_ENGINE_URL.
RAG_ENGINE_HOST=0.0.0.0
RAG_ENGINE_PORT=8002
# Base Paths Configuration
# Placeholder paths: replace with real locations before use.
# NOTE(review): the keys use the prefix "BAS_" (not "BASE_"); confirm this
# matches the names the application actually reads before renaming anything.
BAS_MODEL_PATH=/path/to/your/model
BAS_RESOURCES=/path/to/resources
# CUDA Memory Management
# Options for PyTorch's CUDA caching allocator, written as comma-separated
# option:value pairs. The value must contain nothing else (no trailing text).
PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:128,garbage_collection_threshold:0.8,expandable_segments:True
# Other memory-related settings
# CUDA_LAUNCH_BLOCKING: 0 keeps kernel launches asynchronous; 1 forces
# synchronous launches (useful when debugging).
CUDA_LAUNCH_BLOCKING=0
# CUDA_VISIBLE_DEVICES: comma-separated GPU indices visible to the process.
CUDA_VISIBLE_DEVICES=0
# Logging Configuration
# One of: DEBUG, INFO, WARNING, ERROR, CRITICAL
# (Kept on its own line: not every env-file reader strips inline comments.)
LOG_LEVEL=INFO
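# GPU Configuration (optional)
# Specify which GPUs to use (comma-separated device indices).
# CUDA_VISIBLE_DEVICES is already assigned earlier in this file; edit that
# line instead of defining the variable twice.
# CUDA_VISIBLE_DEVICES=0,1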
# Memory Configuration (optional)
# Maximum GPU memory to use:
# MAX_GPU_MEMORY=16Gi
# Maximum CPU memory to use:
# MAX_CPU_MEMORY=32Gi
# Security (if needed)
# Do not commit real keys or certificate paths; keep actual values in an
# untracked local copy of this file.
# API_KEY=your-api-key-here
# SSL_CERT_PATH=/path/to/cert
# SSL_KEY_PATH=/path/to/key
# Development Settings
# Enable debug mode:
# DEBUG=True
# Enable auto-reload for development:
# RELOAD=False
# Model Default Parameters (optional)
# DEFAULT_MAX_NEW_TOKENS=50
# DEFAULT_TEMPERATURE=1.0
# DEFAULT_TOP_K=50
# DEFAULT_TOP_P=1.0
# Cache Settings (optional)
# CACHE_DIR=/path/to/cache
# MAX_CACHE_SIZE=10Gi
# Monitoring (optional)
# ENABLE_METRICS=True
# PROMETHEUS_PORT=9090