# Service URLs Configuration
LLM_ENGINE_URL=http://localhost:8001
RAG_ENGINE_URL=http://localhost:8002

# LLM Engine Server Configuration
LLM_ENGINE_HOST=0.0.0.0
LLM_ENGINE_PORT=8001

# RAG Engine Server Configuration (if running locally)
RAG_ENGINE_HOST=0.0.0.0
RAG_ENGINE_PORT=8002

# Base Paths Configuration
BAS_MODEL_PATH=/path/to/your/model
BAS_RESOURCES=/path/to/resources

# CUDA Memory Management
PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:128,garbage_collection_threshold:0.8,expandable_segments:True

# Other memory-related settings
CUDA_LAUNCH_BLOCKING=0
CUDA_VISIBLE_DEVICES=0

# Logging Configuration
# One of: DEBUG, INFO, WARNING, ERROR, CRITICAL
LOG_LEVEL=INFO

# GPU Configuration (optional)
# CUDA_VISIBLE_DEVICES=0,1  # Specify which GPUs to use

# Memory Configuration (optional)
# MAX_GPU_MEMORY=16Gi  # Maximum GPU memory to use
# MAX_CPU_MEMORY=32Gi  # Maximum CPU memory to use

# Security (if needed)
# API_KEY=your-api-key-here
# SSL_CERT_PATH=/path/to/cert
# SSL_KEY_PATH=/path/to/key

# Development Settings
# DEBUG=True  # Enable debug mode
# RELOAD=False  # Enable auto-reload for development

# Model Default Parameters (optional)
# DEFAULT_MAX_NEW_TOKENS=50
# DEFAULT_TEMPERATURE=1.0
# DEFAULT_TOP_K=50
# DEFAULT_TOP_P=1.0

# Cache Settings (optional)
# CACHE_DIR=/path/to/cache
# MAX_CACHE_SIZE=10Gi

# Monitoring (optional)
# ENABLE_METRICS=True
# PROMETHEUS_PORT=9090