LLMServer / main /env_template
AurelioAguirre's picture
Fixed Dockerfile v12
cd8667a
raw
history blame
770 Bytes
# Hugging Face Authentication
# Replace the placeholder below with a real Hugging Face access token.
# Treat the filled-in copy of this template as a secret and keep it out of version control.
HF_TOKEN=your_token_here
# CUDA Device Configuration
# Comma-separated list of GPU indices to expose (e.g., 0 for the first GPU, 0,1 for the first two).
# Comments are kept on their own lines: `docker run --env-file` only recognizes
# lines that START with '#', so a trailing "# ..." would become part of the value.
CUDA_VISIBLE_DEVICES=0,1
# Memory Management
# Comments are kept on their own lines: `docker run --env-file` only recognizes
# lines that START with '#', so a trailing "# ..." would become part of the value.

# PyTorch CUDA caching-allocator tuning; max_split_size_mb is expressed in megabytes.
PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:512

# Set to 1 ONLY while debugging: it makes CUDA kernel launches synchronous,
# which gives accurate error locations but costs throughput. Keep 0 for normal runs.
CUDA_LAUNCH_BLOCKING=0

# 0 disables GPU auto boost for more consistent performance.
CUDA_AUTO_BOOST=0
# Cache Paths
# Both values below are placeholders: replace them with real, writable directories
# (inside a container, these should normally point at a mounted volume).
CUDA_CACHE_PATH=/path/to/cuda/cache
# NOTE(review): newer transformers releases reportedly deprecate TRANSFORMERS_CACHE
# in favor of HF_HOME -- confirm against the version this project pins.
TRANSFORMERS_CACHE=/path/to/transformers/cache
# Performance Settings
# NOTE(review): both variables carry the TF_ (TensorFlow) prefix; presumably they only
# take effect if TensorFlow is loaded in this server -- confirm they are still needed.
TF_ENABLE_ONEDNN_OPTS=1
TF_GPU_ALLOCATOR=cuda_malloc_async
# Model Settings
# Set to 1 for offline mode; 0 leaves offline mode disabled.
# Comment kept on its own line: `docker run --env-file` only recognizes lines that
# START with '#', so a trailing "# ..." would become part of the value.
TRANSFORMERS_OFFLINE=0
# Logging
# Options: DEBUG, INFO, WARNING, ERROR, CRITICAL
# Comment kept on its own line: `docker run --env-file` only recognizes lines that
# START with '#', so a trailing "# ..." would become part of the value.
LOG_LEVEL=INFO
# Add any additional environment-specific variables below