LLMServer / Dockerfile
# Use NVIDIA CUDA base image
FROM nvidia/cuda:12.1.0-runtime-ubuntu22.04 AS base
# Set working directory to /code (Hugging Face Spaces convention)
WORKDIR /code
# Install system dependencies
RUN apt-get update && apt-get install -y \
python3.10 \
python3-pip \
git \
&& rm -rf /var/lib/apt/lists/*
# Install Python packages
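# requirements.txt is copied on its own before the rest of the source (COPY . . below),
# so this pip layer stays cached when only the application code changes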
COPY requirements.txt .
RUN pip3 install --no-cache-dir -r requirements.txt
# Install any additional dependencies needed for litgpt
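# (roughly: einops/xformers for attention ops, bitsandbytes/accelerate for quantized
# and multi-device loading, sentencepiece for tokenization)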
RUN pip3 install --no-cache-dir \
einops \
xformers \
bitsandbytes \
accelerate \
sentencepiece
# Copy the application code
COPY . .
# Create model directory structure
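# The vendor folders (meta, microsoft, mistralai) are assumed to hold model
# checkpoints downloaded at runtime, in whatever layout main.py / litgpt expects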
RUN mkdir -p /code/checkout/meta \
/code/checkout/microsoft \
/code/checkout/mistralai
# Set environment variables
ENV PYTHONPATH=/code
ENV LLM_ENGINE_HOST=0.0.0.0
ENV LLM_ENGINE_PORT=7860
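# LLM_ENGINE_HOST/LLM_ENGINE_PORT are assumed to be read by main.py when binding
# the server; 7860 is the port Hugging Face Spaces routes traffic to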
# Expose the port the app runs on (matches LLM_ENGINE_PORT and the Spaces default)
EXPOSE 7860
# Command to run the application
CMD ["python3", "main.py"]
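# Example local build/run (image name and GPU flag are illustrative):
#   docker build -t llmserver .
#   docker run --gpus all -p 7860:7860 llmserver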