# LLMServer / Dockerfile
# Repository-page residue, kept as comments so the file parses as a Dockerfile:
#   AurelioAguirre's picture · Refactor v2 · cfaa883 · raw · history blame · 739 Bytes
# Image for the LLM server: NVIDIA CUDA 12.1 runtime on Ubuntu 22.04, with the
# distribution Python 3 interpreter running `app.main` as an unprivileged user.
FROM nvidia/cuda:12.1.0-runtime-ubuntu22.04

# Every relative path below (and the container's default cwd) resolves here.
WORKDIR /code

# Install system dependencies.
# - Ubuntu 22.04 does not package python3.12; the interpreter that python3-pip
#   installs into and that CMD launches is the distribution `python3`, so that
#   is the one requested here.
# - DEBIAN_FRONTEND is set inline so package configuration never waits for
#   input during the build, without leaking the variable into the runtime env.
# - NOTE(review): --no-install-recommends is intentionally not used because
#   requirements.txt is not visible here and may need the build tools that
#   python3-pip recommends; add it once that is confirmed unnecessary.
# - The apt lists are removed in the same layer so they never reach the image.
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        git \
        python3 \
        python3-pip \
    && rm -rf /var/lib/apt/lists/*

# Copy only the dependency manifest first so the (slow) install layer below is
# reused from cache until requirements.txt itself changes.
COPY requirements.txt .

# Install Python dependencies for the same interpreter CMD uses; --no-cache-dir
# keeps pip's download cache out of the layer.
RUN python3 -m pip install --no-cache-dir -r requirements.txt

# Create an unprivileged account with a fixed UID/GID so the service does not
# run as root, and let it own the working directory.
RUN groupadd --gid 1000 app \
    && useradd --uid 1000 --gid 1000 --create-home app \
    && chown app:app /code

# Copy the application code, owned by the runtime user.
COPY --chown=app:app ./app /code/app
COPY --chown=app:app ./utils /code/utils

# Pre-create the model cache directory referenced by the ENV block below so it
# exists and is writable by the runtime user.
RUN install -d -o app -g app /code/app/.cache

# Runtime environment:
# - PYTHONPATH lets `app` and `utils` be imported as top-level packages.
# - TRANSFORMERS_CACHE is kept for existing behaviour; HF_HOME points at the
#   same directory because newer Hugging Face releases deprecate
#   TRANSFORMERS_CACHE in favour of HF_HOME.
# - CUDA_VISIBLE_DEVICES=0 restricts the process to the first GPU by default;
#   override with `docker run -e CUDA_VISIBLE_DEVICES=...` if needed.
ENV PYTHONPATH=/code \
    TRANSFORMERS_CACHE=/code/app/.cache \
    HF_HOME=/code/app/.cache \
    CUDA_VISIBLE_DEVICES=0

# Drop root privileges for everything that runs in the container.
USER app

# Document the port the app runs on (EXPOSE does not publish it).
EXPOSE 8000

# Run the application as a module; exec form keeps Python as PID 1 so it
# receives signals from `docker stop` directly.
CMD ["python3", "-m", "app.main"]