# Image for a GPU-backed Python 3.12 application started via `python app.py`
# and served on port 7860 (Gradio).
#
# One instruction per line: when the whole file is collapsed onto a single
# line that begins with '#', Docker parses it as one comment and finds no FROM.

# Base image is overridable at build time (--build-arg BASE_IMAGE=...).
# The tag carries a full CUDA patch version, and the "devel" flavour is used
# because it ships nvcc, which flash-attn needs whenever it has to be compiled
# from source instead of using a prebuilt wheel.
ARG BASE_IMAGE=nvidia/cuda:12.1.1-cudnn8-devel-ubuntu20.04
FROM ${BASE_IMAGE}

# Build-time only (ARG, not ENV): stops apt packages such as tzdata from
# prompting interactively and hanging the build.
ARG DEBIAN_FRONTEND=noninteractive

# Set the working directory in the container
WORKDIR /usr/src/app

# Install system dependencies and Python 3.12 in a single layer.
# - curl is required below to fetch get-pip.py.
# - Ubuntu 20.04's own archive has no python3.12 package, so the deadsnakes
#   PPA is added first (software-properties-common provides add-apt-repository).
RUN apt-get update && apt-get install -y \
        build-essential \
        ca-certificates \
        cmake \
        curl \
        ffmpeg \
        git \
        gnupg \
        software-properties-common \
    && add-apt-repository -y ppa:deadsnakes/ppa \
    && apt-get update && apt-get install -y \
        python3.12 \
        python3.12-venv \
        python3.12-dev \
    && rm -rf /var/lib/apt/lists/*

# Install pip for Python 3.12. Download to a file with -f so that an HTTP
# error fails the build instead of piping an error page into the interpreter.
RUN curl -fsSL https://bootstrap.pypa.io/get-pip.py -o /tmp/get-pip.py \
    && python3.12 /tmp/get-pip.py \
    && rm -f /tmp/get-pip.py

# Set Python 3.12 as the default python
RUN update-alternatives --install /usr/bin/python python /usr/bin/python3.12 1

# Copy only the dependency manifest first so the (slow) dependency layers
# below stay cached when application sources change.
COPY requirements.txt ./

# Install Python dependencies. `python -m pip` guarantees the packages land in
# the Python 3.12 environment selected above.
RUN python -m pip install --no-cache-dir -r requirements.txt

# Install Flash Attention 2 build dependencies, then Flash Attention itself.
# --no-build-isolation makes the build use the packages already installed in
# this environment rather than a fresh isolated one.
# NOTE(review): flash-attn presumably needs torch from requirements.txt to be
# installed at this point - confirm requirements.txt provides it.
RUN python -m pip install --no-cache-dir packaging ninja
RUN python -m pip install --no-cache-dir flash-attn --no-build-isolation

# Copy the current directory contents into the container at /usr/src/app
COPY . .

# Expose port 7860 for Gradio
EXPOSE 7860

# Define environment variable to avoid Python buffering
ENV PYTHONUNBUFFERED=1

# Run the application
ENTRYPOINT ["python", "app.py"]