# chore: removed OPENBLAS_NUM_THREADS as no performance improvement had been observed.
# 41122b6
# Builds llama-cpp-python against OpenBLAS, downloads a quantized
# Mistral-7B-Instruct GGUF model, and launches the app via ./start_server.sh.

# Grab a fresh copy of the Python image
FROM python:3.11-slim

# Install build and runtime dependencies.
# --no-install-recommends plus removing the apt lists in the same layer keeps
# the layer small; ca-certificates is listed explicitly because the model
# download below goes over HTTPS.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        curl \
        libopenblas-dev \
        ninja-build \
        pkg-config && \
    rm -rf /var/lib/apt/lists/*

# Build llama-cpp-python from source with the OpenBLAS backend.
# --no-cache-dir keeps pip's download/wheel cache out of the image; the extras
# spec is quoted so the shell cannot interpret "[server]" as a glob pattern.
# NOTE(review): the package is unpinned, so every rebuild may pick up a newer
# release — consider pinning a known-good version and re-checking CMAKE_ARGS.
RUN pip install --no-cache-dir -U pip setuptools wheel && \
    CMAKE_ARGS="-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS" FORCE_CMAKE=1 \
    pip install --no-cache-dir --verbose "llama-cpp-python[server]"

# Download model.
# --fail makes curl exit non-zero on an HTTP error so the build stops instead of
# silently saving an error page as the model file.
RUN mkdir -p model && \
    curl --fail -L https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf -o model/gguf-model.bin

# Application files are copied after the heavy build/download layers so that
# editing them does not invalidate those cached layers.
COPY ./start_server.sh ./
COPY ./main.py ./
COPY ./index.html ./

# Make the server start script executable
RUN chmod +x ./start_server.sh

# Set environment variables for the host and port
ENV HOST=0.0.0.0 \
    PORT=7860

# Expose a port for the server
EXPOSE ${PORT}

# Run the server start script
CMD ["/bin/sh", "./start_server.sh"]