Spaces:

rgerganov
/

llama-rpc-server

Paused

rgerganov committed on Aug 14, 2024

Commit

154fc14

1 Parent(s): daa46bc

Initial commit

Files changed (1) hide show

Dockerfile ADDED Viewed

+ARG UBUNTU_VERSION=22.04
+# This needs to generally match the container host's environment.
+ARG CUDA_VERSION=11.7.1
+# Target the CUDA build image
+ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
+FROM ${BASE_CUDA_DEV_CONTAINER} AS build
+# Unless otherwise specified, we make a fat build.
+ARG CUDA_DOCKER_ARCH=all
+# Install the build toolchain, git, Python and the cURL / OpenMP libraries.
+# The apt package lists are removed in the same layer so they do not persist
+# in the image.
+RUN apt-get update && \
+    apt-get install -y \
+        build-essential \
+        git \
+        libcurl4-openssl-dev \
+        libgomp1 \
+        python3 \
+        python3-pip && \
+    rm -rf /var/lib/apt/lists/*
+# The Python requirements are taken from the build context.
+COPY requirements.txt   requirements.txt
+COPY requirements       requirements
+# --no-cache-dir keeps pip's download cache out of the image layer.
+RUN pip install --no-cache-dir --upgrade pip setuptools wheel \
+    && pip install --no-cache-dir -r requirements.txt
+# Branch or tag of llama.cpp to build. The default follows the upstream
+# master branch; pass --build-arg LLAMA_CPP_REF=<tag> for a reproducible build.
+ARG LLAMA_CPP_REF=master
+RUN git clone --branch "${LLAMA_CPP_REF}" https://github.com/ggerganov/llama.cpp /app
+WORKDIR /app
+# Build configuration exported before running make:
+#   CUDA_DOCKER_ARCH - nvcc architecture
+#   GGML_CUDA        - enable CUDA
+#   LLAMA_CURL       - enable cURL
+#   GGML_RPC         - enable RPC
+ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH} \
+    GGML_CUDA=1 \
+    LLAMA_CURL=1 \
+    GGML_RPC=1
+RUN make -j"$(nproc)"
+# Documents the port that the CMD below passes to rpc-server via -p.
+EXPOSE 7860
+CMD ["/app/rpc-server", "-p", "7860", "-H", "0.0.0.0"]