# syntax=docker/dockerfile:1
# Triton Inference Server image with a freshly built TensorRT OSS plugin
# library and the SpaceLLaVA GGUF weights, served via llama-cpp-python.
FROM nvcr.io/nvidia/tritonserver:22.11-py3

WORKDIR /workspace

# cmake is required to build the TensorRT OSS plugin library below.
# Install + apt-cache cleanup happen in one layer so the lists never
# persist in the image (hadolint DL3009/DL3015).
RUN apt-get update \
    && apt-get install -y --no-install-recommends cmake \
    && rm -rf /var/lib/apt/lists/*

RUN pip install --no-cache-dir --upgrade pip \
    && pip install --no-cache-dir --upgrade tensorrt

# Branch or tag of the TensorRT OSS tree to build. Defaults to `main`
# (original behavior); pass --build-arg TRT_OSS_BRANCH=<tag> for a
# reproducible build pinned to a release.
ARG TRT_OSS_BRANCH=main
RUN git clone https://github.com/NVIDIA/TensorRT.git -b ${TRT_OSS_BRANCH} --single-branch \
    && cd TensorRT \
    && git submodule update --init --recursive

ENV TRT_OSSPATH=/workspace/TensorRT

# Configure in the build dir (WORKDIR creates it), then build only the
# plugin target — we just need libnvinfer_plugin.so, not the full OSS tree.
WORKDIR ${TRT_OSSPATH}/build
RUN cmake .. -DTRT_OUT_DIR=$PWD/out \
    && cd plugin \
    && make -j$(nproc)

# Path consumed at runtime (e.g. LD_PRELOAD) to load the freshly built plugins.
ENV PLUGIN_LIBS="${TRT_OSSPATH}/build/out/libnvinfer_plugin.so"

# SpaceLLaVA quantized language model + CLIP multimodal projector, fetched
# in one layer. NOTE(review): upstream publishes no checksums — pinning by
# commit instead of `main` would make these downloads reproducible.
WORKDIR /weights
RUN wget https://huggingface.co/remyxai/SpaceLLaVA/resolve/main/ggml-model-q4_0.gguf \
    && wget https://huggingface.co/remyxai/SpaceLLaVA/resolve/main/mmproj-model-f16.gguf

# CUDA 11.8 PyTorch wheels — the cu118 index matches the CUDA toolkit
# shipped in the 22.11 Triton base image.
RUN python3 -m pip install --no-cache-dir \
        torch==2.0.1 torchvision==0.15.2 torchaudio==2.0.2 \
        --index-url https://download.pytorch.org/whl/cu118

# Rebuild llama-cpp-python from source with cuBLAS enabled so GGUF
# inference runs on the GPU; --force-reinstall discards any CPU-only wheel.
RUN CMAKE_ARGS="-DLLAMA_CUBLAS=on" pip install llama-cpp-python==0.2.45 --force-reinstall --no-cache-dir

# Triton model repository (COPY creates /models; no WORKDIR hop needed).
COPY ./models/ /models/

WORKDIR /workspace
CMD ["tritonserver", "--model-store=/models"]