lhhj committed
Commit
57e3690
1 Parent(s): 4aac1ef
This view is limited to 50 files because the commit contains too many changes. See the raw diff for the full change set.
Files changed (50)
  1. .clang-tidy +24 -0
  2. .devops/cloud-v-pipeline +22 -0
  3. .devops/full-cuda.Dockerfile +33 -0
  4. .devops/full-musa.Dockerfile +26 -0
  5. .devops/full-rocm.Dockerfile +50 -0
  6. .devops/full.Dockerfile +25 -0
  7. .devops/llama-cli-cann.Dockerfile +44 -0
  8. .devops/llama-cli-cuda.Dockerfile +37 -0
  9. .devops/llama-cli-intel.Dockerfile +28 -0
  10. .devops/llama-cli-musa.Dockerfile +30 -0
  11. .devops/llama-cli-rocm.Dockerfile +45 -0
  12. .devops/llama-cli-vulkan.Dockerfile +27 -0
  13. .devops/llama-cli.Dockerfile +23 -0
  14. .devops/llama-cpp-cuda.srpm.spec +83 -0
  15. .devops/llama-cpp.srpm.spec +85 -0
  16. .devops/llama-server-cuda.Dockerfile +42 -0
  17. .devops/llama-server-intel.Dockerfile +34 -0
  18. .devops/llama-server-musa.Dockerfile +35 -0
  19. .devops/llama-server-rocm.Dockerfile +54 -0
  20. .devops/llama-server-vulkan.Dockerfile +31 -0
  21. .devops/llama-server.Dockerfile +29 -0
  22. .devops/nix/apps.nix +21 -0
  23. .devops/nix/devshells.nix +52 -0
  24. .devops/nix/docker.nix +37 -0
  25. .devops/nix/jetson-support.nix +39 -0
  26. .devops/nix/nixpkgs-instances.nix +45 -0
  27. .devops/nix/package-gguf-py.nix +36 -0
  28. .devops/nix/package.nix +246 -0
  29. .devops/nix/python-scripts.nix +66 -0
  30. .devops/nix/scope.nix +41 -0
  31. .devops/nix/sif.nix +27 -0
  32. .devops/tools.sh +41 -0
  33. .dockerignore +20 -0
  34. .ecrc +6 -0
  35. .editorconfig +32 -0
  36. .flake8 +17 -0
  37. .gitattributes +1 -0
  38. .gitignore +135 -0
  39. .gitmodules +3 -0
  40. .pre-commit-config.yaml +16 -0
  41. AUTHORS +782 -0
  42. CMakeLists.txt +216 -0
  43. CMakePresets.json +81 -0
  44. CONTRIBUTING.md +33 -0
  45. LICENSE +21 -0
  46. Makefile +1702 -0
  47. Package.swift +80 -0
  48. SECURITY.md +67 -0
  49. ci/README.md +29 -0
  50. ci/run.sh +851 -0
.clang-tidy ADDED
@@ -0,0 +1,24 @@
+ ---
+ Checks: >
+ bugprone-*,
+ -bugprone-easily-swappable-parameters,
+ -bugprone-implicit-widening-of-multiplication-result,
+ -bugprone-misplaced-widening-cast,
+ -bugprone-narrowing-conversions,
+ readability-*,
+ -readability-avoid-unconditional-preprocessor-if,
+ -readability-function-cognitive-complexity,
+ -readability-identifier-length,
+ -readability-implicit-bool-conversion,
+ -readability-magic-numbers,
+ -readability-uppercase-literal-suffix,
+ -readability-simplify-boolean-expr,
+ clang-analyzer-*,
+ -clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling,
+ performance-*,
+ portability-*,
+ misc-*,
+ -misc-const-correctness,
+ -misc-non-private-member-variables-in-classes,
+ -misc-no-recursion,
+ FormatStyle: none
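A quick way to exercise this configuration locally (a sketch, not part of the commit: it assumes clang-tidy and the run-clang-tidy wrapper from LLVM are installed and that CMake has produced a compile_commands.json):
$ cmake -B build -DCMAKE_EXPORT_COMPILE_COMMANDS=ON
$ run-clang-tidy -p build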
.devops/cloud-v-pipeline ADDED
@@ -0,0 +1,22 @@
+ node('x86_runner1'){ // Running on x86 runner containing latest vector qemu, latest vector gcc and all the necessary libraries
+ stage('Cleanup'){
+ cleanWs() // Cleaning previous CI build in workspace
+ }
+ stage('checkout repo'){
+ retry(5){ // Retry if the cloning fails due to some reason
+ checkout scm // Clone the repo on Runner
+ }
+ }
+ stage('Compiling llama.cpp'){
+ sh'''#!/bin/bash
+ make RISCV=1 RISCV_CROSS_COMPILE=1 # Compiling llama for RISC-V
+ '''
+ }
+ stage('Running llama.cpp'){
+ sh'''#!/bin/bash
+ module load gnu-bin2/0.1 # loading latest versions of vector qemu and vector gcc
+ qemu-riscv64 -L /softwares/gnu-bin2/sysroot -cpu rv64,v=true,vlen=256,elen=64,vext_spec=v1.0 ./llama-cli -m /home/alitariq/codellama-7b.Q4_K_M.gguf -p "Anything" -n 9 > llama_log.txt # Running llama.cpp on vector qemu-riscv64
+ cat llama_log.txt # Printing results
+ '''
+ }
+ }
.devops/full-cuda.Dockerfile ADDED
@@ -0,0 +1,33 @@
+ ARG UBUNTU_VERSION=22.04
+ # This needs to generally match the container host's environment.
+ ARG CUDA_VERSION=12.6.0
+ # Target the CUDA build image
+ ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
+
+ FROM ${BASE_CUDA_DEV_CONTAINER} AS build
+
+ # CUDA architecture to build for (defaults to all supported archs)
+ ARG CUDA_DOCKER_ARCH=default
+
+ RUN apt-get update && \
+ apt-get install -y build-essential cmake python3 python3-pip git libcurl4-openssl-dev libgomp1
+
+ COPY requirements.txt requirements.txt
+ COPY requirements requirements
+
+ RUN pip install --upgrade pip setuptools wheel \
+ && pip install -r requirements.txt
+
+ WORKDIR /app
+
+ COPY . .
+
+ # Use the default CUDA archs if not specified
+ RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \
+ export CMAKE_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH}"; \
+ fi && \
+ cmake -B build -DGGML_CUDA=ON -DLLAMA_CURL=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
+ cmake --build build --config Release -j$(nproc) && \
+ cp build/bin/* .
+
+ ENTRYPOINT ["/app/.devops/tools.sh"]
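A minimal build-and-run sketch for this image (not part of the commit; the image tag, architecture value, and model path are illustrative, and GPU access assumes the NVIDIA Container Toolkit):
$ docker build -t local/llama.cpp:full-cuda -f .devops/full-cuda.Dockerfile --build-arg CUDA_DOCKER_ARCH=86 .
$ docker run --gpus all -v /path/to/models:/models local/llama.cpp:full-cuda --run -m /models/7B/ggml-model-q4_0.gguf -p "Hello" -n 64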
.devops/full-musa.Dockerfile ADDED
@@ -0,0 +1,26 @@
+ ARG UBUNTU_VERSION=22.04
+ # This needs to generally match the container host's environment.
+ ARG MUSA_VERSION=rc3.1.0
+ # Target the MUSA build image
+ ARG BASE_MUSA_DEV_CONTAINER=mthreads/musa:${MUSA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
+
+ FROM ${BASE_MUSA_DEV_CONTAINER} AS build
+
+ RUN apt-get update && \
+ apt-get install -y build-essential cmake python3 python3-pip git libcurl4-openssl-dev libgomp1
+
+ COPY requirements.txt requirements.txt
+ COPY requirements requirements
+
+ RUN pip install --upgrade pip setuptools wheel \
+ && pip install -r requirements.txt
+
+ WORKDIR /app
+
+ COPY . .
+
+ RUN cmake -B build -DGGML_MUSA=ON -DLLAMA_CURL=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
+ cmake --build build --config Release -j$(nproc) && \
+ cp build/bin/* .
+
+ ENTRYPOINT ["/app/.devops/tools.sh"]
.devops/full-rocm.Dockerfile ADDED
@@ -0,0 +1,50 @@
+ ARG UBUNTU_VERSION=22.04
+
+ # This needs to generally match the container host's environment.
+ ARG ROCM_VERSION=5.6
+
+ # Target the CUDA build image
+ ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete
+
+ FROM ${BASE_ROCM_DEV_CONTAINER} AS build
+
+ # Unless otherwise specified, we make a fat build.
+ # List from https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878
+ # This is mostly tied to rocBLAS supported archs.
+ ARG ROCM_DOCKER_ARCH="\
+ gfx803 \
+ gfx900 \
+ gfx906 \
+ gfx908 \
+ gfx90a \
+ gfx1010 \
+ gfx1030 \
+ gfx1100 \
+ gfx1101 \
+ gfx1102"
+
+ COPY requirements.txt requirements.txt
+ COPY requirements requirements
+
+ RUN pip install --upgrade pip setuptools wheel \
+ && pip install -r requirements.txt
+
+ WORKDIR /app
+
+ COPY . .
+
+ # Set nvcc architecture
+ ENV AMDGPU_TARGETS=${ROCM_DOCKER_ARCH}
+ # Enable ROCm
+ ENV GGML_HIPBLAS=1
+ ENV CC=/opt/rocm/llvm/bin/clang
+ ENV CXX=/opt/rocm/llvm/bin/clang++
+
+ # Enable cURL
+ ENV LLAMA_CURL=1
+ RUN apt-get update && \
+ apt-get install -y libcurl4-openssl-dev
+
+ RUN make -j$(nproc)
+
+ ENTRYPOINT ["/app/.devops/tools.sh"]
.devops/full.Dockerfile ADDED
@@ -0,0 +1,25 @@
+ ARG UBUNTU_VERSION=22.04
+
+ FROM ubuntu:$UBUNTU_VERSION AS build
+
+ RUN apt-get update && \
+ apt-get install -y build-essential python3 python3-pip git libcurl4-openssl-dev libgomp1
+
+ COPY requirements.txt requirements.txt
+ COPY requirements requirements
+
+ RUN pip install --upgrade pip setuptools wheel \
+ && pip install -r requirements.txt
+
+ WORKDIR /app
+
+ COPY . .
+
+ ENV LLAMA_CURL=1
+
+
+ RUN make -j$(nproc)
+
+ ENV LC_ALL=C.utf8
+
+ ENTRYPOINT ["/app/.devops/tools.sh"]
.devops/llama-cli-cann.Dockerfile ADDED
@@ -0,0 +1,44 @@
+ ARG ASCEND_VERSION=8.0.rc2.alpha003-910b-openeuler22.03-py3.8
+
+ FROM cosdt/cann:$ASCEND_VERSION AS build
+
+ WORKDIR /app
+
+ COPY . .
+
+ RUN yum install -y gcc g++ cmake make
+ ENV ASCEND_TOOLKIT_HOME=/usr/local/Ascend/ascend-toolkit/latest
+ ENV LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:$LIBRARY_PATH
+ ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${ASCEND_TOOLKIT_HOME}/lib64/plugin/opskernel:${ASCEND_TOOLKIT_HOME}/lib64/plugin/nnengine:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe/op_tiling:${LD_LIBRARY_PATH}
+ ENV PYTHONPATH=${ASCEND_TOOLKIT_HOME}/python/site-packages:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe:${PYTHONPATH}
+ ENV PATH=${ASCEND_TOOLKIT_HOME}/bin:${ASCEND_TOOLKIT_HOME}/compiler/ccec_compiler/bin:${PATH}
+ ENV ASCEND_AICPU_PATH=${ASCEND_TOOLKIT_HOME}
+ ENV ASCEND_OPP_PATH=${ASCEND_TOOLKIT_HOME}/opp
+ ENV TOOLCHAIN_HOME=${ASCEND_TOOLKIT_HOME}/toolkit
+ ENV ASCEND_HOME_PATH=${ASCEND_TOOLKIT_HOME}
+
+ # find libascend_hal.so, because the driver hasn't been mounted.
+ ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/runtime/lib64/stub:$LD_LIBRARY_PATH
+
+ RUN echo "Building with static libs" && \
+ source /usr/local/Ascend/ascend-toolkit/set_env.sh --force && \
+ cmake -B build -DGGML_CANN=ON -DBUILD_SHARED_LIBS=OFF && \
+ cmake --build build --config Release --target llama-cli
+
+ # TODO: use image with NNRT
+ FROM cosdt/cann:$ASCEND_VERSION AS runtime
+ COPY --from=build /app/build/bin/llama-cli /llama-cli
+
+ ENV LC_ALL=C.utf8
+
+ ENV ASCEND_TOOLKIT_HOME=/usr/local/Ascend/ascend-toolkit/latest
+ ENV LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:$LIBRARY_PATH
+ ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${ASCEND_TOOLKIT_HOME}/lib64/plugin/opskernel:${ASCEND_TOOLKIT_HOME}/lib64/plugin/nnengine:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe/op_tiling:${LD_LIBRARY_PATH}
+ ENV PYTHONPATH=${ASCEND_TOOLKIT_HOME}/python/site-packages:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe:${PYTHONPATH}
+ ENV PATH=${ASCEND_TOOLKIT_HOME}/bin:${ASCEND_TOOLKIT_HOME}/compiler/ccec_compiler/bin:${PATH}
+ ENV ASCEND_AICPU_PATH=${ASCEND_TOOLKIT_HOME}
+ ENV ASCEND_OPP_PATH=${ASCEND_TOOLKIT_HOME}/opp
+ ENV TOOLCHAIN_HOME=${ASCEND_TOOLKIT_HOME}/toolkit
+ ENV ASCEND_HOME_PATH=${ASCEND_TOOLKIT_HOME}
+
+ ENTRYPOINT ["/llama-cli" ]
.devops/llama-cli-cuda.Dockerfile ADDED
@@ -0,0 +1,37 @@
+ ARG UBUNTU_VERSION=22.04
+ # This needs to generally match the container host's environment.
+ ARG CUDA_VERSION=12.6.0
+ # Target the CUDA build image
+ ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
+ # Target the CUDA runtime image
+ ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}
+
+ FROM ${BASE_CUDA_DEV_CONTAINER} AS build
+
+ # CUDA architecture to build for (defaults to all supported archs)
+ ARG CUDA_DOCKER_ARCH=default
+
+ RUN apt-get update && \
+ apt-get install -y build-essential git cmake
+
+ WORKDIR /app
+
+ COPY . .
+
+ # Use the default CUDA archs if not specified
+ RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \
+ export CMAKE_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH}"; \
+ fi && \
+ cmake -B build -DGGML_CUDA=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
+ cmake --build build --config Release --target llama-cli -j$(nproc)
+
+ FROM ${BASE_CUDA_RUN_CONTAINER} AS runtime
+
+ RUN apt-get update && \
+ apt-get install -y libgomp1
+
+ COPY --from=build /app/build/ggml/src/libggml.so /libggml.so
+ COPY --from=build /app/build/src/libllama.so /libllama.so
+ COPY --from=build /app/build/bin/llama-cli /llama-cli
+
+ ENTRYPOINT [ "/llama-cli" ]
.devops/llama-cli-intel.Dockerfile ADDED
@@ -0,0 +1,28 @@
+ ARG ONEAPI_VERSION=2024.1.1-devel-ubuntu22.04
+
+ FROM intel/oneapi-basekit:$ONEAPI_VERSION AS build
+
+ ARG GGML_SYCL_F16=OFF
+ RUN apt-get update && \
+ apt-get install -y git
+
+ WORKDIR /app
+
+ COPY . .
+
+ RUN if [ "${GGML_SYCL_F16}" = "ON" ]; then \
+ echo "GGML_SYCL_F16 is set" && \
+ export OPT_SYCL_F16="-DGGML_SYCL_F16=ON"; \
+ fi && \
+ echo "Building with static libs" && \
+ cmake -B build -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx \
+ ${OPT_SYCL_F16} -DBUILD_SHARED_LIBS=OFF && \
+ cmake --build build --config Release --target llama-cli
+
+ FROM intel/oneapi-basekit:$ONEAPI_VERSION AS runtime
+
+ COPY --from=build /app/build/bin/llama-cli /llama-cli
+
+ ENV LC_ALL=C.utf8
+
+ ENTRYPOINT [ "/llama-cli" ]
.devops/llama-cli-musa.Dockerfile ADDED
@@ -0,0 +1,30 @@
+ ARG UBUNTU_VERSION=22.04
+ # This needs to generally match the container host's environment.
+ ARG MUSA_VERSION=rc3.1.0
+ # Target the MUSA build image
+ ARG BASE_MUSA_DEV_CONTAINER=mthreads/musa:${MUSA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
+ # Target the MUSA runtime image
+ ARG BASE_MUSA_RUN_CONTAINER=mthreads/musa:${MUSA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}
+
+ FROM ${BASE_MUSA_DEV_CONTAINER} AS build
+
+ RUN apt-get update && \
+ apt-get install -y build-essential git cmake
+
+ WORKDIR /app
+
+ COPY . .
+
+ RUN cmake -B build -DGGML_MUSA=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
+ cmake --build build --config Release --target llama-cli -j$(nproc)
+
+ FROM ${BASE_MUSA_RUN_CONTAINER} AS runtime
+
+ RUN apt-get update && \
+ apt-get install -y libgomp1
+
+ COPY --from=build /app/build/ggml/src/libggml.so /libggml.so
+ COPY --from=build /app/build/src/libllama.so /libllama.so
+ COPY --from=build /app/build/bin/llama-cli /llama-cli
+
+ ENTRYPOINT [ "/llama-cli" ]
.devops/llama-cli-rocm.Dockerfile ADDED
@@ -0,0 +1,45 @@
+ ARG UBUNTU_VERSION=22.04
+
+ # This needs to generally match the container host's environment.
+ ARG ROCM_VERSION=5.6
+
+ # Target the CUDA build image
+ ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete
+
+ FROM ${BASE_ROCM_DEV_CONTAINER} AS build
+
+ # Unless otherwise specified, we make a fat build.
+ # List from https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878
+ # This is mostly tied to rocBLAS supported archs.
+ ARG ROCM_DOCKER_ARCH="\
+ gfx803 \
+ gfx900 \
+ gfx906 \
+ gfx908 \
+ gfx90a \
+ gfx1010 \
+ gfx1030 \
+ gfx1100 \
+ gfx1101 \
+ gfx1102"
+
+ COPY requirements.txt requirements.txt
+ COPY requirements requirements
+
+ RUN pip install --upgrade pip setuptools wheel \
+ && pip install -r requirements.txt
+
+ WORKDIR /app
+
+ COPY . .
+
+ # Set nvcc architecture
+ ENV AMDGPU_TARGETS=${ROCM_DOCKER_ARCH}
+ # Enable ROCm
+ ENV GGML_HIPBLAS=1
+ ENV CC=/opt/rocm/llvm/bin/clang
+ ENV CXX=/opt/rocm/llvm/bin/clang++
+
+ RUN make -j$(nproc) llama-cli
+
+ ENTRYPOINT [ "/app/llama-cli" ]
.devops/llama-cli-vulkan.Dockerfile ADDED
@@ -0,0 +1,27 @@
+ ARG UBUNTU_VERSION=jammy
+
+ FROM ubuntu:$UBUNTU_VERSION AS build
+
+ # Install build tools
+ RUN apt update && apt install -y git build-essential cmake wget libgomp1
+
+ # Install Vulkan SDK
+ RUN wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
+ wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \
+ apt update -y && \
+ apt-get install -y vulkan-sdk
+
+ # Build it
+ WORKDIR /app
+ COPY . .
+ RUN cmake -B build -DGGML_VULKAN=1 && \
+ cmake --build build --config Release --target llama-cli
+
+ # Clean up
+ WORKDIR /
+ RUN cp /app/build/bin/llama-cli /llama-cli && \
+ rm -rf /app
+
+ ENV LC_ALL=C.utf8
+
+ ENTRYPOINT [ "/llama-cli" ]
.devops/llama-cli.Dockerfile ADDED
@@ -0,0 +1,23 @@
+ ARG UBUNTU_VERSION=22.04
+
+ FROM ubuntu:$UBUNTU_VERSION AS build
+
+ RUN apt-get update && \
+ apt-get install -y build-essential git
+
+ WORKDIR /app
+
+ COPY . .
+
+ RUN make -j$(nproc) llama-cli
+
+ FROM ubuntu:$UBUNTU_VERSION AS runtime
+
+ RUN apt-get update && \
+ apt-get install -y libgomp1
+
+ COPY --from=build /app/llama-cli /llama-cli
+
+ ENV LC_ALL=C.utf8
+
+ ENTRYPOINT [ "/llama-cli" ]
.devops/llama-cpp-cuda.srpm.spec ADDED
@@ -0,0 +1,83 @@
1
+ # SRPM for building from source and packaging an RPM for RPM-based distros.
2
+ # https://docs.fedoraproject.org/en-US/quick-docs/creating-rpm-packages
3
+ # Built and maintained by John Boero - boeroboy@gmail.com
4
+ # In honor of Seth Vidal https://www.redhat.com/it/blog/thank-you-seth-vidal
5
+
6
+ # Notes for llama.cpp:
7
+ # 1. Tags are currently based on hash - which will not sort asciibetically.
8
+ # We need to declare standard versioning if people want to sort latest releases.
9
+ # 2. Builds for CUDA/OpenCL support are separate, with different dependencies.
10
+ # 3. NVidia's developer repo must be enabled with nvcc, cublas, clblas, etc installed.
11
+ # Example: https://developer.download.nvidia.com/compute/cuda/repos/fedora37/x86_64/cuda-fedora37.repo
12
+ # 4. OpenCL/CLBLAST support simply requires the ICD loader and basic opencl libraries.
13
+ # It is up to the user to install the correct vendor-specific support.
14
+
15
+ Name: llama.cpp-cuda
16
+ Version: %( date "+%%Y%%m%%d" )
17
+ Release: 1%{?dist}
18
+ Summary: CPU Inference of LLaMA model in pure C/C++ (no CUDA/OpenCL)
19
+ License: MIT
20
+ Source0: https://github.com/ggerganov/llama.cpp/archive/refs/heads/master.tar.gz
21
+ BuildRequires: coreutils make gcc-c++ git cuda-toolkit
22
+ Requires: cuda-toolkit
23
+ URL: https://github.com/ggerganov/llama.cpp
24
+
25
+ %define debug_package %{nil}
26
+ %define source_date_epoch_from_changelog 0
27
+
28
+ %description
29
+ CPU inference for Meta's Llama2 models using default options.
30
+
31
+ %prep
32
+ %setup -n llama.cpp-master
33
+
34
+ %build
35
+ make -j GGML_CUDA=1
36
+
37
+ %install
38
+ mkdir -p %{buildroot}%{_bindir}/
39
+ cp -p llama-cli %{buildroot}%{_bindir}/llama-cuda-cli
40
+ cp -p llama-server %{buildroot}%{_bindir}/llama-cuda-server
41
+ cp -p llama-simple %{buildroot}%{_bindir}/llama-cuda-simple
42
+
43
+ mkdir -p %{buildroot}/usr/lib/systemd/system
44
+ %{__cat} <<EOF > %{buildroot}/usr/lib/systemd/system/llamacuda.service
45
+ [Unit]
46
+ Description=Llama.cpp server, CPU only (no GPU support in this build).
47
+ After=syslog.target network.target local-fs.target remote-fs.target nss-lookup.target
48
+
49
+ [Service]
50
+ Type=simple
51
+ EnvironmentFile=/etc/sysconfig/llama
52
+ ExecStart=/usr/bin/llama-cuda-server $LLAMA_ARGS
53
+ ExecReload=/bin/kill -s HUP $MAINPID
54
+ Restart=never
55
+
56
+ [Install]
57
+ WantedBy=default.target
58
+ EOF
59
+
60
+ mkdir -p %{buildroot}/etc/sysconfig
61
+ %{__cat} <<EOF > %{buildroot}/etc/sysconfig/llama
62
+ LLAMA_ARGS="-m /opt/llama2/ggml-model-f32.bin"
63
+ EOF
64
+
65
+ %clean
66
+ rm -rf %{buildroot}
67
+ rm -rf %{_builddir}/*
68
+
69
+ %files
70
+ %{_bindir}/llama-cuda-cli
71
+ %{_bindir}/llama-cuda-server
72
+ %{_bindir}/llama-cuda-simple
73
+ /usr/lib/systemd/system/llamacuda.service
74
+ %config /etc/sysconfig/llama
75
+
76
+ %pre
77
+
78
+ %post
79
+
80
+ %preun
81
+ %postun
82
+
83
+ %changelog
.devops/llama-cpp.srpm.spec ADDED
@@ -0,0 +1,85 @@
1
+ # SRPM for building from source and packaging an RPM for RPM-based distros.
2
+ # https://docs.fedoraproject.org/en-US/quick-docs/creating-rpm-packages
3
+ # Built and maintained by John Boero - boeroboy@gmail.com
4
+ # In honor of Seth Vidal https://www.redhat.com/it/blog/thank-you-seth-vidal
5
+
6
+ # Notes for llama.cpp:
7
+ # 1. Tags are currently based on hash - which will not sort asciibetically.
8
+ # We need to declare standard versioning if people want to sort latest releases.
9
+ # In the meantime, YYYYMMDD format will be used.
10
+ # 2. Builds for CUDA/OpenCL support are separate, with different dependencies.
11
+ # 3. NVidia's developer repo must be enabled with nvcc, cublas, clblas, etc installed.
12
+ # Example: https://developer.download.nvidia.com/compute/cuda/repos/fedora37/x86_64/cuda-fedora37.repo
13
+ # 4. OpenCL/CLBLAST support simply requires the ICD loader and basic opencl libraries.
14
+ # It is up to the user to install the correct vendor-specific support.
15
+
16
+ Name: llama.cpp
17
+ Version: %( date "+%%Y%%m%%d" )
18
+ Release: 1%{?dist}
19
+ Summary: CPU Inference of LLaMA model in pure C/C++ (no CUDA/OpenCL)
20
+ License: MIT
21
+ Source0: https://github.com/ggerganov/llama.cpp/archive/refs/heads/master.tar.gz
22
+ BuildRequires: coreutils make gcc-c++ git libstdc++-devel
23
+ Requires: libstdc++
24
+ URL: https://github.com/ggerganov/llama.cpp
25
+
26
+ %define debug_package %{nil}
27
+ %define source_date_epoch_from_changelog 0
28
+
29
+ %description
30
+ CPU inference for Meta's Llama2 models using default options.
31
+ Models are not included in this package and must be downloaded separately.
32
+
33
+ %prep
34
+ %setup -n llama.cpp-master
35
+
36
+ %build
37
+ make -j
38
+
39
+ %install
40
+ mkdir -p %{buildroot}%{_bindir}/
41
+ cp -p llama-cli %{buildroot}%{_bindir}/llama-cli
42
+ cp -p llama-server %{buildroot}%{_bindir}/llama-server
43
+ cp -p llama-simple %{buildroot}%{_bindir}/llama-simple
44
+
45
+ mkdir -p %{buildroot}/usr/lib/systemd/system
46
+ %{__cat} <<EOF > %{buildroot}/usr/lib/systemd/system/llama.service
47
+ [Unit]
48
+ Description=Llama.cpp server, CPU only (no GPU support in this build).
49
+ After=syslog.target network.target local-fs.target remote-fs.target nss-lookup.target
50
+
51
+ [Service]
52
+ Type=simple
53
+ EnvironmentFile=/etc/sysconfig/llama
54
+ ExecStart=/usr/bin/llama-server $LLAMA_ARGS
55
+ ExecReload=/bin/kill -s HUP $MAINPID
56
+ Restart=never
57
+
58
+ [Install]
59
+ WantedBy=default.target
60
+ EOF
61
+
62
+ mkdir -p %{buildroot}/etc/sysconfig
63
+ %{__cat} <<EOF > %{buildroot}/etc/sysconfig/llama
64
+ LLAMA_ARGS="-m /opt/llama2/ggml-model-f32.bin"
65
+ EOF
66
+
67
+ %clean
68
+ rm -rf %{buildroot}
69
+ rm -rf %{_builddir}/*
70
+
71
+ %files
72
+ %{_bindir}/llama-cli
73
+ %{_bindir}/llama-server
74
+ %{_bindir}/llama-simple
75
+ /usr/lib/systemd/system/llama.service
76
+ %config /etc/sysconfig/llama
77
+
78
+ %pre
79
+
80
+ %post
81
+
82
+ %preun
83
+ %postun
84
+
85
+ %changelog
.devops/llama-server-cuda.Dockerfile ADDED
@@ -0,0 +1,42 @@
+ ARG UBUNTU_VERSION=22.04
+ # This needs to generally match the container host's environment.
+ ARG CUDA_VERSION=12.6.0
+ # Target the CUDA build image
+ ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
+ # Target the CUDA runtime image
+ ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}
+
+ FROM ${BASE_CUDA_DEV_CONTAINER} AS build
+
+ # CUDA architecture to build for (defaults to all supported archs)
+ ARG CUDA_DOCKER_ARCH=default
+
+ RUN apt-get update && \
+ apt-get install -y build-essential git cmake libcurl4-openssl-dev
+
+ WORKDIR /app
+
+ COPY . .
+
+ # Use the default CUDA archs if not specified
+ RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \
+ export CMAKE_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH}"; \
+ fi && \
+ cmake -B build -DGGML_CUDA=ON -DLLAMA_CURL=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
+ cmake --build build --config Release --target llama-server -j$(nproc)
+
+ FROM ${BASE_CUDA_RUN_CONTAINER} AS runtime
+
+ RUN apt-get update && \
+ apt-get install -y libcurl4-openssl-dev libgomp1 curl
+
+ COPY --from=build /app/build/ggml/src/libggml.so /libggml.so
+ COPY --from=build /app/build/src/libllama.so /libllama.so
+ COPY --from=build /app/build/bin/llama-server /llama-server
+
+ # Must be set to 0.0.0.0 so it can listen to requests from host machine
+ ENV LLAMA_ARG_HOST=0.0.0.0
+
+ HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
+
+ ENTRYPOINT [ "/llama-server" ]
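A usage sketch for the resulting server image (not part of the commit; the tag, paths, and port are illustrative, and GPU access assumes the NVIDIA Container Toolkit):
$ docker build -t local/llama.cpp:server-cuda -f .devops/llama-server-cuda.Dockerfile .
$ docker run --gpus all -p 8080:8080 -v /path/to/models:/models local/llama.cpp:server-cuda -m /models/7B/ggml-model-q4_0.gguf -ngl 99
$ curl http://localhost:8080/health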
.devops/llama-server-intel.Dockerfile ADDED
@@ -0,0 +1,34 @@
+ ARG ONEAPI_VERSION=2024.1.1-devel-ubuntu22.04
+
+ FROM intel/oneapi-basekit:$ONEAPI_VERSION AS build
+
+ ARG GGML_SYCL_F16=OFF
+ RUN apt-get update && \
+ apt-get install -y git libcurl4-openssl-dev
+
+ WORKDIR /app
+
+ COPY . .
+
+ RUN if [ "${GGML_SYCL_F16}" = "ON" ]; then \
+ echo "GGML_SYCL_F16 is set" && \
+ export OPT_SYCL_F16="-DGGML_SYCL_F16=ON"; \
+ fi && \
+ echo "Building with dynamic libs" && \
+ cmake -B build -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_CURL=ON ${OPT_SYCL_F16} && \
+ cmake --build build --config Release --target llama-server
+
+ FROM intel/oneapi-basekit:$ONEAPI_VERSION AS runtime
+
+ RUN apt-get update && \
+ apt-get install -y libcurl4-openssl-dev curl
+
+ COPY --from=build /app/build/bin/llama-server /llama-server
+
+ ENV LC_ALL=C.utf8
+ # Must be set to 0.0.0.0 so it can listen to requests from host machine
+ ENV LLAMA_ARG_HOST=0.0.0.0
+
+ HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
+
+ ENTRYPOINT [ "/llama-server" ]
.devops/llama-server-musa.Dockerfile ADDED
@@ -0,0 +1,35 @@
+ ARG UBUNTU_VERSION=22.04
+ # This needs to generally match the container host's environment.
+ ARG MUSA_VERSION=rc3.1.0
+ # Target the MUSA build image
+ ARG BASE_MUSA_DEV_CONTAINER=mthreads/musa:${MUSA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
+ # Target the MUSA runtime image
+ ARG BASE_MUSA_RUN_CONTAINER=mthreads/musa:${MUSA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}
+
+ FROM ${BASE_MUSA_DEV_CONTAINER} AS build
+
+ RUN apt-get update && \
+ apt-get install -y build-essential git cmake libcurl4-openssl-dev
+
+ WORKDIR /app
+
+ COPY . .
+
+ RUN cmake -B build -DGGML_MUSA=ON -DLLAMA_CURL=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
+ cmake --build build --config Release --target llama-server -j$(nproc)
+
+ FROM ${BASE_MUSA_RUN_CONTAINER} AS runtime
+
+ RUN apt-get update && \
+ apt-get install -y libcurl4-openssl-dev libgomp1 curl
+
+ COPY --from=build /app/build/ggml/src/libggml.so /libggml.so
+ COPY --from=build /app/build/src/libllama.so /libllama.so
+ COPY --from=build /app/build/bin/llama-server /llama-server
+
+ # Must be set to 0.0.0.0 so it can listen to requests from host machine
+ ENV LLAMA_ARG_HOST=0.0.0.0
+
+ HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
+
+ ENTRYPOINT [ "/llama-server" ]
.devops/llama-server-rocm.Dockerfile ADDED
@@ -0,0 +1,54 @@
+ ARG UBUNTU_VERSION=22.04
+
+ # This needs to generally match the container host's environment.
+ ARG ROCM_VERSION=5.6
+
+ # Target the CUDA build image
+ ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete
+
+ FROM ${BASE_ROCM_DEV_CONTAINER} AS build
+
+ # Unless otherwise specified, we make a fat build.
+ # List from https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878
+ # This is mostly tied to rocBLAS supported archs.
+ ARG ROCM_DOCKER_ARCH="\
+ gfx803 \
+ gfx900 \
+ gfx906 \
+ gfx908 \
+ gfx90a \
+ gfx1010 \
+ gfx1030 \
+ gfx1100 \
+ gfx1101 \
+ gfx1102"
+
+ COPY requirements.txt requirements.txt
+ COPY requirements requirements
+
+ RUN pip install --upgrade pip setuptools wheel \
+ && pip install -r requirements.txt
+
+ WORKDIR /app
+
+ COPY . .
+
+ # Set nvcc architecture
+ ENV AMDGPU_TARGETS=${ROCM_DOCKER_ARCH}
+ # Enable ROCm
+ ENV GGML_HIPBLAS=1
+ ENV CC=/opt/rocm/llvm/bin/clang
+ ENV CXX=/opt/rocm/llvm/bin/clang++
+ # Must be set to 0.0.0.0 so it can listen to requests from host machine
+ ENV LLAMA_ARG_HOST=0.0.0.0
+
+ # Enable cURL
+ ENV LLAMA_CURL=1
+ RUN apt-get update && \
+ apt-get install -y libcurl4-openssl-dev curl
+
+ RUN make -j$(nproc) llama-server
+
+ HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
+
+ ENTRYPOINT [ "/app/llama-server" ]
.devops/llama-server-vulkan.Dockerfile ADDED
@@ -0,0 +1,31 @@
+ ARG UBUNTU_VERSION=jammy
+
+ FROM ubuntu:$UBUNTU_VERSION AS build
+
+ # Install build tools
+ RUN apt update && apt install -y git build-essential cmake wget
+
+ # Install Vulkan SDK and cURL
+ RUN wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
+ wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \
+ apt update -y && \
+ apt-get install -y vulkan-sdk libcurl4-openssl-dev curl
+
+ # Build it
+ WORKDIR /app
+ COPY . .
+ RUN cmake -B build -DGGML_VULKAN=1 -DLLAMA_CURL=1 && \
+ cmake --build build --config Release --target llama-server
+
+ # Clean up
+ WORKDIR /
+ RUN cp /app/build/bin/llama-server /llama-server && \
+ rm -rf /app
+
+ ENV LC_ALL=C.utf8
+ # Must be set to 0.0.0.0 so it can listen to requests from host machine
+ ENV LLAMA_ARG_HOST=0.0.0.0
+
+ HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
+
+ ENTRYPOINT [ "/llama-server" ]
.devops/llama-server.Dockerfile ADDED
@@ -0,0 +1,29 @@
+ ARG UBUNTU_VERSION=22.04
+
+ FROM ubuntu:$UBUNTU_VERSION AS build
+
+ RUN apt-get update && \
+ apt-get install -y build-essential git libcurl4-openssl-dev
+
+ WORKDIR /app
+
+ COPY . .
+
+ ENV LLAMA_CURL=1
+
+ RUN make -j$(nproc) llama-server
+
+ FROM ubuntu:$UBUNTU_VERSION AS runtime
+
+ RUN apt-get update && \
+ apt-get install -y libcurl4-openssl-dev libgomp1 curl
+
+ COPY --from=build /app/llama-server /llama-server
+
+ ENV LC_ALL=C.utf8
+ # Must be set to 0.0.0.0 so it can listen to requests from host machine
+ ENV LLAMA_ARG_HOST=0.0.0.0
+
+ HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
+
+ ENTRYPOINT [ "/llama-server" ]
.devops/nix/apps.nix ADDED
@@ -0,0 +1,21 @@
+ {
+ perSystem =
+ { config, lib, ... }:
+ {
+ apps =
+ let
+ inherit (config.packages) default;
+ binaries = [
+ "llama-cli"
+ "llama-embedding"
+ "llama-server"
+ "llama-quantize"
+ ];
+ mkApp = name: {
+ type = "app";
+ program = "${default}/bin/${name}";
+ };
+ in
+ lib.genAttrs binaries mkApp;
+ };
+ }
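With these apps exposed by the flake, the binaries can be run directly with Nix (a sketch, not part of the commit; the flake reference and model path are illustrative):
$ nix run .#llama-cli -- -m ./models/7B/ggml-model-q4_0.gguf -p "Hello" -n 32
$ nix run .#llama-server -- -m ./models/7B/ggml-model-q4_0.gguf --port 8080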
.devops/nix/devshells.nix ADDED
@@ -0,0 +1,52 @@
1
+ { inputs, ... }:
2
+
3
+ {
4
+ perSystem =
5
+ {
6
+ config,
7
+ lib,
8
+ system,
9
+ ...
10
+ }:
11
+ {
12
+ devShells =
13
+ let
14
+ pkgs = import inputs.nixpkgs { inherit system; };
15
+ stdenv = pkgs.stdenv;
16
+ scripts = config.packages.python-scripts;
17
+ in
18
+ lib.pipe (config.packages) [
19
+ (lib.concatMapAttrs (
20
+ name: package: {
21
+ ${name} = pkgs.mkShell {
22
+ name = "${name}";
23
+ inputsFrom = [ package ];
24
+ shellHook = ''
25
+ echo "Entering ${name} devShell"
26
+ '';
27
+ };
28
+ "${name}-extra" =
29
+ if (name == "python-scripts") then
30
+ null
31
+ else
32
+ pkgs.mkShell {
33
+ name = "${name}-extra";
34
+ inputsFrom = [
35
+ package
36
+ scripts
37
+ ];
38
+ # Extra packages that *may* be used by some scripts
39
+ packages = [
40
+ pkgs.python3Packages.tiktoken
41
+ ];
42
+ shellHook = ''
43
+ echo "Entering ${name} devShell"
44
+ addToSearchPath "LD_LIBRARY_PATH" "${lib.getLib stdenv.cc.cc}/lib"
45
+ '';
46
+ };
47
+ }
48
+ ))
49
+ (lib.filterAttrs (name: value: value != null))
50
+ ];
51
+ };
52
+ }
.devops/nix/docker.nix ADDED
@@ -0,0 +1,37 @@
+ {
+ lib,
+ dockerTools,
+ buildEnv,
+ llama-cpp,
+ interactive ? true,
+ coreutils,
+ }:
+
+ # A tar that can be fed into `docker load`:
+ #
+ # $ nix build .#llamaPackages.docker
+ # $ docker load < result
+
+ # For details and variations cf.
+ # - https://nixos.org/manual/nixpkgs/unstable/#ssec-pkgs-dockerTools-buildLayeredImage
+ # - https://discourse.nixos.org/t/a-faster-dockertools-buildimage-prototype/16922
+ # - https://nixery.dev/
+
+ # Approximate (compressed) sizes, at the time of writing, are:
+ #
+ # .#llamaPackages.docker: 125M;
+ # .#llamaPackagesCuda.docker: 537M;
+ # .#legacyPackages.aarch64-linux.llamaPackagesXavier.docker: 415M.
+
+ dockerTools.buildLayeredImage {
+ name = llama-cpp.pname;
+ tag = "latest";
+
+ contents =
+ [ llama-cpp ]
+ ++ lib.optionals interactive [
+ coreutils
+ dockerTools.binSh
+ dockerTools.caCertificates
+ ];
+ }
.devops/nix/jetson-support.nix ADDED
@@ -0,0 +1,39 @@
+ { inputs, ... }:
+ {
+ perSystem =
+ {
+ config,
+ system,
+ lib,
+ pkgsCuda,
+ ...
+ }:
+ {
+ legacyPackages =
+ let
+ caps.llamaPackagesXavier = "7.2";
+ caps.llamaPackagesOrin = "8.7";
+ caps.llamaPackagesTX2 = "6.2";
+ caps.llamaPackagesNano = "5.3";
+
+ pkgsFor =
+ cap:
+ import inputs.nixpkgs {
+ inherit system;
+ config = {
+ cudaSupport = true;
+ cudaCapabilities = [ cap ];
+ cudaEnableForwardCompat = false;
+ inherit (pkgsCuda.config) allowUnfreePredicate;
+ };
+ };
+ in
+ builtins.mapAttrs (name: cap: (pkgsFor cap).callPackage ./scope.nix { }) caps;
+
+ packages = lib.optionalAttrs (system == "aarch64-linux") {
+ jetson-xavier = config.legacyPackages.llamaPackagesXavier.llama-cpp;
+ jetson-orin = config.legacyPackages.llamaPackagesOrin.llama-cpp;
+ jetson-nano = config.legacyPackages.llamaPackagesNano.llama-cpp;
+ };
+ };
+ }
.devops/nix/nixpkgs-instances.nix ADDED
@@ -0,0 +1,45 @@
1
+ { inputs, ... }:
2
+ {
3
+ # The _module.args definitions are passed on to modules as arguments. E.g.
4
+ # the module `{ pkgs ... }: { /* config */ }` implicitly uses
5
+ # `_module.args.pkgs` (defined in this case by flake-parts).
6
+ perSystem =
7
+ { system, ... }:
8
+ {
9
+ _module.args = {
10
+ # Note: bringing up https://zimbatm.com/notes/1000-instances-of-nixpkgs
11
+ # again, the below creates several nixpkgs instances which the
12
+ # flake-centric CLI will be forced to evaluate e.g. on `nix flake show`.
13
+ #
14
+ # This is currently "slow" and "expensive", on a certain scale.
15
+ # This also isn't "right" in that this hinders dependency injection at
16
+ # the level of flake inputs. This might get removed in the foreseeable
17
+ # future.
18
+ #
19
+ # Note that you can use these expressions without Nix
20
+ # (`pkgs.callPackage ./devops/nix/scope.nix { }` is the entry point).
21
+
22
+ pkgsCuda = import inputs.nixpkgs {
23
+ inherit system;
24
+ # Ensure dependencies use CUDA consistently (e.g. that openmpi, ucc,
25
+ # and ucx are built with CUDA support)
26
+ config.cudaSupport = true;
27
+ config.allowUnfreePredicate =
28
+ p:
29
+ builtins.all (
30
+ license:
31
+ license.free
32
+ || builtins.elem license.shortName [
33
+ "CUDA EULA"
34
+ "cuDNN EULA"
35
+ ]
36
+ ) (p.meta.licenses or [ p.meta.license ]);
37
+ };
38
+ # Ensure dependencies use ROCm consistently
39
+ pkgsRocm = import inputs.nixpkgs {
40
+ inherit system;
41
+ config.rocmSupport = true;
42
+ };
43
+ };
44
+ };
45
+ }
.devops/nix/package-gguf-py.nix ADDED
@@ -0,0 +1,36 @@
+ {
+ lib,
+ llamaVersion,
+ numpy,
+ tqdm,
+ sentencepiece,
+ pyyaml,
+ poetry-core,
+ buildPythonPackage,
+ pytestCheckHook,
+ }:
+
+ buildPythonPackage {
+ pname = "gguf";
+ version = llamaVersion;
+ pyproject = true;
+ nativeBuildInputs = [ poetry-core ];
+ propagatedBuildInputs = [
+ numpy
+ tqdm
+ sentencepiece
+ pyyaml
+ ];
+ src = lib.cleanSource ../../gguf-py;
+ pythonImportsCheck = [
+ "numpy"
+ "gguf"
+ ];
+ nativeCheckInputs = [ pytestCheckHook ];
+ doCheck = true;
+ meta = with lib; {
+ description = "Python package for writing binary files in the GGUF format";
+ license = licenses.mit;
+ maintainers = [ maintainers.ditsuke ];
+ };
+ }
.devops/nix/package.nix ADDED
@@ -0,0 +1,246 @@
1
+ {
2
+ lib,
3
+ glibc,
4
+ config,
5
+ stdenv,
6
+ runCommand,
7
+ cmake,
8
+ ninja,
9
+ pkg-config,
10
+ git,
11
+ mpi,
12
+ blas,
13
+ cudaPackages,
14
+ autoAddDriverRunpath,
15
+ darwin,
16
+ rocmPackages,
17
+ vulkan-headers,
18
+ vulkan-loader,
19
+ curl,
20
+ shaderc,
21
+ useBlas ?
22
+ builtins.all (x: !x) [
23
+ useCuda
24
+ useMetalKit
25
+ useRocm
26
+ useVulkan
27
+ ]
28
+ && blas.meta.available,
29
+ useCuda ? config.cudaSupport,
30
+ useMetalKit ? stdenv.isAarch64 && stdenv.isDarwin,
31
+ # Increases the runtime closure size by ~700M
32
+ useMpi ? false,
33
+ useRocm ? config.rocmSupport,
34
+ enableCurl ? true,
35
+ useVulkan ? false,
36
+ llamaVersion ? "0.0.0", # Arbitrary version, substituted by the flake
37
+
38
+ # It's necessary to consistently use backendStdenv when building with CUDA support,
39
+ # otherwise we get libstdc++ errors downstream.
40
+ effectiveStdenv ? if useCuda then cudaPackages.backendStdenv else stdenv,
41
+ enableStatic ? effectiveStdenv.hostPlatform.isStatic,
42
+ precompileMetalShaders ? false,
43
+ }:
44
+
45
+ let
46
+ inherit (lib)
47
+ cmakeBool
48
+ cmakeFeature
49
+ optionals
50
+ strings
51
+ ;
52
+
53
+ stdenv = throw "Use effectiveStdenv instead";
54
+
55
+ suffices =
56
+ lib.optionals useBlas [ "BLAS" ]
57
+ ++ lib.optionals useCuda [ "CUDA" ]
58
+ ++ lib.optionals useMetalKit [ "MetalKit" ]
59
+ ++ lib.optionals useMpi [ "MPI" ]
60
+ ++ lib.optionals useRocm [ "ROCm" ]
61
+ ++ lib.optionals useVulkan [ "Vulkan" ];
62
+
63
+ pnameSuffix =
64
+ strings.optionalString (suffices != [ ])
65
+ "-${strings.concatMapStringsSep "-" strings.toLower suffices}";
66
+ descriptionSuffix = strings.optionalString (
67
+ suffices != [ ]
68
+ ) ", accelerated with ${strings.concatStringsSep ", " suffices}";
69
+
70
+ xcrunHost = runCommand "xcrunHost" { } ''
71
+ mkdir -p $out/bin
72
+ ln -s /usr/bin/xcrun $out/bin
73
+ '';
74
+
75
+ # apple_sdk is supposed to choose sane defaults, no need to handle isAarch64
76
+ # separately
77
+ darwinBuildInputs =
78
+ with darwin.apple_sdk.frameworks;
79
+ [
80
+ Accelerate
81
+ CoreVideo
82
+ CoreGraphics
83
+ ]
84
+ ++ optionals useMetalKit [ MetalKit ];
85
+
86
+ cudaBuildInputs = with cudaPackages; [
87
+ cuda_cudart
88
+ cuda_cccl # <nv/target>
89
+ libcublas
90
+ ];
91
+
92
+ rocmBuildInputs = with rocmPackages; [
93
+ clr
94
+ hipblas
95
+ rocblas
96
+ ];
97
+
98
+ vulkanBuildInputs = [
99
+ vulkan-headers
100
+ vulkan-loader
101
+ shaderc
102
+ ];
103
+ in
104
+
105
+ effectiveStdenv.mkDerivation (finalAttrs: {
106
+ pname = "llama-cpp${pnameSuffix}";
107
+ version = llamaVersion;
108
+
109
+ # Note: none of the files discarded here are visible in the sandbox or
110
+ # affect the output hash. This also means they can be modified without
111
+ # triggering a rebuild.
112
+ src = lib.cleanSourceWith {
113
+ filter =
114
+ name: type:
115
+ let
116
+ noneOf = builtins.all (x: !x);
117
+ baseName = baseNameOf name;
118
+ in
119
+ noneOf [
120
+ (lib.hasSuffix ".nix" name) # Ignore *.nix files when computing outPaths
121
+ (lib.hasSuffix ".md" name) # Ignore *.md changes when computing outPaths
122
+ (lib.hasPrefix "." baseName) # Skip hidden files and directories
123
+ (baseName == "flake.lock")
124
+ ];
125
+ src = lib.cleanSource ../../.;
126
+ };
127
+
128
+ postPatch = ''
129
+ substituteInPlace ./ggml/src/ggml-metal.m \
130
+ --replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"
131
+ substituteInPlace ./ggml/src/ggml-metal.m \
132
+ --replace '[bundle pathForResource:@"default" ofType:@"metallib"];' "@\"$out/bin/default.metallib\";"
133
+ '';
134
+
135
+ # With PR#6015 https://github.com/ggerganov/llama.cpp/pull/6015,
136
+ # `default.metallib` may be compiled with Metal compiler from XCode
137
+ # and we need to escape sandbox on MacOS to access Metal compiler.
138
+ # `xcrun` is used to find the path of the Metal compiler, which is variable
139
+ # and not on $PATH
140
+ # see https://github.com/ggerganov/llama.cpp/pull/6118 for discussion
141
+ __noChroot = effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders;
142
+
143
+ nativeBuildInputs =
144
+ [
145
+ cmake
146
+ ninja
147
+ pkg-config
148
+ git
149
+ ]
150
+ ++ optionals useCuda [
151
+ cudaPackages.cuda_nvcc
152
+
153
+ autoAddDriverRunpath
154
+ ]
155
+ ++ optionals (effectiveStdenv.hostPlatform.isGnu && enableStatic) [ glibc.static ]
156
+ ++ optionals (effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders) [ xcrunHost ];
157
+
158
+ buildInputs =
159
+ optionals effectiveStdenv.isDarwin darwinBuildInputs
160
+ ++ optionals useCuda cudaBuildInputs
161
+ ++ optionals useMpi [ mpi ]
162
+ ++ optionals useRocm rocmBuildInputs
163
+ ++ optionals useBlas [ blas ]
164
+ ++ optionals useVulkan vulkanBuildInputs
165
+ ++ optionals enableCurl [ curl ];
166
+
167
+ cmakeFlags =
168
+ [
169
+ (cmakeBool "LLAMA_BUILD_SERVER" true)
170
+ (cmakeBool "BUILD_SHARED_LIBS" (!enableStatic))
171
+ (cmakeBool "CMAKE_SKIP_BUILD_RPATH" true)
172
+ (cmakeBool "LLAMA_CURL" enableCurl)
173
+ (cmakeBool "GGML_NATIVE" false)
174
+ (cmakeBool "GGML_BLAS" useBlas)
175
+ (cmakeBool "GGML_CUDA" useCuda)
176
+ (cmakeBool "GGML_HIPBLAS" useRocm)
177
+ (cmakeBool "GGML_METAL" useMetalKit)
178
+ (cmakeBool "GGML_VULKAN" useVulkan)
179
+ (cmakeBool "GGML_STATIC" enableStatic)
180
+ ]
181
+ ++ optionals useCuda [
182
+ (
183
+ with cudaPackages.flags;
184
+ cmakeFeature "CMAKE_CUDA_ARCHITECTURES" (
185
+ builtins.concatStringsSep ";" (map dropDot cudaCapabilities)
186
+ )
187
+ )
188
+ ]
189
+ ++ optionals useRocm [
190
+ (cmakeFeature "CMAKE_HIP_COMPILER" "${rocmPackages.llvm.clang}/bin/clang")
191
+ (cmakeFeature "CMAKE_HIP_ARCHITECTURES" (builtins.concatStringsSep ";" rocmPackages.clr.gpuTargets))
192
+ ]
193
+ ++ optionals useMetalKit [
194
+ (lib.cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1")
195
+ (cmakeBool "GGML_METAL_EMBED_LIBRARY" (!precompileMetalShaders))
196
+ ];
197
+
198
+ # Environment variables needed for ROCm
199
+ env = optionals useRocm {
200
+ ROCM_PATH = "${rocmPackages.clr}";
201
+ HIP_DEVICE_LIB_PATH = "${rocmPackages.rocm-device-libs}/amdgcn/bitcode";
202
+ };
203
+
204
+ # TODO(SomeoneSerge): It's better to add proper install targets at the CMake level,
205
+ # if they haven't been added yet.
206
+ postInstall = ''
207
+ mkdir -p $out/include
208
+ cp $src/include/llama.h $out/include/
209
+ '';
210
+
211
+ meta = {
212
+ # Configurations we don't want even the CI to evaluate. Results in the
213
+ # "unsupported platform" messages. This is mostly a no-op, because
214
+ # cudaPackages would've refused to evaluate anyway.
215
+ badPlatforms = optionals useCuda lib.platforms.darwin;
216
+
217
+ # Configurations that are known to result in build failures. Can be
218
+ # overridden by importing Nixpkgs with `allowBroken = true`.
219
+ broken = (useMetalKit && !effectiveStdenv.isDarwin);
220
+
221
+ description = "Inference of LLaMA model in pure C/C++${descriptionSuffix}";
222
+ homepage = "https://github.com/ggerganov/llama.cpp/";
223
+ license = lib.licenses.mit;
224
+
225
+ # Accommodates `nix run` and `lib.getExe`
226
+ mainProgram = "llama-cli";
227
+
228
+ # These people might respond, on the best effort basis, if you ping them
229
+ # in case of Nix-specific regressions or for reviewing Nix-specific PRs.
230
+ # Consider adding yourself to this list if you want to ensure this flake
231
+ # stays maintained and you're willing to invest your time. Do not add
232
+ # other people without their consent. Consider removing people after
233
+ # they've been unreachable for long periods of time.
234
+
235
+ # Note that lib.maintainers is defined in Nixpkgs, but you may just add
236
+ # an attrset following the same format as in
237
+ # https://github.com/NixOS/nixpkgs/blob/f36a80e54da29775c78d7eff0e628c2b4e34d1d7/maintainers/maintainer-list.nix
238
+ maintainers = with lib.maintainers; [
239
+ philiptaron
240
+ SomeoneSerge
241
+ ];
242
+
243
+ # Extend `badPlatforms` instead
244
+ platforms = lib.platforms.all;
245
+ };
246
+ })
.devops/nix/python-scripts.nix ADDED
@@ -0,0 +1,66 @@
1
+ {
2
+ lib,
3
+ stdenv,
4
+ buildPythonPackage,
5
+ poetry-core,
6
+ mkShell,
7
+ python3Packages,
8
+ gguf-py,
9
+ }@inputs:
10
+
11
+ let
12
+ llama-python-deps = with python3Packages; [
13
+ numpy
14
+ sentencepiece
15
+ transformers
16
+ protobuf
17
+ torchWithoutCuda
18
+ gguf-py
19
+ tqdm
20
+
21
+ # for scripts/compare-llama-bench.py
22
+ gitpython
23
+ tabulate
24
+
25
+ # for examples/pydantic-models-to-grammar-examples.py
26
+ docstring-parser
27
+ pydantic
28
+
29
+ ];
30
+
31
+ llama-python-test-deps = with python3Packages; [
32
+ # Server bench
33
+ matplotlib
34
+
35
+ # server tests
36
+ openai
37
+ behave
38
+ prometheus-client
39
+ ];
40
+ in
41
+
42
+ buildPythonPackage ({
43
+ pname = "llama-scripts";
44
+ version = "0.0.0";
45
+ pyproject = true;
46
+
47
+ # NOTE: The files filtered out here are not visible in the build sandbox, neither
48
+ # do they affect the output hash. They can be modified without triggering a rebuild.
49
+ src = lib.cleanSourceWith {
50
+ filter =
51
+ name: type:
52
+ let
53
+ any = builtins.any (x: x);
54
+ baseName = builtins.baseNameOf name;
55
+ in
56
+ any [
57
+ (lib.hasSuffix ".py" name)
58
+ (baseName == "README.md")
59
+ (baseName == "pyproject.toml")
60
+ ];
61
+ src = lib.cleanSource ../../.;
62
+ };
63
+ nativeBuildInputs = [ poetry-core ];
64
+ nativeCheckInputs = llama-python-test-deps;
65
+ dependencies = llama-python-deps;
66
+ })
.devops/nix/scope.nix ADDED
@@ -0,0 +1,41 @@
+ {
+ lib,
+ newScope,
+ python3,
+ llamaVersion ? "0.0.0",
+ }:
+
+ let
+ pythonPackages = python3.pkgs;
+ buildPythonPackage = pythonPackages.buildPythonPackage;
+ numpy = pythonPackages.numpy;
+ tqdm = pythonPackages.tqdm;
+ sentencepiece = pythonPackages.sentencepiece;
+ pyyaml = pythonPackages.pyyaml;
+ poetry-core = pythonPackages.poetry-core;
+ pytestCheckHook = pythonPackages.pytestCheckHook;
+ in
+
+ # We're using `makeScope` instead of just writing out an attrset
+ # because it allows users to apply overlays later using `overrideScope'`.
+ # Cf. https://noogle.dev/f/lib/makeScope
+
+ lib.makeScope newScope (self: {
+ inherit llamaVersion;
+ gguf-py = self.callPackage ./package-gguf-py.nix {
+ inherit
+ buildPythonPackage
+ numpy
+ tqdm
+ sentencepiece
+ poetry-core
+ pyyaml
+ pytestCheckHook
+ ;
+ };
+ python-scripts = self.callPackage ./python-scripts.nix { inherit buildPythonPackage poetry-core; };
+ llama-cpp = self.callPackage ./package.nix { };
+ docker = self.callPackage ./docker.nix { };
+ docker-min = self.callPackage ./docker.nix { interactive = false; };
+ sif = self.callPackage ./sif.nix { };
+ })
.devops/nix/sif.nix ADDED
@@ -0,0 +1,27 @@
+ {
+ lib,
+ singularity-tools,
+ llama-cpp,
+ bashInteractive,
+ interactive ? false,
+ }:
+
+ let
+ optionalInt = cond: x: if cond then x else 0;
+ in
+ singularity-tools.buildImage rec {
+ inherit (llama-cpp) name;
+ contents = [ llama-cpp ] ++ lib.optionals interactive [ bashInteractive ];
+
+ # These are excessive (but safe) for most variants. Building singularity
+ # images requires superuser privileges, so we build them inside a VM in a
+ # writable image of pre-determined size.
+ #
+ # ROCm is currently affected by https://github.com/NixOS/nixpkgs/issues/276846
+ #
+ # Expected image sizes:
+ # - cpu/blas: 150M,
+ # - cuda, all gencodes: 560M,
+ diskSize = 4096 + optionalInt llama-cpp.useRocm 16384;
+ memSize = diskSize;
+ }
.devops/tools.sh ADDED
@@ -0,0 +1,41 @@
+ #!/bin/bash
+ set -e
+
+ # Read the first argument into a variable
+ arg1="$1"
+
+ # Shift the arguments to remove the first one
+ shift
+
+ if [[ "$arg1" == '--convert' || "$arg1" == '-c' ]]; then
+ python3 ./convert_hf_to_gguf.py "$@"
+ elif [[ "$arg1" == '--quantize' || "$arg1" == '-q' ]]; then
+ ./llama-quantize "$@"
+ elif [[ "$arg1" == '--run' || "$arg1" == '-r' ]]; then
+ ./llama-cli "$@"
+ elif [[ "$arg1" == '--all-in-one' || "$arg1" == '-a' ]]; then
+ echo "Converting PTH to GGML..."
+ for i in `ls $1/$2/ggml-model-f16.bin*`; do
+ if [ -f "${i/f16/q4_0}" ]; then
+ echo "Skip model quantization, it already exists: ${i/f16/q4_0}"
+ else
+ echo "Converting PTH to GGML: $i into ${i/f16/q4_0}..."
+ ./llama-quantize "$i" "${i/f16/q4_0}" q4_0
+ fi
+ done
+ elif [[ "$arg1" == '--server' || "$arg1" == '-s' ]]; then
+ ./llama-server "$@"
+ else
+ echo "Unknown command: $arg1"
+ echo "Available commands: "
+ echo " --run (-r): Run a model previously converted into ggml"
+ echo " ex: -m /models/7B/ggml-model-q4_0.bin -p \"Building a website can be done in 10 simple steps:\" -n 512"
+ echo " --convert (-c): Convert a llama model into ggml"
+ echo " ex: --outtype f16 \"/models/7B/\" "
+ echo " --quantize (-q): Optimize with quantization process ggml"
+ echo " ex: \"/models/7B/ggml-model-f16.bin\" \"/models/7B/ggml-model-q4_0.bin\" 2"
+ echo " --all-in-one (-a): Execute --convert & --quantize"
+ echo " ex: \"/models/\" 7B"
+ echo " --server (-s): Run a model on the server"
+ echo " ex: -m /models/7B/ggml-model-q4_0.bin -c 2048 -ngl 43 -mg 1 --port 8080"
+ fi
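Example invocations of this entrypoint through the full image (a sketch, not part of the commit; the image tag and model paths are illustrative, and the flags mirror the script's own help text):
$ docker run -v /path/to/models:/models local/llama.cpp:full --convert --outtype f16 /models/7B/
$ docker run -v /path/to/models:/models local/llama.cpp:full --quantize /models/7B/ggml-model-f16.gguf /models/7B/ggml-model-q4_0.gguf q4_0
$ docker run -v /path/to/models:/models local/llama.cpp:full --run -m /models/7B/ggml-model-q4_0.gguf -p "Hello" -n 64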
.dockerignore ADDED
@@ -0,0 +1,20 @@
+ *.o
+ *.a
+ .cache/
+ # Do not ignore .git directory, otherwise the reported build number will always be 0
+ .github/
+ .gitignore
+ .vs/
+ .vscode/
+ .DS_Store
+
+ build*/
+
+ models/*
+
+ /llama-cli
+ /llama-quantize
+
+ arm_neon.h
+ compile_commands.json
+ Dockerfile
.ecrc ADDED
@@ -0,0 +1,6 @@
+ {
+ "Exclude": ["^\\.gitmodules$", "stb_image\\.h"],
+ "Disable": {
+ "IndentSize": true
+ }
+ }
.editorconfig ADDED
@@ -0,0 +1,32 @@
+ # https://EditorConfig.org
+
+ # Top-most EditorConfig file
+ root = true
+
+ # Unix-style newlines with a newline ending every file, utf-8 charset
+ [*]
+ end_of_line = lf
+ insert_final_newline = true
+ trim_trailing_whitespace = true
+ charset = utf-8
+ indent_style = space
+ indent_size = 4
+
+ [Makefile]
+ indent_style = tab
+
+ [scripts/*.mk]
+ indent_style = tab
+
+ [prompts/*.txt]
+ insert_final_newline = unset
+
+ [examples/server/public/*]
+ indent_size = 2
+
+ [examples/llama.swiftui/llama.swiftui.xcodeproj/*]
+ indent_style = tab
+
+ [examples/cvector-generator/*.txt]
+ trim_trailing_whitespace = unset
+ insert_final_newline = unset
.flake8 ADDED
@@ -0,0 +1,17 @@
+ [flake8]
+ max-line-length = 125
+ ignore = E203,E211,E221,E225,E231,E241,E251,E261,E266,E501,E701,E704,W503
+ exclude =
+ # Do not traverse examples
+ examples,
+ # Do not include package initializers
+ __init__.py,
+ # No need to traverse our git directory
+ .git,
+ # There's no value in checking cache directories
+ __pycache__,
+ # No need to include the build path
+ build,
+ # This contains builds that we don't want to check
+ dist # This is generated with `python build .` for package releases
+ # max-complexity = 10
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ *.gguf filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,135 @@
+ # Extensions
+
+ *.a
+ *.bat
+ *.bin
+ *.dll
+ *.dot
+ *.etag
+ *.exe
+ *.gcda
+ *.gcno
+ *.gcov
+ *.gguf
+ *.gguf.json
+ *.lastModified
+ *.log
+ *.metallib
+ *.o
+ *.so
+ *.tmp
+
+ # IDE / OS
+
+ .cache/
+ .ccls-cache/
+ .direnv/
+ .DS_Store
+ .envrc
+ .idea/
+ .swiftpm
+ .vs/
+ .vscode/
+ nppBackup
+
+
+ # Coverage
+
+ gcovr-report/
+ lcov-report/
+
+ # Build Artifacts
+
+ tags
+ .build/
+ build*
+ !build-info.cmake
+ !build-info.cpp.in
+ !build-info.sh
+ !build.zig
+ !docs/build.md
+ /libllama.so
+ /llama-*
+ /vulkan-shaders-gen
+ android-ndk-*
+ arm_neon.h
+ cmake-build-*
+ CMakeSettings.json
+ compile_commands.json
+ ggml-metal-embed.metal
+ llama-batched-swift
+ /rpc-server
+ out/
+ tmp/
+ autogen-*.md
+
+ # Deprecated
+
+ /main
+ /server
+
+ # CI
+
+ !.github/workflows/*.yml
+
+ # Models
+
+ models/*
+ models-mnt
+ !models/.editorconfig
+ !models/ggml-vocab-*.gguf*
+
+ # Zig
+ zig-out/
+ zig-cache/
+
+ # Logs
+
+ ppl-*.txt
+ qnt-*.txt
+ perf-*.txt
+
+ # Examples
+
+ examples/jeopardy/results.txt
+ examples/server/*.css.hpp
+ examples/server/*.html.hpp
+ examples/server/*.js.hpp
+ examples/server/*.mjs.hpp
+ !build_64.sh
+ !examples/*.bat
+ !examples/*/*.kts
+ !examples/*/*/*.kts
+ !examples/sycl/*.bat
+ !examples/sycl/*.sh
+
+ # Python
+
+ /.venv
+ __pycache__/
+ */poetry.lock
+ poetry.toml
+
+ # Nix
+ /result
+
+ # Test binaries
+ /tests/test-backend-ops
+ /tests/test-double-float
+ /tests/test-grad0
+ /tests/test-grammar-parser
+ /tests/test-llama-grammar
+ /tests/test-opt
+ /tests/test-quantize-fns
+ /tests/test-quantize-perf
+ /tests/test-rope
+ /tests/test-sampling
+ /tests/test-tokenizer-0
+ /tests/test-tokenizer-1-bpe
+ /tests/test-tokenizer-1-spm
+
+ # Scripts
+ !/scripts/install-oneapi.bat
+
+ # Test models for lora adapters
+ /lora-tests
.gitmodules ADDED
@@ -0,0 +1,3 @@
+ [submodule "kompute"]
+ path = ggml/src/kompute
+ url = https://github.com/nomic-ai/kompute.git
.pre-commit-config.yaml ADDED
@@ -0,0 +1,16 @@
+ # See https://pre-commit.com for more information
+ # See https://pre-commit.com/hooks.html for more hooks
+ exclude: prompts/.*.txt
+ repos:
+ - repo: https://github.com/pre-commit/pre-commit-hooks
+ rev: v4.6.0
+ hooks:
+ - id: trailing-whitespace
+ - id: end-of-file-fixer
+ - id: check-yaml
+ - id: check-added-large-files
+ - repo: https://github.com/PyCQA/flake8
+ rev: 7.0.0
+ hooks:
+ - id: flake8
+ additional_dependencies: [flake8-no-print]
AUTHORS ADDED
@@ -0,0 +1,782 @@
1
+ # date: Wed Jun 26 19:36:34 EEST 2024
2
+ # this file is auto-generated by scripts/gen-authors.sh
3
+
4
+ 0cc4m <picard12@live.de>
5
+ 0xspringtime <110655352+0xspringtime@users.noreply.github.com>
6
+ 20kdc <asdd2808@gmail.com>
7
+ 2f38b454 <dxf@protonmail.com>
8
+ 3ooabkhxtn <31479382+3ooabkhxtn@users.noreply.github.com>
9
+ 44670 <44670@users.noreply.github.com>
10
+ AN Long <aisk@users.noreply.github.com>
11
+ AT <manyoso@users.noreply.github.com>
12
+ Aarni Koskela <akx@iki.fi>
13
+ Aaron Miller <apage43@ninjawhale.com>
14
+ Aaryaman Vasishta <aaryaman.vasishta@amd.com>
15
+ Abheek Gulati <abheekg@hotmail.com>
16
+ Abhilash Majumder <30946547+abhilash1910@users.noreply.github.com>
17
+ Abhishek Gopinath K <31348521+overtunned@users.noreply.github.com>
18
+ Adithya Balaji <adithya.b94@gmail.com>
19
+ AdithyanI <adithyan.i4internet@gmail.com>
20
+ Adrian <smith.adriane@gmail.com>
21
+ Adrian Hesketh <a-h@users.noreply.github.com>
22
+ Ahmet Zeer <ahmed.zeer@std.yildiz.edu.tr>
23
+ AidanBeltonS <87009434+AidanBeltonS@users.noreply.github.com>
24
+ Aisuko <urakiny@gmail.com>
25
+ Akarshan Biswas <akarshanbiswas@fedoraproject.org>
26
+ Albert Jin <albert.jin@gmail.com>
27
+ Alberto <57916483+albbus-stack@users.noreply.github.com>
28
+ Alex <awhill19@icloud.com>
29
+ Alex Azarov <alex@azarov.by>
30
+ Alex Azarov <alexander.azarov@mapbox.com>
31
+ Alex Klinkhamer <from.github.com.917@grencez.dev>
32
+ Alex Klinkhamer <git@grencez.dev>
33
+ Alex Nguyen <tiendung@users.noreply.github.com>
34
+ Alex Petenchea <alex.petenchea@gmail.com>
35
+ Alex Renda <alexrenda@users.noreply.github.com>
36
+ Alex von Gluck IV <kallisti5@unixzen.com>
37
+ Alexey Parfenov <zxed@alkatrazstudio.net>
38
+ Ali Chraghi <63465728+alichraghi@users.noreply.github.com>
39
+ Ali Nehzat <ali.nehzat@thanks.dev>
40
+ Ali Tariq <ali.tariq@10xengineers.ai>
41
+ Alon <alonfaraj@gmail.com>
42
+ AlpinDale <52078762+AlpinDale@users.noreply.github.com>
43
+ Amir <amir_zia@outlook.com>
44
+ AmirAli Mirian <37371367+amiralimi@users.noreply.github.com>
45
+ Ananta Bastola <anantarajbastola@gmail.com>
46
+ Anas Ahouzi <112881240+aahouzi@users.noreply.github.com>
47
+ András Salamon <ott2@users.noreply.github.com>
48
+ Andrei <abetlen@gmail.com>
49
+ Andrew Canis <andrew.canis@gmail.com>
50
+ Andrew Downing <andrew2085@gmail.com>
51
+ Andrew Duffy <a10y@users.noreply.github.com>
52
+ Andrew Godfrey <AndrewGodfrey@users.noreply.github.com>
53
+ Andy Tai <andy-tai@users.noreply.github.com>
54
+ Arik Poznanski <arikpoz@users.noreply.github.com>
55
+ Artem <guinmoon@gmail.com>
56
+ Artem Zinnatullin <ceo@abstractny.gay>
57
+ Artyom Lebedev <vagran.ast@gmail.com>
58
+ Asbjørn Olling <asbjornolling@gmail.com>
59
+ Ásgeir Bjarni Ingvarsson <asgeir@fundinn.org>
60
+ Ashish <1856117+ashishdatta@users.noreply.github.com>
61
+ Ashok Gelal <401055+ashokgelal@users.noreply.github.com>
62
+ Ashraful Islam <ashraful.meche@gmail.com>
63
+ Atsushi Tatsuma <yoshoku@outlook.com>
64
+ Austin <77757836+teleprint-me@users.noreply.github.com>
65
+ AustinMroz <austinmroz@utexas.edu>
66
+ BADR <contact@pythops.com>
67
+ Bach Le <bach@bullno1.com>
68
+ Bailey Chittle <39804642+bachittle@users.noreply.github.com>
69
+ BarfingLemurs <128182951+BarfingLemurs@users.noreply.github.com>
70
+ Bartowski <ckealty1182@gmail.com>
71
+ Behnam M <58621210+ibehnam@users.noreply.github.com>
72
+ Ben Ashbaugh <ben.ashbaugh@intel.com>
73
+ Ben Garney <bengarney@users.noreply.github.com>
74
+ Ben Siraphob <bensiraphob@gmail.com>
75
+ Ben Williams <ben@719ben.com>
76
+ Benjamin Findley <39356821+Kartoffelsaft@users.noreply.github.com>
77
+ Benjamin Lecaillon <84293038+blecaillon@users.noreply.github.com>
78
+ Bernat Vadell <hounter.caza@gmail.com>
79
+ Bingan <70050083+binganao@users.noreply.github.com>
80
+ Bodo Graumann <mail@bodograumann.de>
81
+ Bono Lv <lvscar@users.noreply.github.com>
82
+ Borislav Stanimirov <b.stanimirov@abv.bg>
83
+ Branden Butler <bwtbutler@hotmail.com>
84
+ Brian <mofosyne@gmail.com>
85
+ Bruce MacDonald <brucewmacdonald@gmail.com>
86
+ Bryan Honof <bryanhonof@gmail.com>
87
+ CJ Pais <cj@cjpais.com>
88
+ CRD716 <crd716@gmail.com>
89
+ Calvin Laurenson <calvin@laurenson.dev>
90
+ Cameron <csteele@steelecameron.com>
91
+ Cameron Kaiser <classilla@users.noreply.github.com>
92
+ Carolinabanana <140120812+Carolinabanana@users.noreply.github.com>
93
+ Casey Primozic <casey@cprimozic.net>
94
+ Casey Primozic <me@ameo.link>
95
+ CausalLM <148736309+CausalLM@users.noreply.github.com>
96
+ Cebtenzzre <cebtenzzre@gmail.com>
97
+ Chad Brewbaker <crb002@gmail.com>
98
+ Chao Jiang <jc19chaoj@zoho.com>
99
+ Cheng Shao <terrorjack@type.dance>
100
+ Chris Elrod <elrodc@gmail.com>
101
+ Chris Kuehl <ckuehl@ckuehl.me>
102
+ Christian Demsar <christian@github.email.demsar.us>
103
+ Christian Demsar <crasm@git.vczf.us>
104
+ Christian Falch <875252+chrfalch@users.noreply.github.com>
105
+ Christian Kögler <ck3d@gmx.de>
106
+ Christian Zhou-Zheng <59622928+christianazinn@users.noreply.github.com>
107
+ Clark Saben <76020733+csaben@users.noreply.github.com>
108
+ Clint Herron <hanclinto@gmail.com>
109
+ CrispStrobe <154636388+CrispStrobe@users.noreply.github.com>
110
+ Cuong Trinh Manh <nguoithichkhampha@gmail.com>
111
+ DAN™ <dranger003@gmail.com>
112
+ Damian Stewart <d@damianstewart.com>
113
+ Dane Madsen <dane_madsen@hotmail.com>
114
+ DaniAndTheWeb <57776841+DaniAndTheWeb@users.noreply.github.com>
115
+ Daniel Bevenius <daniel.bevenius@gmail.com>
116
+ Daniel Drake <drake@endlessos.org>
117
+ Daniel Hiltgen <dhiltgen@users.noreply.github.com>
118
+ Daniel Illescas Romero <illescas.daniel@protonmail.com>
119
+ Daniele <57776841+daniandtheweb@users.noreply.github.com>
120
+ DannyDaemonic <DannyDaemonic@gmail.com>
121
+ Dat Quoc Nguyen <2412555+datquocnguyen@users.noreply.github.com>
122
+ Dave <dave-fl@users.noreply.github.com>
123
+ Dave Airlie <airlied@gmail.com>
124
+ Dave Airlie <airlied@redhat.com>
125
+ Dave Della Costa <ddellacosta+github@gmail.com>
126
+ David Friehs <david@friehs.info>
127
+ David Kennedy <dakennedyd@gmail.com>
128
+ David Pflug <david@pflug.email>
129
+ David Renshaw <dwrenshaw@gmail.com>
130
+ David Sommers <12738+databyte@users.noreply.github.com>
131
+ David Yang <davidyang6us@gmail.com>
132
+ Dawid Potocki <github@dawidpotocki.com>
133
+ Dawid Wysocki <62249621+TortillaZHawaii@users.noreply.github.com>
134
+ Dean <Dean.Sinaean@gmail.com>
135
+ Deins <deinsegle@gmail.com>
136
+ Deven Mistry <31466137+deven367@users.noreply.github.com>
137
+ Didzis Gosko <didzis@users.noreply.github.com>
138
+ Djip007 <djip.perois@free.fr>
139
+ Don Mahurin <dmahurin@users.noreply.github.com>
140
+ DooWoong Lee (David) <manics99@naver.com>
141
+ Doomsdayrs <38189170+Doomsdayrs@users.noreply.github.com>
142
+ Douglas Hanley <thesecretaryofwar@gmail.com>
143
+ Dr. Tom Murphy VII Ph.D <499244+tom7@users.noreply.github.com>
144
+ Ebey Abraham <ebey97@gmail.com>
145
+ Ed Lee <edilee@mozilla.com>
146
+ Ed Lepedus <ed.lepedus@googlemail.com>
147
+ Eddie-Wang <wangjinheng1120@163.com>
148
+ Edward Taylor <edeetee@gmail.com>
149
+ Elaine <elaine.zosa@gmail.com>
150
+ Elbios <141279586+Elbios@users.noreply.github.com>
151
+ Elton Kola <eltonkola@gmail.com>
152
+ Engininja2 <139037756+Engininja2@users.noreply.github.com>
153
+ Equim <sayaka@ekyu.moe>
154
+ Eric Sommerlade <es0m@users.noreply.github.com>
155
+ Eric Zhang <34133756+EZForever@users.noreply.github.com>
156
+ Erik Garrison <erik.garrison@gmail.com>
157
+ Erik Scholz <Green-Sky@users.noreply.github.com>
158
+ Ettore Di Giacinto <mudler@users.noreply.github.com>
159
+ Evan Jones <evan.q.jones@gmail.com>
160
+ Evan Miller <emmiller@gmail.com>
161
+ Eve <139727413+netrunnereve@users.noreply.github.com>
162
+ Evgeny Kurnevsky <kurnevsky@gmail.com>
163
+ Ewout ter Hoeven <E.M.terHoeven@student.tudelft.nl>
164
+ ExtReMLapin <3909752+ExtReMLapin@users.noreply.github.com>
165
+ FK <sozforex@gmail.com>
166
+ Fabian <cmdrf@users.noreply.github.com>
167
+ Fabio R. Sluzala <Fabio3rs@users.noreply.github.com>
168
+ Faez Shakil <faez.shakil@gmail.com>
169
+ FantasyGmm <16450052+FantasyGmm@users.noreply.github.com>
170
+ Fattire <528174+fat-tire@users.noreply.github.com>
171
+ Felix <stenbackfelix@gmail.com>
172
+ Finn Voorhees <finnvoorhees@gmail.com>
173
+ Firat <firatkiral@gmail.com>
174
+ Folko-Ven <71110216+Folko-Ven@users.noreply.github.com>
175
+ Foul-Tarnished <107711110+Foul-Tarnished@users.noreply.github.com>
176
+ Francisco Melo <43780565+francis2tm@users.noreply.github.com>
177
+ Frank Mai <thxcode0824@gmail.com>
178
+ FrankHB <frankhb1989@gmail.com>
179
+ Fred Douglas <43351173+fredlas@users.noreply.github.com>
180
+ Frederik Vogel <Schaltfehler@users.noreply.github.com>
181
+ Gabe Goodhart <gabe.l.hart@gmail.com>
182
+ GainLee <perfecter.gen@gmail.com>
183
+ Galunid <karolek1231456@gmail.com>
184
+ Gary Linscott <glinscott@gmail.com>
185
+ Gary Mulder <gjmulder@gmail.com>
186
+ Gavin Zhao <gavinzhaojw@protonmail.com>
187
+ Genkagaku.GPT <hlhr202@163.com>
188
+ Georgi Gerganov <ggerganov@gmail.com>
189
+ Gilad S <giladgd@users.noreply.github.com>
190
+ Giuseppe Scrivano <giuseppe@scrivano.org>
191
+ GiviMAD <GiviMAD@users.noreply.github.com>
192
+ Govlzkoy <gotope@users.noreply.github.com>
193
+ Guillaume "Vermeille" Sanchez <Guillaume.V.Sanchez@gmail.com>
194
+ Guillaume Wenzek <gwenzek@users.noreply.github.com>
195
+ Guoteng <32697156+SolenoidWGT@users.noreply.github.com>
196
+ Gustavo Rocha Dias <91472747+gustrd@users.noreply.github.com>
197
+ Haggai Nuchi <h.nuchi@gmail.com>
198
+ Halalaluyafail3 <55773281+Halalaluyafail3@users.noreply.github.com>
199
+ Hamdoud Hakem <90524568+hamdoudhakem@users.noreply.github.com>
200
+ HanishKVC <hanishkvc@gmail.com>
201
+ Haohui Mai <ricetons@gmail.com>
202
+ Haoxiang Fei <tonyfettes@tonyfettes.com>
203
+ Harald Fernengel <harald.fernengel@here.com>
204
+ Hatsune Miku <129688334+at8u@users.noreply.github.com>
205
+ HatsuneMikuUwU33 <173229399+HatsuneMikuUwU33@users.noreply.github.com>
206
+ Henk Poley <HenkPoley@gmail.com>
207
+ Henri Vasserman <henv@hot.ee>
208
+ Henrik Forstén <henrik.forsten@gmail.com>
209
+ Herman Semenov <GermanAizek@yandex.ru>
210
+ Hesen Peng <hesen.peng@gmail.com>
211
+ Hoang Nguyen <hugo53@users.noreply.github.com>
212
+ Hong Bo PENG <penghb@cn.ibm.com>
213
+ Hongyu Ouyang <96765450+casavaca@users.noreply.github.com>
214
+ Howard Su <howard0su@gmail.com>
215
+ Hua Jiang <allenhjiang@outlook.com>
216
+ Huawei Lin <huaweilin.cs@gmail.com>
217
+ Hugo Roussel <hugo.rous@gmail.com>
218
+ Ian Bull <irbull@eclipsesource.com>
219
+ Ian Bull <irbull@gmail.com>
220
+ Ian Scrivener <github@zilogy.asia>
221
+ Ido S <ido.pluto@gmail.com>
222
+ IgnacioFDM <ignaciofdm@gmail.com>
223
+ Igor Okulist <okigan@gmail.com>
224
+ Ikko Eltociear Ashimine <eltociear@gmail.com>
225
+ Ilya Kurdyukov <59548320+ilyakurdyukov@users.noreply.github.com>
226
+ Ionoclast Laboratories <brigham@ionoclast.com>
227
+ Isaac McFadyen <isaac@imcf.me>
228
+ IsaacDynamo <61521674+IsaacDynamo@users.noreply.github.com>
229
+ Ivan Komarov <Ivan.Komarov@dfyz.info>
230
+ Ivan Stepanov <ivanstepanovftw@gmail.com>
231
+ JH23X <165871467+JH23X@users.noreply.github.com>
232
+ Jack Mousseau <jmousseau@users.noreply.github.com>
233
+ JackJollimore <130917767+JackJollimore@users.noreply.github.com>
234
+ Jaemin Son <woalsdnd@gmail.com>
235
+ Jag Chadha <jagtesh@gmail.com>
236
+ Jakub N <jakubniemczyk97@gmail.com>
237
+ James A Capozzoli <157492257+jac-jim@users.noreply.github.com>
238
+ James Reynolds <magnusviri@users.noreply.github.com>
239
+ Jan Boon <jan.boon@kaetemi.be>
240
+ Jan Boon <kaetemi@gmail.com>
241
+ Jan Ploski <jpl@plosquare.com>
242
+ Jannis Schönleber <joennlae@gmail.com>
243
+ Jared Van Bortel <cebtenzzre@gmail.com>
244
+ Jared Van Bortel <jared@nomic.ai>
245
+ Jason McCartney <jmac@theroot.org>
246
+ Jean-Christophe Hoelt <hoelt@fovea.cc>
247
+ Jean-Michaël Celerier <jeanmichael.celerier+github@gmail.com>
248
+ Jed Fox <git@jedfox.com>
249
+ Jeffrey Quesnelle <emozilla@nousresearch.com>
250
+ Jesse Jojo Johnson <williamsaintgeorge@gmail.com>
251
+ Jeximo <jeximo@gmail.com>
252
+ Jhen-Jie Hong <iainst0409@gmail.com>
253
+ Jiahao Li <liplus17@163.com>
254
+ Jian Liao <jianliao@users.noreply.github.com>
255
+ JidongZhang-THU <1119708529@qq.com>
256
+ Jinwoo Jeong <33892306+williamjeong2@users.noreply.github.com>
257
+ Jiří Podivín <66251151+jpodivin@users.noreply.github.com>
258
+ Jiří Sejkora <Sejseloid@gmail.com>
259
+ Joan Fontanals <jfontanalsmartinez@gmail.com>
260
+ Joan Fontanals <joan.fontanals.martinez@jina.ai>
261
+ Johan <JohanAR@users.noreply.github.com>
262
+ Johannes Gäßler <johannesg@5d6.de>
263
+ Johannes Rudolph <johannes.rudolph@gmail.com>
264
+ John <78893154+cmp-nct@users.noreply.github.com>
265
+ John Balis <phobossystems@gmail.com>
266
+ John Smith <67539080+kingsidelee@users.noreply.github.com>
267
+ JohnnyB <jboero@users.noreply.github.com>
268
+ Jonas Wunderlich <32615971+jonas-w@users.noreply.github.com>
269
+ Jorge A <161275481+jorgealias@users.noreply.github.com>
270
+ Jose Maldonado <63384398+yukiteruamano@users.noreply.github.com>
271
+ Joseph Stahl <1269177+josephst@users.noreply.github.com>
272
+ Josh Ramer <josh.ramer@icloud.com>
273
+ Joyce <joycebrum@google.com>
274
+ Juan Calderon-Perez <835733+gaby@users.noreply.github.com>
275
+ Judd <foldl@users.noreply.github.com>
276
+ Julius Arkenberg <arki05@users.noreply.github.com>
277
+ Jun Jie <71215065+junnjiee16@users.noreply.github.com>
278
+ Junyang Lin <justinlin930319@hotmail.com>
279
+ Juraj Bednar <juraj@bednar.io>
280
+ Justin Parker <jparkerweb@gmail.com>
281
+ Justin Suess <justin.suess@westpoint.edu>
282
+ Justina Cho <justcho5@gmail.com>
283
+ Justine Tunney <jtunney@gmail.com>
284
+ Justine Tunney <jtunney@mozilla.com>
285
+ Juuso Alasuutari <juuso.alasuutari@gmail.com>
286
+ KASR <karim.asrih@gmail.com>
287
+ Kamil Tomšík <info@tomsik.cz>
288
+ Karsten Weiss <knweiss@gmail.com>
289
+ Karthick <j.karthic2004@gmail.com>
290
+ Karthik Kumar Viswanathan <195178+guilt@users.noreply.github.com>
291
+ Karthik Sethuraman <k.seth1993@gmail.com>
292
+ Kasumi <90275229+kasumi-1@users.noreply.github.com>
293
+ Kawrakow <48489457+ikawrakow@users.noreply.github.com>
294
+ Keiichi Tabata <keiichi.tabata@outlook.com>
295
+ Kenvix ⭐ <kenvixzure@live.com>
296
+ Kerfuffle <44031344+KerfuffleV2@users.noreply.github.com>
297
+ Kevin Gibbons <bakkot@gmail.com>
298
+ Kevin Ji <1146876+kevinji@users.noreply.github.com>
299
+ Kevin Kwok <antimatter15@gmail.com>
300
+ Kevin Lo <kevlo@kevlo.org>
301
+ Kolen Cheung <ickc@users.noreply.github.com>
302
+ Konstantin Herud <konstantin.herud@denkbares.com>
303
+ Konstantin Zhuravlyov <konstantin.zhuravlyov@amd.com>
304
+ Kunshang Ji <kunshang.ji@intel.com>
305
+ Kyle Liang <liangmanlai@gmail.com>
306
+ Kyle Mistele <kyle@mistele.com>
307
+ Kylin <56434533+KyL0N@users.noreply.github.com>
308
+ Lars Grammel <lars.grammel@gmail.com>
309
+ Laura <Tijntje_7@msn.com>
310
+ Lee <44310445+lx200916@users.noreply.github.com>
311
+ Lee Drake <b.lee.drake@gmail.com>
312
+ Leng Yue <lengyue@lengyue.me>
313
+ Leon Knauer <git@leonknauer.com>
314
+ LeonEricsson <70749762+LeonEricsson@users.noreply.github.com>
315
+ Leonardo Neumann <leonardo@neumann.dev.br>
316
+ Li Tan <tanliboy@gmail.com>
317
+ Linwei Wang <wanix1988@gmail.com>
318
+ LoganDark <github@logandark.mozmail.com>
319
+ LostRuins <39025047+LostRuins@users.noreply.github.com>
320
+ Luciano <lucianostrika44@gmail.com>
321
+ Luo Tian <lt@basecity.com>
322
+ Lyle Dean <dean@lyle.dev>
323
+ M. Yusuf Sarıgöz <yusufsarigoz@gmail.com>
324
+ Maarten ter Huurne <maarten@treewalker.org>
325
+ Mack Straight <eiz@users.noreply.github.com>
326
+ Maël Kerbiriou <m431.kerbiriou@gmail.com>
327
+ MaggotHATE <clay1326@gmail.com>
328
+ Manuel <44313466+makuche@users.noreply.github.com>
329
+ Marc Köhlbrugge <subscriptions@marckohlbrugge.com>
330
+ Marco Matthies <71844+marcom@users.noreply.github.com>
331
+ Marcus Dunn <51931484+MarcusDunn@users.noreply.github.com>
332
+ Marian Cepok <marian.cepok@gmail.com>
333
+ Mark Fairbairn <thebaron88@gmail.com>
334
+ Marko Tasic <mtasic85@gmail.com>
335
+ Markus Tavenrath <mtavenrath@users.noreply.github.com>
336
+ Martin Delille <martin@delille.org>
337
+ Martin Krasser <krasserm@googlemail.com>
338
+ Martin Schwaighofer <mschwaig@users.noreply.github.com>
339
+ Marvin Gießing <marvin.giessing@gmail.com>
340
+ Masaya, Kato <62578291+msy-kato@users.noreply.github.com>
341
+ MasterYi1024 <39848311+MasterYi1024@users.noreply.github.com>
342
+ Mateusz Charytoniuk <mateusz.charytoniuk@protonmail.com>
343
+ Matheus C. França <matheus-catarino@hotmail.com>
344
+ Matheus Gabriel Alves Silva <matheusgasource@gmail.com>
345
+ Mathieu Nayrolles <MathieuNls@users.noreply.github.com>
346
+ Mathijs de Bruin <mathijs@mathijsfietst.nl>
347
+ Matt Clayton <156335168+mattjcly@users.noreply.github.com>
348
+ Matt Pulver <matt.pulver@heavy.ai>
349
+ Matteo Boschini <12133566+mbosc@users.noreply.github.com>
350
+ Mattheus Chediak <shammcity00@gmail.com>
351
+ Matthew Tejo <matthew.tejo@gmail.com>
352
+ Matvey Soloviev <blackhole89@gmail.com>
353
+ Max Krasnyansky <max.krasnyansky@gmail.com>
354
+ Max Krasnyansky <quic_maxk@quicinc.com>
355
+ Maxime <672982+maximegmd@users.noreply.github.com>
356
+ Maximilian Winter <maximilian.winter.91@gmail.com>
357
+ Meng Zhang <meng@tabbyml.com>
358
+ Meng, Hengyu <hengyu.meng@intel.com>
359
+ Merrick Christensen <merrick.christensen@gmail.com>
360
+ Michael Coppola <m18coppola@gmail.com>
361
+ Michael Hueschen <m@mhueschen.dev>
362
+ Michael Kesper <mkesper@schokokeks.org>
363
+ Michael Klimenko <mklimenko29@gmail.com>
364
+ Michael Podvitskiy <podvitskiymichael@gmail.com>
365
+ Michael Potter <NanoTekGuy@Gmail.com>
366
+ Michael de Gans <michael.john.degans@gmail.com>
367
+ Michaël de Vries <vriesdemichael@gmail.com>
368
+ Mihai <mihai.chirculescu@yahoo.com>
369
+ Mike <ytianhui2004@gmail.com>
370
+ Mikko Juola <mikjuo@gmail.com>
371
+ Minsoo Cheong <54794500+mscheong01@users.noreply.github.com>
372
+ Mirko185 <mirkosig@gmail.com>
373
+ Mirror Azure <54669636+MirrorAzure@users.noreply.github.com>
374
+ Miwa / Ensan <63481257+ensan-hcl@users.noreply.github.com>
375
+ Mohammadreza Hendiani <hendiani.mohammadreza@gmail.com>
376
+ Mohammadreza Hendiani <mohammad.r.hendiani@gmail.com>
377
+ Murilo Santana <mvrilo@gmail.com>
378
+ Musab Gultekin <musabgultekin@users.noreply.github.com>
379
+ Nam D. Tran <42194884+namtranase@users.noreply.github.com>
380
+ Nathan Epstein <nate2@umbc.edu>
381
+ NawafAlansari <72708095+NawafAlansari@users.noreply.github.com>
382
+ Nebula <infinitewormhole@gmail.com>
383
+ Neo Zhang <14088817+arthw@users.noreply.github.com>
384
+ Neo Zhang <zhang.jianyu@outlook.com>
385
+ Neo Zhang Jianyu <jianyu.zhang@intel.com>
386
+ Neuman Vong <neuman.vong@gmail.com>
387
+ Nexesenex <124105151+Nexesenex@users.noreply.github.com>
388
+ Niall Coates <1349685+Niall-@users.noreply.github.com>
389
+ Nicolai Weitkemper <kontakt@nicolaiweitkemper.de>
390
+ Nicolás Pérez <nicolas_perez@brown.edu>
391
+ Nigel Bosch <pnigelb@gmail.com>
392
+ Niklas Korz <niklas@niklaskorz.de>
393
+ Nikolas <127742645+nneubacher@users.noreply.github.com>
394
+ Nindaleth <Nindaleth@users.noreply.github.com>
395
+ Oleksandr Nikitin <oleksandr@tvori.info>
396
+ Oleksii Maryshchenko <oleksii.maryshchenko@gmail.com>
397
+ Olivier Chafik <ochafik@users.noreply.github.com>
398
+ Ondřej Čertík <ondrej@certik.us>
399
+ Ouadie EL FAROUKI <ouadie.elfarouki@codeplay.com>
400
+ Patrice Ferlet <metal3d@gmail.com>
401
+ Paul Tsochantaris <ptsochantaris@icloud.com>
402
+ Pavol Rusnak <pavol@rusnak.io>
403
+ Pedro Cuenca <pedro@huggingface.co>
404
+ Peter Sugihara <peter@campsh.com>
405
+ Phil H <5756783+phiharri@users.noreply.github.com>
406
+ Philip Taron <philip.taron@gmail.com>
407
+ Phillip Kravtsov <phillip@kravtsov.net>
408
+ Pierre Alexandre SCHEMBRI <pa.schembri@gmail.com>
409
+ Pierrick Hymbert <pierrick.hymbert@gmail.com>
410
+ Przemysław Pawełczyk <przemoc@gmail.com>
411
+ Qin Yue Chen <71813199+chenqiny@users.noreply.github.com>
412
+ Qingyou Meng <meng.qingyou@gmail.com>
413
+ Qu Zongfu <43257352+yancaoweidaode@users.noreply.github.com>
414
+ RJ Adriaansen <adriaansen@eshcc.eur.nl>
415
+ Radoslav Gerganov <rgerganov@gmail.com>
416
+ Radosław Gryta <radek.gryta@gmail.com>
417
+ Rahul Vivek Nair <68507071+RahulVivekNair@users.noreply.github.com>
418
+ Raj Hammeer Singh Hada <hammeerraj@gmail.com>
419
+ Ralph Soika <ralph.soika@imixs.com>
420
+ Rand Xie <randxiexyy29@gmail.com>
421
+ Randall Fitzgerald <randall@dasaku.net>
422
+ Reinforce-II <fate@eastal.com>
423
+ Ren Xuancheng <jklj077@users.noreply.github.com>
424
+ Rene Leonhardt <65483435+reneleonhardt@users.noreply.github.com>
425
+ RhinoDevel <RhinoDevel@users.noreply.github.com>
426
+ Riceball LEE <snowyu.lee@gmail.com>
427
+ Richard Kiss <him@richardkiss.com>
428
+ Richard Roberson <richardr1126@gmail.com>
429
+ Rick G <26732651+TheFlipbook@users.noreply.github.com>
430
+ Rickard Edén <rickardeden@gmail.com>
431
+ Rickard Hallerbäck <rickard.hallerback@gmail.com>
432
+ Rickey Bowers Jr <bitRAKE@gmail.com>
433
+ Riley Stewart <ristew@users.noreply.github.com>
434
+ Rinne <AsakusaRinne@gmail.com>
435
+ Rinne <liu_yaohui1998@126.com>
436
+ Robert Brisita <986796+rbrisita@users.noreply.github.com>
437
+ Robert Sung-wook Shin <edp1096@users.noreply.github.com>
438
+ Robey Holderith <robey@flaminglunchbox.net>
439
+ Robyn <robyngraf@users.noreply.github.com>
440
+ Roger Meier <r.meier@siemens.com>
441
+ Roland <14355895+rbur0425@users.noreply.github.com>
442
+ Romain D <90720+Artefact2@users.noreply.github.com>
443
+ Romain Neutron <romain@neutron.io>
444
+ Roman Parykin <donderom@gmail.com>
445
+ Ron Evans <ron@hybridgroup.com>
446
+ Ron Jailall <rojailal@gmail.com>
447
+ Ronny Brendel <ronnybrendel@gmail.com>
448
+ Ronsor <ronsor@ronsor.pw>
449
+ Rowan Hart <rowanbhart@gmail.com>
450
+ Rune <43761327+Rune-AI@users.noreply.github.com>
451
+ Ryan Landay <rlanday@gmail.com>
452
+ Ryder Wishart <ryderwishart@gmail.com>
453
+ Ryuei <louixs@users.noreply.github.com>
454
+ Rőczey Barnabás <31726601+An0nie@users.noreply.github.com>
455
+ SakuraUmi <yukinon244@gmail.com>
456
+ Salvador E. Tropea <stropea@inti.gob.ar>
457
+ Sam Spilsbury <smspillaz@gmail.com>
458
+ Sami Farin <3876865+Safari77@users.noreply.github.com>
459
+ Samuel Maynard <samwmaynard@gmail.com>
460
+ Sang-Kil Park <sang.park@42dot.ai>
461
+ Seb C <47074056+Sebby37@users.noreply.github.com>
462
+ Sebastián A <sebastian.aedo29@gmail.com>
463
+ SebastianApel <13675545+SebastianApel@users.noreply.github.com>
464
+ Senemu <10880819+Senemu@users.noreply.github.com>
465
+ Sergey Alirzaev <zl29ah@gmail.com>
466
+ Sergio López <slp@sinrega.org>
467
+ Sertaç Özercan <852750+sozercan@users.noreply.github.com>
468
+ SeungWon Jeong <65549245+redlion0929@users.noreply.github.com>
469
+ ShadovvBeast <ShadovvBeast@gmail.com>
470
+ Shakhar Dasgupta <shakhardasgupta@gmail.com>
471
+ Shangning Xu <32517059+xushangning@users.noreply.github.com>
472
+ Shijie <821898965@qq.com>
473
+ Shintarou Okada <kokuzen@gmail.com>
474
+ Shouzheng Liu <61452103+lshzh-ww@users.noreply.github.com>
475
+ Shouzheng Liu <lshzh.hi@gmail.com>
476
+ Shuichi Tsutsumi <shuichi0526@gmail.com>
477
+ Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
478
+ Simon Willison <swillison@gmail.com>
479
+ Siwen Yu <yusiwen@gmail.com>
480
+ Sky Yan <skyan83@gmail.com>
481
+ Slaren <2141330+slaren@users.noreply.github.com>
482
+ Slava Primenko <primenko.s@gmail.com>
483
+ SoftwareRenderer <138734813+SoftwareRenderer@users.noreply.github.com>
484
+ Someone <sergei.kozlukov@aalto.fi>
485
+ Someone Serge <sergei.kozlukov@aalto.fi>
486
+ Sourab Mangrulkar <13534540+pacman100@users.noreply.github.com>
487
+ Spencer Sutton <spencersutton@users.noreply.github.com>
488
+ Srihari-mcw <96763064+Srihari-mcw@users.noreply.github.com>
489
+ Srinivas Billa <nivibilla@gmail.com>
490
+ Stefan Sydow <stefan@sydow.email>
491
+ Steffen Röcker <sroecker@gmail.com>
492
+ Stephan Walter <stephan@walter.name>
493
+ Stephen Nichols <snichols@users.noreply.github.com>
494
+ Steve Grubb <ausearch.1@gmail.com>
495
+ Steven Prichard <spprichard20@gmail.com>
496
+ Steven Roussey <sroussey@gmail.com>
497
+ Steward Garcia <57494570+FSSRepo@users.noreply.github.com>
498
+ Suaj Carrot <72162667+SuajCarrot@users.noreply.github.com>
499
+ SuperUserNameMan <yoann@terminajones.com>
500
+ Tai Duc Nguyen <taiducnguyen.drexel@gmail.com>
501
+ Taikono-Himazin <kazu@po.harenet.ne.jp>
502
+ Tameem <113388789+AhmadTameem@users.noreply.github.com>
503
+ Tamotsu Takahashi <ttakah+github@gmail.com>
504
+ Thái Hoàng Tâm <75922889+RoyalHeart@users.noreply.github.com>
505
+ Thatcher Chamberlin <j.thatcher.c@gmail.com>
506
+ Theia Vogel <theia@vgel.me>
507
+ Thérence <13496987+Royalphax@users.noreply.github.com>
508
+ Thibault Terrasson <thibault.terrasson@gmail.com>
509
+ Thomas Klausner <wiz@gatalith.at>
510
+ Tim Miller <drasticactions@users.noreply.github.com>
511
+ Timmy Knight <r2d2fish@gmail.com>
512
+ Timothy Cronin <40186632+4imothy@users.noreply.github.com>
513
+ Ting Lou <ting.lou@gmail.com>
514
+ Ting Sun <suntcrick@gmail.com>
515
+ Tobias Lütke <tobi@shopify.com>
516
+ Tom C <tom.corelis@gmail.com>
517
+ Tom Jobbins <784313+TheBloke@users.noreply.github.com>
518
+ Tomas <tom.tomas.36478119@gmail.com>
519
+ Tomáš Pazdiora <tomas.pazdiora@gmail.com>
520
+ Tristan Druyen <tristan@vault81.mozmail.com>
521
+ Tristan Ross <rosscomputerguy@protonmail.com>
522
+ Tungsten842 <886724vf@anonaddy.me>
523
+ Tungsten842 <quantmint@protonmail.com>
524
+ Tushar <ditsuke@protonmail.com>
525
+ UEXTM.com <84163508+uextm@users.noreply.github.com>
526
+ Ulrich Drepper <drepper@gmail.com>
527
+ Uzo Nweke <uzoechi@gmail.com>
528
+ Vaibhav Srivastav <vaibhavs10@gmail.com>
529
+ Val Kharitonov <mail@kharvd.com>
530
+ Valentin Konovalov <valle.ketsujin@gmail.com>
531
+ Valentyn Bezshapkin <61702053+valentynbez@users.noreply.github.com>
532
+ Victor Nogueira <felladrin@gmail.com>
533
+ Victor Z. Peng <ziliangdotme@gmail.com>
534
+ Vlad <spitfireage@gmail.com>
535
+ Vladimir <bogdad@gmail.com>
536
+ Vladimir Malyutin <first-leon@yandex.ru>
537
+ Vladimir Zorin <vladimir@deviant.guru>
538
+ Volodymyr Vitvitskyi <72226+signalpillar@users.noreply.github.com>
539
+ WangHaoranRobin <56047610+WangHaoranRobin@users.noreply.github.com>
540
+ Weird Constructor <weirdconstructor@gmail.com>
541
+ Welby Seely <welbyseely@gmail.com>
542
+ Wentai Zhang <rchardx@gmail.com>
543
+ WillCorticesAI <150854901+WillCorticesAI@users.noreply.github.com>
544
+ William Tambellini <william.tambellini@gmail.com>
545
+ Willy Tarreau <w@1wt.eu>
546
+ Wouter <9594229+DifferentialityDevelopment@users.noreply.github.com>
547
+ Wu Jian Ping <wujjpp@hotmail.com>
548
+ Wu Jian Ping <wujp@greatld.com>
549
+ Xiake Sun <xiake.sun@intel.com>
550
+ Xiang (Kevin) Li <kevinli020508@gmail.com>
551
+ Xiao-Yong Jin <jinxiaoyong@gmail.com>
552
+ XiaotaoChen <chenxiaotao1234@gmail.com>
553
+ Xiaoyi Chen <cxychina@gmail.com>
554
+ Xingchen Song(宋星辰) <xingchensong1996@163.com>
555
+ Xuan Son Nguyen <thichthat@gmail.com>
556
+ Yann Follet <131855179+YannFollet@users.noreply.github.com>
557
+ Yaroslav <yaroslav.yashin@me.com>
558
+ Yazan Agha-Schrader <mountaiin@icloud.com>
559
+ Yiming Cui <conandiy@vip.qq.com>
560
+ Yishuo Wang <MeouSker77@outlook.com>
561
+ Yueh-Po Peng <94939112+y10ab1@users.noreply.github.com>
562
+ Yui <dev@sleepyyui.com>
563
+ Yusuf Kağan Hanoğlu <hanoglu@yahoo.com>
564
+ Yuval Peled <31162840+Yuval-Peled@users.noreply.github.com>
565
+ ZHAOKAI WANG <sanxianwei@163.com>
566
+ Zane Shannon <z@zcs.me>
567
+ Zay <95888118+isaiahbjork@users.noreply.github.com>
568
+ Zenix <zenixls2@gmail.com>
569
+ Zhang Peiyuan <a1286225768@gmail.com>
570
+ Zheng.Deng <32841220+dengzheng-cloud@users.noreply.github.com>
571
+ ZhouYuChen <zhouyuchen@naver.com>
572
+ Ziad Ben Hadj-Alouane <zied.benhadjalouane@gmail.com>
573
+ Ziang Wu <97337387+ZiangWu-77@users.noreply.github.com>
574
+ Zsapi <martin1.zsapka@gmail.com>
575
+ a-n-n-a-l-e-e <150648636+a-n-n-a-l-e-e@users.noreply.github.com>
576
+ adel boussaken <netdur@gmail.com>
577
+ afrideva <95653597+afrideva@users.noreply.github.com>
578
+ agray3 <agray3@users.noreply.github.com>
579
+ akawrykow <142945436+akawrykow@users.noreply.github.com>
580
+ alexpinel <93524949+alexpinel@users.noreply.github.com>
581
+ alonfaraj <alonfaraj@gmail.com>
582
+ alwqx <kenan3015@gmail.com>
583
+ amd-lalithnc <lalithnc@amd.com>
584
+ andrijdavid <david@geek.mg>
585
+ anon998 <131767832+anon998@users.noreply.github.com>
586
+ anzz1 <anzz1@live.com>
587
+ apaz <aarpazdera@gmail.com>
588
+ apcameron <37645737+apcameron@users.noreply.github.com>
589
+ arch-btw <57669023+arch-btw@users.noreply.github.com>
590
+ arcrank <arcrank@gmail.com>
591
+ arlo-phoenix <140345165+arlo-phoenix@users.noreply.github.com>
592
+ at8u <129688334+at8u@users.noreply.github.com>
593
+ automaticcat <daogiatuank54@gmail.com>
594
+ bandoti <141645996+bandoti@users.noreply.github.com>
595
+ beiller <beiller@gmail.com>
596
+ bhubbb <79117352+bhubbb@users.noreply.github.com>
597
+ bmwl <brian.marshall@tolko.com>
598
+ bobqianic <129547291+bobqianic@users.noreply.github.com>
599
+ bryanSwk <93190252+bryanSwk@users.noreply.github.com>
600
+ bsilvereagle <bsilvereagle@users.noreply.github.com>
601
+ bssrdf <merlintiger@hotmail.com>
602
+ byte-6174 <88070277+byte-6174@users.noreply.github.com>
603
+ cebtenzzre <cebtenzzre@gmail.com>
604
+ chaihahaha <chai836275709@gmail.com>
605
+ chiranko <96988916+chiranko@users.noreply.github.com>
606
+ clibdev <52199778+clibdev@users.noreply.github.com>
607
+ clyang <clyang@clyang.net>
608
+ cocktailpeanut <121128867+cocktailpeanut@users.noreply.github.com>
609
+ coezbek <c.oezbek@gmail.com>
610
+ comex <comexk@gmail.com>
611
+ compilade <113953597+compilade@users.noreply.github.com>
612
+ compilade <git@compilade.net>
613
+ cpumaxx <163466046+cpumaxx@users.noreply.github.com>
614
+ crasm <crasm@git.vczf.net>
615
+ crasm <crasm@git.vczf.us>
616
+ daboe01 <daboe01@googlemail.com>
617
+ david raistrick <keen99@users.noreply.github.com>
618
+ ddh0 <dylanhalladay02@icloud.com>
619
+ ddpasa <112642920+ddpasa@users.noreply.github.com>
620
+ deepdiffuser <112834445+deepdiffuser@users.noreply.github.com>
621
+ divinity76 <divinity76@gmail.com>
622
+ dm4 <sunrisedm4@gmail.com>
623
+ dotpy314 <33351922+dotpy314@users.noreply.github.com>
624
+ drbh <david.richard.holtz@gmail.com>
625
+ ds5t5 <145942675+ds5t5@users.noreply.github.com>
626
+ dylan <canardleteer@users.noreply.github.com>
627
+ eastriver <lee@eastriver.dev>
628
+ ebraminio <ebraminio@gmail.com>
629
+ eiery <19350831+eiery@users.noreply.github.com>
630
+ eric8607242 <e0928021388@gmail.com>
631
+ fairydreaming <166155368+fairydreaming@users.noreply.github.com>
632
+ fraxy-v <65565042+fraxy-v@users.noreply.github.com>
633
+ github-actions[bot] <github-actions[bot]@users.noreply.github.com>
634
+ gliptic <gliptic@users.noreply.github.com>
635
+ goerch <jhr.walter@t-online.de>
636
+ grahameth <96447521+grahameth@users.noreply.github.com>
637
+ gwjr <502526+gwjr@users.noreply.github.com>
638
+ h-h-h-h <13482553+h-h-h-h@users.noreply.github.com>
639
+ hankcs <cnhankmc@gmail.com>
640
+ hoangmit <hoangmit@users.noreply.github.com>
641
+ hongbo.mo <352280764@qq.com>
642
+ hopkins385 <98618192+hopkins385@users.noreply.github.com>
643
+ howlger <eclipse@voormann.de>
644
+ howlger <github@voormann.de>
645
+ hutli <6594598+hutli@users.noreply.github.com>
646
+ hutli <hutli@hutli.hu>
647
+ hutli <jensstaermose@hotmail.com>
648
+ hxer7963 <hxer7963@gmail.com>
649
+ hydai <z54981220@gmail.com>
650
+ iSma <ismail.senhaji@gmail.com>
651
+ iacore <74560659+iacore@users.noreply.github.com>
652
+ igarnier <igarnier@protonmail.com>
653
+ intelmatt <61025942+intelmatt@users.noreply.github.com>
654
+ iohub <rickyang.pro@gmail.com>
655
+ jacobi petrucciani <8117202+jpetrucciani@users.noreply.github.com>
656
+ jaime-m-p <167997752+jaime-m-p@users.noreply.github.com>
657
+ jameswu2014 <545426914@qq.com>
658
+ jiez <373447296@qq.com>
659
+ jneem <joeneeman@gmail.com>
660
+ joecryptotoo <80373433+joecryptotoo@users.noreply.github.com>
661
+ johnson442 <56517414+johnson442@users.noreply.github.com>
662
+ jojorne <jojorne@users.noreply.github.com>
663
+ jon-chuang <9093549+jon-chuang@users.noreply.github.com>
664
+ jp-x-g <jpxg-dev@protonmail.com>
665
+ jukofyork <69222624+jukofyork@users.noreply.github.com>
666
+ junchao-loongson <68935141+junchao-loongson@users.noreply.github.com>
667
+ jwj7140 <32943891+jwj7140@users.noreply.github.com>
668
+ k.h.lai <adrian.k.h.lai@outlook.com>
669
+ kaizau <kaizau@users.noreply.github.com>
670
+ kalomaze <66376113+kalomaze@users.noreply.github.com>
671
+ kang <tpdns9032100@gmail.com>
672
+ katsu560 <118887472+katsu560@users.noreply.github.com>
673
+ kchro3 <62481661+kchro3@users.noreply.github.com>
674
+ khimaros <me@khimaros.com>
675
+ kiltyj <kiltyj@gmail.com>
676
+ klosax <131523366+klosax@users.noreply.github.com>
677
+ kunal-vaishnavi <115581922+kunal-vaishnavi@users.noreply.github.com>
678
+ kunnis <kunnis@users.noreply.github.com>
679
+ kuronekosaiko <EvanChanJ@163.com>
680
+ kuvaus <22169537+kuvaus@users.noreply.github.com>
681
+ kwin1412 <42286931+kwin1412@users.noreply.github.com>
682
+ l3utterfly <gc.pthzfoldr@gmail.com>
683
+ ldwang <ftgreat@163.com>
684
+ le.chang <cljs118@126.com>
685
+ leejet <leejet714@gmail.com>
686
+ limitedAtonement <limitedAtonement@users.noreply.github.com>
687
+ liuwei-git <14815172+liuwei-git@users.noreply.github.com>
688
+ lon <114724657+longregen@users.noreply.github.com>
689
+ loonerin <132926317+loonerin@users.noreply.github.com>
690
+ luoyu-intel <yu.luo@intel.com>
691
+ m3ndax <adrian.goessl@outlook.com>
692
+ maddes8cht <55592906+maddes8cht@users.noreply.github.com>
693
+ makomk <makosoft@googlemail.com>
694
+ manikbhandari <mbbhandarimanik2@gmail.com>
695
+ maor-ps <154728172+maor-ps@users.noreply.github.com>
696
+ mdrokz <mohammadmunshi@gmail.com>
697
+ mgroeber9110 <45620825+mgroeber9110@users.noreply.github.com>
698
+ minarchist <minarchist@users.noreply.github.com>
699
+ mj-shifu <77107165+mj-shifu@users.noreply.github.com>
700
+ mmyjona <jonathan.gonse@gmail.com>
701
+ momonga <115213907+mmnga@users.noreply.github.com>
702
+ moritzbrantner <31051084+moritzbrantner@users.noreply.github.com>
703
+ mzcu <milos.cubrilo@gmail.com>
704
+ nanahi <130121847+na-na-hi@users.noreply.github.com>
705
+ ngc92 <7938269+ngc92@users.noreply.github.com>
706
+ nhamanasu <45545786+nhamanasu@users.noreply.github.com>
707
+ niansa/tuxifan <anton-sa@web.de>
708
+ niansa/tuxifan <tuxifan@posteo.de>
709
+ nickp27 <nb.porter@gmail.com>
710
+ ningshanwutuobang <ningshanwutuobang@gmail.com>
711
+ nold <Nold360@users.noreply.github.com>
712
+ nopperl <54780682+nopperl@users.noreply.github.com>
713
+ nusu-github <29514220+nusu-github@users.noreply.github.com>
714
+ olexiyb <olexiyb@gmail.com>
715
+ omahs <73983677+omahs@users.noreply.github.com>
716
+ oobabooga <112222186+oobabooga@users.noreply.github.com>
717
+ opparco <parco.opaai@gmail.com>
718
+ ostix360 <55257054+ostix360@users.noreply.github.com>
719
+ pengxin99 <pengxin.yuan@intel.com>
720
+ perserk <perserk@gmail.com>
721
+ pmysl <piotr.myslinski@outlook.com>
722
+ postmasters <namnguyen@google.com>
723
+ pudepiedj <pudepiedj@gmail.com>
724
+ qingfengfenga <41416092+qingfengfenga@users.noreply.github.com>
725
+ qouoq <qouoq@fastmail.com>
726
+ qunash <anzoria@gmail.com>
727
+ rabidcopy <rabidcopy@yahoo.com>
728
+ rankaiyx <rankaiyx@rankaiyx.com>
729
+ rhjdvsgsgks <26178113+rhjdvsgsgks@users.noreply.github.com>
730
+ rhuddleston <ryan.huddleston@percona.com>
731
+ rimoliga <53384203+rimoliga@users.noreply.github.com>
732
+ runfuture <runfuture@users.noreply.github.com>
733
+ sandyiscool <sandyiscool@gmail.com>
734
+ sasha0552 <admin@sasha0552.org>
735
+ semidark <me@semidark.net>
736
+ sharpHL <132747147+sharpHL@users.noreply.github.com>
737
+ shibe2 <shibe@tuta.io>
738
+ singularity <12184989+singularity-s0@users.noreply.github.com>
739
+ sjinzh <sjinzh@gmail.com>
740
+ sjxx <63994076+ylsdamxssjxxdd@users.noreply.github.com>
741
+ slaren <2141330+slaren@users.noreply.github.com>
742
+ slaren <slarengh@gmail.com>
743
+ snadampal <87143774+snadampal@users.noreply.github.com>
744
+ staviq <staviq@gmail.com>
745
+ stduhpf <stephduh@live.fr>
746
+ strawberrymelonpanda <152940198+strawberrymelonpanda@users.noreply.github.com>
747
+ swittk <switt1995@gmail.com>
748
+ takov751 <40316768+takov751@users.noreply.github.com>
749
+ tarcey <cey.tarik@gmail.com>
750
+ texmex76 <40733439+texmex76@users.noreply.github.com>
751
+ thement <40525767+thement@users.noreply.github.com>
752
+ tjohnman <tjohnman@users.noreply.github.com>
753
+ tslmy <tslmy@users.noreply.github.com>
754
+ ubik2 <ubik2@users.noreply.github.com>
755
+ uint256_t <konndennsa@gmail.com>
756
+ uint256_t <maekawatoshiki1017@gmail.com>
757
+ unbounded <haakon@likedan.net>
758
+ valiray <133289098+valiray@users.noreply.github.com>
759
+ vik <vikhyatk@gmail.com>
760
+ viric <viric@viric.name>
761
+ vodkaslime <646329483@qq.com>
762
+ vvhg1 <94630311+vvhg1@users.noreply.github.com>
763
+ vxiiduu <73044267+vxiiduu@users.noreply.github.com>
764
+ wbpxre150 <100937007+wbpxre150@users.noreply.github.com>
765
+ whoreson <139810751+whoreson@users.noreply.github.com>
766
+ woachk <24752637+woachk@users.noreply.github.com>
767
+ wonjun Jang <strutive07@gmail.com>
768
+ woodx <124784234+woodx9@users.noreply.github.com>
769
+ wzy <32936898+Freed-Wu@users.noreply.github.com>
770
+ xaedes <xaedes@gmail.com>
771
+ xaedes <xaedes@googlemail.com>
772
+ xloem <0xloem@gmail.com>
773
+ yangli2 <yangli2@gmail.com>
774
+ yuiseki <yuiseki@gmail.com>
775
+ zakkor <edward.partenie@gmail.com>
776
+ zhangkaihuo <zhangkaihuo@gmail.com>
777
+ zhouwg <6889919+zhouwg@users.noreply.github.com>
778
+ zhouwg <zhouwg2000@gmail.com>
779
+ zrm <trustiosity.zrm@gmail.com>
780
+ Ștefan-Gabriel Muscalu <legraphista@users.noreply.github.com>
781
+ 源文雨 <41315874+fumiama@users.noreply.github.com>
782
+ Нияз Гарифзянов <112617865+garrnizon@users.noreply.github.com>
CMakeLists.txt ADDED
@@ -0,0 +1,216 @@
+ cmake_minimum_required(VERSION 3.14) # for add_link_options and implicit target directories.
+ project("llama.cpp" C CXX)
+ include(CheckIncludeFileCXX)
+
+ #set(CMAKE_WARN_DEPRECATED YES)
+ set(CMAKE_WARN_UNUSED_CLI YES)
+
+ set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
+
+ if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE)
+ set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE)
+ set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo")
+ endif()
+
+ # Add path to modules
+ list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/")
+
+ set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
+
+ if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
+ set(LLAMA_STANDALONE ON)
+
+ include(git-vars)
+
+ # configure project version
+ # TODO
+ else()
+ set(LLAMA_STANDALONE OFF)
+ endif()
+
+ if (EMSCRIPTEN)
+ set(BUILD_SHARED_LIBS_DEFAULT OFF)
+
+ option(LLAMA_WASM_SINGLE_FILE "llama: embed WASM inside the generated llama.js" ON)
+ else()
+ if (MINGW)
+ set(BUILD_SHARED_LIBS_DEFAULT OFF)
+ else()
+ set(BUILD_SHARED_LIBS_DEFAULT ON)
+ endif()
+ endif()
+
+ option(BUILD_SHARED_LIBS "build shared libraries" ${BUILD_SHARED_LIBS_DEFAULT})
+
+ if (WIN32)
+ add_compile_definitions(_CRT_SECURE_NO_WARNINGS)
+ endif()
+
+ #
+ # option list
+ #
+
+ # debug
+ option(LLAMA_ALL_WARNINGS "llama: enable all compiler warnings" ON)
+ option(LLAMA_ALL_WARNINGS_3RD_PARTY "llama: enable all compiler warnings in 3rd party libs" OFF)
+
+ # build
+ option(LLAMA_FATAL_WARNINGS "llama: enable -Werror flag" OFF)
+
+ # sanitizers
+ option(LLAMA_SANITIZE_THREAD "llama: enable thread sanitizer" OFF)
+ option(LLAMA_SANITIZE_ADDRESS "llama: enable address sanitizer" OFF)
+ option(LLAMA_SANITIZE_UNDEFINED "llama: enable undefined sanitizer" OFF)
+
+ # utils
+ option(LLAMA_BUILD_COMMON "llama: build common utils library" ${LLAMA_STANDALONE})
+
+ # extra artifacts
+ option(LLAMA_BUILD_TESTS "llama: build tests" ${LLAMA_STANDALONE})
+ option(LLAMA_BUILD_EXAMPLES "llama: build examples" ${LLAMA_STANDALONE})
+ option(LLAMA_BUILD_SERVER "llama: build server example" ${LLAMA_STANDALONE})
+
+ # 3rd party libs
+ option(LLAMA_CURL "llama: use libcurl to download model from an URL" OFF)
+
+ # Required for relocatable CMake package
+ include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/build-info.cmake)
+
+ # override ggml options
+ set(GGML_SANITIZE_THREAD ${LLAMA_SANITIZE_THREAD})
+ set(GGML_SANITIZE_ADDRESS ${LLAMA_SANITIZE_ADDRESS})
+ set(GGML_SANITIZE_UNDEFINED ${LLAMA_SANITIZE_UNDEFINED})
+ set(GGML_ALL_WARNINGS ${LLAMA_ALL_WARNINGS})
+ set(GGML_FATAL_WARNINGS ${LLAMA_FATAL_WARNINGS})
+
+ # change the default for these ggml options
+ if (NOT DEFINED GGML_LLAMAFILE)
+ set(GGML_LLAMAFILE_DEFAULT ON)
+ endif()
+
+ if (NOT DEFINED GGML_AMX)
+ set(GGML_AMX ON)
+ endif()
+
+ if (NOT DEFINED GGML_CUDA_GRAPHS)
+ set(GGML_CUDA_GRAPHS_DEFAULT ON)
+ endif()
+
+ # transition helpers
+ function (llama_option_depr TYPE OLD NEW)
+ if (${OLD})
+ message(${TYPE} "${OLD} is deprecated and will be removed in the future.\nUse ${NEW} instead\n")
+ set(${NEW} ON PARENT_SCOPE)
+ endif()
+ endfunction()
+
+ llama_option_depr(FATAL_ERROR LLAMA_CUBLAS GGML_CUDA)
+ llama_option_depr(WARNING LLAMA_CUDA GGML_CUDA)
+ llama_option_depr(WARNING LLAMA_KOMPUTE GGML_KOMPUTE)
+ llama_option_depr(WARNING LLAMA_METAL GGML_METAL)
+ llama_option_depr(WARNING LLAMA_METAL_EMBED_LIBRARY GGML_METAL_EMBED_LIBRARY)
+ llama_option_depr(WARNING LLAMA_NATIVE GGML_NATIVE)
+ llama_option_depr(WARNING LLAMA_RPC GGML_RPC)
+ llama_option_depr(WARNING LLAMA_SYCL GGML_SYCL)
+ llama_option_depr(WARNING LLAMA_SYCL_F16 GGML_SYCL_F16)
+ llama_option_depr(WARNING LLAMA_CANN GGML_CANN)
+
+ #
+ # build the library
+ #
+
+ if (NOT TARGET ggml)
+ add_subdirectory(ggml)
+ # ... otherwise assume ggml is added by a parent CMakeLists.txt
+ endif()
+ add_subdirectory(src)
+
+ #
+ # install
+ #
+
+ include(GNUInstallDirs)
+ include(CMakePackageConfigHelpers)
+
+ set(LLAMA_BUILD_NUMBER ${BUILD_NUMBER})
+ set(LLAMA_BUILD_COMMIT ${BUILD_COMMIT})
+ set(LLAMA_INSTALL_VERSION 0.0.${BUILD_NUMBER})
+
+ set(LLAMA_INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_INCLUDEDIR} CACHE PATH "Location of header files")
+ set(LLAMA_LIB_INSTALL_DIR ${CMAKE_INSTALL_LIBDIR} CACHE PATH "Location of library files")
+ set(LLAMA_BIN_INSTALL_DIR ${CMAKE_INSTALL_BINDIR} CACHE PATH "Location of binary files")
+
+
+ # At the moment some compile definitions are placed within the ggml/src
+ # directory but not exported on the `ggml` target. This could be improved by
+ # determining _precisely_ which defines are necessary for the llama-config
+ # package.
+ #
+ set(GGML_TRANSIENT_DEFINES)
+ get_target_property(GGML_DIRECTORY ggml SOURCE_DIR)
+ get_directory_property(GGML_DIR_DEFINES DIRECTORY ${GGML_DIRECTORY} COMPILE_DEFINITIONS)
+ if (GGML_DIR_DEFINES)
+ list(APPEND GGML_TRANSIENT_DEFINES ${GGML_DIR_DEFINES})
+ endif()
+ get_target_property(GGML_TARGET_DEFINES ggml COMPILE_DEFINITIONS)
+ if (GGML_TARGET_DEFINES)
+ list(APPEND GGML_TRANSIENT_DEFINES ${GGML_TARGET_DEFINES})
+ endif()
+ get_target_property(GGML_LINK_LIBRARIES ggml LINK_LIBRARIES)
+
+ set_target_properties(llama PROPERTIES PUBLIC_HEADER ${CMAKE_CURRENT_SOURCE_DIR}/include/llama.h)
+ install(TARGETS llama LIBRARY PUBLIC_HEADER)
+
+ configure_package_config_file(
+ ${CMAKE_CURRENT_SOURCE_DIR}/cmake/llama-config.cmake.in
+ ${CMAKE_CURRENT_BINARY_DIR}/llama-config.cmake
+ INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/llama
+ PATH_VARS LLAMA_INCLUDE_INSTALL_DIR
+ LLAMA_LIB_INSTALL_DIR
+ LLAMA_BIN_INSTALL_DIR )
+
+ write_basic_package_version_file(
+ ${CMAKE_CURRENT_BINARY_DIR}/llama-version.cmake
+ VERSION ${LLAMA_INSTALL_VERSION}
+ COMPATIBILITY SameMajorVersion)
+
+ install(FILES ${CMAKE_CURRENT_BINARY_DIR}/llama-config.cmake
+ ${CMAKE_CURRENT_BINARY_DIR}/llama-version.cmake
+ DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/llama)
+
+ install(
+ FILES convert_hf_to_gguf.py
+ PERMISSIONS
+ OWNER_READ
+ OWNER_WRITE
+ OWNER_EXECUTE
+ GROUP_READ
+ GROUP_EXECUTE
+ WORLD_READ
+ WORLD_EXECUTE
+ DESTINATION ${CMAKE_INSTALL_BINDIR})
+
+ configure_file(cmake/llama.pc.in
+ "${CMAKE_CURRENT_BINARY_DIR}/llama.pc"
+ @ONLY)
+
+ install(FILES "${CMAKE_CURRENT_BINARY_DIR}/llama.pc"
+ DESTINATION lib/pkgconfig)
+
+ #
+ # utils, programs, examples and tests
+ #
+
+ if (LLAMA_BUILD_COMMON)
+ add_subdirectory(common)
+ endif()
+
+ if (LLAMA_BUILD_COMMON AND LLAMA_BUILD_TESTS AND NOT CMAKE_JS_VERSION)
+ include(CTest)
+ add_subdirectory(tests)
+ endif()
+
+ if (LLAMA_BUILD_COMMON AND LLAMA_BUILD_EXAMPLES)
+ add_subdirectory(examples)
+ add_subdirectory(pocs)
+ endif()
CMakePresets.json ADDED
@@ -0,0 +1,81 @@
+ {
+ "version": 4,
+ "configurePresets": [
+ {
+ "name": "base",
+ "hidden": true,
+ "generator": "Ninja",
+ "binaryDir": "${sourceDir}/build-${presetName}",
+ "cacheVariables": {
+ "CMAKE_EXPORT_COMPILE_COMMANDS": "ON",
+ "CMAKE_INSTALL_RPATH": "$ORIGIN;$ORIGIN/.."
+ }
+ },
+ {
+ "name": "sycl-base",
+ "hidden": true,
+ "generator": "Ninja",
+ "binaryDir": "${sourceDir}/build-${presetName}",
+ "cacheVariables": {
+ "CMAKE_EXPORT_COMPILE_COMMANDS": "ON",
+ "CMAKE_CXX_COMPILER": "icx",
+ "CMAKE_C_COMPILER": "cl",
+ "GGML_SYCL": "ON",
+ "CMAKE_INSTALL_RPATH": "$ORIGIN;$ORIGIN/.."
+ }
+ },
+ { "name": "debug", "hidden": true, "cacheVariables": { "CMAKE_BUILD_TYPE": "Debug" } },
+ { "name": "release", "hidden": true, "cacheVariables": { "CMAKE_BUILD_TYPE": "Release" } },
+ { "name": "reldbg", "hidden": true, "cacheVariables": { "CMAKE_BUILD_TYPE": "RelWithDebInfo" } },
+ { "name": "static", "hidden": true, "cacheVariables": { "GGML_STATIC": "ON" } },
+ { "name": "sycl_f16", "hidden": true, "cacheVariables": { "GGML_SYCL_F16": "ON" } },
+
+ {
+ "name": "arm64-windows-msvc", "hidden": true,
+ "architecture": { "value": "arm64", "strategy": "external" },
+ "toolset": { "value": "host=x64", "strategy": "external" },
+ "cacheVariables": {
+ "CMAKE_TOOLCHAIN_FILE": "${sourceDir}/cmake/arm64-windows-msvc.cmake"
+ }
+ },
+
+ {
+ "name": "arm64-windows-llvm", "hidden": true,
+ "architecture": { "value": "arm64", "strategy": "external" },
+ "toolset": { "value": "host=x64", "strategy": "external" },
+ "cacheVariables": {
+ "CMAKE_TOOLCHAIN_FILE": "${sourceDir}/cmake/arm64-windows-llvm.cmake"
+ }
+ },
+
+ {
+ "name": "arm64-apple-clang", "hidden": true,
+ "architecture": { "value": "arm64", "strategy": "external" },
+ "toolset": { "value": "host=x64", "strategy": "external" },
+ "cacheVariables": {
+ "CMAKE_TOOLCHAIN_FILE": "${sourceDir}/cmake/arm64-apple-clang.cmake"
+ }
+ },
+
+ { "name": "arm64-windows-llvm-debug" , "inherits": [ "base", "arm64-windows-llvm", "debug" ] },
+ { "name": "arm64-windows-llvm-release", "inherits": [ "base", "arm64-windows-llvm", "reldbg" ] },
+ { "name": "arm64-windows-llvm+static-release", "inherits": [ "base", "arm64-windows-llvm", "reldbg", "static" ] },
+
+ { "name": "arm64-apple-clang-debug" , "inherits": [ "base", "arm64-apple-clang", "debug" ] },
+ { "name": "arm64-apple-clang-release" , "inherits": [ "base", "arm64-apple-clang", "reldbg" ] },
+ { "name": "arm64-apple-clang+static-release" , "inherits": [ "base", "arm64-apple-clang", "reldbg", "static" ] },
+
+ { "name": "arm64-windows-msvc-debug" , "inherits": [ "base", "arm64-windows-msvc", "debug" ] },
+ { "name": "arm64-windows-msvc-release", "inherits": [ "base", "arm64-windows-msvc", "reldbg" ] },
+ { "name": "arm64-windows-msvc+static-release", "inherits": [ "base", "arm64-windows-msvc", "reldbg", "static" ] },
+
+ { "name": "x64-windows-msvc-debug" , "inherits": [ "base", "debug" ] },
+ { "name": "x64-windows-msvc-release", "inherits": [ "base", "reldbg" ] },
+ { "name": "x64-windows-msvc+static-release", "inherits": [ "base", "reldbg", "static" ] },
+
+ { "name": "x64-windows-sycl-debug" , "inherits": [ "sycl-base", "debug" ] },
+ { "name": "x64-windows-sycl-debug-f16", "inherits": [ "sycl-base", "debug", "sycl_f16" ] },
+ { "name": "x64-windows-sycl-release", "inherits": [ "sycl-base", "release" ] },
+ { "name": "x64-windows-sycl-release-f16", "inherits": [ "sycl-base", "release", "sycl_f16" ] }
+ ]
+ }
CONTRIBUTING.md ADDED
@@ -0,0 +1,33 @@
+ # Pull requests (for contributors)
+
+ - Test your changes:
+ - Using the commands in the [`tests`](tests) folder. For instance, running the `./tests/test-backend-ops` command tests different backend implementations of the `ggml` library
+ - Execute [the full CI locally on your machine](ci/README.md) before publishing
+ - Optionally rate the complexity of your PR (i.e. `Review Complexity : Low`, `Review Complexity : Medium`, `Review Complexity : High`). This makes it easier for maintainers to triage the PRs
+ - Consider allowing write access to your branch for faster reviews, as reviewers can push commits directly
+ - If your PR becomes stale, don't hesitate to ping the maintainers in the comments
+
+ # Pull requests (for collaborators)
+
+ - Squash-merge PRs
+ - Use the following format for the squashed commit title: `<module> : <commit title> (#<issue_number>)`. For example: `utils : fix typo in utils.py (#1234)`
+ - Optionally pick a `<module>` from here: https://github.com/ggerganov/llama.cpp/wiki/Modules
+
+ # Coding guidelines
+
+ - Avoid adding third-party dependencies, extra files, extra headers, etc.
+ - Always consider cross-compatibility with other operating systems and architectures
+ - Avoid fancy-looking modern STL constructs, use basic `for` loops, avoid templates, keep it simple
+ - There are no strict rules for the code style, but try to follow the patterns in the code (indentation, spaces, etc.). Vertical alignment makes things more readable and easier to batch edit
+ - Clean-up any trailing whitespaces, use 4 spaces for indentation, brackets on the same line, `void * ptr`, `int & a`
+ - Naming usually optimizes for common prefix (see https://github.com/ggerganov/ggml/pull/302#discussion_r1243240963)
+ - Tensors store data in row-major order. We refer to dimension 0 as columns, 1 as rows, 2 as matrices
+ - Matrix multiplication is unconventional: [`C = ggml_mul_mat(ctx, A, B)`](https://github.com/ggerganov/llama.cpp/blob/880e352277fc017df4d5794f0c21c44e1eae2b84/ggml.h#L1058-L1064) means $C^T = A B^T \Leftrightarrow C = B A^T.$
+
+ ![matmul](media/matmul.png)
+
+ # Resources
+
+ The Github issues, PRs and discussions contain a lot of information that can be useful to get familiar with the codebase. For convenience, some of the more important information is referenced from Github projects:
+
+ https://github.com/ggerganov/llama.cpp/projects
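
The coding guidelines added above note that `C = ggml_mul_mat(ctx, A, B)` means $C^T = A B^T$. The short sketch below (not part of this commit) illustrates the resulting tensor shapes; it assumes the usual `ggml.h` API (`ggml_init`, `ggml_new_tensor_2d`, `ggml_mul_mat`) and is only meant to show the shape convention, not a real workload.

```c
// Minimal shape-convention sketch (illustrative only, not from this commit).
// In ggml, ne[0] is the number of columns and ne[1] the number of rows,
// so a 2-D tensor with ne = {K, M} is an M x K matrix stored row-major.
#include "ggml.h"
#include <stdio.h>

int main(void) {
    struct ggml_init_params params = {
        /*.mem_size   =*/ 16 * 1024 * 1024,
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ false,
    };
    struct ggml_context * ctx = ggml_init(params);

    // A is 32 x 64 (ne = {64, 32}), B is 16 x 64 (ne = {64, 16}).
    struct ggml_tensor * A = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 64, 32);
    struct ggml_tensor * B = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 64, 16);

    // C = ggml_mul_mat(ctx, A, B) contracts over dimension 0 (the shared 64),
    // i.e. C^T = A B^T, so C is created with ne = {32, 16} (a 16 x 32 matrix).
    struct ggml_tensor * C = ggml_mul_mat(ctx, A, B);
    printf("C: ne[0] = %d, ne[1] = %d\n", (int) C->ne[0], (int) C->ne[1]);

    ggml_free(ctx);
    return 0;
}
```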
LICENSE ADDED
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) 2023-2024 The ggml authors
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
Makefile ADDED
@@ -0,0 +1,1702 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Define the default target now so that it is always the first target
2
+ BUILD_TARGETS = \
3
+ libllava.a \
4
+ llama-batched \
5
+ llama-batched-bench \
6
+ llama-bench \
7
+ llama-cli \
8
+ llama-convert-llama2c-to-ggml \
9
+ llama-embedding \
10
+ llama-eval-callback \
11
+ llama-export-lora \
12
+ llama-gbnf-validator \
13
+ llama-gguf \
14
+ llama-gguf-hash \
15
+ llama-gguf-split \
16
+ llama-gritlm \
17
+ llama-imatrix \
18
+ llama-infill \
19
+ llama-llava-cli \
20
+ llama-minicpmv-cli \
21
+ llama-lookahead \
22
+ llama-lookup \
23
+ llama-lookup-create \
24
+ llama-lookup-merge \
25
+ llama-lookup-stats \
26
+ llama-parallel \
27
+ llama-passkey \
28
+ llama-perplexity \
29
+ llama-q8dot \
30
+ llama-quantize \
31
+ llama-quantize-stats \
32
+ llama-retrieval \
33
+ llama-save-load-state \
34
+ llama-server \
35
+ llama-simple \
36
+ llama-simple-chat \
37
+ llama-speculative \
38
+ llama-tokenize \
39
+ llama-vdot \
40
+ llama-cvector-generator \
41
+ llama-gen-docs \
42
+ tests/test-c.o
43
+
44
+ # Binaries only useful for tests
45
+ TEST_TARGETS = \
46
+ tests/test-arg-parser \
47
+ tests/test-autorelease \
48
+ tests/test-backend-ops \
49
+ tests/test-chat-template \
50
+ tests/test-double-float \
51
+ tests/test-grad0 \
52
+ tests/test-grammar-integration \
53
+ tests/test-grammar-parser \
54
+ tests/test-json-schema-to-grammar \
55
+ tests/test-llama-grammar \
56
+ tests/test-log \
57
+ tests/test-model-load-cancel \
58
+ tests/test-quantize-fns \
59
+ tests/test-quantize-perf \
60
+ tests/test-rope \
61
+ tests/test-sampling \
62
+ tests/test-tokenizer-0 \
63
+ tests/test-tokenizer-1-bpe \
64
+ tests/test-tokenizer-1-spm
65
+ # tests/test-opt \
66
+
67
+ # Legacy build targets that were renamed in #7809, but should still be removed when the project is cleaned
68
+ LEGACY_TARGETS_CLEAN = main quantize quantize-stats perplexity imatrix embedding vdot q8dot convert-llama2c-to-ggml \
69
+ simple batched batched-bench save-load-state server gguf gguf-split eval-callback llama-bench libllava.a llava-cli baby-llama \
70
+ retrieval speculative infill tokenize parallel export-lora lookahead lookup passkey gritlm
71
+
72
+ # Legacy build targets that were renamed in #7809, but for which we still want to build binaries that output a deprecation warning if people try to use them.
73
+ # We don't want to clutter things too much, so we only build replacements for the most commonly used binaries.
74
+ LEGACY_TARGETS_BUILD = main quantize perplexity embedding server
75
+
76
+ # Deprecation aliases
77
+ ifdef LLAMA_CUBLAS
78
+ $(error LLAMA_CUBLAS is removed. Use GGML_CUDA instead.)
79
+ endif
80
+
81
+ ifdef LLAMA_CUDA
82
+ GGML_CUDA := 1
83
+ DEPRECATE_WARNING := 1
84
+ endif
85
+
86
+ ifdef LLAMA_KOMPUTE
87
+ GGML_KOMPUTE := 1
88
+ DEPRECATE_WARNING := 1
89
+ endif
90
+
91
+ ifdef LLAMA_METAL
92
+ GGML_METAL := 1
93
+ DEPRECATE_WARNING := 1
94
+ endif
95
+
96
+ ifdef LLAMA_RPC
97
+ GGML_RPC := 1
98
+ DEPRECATE_WARNING := 1
99
+ endif
100
+
101
+ ifdef LLAMA_SYCL
102
+ GGML_SYCL := 1
103
+ DEPRECATE_WARNING := 1
104
+ endif
105
+
106
+ ifdef LLAMA_SYCL_F16
107
+ GGML_SYCL_F16 := 1
108
+ DEPRECATE_WARNING := 1
109
+ endif
110
+
111
+ ifdef LLAMA_OPENBLAS
112
+ GGML_OPENBLAS := 1
113
+ DEPRECATE_WARNING := 1
114
+ endif
115
+
116
+ ifdef LLAMA_OPENBLAS64
117
+ GGML_OPENBLAS64 := 1
118
+ DEPRECATE_WARNING := 1
119
+ endif
120
+
121
+ ifdef LLAMA_BLIS
122
+ GGML_BLIS := 1
123
+ DEPRECATE_WARNING := 1
124
+ endif
125
+
126
+ ifdef LLAMA_NO_LLAMAFILE
127
+ GGML_NO_LLAMAFILE := 1
128
+ DEPRECATE_WARNING := 1
129
+ endif
130
+
131
+ ifdef LLAMA_NO_ACCELERATE
132
+ GGML_NO_ACCELERATE := 1
133
+ DEPRECATE_WARNING := 1
134
+ endif
135
+
136
+ ifdef LLAMA_NO_OPENMP
137
+ GGML_NO_OPENMP := 1
138
+ DEPRECATE_WARNING := 1
139
+ endif
140
+
141
+ ifdef LLAMA_NO_METAL
142
+ GGML_NO_METAL := 1
143
+ DEPRECATE_WARNING := 1
144
+ endif
145
+
146
+ ifdef LLAMA_DISABLE_LOGS
147
+ REMOVE_WARNING := 1
148
+ endif
149
+
150
+ ifdef LLAMA_SERVER_VERBOSE
151
+ REMOVE_WARNING := 1
152
+ endif
153
+
154
+ ifndef UNAME_S
155
+ UNAME_S := $(shell uname -s)
156
+ endif
157
+
158
+ ifndef UNAME_P
159
+ UNAME_P := $(shell uname -p)
160
+ endif
161
+
162
+ ifndef UNAME_M
163
+ UNAME_M := $(shell uname -m)
164
+ endif
165
+
166
+ # In GNU make, the default CXX is g++ instead of c++. Fix that so that users
167
+ # of non-gcc compilers don't have to provide a g++ alias or wrapper.
168
+ DEFCC := cc
169
+ DEFCXX := c++
170
+ ifeq ($(origin CC),default)
171
+ CC := $(DEFCC)
172
+ endif
173
+ ifeq ($(origin CXX),default)
174
+ CXX := $(DEFCXX)
175
+ endif
176
+
177
+ # Mac OS + Arm can report x86_64
178
+ # ref: https://github.com/ggerganov/whisper.cpp/issues/66#issuecomment-1282546789
179
+ ifeq ($(UNAME_S),Darwin)
180
+ ifndef GGML_NO_METAL
181
+ GGML_METAL := 1
182
+ endif
183
+
184
+ GGML_NO_OPENMP := 1
185
+
186
+ ifneq ($(UNAME_P),arm)
187
+ SYSCTL_M := $(shell sysctl -n hw.optional.arm64 2>/dev/null)
188
+ ifeq ($(SYSCTL_M),1)
189
+ # UNAME_P := arm
190
+ # UNAME_M := arm64
191
+ warn := $(warning Your arch is announced as x86_64, but it seems to actually be ARM64. Not fixing that can lead to bad performance. For more info see: https://github.com/ggerganov/whisper.cpp/issues/66\#issuecomment-1282546789)
192
+ endif
193
+ endif
194
+ endif
195
+
196
+ ifdef GGML_METAL
197
+ GGML_METAL_EMBED_LIBRARY := 1
198
+ endif
199
+
200
+ ifdef GGML_RPC
201
+ BUILD_TARGETS += rpc-server
202
+ endif
203
+
204
+ ifdef GGML_VULKAN
205
+ BUILD_TARGETS += vulkan-shaders-gen
206
+ endif
207
+
208
+ default: $(BUILD_TARGETS) $(LEGACY_TARGETS_BUILD)
209
+
210
+ test: $(TEST_TARGETS)
211
+ @failures=0; \
212
+ for test_target in $(TEST_TARGETS); do \
213
+ if [ "$$test_target" = "tests/test-tokenizer-0" ]; then \
214
+ ./$$test_target $(CURDIR)/models/ggml-vocab-llama-spm.gguf; \
215
+ ./$$test_target $(CURDIR)/models/ggml-vocab-llama-bpe.gguf; \
216
+ ./$$test_target $(CURDIR)/models/ggml-vocab-phi-3.gguf; \
217
+ ./$$test_target $(CURDIR)/models/ggml-vocab-falcon.gguf; \
218
+ ./$$test_target $(CURDIR)/models/ggml-vocab-bert-bge.gguf; \
219
+ ./$$test_target $(CURDIR)/models/ggml-vocab-starcoder.gguf; \
220
+ ./$$test_target $(CURDIR)/models/ggml-vocab-gpt-2.gguf; \
221
+ ./$$test_target $(CURDIR)/models/ggml-vocab-refact.gguf; \
222
+ elif [ "$$test_target" = "tests/test-tokenizer-1-spm" ]; then \
223
+ continue; \
224
+ elif [ "$$test_target" = "tests/test-tokenizer-1-bpe" ]; then \
225
+ continue; \
226
+ else \
227
+ echo "Running test $$test_target..."; \
228
+ ./$$test_target; \
229
+ fi; \
230
+ if [ $$? -ne 0 ]; then \
231
+ printf 'Test %s FAILED!\n\n' $$test_target; \
232
+ failures=$$(( failures + 1 )); \
233
+ else \
234
+ printf 'Test %s passed.\n\n' $$test_target; \
235
+ fi; \
236
+ done; \
237
+ if [ $$failures -gt 0 ]; then \
238
+ printf '\n%s tests failed.\n' $$failures; \
239
+ exit 1; \
240
+ fi
241
+ @echo 'All tests passed.'
242
+
243
+ all: $(BUILD_TARGETS) $(TEST_TARGETS) $(LEGACY_TARGETS_BUILD)
244
+
245
+ ifdef RISCV_CROSS_COMPILE
246
+ CC := riscv64-unknown-linux-gnu-gcc
247
+ CXX := riscv64-unknown-linux-gnu-g++
248
+ endif
249
+
250
+ #
251
+ # Compile flags
252
+ #
253
+
254
+ # keep standard at C11 and C++11
255
+ MK_CPPFLAGS = -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon
256
+ MK_CFLAGS = -std=c11 -fPIC
257
+ MK_CXXFLAGS = -std=c++11 -fPIC
258
+ MK_NVCCFLAGS = -std=c++11
259
+
260
+ ifdef LLAMA_NO_CCACHE
261
+ GGML_NO_CCACHE := 1
262
+ DEPRECATE_WARNING := 1
263
+ endif
264
+
265
+ ifndef GGML_NO_CCACHE
266
+ CCACHE := $(shell which ccache)
267
+ ifdef CCACHE
268
+ export CCACHE_SLOPPINESS = time_macros
269
+ $(info I ccache found, compilation results will be cached. Disable with GGML_NO_CCACHE.)
270
+ CC := $(CCACHE) $(CC)
271
+ CXX := $(CCACHE) $(CXX)
272
+ else
273
+ $(info I ccache not found. Consider installing it for faster compilation.)
274
+ endif # CCACHE
275
+ endif # GGML_NO_CCACHE
276
+
277
+ # clock_gettime came in POSIX.1b (1993)
278
+ # CLOCK_MONOTONIC came in POSIX.1-2001 / SUSv3 as optional
279
+ # posix_memalign came in POSIX.1-2001 / SUSv3
280
+ # M_PI is an XSI extension since POSIX.1-2001 / SUSv3, came in XPG1 (1985)
281
+ MK_CPPFLAGS += -D_XOPEN_SOURCE=600
282
+
283
+ # Somehow in OpenBSD whenever POSIX conformance is specified
284
+ # some string functions rely on locale_t availability,
285
+ # which was introduced in POSIX.1-2008, forcing us to go higher
286
+ ifeq ($(UNAME_S),OpenBSD)
287
+ MK_CPPFLAGS += -U_XOPEN_SOURCE -D_XOPEN_SOURCE=700
288
+ endif
289
+
290
+ # Data types, macros and functions related to controlling CPU affinity and
291
+ # some memory allocation are available on Linux through GNU extensions in libc
292
+ ifeq ($(UNAME_S),Linux)
293
+ MK_CPPFLAGS += -D_GNU_SOURCE
294
+ endif
295
+
296
+ # RLIMIT_MEMLOCK came in BSD, is not specified in POSIX.1,
297
+ # and on macOS its availability depends on enabling Darwin extensions
298
+ # similarly on DragonFly, enabling BSD extensions is necessary
299
+ ifeq ($(UNAME_S),Darwin)
300
+ MK_CPPFLAGS += -D_DARWIN_C_SOURCE
301
+ endif
302
+ ifeq ($(UNAME_S),DragonFly)
303
+ MK_CPPFLAGS += -D__BSD_VISIBLE
304
+ endif
305
+
306
+ # alloca is a non-standard interface that is not visible on BSDs when
307
+ # POSIX conformance is specified, but not all of them provide a clean way
308
+ # to enable it in such cases
309
+ ifeq ($(UNAME_S),FreeBSD)
310
+ MK_CPPFLAGS += -D__BSD_VISIBLE
311
+ endif
312
+ ifeq ($(UNAME_S),NetBSD)
313
+ MK_CPPFLAGS += -D_NETBSD_SOURCE
314
+ endif
315
+ ifeq ($(UNAME_S),OpenBSD)
316
+ MK_CPPFLAGS += -D_BSD_SOURCE
317
+ endif
318
+
319
+ ifdef GGML_SCHED_MAX_COPIES
320
+ MK_CPPFLAGS += -DGGML_SCHED_MAX_COPIES=$(GGML_SCHED_MAX_COPIES)
321
+ endif
322
+
323
+ ifdef LLAMA_DEBUG
324
+ MK_CFLAGS += -O0 -g
325
+ MK_CXXFLAGS += -O0 -g
326
+ MK_LDFLAGS += -g
327
+ MK_NVCCFLAGS += -O0 -g
328
+
329
+ ifeq ($(UNAME_S),Linux)
330
+ MK_CPPFLAGS += -D_GLIBCXX_ASSERTIONS
331
+ endif
332
+ else
333
+ MK_CPPFLAGS += -DNDEBUG
334
+ MK_CFLAGS += -O3 -g
335
+ MK_CXXFLAGS += -O3 -g
336
+ MK_NVCCFLAGS += -O3 -g
337
+ endif
338
+
339
+ ifdef LLAMA_SANITIZE_THREAD
340
+ MK_CFLAGS += -fsanitize=thread -g
341
+ MK_CXXFLAGS += -fsanitize=thread -g
342
+ MK_LDFLAGS += -fsanitize=thread -g
343
+ endif
344
+
345
+ ifdef LLAMA_SANITIZE_ADDRESS
346
+ MK_CFLAGS += -fsanitize=address -fno-omit-frame-pointer -g
347
+ MK_CXXFLAGS += -fsanitize=address -fno-omit-frame-pointer -g
348
+ MK_LDFLAGS += -fsanitize=address -fno-omit-frame-pointer -g
349
+ endif
350
+
351
+ ifdef LLAMA_SANITIZE_UNDEFINED
352
+ MK_CFLAGS += -fsanitize=undefined -g
353
+ MK_CXXFLAGS += -fsanitize=undefined -g
354
+ MK_LDFLAGS += -fsanitize=undefined -g
355
+ endif
356
+
357
+ ifdef LLAMA_SERVER_SSL
358
+ MK_CPPFLAGS += -DCPPHTTPLIB_OPENSSL_SUPPORT
359
+ MK_LDFLAGS += -lssl -lcrypto
360
+ endif
361
+
362
+ # warnings
363
+ WARN_FLAGS = \
364
+ -Wall \
365
+ -Wextra \
366
+ -Wpedantic \
367
+ -Wcast-qual \
368
+ -Wno-unused-function
369
+
370
+ MK_CFLAGS += \
371
+ $(WARN_FLAGS) \
372
+ -Wshadow \
373
+ -Wstrict-prototypes \
374
+ -Wpointer-arith \
375
+ -Wmissing-prototypes \
376
+ -Werror=implicit-int \
377
+ -Werror=implicit-function-declaration
378
+
379
+ MK_CXXFLAGS += \
380
+ $(WARN_FLAGS) \
381
+ -Wmissing-declarations \
382
+ -Wmissing-noreturn
383
+
384
+ ifeq ($(LLAMA_FATAL_WARNINGS),1)
385
+ MK_CFLAGS += -Werror
386
+ MK_CXXFLAGS += -Werror
387
+ endif
388
+
389
+ # this version of Apple ld64 is buggy
390
+ ifneq '' '$(findstring dyld-1015.7,$(shell $(CC) $(LDFLAGS) -Wl,-v 2>&1))'
391
+ MK_CPPFLAGS += -DHAVE_BUGGY_APPLE_LINKER
392
+ endif
393
+
394
+ # OS specific
395
+ # TODO: support Windows
396
+ ifneq '' '$(filter $(UNAME_S),Linux Darwin FreeBSD NetBSD OpenBSD Haiku)'
397
+ MK_CFLAGS += -pthread
398
+ MK_CXXFLAGS += -pthread
399
+ endif
400
+
401
+ # detect Windows
402
+ ifneq ($(findstring _NT,$(UNAME_S)),)
403
+ _WIN32 := 1
404
+ endif
405
+
406
+ # library name prefix
407
+ ifneq ($(_WIN32),1)
408
+ LIB_PRE := lib
409
+ endif
410
+
411
+ # Dynamic Shared Object extension
412
+ ifneq ($(_WIN32),1)
413
+ DSO_EXT := .so
414
+ else
415
+ DSO_EXT := .dll
416
+ endif
417
+
418
+ # Windows Sockets 2 (Winsock) for network-capable apps
419
+ ifeq ($(_WIN32),1)
420
+ LWINSOCK2 := -lws2_32
421
+ endif
422
+
423
+ ifdef LLAMA_GPROF
424
+ MK_CFLAGS += -pg
425
+ MK_CXXFLAGS += -pg
426
+ endif
427
+
428
+ # Architecture specific
429
+ # TODO: probably these flags need to be tweaked on some architectures
430
+ # feel free to update the Makefile for your architecture and send a pull request or issue
431
+
432
+ ifndef RISCV_CROSS_COMPILE
433
+
434
+ ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686 amd64))
435
+ # Use all CPU extensions that are available:
436
+ MK_CFLAGS += -march=native -mtune=native
437
+ HOST_CXXFLAGS += -march=native -mtune=native
438
+
439
+ # Usage AVX-only
440
+ #MK_CFLAGS += -mfma -mf16c -mavx
441
+ #MK_CXXFLAGS += -mfma -mf16c -mavx
442
+
443
+ # Usage SSSE3-only (note: SSSE3 is not the same as SSE3!)
444
+ #MK_CFLAGS += -mssse3
445
+ #MK_CXXFLAGS += -mssse3
446
+ endif
447
+
448
+ ifneq '' '$(findstring mingw,$(shell $(CC) -dumpmachine))'
449
+ # The stack is only 16-byte aligned on Windows, so don't let gcc emit aligned moves.
450
+ # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=54412
451
+ # https://github.com/ggerganov/llama.cpp/issues/2922
452
+ MK_CFLAGS += -Xassembler -muse-unaligned-vector-move
453
+ MK_CXXFLAGS += -Xassembler -muse-unaligned-vector-move
454
+
455
+ # Target Windows 8 for PrefetchVirtualMemory
456
+ MK_CPPFLAGS += -D_WIN32_WINNT=0x602
457
+ endif
458
+
459
+ ifneq ($(filter aarch64%,$(UNAME_M)),)
460
+ # Apple M1, M2, etc.
461
+ # Raspberry Pi 3, 4, Zero 2 (64-bit)
462
+ # Nvidia Jetson
463
+ MK_CFLAGS += -mcpu=native
464
+ MK_CXXFLAGS += -mcpu=native
465
+ JETSON_RELEASE_INFO = $(shell jetson_release)
466
+ ifdef JETSON_RELEASE_INFO
467
+ ifneq ($(filter TX2%,$(JETSON_RELEASE_INFO)),)
468
+ JETSON_EOL_MODULE_DETECT = 1
469
+ CC = aarch64-unknown-linux-gnu-gcc
470
+ CXX = aarch64-unknown-linux-gnu-g++
471
+ endif
472
+ endif
473
+ endif
474
+
475
+ ifneq ($(filter armv6%,$(UNAME_M)),)
476
+ # Raspberry Pi 1, Zero
477
+ MK_CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access
478
+ MK_CXXFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access
479
+ endif
480
+
481
+ ifneq ($(filter armv7%,$(UNAME_M)),)
482
+ # Raspberry Pi 2
483
+ MK_CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations
484
+ MK_CXXFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations
485
+ endif
486
+
487
+ ifneq ($(filter armv8%,$(UNAME_M)),)
488
+ # Raspberry Pi 3, 4, Zero 2 (32-bit)
489
+ MK_CFLAGS += -mfp16-format=ieee -mno-unaligned-access
490
+ MK_CXXFLAGS += -mfp16-format=ieee -mno-unaligned-access
491
+ endif
492
+
493
+ ifneq ($(filter ppc64%,$(UNAME_M)),)
494
+ POWER9_M := $(shell grep "POWER9" /proc/cpuinfo)
495
+ ifneq (,$(findstring POWER9,$(POWER9_M)))
496
+ MK_CFLAGS += -mcpu=power9
497
+ MK_CXXFLAGS += -mcpu=power9
498
+ endif
499
+ endif
500
+
501
+ ifneq ($(filter ppc64le%,$(UNAME_M)),)
502
+ MK_CFLAGS += -mcpu=powerpc64le
503
+ MK_CXXFLAGS += -mcpu=powerpc64le
504
+ CUDA_POWER_ARCH = 1
505
+ endif
506
+
507
+ ifneq ($(filter loongarch64%,$(UNAME_M)),)
508
+ MK_CFLAGS += -mlasx
509
+ MK_CXXFLAGS += -mlasx
510
+ endif
511
+
512
+ ifneq ($(filter riscv64%,$(UNAME_M)),)
513
+ MK_CFLAGS += -march=rv64gcv -mabi=lp64d
514
+ MK_CXXFLAGS += -march=rv64gcv -mabi=lp64d
515
+ endif
516
+
517
+ else # RISC-V CROSS COMPILATION
518
+ MK_CFLAGS += -march=rv64gcv -mabi=lp64d
519
+ MK_CXXFLAGS += -march=rv64gcv -mabi=lp64d
520
+ endif
521
+
522
+ ifndef GGML_NO_ACCELERATE
523
+ # Mac OS - include Accelerate framework.
524
+ # `-framework Accelerate` works both with Apple Silicon and Mac Intel
525
+ ifeq ($(UNAME_S),Darwin)
526
+ MK_CPPFLAGS += -DGGML_USE_ACCELERATE -DGGML_USE_BLAS
527
+ MK_CPPFLAGS += -DACCELERATE_NEW_LAPACK
528
+ MK_CPPFLAGS += -DACCELERATE_LAPACK_ILP64
529
+ MK_LDFLAGS += -framework Accelerate
530
+ OBJ_GGML += ggml/src/ggml-blas.o
531
+ endif
532
+ endif # GGML_NO_ACCELERATE
533
+
534
+ ifdef GGML_MUSA
535
+ CC := clang
536
+ CXX := clang++
537
+ GGML_CUDA := 1
538
+ MK_CPPFLAGS += -DGGML_USE_MUSA
539
+ endif
540
+
541
+ ifndef GGML_NO_OPENMP
542
+ MK_CPPFLAGS += -DGGML_USE_OPENMP
543
+ MK_CFLAGS += -fopenmp
544
+ MK_CXXFLAGS += -fopenmp
545
+ ifdef GGML_MUSA
546
+ MK_CPPFLAGS += -I/usr/lib/llvm-10/include/openmp
547
+ MK_LDFLAGS += -L/usr/lib/llvm-10/lib
548
+ endif # GGML_MUSA
549
+ endif # GGML_NO_OPENMP
550
+
551
+ ifdef GGML_OPENBLAS
552
+ MK_CPPFLAGS += -DGGML_USE_BLAS $(shell pkg-config --cflags-only-I openblas)
553
+ MK_CFLAGS += $(shell pkg-config --cflags-only-other openblas)
554
+ MK_LDFLAGS += $(shell pkg-config --libs openblas)
555
+ OBJ_GGML += ggml/src/ggml-blas.o
556
+ endif # GGML_OPENBLAS
557
+
558
+ ifdef GGML_OPENBLAS64
559
+ MK_CPPFLAGS += -DGGML_USE_BLAS $(shell pkg-config --cflags-only-I openblas64)
560
+ MK_CFLAGS += $(shell pkg-config --cflags-only-other openblas64)
561
+ MK_LDFLAGS += $(shell pkg-config --libs openblas64)
562
+ OBJ_GGML += ggml/src/ggml-blas.o
563
+ endif # GGML_OPENBLAS64
564
+
565
+ ifdef GGML_BLIS
566
+ MK_CPPFLAGS += -DGGML_USE_BLAS -DGGML_BLAS_USE_BLIS -I/usr/local/include/blis -I/usr/include/blis
567
+ MK_LDFLAGS += -lblis -L/usr/local/lib
568
+ OBJ_GGML += ggml/src/ggml-blas.o
569
+ endif # GGML_BLIS
570
+
571
+ ifdef GGML_NVPL
572
+ MK_CPPFLAGS += -DGGML_USE_BLAS -DGGML_BLAS_USE_NVPL -DNVPL_ILP64 -I/usr/local/include/nvpl_blas -I/usr/include/nvpl_blas
573
+ MK_LDFLAGS += -L/usr/local/lib -lnvpl_blas_core -lnvpl_blas_ilp64_gomp
574
+ OBJ_GGML += ggml/src/ggml-blas.o
575
+ endif # GGML_NVPL
576
+
577
+ ifndef GGML_NO_LLAMAFILE
578
+ MK_CPPFLAGS += -DGGML_USE_LLAMAFILE
579
+ OBJ_GGML += ggml/src/llamafile/sgemm.o
580
+ endif
581
+
582
+ ifndef GGML_NO_AMX
583
+ MK_CPPFLAGS += -DGGML_USE_AMX
584
+ OBJ_GGML += ggml/src/ggml-amx.o ggml/src/ggml-amx/mmq.o
585
+ endif
586
+
587
+ ifdef GGML_RPC
588
+ MK_CPPFLAGS += -DGGML_USE_RPC
589
+ OBJ_GGML += ggml/src/ggml-rpc.o
590
+ endif # GGML_RPC
591
+
592
+ OBJ_CUDA_TMPL = $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/fattn-wmma*.cu))
593
+ OBJ_CUDA_TMPL += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/mmq*.cu))
594
+
595
+ ifdef GGML_CUDA_FA_ALL_QUANTS
596
+ OBJ_CUDA_TMPL += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/fattn-vec*.cu))
597
+ else
598
+ OBJ_CUDA_TMPL += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/fattn-vec*q4_0-q4_0.cu))
599
+ OBJ_CUDA_TMPL += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/fattn-vec*q8_0-q8_0.cu))
600
+ OBJ_CUDA_TMPL += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/fattn-vec*f16-f16.cu))
601
+ endif # GGML_CUDA_FA_ALL_QUANTS
602
+
603
+ ifdef GGML_CUDA
604
+ ifdef GGML_MUSA
605
+ ifneq ('', '$(wildcard /opt/musa)')
606
+ CUDA_PATH ?= /opt/musa
607
+ else
608
+ CUDA_PATH ?= /usr/local/musa
609
+ endif
610
+
611
+ MK_CPPFLAGS += -DGGML_USE_CUDA -I$(CUDA_PATH)/include
612
+ MK_LDFLAGS += -lmusa -lmublas -lmusart -lpthread -ldl -lrt -L$(CUDA_PATH)/lib -L/usr/lib64
613
+ MK_NVCCFLAGS += -x musa -mtgpu --cuda-gpu-arch=mp_21 --cuda-gpu-arch=mp_22
614
+ else
615
+ ifneq ('', '$(wildcard /opt/cuda)')
616
+ CUDA_PATH ?= /opt/cuda
617
+ else
618
+ CUDA_PATH ?= /usr/local/cuda
619
+ endif
620
+
621
+ MK_CPPFLAGS += -DGGML_USE_CUDA -DGGML_CUDA_USE_GRAPHS -I$(CUDA_PATH)/include -I$(CUDA_PATH)/targets/$(UNAME_M)-linux/include
622
+ MK_LDFLAGS += -lcuda -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L$(CUDA_PATH)/lib64 -L/usr/lib64 -L$(CUDA_PATH)/targets/$(UNAME_M)-linux/lib -L$(CUDA_PATH)/lib64/stubs -L/usr/lib/wsl/lib
623
+ MK_NVCCFLAGS += -use_fast_math
624
+ endif # GGML_MUSA
625
+
626
+ OBJ_GGML += ggml/src/ggml-cuda.o
627
+ OBJ_GGML += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/*.cu))
628
+ OBJ_GGML += $(OBJ_CUDA_TMPL)
629
+
630
+ ifdef LLAMA_FATAL_WARNINGS
631
+ MK_NVCCFLAGS += -Werror all-warnings
632
+ endif # LLAMA_FATAL_WARNINGS
633
+
634
+ ifndef GGML_MUSA
635
+ ifndef JETSON_EOL_MODULE_DETECT
636
+ MK_NVCCFLAGS += --forward-unknown-to-host-compiler
637
+ endif # JETSON_EOL_MODULE_DETECT
638
+ endif # GGML_MUSA
639
+
640
+ ifdef LLAMA_DEBUG
641
+ MK_NVCCFLAGS += -lineinfo
642
+ endif # LLAMA_DEBUG
643
+
644
+ ifdef GGML_CUDA_DEBUG
645
+ MK_NVCCFLAGS += --device-debug
646
+ endif # GGML_CUDA_DEBUG
647
+
648
+ ifdef GGML_CUDA_NVCC
649
+ NVCC = $(CCACHE) $(GGML_CUDA_NVCC)
650
+ else
651
+ ifdef GGML_MUSA
652
+ NVCC = $(CCACHE) mcc
653
+ else
654
+ NVCC = $(CCACHE) nvcc
655
+ endif # GGML_MUSA
656
+ endif # GGML_CUDA_NVCC
657
+
658
+ ifdef CUDA_DOCKER_ARCH
659
+ MK_NVCCFLAGS += -Wno-deprecated-gpu-targets -arch=$(CUDA_DOCKER_ARCH)
660
+ else ifndef CUDA_POWER_ARCH
661
+ MK_NVCCFLAGS += -arch=native
662
+ endif # CUDA_DOCKER_ARCH
663
+
664
+ ifdef GGML_CUDA_FORCE_DMMV
665
+ MK_NVCCFLAGS += -DGGML_CUDA_FORCE_DMMV
666
+ endif # GGML_CUDA_FORCE_DMMV
667
+
668
+ ifdef GGML_CUDA_FORCE_MMQ
669
+ MK_NVCCFLAGS += -DGGML_CUDA_FORCE_MMQ
670
+ endif # GGML_CUDA_FORCE_MMQ
671
+
672
+ ifdef GGML_CUDA_FORCE_CUBLAS
673
+ MK_NVCCFLAGS += -DGGML_CUDA_FORCE_CUBLAS
674
+ endif # GGML_CUDA_FORCE_CUBLAS
675
+
676
+ ifdef GGML_CUDA_DMMV_X
677
+ MK_NVCCFLAGS += -DGGML_CUDA_DMMV_X=$(GGML_CUDA_DMMV_X)
678
+ else
679
+ MK_NVCCFLAGS += -DGGML_CUDA_DMMV_X=32
680
+ endif # GGML_CUDA_DMMV_X
681
+
682
+ ifdef GGML_CUDA_MMV_Y
683
+ MK_NVCCFLAGS += -DGGML_CUDA_MMV_Y=$(GGML_CUDA_MMV_Y)
684
+ else ifdef GGML_CUDA_DMMV_Y
685
+ MK_NVCCFLAGS += -DGGML_CUDA_MMV_Y=$(GGML_CUDA_DMMV_Y) # for backwards compatibility
686
+ else
687
+ MK_NVCCFLAGS += -DGGML_CUDA_MMV_Y=1
688
+ endif # GGML_CUDA_MMV_Y
689
+
690
+ ifdef GGML_CUDA_F16
691
+ MK_NVCCFLAGS += -DGGML_CUDA_F16
692
+ endif # GGML_CUDA_F16
693
+
694
+ ifdef GGML_CUDA_DMMV_F16
695
+ MK_NVCCFLAGS += -DGGML_CUDA_F16
696
+ endif # GGML_CUDA_DMMV_F16
697
+
698
+ ifdef GGML_CUDA_KQUANTS_ITER
699
+ MK_NVCCFLAGS += -DK_QUANTS_PER_ITERATION=$(GGML_CUDA_KQUANTS_ITER)
700
+ else
701
+ MK_NVCCFLAGS += -DK_QUANTS_PER_ITERATION=2
702
+ endif
703
+
704
+ ifdef GGML_CUDA_PEER_MAX_BATCH_SIZE
705
+ MK_NVCCFLAGS += -DGGML_CUDA_PEER_MAX_BATCH_SIZE=$(GGML_CUDA_PEER_MAX_BATCH_SIZE)
706
+ else
707
+ MK_NVCCFLAGS += -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128
708
+ endif # GGML_CUDA_PEER_MAX_BATCH_SIZE
709
+
710
+ ifdef GGML_CUDA_NO_PEER_COPY
711
+ MK_NVCCFLAGS += -DGGML_CUDA_NO_PEER_COPY
712
+ endif # GGML_CUDA_NO_PEER_COPY
713
+
714
+ ifdef GGML_CUDA_CCBIN
715
+ MK_NVCCFLAGS += -ccbin $(GGML_CUDA_CCBIN)
716
+ endif # GGML_CUDA_CCBIN
717
+
718
+ ifdef GGML_CUDA_FA_ALL_QUANTS
719
+ MK_NVCCFLAGS += -DGGML_CUDA_FA_ALL_QUANTS
720
+ endif # GGML_CUDA_FA_ALL_QUANTS
721
+
722
+ ifdef JETSON_EOL_MODULE_DETECT
723
+ define NVCC_COMPILE
724
+ $(NVCC) -I. -Icommon -D_XOPEN_SOURCE=600 -D_GNU_SOURCE -DNDEBUG -DGGML_USE_CUDA -I/usr/local/cuda/include -I/opt/cuda/include -I/usr/local/cuda/targets/aarch64-linux/include -std=c++11 -O3 $(NVCCFLAGS) $(CPPFLAGS) -Xcompiler "$(CUDA_CXXFLAGS)" -c $< -o $@
725
+ endef # NVCC_COMPILE
726
+ else
727
+ ifdef GGML_MUSA
728
+ define NVCC_COMPILE
729
+ $(NVCC) $(NVCCFLAGS) $(CPPFLAGS) -c $< -o $@
730
+ endef # NVCC_COMPILE
731
+ else
732
+ define NVCC_COMPILE
733
+ $(NVCC) $(NVCCFLAGS) $(CPPFLAGS) -Xcompiler "$(CUDA_CXXFLAGS)" -c $< -o $@
734
+ endef # NVCC_COMPILE
735
+ endif # GGML_MUSA
736
+ endif # JETSON_EOL_MODULE_DETECT
737
+
738
+ ggml/src/ggml-cuda/%.o: \
739
+ ggml/src/ggml-cuda/%.cu \
740
+ ggml/include/ggml.h \
741
+ ggml/src/ggml-common.h \
742
+ ggml/src/ggml-cuda/common.cuh
743
+ $(NVCC_COMPILE)
744
+
745
+ ggml/src/ggml-cuda.o: \
746
+ ggml/src/ggml-cuda.cu \
747
+ ggml/include/ggml-cuda.h \
748
+ ggml/include/ggml.h \
749
+ ggml/include/ggml-backend.h \
750
+ ggml/src/ggml-backend-impl.h \
751
+ ggml/src/ggml-common.h \
752
+ $(wildcard ggml/src/ggml-cuda/*.cuh)
753
+ $(NVCC_COMPILE)
754
+ endif # GGML_CUDA
755
+
756
+ ifdef GGML_VULKAN
757
+ MK_CPPFLAGS += -DGGML_USE_VULKAN
758
+ MK_LDFLAGS += $(shell pkg-config --libs vulkan)
759
+ OBJ_GGML += ggml/src/ggml-vulkan.o ggml/src/ggml-vulkan-shaders.o
760
+
761
+ ifdef GGML_VULKAN_CHECK_RESULTS
762
+ MK_CPPFLAGS += -DGGML_VULKAN_CHECK_RESULTS
763
+ endif
764
+
765
+ ifdef GGML_VULKAN_DEBUG
766
+ MK_CPPFLAGS += -DGGML_VULKAN_DEBUG
767
+ endif
768
+
769
+ ifdef GGML_VULKAN_MEMORY_DEBUG
770
+ MK_CPPFLAGS += -DGGML_VULKAN_MEMORY_DEBUG
771
+ endif
772
+
773
+ ifdef GGML_VULKAN_PERF
774
+ MK_CPPFLAGS += -DGGML_VULKAN_PERF
775
+ endif
776
+
777
+ ifdef GGML_VULKAN_VALIDATE
778
+ MK_CPPFLAGS += -DGGML_VULKAN_VALIDATE
779
+ endif
780
+
781
+ ifdef GGML_VULKAN_RUN_TESTS
782
+ MK_CPPFLAGS += -DGGML_VULKAN_RUN_TESTS
783
+ endif
784
+
785
+ GLSLC_CMD = glslc
786
+ _ggml_vk_genshaders_cmd = $(shell pwd)/vulkan-shaders-gen
787
+ _ggml_vk_header = ggml/src/ggml-vulkan-shaders.hpp
788
+ _ggml_vk_source = ggml/src/ggml-vulkan-shaders.cpp
789
+ _ggml_vk_input_dir = ggml/src/vulkan-shaders
790
+ _ggml_vk_shader_deps = $(wildcard $(_ggml_vk_input_dir)/*.comp)
791
+
792
+ ggml/src/ggml-vulkan.o: ggml/src/ggml-vulkan.cpp ggml/include/ggml-vulkan.h $(_ggml_vk_header) $(_ggml_vk_source)
793
+ $(CXX) $(CXXFLAGS) $(shell pkg-config --cflags vulkan) -c $< -o $@
794
+
795
+ $(_ggml_vk_header): $(_ggml_vk_source)
796
+
797
+ $(_ggml_vk_source): $(_ggml_vk_shader_deps) vulkan-shaders-gen
798
+ $(_ggml_vk_genshaders_cmd) \
799
+ --glslc $(GLSLC_CMD) \
800
+ --input-dir $(_ggml_vk_input_dir) \
801
+ --target-hpp $(_ggml_vk_header) \
802
+ --target-cpp $(_ggml_vk_source)
803
+
804
+ vulkan-shaders-gen: ggml/src/vulkan-shaders/vulkan-shaders-gen.cpp
805
+ $(CXX) $(CXXFLAGS) -o $@ $(LDFLAGS) ggml/src/vulkan-shaders/vulkan-shaders-gen.cpp
806
+
807
+ endif # GGML_VULKAN
808
+
809
+ ifdef GGML_HIPBLAS
810
+ ifeq ($(wildcard /opt/rocm),)
811
+ ROCM_PATH ?= /usr
812
+ AMDGPU_TARGETS ?= $(shell $(shell which amdgpu-arch))
813
+ else
814
+ ROCM_PATH ?= /opt/rocm
815
+ AMDGPU_TARGETS ?= $(shell $(ROCM_PATH)/llvm/bin/amdgpu-arch)
816
+ endif
817
+
818
+ GGML_CUDA_DMMV_X ?= 32
819
+ GGML_CUDA_MMV_Y ?= 1
820
+ GGML_CUDA_KQUANTS_ITER ?= 2
821
+
822
+ MK_CPPFLAGS += -DGGML_USE_HIPBLAS -DGGML_USE_CUDA
823
+
824
+ ifdef GGML_HIP_UMA
825
+ MK_CPPFLAGS += -DGGML_HIP_UMA
826
+ endif # GGML_HIP_UMA
827
+
828
+ MK_LDFLAGS += -L$(ROCM_PATH)/lib -Wl,-rpath=$(ROCM_PATH)/lib
829
+ MK_LDFLAGS += -L$(ROCM_PATH)/lib64 -Wl,-rpath=$(ROCM_PATH)/lib64
830
+ MK_LDFLAGS += -lhipblas -lamdhip64 -lrocblas
831
+
832
+ HIPCC ?= $(CCACHE) $(ROCM_PATH)/bin/hipcc
833
+
834
+ HIPFLAGS += $(addprefix --offload-arch=,$(AMDGPU_TARGETS))
835
+ HIPFLAGS += -DGGML_CUDA_DMMV_X=$(GGML_CUDA_DMMV_X)
836
+ HIPFLAGS += -DGGML_CUDA_MMV_Y=$(GGML_CUDA_MMV_Y)
837
+ HIPFLAGS += -DK_QUANTS_PER_ITERATION=$(GGML_CUDA_KQUANTS_ITER)
838
+
839
+ ifdef GGML_CUDA_FORCE_DMMV
840
+ HIPFLAGS += -DGGML_CUDA_FORCE_DMMV
841
+ endif # GGML_CUDA_FORCE_DMMV
842
+
843
+ ifdef GGML_CUDA_FORCE_MMQ
844
+ HIPFLAGS += -DGGML_CUDA_FORCE_MMQ
845
+ endif # GGML_CUDA_FORCE_MMQ
846
+
847
+ ifdef GGML_CUDA_FORCE_CUBLAS
848
+ HIPFLAGS += -DGGML_CUDA_FORCE_CUBLAS
849
+ endif # GGML_CUDA_FORCE_CUBLAS
850
+
851
+ ifdef GGML_CUDA_NO_PEER_COPY
852
+ HIPFLAGS += -DGGML_CUDA_NO_PEER_COPY
853
+ endif # GGML_CUDA_NO_PEER_COPY
854
+
855
+ OBJ_GGML += ggml/src/ggml-cuda.o
856
+ OBJ_GGML += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/*.cu))
857
+ OBJ_GGML += $(OBJ_CUDA_TMPL)
858
+
859
+ ggml/src/ggml-cuda.o: \
860
+ ggml/src/ggml-cuda.cu \
861
+ ggml/include/ggml-cuda.h \
862
+ ggml/include/ggml.h \
863
+ ggml/include/ggml-backend.h \
864
+ ggml/src/ggml-backend-impl.h \
865
+ ggml/src/ggml-common.h \
866
+ $(wildcard ggml/src/ggml-cuda/*.cuh)
867
+ $(HIPCC) $(CXXFLAGS) $(HIPFLAGS) -x hip -c -o $@ $<
868
+
869
+ ggml/src/ggml-cuda/%.o: \
870
+ ggml/src/ggml-cuda/%.cu \
871
+ ggml/include/ggml.h \
872
+ ggml/src/ggml-common.h \
873
+ ggml/src/ggml-cuda/common.cuh
874
+ $(HIPCC) $(CXXFLAGS) $(HIPFLAGS) -x hip -c -o $@ $<
875
+ endif # GGML_HIPBLAS
876
+
877
+ ifdef GGML_METAL
878
+ MK_CPPFLAGS += -DGGML_USE_METAL
879
+ MK_LDFLAGS += -framework Foundation -framework Metal -framework MetalKit
880
+ OBJ_GGML += ggml/src/ggml-metal.o
881
+ ifdef GGML_METAL_NDEBUG
882
+ MK_CPPFLAGS += -DGGML_METAL_NDEBUG
883
+ endif
884
+ ifdef GGML_METAL_EMBED_LIBRARY
885
+ MK_CPPFLAGS += -DGGML_METAL_EMBED_LIBRARY
886
+ OBJ_GGML += ggml/src/ggml-metal-embed.o
887
+ endif
888
+ endif # GGML_METAL
889
+
890
+ ifdef GGML_METAL
891
+ ggml/src/ggml-metal.o: \
892
+ ggml/src/ggml-metal.m \
893
+ ggml/include/ggml-metal.h \
894
+ ggml/include/ggml.h
895
+ $(CC) $(CFLAGS) -c $< -o $@
896
+
897
+ ifdef GGML_METAL_EMBED_LIBRARY
898
+ ggml/src/ggml-metal-embed.o: \
899
+ ggml/src/ggml-metal.metal \
900
+ ggml/src/ggml-common.h
901
+ @echo "Embedding Metal library"
902
+ @sed -e '/#include "ggml-common.h"/r ggml/src/ggml-common.h' -e '/#include "ggml-common.h"/d' < ggml/src/ggml-metal.metal > ggml/src/ggml-metal-embed.metal
903
+ $(eval TEMP_ASSEMBLY=$(shell mktemp -d))
904
+ @echo ".section __DATA, __ggml_metallib" > $(TEMP_ASSEMBLY)/ggml-metal-embed.s
905
+ @echo ".globl _ggml_metallib_start" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
906
+ @echo "_ggml_metallib_start:" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
907
+ @echo ".incbin \"ggml/src/ggml-metal-embed.metal\"" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
908
+ @echo ".globl _ggml_metallib_end" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
909
+ @echo "_ggml_metallib_end:" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
910
+ $(CC) $(CFLAGS) -c $(TEMP_ASSEMBLY)/ggml-metal-embed.s -o $@
911
+ @rm -f ${TEMP_ASSEMBLY}/ggml-metal-embed.s
912
+ @rmdir ${TEMP_ASSEMBLY}
913
+ endif
914
+ endif # GGML_METAL
915
+
916
+ OBJ_GGML += \
917
+ ggml/src/ggml.o \
918
+ ggml/src/ggml-cpu.o \
919
+ ggml/src/ggml-alloc.o \
920
+ ggml/src/ggml-backend.o \
921
+ ggml/src/ggml-quants.o \
922
+ ggml/src/ggml-aarch64.o
923
+
924
+ OBJ_LLAMA = \
925
+ src/llama.o \
926
+ src/llama-vocab.o \
927
+ src/llama-grammar.o \
928
+ src/llama-sampling.o \
929
+ src/unicode.o \
930
+ src/unicode-data.o
931
+
932
+ OBJ_COMMON = \
933
+ common/common.o \
934
+ common/arg.o \
935
+ common/log.o \
936
+ common/console.o \
937
+ common/ngram-cache.o \
938
+ common/sampling.o \
939
+ common/build-info.o \
940
+ common/json-schema-to-grammar.o
941
+
942
+ OBJ_ALL = $(OBJ_GGML) $(OBJ_LLAMA) $(OBJ_COMMON)
943
+
944
+ LIB_GGML = $(LIB_PRE)ggml$(DSO_EXT)
945
+ LIB_GGML_S = $(LIB_PRE)ggml.a
946
+
947
+ LIB_LLAMA = $(LIB_PRE)llama$(DSO_EXT)
948
+ LIB_LLAMA_S = $(LIB_PRE)llama.a
949
+
950
+ LIB_COMMON = $(LIB_PRE)common$(DSO_EXT)
951
+ LIB_COMMON_S = $(LIB_PRE)common.a
952
+
953
+ LIB_ALL = $(LIB_GGML) $(LIB_LLAMA) $(LIB_COMMON)
954
+ LIB_ALL_S = $(LIB_GGML_S) $(LIB_LLAMA_S) $(LIB_COMMON_S)
955
+
956
+ GF_CC := $(CC)
957
+ include scripts/get-flags.mk
958
+
959
+ # combine build flags with cmdline overrides
960
+ override CPPFLAGS := $(MK_CPPFLAGS) $(CPPFLAGS)
961
+ override CFLAGS := $(CPPFLAGS) $(MK_CFLAGS) $(GF_CFLAGS) $(CFLAGS)
962
+ BASE_CXXFLAGS := $(MK_CXXFLAGS) $(CXXFLAGS)
963
+ override CXXFLAGS := $(BASE_CXXFLAGS) $(HOST_CXXFLAGS) $(GF_CXXFLAGS) $(CPPFLAGS)
964
+ override NVCCFLAGS := $(MK_NVCCFLAGS) $(NVCCFLAGS)
965
+ override LDFLAGS := $(MK_LDFLAGS) $(LDFLAGS)
966
+
967
+ # identify CUDA host compiler
968
+ ifdef GGML_CUDA
969
+ GF_CC := $(NVCC) $(NVCCFLAGS) 2>/dev/null .c -Xcompiler
970
+ include scripts/get-flags.mk
971
+ CUDA_CXXFLAGS := $(BASE_CXXFLAGS) $(GF_CXXFLAGS) -Wno-pedantic
972
+ endif
973
+
974
+ ifdef LLAMA_CURL
975
+ override CXXFLAGS := $(CXXFLAGS) -DLLAMA_USE_CURL
976
+ override LDFLAGS := $(LDFLAGS) -lcurl
977
+ endif
978
+
979
+ #
980
+ # Print build information
981
+ #
982
+
983
+ $(info I llama.cpp build info: )
984
+ $(info I UNAME_S: $(UNAME_S))
985
+ $(info I UNAME_P: $(UNAME_P))
986
+ $(info I UNAME_M: $(UNAME_M))
987
+ $(info I CFLAGS: $(CFLAGS))
988
+ $(info I CXXFLAGS: $(CXXFLAGS))
989
+ $(info I NVCCFLAGS: $(NVCCFLAGS))
990
+ $(info I LDFLAGS: $(LDFLAGS))
991
+ $(info I CC: $(shell $(CC) --version | head -n 1))
992
+ $(info I CXX: $(shell $(CXX) --version | head -n 1))
993
+ ifdef GGML_CUDA
994
+ $(info I NVCC: $(shell $(NVCC) --version | tail -n 1))
995
+ CUDA_VERSION := $(shell $(NVCC) --version | grep -oP 'release (\K[0-9]+\.[0-9])')
996
+ ifndef GGML_MUSA
997
+ ifeq ($(shell awk -v "v=$(CUDA_VERSION)" 'BEGIN { print (v < 11.7) }'),1)
998
+
999
+ ifndef CUDA_DOCKER_ARCH
1000
+ ifndef CUDA_POWER_ARCH
1001
+ $(error I ERROR: For CUDA versions < 11.7 a target CUDA architecture must be explicitly provided via environment variable CUDA_DOCKER_ARCH, e.g. by running "export CUDA_DOCKER_ARCH=compute_XX" on Unix-like systems, where XX is the minimum compute capability that the code needs to run on. A list with compute capabilities can be found here: https://developer.nvidia.com/cuda-gpus )
1002
+ endif # CUDA_POWER_ARCH
1003
+ endif # CUDA_DOCKER_ARCH
1004
+
1005
+ endif # CUDA_VERSION < 11.7
1006
+ endif # GGML_MUSA
1007
+ endif # GGML_CUDA
1008
+ $(info )
1009
+
1010
+ ifdef DEPRECATE_WARNING
1011
+ $(info !!! DEPRECATION WARNING !!!)
1012
+ $(info The following LLAMA_ options are deprecated and will be removed in the future. Use the GGML_ prefix instead)
1013
+ $(info - LLAMA_CUDA)
1014
+ $(info - LLAMA_METAL)
1015
+ $(info - LLAMA_METAL_EMBED_LIBRARY)
1016
+ $(info - LLAMA_OPENMP)
1017
+ $(info - LLAMA_RPC)
1018
+ $(info - LLAMA_SYCL)
1019
+ $(info - LLAMA_SYCL_F16)
1020
+ $(info - LLAMA_OPENBLAS)
1021
+ $(info - LLAMA_OPENBLAS64)
1022
+ $(info - LLAMA_BLIS)
1023
+ $(info - LLAMA_NO_LLAMAFILE)
1024
+ $(info - LLAMA_NO_ACCELERATE)
1025
+ $(info - LLAMA_NO_OPENMP)
1026
+ $(info - LLAMA_NO_METAL)
1027
+ $(info - LLAMA_NO_CCACHE)
1028
+ $(info )
1029
+ endif
1030
+
1031
+ ifdef REMOVE_WARNING
1032
+ $(info !!! REMOVAL WARNING !!!)
1033
+ $(info The following LLAMA_ options have been removed and are no longer supported)
1034
+ $(info - LLAMA_DISABLE_LOGS (https://github.com/ggerganov/llama.cpp/pull/9418))
1035
+ $(info - LLAMA_SERVER_VERBOSE (https://github.com/ggerganov/llama.cpp/pull/9418))
1036
+ $(info )
1037
+ endif
1038
+
1039
+ #
1040
+ # Build libraries
1041
+ #
1042
+
1043
+ # ggml
1044
+
1045
+ ggml/src/ggml.o: \
1046
+ ggml/src/ggml.c \
1047
+ ggml/include/ggml.h
1048
+ $(CC) $(CFLAGS) -c $< -o $@
1049
+
1050
+ ggml/src/ggml-cpu.o: \
1051
+ ggml/src/ggml-cpu.c \
1052
+ ggml/include/ggml.h \
1053
+ ggml/src/ggml-common.h
1054
+ $(CC) $(CFLAGS) -c $< -o $@
1055
+
1056
+ ggml/src/ggml-alloc.o: \
1057
+ ggml/src/ggml-alloc.c \
1058
+ ggml/include/ggml.h \
1059
+ ggml/include/ggml-alloc.h
1060
+ $(CC) $(CFLAGS) -c $< -o $@
1061
+
1062
+ ggml/src/ggml-backend.o: \
1063
+ ggml/src/ggml-backend.cpp \
1064
+ ggml/src/ggml-backend-impl.h \
1065
+ ggml/include/ggml.h \
1066
+ ggml/include/ggml-backend.h
1067
+ $(CXX) $(CXXFLAGS) -c $< -o $@
1068
+
1069
+ ggml/src/ggml-quants.o: \
1070
+ ggml/src/ggml-quants.c \
1071
+ ggml/include/ggml.h \
1072
+ ggml/src/ggml-quants.h \
1073
+ ggml/src/ggml-common.h
1074
+ $(CC) $(CFLAGS) -c $< -o $@
1075
+
1076
+ ggml/src/ggml-aarch64.o: \
1077
+ ggml/src/ggml-aarch64.c \
1078
+ ggml/include/ggml.h \
1079
+ ggml/src/ggml-aarch64.h \
1080
+ ggml/src/ggml-common.h
1081
+ $(CC) $(CFLAGS) -c $< -o $@
1082
+
1083
+ ggml/src/ggml-blas.o: \
1084
+ ggml/src/ggml-blas.cpp \
1085
+ ggml/include/ggml-blas.h
1086
+ $(CXX) $(CXXFLAGS) -c $< -o $@
1087
+
1088
+ ifndef GGML_NO_LLAMAFILE
1089
+ ggml/src/llamafile/sgemm.o: \
1090
+ ggml/src/llamafile/sgemm.cpp \
1091
+ ggml/src/llamafile/sgemm.h \
1092
+ ggml/include/ggml.h
1093
+ $(CXX) $(CXXFLAGS) -c $< -o $@
1094
+ endif # GGML_NO_LLAMAFILE
1095
+
1096
+ ifndef GGML_NO_AMX
1097
+ ggml/src/ggml-amx.o: \
1098
+ ggml/src/ggml-amx.cpp \
1099
+ ggml/include/ggml-amx.h
1100
+ $(CXX) $(CXXFLAGS) -c $< -o $@
1101
+
1102
+ ggml/src/ggml-amx/mmq.o: \
1103
+ ggml/src/ggml-amx/mmq.cpp \
1104
+ ggml/src/ggml-amx/mmq.h \
1105
+ ggml/include/ggml.h
1106
+ $(CXX) $(CXXFLAGS) -c $< -o $@
1107
+ endif
1108
+
1109
+ ifdef GGML_RPC
1110
+ ggml/src/ggml-rpc.o: \
1111
+ ggml/src/ggml-rpc.cpp \
1112
+ ggml/include/ggml-rpc.h
1113
+ $(CXX) $(CXXFLAGS) -c $< -o $@
1114
+ endif # GGML_RPC
1115
+
1116
+ $(LIB_GGML): \
1117
+ $(OBJ_GGML)
1118
+ $(CXX) $(CXXFLAGS) -shared -fPIC -o $@ $^ $(LDFLAGS)
1119
+
1120
+ $(LIB_GGML_S): \
1121
+ $(OBJ_GGML)
1122
+ ar rcs $(LIB_GGML_S) $^
1123
+
1124
+ # llama
1125
+
1126
+ src/unicode.o: \
1127
+ src/unicode.cpp \
1128
+ src/unicode.h
1129
+ $(CXX) $(CXXFLAGS) -c $< -o $@
1130
+
1131
+ src/unicode-data.o: \
1132
+ src/unicode-data.cpp \
1133
+ src/unicode-data.h
1134
+ $(CXX) $(CXXFLAGS) -c $< -o $@
1135
+
1136
+ src/llama.o: \
1137
+ src/llama.cpp \
1138
+ src/llama-impl.h \
1139
+ src/llama-vocab.h \
1140
+ src/llama-grammar.h \
1141
+ src/llama-sampling.h \
1142
+ src/unicode.h \
1143
+ include/llama.h \
1144
+ ggml/include/ggml-cuda.h \
1145
+ ggml/include/ggml-metal.h \
1146
+ ggml/include/ggml.h \
1147
+ ggml/include/ggml-alloc.h \
1148
+ ggml/include/ggml-backend.h
1149
+ $(CXX) $(CXXFLAGS) -c $< -o $@
1150
+
1151
+ src/llama-vocab.o: \
1152
+ src/llama-vocab.cpp \
1153
+ src/llama-vocab.h \
1154
+ src/llama-impl.h \
1155
+ include/llama.h
1156
+ $(CXX) $(CXXFLAGS) -c $< -o $@
1157
+
1158
+ src/llama-grammar.o: \
1159
+ src/llama-grammar.cpp \
1160
+ src/llama-grammar.h \
1161
+ src/llama-impl.h \
1162
+ src/llama-vocab.h \
1163
+ src/llama-sampling.h \
1164
+ include/llama.h
1165
+ $(CXX) $(CXXFLAGS) -c $< -o $@
1166
+
1167
+ src/llama-sampling.o: \
1168
+ src/llama-sampling.cpp \
1169
+ src/llama-sampling.h \
1170
+ src/llama-impl.h \
1171
+ include/llama.h
1172
+ $(CXX) $(CXXFLAGS) -c $< -o $@
1173
+
1174
+ $(LIB_LLAMA): \
1175
+ $(OBJ_LLAMA) \
1176
+ $(LIB_GGML)
1177
+ $(CXX) $(CXXFLAGS) -shared -fPIC -o $@ $^ $(LDFLAGS)
1178
+
1179
+ $(LIB_LLAMA_S): \
1180
+ $(OBJ_LLAMA)
1181
+ ar rcs $(LIB_LLAMA_S) $^
1182
+
1183
+ # common
1184
+
1185
+ common/common.o: \
1186
+ common/common.cpp \
1187
+ common/common.h \
1188
+ common/console.h \
1189
+ common/sampling.h \
1190
+ common/json.hpp \
1191
+ common/json-schema-to-grammar.h \
1192
+ include/llama.h
1193
+ $(CXX) $(CXXFLAGS) -c $< -o $@
1194
+
1195
+ common/arg.o: \
1196
+ common/arg.cpp \
1197
+ common/arg.h
1198
+ $(CXX) $(CXXFLAGS) -c $< -o $@
1199
+
1200
+ common/log.o: \
1201
+ common/log.cpp \
1202
+ common/log.h
1203
+ $(CXX) $(CXXFLAGS) -c $< -o $@
1204
+
1205
+ common/sampling.o: \
1206
+ common/sampling.cpp \
1207
+ common/sampling.h \
1208
+ include/llama.h
1209
+ $(CXX) $(CXXFLAGS) -c $< -o $@
1210
+
1211
+ common/console.o: \
1212
+ common/console.cpp \
1213
+ common/console.h
1214
+ $(CXX) $(CXXFLAGS) -c $< -o $@
1215
+
1216
+ common/json-schema-to-grammar.o: \
1217
+ common/json-schema-to-grammar.cpp \
1218
+ common/json-schema-to-grammar.h
1219
+ $(CXX) $(CXXFLAGS) -c $< -o $@
1220
+
1221
+ common/ngram-cache.o: \
1222
+ common/ngram-cache.cpp \
1223
+ common/ngram-cache.h
1224
+ $(CXX) $(CXXFLAGS) -c $< -o $@
1225
+
1226
+ $(LIB_COMMON): \
1227
+ $(OBJ_COMMON) \
1228
+ $(LIB_LLAMA) \
1229
+ $(LIB_GGML)
1230
+ $(CXX) $(CXXFLAGS) -shared -fPIC -o $@ $^ $(LDFLAGS)
1231
+
1232
+ $(LIB_COMMON_S): \
1233
+ $(OBJ_COMMON)
1234
+ ar rcs $(LIB_COMMON_S) $^
1235
+
1236
+ clean:
1237
+ rm -vrf *.dot $(BUILD_TARGETS) $(TEST_TARGETS)
1238
+ rm -rvf src/*.o
1239
+ rm -rvf tests/*.o
1240
+ rm -rvf examples/*.o
1241
+ rm -rvf common/*.o
1242
+ rm -rvf *.a
1243
+ rm -rvf *.dll
1244
+ rm -rvf *.so
1245
+ rm -rvf *.dot
1246
+ rm -rvf ggml/*.a
1247
+ rm -rvf ggml/*.dll
1248
+ rm -rvf ggml/*.so
1249
+ rm -vrf ggml/src/*.o
1250
+ rm -rvf ggml/src/llamafile/*.o
1251
+ rm -rvf common/build-info.cpp
1252
+ rm -vrf ggml/src/ggml-metal-embed.metal
1253
+ rm -vrf ggml/src/ggml-cuda/*.o
1254
+ rm -vrf ggml/src/ggml-cuda/template-instances/*.o
1255
+ rm -vrf ggml/src/ggml-amx/*.o
1256
+ rm -rvf $(BUILD_TARGETS)
1257
+ rm -rvf $(TEST_TARGETS)
1258
+ rm -f vulkan-shaders-gen ggml/src/ggml-vulkan-shaders.hpp ggml/src/ggml-vulkan-shaders.cpp
1259
+ rm -rvf $(LEGACY_TARGETS_CLEAN)
1260
+ find examples pocs -type f -name "*.o" -delete
1261
+
1262
+ #
1263
+ # Examples
1264
+ #
1265
+
1266
+ # $< is the first prerequisite, i.e. the source file.
1267
+ # Explicitly compile this to an object file so that it can be cached with ccache.
1268
+ # The source file is then filtered out from $^ (the list of all prerequisites) and the object file is added instead.
1269
+
1270
+ # Helper function that replaces .c, .cpp, and .cu file endings with .o:
1271
+ GET_OBJ_FILE = $(patsubst %.c,%.o,$(patsubst %.cpp,%.o,$(patsubst %.cu,%.o,$(1))))
1272
+
1273
+ llama-cli: examples/main/main.cpp \
1274
+ $(OBJ_ALL)
1275
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1276
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1277
+ @echo
1278
+ @echo '==== Run ./llama-cli -h for help. ===='
1279
+ @echo
1280
+
1281
+ llama-infill: examples/infill/infill.cpp \
1282
+ $(OBJ_ALL)
1283
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1284
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1285
+
1286
+ llama-simple: examples/simple/simple.cpp \
1287
+ $(OBJ_ALL)
1288
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1289
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1290
+
1291
+ llama-simple-chat: examples/simple-chat/simple-chat.cpp \
1292
+ $(OBJ_ALL)
1293
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1294
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1295
+
1296
+ llama-tokenize: examples/tokenize/tokenize.cpp \
1297
+ $(OBJ_ALL)
1298
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1299
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1300
+
1301
+ llama-batched: examples/batched/batched.cpp \
1302
+ $(OBJ_ALL)
1303
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1304
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1305
+
1306
+ llama-batched-bench: examples/batched-bench/batched-bench.cpp \
1307
+ $(OBJ_ALL)
1308
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1309
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1310
+
1311
+ llama-quantize: examples/quantize/quantize.cpp \
1312
+ $(OBJ_ALL)
1313
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1314
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1315
+
1316
+ llama-quantize-stats: examples/quantize-stats/quantize-stats.cpp \
1317
+ $(OBJ_ALL)
1318
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1319
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1320
+
1321
+ llama-perplexity: examples/perplexity/perplexity.cpp \
1322
+ $(OBJ_ALL)
1323
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1324
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1325
+
1326
+ llama-imatrix: examples/imatrix/imatrix.cpp \
1327
+ $(OBJ_ALL)
1328
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1329
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1330
+
1331
+ llama-embedding: examples/embedding/embedding.cpp \
1332
+ $(OBJ_ALL)
1333
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1334
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1335
+
1336
+ llama-gritlm: examples/gritlm/gritlm.cpp \
1337
+ $(OBJ_ALL)
1338
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1339
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1340
+
1341
+ llama-save-load-state: examples/save-load-state/save-load-state.cpp \
1342
+ $(OBJ_ALL)
1343
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1344
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1345
+
1346
+ llama-gguf: examples/gguf/gguf.cpp \
1347
+ $(OBJ_GGML)
1348
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1349
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1350
+
1351
+ examples/gguf-hash/deps/sha1/sha1.o: \
1352
+ examples/gguf-hash/deps/sha1/sha1.c
1353
+ $(CC) $(CFLAGS) -Iexamples/gguf-hash/deps -c $< -o $@
1354
+
1355
+ examples/gguf-hash/deps/xxhash/xxhash.o: \
1356
+ examples/gguf-hash/deps/xxhash/xxhash.c
1357
+ $(CC) $(CFLAGS) -Iexamples/gguf-hash/deps -c $< -o $@
1358
+
1359
+ examples/gguf-hash/deps/sha256/sha256.o: \
1360
+ examples/gguf-hash/deps/sha256/sha256.c
1361
+ $(CC) $(CFLAGS) -Iexamples/gguf-hash/deps -c $< -o $@
1362
+
1363
+ llama-gguf-hash: examples/gguf-hash/gguf-hash.cpp examples/gguf-hash/deps/sha1/sha1.o examples/gguf-hash/deps/xxhash/xxhash.o examples/gguf-hash/deps/sha256/sha256.o\
1364
+ $(OBJ_ALL)
1365
+ $(CXX) $(CXXFLAGS) -Iexamples/gguf-hash/deps -c $< -o $(call GET_OBJ_FILE, $<)
1366
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1367
+
1368
+ llama-gguf-split: examples/gguf-split/gguf-split.cpp \
1369
+ $(OBJ_ALL)
1370
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1371
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1372
+
1373
+ llama-eval-callback: examples/eval-callback/eval-callback.cpp \
1374
+ $(OBJ_ALL)
1375
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1376
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1377
+
1378
+ llama-cvector-generator: examples/cvector-generator/cvector-generator.cpp \
1379
+ $(OBJ_ALL)
1380
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1381
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1382
+
1383
+ llama-convert-llama2c-to-ggml: examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp \
1384
+ $(OBJ_ALL)
1385
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1386
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1387
+
1388
+ llama-bench: examples/llama-bench/llama-bench.cpp \
1389
+ $(OBJ_ALL)
1390
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1391
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1392
+
1393
+ llama-export-lora: examples/export-lora/export-lora.cpp \
1394
+ $(OBJ_ALL)
1395
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1396
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1397
+
1398
+ llama-retrieval: examples/retrieval/retrieval.cpp \
1399
+ $(OBJ_ALL)
1400
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1401
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1402
+
1403
+ llama-speculative: examples/speculative/speculative.cpp \
1404
+ $(OBJ_ALL)
1405
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1406
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1407
+
1408
+ llama-parallel: examples/parallel/parallel.cpp \
1409
+ $(OBJ_ALL)
1410
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1411
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1412
+
1413
+ llama-lookahead: examples/lookahead/lookahead.cpp \
1414
+ $(OBJ_ALL)
1415
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1416
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1417
+
1418
+ llama-lookup: examples/lookup/lookup.cpp \
1419
+ $(OBJ_ALL)
1420
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1421
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1422
+
1423
+ llama-lookup-create: examples/lookup/lookup-create.cpp \
1424
+ $(OBJ_ALL)
1425
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1426
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1427
+
1428
+ llama-lookup-merge: examples/lookup/lookup-merge.cpp \
1429
+ $(OBJ_ALL)
1430
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1431
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1432
+
1433
+ llama-lookup-stats: examples/lookup/lookup-stats.cpp \
1434
+ $(OBJ_ALL)
1435
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1436
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1437
+
1438
+ llama-passkey: examples/passkey/passkey.cpp \
1439
+ $(OBJ_ALL)
1440
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1441
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1442
+
1443
+ llama-gbnf-validator: examples/gbnf-validator/gbnf-validator.cpp \
1444
+ $(OBJ_ALL)
1445
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1446
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1447
+
1448
+ ifdef GGML_RPC
1449
+ rpc-server: examples/rpc/rpc-server.cpp \
1450
+ $(OBJ_GGML)
1451
+ $(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
1452
+ endif # GGML_RPC
1453
+
1454
+ llama-server: \
1455
+ examples/server/server.cpp \
1456
+ examples/server/utils.hpp \
1457
+ examples/server/httplib.h \
1458
+ examples/server/colorthemes.css.hpp \
1459
+ examples/server/style.css.hpp \
1460
+ examples/server/theme-beeninorder.css.hpp \
1461
+ examples/server/theme-ketivah.css.hpp \
1462
+ examples/server/theme-mangotango.css.hpp \
1463
+ examples/server/theme-playground.css.hpp \
1464
+ examples/server/theme-polarnight.css.hpp \
1465
+ examples/server/theme-snowstorm.css.hpp \
1466
+ examples/server/index.html.hpp \
1467
+ examples/server/index-new.html.hpp \
1468
+ examples/server/index.js.hpp \
1469
+ examples/server/completion.js.hpp \
1470
+ examples/server/system-prompts.js.hpp \
1471
+ examples/server/prompt-formats.js.hpp \
1472
+ examples/server/json-schema-to-grammar.mjs.hpp \
1473
+ examples/server/loading.html.hpp \
1474
+ common/json.hpp \
1475
+ common/stb_image.h \
1476
+ $(OBJ_ALL)
1477
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1478
+ $(CXX) $(CXXFLAGS) $(filter-out %.h %.hpp $<,$^) -Iexamples/server $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) $(LWINSOCK2)
1479
+
1480
+ # Portable equivalent of `cd examples/server/public && xxd -i $(notdir $<) ../$(notdir $<).hpp`:
1481
+ examples/server/%.hpp: examples/server/public/% Makefile
1482
+ @( export NAME=$(subst .,_,$(subst -,_,$(notdir $<))) && \
1483
+ echo "unsigned char $${NAME}[] = {" && \
1484
+ cat $< | od -v -t x1 -An | sed -E 's/([0-9a-fA-F]+)/0x\1, /g' && \
1485
+ echo "};" && \
1486
+ echo "unsigned int $${NAME}_len = $(shell cat $< | wc -c );" \
1487
+ ) > $@
1488
+
1489
+ llama-gen-docs: examples/gen-docs/gen-docs.cpp \
1490
+ $(OBJ_ALL)
1491
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1492
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1493
+
1494
+ libllava.a: examples/llava/llava.cpp \
1495
+ examples/llava/llava.h \
1496
+ examples/llava/clip.cpp \
1497
+ examples/llava/clip.h \
1498
+ common/stb_image.h \
1499
+ common/base64.hpp \
1500
+ $(OBJ_ALL)
1501
+ $(CXX) $(CXXFLAGS) -static -fPIC -c $< -o $@ -Wno-cast-qual
1502
+
1503
+ llama-llava-cli: examples/llava/llava-cli.cpp \
1504
+ examples/llava/llava.cpp \
1505
+ examples/llava/llava.h \
1506
+ examples/llava/clip.cpp \
1507
+ examples/llava/clip.h \
1508
+ $(OBJ_ALL)
1509
+ $(CXX) $(CXXFLAGS) $< $(filter-out %.h $<,$^) -o $@ $(LDFLAGS) -Wno-cast-qual
1510
+
1511
+ llama-minicpmv-cli: examples/llava/minicpmv-cli.cpp \
1512
+ examples/llava/llava.cpp \
1513
+ examples/llava/llava.h \
1514
+ examples/llava/clip.cpp \
1515
+ examples/llava/clip.h \
1516
+ $(OBJ_ALL)
1517
+ $(CXX) $(CXXFLAGS) $< $(filter-out %.h $<,$^) -o $@ $(LDFLAGS) -Wno-cast-qual
1518
+
1519
+ ifeq ($(UNAME_S),Darwin)
1520
+ swift: examples/batched.swift
1521
+ (cd examples/batched.swift; make build)
1522
+ endif
1523
+
1524
+ common/build-info.cpp: $(wildcard .git/index) scripts/build-info.sh
1525
+ @sh scripts/build-info.sh "$(CC)" > $@.tmp
1526
+ @if ! cmp -s $@.tmp $@; then \
1527
+ mv $@.tmp $@; \
1528
+ else \
1529
+ rm $@.tmp; \
1530
+ fi
1531
+
1532
+ common/build-info.o: common/build-info.cpp
1533
+ $(CXX) $(CXXFLAGS) -c $(filter-out %.h,$^) -o $@
1534
+
1535
+ #
1536
+ # Tests
1537
+ #
1538
+
1539
+ tests: $(TEST_TARGETS)
1540
+
1541
+ tests/test-arg-parser: tests/test-arg-parser.cpp \
1542
+ $(OBJ_ALL)
1543
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1544
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1545
+
1546
+ tests/test-llama-grammar: tests/test-llama-grammar.cpp \
1547
+ $(OBJ_ALL)
1548
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1549
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1550
+
1551
+ tests/test-log: tests/test-log.cpp \
1552
+ $(OBJ_ALL)
1553
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1554
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1555
+
1556
+ tests/test-grammar-parser: tests/test-grammar-parser.cpp \
1557
+ $(OBJ_ALL)
1558
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1559
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1560
+
1561
+ tests/test-grammar-integration: tests/test-grammar-integration.cpp \
1562
+ $(OBJ_ALL)
1563
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1564
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1565
+
1566
+ tests/test-double-float: tests/test-double-float.cpp
1567
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1568
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1569
+
1570
+ tests/test-json-schema-to-grammar: tests/test-json-schema-to-grammar.cpp \
1571
+ $(OBJ_ALL)
1572
+ $(CXX) $(CXXFLAGS) -Iexamples/server -c $< -o $(call GET_OBJ_FILE, $<)
1573
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1574
+
1575
+ tests/test-grad0: tests/test-grad0.cpp \
1576
+ $(OBJ_GGML)
1577
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1578
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1579
+
1580
+ tests/test-opt: tests/test-opt.cpp \
1581
+ $(OBJ_GGML)
1582
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1583
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1584
+
1585
+ tests/test-quantize-fns: tests/test-quantize-fns.cpp \
1586
+ $(OBJ_GGML)
1587
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1588
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1589
+
1590
+ tests/test-quantize-perf: tests/test-quantize-perf.cpp \
1591
+ $(OBJ_GGML)
1592
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1593
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1594
+
1595
+ tests/test-sampling: tests/test-sampling.cpp \
1596
+ $(OBJ_ALL)
1597
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1598
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1599
+
1600
+ tests/test-tokenizer-0: tests/test-tokenizer-0.cpp \
1601
+ $(OBJ_ALL)
1602
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1603
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1604
+
1605
+ tests/test-tokenizer-1-bpe: tests/test-tokenizer-1-bpe.cpp \
1606
+ $(OBJ_ALL)
1607
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1608
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1609
+
1610
+ tests/test-tokenizer-1-spm: tests/test-tokenizer-1-spm.cpp \
1611
+ $(OBJ_ALL)
1612
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1613
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1614
+
1615
+ tests/test-rope: tests/test-rope.cpp ggml/src/ggml.o \
1616
+ $(OBJ_GGML)
1617
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1618
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1619
+
1620
+ tests/test-c.o: tests/test-c.c include/llama.h
1621
+ $(CC) $(CFLAGS) -c $(filter-out %.h,$^) -o $@
1622
+
1623
+ tests/test-backend-ops: tests/test-backend-ops.cpp \
1624
+ $(OBJ_GGML)
1625
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1626
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1627
+
1628
+ tests/test-model-load-cancel: tests/test-model-load-cancel.cpp tests/get-model.cpp \
1629
+ $(OBJ_ALL)
1630
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1631
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1632
+
1633
+ tests/test-autorelease: tests/test-autorelease.cpp tests/get-model.cpp \
1634
+ $(OBJ_ALL)
1635
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1636
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1637
+
1638
+ tests/test-chat-template: tests/test-chat-template.cpp \
1639
+ $(OBJ_ALL)
1640
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1641
+ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1642
+
1643
+ #
1644
+ # PoCs
1645
+ #
1646
+
1647
+ llama-vdot: pocs/vdot/vdot.cpp ggml/src/ggml.o \
1648
+ $(OBJ_GGML)
1649
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1650
+ $(CXX) $(CXXFLAGS) $(filter-out $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1651
+
1652
+ llama-q8dot: pocs/vdot/q8dot.cpp ggml/src/ggml.o \
1653
+ $(OBJ_GGML)
1654
+ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1655
+ $(CXX) $(CXXFLAGS) $(filter-out $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1656
+
1657
+ #
1658
+ # Deprecated binaries that we want to keep around long enough for people to migrate to the new filenames; after that, they can be removed.
1659
+ #
1660
+ # Mark legacy binary targets as .PHONY so that they are always checked.
1661
+ .PHONY: main quantize perplexity embedding server
1662
+
1663
+ # Define the object file target
1664
+ examples/deprecation-warning/deprecation-warning.o: examples/deprecation-warning/deprecation-warning.cpp
1665
+ $(CXX) $(CXXFLAGS) -c $< -o $@
1666
+
1667
+ # NOTE: We currently always build the deprecation-warning `main` and `server` binaries to help users migrate.
1668
+ # Eventually we will want to remove these targets from building all the time.
1669
+ main: examples/deprecation-warning/deprecation-warning.o
1670
+ $(CXX) $(CXXFLAGS) $< -o $@ $(LDFLAGS)
1671
+ @echo "NOTICE: The 'main' binary is deprecated. Please use 'llama-cli' instead."
1672
+
1673
+ server: examples/deprecation-warning/deprecation-warning.o
1674
+ $(CXX) $(CXXFLAGS) $< -o $@ $(LDFLAGS)
1675
+ @echo "NOTICE: The 'server' binary is deprecated. Please use 'llama-server' instead."
1676
+
1677
+ quantize: examples/deprecation-warning/deprecation-warning.o
1678
+ ifneq (,$(wildcard quantize))
1679
+ $(CXX) $(CXXFLAGS) $< -o $@ $(LDFLAGS)
1680
+ @echo "#########"
1681
+ @echo "WARNING: The 'quantize' binary is deprecated. Please use 'llama-quantize' instead."
1682
+ @echo " Remove the 'quantize' binary to remove this warning."
1683
+ @echo "#########"
1684
+ endif
1685
+
1686
+ perplexity: examples/deprecation-warning/deprecation-warning.o
1687
+ ifneq (,$(wildcard perplexity))
1688
+ $(CXX) $(CXXFLAGS) $< -o $@ $(LDFLAGS)
1689
+ @echo "#########"
1690
+ @echo "WARNING: The 'perplexity' binary is deprecated. Please use 'llama-perplexity' instead."
1691
+ @echo " Remove the 'perplexity' binary to remove this warning."
1692
+ @echo "#########"
1693
+ endif
1694
+
1695
+ embedding: examples/deprecation-warning/deprecation-warning.o
1696
+ ifneq (,$(wildcard embedding))
1697
+ $(CXX) $(CXXFLAGS) $< -o $@ $(LDFLAGS)
1698
+ @echo "#########"
1699
+ @echo "WARNING: The 'embedding' binary is deprecated. Please use 'llama-embedding' instead."
1700
+ @echo " Remove the 'embedding' binary to remove this warning."
1701
+ @echo "#########"
1702
+ endif
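To summarize the deprecation targets above: the old names (`main`, `server`, `quantize`, `perplexity`, `embedding`) now only build stub binaries that point at the renamed tools, while the real functionality lives under the `llama-` prefix. A minimal sketch of the intended workflow (the model path is a placeholder):

```bash
# Build the renamed binaries defined in this Makefile.
make llama-cli llama-server -j"$(nproc)"

# The legacy target still builds, but only as a deprecation stub:
make main   # prints: NOTICE: The 'main' binary is deprecated. Please use 'llama-cli' instead.

# Use the new names for actual work (the model path is a placeholder).
./llama-cli    -m models/model.gguf -p "Hello"
./llama-server -m models/model.gguf --port 8080
```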
Package.swift ADDED
@@ -0,0 +1,80 @@
1
+ // swift-tools-version:5.5
2
+
3
+ import PackageDescription
4
+
5
+ var sources = [
6
+ "src/llama.cpp",
7
+ "src/llama-vocab.cpp",
8
+ "src/llama-grammar.cpp",
9
+ "src/llama-sampling.cpp",
10
+ "src/unicode.cpp",
11
+ "src/unicode-data.cpp",
12
+ "ggml/src/ggml.c",
13
+ "ggml/src/ggml-cpu.c",
14
+ "ggml/src/ggml-alloc.c",
15
+ "ggml/src/ggml-backend.cpp",
16
+ "ggml/src/ggml-quants.c",
17
+ "ggml/src/ggml-aarch64.c",
18
+ ]
19
+
20
+ var resources: [Resource] = []
21
+ var linkerSettings: [LinkerSetting] = []
22
+ var cSettings: [CSetting] = [
23
+ .unsafeFlags(["-Wno-shorten-64-to-32", "-O3", "-DNDEBUG"]),
24
+ .unsafeFlags(["-fno-objc-arc"]),
25
+ // NOTE: NEW_LAPACK will require iOS version 16.4+
26
+ // We should consider adding this in the future when we drop support for iOS 14
27
+ // (ref: https://developer.apple.com/documentation/accelerate/1513264-cblas_sgemm?language=objc)
28
+ // .define("ACCELERATE_NEW_LAPACK"),
29
+ // .define("ACCELERATE_LAPACK_ILP64")
30
+ ]
31
+
32
+ #if canImport(Darwin)
33
+ sources.append("ggml/src/ggml-metal.m")
34
+ resources.append(.process("ggml/src/ggml-metal.metal"))
35
+ linkerSettings.append(.linkedFramework("Accelerate"))
36
+ cSettings.append(
37
+ contentsOf: [
38
+ .define("GGML_USE_ACCELERATE"),
39
+ .define("GGML_USE_METAL")
40
+ ]
41
+ )
42
+ #endif
43
+
44
+ #if os(Linux)
45
+ cSettings.append(.define("_GNU_SOURCE"))
46
+ #endif
47
+
48
+ let package = Package(
49
+ name: "llama",
50
+ platforms: [
51
+ .macOS(.v12),
52
+ .iOS(.v14),
53
+ .watchOS(.v4),
54
+ .tvOS(.v14)
55
+ ],
56
+ products: [
57
+ .library(name: "llama", targets: ["llama"]),
58
+ ],
59
+ targets: [
60
+ .target(
61
+ name: "llama",
62
+ path: ".",
63
+ exclude: [
64
+ "cmake",
65
+ "examples",
66
+ "scripts",
67
+ "models",
68
+ "tests",
69
+ "CMakeLists.txt",
70
+ "Makefile"
71
+ ],
72
+ sources: sources,
73
+ resources: resources,
74
+ publicHeadersPath: "spm-headers",
75
+ cSettings: cSettings,
76
+ linkerSettings: linkerSettings
77
+ )
78
+ ],
79
+ cxxLanguageStandard: .cxx11
80
+ )
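The manifest above exposes the repository root as a single SwiftPM library product named `llama`, with Metal and Accelerate enabled automatically on Darwin. A quick, unofficial way to sanity-check it locally, assuming a Swift 5.5+ toolchain is installed:

```bash
# Inspect and build the SwiftPM package described by Package.swift (run from the repo root).
swift package describe   # lists the 'llama' library target and its sources
swift build -c release   # compiles the package; on macOS this picks up Metal/Accelerate
```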
SECURITY.md ADDED
@@ -0,0 +1,67 @@
1
+ # Security Policy
2
+
3
+ - [**Using llama.cpp securely**](#using-llamacpp-securely)
4
+ - [Untrusted models](#untrusted-models)
5
+ - [Untrusted inputs](#untrusted-inputs)
6
+ - [Data privacy](#data-privacy)
7
+ - [Untrusted environments or networks](#untrusted-environments-or-networks)
8
+ - [Multi-Tenant environments](#multi-tenant-environments)
9
+ - [**Reporting a vulnerability**](#reporting-a-vulnerability)
10
+
11
+ ## Using llama.cpp securely
12
+
13
+ ### Untrusted models
14
+ Be careful when running untrusted models. This classification includes models created by unknown developers or utilizing data obtained from unknown sources.
15
+
16
+ *Always execute untrusted models within a secure, isolated environment such as a sandbox* (e.g., containers, virtual machines). This helps protect your system from potentially malicious code.
17
+
18
+ > [!NOTE]
19
+ > The trustworthiness of a model is not binary. You must always determine the proper level of caution depending on the specific model and how it matches your use case and risk tolerance.
20
+
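One concrete way to follow the sandboxing advice above is to run inference in a throwaway container with networking disabled. This is only a sketch; the image tag, model file and prompt are illustrative, not a prescribed setup:

```bash
# Sketch: run an untrusted model in an isolated, network-less container.
# Image tag, model name and prompt are examples only.
docker run --rm \
  --network none \
  -v "$PWD/models:/models:ro" \
  ghcr.io/ggerganov/llama.cpp:light \
  -m /models/untrusted-model.gguf -p "Hello" -n 32
```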
21
+ ### Untrusted inputs
22
+
23
+ Some models accept various input formats (text, images, audio, etc.). The libraries converting these inputs have varying security levels, so it's crucial to isolate the model and carefully pre-process inputs to mitigate script injection risks.
24
+
25
+ For maximum security when handling untrusted inputs, you may need to employ the following:
26
+
27
+ * Sandboxing: Isolate the environment where the inference happens.
28
+ * Pre-analysis: Check how the model performs by default when exposed to prompt injection (e.g. using [fuzzing for prompt injection](https://github.com/FonduAI/awesome-prompt-injection?tab=readme-ov-file#tools)). This will give you an idea of how much work the remaining mitigations will require.
29
+ * Updates: Keep both LLaMA C++ and your libraries updated with the latest security patches.
30
+ * Input Sanitization: Before feeding data to the model, sanitize inputs rigorously. This involves techniques such as:
31
+ * Validation: Enforce strict rules on allowed characters and data types.
32
+ * Filtering: Remove potentially malicious scripts or code fragments.
33
+ * Encoding: Convert special characters into safe representations.
34
+ * Verification: Run tooling that identifies potential script injections (e.g. [models that detect prompt injection attempts](https://python.langchain.com/docs/guides/safety/hugging_face_prompt_injection)).
35
+
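The bullet points above are intentionally generic. As one small illustration of the validation idea, a wrapper script can enforce a length limit and a character allow-list before a prompt ever reaches the model; the limits, character set and binary path below are arbitrary examples, not a complete defense:

```bash
#!/bin/bash
# Sketch: reject prompts that are too long or contain characters outside a simple allow-list,
# then forward the prompt to llama-cli. All limits here are arbitrary examples.
prompt="$1"

if [ "${#prompt}" -gt 2048 ]; then
    echo "prompt too long" >&2; exit 1
fi

if printf '%s' "$prompt" | grep -q '[^a-zA-Z0-9 .,?!-]'; then
    echo "prompt contains disallowed characters" >&2; exit 1
fi

./llama-cli -m model.gguf -p "$prompt" -n 128
```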
36
+ ### Data privacy
37
+
38
+ To protect sensitive data from potential leaks or unauthorized access, it is crucial to sandbox the model execution. This means running the model in a secure, isolated environment, which helps mitigate many attack vectors.
39
+
40
+ ### Untrusted environments or networks
41
+
42
+ If you can't run your models in a secure and isolated environment or if it must be exposed to an untrusted network, make sure to take the following security precautions:
43
+ * Confirm the hash of any downloaded artifact (e.g. pre-trained model weights) matches a known-good value
44
+ * Encrypt your data if sending it over the network.
45
+
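For the first precaution above, checking a downloaded artifact against a published checksum is a one-liner; the hash and file name below are placeholders:

```bash
# Verify a downloaded model against a known-good SHA-256 (hash and file name are placeholders).
echo "0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef  model.gguf" | sha256sum -c -
# Expected output on success: model.gguf: OK
```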
46
+ ### Multi-Tenant environments
47
+
48
+ If you intend to run multiple models in parallel with shared memory, it is your responsibility to ensure the models do not interact or access each other's data. The primary areas of concern are tenant isolation, resource allocation, model sharing and hardware attacks.
49
+
50
+ 1. Tenant Isolation: Models should run separately with strong isolation methods to prevent unwanted data access. Separating networks is crucial for isolation, as it prevents unauthorized access to data or models and malicious users from sending graphs to execute under another tenant's identity.
51
+
52
+ 2. Resource Allocation: A denial of service caused by one model can impact the overall system health. Implement safeguards like rate limits, access controls, and health monitoring.
53
+
54
+ 3. Model Sharing: In a multitenant model sharing design, tenants and users must understand the security risks of running code provided by others. Since there are no reliable methods to detect malicious models, sandboxing the model execution is the recommended approach to mitigate the risk.
55
+
56
+ 4. Hardware Attacks: GPUs or TPUs can also be attacked. [Research](https://scholar.google.com/scholar?q=gpu+side+channel) has shown that side-channel attacks on GPUs are possible, which can leak data from other models or processes running on the same system at the same time.
57
+
58
+ ## Reporting a vulnerability
59
+
60
+ Beware that none of the topics under [Using llama.cpp securely](#using-llamacpp-securely) are considered vulnerabilities of LLaMA C++.
61
+
62
+ <!-- normal version -->
63
+ However, if you have discovered a security vulnerability in this project, please report it privately. **Do not disclose it as a public issue.** This gives us time to work with you to fix the issue before public exposure, reducing the chance that the exploit will be used before a patch is released.
64
+
65
+ Please disclose it as a private [security advisory](https://github.com/ggerganov/llama.cpp/security/advisories/new).
66
+
67
+ This project is maintained by a team of volunteers on a reasonable-effort basis. As such, please give us at least 90 days to work on a fix before public exposure.
ci/README.md ADDED
@@ -0,0 +1,29 @@
1
+ # CI
2
+
3
+ In addition to [GitHub Actions](https://github.com/ggerganov/llama.cpp/actions), `llama.cpp` uses a custom CI framework:
4
+
5
+ https://github.com/ggml-org/ci
6
+
7
+ It monitors the `master` branch for new commits and runs the
8
+ [ci/run.sh](https://github.com/ggerganov/llama.cpp/blob/master/ci/run.sh) script on dedicated cloud instances. This allows us
9
+ to execute heavier workloads than GitHub Actions alone. Over time, the cloud instances will be scaled
10
+ to cover various hardware architectures, including GPU and Apple Silicon instances.
11
+
12
+ Collaborators can optionally trigger the CI run by adding the `ggml-ci` keyword to their commit message.
13
+ Only the branches of this repo are monitored for this keyword.
14
+
15
+ It is good practice, before publishing changes, to execute the full CI locally on your machine:
16
+
17
+ ```bash
18
+ mkdir tmp
19
+
20
+ # CPU-only build
21
+ bash ./ci/run.sh ./tmp/results ./tmp/mnt
22
+
23
+ # with CUDA support
24
+ GG_BUILD_CUDA=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
25
+
26
+ # with SYCL support
27
+ source /opt/intel/oneapi/setvars.sh
28
+ GG_BUILD_SYCL=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
29
+ ```
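The script writes per-step logs, recorded exit codes and a Markdown summary into the output directory passed as its first argument (see `ci/run.sh` below), so a local run can be inspected afterwards; the step name in the last command is just an example:

```bash
# Inspect the results of a local run that used ./tmp/results as the output directory.
cat ./tmp/results/README.md            # Markdown summary appended by the gg_sum_* helpers
ls   ./tmp/results/*.log               # per-step logs (cmake, make, ctest, ...)
cat ./tmp/results/ctest_release.exit   # exit code recorded for one step (name is an example)
```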
ci/run.sh ADDED
@@ -0,0 +1,851 @@
1
+ #!/bin/bash
2
+ #
3
+ # sample usage:
4
+ #
5
+ # mkdir tmp
6
+ #
7
+ # # CPU-only build
8
+ # bash ./ci/run.sh ./tmp/results ./tmp/mnt
9
+ #
10
+ # # with CUDA support
11
+ # GG_BUILD_CUDA=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
12
+ #
13
+ # # with SYCL support
14
+ # GG_BUILD_SYCL=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
15
+ #
16
+ # # with VULKAN support
17
+ # GG_BUILD_VULKAN=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
18
+ #
19
+
20
+ if [ -z "$2" ]; then
21
+ echo "usage: $0 <output-dir> <mnt-dir>"
22
+ exit 1
23
+ fi
24
+
25
+ mkdir -p "$1"
26
+ mkdir -p "$2"
27
+
28
+ OUT=$(realpath "$1")
29
+ MNT=$(realpath "$2")
30
+
31
+ rm -f "$OUT/*.log"
32
+ rm -f "$OUT/*.exit"
33
+ rm -f "$OUT/*.md"
34
+
35
+ sd=`dirname $0`
36
+ cd $sd/../
37
+ SRC=`pwd`
38
+
39
+ CMAKE_EXTRA="-DLLAMA_FATAL_WARNINGS=ON"
40
+
41
+ if [ ! -z ${GG_BUILD_METAL} ]; then
42
+ CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_METAL=ON"
43
+ fi
44
+
45
+ if [ ! -z ${GG_BUILD_CUDA} ]; then
46
+ CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_CUDA=ON -DCMAKE_CUDA_ARCHITECTURES=native"
47
+ fi
48
+
49
+ if [ ! -z ${GG_BUILD_SYCL} ]; then
50
+ if [ -z ${ONEAPI_ROOT} ]; then
51
+ echo "Not detected ONEAPI_ROOT, please install oneAPI base toolkit and enable it by:"
52
+ echo "source /opt/intel/oneapi/setvars.sh"
53
+ exit 1
54
+ fi
55
+
56
+ CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_SYCL=1 -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON"
57
+ fi
58
+
59
+ if [ ! -z ${GG_BUILD_VULKAN} ]; then
60
+ CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_VULKAN=1"
61
+ fi
62
+ ## helpers
63
+
64
+ # download a file if it does not exist or if it is outdated
65
+ function gg_wget {
66
+ local out=$1
67
+ local url=$2
68
+
69
+ local cwd=`pwd`
70
+
71
+ mkdir -p $out
72
+ cd $out
73
+
74
+ # should not re-download if file is the same
75
+ wget -nv -N $url
76
+
77
+ cd $cwd
78
+ }
79
+
80
+ function gg_printf {
81
+ printf -- "$@" >> $OUT/README.md
82
+ }
83
+
84
+ function gg_run {
85
+ ci=$1
86
+
87
+ set -o pipefail
88
+ set -x
89
+
90
+ gg_run_$ci | tee $OUT/$ci.log
91
+ cur=$?
92
+ echo "$cur" > $OUT/$ci.exit
93
+
94
+ set +x
95
+ set +o pipefail
96
+
97
+ gg_sum_$ci
98
+
99
+ ret=$((ret | cur))
100
+ }
101
+
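The `gg_run` helper above is the glue for every CI step: it invokes `gg_run_<name>`, tees its output to `$OUT/<name>.log`, records the exit code in `$OUT/<name>.exit`, and then calls the matching `gg_sum_<name>` to append a summary to the report. A hypothetical new step would plug into the same convention (all names below are made up for illustration):

```bash
# Hypothetical example of a CI step that follows the gg_run_*/gg_sum_* convention.
function gg_run_my_check {
    cd ${SRC}
    set -e
    (time echo "running my check") 2>&1 | tee -a $OUT/${ci}-my-check.log
    set +e
}

function gg_sum_my_check {
    gg_printf '### %s\n\n' "${ci}"
    gg_printf '```\n%s\n```\n' "$(cat $OUT/${ci}-my-check.log)"
}

# ...and, in the driver section at the bottom of the script, it would be invoked as:
# gg_run my_check
```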
102
+ ## ci
103
+
104
+ # ctest_debug
105
+
106
+ function gg_run_ctest_debug {
107
+ cd ${SRC}
108
+
109
+ rm -rf build-ci-debug && mkdir build-ci-debug && cd build-ci-debug
110
+
111
+ set -e
112
+
113
+ # Check cmake, make and ctest are installed
114
+ gg_check_build_requirements
115
+
116
+ (time cmake -DCMAKE_BUILD_TYPE=Debug ${CMAKE_EXTRA} .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log
117
+ (time make -j$(nproc) ) 2>&1 | tee -a $OUT/${ci}-make.log
118
+
119
+ (time ctest --output-on-failure -L main -E test-opt ) 2>&1 | tee -a $OUT/${ci}-ctest.log
120
+
121
+ set +e
122
+ }
123
+
124
+ function gg_sum_ctest_debug {
125
+ gg_printf '### %s\n\n' "${ci}"
126
+
127
+ gg_printf 'Runs ctest in debug mode\n'
128
+ gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)"
129
+ gg_printf '```\n'
130
+ gg_printf '%s\n' "$(cat $OUT/${ci}-ctest.log)"
131
+ gg_printf '```\n'
132
+ gg_printf '\n'
133
+ }
134
+
135
+ # ctest_release
136
+
137
+ function gg_run_ctest_release {
138
+ cd ${SRC}
139
+
140
+ rm -rf build-ci-release && mkdir build-ci-release && cd build-ci-release
141
+
142
+ set -e
143
+
144
+ # Check cmake, make and ctest are installed
145
+ gg_check_build_requirements
146
+
147
+ (time cmake -DCMAKE_BUILD_TYPE=Release ${CMAKE_EXTRA} .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log
148
+ (time make -j$(nproc) ) 2>&1 | tee -a $OUT/${ci}-make.log
149
+
150
+ if [ -z ${GG_BUILD_LOW_PERF} ]; then
151
+ (time ctest --output-on-failure -L main ) 2>&1 | tee -a $OUT/${ci}-ctest.log
152
+ else
153
+ (time ctest --output-on-failure -L main -E test-opt ) 2>&1 | tee -a $OUT/${ci}-ctest.log
154
+ fi
155
+
156
+ set +e
157
+ }
158
+
159
+ function gg_sum_ctest_release {
160
+ gg_printf '### %s\n\n' "${ci}"
161
+
162
+ gg_printf 'Runs ctest in release mode\n'
163
+ gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)"
164
+ gg_printf '```\n'
165
+ gg_printf '%s\n' "$(cat $OUT/${ci}-ctest.log)"
166
+ gg_printf '```\n'
167
+ }
168
+
169
+ # test_scripts_debug
170
+
171
+ function gg_run_test_scripts_debug {
172
+ cd ${SRC}
173
+
174
+ set -e
175
+
176
+ (cd ./examples/gguf-split && time bash tests.sh "$SRC/build-ci-debug/bin" "$MNT/models") 2>&1 | tee -a $OUT/${ci}-scripts.log
177
+ (cd ./examples/quantize && time bash tests.sh "$SRC/build-ci-debug/bin" "$MNT/models") 2>&1 | tee -a $OUT/${ci}-scripts.log
178
+
179
+ set +e
180
+ }
181
+
182
+ function gg_sum_test_scripts_debug {
183
+ gg_printf '### %s\n\n' "${ci}"
184
+
185
+ gg_printf 'Runs test scripts in debug mode\n'
186
+ gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)"
187
+ gg_printf '```\n'
188
+ gg_printf '%s\n' "$(cat $OUT/${ci}-scripts.log)"
189
+ gg_printf '```\n'
190
+ gg_printf '\n'
191
+ }
192
+
193
+ # test_scripts_release
194
+
195
+ function gg_run_test_scripts_release {
196
+ cd ${SRC}
197
+
198
+ set -e
199
+
200
+ (cd ./examples/gguf-split && time bash tests.sh "$SRC/build-ci-release/bin" "$MNT/models") 2>&1 | tee -a $OUT/${ci}-scripts.log
201
+ (cd ./examples/quantize && time bash tests.sh "$SRC/build-ci-release/bin" "$MNT/models") 2>&1 | tee -a $OUT/${ci}-scripts.log
202
+
203
+ set +e
204
+ }
205
+
206
+ function gg_sum_test_scripts_release {
207
+ gg_printf '### %s\n\n' "${ci}"
208
+
209
+ gg_printf 'Runs test scripts in release mode\n'
210
+ gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)"
211
+ gg_printf '```\n'
212
+ gg_printf '%s\n' "$(cat $OUT/${ci}-scripts.log)"
213
+ gg_printf '```\n'
214
+ gg_printf '\n'
215
+ }
216
+
217
+ function gg_get_model {
218
+ local gguf_0="$MNT/models/pythia/1.4B/ggml-model-f16.gguf"
219
+ local gguf_1="$MNT/models/pythia/2.8B/ggml-model-f16.gguf"
220
+ local gguf_2="$MNT/models/open-llama/7B-v2/ggml-model-f16.gguf"
221
+ if [[ -s $gguf_0 ]]; then
222
+ echo -n "$gguf_0"
223
+ elif [[ -s $gguf_1 ]]; then
224
+ echo -n "$gguf_1"
225
+ elif [[ -s $gguf_2 ]]; then
226
+ echo -n "$gguf_2"
227
+ else
228
+ echo >&2 "No model found. Can't run gg_run_ctest_with_model."
229
+ exit 1
230
+ fi
231
+ }
232
+
233
+ function gg_run_ctest_with_model_debug {
234
+ cd ${SRC}
235
+
236
+ local model; model=$(gg_get_model)
237
+ cd build-ci-debug
238
+ set -e
239
+ (LLAMACPP_TEST_MODELFILE="$model" time ctest --output-on-failure -L model) 2>&1 | tee -a $OUT/${ci}-ctest.log
240
+ set +e
241
+ cd ..
242
+ }
243
+
244
+ function gg_run_ctest_with_model_release {
245
+ cd ${SRC}
246
+
247
+ local model; model=$(gg_get_model)
248
+ cd build-ci-release
249
+ set -e
250
+ (LLAMACPP_TEST_MODELFILE="$model" time ctest --output-on-failure -L model) 2>&1 | tee -a $OUT/${ci}-ctest.log
251
+ set +e
252
+ cd ..
253
+ }
254
+
255
+ function gg_sum_ctest_with_model_debug {
256
+ gg_printf '### %s\n\n' "${ci}"
257
+
258
+ gg_printf 'Runs ctest with model files in debug mode\n'
259
+ gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)"
260
+ gg_printf '```\n'
261
+ gg_printf '%s\n' "$(cat $OUT/${ci}-ctest.log)"
262
+ gg_printf '```\n'
263
+ }
264
+
265
+ function gg_sum_ctest_with_model_release {
266
+ gg_printf '### %s\n\n' "${ci}"
267
+
268
+ gg_printf 'Runs ctest with model files in release mode\n'
269
+ gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)"
270
+ gg_printf '```\n'
271
+ gg_printf '%s\n' "$(cat $OUT/${ci}-ctest.log)"
272
+ gg_printf '```\n'
273
+ }
274
+
275
+ # open_llama_7b_v2
276
+
277
+ function gg_run_open_llama_7b_v2 {
278
+ cd ${SRC}
279
+
280
+ gg_wget models-mnt/open-llama/7B-v2/ https://huggingface.co/openlm-research/open_llama_7b_v2/raw/main/config.json
281
+ gg_wget models-mnt/open-llama/7B-v2/ https://huggingface.co/openlm-research/open_llama_7b_v2/resolve/main/tokenizer.model
282
+ gg_wget models-mnt/open-llama/7B-v2/ https://huggingface.co/openlm-research/open_llama_7b_v2/raw/main/tokenizer_config.json
283
+ gg_wget models-mnt/open-llama/7B-v2/ https://huggingface.co/openlm-research/open_llama_7b_v2/raw/main/special_tokens_map.json
284
+ gg_wget models-mnt/open-llama/7B-v2/ https://huggingface.co/openlm-research/open_llama_7b_v2/raw/main/pytorch_model.bin.index.json
285
+ gg_wget models-mnt/open-llama/7B-v2/ https://huggingface.co/openlm-research/open_llama_7b_v2/resolve/main/pytorch_model-00001-of-00002.bin
286
+ gg_wget models-mnt/open-llama/7B-v2/ https://huggingface.co/openlm-research/open_llama_7b_v2/resolve/main/pytorch_model-00002-of-00002.bin
287
+ gg_wget models-mnt/open-llama/7B-v2/ https://huggingface.co/openlm-research/open_llama_7b_v2/raw/main/generation_config.json
288
+
289
+ gg_wget models-mnt/wikitext/ https://huggingface.co/datasets/ggml-org/ci/resolve/main/wikitext-2-raw-v1.zip
290
+ unzip -o models-mnt/wikitext/wikitext-2-raw-v1.zip -d models-mnt/wikitext/
291
+
292
+ path_models="../models-mnt/open-llama/7B-v2"
293
+ path_wiki="../models-mnt/wikitext/wikitext-2-raw"
294
+
295
+ rm -rf build-ci-release && mkdir build-ci-release && cd build-ci-release
296
+
297
+ set -e
298
+
299
+ (time cmake -DCMAKE_BUILD_TYPE=Release ${CMAKE_EXTRA} .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log
300
+ (time make -j$(nproc) ) 2>&1 | tee -a $OUT/${ci}-make.log
301
+
302
+ python3 ../examples/convert_legacy_llama.py ${path_models} --outfile ${path_models}/ggml-model-f16.gguf
303
+
304
+ model_f16="${path_models}/ggml-model-f16.gguf"
305
+ model_q8_0="${path_models}/ggml-model-q8_0.gguf"
306
+ model_q4_0="${path_models}/ggml-model-q4_0.gguf"
307
+ model_q4_1="${path_models}/ggml-model-q4_1.gguf"
308
+ model_q5_0="${path_models}/ggml-model-q5_0.gguf"
309
+ model_q5_1="${path_models}/ggml-model-q5_1.gguf"
310
+ model_q2_k="${path_models}/ggml-model-q2_k.gguf"
311
+ model_q3_k="${path_models}/ggml-model-q3_k.gguf"
312
+ model_q4_k="${path_models}/ggml-model-q4_k.gguf"
313
+ model_q5_k="${path_models}/ggml-model-q5_k.gguf"
314
+ model_q6_k="${path_models}/ggml-model-q6_k.gguf"
315
+
316
+ wiki_test="${path_wiki}/wiki.test.raw"
317
+
318
+ ./bin/llama-quantize ${model_f16} ${model_q8_0} q8_0
319
+ ./bin/llama-quantize ${model_f16} ${model_q4_0} q4_0
320
+ ./bin/llama-quantize ${model_f16} ${model_q4_1} q4_1
321
+ ./bin/llama-quantize ${model_f16} ${model_q5_0} q5_0
322
+ ./bin/llama-quantize ${model_f16} ${model_q5_1} q5_1
323
+ ./bin/llama-quantize ${model_f16} ${model_q2_k} q2_k
324
+ ./bin/llama-quantize ${model_f16} ${model_q3_k} q3_k
325
+ ./bin/llama-quantize ${model_f16} ${model_q4_k} q4_k
326
+ ./bin/llama-quantize ${model_f16} ${model_q5_k} q5_k
327
+ ./bin/llama-quantize ${model_f16} ${model_q6_k} q6_k
328
+
329
+ (time ./bin/llama-cli --model ${model_f16} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log
330
+ (time ./bin/llama-cli --model ${model_q8_0} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log
331
+ (time ./bin/llama-cli --model ${model_q4_0} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_0.log
332
+ (time ./bin/llama-cli --model ${model_q4_1} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_1.log
333
+ (time ./bin/llama-cli --model ${model_q5_0} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_0.log
334
+ (time ./bin/llama-cli --model ${model_q5_1} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_1.log
335
+ (time ./bin/llama-cli --model ${model_q2_k} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q2_k.log
336
+ (time ./bin/llama-cli --model ${model_q3_k} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q3_k.log
337
+ (time ./bin/llama-cli --model ${model_q4_k} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_k.log
338
+ (time ./bin/llama-cli --model ${model_q5_k} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_k.log
339
+ (time ./bin/llama-cli --model ${model_q6_k} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q6_k.log
340
+
341
+ (time ./bin/llama-perplexity --model ${model_f16} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log
342
+ (time ./bin/llama-perplexity --model ${model_q8_0} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log
343
+ (time ./bin/llama-perplexity --model ${model_q4_0} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_0.log
344
+ (time ./bin/llama-perplexity --model ${model_q4_1} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_1.log
345
+ (time ./bin/llama-perplexity --model ${model_q5_0} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_0.log
346
+ (time ./bin/llama-perplexity --model ${model_q5_1} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_1.log
347
+ (time ./bin/llama-perplexity --model ${model_q2_k} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q2_k.log
348
+ (time ./bin/llama-perplexity --model ${model_q3_k} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q3_k.log
349
+ (time ./bin/llama-perplexity --model ${model_q4_k} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_k.log
350
+ (time ./bin/llama-perplexity --model ${model_q5_k} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_k.log
351
+ (time ./bin/llama-perplexity --model ${model_q6_k} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q6_k.log
352
+
353
+ (time ./bin/llama-imatrix --model ${model_f16} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-imatrix.log
354
+
355
+ (time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 10 -c 0 ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
356
+ (time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 10 -c 0 -fa ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
357
+ (time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 99 -c 0 ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
358
+ (time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 99 -c 0 -fa ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
359
+
360
+ function check_ppl {
361
+ qnt="$1"
362
+ ppl=$(echo "$2" | grep -oE "[0-9]+\.[0-9]+" | tail -n 1)
363
+
364
+ if [ $(echo "$ppl > 20.0" | bc) -eq 1 ]; then
365
+ printf ' - %s @ %s (FAIL: ppl > 20.0)\n' "$qnt" "$ppl"
366
+ return 20
367
+ fi
368
+
369
+ printf ' - %s @ %s OK\n' "$qnt" "$ppl"
370
+ return 0
371
+ }
372
+
373
+ check_ppl "f16" "$(cat $OUT/${ci}-tg-f16.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
374
+ check_ppl "q8_0" "$(cat $OUT/${ci}-tg-q8_0.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
375
+ check_ppl "q4_0" "$(cat $OUT/${ci}-tg-q4_0.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
376
+ check_ppl "q4_1" "$(cat $OUT/${ci}-tg-q4_1.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
377
+ check_ppl "q5_0" "$(cat $OUT/${ci}-tg-q5_0.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
378
+ check_ppl "q5_1" "$(cat $OUT/${ci}-tg-q5_1.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
379
+ check_ppl "q2_k" "$(cat $OUT/${ci}-tg-q2_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
380
+ check_ppl "q3_k" "$(cat $OUT/${ci}-tg-q3_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
381
+ check_ppl "q4_k" "$(cat $OUT/${ci}-tg-q4_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
382
+ check_ppl "q5_k" "$(cat $OUT/${ci}-tg-q5_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
383
+ check_ppl "q6_k" "$(cat $OUT/${ci}-tg-q6_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
384
+
385
+ cat $OUT/${ci}-imatrix.log | grep "Final" >> $OUT/${ci}-imatrix-sum.log
386
+
387
+ set +e
388
+ }
389
+
390
+ function gg_sum_open_llama_7b_v2 {
391
+ gg_printf '### %s\n\n' "${ci}"
392
+
393
+ gg_printf 'OpenLLaMA 7B-v2:\n'
394
+ gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)"
395
+ gg_printf '- perplexity:\n%s\n' "$(cat $OUT/${ci}-ppl.log)"
396
+ gg_printf '- imatrix:\n```\n%s\n```\n' "$(cat $OUT/${ci}-imatrix-sum.log)"
397
+ gg_printf '- f16: \n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-f16.log)"
398
+ gg_printf '- q8_0:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q8_0.log)"
399
+ gg_printf '- q4_0:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q4_0.log)"
400
+ gg_printf '- q4_1:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q4_1.log)"
401
+ gg_printf '- q5_0:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q5_0.log)"
402
+ gg_printf '- q5_1:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q5_1.log)"
403
+ gg_printf '- q2_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q2_k.log)"
404
+ gg_printf '- q3_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q3_k.log)"
405
+ gg_printf '- q4_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q4_k.log)"
406
+ gg_printf '- q5_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q5_k.log)"
407
+ gg_printf '- q6_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q6_k.log)"
408
+ gg_printf '- save-load-state: \n```\n%s\n```\n' "$(cat $OUT/${ci}-save-load-state.log)"
409
+ }
410
+
411
+ # pythia_1.4b
412
+
413
+ function gg_run_pythia_1_4b {
414
+ cd ${SRC}
415
+
416
+ gg_wget models-mnt/pythia/1.4B/ https://huggingface.co/EleutherAI/pythia-1.4b/raw/main/config.json
417
+ gg_wget models-mnt/pythia/1.4B/ https://huggingface.co/EleutherAI/pythia-1.4b/raw/main/tokenizer.json
418
+ gg_wget models-mnt/pythia/1.4B/ https://huggingface.co/EleutherAI/pythia-1.4b/raw/main/tokenizer_config.json
419
+ gg_wget models-mnt/pythia/1.4B/ https://huggingface.co/EleutherAI/pythia-1.4b/raw/main/special_tokens_map.json
420
+ gg_wget models-mnt/pythia/1.4B/ https://huggingface.co/EleutherAI/pythia-1.4b/resolve/main/pytorch_model.bin
421
+
422
+ gg_wget models-mnt/wikitext/ https://huggingface.co/datasets/ggml-org/ci/resolve/main/wikitext-2-raw-v1.zip
423
+ unzip -o models-mnt/wikitext/wikitext-2-raw-v1.zip -d models-mnt/wikitext/
424
+ head -n 60 models-mnt/wikitext/wikitext-2-raw/wiki.test.raw > models-mnt/wikitext/wikitext-2-raw/wiki.test-60.raw
425
+
426
+ path_models="../models-mnt/pythia/1.4B"
427
+ path_wiki="../models-mnt/wikitext/wikitext-2-raw"
428
+
429
+ rm -rf build-ci-release && mkdir build-ci-release && cd build-ci-release
430
+
431
+ set -e
432
+
433
+ (time cmake -DCMAKE_BUILD_TYPE=Release ${CMAKE_EXTRA} .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log
434
+ (time make -j$(nproc) ) 2>&1 | tee -a $OUT/${ci}-make.log
435
+
436
+ python3 ../convert_hf_to_gguf.py ${path_models} --outfile ${path_models}/ggml-model-f16.gguf
437
+
438
+ model_f16="${path_models}/ggml-model-f16.gguf"
439
+ model_q8_0="${path_models}/ggml-model-q8_0.gguf"
440
+ model_q4_0="${path_models}/ggml-model-q4_0.gguf"
441
+ model_q4_1="${path_models}/ggml-model-q4_1.gguf"
442
+ model_q5_0="${path_models}/ggml-model-q5_0.gguf"
443
+ model_q5_1="${path_models}/ggml-model-q5_1.gguf"
444
+ model_q2_k="${path_models}/ggml-model-q2_k.gguf"
445
+ model_q3_k="${path_models}/ggml-model-q3_k.gguf"
446
+ model_q4_k="${path_models}/ggml-model-q4_k.gguf"
447
+ model_q5_k="${path_models}/ggml-model-q5_k.gguf"
448
+ model_q6_k="${path_models}/ggml-model-q6_k.gguf"
449
+
450
+ wiki_test_60="${path_wiki}/wiki.test-60.raw"
451
+
452
+ ./bin/llama-quantize ${model_f16} ${model_q8_0} q8_0
453
+ ./bin/llama-quantize ${model_f16} ${model_q4_0} q4_0
454
+ ./bin/llama-quantize ${model_f16} ${model_q4_1} q4_1
455
+ ./bin/llama-quantize ${model_f16} ${model_q5_0} q5_0
456
+ ./bin/llama-quantize ${model_f16} ${model_q5_1} q5_1
457
+ ./bin/llama-quantize ${model_f16} ${model_q2_k} q2_k
458
+ ./bin/llama-quantize ${model_f16} ${model_q3_k} q3_k
459
+ ./bin/llama-quantize ${model_f16} ${model_q4_k} q4_k
460
+ ./bin/llama-quantize ${model_f16} ${model_q5_k} q5_k
461
+ ./bin/llama-quantize ${model_f16} ${model_q6_k} q6_k
462
+
463
+ (time ./bin/llama-cli --model ${model_f16} -ngl 99 -c 0 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log
464
+ (time ./bin/llama-cli --model ${model_q8_0} -ngl 99 -c 0 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log
465
+ (time ./bin/llama-cli --model ${model_q4_0} -ngl 99 -c 0 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_0.log
466
+ (time ./bin/llama-cli --model ${model_q4_1} -ngl 99 -c 0 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_1.log
467
+ (time ./bin/llama-cli --model ${model_q5_0} -ngl 99 -c 0 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_0.log
468
+ (time ./bin/llama-cli --model ${model_q5_1} -ngl 99 -c 0 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_1.log
469
+ (time ./bin/llama-cli --model ${model_q2_k} -ngl 99 -c 0 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q2_k.log
470
+ (time ./bin/llama-cli --model ${model_q3_k} -ngl 99 -c 0 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q3_k.log
471
+ (time ./bin/llama-cli --model ${model_q4_k} -ngl 99 -c 0 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_k.log
472
+ (time ./bin/llama-cli --model ${model_q5_k} -ngl 99 -c 0 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_k.log
473
+ (time ./bin/llama-cli --model ${model_q6_k} -ngl 99 -c 0 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q6_k.log
474
+
475
+ (time ./bin/llama-perplexity --model ${model_f16} -f ${wiki_test_60} -ngl 99 -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log
476
+ (time ./bin/llama-perplexity --model ${model_q8_0} -f ${wiki_test_60} -ngl 99 -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log
477
+ (time ./bin/llama-perplexity --model ${model_q4_0} -f ${wiki_test_60} -ngl 99 -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_0.log
478
+ (time ./bin/llama-perplexity --model ${model_q4_1} -f ${wiki_test_60} -ngl 99 -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_1.log
479
+ (time ./bin/llama-perplexity --model ${model_q5_0} -f ${wiki_test_60} -ngl 99 -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_0.log
480
+ (time ./bin/llama-perplexity --model ${model_q5_1} -f ${wiki_test_60} -ngl 99 -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_1.log
481
+ (time ./bin/llama-perplexity --model ${model_q2_k} -f ${wiki_test_60} -ngl 99 -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q2_k.log
482
+ (time ./bin/llama-perplexity --model ${model_q3_k} -f ${wiki_test_60} -ngl 99 -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q3_k.log
483
+ (time ./bin/llama-perplexity --model ${model_q4_k} -f ${wiki_test_60} -ngl 99 -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_k.log
484
+ (time ./bin/llama-perplexity --model ${model_q5_k} -f ${wiki_test_60} -ngl 99 -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_k.log
485
+ (time ./bin/llama-perplexity --model ${model_q6_k} -f ${wiki_test_60} -ngl 99 -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q6_k.log
486
+
487
+ (time ./bin/llama-imatrix --model ${model_f16} -f ${wiki_test_60} -ngl 99 -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-imatrix.log
488
+
489
+ (time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 99 -c 0 ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
490
+ (time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 99 -c 0 -fa ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
491
+
492
+ function check_ppl {
493
+ qnt="$1"
494
+ ppl=$(echo "$2" | grep -oE "[0-9]+\.[0-9]+" | tail -n 1)
495
+
496
+ if [ $(echo "$ppl > 20.0" | bc) -eq 1 ]; then
497
+ printf ' - %s @ %s (FAIL: ppl > 20.0)\n' "$qnt" "$ppl"
498
+ return 20
499
+ fi
500
+
501
+ printf ' - %s @ %s OK\n' "$qnt" "$ppl"
502
+ return 0
503
+ }
504
+
505
+ check_ppl "f16" "$(cat $OUT/${ci}-tg-f16.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
506
+ check_ppl "q8_0" "$(cat $OUT/${ci}-tg-q8_0.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
507
+ check_ppl "q4_0" "$(cat $OUT/${ci}-tg-q4_0.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
508
+ check_ppl "q4_1" "$(cat $OUT/${ci}-tg-q4_1.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
509
+ check_ppl "q5_0" "$(cat $OUT/${ci}-tg-q5_0.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
510
+ check_ppl "q5_1" "$(cat $OUT/${ci}-tg-q5_1.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
511
+ #check_ppl "q2_k" "$(cat $OUT/${ci}-tg-q2_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log # note: ppl > 20.0 for this quant and model
512
+ check_ppl "q3_k" "$(cat $OUT/${ci}-tg-q3_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
513
+ check_ppl "q4_k" "$(cat $OUT/${ci}-tg-q4_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
514
+ check_ppl "q5_k" "$(cat $OUT/${ci}-tg-q5_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
515
+ check_ppl "q6_k" "$(cat $OUT/${ci}-tg-q6_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
516
+
517
+ cat $OUT/${ci}-imatrix.log | grep "Final" >> $OUT/${ci}-imatrix-sum.log
518
+
519
+ set +e
520
+ }
521
+
522
+ function gg_sum_pythia_1_4b {
523
+ gg_printf '### %s\n\n' "${ci}"
524
+
525
+ gg_printf 'Pythia 1.4B:\n'
526
+ gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)"
527
+ gg_printf '- perplexity:\n%s\n' "$(cat $OUT/${ci}-ppl.log)"
528
+ gg_printf '- imatrix:\n```\n%s\n```\n' "$(cat $OUT/${ci}-imatrix-sum.log)"
529
+ gg_printf '- f16: \n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-f16.log)"
530
+ gg_printf '- q8_0:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q8_0.log)"
531
+ gg_printf '- q4_0:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q4_0.log)"
532
+ gg_printf '- q4_1:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q4_1.log)"
533
+ gg_printf '- q5_0:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q5_0.log)"
534
+ gg_printf '- q5_1:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q5_1.log)"
535
+ gg_printf '- q2_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q2_k.log)"
536
+ gg_printf '- q3_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q3_k.log)"
537
+ gg_printf '- q4_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q4_k.log)"
538
+ gg_printf '- q5_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q5_k.log)"
539
+ gg_printf '- q6_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q6_k.log)"
540
+ gg_printf '- save-load-state: \n```\n%s\n```\n' "$(cat $OUT/${ci}-save-load-state.log)"
541
+ }
542
+
543
+ # pythia_2_8b
544
+
545
+ function gg_run_pythia_2_8b {
546
+ cd ${SRC}
547
+
548
+ gg_wget models-mnt/pythia/2.8B/ https://huggingface.co/EleutherAI/pythia-2.8b/raw/main/config.json
549
+ gg_wget models-mnt/pythia/2.8B/ https://huggingface.co/EleutherAI/pythia-2.8b/raw/main/tokenizer.json
550
+ gg_wget models-mnt/pythia/2.8B/ https://huggingface.co/EleutherAI/pythia-2.8b/raw/main/tokenizer_config.json
551
+ gg_wget models-mnt/pythia/2.8B/ https://huggingface.co/EleutherAI/pythia-2.8b/raw/main/special_tokens_map.json
552
+ gg_wget models-mnt/pythia/2.8B/ https://huggingface.co/EleutherAI/pythia-2.8b/resolve/main/pytorch_model.bin
553
+
554
+ gg_wget models-mnt/wikitext/ https://huggingface.co/datasets/ggml-org/ci/resolve/main/wikitext-2-raw-v1.zip
555
+ unzip -o models-mnt/wikitext/wikitext-2-raw-v1.zip -d models-mnt/wikitext/
556
+
557
+ path_models="../models-mnt/pythia/2.8B"
558
+ path_wiki="../models-mnt/wikitext/wikitext-2-raw"
559
+
560
+ rm -rf build-ci-release && mkdir build-ci-release && cd build-ci-release
561
+
562
+ set -e
563
+
564
+ (time cmake -DCMAKE_BUILD_TYPE=Release ${CMAKE_EXTRA} .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log
565
+ (time make -j$(nproc) ) 2>&1 | tee -a $OUT/${ci}-make.log
566
+
567
+ python3 ../convert_hf_to_gguf.py ${path_models} --outfile ${path_models}/ggml-model-f16.gguf
568
+
569
+ model_f16="${path_models}/ggml-model-f16.gguf"
570
+ model_q8_0="${path_models}/ggml-model-q8_0.gguf"
571
+ model_q4_0="${path_models}/ggml-model-q4_0.gguf"
572
+ model_q4_1="${path_models}/ggml-model-q4_1.gguf"
573
+ model_q5_0="${path_models}/ggml-model-q5_0.gguf"
574
+ model_q5_1="${path_models}/ggml-model-q5_1.gguf"
575
+ model_q2_k="${path_models}/ggml-model-q2_k.gguf"
576
+ model_q3_k="${path_models}/ggml-model-q3_k.gguf"
577
+ model_q4_k="${path_models}/ggml-model-q4_k.gguf"
578
+ model_q5_k="${path_models}/ggml-model-q5_k.gguf"
579
+ model_q6_k="${path_models}/ggml-model-q6_k.gguf"
580
+
581
+ wiki_test="${path_wiki}/wiki.test.raw"
582
+
583
+ ./bin/llama-quantize ${model_f16} ${model_q8_0} q8_0
584
+ ./bin/llama-quantize ${model_f16} ${model_q4_0} q4_0
585
+ ./bin/llama-quantize ${model_f16} ${model_q4_1} q4_1
586
+ ./bin/llama-quantize ${model_f16} ${model_q5_0} q5_0
587
+ ./bin/llama-quantize ${model_f16} ${model_q5_1} q5_1
588
+ ./bin/llama-quantize ${model_f16} ${model_q2_k} q2_k
589
+ ./bin/llama-quantize ${model_f16} ${model_q3_k} q3_k
590
+ ./bin/llama-quantize ${model_f16} ${model_q4_k} q4_k
591
+ ./bin/llama-quantize ${model_f16} ${model_q5_k} q5_k
592
+ ./bin/llama-quantize ${model_f16} ${model_q6_k} q6_k
593
+
594
+ (time ./bin/llama-cli --model ${model_f16} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log
595
+ (time ./bin/llama-cli --model ${model_q8_0} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log
596
+ (time ./bin/llama-cli --model ${model_q4_0} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_0.log
597
+ (time ./bin/llama-cli --model ${model_q4_1} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_1.log
598
+ (time ./bin/llama-cli --model ${model_q5_0} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_0.log
599
+ (time ./bin/llama-cli --model ${model_q5_1} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_1.log
600
+ (time ./bin/llama-cli --model ${model_q2_k} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q2_k.log
601
+ (time ./bin/llama-cli --model ${model_q3_k} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q3_k.log
602
+ (time ./bin/llama-cli --model ${model_q4_k} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_k.log
603
+ (time ./bin/llama-cli --model ${model_q5_k} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_k.log
604
+ (time ./bin/llama-cli --model ${model_q6_k} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q6_k.log
605
+
606
+ (time ./bin/llama-perplexity --model ${model_f16} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log
607
+ (time ./bin/llama-perplexity --model ${model_q8_0} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log
608
+ (time ./bin/llama-perplexity --model ${model_q4_0} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_0.log
609
+ (time ./bin/llama-perplexity --model ${model_q4_1} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_1.log
610
+ (time ./bin/llama-perplexity --model ${model_q5_0} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_0.log
611
+ (time ./bin/llama-perplexity --model ${model_q5_1} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_1.log
612
+ (time ./bin/llama-perplexity --model ${model_q2_k} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q2_k.log
613
+ (time ./bin/llama-perplexity --model ${model_q3_k} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q3_k.log
614
+ (time ./bin/llama-perplexity --model ${model_q4_k} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_k.log
615
+ (time ./bin/llama-perplexity --model ${model_q5_k} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_k.log
616
+ (time ./bin/llama-perplexity --model ${model_q6_k} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q6_k.log
617
+
618
+ (time ./bin/llama-imatrix --model ${model_f16} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-imatrix.log
619
+
620
+ (time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 10 -c 0 ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
621
+ (time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 10 -c 0 -fa ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
622
+ (time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 99 -c 0 ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
623
+ (time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 99 -c 0 -fa ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
624
+
625
+ function check_ppl {
626
+ qnt="$1"
627
+ ppl=$(echo "$2" | grep -oE "[0-9]+\.[0-9]+" | tail -n 1)
628
+
629
+ if [ $(echo "$ppl > 20.0" | bc) -eq 1 ]; then
630
+ printf ' - %s @ %s (FAIL: ppl > 20.0)\n' "$qnt" "$ppl"
631
+ return 20
632
+ fi
633
+
634
+ printf ' - %s @ %s OK\n' "$qnt" "$ppl"
635
+ return 0
636
+ }
637
+
638
+ check_ppl "f16" "$(cat $OUT/${ci}-tg-f16.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
639
+ check_ppl "q8_0" "$(cat $OUT/${ci}-tg-q8_0.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
640
+ check_ppl "q4_0" "$(cat $OUT/${ci}-tg-q4_0.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
641
+ check_ppl "q4_1" "$(cat $OUT/${ci}-tg-q4_1.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
642
+ check_ppl "q5_0" "$(cat $OUT/${ci}-tg-q5_0.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
643
+ check_ppl "q5_1" "$(cat $OUT/${ci}-tg-q5_1.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
644
+ #check_ppl "q2_k" "$(cat $OUT/${ci}-tg-q2_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log # note: ppl > 20.0 for this quant and model
645
+ check_ppl "q3_k" "$(cat $OUT/${ci}-tg-q3_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
646
+ check_ppl "q4_k" "$(cat $OUT/${ci}-tg-q4_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
647
+ check_ppl "q5_k" "$(cat $OUT/${ci}-tg-q5_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
648
+ check_ppl "q6_k" "$(cat $OUT/${ci}-tg-q6_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
649
+
650
+ cat $OUT/${ci}-imatrix.log | grep "Final" >> $OUT/${ci}-imatrix-sum.log
651
+
652
+ set +e
653
+ }
654
+
655
+ function gg_sum_pythia_2_8b {
656
+ gg_printf '### %s\n\n' "${ci}"
657
+
658
+ gg_printf 'Pythia 2.8B:\n'
659
+ gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)"
660
+ gg_printf '- perplexity:\n%s\n' "$(cat $OUT/${ci}-ppl.log)"
661
+ gg_printf '- imatrix:\n```\n%s\n```\n' "$(cat $OUT/${ci}-imatrix-sum.log)"
662
+ gg_printf '- f16: \n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-f16.log)"
663
+ gg_printf '- q8_0:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q8_0.log)"
664
+ gg_printf '- q4_0:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q4_0.log)"
665
+ gg_printf '- q4_1:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q4_1.log)"
666
+ gg_printf '- q5_0:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q5_0.log)"
667
+ gg_printf '- q5_1:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q5_1.log)"
668
+ gg_printf '- q2_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q2_k.log)"
669
+ gg_printf '- q3_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q3_k.log)"
670
+ gg_printf '- q4_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q4_k.log)"
671
+ gg_printf '- q5_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q5_k.log)"
672
+ gg_printf '- q6_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q6_k.log)"
673
+ gg_printf '- save-load-state: \n```\n%s\n```\n' "$(cat $OUT/${ci}-save-load-state.log)"
674
+ }
675
+
676
+ # bge-small
677
+
+ function gg_run_embd_bge_small {
+     cd ${SRC}
+
+     gg_wget models-mnt/bge-small/ https://huggingface.co/BAAI/bge-small-en-v1.5/raw/main/config.json
+     gg_wget models-mnt/bge-small/ https://huggingface.co/BAAI/bge-small-en-v1.5/raw/main/tokenizer.json
+     gg_wget models-mnt/bge-small/ https://huggingface.co/BAAI/bge-small-en-v1.5/raw/main/tokenizer_config.json
+     gg_wget models-mnt/bge-small/ https://huggingface.co/BAAI/bge-small-en-v1.5/raw/main/special_tokens_map.json
+     gg_wget models-mnt/bge-small/ https://huggingface.co/BAAI/bge-small-en-v1.5/resolve/main/pytorch_model.bin
+     gg_wget models-mnt/bge-small/ https://huggingface.co/BAAI/bge-small-en-v1.5/raw/main/sentence_bert_config.json
+     gg_wget models-mnt/bge-small/ https://huggingface.co/BAAI/bge-small-en-v1.5/raw/main/vocab.txt
+     gg_wget models-mnt/bge-small/ https://huggingface.co/BAAI/bge-small-en-v1.5/raw/main/modules.json
+     gg_wget models-mnt/bge-small/ https://huggingface.co/BAAI/bge-small-en-v1.5/raw/main/config.json
+
+     gg_wget models-mnt/bge-small/1_Pooling https://huggingface.co/BAAI/bge-small-en-v1.5/raw/main/1_Pooling/config.json
+
+     path_models="../models-mnt/bge-small"
+
+     rm -rf build-ci-release && mkdir build-ci-release && cd build-ci-release
+
+     set -e
+
+     (time cmake -DCMAKE_BUILD_TYPE=Release ${CMAKE_EXTRA} .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log
+     (time make -j$(nproc) ) 2>&1 | tee -a $OUT/${ci}-make.log
+
+     python3 ../convert_hf_to_gguf.py ${path_models} --outfile ${path_models}/ggml-model-f16.gguf
+
+     model_f16="${path_models}/ggml-model-f16.gguf"
+     model_q8_0="${path_models}/ggml-model-q8_0.gguf"
+
+     ./bin/llama-quantize ${model_f16} ${model_q8_0} q8_0
+
+     (time ./bin/llama-embedding --model ${model_f16} -p "I believe the meaning of life is" -ngl 99 -c 0 ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log
+     (time ./bin/llama-embedding --model ${model_q8_0} -p "I believe the meaning of life is" -ngl 99 -c 0 ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log
+
+     set +e
+ }
+
+ function gg_sum_embd_bge_small {
+     gg_printf '### %s\n\n' "${ci}"
+
+     gg_printf 'BGE Small (BERT):\n'
+     gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)"
+     gg_printf '- f16: \n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-f16.log)"
+     gg_printf '- q8_0:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q8_0.log)"
+ }
+
+ # rerank_tiny
+
+ function gg_run_rerank_tiny {
+     cd ${SRC}
+
+     gg_wget models-mnt/rerank-tiny/ https://huggingface.co/jinaai/jina-reranker-v1-tiny-en/raw/main/config.json
+     gg_wget models-mnt/rerank-tiny/ https://huggingface.co/jinaai/jina-reranker-v1-tiny-en/raw/main/tokenizer.json
+     gg_wget models-mnt/rerank-tiny/ https://huggingface.co/jinaai/jina-reranker-v1-tiny-en/raw/main/tokenizer_config.json
+     gg_wget models-mnt/rerank-tiny/ https://huggingface.co/jinaai/jina-reranker-v1-tiny-en/raw/main/special_tokens_map.json
+     gg_wget models-mnt/rerank-tiny/ https://huggingface.co/jinaai/jina-reranker-v1-tiny-en/resolve/main/pytorch_model.bin
+     gg_wget models-mnt/rerank-tiny/ https://huggingface.co/jinaai/jina-reranker-v1-tiny-en/raw/main/sentence_bert_config.json
+     gg_wget models-mnt/rerank-tiny/ https://huggingface.co/jinaai/jina-reranker-v1-tiny-en/raw/main/vocab.txt
+     gg_wget models-mnt/rerank-tiny/ https://huggingface.co/jinaai/jina-reranker-v1-tiny-en/raw/main/modules.json
+     gg_wget models-mnt/rerank-tiny/ https://huggingface.co/jinaai/jina-reranker-v1-tiny-en/raw/main/config.json
+
+     gg_wget models-mnt/rerank-tiny/1_Pooling https://huggingface.co/jinaai/jina-reranker-v1-tiny-en/raw/main/1_Pooling/config.json
+
+     path_models="../models-mnt/rerank-tiny"
+
+     rm -rf build-ci-release && mkdir build-ci-release && cd build-ci-release
+
+     set -e
+
+     (time cmake -DCMAKE_BUILD_TYPE=Release ${CMAKE_EXTRA} .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log
+     (time make -j$(nproc) ) 2>&1 | tee -a $OUT/${ci}-make.log
+
+     python3 ../convert_hf_to_gguf.py ${path_models} --outfile ${path_models}/ggml-model-f16.gguf
+
+     model_f16="${path_models}/ggml-model-f16.gguf"
+
+     # for this model, the SEP token is "</s>"
+     (time ./bin/llama-embedding --model ${model_f16} -p "what is panda?</s></s>hi\nwhat is panda?</s></s>it's a bear\nwhat is panda?</s></s>The giant panda (Ailuropoda melanoleuca), sometimes called a panda bear or simply panda, is a bear species endemic to China." -ngl 99 -c 0 --pooling rank --embd-normalize -1 --verbose-prompt) 2>&1 | tee -a $OUT/${ci}-rk-f16.log
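+     # note: the prompt above packs three query</s></s>document pairs (separated by "\n");
+     #       with --pooling rank the model emits one relevance score per pair, as in the sample output below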
+
+     # sample output
+     # rerank score 0: 0.029
+     # rerank score 1: 0.029
+     # rerank score 2: 0.135
+
+     # check that the score is in the range [$3, $4]
+     function check_score {
+         qnt="$1"
+         score=$(echo "$2" | grep -oE "[0-9]+\.[0-9]+" | tail -n 1)
+
+         if [ $(echo "$score < $3" | bc) -eq 1 ] || [ $(echo "$score > $4" | bc) -eq 1 ]; then
+             printf ' - %s @ %s (FAIL: score not in range [%s, %s])\n' "$qnt" "$score" "$3" "$4"
+             return 20
+         fi
+
+         printf ' - %s @ %s OK\n' "$qnt" "$score"
+         return 0
+     }
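+     # e.g. with bounds 0.10 and 0.30, a score of 0.135: "0.135 < 0.10" and "0.135 > 0.30" both
+     # evaluate to 0 under bc, so neither branch triggers and the check reports OK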
+
+     check_score "rerank score 0" "$(cat $OUT/${ci}-rk-f16.log | grep "rerank score 0")" "0.00" "0.05" | tee -a $OUT/${ci}-rk-f16.log
+     check_score "rerank score 1" "$(cat $OUT/${ci}-rk-f16.log | grep "rerank score 1")" "0.00" "0.05" | tee -a $OUT/${ci}-rk-f16.log
+     check_score "rerank score 2" "$(cat $OUT/${ci}-rk-f16.log | grep "rerank score 2")" "0.10" "0.30" | tee -a $OUT/${ci}-rk-f16.log
+
+     set +e
+ }
+
+ function gg_sum_rerank_tiny {
+     gg_printf '### %s\n\n' "${ci}"
+
+     gg_printf 'Rerank Tiny (Jina):\n'
+     gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)"
+     gg_printf '- f16: \n```\n%s\n```\n' "$(cat $OUT/${ci}-rk-f16.log)"
+ }
+
+ function gg_check_build_requirements {
+     if ! command -v cmake &> /dev/null; then
+         gg_printf 'cmake not found, please install'
+     fi
+
+     if ! command -v make &> /dev/null; then
+         gg_printf 'make not found, please install'
+     fi
+
+     if ! command -v ctest &> /dev/null; then
+         gg_printf 'ctest not found, please install'
+     fi
+ }
+
+ ## main
+
+ export LLAMA_LOG_PREFIX=1
+ export LLAMA_LOG_TIMESTAMPS=1
+
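+ # low-perf runners (GG_BUILD_LOW_PERF set) skip the model download/venv setup below;
+ # GG_BUILD_CLOUD / GG_BUILD_EXTRA_TESTS_0 and GG_BUILD_VRAM_GB gate the heavier runs further down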
+ if [ -z ${GG_BUILD_LOW_PERF} ]; then
+     # Create symlink: ./llama.cpp/models-mnt -> $MNT/models/models-mnt
+     rm -rf ${SRC}/models-mnt
+     mnt_models=${MNT}/models
+     mkdir -p ${mnt_models}
+     ln -sfn ${mnt_models} ${SRC}/models-mnt
+
+     # Create a fresh python3 venv and enter it
+     python3 -m venv "$MNT/venv"
+     source "$MNT/venv/bin/activate"
+
+     pip install -r ${SRC}/requirements.txt --disable-pip-version-check
+     pip install --editable gguf-py --disable-pip-version-check
+ fi
+
+ ret=0
+
+ test $ret -eq 0 && gg_run ctest_debug
+ test $ret -eq 0 && gg_run ctest_release
+
+ if [ -z ${GG_BUILD_LOW_PERF} ]; then
+     test $ret -eq 0 && gg_run embd_bge_small
+     test $ret -eq 0 && gg_run rerank_tiny
+
+     if [ -z ${GG_BUILD_CLOUD} ] || [ ${GG_BUILD_EXTRA_TESTS_0} ]; then
+         test $ret -eq 0 && gg_run test_scripts_debug
+         test $ret -eq 0 && gg_run test_scripts_release
+     fi
+
+     if [ -z ${GG_BUILD_VRAM_GB} ] || [ ${GG_BUILD_VRAM_GB} -ge 8 ]; then
+         if [ -z ${GG_BUILD_CUDA} ] && [ -z ${GG_BUILD_VULKAN} ]; then
+             test $ret -eq 0 && gg_run pythia_1_4b
+         else
+             test $ret -eq 0 && gg_run pythia_2_8b
+             #test $ret -eq 0 && gg_run open_llama_7b_v2
+         fi
+         test $ret -eq 0 && gg_run ctest_with_model_debug
+         test $ret -eq 0 && gg_run ctest_with_model_release
+     fi
+ fi
+
+ exit $ret