diff --git a/.clang-tidy b/.clang-tidy new file mode 100644 index 0000000000000000000000000000000000000000..952c0cca82580e2068864c6ee2344c167a4aff02 --- /dev/null +++ b/.clang-tidy @@ -0,0 +1,24 @@ +--- +Checks: > + bugprone-*, + -bugprone-easily-swappable-parameters, + -bugprone-implicit-widening-of-multiplication-result, + -bugprone-misplaced-widening-cast, + -bugprone-narrowing-conversions, + readability-*, + -readability-avoid-unconditional-preprocessor-if, + -readability-function-cognitive-complexity, + -readability-identifier-length, + -readability-implicit-bool-conversion, + -readability-magic-numbers, + -readability-uppercase-literal-suffix, + -readability-simplify-boolean-expr, + clang-analyzer-*, + -clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling, + performance-*, + portability-*, + misc-*, + -misc-const-correctness, + -misc-non-private-member-variables-in-classes, + -misc-no-recursion, +FormatStyle: none diff --git a/.devops/cloud-v-pipeline b/.devops/cloud-v-pipeline new file mode 100644 index 0000000000000000000000000000000000000000..af8c0cea6155c71c0c162e034715908ea16ee9cf --- /dev/null +++ b/.devops/cloud-v-pipeline @@ -0,0 +1,22 @@ +node('x86_runner1'){ // Running on an x86 runner containing the latest vector qemu, the latest vector gcc and all the necessary libraries + stage('Cleanup'){ + cleanWs() // Clean the previous CI build from the workspace + } + stage('checkout repo'){ + retry(5){ // Retry if the cloning fails for any reason + checkout scm // Clone the repo on the runner + } + } + stage('Compiling llama.cpp'){ + sh'''#!/bin/bash + make RISCV=1 RISCV_CROSS_COMPILE=1 # Compiling llama.cpp for RISC-V + ''' + } + stage('Running llama.cpp'){ + sh'''#!/bin/bash + module load gnu-bin2/0.1 # loading latest versions of vector qemu and vector gcc + qemu-riscv64 -L /softwares/gnu-bin2/sysroot -cpu rv64,v=true,vlen=256,elen=64,vext_spec=v1.0 ./llama-cli -m /home/alitariq/codellama-7b.Q4_K_M.gguf -p "Anything" -n 9 > llama_log.txt # Running llama.cpp on vector qemu-riscv64 + cat llama_log.txt # Printing results + ''' + } +} diff --git a/.devops/full-cuda.Dockerfile b/.devops/full-cuda.Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..d5acd35e204d3320df346687d552bb6a9dfa26be --- /dev/null +++ b/.devops/full-cuda.Dockerfile @@ -0,0 +1,33 @@ +ARG UBUNTU_VERSION=22.04 +# This needs to generally match the container host's environment. +ARG CUDA_VERSION=12.6.0 +# Target the CUDA build image +ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION} + +FROM ${BASE_CUDA_DEV_CONTAINER} AS build + +# CUDA architecture to build for (defaults to all supported archs) +ARG CUDA_DOCKER_ARCH=default + +RUN apt-get update && \ + apt-get install -y build-essential cmake python3 python3-pip git libcurl4-openssl-dev libgomp1 + +COPY requirements.txt requirements.txt +COPY requirements requirements + +RUN pip install --upgrade pip setuptools wheel \ + && pip install -r requirements.txt + +WORKDIR /app + +COPY . . + +# Use the default CUDA archs if not specified +RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \ + export CMAKE_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH}"; \ + fi && \ + cmake -B build -DGGML_CUDA=ON -DLLAMA_CURL=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \ + cmake --build build --config Release -j$(nproc) && \ + cp build/bin/* . 
+ +ENTRYPOINT ["/app/.devops/tools.sh"] diff --git a/.devops/full-musa.Dockerfile b/.devops/full-musa.Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..34ba856d3d1ca9e0525884c82037eb57a88f4dde --- /dev/null +++ b/.devops/full-musa.Dockerfile @@ -0,0 +1,26 @@ +ARG UBUNTU_VERSION=22.04 +# This needs to generally match the container host's environment. +ARG MUSA_VERSION=rc3.1.0 +# Target the MUSA build image +ARG BASE_MUSA_DEV_CONTAINER=mthreads/musa:${MUSA_VERSION}-devel-ubuntu${UBUNTU_VERSION} + +FROM ${BASE_MUSA_DEV_CONTAINER} AS build + +RUN apt-get update && \ + apt-get install -y build-essential cmake python3 python3-pip git libcurl4-openssl-dev libgomp1 + +COPY requirements.txt requirements.txt +COPY requirements requirements + +RUN pip install --upgrade pip setuptools wheel \ + && pip install -r requirements.txt + +WORKDIR /app + +COPY . . + +RUN cmake -B build -DGGML_MUSA=ON -DLLAMA_CURL=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \ + cmake --build build --config Release -j$(nproc) && \ + cp build/bin/* . + +ENTRYPOINT ["/app/.devops/tools.sh"] diff --git a/.devops/full-rocm.Dockerfile b/.devops/full-rocm.Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..df496bcd2b7ee772425fc5ba4dc3a0701a8b04be --- /dev/null +++ b/.devops/full-rocm.Dockerfile @@ -0,0 +1,50 @@ +ARG UBUNTU_VERSION=22.04 + +# This needs to generally match the container host's environment. +ARG ROCM_VERSION=5.6 + +# Target the ROCm build image +ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete + +FROM ${BASE_ROCM_DEV_CONTAINER} AS build + +# Unless otherwise specified, we make a fat build. +# List from https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878 +# This is mostly tied to rocBLAS supported archs. +ARG ROCM_DOCKER_ARCH="\ + gfx803 \ + gfx900 \ + gfx906 \ + gfx908 \ + gfx90a \ + gfx1010 \ + gfx1030 \ + gfx1100 \ + gfx1101 \ + gfx1102" + +COPY requirements.txt requirements.txt +COPY requirements requirements + +RUN pip install --upgrade pip setuptools wheel \ + && pip install -r requirements.txt + +WORKDIR /app + +COPY . . + +# Set ROCm architectures to build for +ENV AMDGPU_TARGETS=${ROCM_DOCKER_ARCH} +# Enable ROCm +ENV GGML_HIPBLAS=1 +ENV CC=/opt/rocm/llvm/bin/clang +ENV CXX=/opt/rocm/llvm/bin/clang++ + +# Enable cURL +ENV LLAMA_CURL=1 +RUN apt-get update && \ + apt-get install -y libcurl4-openssl-dev + +RUN make -j$(nproc) + +ENTRYPOINT ["/app/.devops/tools.sh"] diff --git a/.devops/full.Dockerfile b/.devops/full.Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..2a06f82b738aee88ed82266c710e7c942e408976 --- /dev/null +++ b/.devops/full.Dockerfile @@ -0,0 +1,25 @@ +ARG UBUNTU_VERSION=22.04 + +FROM ubuntu:$UBUNTU_VERSION AS build + +RUN apt-get update && \ + apt-get install -y build-essential python3 python3-pip git libcurl4-openssl-dev libgomp1 + +COPY requirements.txt requirements.txt +COPY requirements requirements + +RUN pip install --upgrade pip setuptools wheel \ + && pip install -r requirements.txt + +WORKDIR /app + +COPY . . 
+ +ENV LLAMA_CURL=1 + + +RUN make -j$(nproc) + +ENV LC_ALL=C.utf8 + +ENTRYPOINT ["/app/.devops/tools.sh"] diff --git a/.devops/llama-cli-cann.Dockerfile b/.devops/llama-cli-cann.Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..db5ba2f25ea6727772c63164afea0dbedb7b9ab1 --- /dev/null +++ b/.devops/llama-cli-cann.Dockerfile @@ -0,0 +1,44 @@ +ARG ASCEND_VERSION=8.0.rc2.alpha003-910b-openeuler22.03-py3.8 + +FROM cosdt/cann:$ASCEND_VERSION AS build + +WORKDIR /app + +COPY . . + +RUN yum install -y gcc g++ cmake make +ENV ASCEND_TOOLKIT_HOME=/usr/local/Ascend/ascend-toolkit/latest +ENV LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:$LIBRARY_PATH +ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${ASCEND_TOOLKIT_HOME}/lib64/plugin/opskernel:${ASCEND_TOOLKIT_HOME}/lib64/plugin/nnengine:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe/op_tiling:${LD_LIBRARY_PATH} +ENV PYTHONPATH=${ASCEND_TOOLKIT_HOME}/python/site-packages:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe:${PYTHONPATH} +ENV PATH=${ASCEND_TOOLKIT_HOME}/bin:${ASCEND_TOOLKIT_HOME}/compiler/ccec_compiler/bin:${PATH} +ENV ASCEND_AICPU_PATH=${ASCEND_TOOLKIT_HOME} +ENV ASCEND_OPP_PATH=${ASCEND_TOOLKIT_HOME}/opp +ENV TOOLCHAIN_HOME=${ASCEND_TOOLKIT_HOME}/toolkit +ENV ASCEND_HOME_PATH=${ASCEND_TOOLKIT_HOME} + +# Find libascend_hal.so, because the driver hasn't been mounted. +ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/runtime/lib64/stub:$LD_LIBRARY_PATH + +RUN echo "Building with static libs" && \ + source /usr/local/Ascend/ascend-toolkit/set_env.sh --force && \ + cmake -B build -DGGML_CANN=ON -DBUILD_SHARED_LIBS=OFF && \ + cmake --build build --config Release --target llama-cli + +# TODO: use image with NNRT +FROM cosdt/cann:$ASCEND_VERSION AS runtime +COPY --from=build /app/build/bin/llama-cli /llama-cli + +ENV LC_ALL=C.utf8 + +ENV ASCEND_TOOLKIT_HOME=/usr/local/Ascend/ascend-toolkit/latest +ENV LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:$LIBRARY_PATH +ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${ASCEND_TOOLKIT_HOME}/lib64/plugin/opskernel:${ASCEND_TOOLKIT_HOME}/lib64/plugin/nnengine:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe/op_tiling:${LD_LIBRARY_PATH} +ENV PYTHONPATH=${ASCEND_TOOLKIT_HOME}/python/site-packages:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe:${PYTHONPATH} +ENV PATH=${ASCEND_TOOLKIT_HOME}/bin:${ASCEND_TOOLKIT_HOME}/compiler/ccec_compiler/bin:${PATH} +ENV ASCEND_AICPU_PATH=${ASCEND_TOOLKIT_HOME} +ENV ASCEND_OPP_PATH=${ASCEND_TOOLKIT_HOME}/opp +ENV TOOLCHAIN_HOME=${ASCEND_TOOLKIT_HOME}/toolkit +ENV ASCEND_HOME_PATH=${ASCEND_TOOLKIT_HOME} + +ENTRYPOINT [ "/llama-cli" ] diff --git a/.devops/llama-cli-cuda.Dockerfile b/.devops/llama-cli-cuda.Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..b75163b94435a9c154a023aa46f61d57ce0b92f2 --- /dev/null +++ b/.devops/llama-cli-cuda.Dockerfile @@ -0,0 +1,37 @@ +ARG UBUNTU_VERSION=22.04 +# This needs to generally match the container host's environment. +ARG CUDA_VERSION=12.6.0 +# Target the CUDA build image +ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION} +# Target the CUDA runtime image +ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION} + +FROM ${BASE_CUDA_DEV_CONTAINER} AS build + +# CUDA architecture to build for (defaults to all supported archs) +ARG CUDA_DOCKER_ARCH=default + +RUN apt-get update && \ + apt-get install -y build-essential git cmake + +WORKDIR /app + +COPY . . 
+ +# Use the default CUDA archs if not specified +RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \ + export CMAKE_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH}"; \ + fi && \ + cmake -B build -DGGML_CUDA=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \ + cmake --build build --config Release --target llama-cli -j$(nproc) + +FROM ${BASE_CUDA_RUN_CONTAINER} AS runtime + +RUN apt-get update && \ + apt-get install -y libgomp1 + +COPY --from=build /app/build/ggml/src/libggml.so /libggml.so +COPY --from=build /app/build/src/libllama.so /libllama.so +COPY --from=build /app/build/bin/llama-cli /llama-cli + +ENTRYPOINT [ "/llama-cli" ] diff --git a/.devops/llama-cli-intel.Dockerfile b/.devops/llama-cli-intel.Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..79dba06a77d6eba3c8e2b71d809a4cdf3213d337 --- /dev/null +++ b/.devops/llama-cli-intel.Dockerfile @@ -0,0 +1,28 @@ +ARG ONEAPI_VERSION=2024.1.1-devel-ubuntu22.04 + +FROM intel/oneapi-basekit:$ONEAPI_VERSION AS build + +ARG GGML_SYCL_F16=OFF +RUN apt-get update && \ + apt-get install -y git + +WORKDIR /app + +COPY . . + +RUN if [ "${GGML_SYCL_F16}" = "ON" ]; then \ + echo "GGML_SYCL_F16 is set" && \ + export OPT_SYCL_F16="-DGGML_SYCL_F16=ON"; \ + fi && \ + echo "Building with static libs" && \ + cmake -B build -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx \ + ${OPT_SYCL_F16} -DBUILD_SHARED_LIBS=OFF && \ + cmake --build build --config Release --target llama-cli + +FROM intel/oneapi-basekit:$ONEAPI_VERSION AS runtime + +COPY --from=build /app/build/bin/llama-cli /llama-cli + +ENV LC_ALL=C.utf8 + +ENTRYPOINT [ "/llama-cli" ] diff --git a/.devops/llama-cli-musa.Dockerfile b/.devops/llama-cli-musa.Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..b5696794f1a560bb584392141dbc50cb1c01cef0 --- /dev/null +++ b/.devops/llama-cli-musa.Dockerfile @@ -0,0 +1,30 @@ +ARG UBUNTU_VERSION=22.04 +# This needs to generally match the container host's environment. +ARG MUSA_VERSION=rc3.1.0 +# Target the MUSA build image +ARG BASE_MUSA_DEV_CONTAINER=mthreads/musa:${MUSA_VERSION}-devel-ubuntu${UBUNTU_VERSION} +# Target the MUSA runtime image +ARG BASE_MUSA_RUN_CONTAINER=mthreads/musa:${MUSA_VERSION}-runtime-ubuntu${UBUNTU_VERSION} + +FROM ${BASE_MUSA_DEV_CONTAINER} AS build + +RUN apt-get update && \ + apt-get install -y build-essential git cmake + +WORKDIR /app + +COPY . . + +RUN cmake -B build -DGGML_MUSA=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \ + cmake --build build --config Release --target llama-cli -j$(nproc) + +FROM ${BASE_MUSA_RUN_CONTAINER} AS runtime + +RUN apt-get update && \ + apt-get install -y libgomp1 + +COPY --from=build /app/build/ggml/src/libggml.so /libggml.so +COPY --from=build /app/build/src/libllama.so /libllama.so +COPY --from=build /app/build/bin/llama-cli /llama-cli + +ENTRYPOINT [ "/llama-cli" ] diff --git a/.devops/llama-cli-rocm.Dockerfile b/.devops/llama-cli-rocm.Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..e60c747bdbf11bc72a17258678b4a8fc909a5c6a --- /dev/null +++ b/.devops/llama-cli-rocm.Dockerfile @@ -0,0 +1,45 @@ +ARG UBUNTU_VERSION=22.04 + +# This needs to generally match the container host's environment. +ARG ROCM_VERSION=5.6 + +# Target the ROCm build image +ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete + +FROM ${BASE_ROCM_DEV_CONTAINER} AS build + +# Unless otherwise specified, we make a fat build. 
+# List from https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878 +# This is mostly tied to rocBLAS supported archs. +ARG ROCM_DOCKER_ARCH="\ + gfx803 \ + gfx900 \ + gfx906 \ + gfx908 \ + gfx90a \ + gfx1010 \ + gfx1030 \ + gfx1100 \ + gfx1101 \ + gfx1102" + +COPY requirements.txt requirements.txt +COPY requirements requirements + +RUN pip install --upgrade pip setuptools wheel \ + && pip install -r requirements.txt + +WORKDIR /app + +COPY . . + +# Set ROCm architectures to build for +ENV AMDGPU_TARGETS=${ROCM_DOCKER_ARCH} +# Enable ROCm +ENV GGML_HIPBLAS=1 +ENV CC=/opt/rocm/llvm/bin/clang +ENV CXX=/opt/rocm/llvm/bin/clang++ + +RUN make -j$(nproc) llama-cli + +ENTRYPOINT [ "/app/llama-cli" ] diff --git a/.devops/llama-cli-vulkan.Dockerfile b/.devops/llama-cli-vulkan.Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..9b0dad8bf7a13b04d9953ce586eee34e1ec9694a --- /dev/null +++ b/.devops/llama-cli-vulkan.Dockerfile @@ -0,0 +1,27 @@ +ARG UBUNTU_VERSION=jammy + +FROM ubuntu:$UBUNTU_VERSION AS build + +# Install build tools +RUN apt update && apt install -y git build-essential cmake wget libgomp1 + +# Install Vulkan SDK +RUN wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \ + wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \ + apt update -y && \ + apt-get install -y vulkan-sdk + +# Build it +WORKDIR /app +COPY . . +RUN cmake -B build -DGGML_VULKAN=1 && \ + cmake --build build --config Release --target llama-cli + +# Clean up +WORKDIR / +RUN cp /app/build/bin/llama-cli /llama-cli && \ + rm -rf /app + +ENV LC_ALL=C.utf8 + +ENTRYPOINT [ "/llama-cli" ] diff --git a/.devops/llama-cli.Dockerfile b/.devops/llama-cli.Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..7f741aa46ecf0e81de24a5697cfa4c9cebcf5def --- /dev/null +++ b/.devops/llama-cli.Dockerfile @@ -0,0 +1,23 @@ +ARG UBUNTU_VERSION=22.04 + +FROM ubuntu:$UBUNTU_VERSION AS build + +RUN apt-get update && \ + apt-get install -y build-essential git + +WORKDIR /app + +COPY . . + +RUN make -j$(nproc) llama-cli + +FROM ubuntu:$UBUNTU_VERSION AS runtime + +RUN apt-get update && \ + apt-get install -y libgomp1 + +COPY --from=build /app/llama-cli /llama-cli + +ENV LC_ALL=C.utf8 + +ENTRYPOINT [ "/llama-cli" ] diff --git a/.devops/llama-cpp-cuda.srpm.spec b/.devops/llama-cpp-cuda.srpm.spec new file mode 100644 index 0000000000000000000000000000000000000000..7425d3a9d7a4027fcff13077ffec3caf6e66d02f --- /dev/null +++ b/.devops/llama-cpp-cuda.srpm.spec @@ -0,0 +1,83 @@ +# SRPM for building from source and packaging an RPM for RPM-based distros. +# https://docs.fedoraproject.org/en-US/quick-docs/creating-rpm-packages +# Built and maintained by John Boero - boeroboy@gmail.com +# In honor of Seth Vidal https://www.redhat.com/it/blog/thank-you-seth-vidal + +# Notes for llama.cpp: +# 1. Tags are currently based on hash - which will not sort asciibetically. +# We need to declare standard versioning if people want to sort latest releases. +# 2. Builds for CUDA/OpenCL support are separate, with different dependencies. +# 3. NVidia's developer repo must be enabled with nvcc, cublas, clblas, etc. installed. +# Example: https://developer.download.nvidia.com/compute/cuda/repos/fedora37/x86_64/cuda-fedora37.repo +# 4. OpenCL/CLBLAST support simply requires the ICD loader and basic opencl libraries. +# It is up to the user to install the correct vendor-specific support. 
+ +Name: llama.cpp-cuda +Version: %( date "+%%Y%%m%%d" ) +Release: 1%{?dist} +Summary: CUDA Inference of LLaMA model in pure C/C++ +License: MIT +Source0: https://github.com/ggerganov/llama.cpp/archive/refs/heads/master.tar.gz +BuildRequires: coreutils make gcc-c++ git cuda-toolkit +Requires: cuda-toolkit +URL: https://github.com/ggerganov/llama.cpp + +%define debug_package %{nil} +%define source_date_epoch_from_changelog 0 + +%description +CUDA inference for Meta's Llama 2 models using default options. + +%prep +%setup -n llama.cpp-master + +%build +make -j GGML_CUDA=1 + +%install +mkdir -p %{buildroot}%{_bindir}/ +cp -p llama-cli %{buildroot}%{_bindir}/llama-cuda-cli +cp -p llama-server %{buildroot}%{_bindir}/llama-cuda-server +cp -p llama-simple %{buildroot}%{_bindir}/llama-cuda-simple + +mkdir -p %{buildroot}/usr/lib/systemd/system +%{__cat} <<EOF > %{buildroot}/usr/lib/systemd/system/llamacuda.service +[Unit] +Description=Llama.cpp server, CUDA-enabled build. +After=syslog.target network.target local-fs.target remote-fs.target nss-lookup.target + +[Service] +Type=simple +EnvironmentFile=/etc/sysconfig/llama +ExecStart=/usr/bin/llama-cuda-server $LLAMA_ARGS +ExecReload=/bin/kill -s HUP $MAINPID +Restart=never + +[Install] +WantedBy=default.target +EOF + +mkdir -p %{buildroot}/etc/sysconfig +%{__cat} <<EOF > %{buildroot}/etc/sysconfig/llama +LLAMA_ARGS="-m /opt/llama2/ggml-model-f32.bin" +EOF + +%clean +rm -rf %{buildroot} +rm -rf %{_builddir}/* + +%files +%{_bindir}/llama-cuda-cli +%{_bindir}/llama-cuda-server +%{_bindir}/llama-cuda-simple +/usr/lib/systemd/system/llamacuda.service +%config /etc/sysconfig/llama + +%pre + +%post + +%preun +%postun + +%changelog diff --git a/.devops/llama-cpp.srpm.spec b/.devops/llama-cpp.srpm.spec new file mode 100644 index 0000000000000000000000000000000000000000..4d5560089816c6679bd02b673dc57a2206ebfbe7 --- /dev/null +++ b/.devops/llama-cpp.srpm.spec @@ -0,0 +1,85 @@ +# SRPM for building from source and packaging an RPM for RPM-based distros. +# https://docs.fedoraproject.org/en-US/quick-docs/creating-rpm-packages +# Built and maintained by John Boero - boeroboy@gmail.com +# In honor of Seth Vidal https://www.redhat.com/it/blog/thank-you-seth-vidal + +# Notes for llama.cpp: +# 1. Tags are currently based on hash - which will not sort asciibetically. +# We need to declare standard versioning if people want to sort latest releases. +# In the meantime, YYYYMMDD format will be used. +# 2. Builds for CUDA/OpenCL support are separate, with different dependencies. +# 3. NVidia's developer repo must be enabled with nvcc, cublas, clblas, etc. installed. +# Example: https://developer.download.nvidia.com/compute/cuda/repos/fedora37/x86_64/cuda-fedora37.repo +# 4. OpenCL/CLBLAST support simply requires the ICD loader and basic opencl libraries. +# It is up to the user to install the correct vendor-specific support. + +Name: llama.cpp +Version: %( date "+%%Y%%m%%d" ) +Release: 1%{?dist} +Summary: CPU Inference of LLaMA model in pure C/C++ (no CUDA/OpenCL) +License: MIT +Source0: https://github.com/ggerganov/llama.cpp/archive/refs/heads/master.tar.gz +BuildRequires: coreutils make gcc-c++ git libstdc++-devel +Requires: libstdc++ +URL: https://github.com/ggerganov/llama.cpp + +%define debug_package %{nil} +%define source_date_epoch_from_changelog 0 + +%description +CPU inference for Meta's Llama 2 models using default options. +Models are not included in this package and must be downloaded separately. 
+ +%prep +%setup -n llama.cpp-master + +%build +make -j + +%install +mkdir -p %{buildroot}%{_bindir}/ +cp -p llama-cli %{buildroot}%{_bindir}/llama-cli +cp -p llama-server %{buildroot}%{_bindir}/llama-server +cp -p llama-simple %{buildroot}%{_bindir}/llama-simple + +mkdir -p %{buildroot}/usr/lib/systemd/system +%{__cat} <<EOF > %{buildroot}/usr/lib/systemd/system/llama.service +[Unit] +Description=Llama.cpp server, CPU only (no GPU support in this build). +After=syslog.target network.target local-fs.target remote-fs.target nss-lookup.target + +[Service] +Type=simple +EnvironmentFile=/etc/sysconfig/llama +ExecStart=/usr/bin/llama-server $LLAMA_ARGS +ExecReload=/bin/kill -s HUP $MAINPID +Restart=never + +[Install] +WantedBy=default.target +EOF + +mkdir -p %{buildroot}/etc/sysconfig +%{__cat} <<EOF > %{buildroot}/etc/sysconfig/llama +LLAMA_ARGS="-m /opt/llama2/ggml-model-f32.bin" +EOF + +%clean +rm -rf %{buildroot} +rm -rf %{_builddir}/* + +%files +%{_bindir}/llama-cli +%{_bindir}/llama-server +%{_bindir}/llama-simple +/usr/lib/systemd/system/llama.service +%config /etc/sysconfig/llama + +%pre + +%post + +%preun +%postun + +%changelog diff --git a/.devops/llama-server-cuda.Dockerfile b/.devops/llama-server-cuda.Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..a40e24205707ffd7370c4b1902b1be4a075fbfd6 --- /dev/null +++ b/.devops/llama-server-cuda.Dockerfile @@ -0,0 +1,42 @@ +ARG UBUNTU_VERSION=22.04 +# This needs to generally match the container host's environment. +ARG CUDA_VERSION=12.6.0 +# Target the CUDA build image +ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION} +# Target the CUDA runtime image +ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION} + +FROM ${BASE_CUDA_DEV_CONTAINER} AS build + +# CUDA architecture to build for (defaults to all supported archs) +ARG CUDA_DOCKER_ARCH=default + +RUN apt-get update && \ + apt-get install -y build-essential git cmake libcurl4-openssl-dev + +WORKDIR /app + +COPY . . + +# Use the default CUDA archs if not specified +RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \ + export CMAKE_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH}"; \ + fi && \ + cmake -B build -DGGML_CUDA=ON -DLLAMA_CURL=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \ + cmake --build build --config Release --target llama-server -j$(nproc) + +FROM ${BASE_CUDA_RUN_CONTAINER} AS runtime + +RUN apt-get update && \ + apt-get install -y libcurl4-openssl-dev libgomp1 curl + +COPY --from=build /app/build/ggml/src/libggml.so /libggml.so +COPY --from=build /app/build/src/libllama.so /libllama.so +COPY --from=build /app/build/bin/llama-server /llama-server + +# Must be set to 0.0.0.0 so it can listen to requests from host machine +ENV LLAMA_ARG_HOST=0.0.0.0 + +HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ] + +ENTRYPOINT [ "/llama-server" ] diff --git a/.devops/llama-server-intel.Dockerfile b/.devops/llama-server-intel.Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..9c355b664f15e94f44e43183c7cb3d694c58bb88 --- /dev/null +++ b/.devops/llama-server-intel.Dockerfile @@ -0,0 +1,34 @@ +ARG ONEAPI_VERSION=2024.1.1-devel-ubuntu22.04 + +FROM intel/oneapi-basekit:$ONEAPI_VERSION AS build + +ARG GGML_SYCL_F16=OFF +RUN apt-get update && \ + apt-get install -y git libcurl4-openssl-dev + +WORKDIR /app + +COPY . . 
+ +RUN if [ "${GGML_SYCL_F16}" = "ON" ]; then \ + echo "GGML_SYCL_F16 is set" && \ + export OPT_SYCL_F16="-DGGML_SYCL_F16=ON"; \ + fi && \ + echo "Building with dynamic libs" && \ + cmake -B build -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_CURL=ON ${OPT_SYCL_F16} && \ + cmake --build build --config Release --target llama-server + +FROM intel/oneapi-basekit:$ONEAPI_VERSION AS runtime + +RUN apt-get update && \ + apt-get install -y libcurl4-openssl-dev curl + +COPY --from=build /app/build/bin/llama-server /llama-server + +ENV LC_ALL=C.utf8 +# Must be set to 0.0.0.0 so it can listen to requests from host machine +ENV LLAMA_ARG_HOST=0.0.0.0 + +HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ] + +ENTRYPOINT [ "/llama-server" ] diff --git a/.devops/llama-server-musa.Dockerfile b/.devops/llama-server-musa.Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..193a6d77cb9edacefdeb67a65af8330409d0e25f --- /dev/null +++ b/.devops/llama-server-musa.Dockerfile @@ -0,0 +1,35 @@ +ARG UBUNTU_VERSION=22.04 +# This needs to generally match the container host's environment. +ARG MUSA_VERSION=rc3.1.0 +# Target the MUSA build image +ARG BASE_MUSA_DEV_CONTAINER=mthreads/musa:${MUSA_VERSION}-devel-ubuntu${UBUNTU_VERSION} +# Target the MUSA runtime image +ARG BASE_MUSA_RUN_CONTAINER=mthreads/musa:${MUSA_VERSION}-runtime-ubuntu${UBUNTU_VERSION} + +FROM ${BASE_MUSA_DEV_CONTAINER} AS build + +RUN apt-get update && \ + apt-get install -y build-essential git cmake libcurl4-openssl-dev + +WORKDIR /app + +COPY . . + +RUN cmake -B build -DGGML_MUSA=ON -DLLAMA_CURL=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \ + cmake --build build --config Release --target llama-server -j$(nproc) + +FROM ${BASE_MUSA_RUN_CONTAINER} AS runtime + +RUN apt-get update && \ + apt-get install -y libcurl4-openssl-dev libgomp1 curl + +COPY --from=build /app/build/ggml/src/libggml.so /libggml.so +COPY --from=build /app/build/src/libllama.so /libllama.so +COPY --from=build /app/build/bin/llama-server /llama-server + +# Must be set to 0.0.0.0 so it can listen to requests from host machine +ENV LLAMA_ARG_HOST=0.0.0.0 + +HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ] + +ENTRYPOINT [ "/llama-server" ] diff --git a/.devops/llama-server-rocm.Dockerfile b/.devops/llama-server-rocm.Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..8553af75b61fc51d1d0319926f4f7a7edd75e34b --- /dev/null +++ b/.devops/llama-server-rocm.Dockerfile @@ -0,0 +1,54 @@ +ARG UBUNTU_VERSION=22.04 + +# This needs to generally match the container host's environment. +ARG ROCM_VERSION=5.6 + +# Target the ROCm build image +ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete + +FROM ${BASE_ROCM_DEV_CONTAINER} AS build + +# Unless otherwise specified, we make a fat build. +# List from https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878 +# This is mostly tied to rocBLAS supported archs. +ARG ROCM_DOCKER_ARCH="\ + gfx803 \ + gfx900 \ + gfx906 \ + gfx908 \ + gfx90a \ + gfx1010 \ + gfx1030 \ + gfx1100 \ + gfx1101 \ + gfx1102" + +COPY requirements.txt requirements.txt +COPY requirements requirements + +RUN pip install --upgrade pip setuptools wheel \ + && pip install -r requirements.txt + +WORKDIR /app + +COPY . . 
+ +# Set ROCm architectures to build for +ENV AMDGPU_TARGETS=${ROCM_DOCKER_ARCH} +# Enable ROCm +ENV GGML_HIPBLAS=1 +ENV CC=/opt/rocm/llvm/bin/clang +ENV CXX=/opt/rocm/llvm/bin/clang++ +# Must be set to 0.0.0.0 so it can listen to requests from host machine +ENV LLAMA_ARG_HOST=0.0.0.0 + +# Enable cURL +ENV LLAMA_CURL=1 +RUN apt-get update && \ + apt-get install -y libcurl4-openssl-dev curl + +RUN make -j$(nproc) llama-server + +HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ] + +ENTRYPOINT [ "/app/llama-server" ] diff --git a/.devops/llama-server-vulkan.Dockerfile b/.devops/llama-server-vulkan.Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..93c5e0c26e6917936ff2685a78e3da9097eef34f --- /dev/null +++ b/.devops/llama-server-vulkan.Dockerfile @@ -0,0 +1,31 @@ +ARG UBUNTU_VERSION=jammy + +FROM ubuntu:$UBUNTU_VERSION AS build + +# Install build tools +RUN apt update && apt install -y git build-essential cmake wget + +# Install Vulkan SDK and cURL +RUN wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \ + wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \ + apt update -y && \ + apt-get install -y vulkan-sdk libcurl4-openssl-dev curl + +# Build it +WORKDIR /app +COPY . . +RUN cmake -B build -DGGML_VULKAN=1 -DLLAMA_CURL=1 && \ + cmake --build build --config Release --target llama-server + +# Clean up +WORKDIR / +RUN cp /app/build/bin/llama-server /llama-server && \ + rm -rf /app + +ENV LC_ALL=C.utf8 +# Must be set to 0.0.0.0 so it can listen to requests from host machine +ENV LLAMA_ARG_HOST=0.0.0.0 + +HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ] + +ENTRYPOINT [ "/llama-server" ] diff --git a/.devops/llama-server.Dockerfile b/.devops/llama-server.Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..02accc85e1368f9db1a50d0d0af7447eebc93361 --- /dev/null +++ b/.devops/llama-server.Dockerfile @@ -0,0 +1,29 @@ +ARG UBUNTU_VERSION=22.04 + +FROM ubuntu:$UBUNTU_VERSION AS build + +RUN apt-get update && \ + apt-get install -y build-essential git libcurl4-openssl-dev + +WORKDIR /app + +COPY . . + +ENV LLAMA_CURL=1 + +RUN make -j$(nproc) llama-server + +FROM ubuntu:$UBUNTU_VERSION AS runtime + +RUN apt-get update && \ + apt-get install -y libcurl4-openssl-dev libgomp1 curl + +COPY --from=build /app/llama-server /llama-server + +ENV LC_ALL=C.utf8 +# Must be set to 0.0.0.0 so it can listen to requests from host machine +ENV LLAMA_ARG_HOST=0.0.0.0 + +HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ] + +ENTRYPOINT [ "/llama-server" ] diff --git a/.devops/nix/apps.nix b/.devops/nix/apps.nix new file mode 100644 index 0000000000000000000000000000000000000000..0ecf19fc56d554c69aeef8a03b253fc15338688e --- /dev/null +++ b/.devops/nix/apps.nix @@ -0,0 +1,21 @@ +{ + perSystem = + { config, lib, ... }: + { + apps = + let + inherit (config.packages) default; + binaries = [ + "llama-cli" + "llama-embedding" + "llama-server" + "llama-quantize" + ]; + mkApp = name: { + type = "app"; + program = "${default}/bin/${name}"; + }; + in + lib.genAttrs binaries mkApp; + }; +} diff --git a/.devops/nix/devshells.nix b/.devops/nix/devshells.nix new file mode 100644 index 0000000000000000000000000000000000000000..bfd304af14dcda59e284d30b9fc7491466bf71d1 --- /dev/null +++ b/.devops/nix/devshells.nix @@ -0,0 +1,52 @@ +{ inputs, ... }: + +{ + perSystem = + { + config, + lib, + system, + ... 
+ }: + { + devShells = + let + pkgs = import inputs.nixpkgs { inherit system; }; + stdenv = pkgs.stdenv; + scripts = config.packages.python-scripts; + in + lib.pipe (config.packages) [ + (lib.concatMapAttrs ( + name: package: { + ${name} = pkgs.mkShell { + name = "${name}"; + inputsFrom = [ package ]; + shellHook = '' + echo "Entering ${name} devShell" + ''; + }; + "${name}-extra" = + if (name == "python-scripts") then + null + else + pkgs.mkShell { + name = "${name}-extra"; + inputsFrom = [ + package + scripts + ]; + # Extra packages that *may* be used by some scripts + packages = [ + pkgs.python3Packages.tiktoken + ]; + shellHook = '' + echo "Entering ${name} devShell" + addToSearchPath "LD_LIBRARY_PATH" "${lib.getLib stdenv.cc.cc}/lib" + ''; + }; + } + )) + (lib.filterAttrs (name: value: value != null)) + ]; + }; +} diff --git a/.devops/nix/docker.nix b/.devops/nix/docker.nix new file mode 100644 index 0000000000000000000000000000000000000000..d607b4575772c5330e962649ff6e14e5562ecfad --- /dev/null +++ b/.devops/nix/docker.nix @@ -0,0 +1,37 @@ +{ + lib, + dockerTools, + buildEnv, + llama-cpp, + interactive ? true, + coreutils, +}: + +# A tar that can be fed into `docker load`: +# +# $ nix build .#llamaPackages.docker +# $ docker load < result + +# For details and variations cf. +# - https://nixos.org/manual/nixpkgs/unstable/#ssec-pkgs-dockerTools-buildLayeredImage +# - https://discourse.nixos.org/t/a-faster-dockertools-buildimage-prototype/16922 +# - https://nixery.dev/ + +# Approximate (compressed) sizes, at the time of writing, are: +# +# .#llamaPackages.docker: 125M; +# .#llamaPackagesCuda.docker: 537M; +# .#legacyPackages.aarch64-linux.llamaPackagesXavier.docker: 415M. + +dockerTools.buildLayeredImage { + name = llama-cpp.pname; + tag = "latest"; + + contents = + [ llama-cpp ] + ++ lib.optionals interactive [ + coreutils + dockerTools.binSh + dockerTools.caCertificates + ]; +} diff --git a/.devops/nix/jetson-support.nix b/.devops/nix/jetson-support.nix new file mode 100644 index 0000000000000000000000000000000000000000..78e2e40e03864e3df046389f7b751a1fd4575656 --- /dev/null +++ b/.devops/nix/jetson-support.nix @@ -0,0 +1,39 @@ +{ inputs, ... }: +{ + perSystem = + { + config, + system, + lib, + pkgsCuda, + ... + }: + { + legacyPackages = + let + caps.llamaPackagesXavier = "7.2"; + caps.llamaPackagesOrin = "8.7"; + caps.llamaPackagesTX2 = "6.2"; + caps.llamaPackagesNano = "5.3"; + + pkgsFor = + cap: + import inputs.nixpkgs { + inherit system; + config = { + cudaSupport = true; + cudaCapabilities = [ cap ]; + cudaEnableForwardCompat = false; + inherit (pkgsCuda.config) allowUnfreePredicate; + }; + }; + in + builtins.mapAttrs (name: cap: (pkgsFor cap).callPackage ./scope.nix { }) caps; + + packages = lib.optionalAttrs (system == "aarch64-linux") { + jetson-xavier = config.legacyPackages.llamaPackagesXavier.llama-cpp; + jetson-orin = config.legacyPackages.llamaPackagesOrin.llama-cpp; + jetson-nano = config.legacyPackages.llamaPackagesNano.llama-cpp; + }; + }; +} diff --git a/.devops/nix/nixpkgs-instances.nix b/.devops/nix/nixpkgs-instances.nix new file mode 100644 index 0000000000000000000000000000000000000000..90d683a713aa1f6f7b7e263549afe5edef0a4033 --- /dev/null +++ b/.devops/nix/nixpkgs-instances.nix @@ -0,0 +1,45 @@ +{ inputs, ... }: +{ + # The _module.args definitions are passed on to modules as arguments. E.g. + # the module `{ pkgs ... }: { /* config */ }` implicitly uses + # `_module.args.pkgs` (defined in this case by flake-parts). + perSystem = + { system, ... 
}: + { + _module.args = { + # Note: bringing up https://zimbatm.com/notes/1000-instances-of-nixpkgs + # again, the below creates several nixpkgs instances which the + # flake-centric CLI will be forced to evaluate e.g. on `nix flake show`. + # + # This is currently "slow" and "expensive", on a certain scale. + # This also isn't "right" in that this hinders dependency injection at + # the level of flake inputs. This might get removed in the foreseeable + # future. + # + # Note that you can use these expressions without Nix + # (`pkgs.callPackage ./devops/nix/scope.nix { }` is the entry point). + + pkgsCuda = import inputs.nixpkgs { + inherit system; + # Ensure dependencies use CUDA consistently (e.g. that openmpi, ucc, + # and ucx are built with CUDA support) + config.cudaSupport = true; + config.allowUnfreePredicate = + p: + builtins.all ( + license: + license.free + || builtins.elem license.shortName [ + "CUDA EULA" + "cuDNN EULA" + ] + ) (p.meta.licenses or [ p.meta.license ]); + }; + # Ensure dependencies use ROCm consistently + pkgsRocm = import inputs.nixpkgs { + inherit system; + config.rocmSupport = true; + }; + }; + }; +} diff --git a/.devops/nix/package-gguf-py.nix b/.devops/nix/package-gguf-py.nix new file mode 100644 index 0000000000000000000000000000000000000000..cca2f36a5bd4df214b3b7da84aa7a05d242fce0e --- /dev/null +++ b/.devops/nix/package-gguf-py.nix @@ -0,0 +1,36 @@ +{ + lib, + llamaVersion, + numpy, + tqdm, + sentencepiece, + pyyaml, + poetry-core, + buildPythonPackage, + pytestCheckHook, +}: + +buildPythonPackage { + pname = "gguf"; + version = llamaVersion; + pyproject = true; + nativeBuildInputs = [ poetry-core ]; + propagatedBuildInputs = [ + numpy + tqdm + sentencepiece + pyyaml + ]; + src = lib.cleanSource ../../gguf-py; + pythonImportsCheck = [ + "numpy" + "gguf" + ]; + nativeCheckInputs = [ pytestCheckHook ]; + doCheck = true; + meta = with lib; { + description = "Python package for writing binary files in the GGUF format"; + license = licenses.mit; + maintainers = [ maintainers.ditsuke ]; + }; +} diff --git a/.devops/nix/package.nix b/.devops/nix/package.nix new file mode 100644 index 0000000000000000000000000000000000000000..5d7d7ea5ae2d0ccf6aa7e515b5f8e5d555862855 --- /dev/null +++ b/.devops/nix/package.nix @@ -0,0 +1,246 @@ +{ + lib, + glibc, + config, + stdenv, + runCommand, + cmake, + ninja, + pkg-config, + git, + mpi, + blas, + cudaPackages, + autoAddDriverRunpath, + darwin, + rocmPackages, + vulkan-headers, + vulkan-loader, + curl, + shaderc, + useBlas ? + builtins.all (x: !x) [ + useCuda + useMetalKit + useRocm + useVulkan + ] + && blas.meta.available, + useCuda ? config.cudaSupport, + useMetalKit ? stdenv.isAarch64 && stdenv.isDarwin, + # Increases the runtime closure size by ~700M + useMpi ? false, + useRocm ? config.rocmSupport, + enableCurl ? true, + useVulkan ? false, + llamaVersion ? "0.0.0", # Arbitrary version, substituted by the flake + + # It's necessary to consistently use backendStdenv when building with CUDA support, + # otherwise we get libstdc++ errors downstream. + effectiveStdenv ? if useCuda then cudaPackages.backendStdenv else stdenv, + enableStatic ? effectiveStdenv.hostPlatform.isStatic, + precompileMetalShaders ? 
false, +}: + +let + inherit (lib) + cmakeBool + cmakeFeature + optionals + strings + ; + + stdenv = throw "Use effectiveStdenv instead"; + + suffices = + lib.optionals useBlas [ "BLAS" ] + ++ lib.optionals useCuda [ "CUDA" ] + ++ lib.optionals useMetalKit [ "MetalKit" ] + ++ lib.optionals useMpi [ "MPI" ] + ++ lib.optionals useRocm [ "ROCm" ] + ++ lib.optionals useVulkan [ "Vulkan" ]; + + pnameSuffix = + strings.optionalString (suffices != [ ]) + "-${strings.concatMapStringsSep "-" strings.toLower suffices}"; + descriptionSuffix = strings.optionalString ( + suffices != [ ] + ) ", accelerated with ${strings.concatStringsSep ", " suffices}"; + + xcrunHost = runCommand "xcrunHost" { } '' + mkdir -p $out/bin + ln -s /usr/bin/xcrun $out/bin + ''; + + # apple_sdk is supposed to choose sane defaults, no need to handle isAarch64 + # separately + darwinBuildInputs = + with darwin.apple_sdk.frameworks; + [ + Accelerate + CoreVideo + CoreGraphics + ] + ++ optionals useMetalKit [ MetalKit ]; + + cudaBuildInputs = with cudaPackages; [ + cuda_cudart + cuda_cccl # + libcublas + ]; + + rocmBuildInputs = with rocmPackages; [ + clr + hipblas + rocblas + ]; + + vulkanBuildInputs = [ + vulkan-headers + vulkan-loader + shaderc + ]; +in + +effectiveStdenv.mkDerivation (finalAttrs: { + pname = "llama-cpp${pnameSuffix}"; + version = llamaVersion; + + # Note: none of the files discarded here are visible in the sandbox or + # affect the output hash. This also means they can be modified without + # triggering a rebuild. + src = lib.cleanSourceWith { + filter = + name: type: + let + noneOf = builtins.all (x: !x); + baseName = baseNameOf name; + in + noneOf [ + (lib.hasSuffix ".nix" name) # Ignore *.nix files when computing outPaths + (lib.hasSuffix ".md" name) # Ignore *.md changes when computing outPaths + (lib.hasPrefix "." baseName) # Skip hidden files and directories + (baseName == "flake.lock") + ]; + src = lib.cleanSource ../../.; + }; + + postPatch = '' + substituteInPlace ./ggml/src/ggml-metal.m \ + --replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";" + substituteInPlace ./ggml/src/ggml-metal.m \ + --replace '[bundle pathForResource:@"default" ofType:@"metallib"];' "@\"$out/bin/default.metallib\";" + ''; + + # With PR#6015 https://github.com/ggerganov/llama.cpp/pull/6015, + # `default.metallib` may be compiled with the Metal compiler from Xcode + # and we need to escape the sandbox on macOS to access the Metal compiler. 
+ # `xcrun` is used to find the path of the Metal compiler, which is variable + # and not on $PATH + # see https://github.com/ggerganov/llama.cpp/pull/6118 for discussion + __noChroot = effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders; + + nativeBuildInputs = + [ + cmake + ninja + pkg-config + git + ] + ++ optionals useCuda [ + cudaPackages.cuda_nvcc + + autoAddDriverRunpath + ] + ++ optionals (effectiveStdenv.hostPlatform.isGnu && enableStatic) [ glibc.static ] + ++ optionals (effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders) [ xcrunHost ]; + + buildInputs = + optionals effectiveStdenv.isDarwin darwinBuildInputs + ++ optionals useCuda cudaBuildInputs + ++ optionals useMpi [ mpi ] + ++ optionals useRocm rocmBuildInputs + ++ optionals useBlas [ blas ] + ++ optionals useVulkan vulkanBuildInputs + ++ optionals enableCurl [ curl ]; + + cmakeFlags = + [ + (cmakeBool "LLAMA_BUILD_SERVER" true) + (cmakeBool "BUILD_SHARED_LIBS" (!enableStatic)) + (cmakeBool "CMAKE_SKIP_BUILD_RPATH" true) + (cmakeBool "LLAMA_CURL" enableCurl) + (cmakeBool "GGML_NATIVE" false) + (cmakeBool "GGML_BLAS" useBlas) + (cmakeBool "GGML_CUDA" useCuda) + (cmakeBool "GGML_HIPBLAS" useRocm) + (cmakeBool "GGML_METAL" useMetalKit) + (cmakeBool "GGML_VULKAN" useVulkan) + (cmakeBool "GGML_STATIC" enableStatic) + ] + ++ optionals useCuda [ + ( + with cudaPackages.flags; + cmakeFeature "CMAKE_CUDA_ARCHITECTURES" ( + builtins.concatStringsSep ";" (map dropDot cudaCapabilities) + ) + ) + ] + ++ optionals useRocm [ + (cmakeFeature "CMAKE_HIP_COMPILER" "${rocmPackages.llvm.clang}/bin/clang") + (cmakeFeature "CMAKE_HIP_ARCHITECTURES" (builtins.concatStringsSep ";" rocmPackages.clr.gpuTargets)) + ] + ++ optionals useMetalKit [ + (lib.cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1") + (cmakeBool "GGML_METAL_EMBED_LIBRARY" (!precompileMetalShaders)) + ]; + + # Environment variables needed for ROCm + env = optionals useRocm { + ROCM_PATH = "${rocmPackages.clr}"; + HIP_DEVICE_LIB_PATH = "${rocmPackages.rocm-device-libs}/amdgcn/bitcode"; + }; + + # TODO(SomeoneSerge): It's better to add proper install targets at the CMake level, + # if they haven't been added yet. + postInstall = '' + mkdir -p $out/include + cp $src/include/llama.h $out/include/ + ''; + + meta = { + # Configurations we don't want even the CI to evaluate. Results in the + # "unsupported platform" messages. This is mostly a no-op, because + # cudaPackages would've refused to evaluate anyway. + badPlatforms = optionals useCuda lib.platforms.darwin; + + # Configurations that are known to result in build failures. Can be + # overridden by importing Nixpkgs with `allowBroken = true`. + broken = (useMetalKit && !effectiveStdenv.isDarwin); + + description = "Inference of LLaMA model in pure C/C++${descriptionSuffix}"; + homepage = "https://github.com/ggerganov/llama.cpp/"; + license = lib.licenses.mit; + + # Accommodates `nix run` and `lib.getExe` + mainProgram = "llama-cli"; + + # These people might respond, on a best-effort basis, if you ping them + # in case of Nix-specific regressions or for reviewing Nix-specific PRs. + # Consider adding yourself to this list if you want to ensure this flake + # stays maintained and you're willing to invest your time. Do not add + # other people without their consent. Consider removing people after + # they've been unreachable for long periods of time. 
+ + # Note that lib.maintainers is defined in Nixpkgs, but you may just add + # an attrset following the same format as in + # https://github.com/NixOS/nixpkgs/blob/f36a80e54da29775c78d7eff0e628c2b4e34d1d7/maintainers/maintainer-list.nix + maintainers = with lib.maintainers; [ + philiptaron + SomeoneSerge + ]; + + # Extend `badPlatforms` instead + platforms = lib.platforms.all; + }; +}) diff --git a/.devops/nix/python-scripts.nix b/.devops/nix/python-scripts.nix new file mode 100644 index 0000000000000000000000000000000000000000..392e9ffe41bf5b0bdeec968fc47873c59ac67343 --- /dev/null +++ b/.devops/nix/python-scripts.nix @@ -0,0 +1,66 @@ +{ + lib, + stdenv, + buildPythonPackage, + poetry-core, + mkShell, + python3Packages, + gguf-py, +}@inputs: + +let + llama-python-deps = with python3Packages; [ + numpy + sentencepiece + transformers + protobuf + torchWithoutCuda + gguf-py + tqdm + + # for scripts/compare-llama-bench.py + gitpython + tabulate + + # for examples/pydantic-models-to-grammar-examples.py + docstring-parser + pydantic + + ]; + + llama-python-test-deps = with python3Packages; [ + # Server bench + matplotlib + + # server tests + openai + behave + prometheus-client + ]; +in + +buildPythonPackage ({ + pname = "llama-scripts"; + version = "0.0.0"; + pyproject = true; + + # NOTE: The files filtered out here are not visible in the build sandbox, nor + # do they affect the output hash. They can be modified without triggering a rebuild. + src = lib.cleanSourceWith { + filter = + name: type: + let + any = builtins.any (x: x); + baseName = builtins.baseNameOf name; + in + any [ + (lib.hasSuffix ".py" name) + (baseName == "README.md") + (baseName == "pyproject.toml") + ]; + src = lib.cleanSource ../../.; + }; + nativeBuildInputs = [ poetry-core ]; + nativeCheckInputs = llama-python-test-deps; + dependencies = llama-python-deps; +}) diff --git a/.devops/nix/scope.nix b/.devops/nix/scope.nix new file mode 100644 index 0000000000000000000000000000000000000000..478e8c4228afad449c3bd396d7ed4a12759a5de3 --- /dev/null +++ b/.devops/nix/scope.nix @@ -0,0 +1,41 @@ +{ + lib, + newScope, + python3, + llamaVersion ? "0.0.0", +}: + +let + pythonPackages = python3.pkgs; + buildPythonPackage = pythonPackages.buildPythonPackage; + numpy = pythonPackages.numpy; + tqdm = pythonPackages.tqdm; + sentencepiece = pythonPackages.sentencepiece; + pyyaml = pythonPackages.pyyaml; + poetry-core = pythonPackages.poetry-core; + pytestCheckHook = pythonPackages.pytestCheckHook; +in + +# We're using `makeScope` instead of just writing out an attrset +# because it allows users to apply overlays later using `overrideScope'`. +# Cf. https://noogle.dev/f/lib/makeScope + +lib.makeScope newScope (self: { + inherit llamaVersion; + gguf-py = self.callPackage ./package-gguf-py.nix { + inherit + buildPythonPackage + numpy + tqdm + sentencepiece + poetry-core + pyyaml + pytestCheckHook + ; + }; + python-scripts = self.callPackage ./python-scripts.nix { inherit buildPythonPackage poetry-core; }; + llama-cpp = self.callPackage ./package.nix { }; + docker = self.callPackage ./docker.nix { }; + docker-min = self.callPackage ./docker.nix { interactive = false; }; + sif = self.callPackage ./sif.nix { }; +}) diff --git a/.devops/nix/sif.nix b/.devops/nix/sif.nix new file mode 100644 index 0000000000000000000000000000000000000000..7a5e1dd0ffc4c61e9b88b25d14a10afbd4f8cda9 --- /dev/null +++ b/.devops/nix/sif.nix @@ -0,0 +1,27 @@ +{ + lib, + singularity-tools, + llama-cpp, + bashInteractive, + interactive ? 
false, +}: + +let + optionalInt = cond: x: if cond then x else 0; +in +singularity-tools.buildImage rec { + inherit (llama-cpp) name; + contents = [ llama-cpp ] ++ lib.optionals interactive [ bashInteractive ]; + + # These are excessive (but safe) for most variants. Building singularity + # images requires superuser privileges, so we build them inside a VM in a + # writable image of pre-determined size. + # + # ROCm is currently affected by https://github.com/NixOS/nixpkgs/issues/276846 + # + # Expected image sizes: + # - cpu/blas: 150M, + # - cuda, all gencodes: 560M, + diskSize = 4096 + optionalInt llama-cpp.useRocm 16384; + memSize = diskSize; +} diff --git a/.devops/tools.sh b/.devops/tools.sh new file mode 100644 index 0000000000000000000000000000000000000000..24dcfd35079cb73236dc80d911480bcbacc7008d --- /dev/null +++ b/.devops/tools.sh @@ -0,0 +1,41 @@ +#!/bin/bash +set -e + +# Read the first argument into a variable +arg1="$1" + +# Shift the arguments to remove the first one +shift + +if [[ "$arg1" == '--convert' || "$arg1" == '-c' ]]; then + python3 ./convert_hf_to_gguf.py "$@" +elif [[ "$arg1" == '--quantize' || "$arg1" == '-q' ]]; then + ./llama-quantize "$@" +elif [[ "$arg1" == '--run' || "$arg1" == '-r' ]]; then + ./llama-cli "$@" +elif [[ "$arg1" == '--all-in-one' || "$arg1" == '-a' ]]; then + echo "Converting PTH to GGML..." + for i in `ls $1/$2/ggml-model-f16.bin*`; do + if [ -f "${i/f16/q4_0}" ]; then + echo "Skipping model quantization, it already exists: ${i/f16/q4_0}" + else + echo "Converting PTH to GGML: $i into ${i/f16/q4_0}..." + ./llama-quantize "$i" "${i/f16/q4_0}" q4_0 + fi + done +elif [[ "$arg1" == '--server' || "$arg1" == '-s' ]]; then + ./llama-server "$@" +else + echo "Unknown command: $arg1" + echo "Available commands: " + echo " --run (-r): Run a model previously converted into ggml" + echo " ex: -m /models/7B/ggml-model-q4_0.bin -p \"Building a website can be done in 10 simple steps:\" -n 512" + echo " --convert (-c): Convert a llama model into ggml" + echo " ex: --outtype f16 \"/models/7B/\" " + echo " --quantize (-q): Quantize a ggml model" + echo " ex: \"/models/7B/ggml-model-f16.bin\" \"/models/7B/ggml-model-q4_0.bin\" 2" + echo " --all-in-one (-a): Execute --convert & --quantize" + echo " ex: \"/models/\" 7B" + echo " --server (-s): Run a model on the server" + echo " ex: -m /models/7B/ggml-model-q4_0.bin -c 2048 -ngl 43 -mg 1 --port 8080" +fi diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000000000000000000000000000000000000..064b7c7be86d02d7bbc17b17a0ca40f638d66611 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,20 @@ +*.o +*.a +.cache/ +# Do not ignore .git directory, otherwise the reported build number will always be 0 +.github/ +.gitignore +.vs/ +.vscode/ +.DS_Store + +build*/ + +models/* + +/llama-cli +/llama-quantize + +arm_neon.h +compile_commands.json +Dockerfile diff --git a/.ecrc b/.ecrc new file mode 100644 index 0000000000000000000000000000000000000000..c68877ec211f1c7f9458b8d59c278fc7e88f38c8 --- /dev/null +++ b/.ecrc @@ -0,0 +1,6 @@ +{ + "Exclude": ["^\\.gitmodules$", "stb_image\\.h"], + "Disable": { + "IndentSize": true + } +} diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000000000000000000000000000000000000..f88f8da67cd78758ed461758ae646ea62c5fbbc5 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,32 @@ +# https://EditorConfig.org + +# Top-most EditorConfig file +root = true + +# Unix-style newlines with a newline ending every file, utf-8 charset +[*] +end_of_line = lf 
+insert_final_newline = true +trim_trailing_whitespace = true +charset = utf-8 +indent_style = space +indent_size = 4 + +[Makefile] +indent_style = tab + +[scripts/*.mk] +indent_style = tab + +[prompts/*.txt] +insert_final_newline = unset + +[examples/server/public/*] +indent_size = 2 + +[examples/llama.swiftui/llama.swiftui.xcodeproj/*] +indent_style = tab + +[examples/cvector-generator/*.txt] +trim_trailing_whitespace = unset +insert_final_newline = unset diff --git a/.flake8 b/.flake8 new file mode 100644 index 0000000000000000000000000000000000000000..d64c2564aca8f54ff000c93a44c505f65a76c07c --- /dev/null +++ b/.flake8 @@ -0,0 +1,17 @@ +[flake8] +max-line-length = 125 +ignore = E203,E211,E221,E225,E231,E241,E251,E261,E266,E501,E701,E704,W503 +exclude = + # Do not traverse examples + examples, + # Do not include package initializers + __init__.py, + # No need to traverse our git directory + .git, + # There's no value in checking cache directories + __pycache__, + # No need to include the build path + build, + # This contains builds that we don't want to check + dist # This is generated with `python build .` for package releases +# max-complexity = 10 diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..f15b49c2e62c6e52121f7b498c0b245512d97955 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +*.gguf filter=lfs diff=lfs merge=lfs -text diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..1092d097a7542906eee911783e529ee977c334fe --- /dev/null +++ b/.gitignore @@ -0,0 +1,135 @@ +# Extensions + +*.a +*.bat +*.bin +*.dll +*.dot +*.etag +*.exe +*.gcda +*.gcno +*.gcov +*.gguf +*.gguf.json +*.lastModified +*.log +*.metallib +*.o +*.so +*.tmp + +# IDE / OS + +.cache/ +.ccls-cache/ +.direnv/ +.DS_Store +.envrc +.idea/ +.swiftpm +.vs/ +.vscode/ +nppBackup + + +# Coverage + +gcovr-report/ +lcov-report/ + +# Build Artifacts + +tags +.build/ +build* +!build-info.cmake +!build-info.cpp.in +!build-info.sh +!build.zig +!docs/build.md +/libllama.so +/llama-* +/vulkan-shaders-gen +android-ndk-* +arm_neon.h +cmake-build-* +CMakeSettings.json +compile_commands.json +ggml-metal-embed.metal +llama-batched-swift +/rpc-server +out/ +tmp/ +autogen-*.md + +# Deprecated + +/main +/server + +# CI + +!.github/workflows/*.yml + +# Models + +models/* +models-mnt +!models/.editorconfig +!models/ggml-vocab-*.gguf* + +# Zig +zig-out/ +zig-cache/ + +# Logs + +ppl-*.txt +qnt-*.txt +perf-*.txt + +# Examples + +examples/jeopardy/results.txt +examples/server/*.css.hpp +examples/server/*.html.hpp +examples/server/*.js.hpp +examples/server/*.mjs.hpp +!build_64.sh +!examples/*.bat +!examples/*/*.kts +!examples/*/*/*.kts +!examples/sycl/*.bat +!examples/sycl/*.sh + +# Python + +/.venv +__pycache__/ +*/poetry.lock +poetry.toml + +# Nix +/result + +# Test binaries +/tests/test-backend-ops +/tests/test-double-float +/tests/test-grad0 +/tests/test-grammar-parser +/tests/test-llama-grammar +/tests/test-opt +/tests/test-quantize-fns +/tests/test-quantize-perf +/tests/test-rope +/tests/test-sampling +/tests/test-tokenizer-0 +/tests/test-tokenizer-1-bpe +/tests/test-tokenizer-1-spm + +# Scripts +!/scripts/install-oneapi.bat + +# Test models for lora adapters +/lora-tests diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 
0000000000000000000000000000000000000000..5861d59cb785dabccb2cf01a2af4d519966cdbad --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "kompute"] + path = ggml/src/kompute + url = https://github.com/nomic-ai/kompute.git diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..91d7916285081aa14f5b801935cb35d031f2e601 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,16 @@ +# See https://pre-commit.com for more information +# See https://pre-commit.com/hooks.html for more hooks +exclude: prompts/.*.txt +repos: +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.6.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer + - id: check-yaml + - id: check-added-large-files +- repo: https://github.com/PyCQA/flake8 + rev: 7.0.0 + hooks: + - id: flake8 + additional_dependencies: [flake8-no-print] diff --git a/AUTHORS b/AUTHORS new file mode 100644 index 0000000000000000000000000000000000000000..1bd36158a72f4e48d61dbe49ec1923cbf95dd2cd --- /dev/null +++ b/AUTHORS @@ -0,0 +1,782 @@ +# date: Wed Jun 26 19:36:34 EEST 2024 +# this file is auto-generated by scripts/gen-authors.sh + +0cc4m +0xspringtime <110655352+0xspringtime@users.noreply.github.com> +20kdc +2f38b454 +3ooabkhxtn <31479382+3ooabkhxtn@users.noreply.github.com> +44670 <44670@users.noreply.github.com> +AN Long +AT +Aarni Koskela +Aaron Miller +Aaryaman Vasishta +Abheek Gulati +Abhilash Majumder <30946547+abhilash1910@users.noreply.github.com> +Abhishek Gopinath K <31348521+overtunned@users.noreply.github.com> +Adithya Balaji +AdithyanI +Adrian +Adrian Hesketh +Ahmet Zeer +AidanBeltonS <87009434+AidanBeltonS@users.noreply.github.com> +Aisuko +Akarshan Biswas +Albert Jin +Alberto <57916483+albbus-stack@users.noreply.github.com> +Alex +Alex Azarov +Alex Azarov +Alex Klinkhamer +Alex Klinkhamer +Alex Nguyen +Alex Petenchea +Alex Renda +Alex von Gluck IV +Alexey Parfenov +Ali Chraghi <63465728+alichraghi@users.noreply.github.com> +Ali Nehzat +Ali Tariq +Alon +AlpinDale <52078762+AlpinDale@users.noreply.github.com> +Amir +AmirAli Mirian <37371367+amiralimi@users.noreply.github.com> +Ananta Bastola +Anas Ahouzi <112881240+aahouzi@users.noreply.github.com> +András Salamon +Andrei +Andrew Canis +Andrew Downing +Andrew Duffy +Andrew Godfrey +Andy Tai +Arik Poznanski +Artem +Artem Zinnatullin +Artyom Lebedev +Asbjørn Olling +Ásgeir Bjarni Ingvarsson +Ashish <1856117+ashishdatta@users.noreply.github.com> +Ashok Gelal <401055+ashokgelal@users.noreply.github.com> +Ashraful Islam +Atsushi Tatsuma +Austin <77757836+teleprint-me@users.noreply.github.com> +AustinMroz +BADR +Bach Le +Bailey Chittle <39804642+bachittle@users.noreply.github.com> +BarfingLemurs <128182951+BarfingLemurs@users.noreply.github.com> +Bartowski +Behnam M <58621210+ibehnam@users.noreply.github.com> +Ben Ashbaugh +Ben Garney +Ben Siraphob +Ben Williams +Benjamin Findley <39356821+Kartoffelsaft@users.noreply.github.com> +Benjamin Lecaillon <84293038+blecaillon@users.noreply.github.com> +Bernat Vadell +Bingan <70050083+binganao@users.noreply.github.com> +Bodo Graumann +Bono Lv +Borislav Stanimirov +Branden Butler +Brian +Bruce MacDonald +Bryan Honof +CJ Pais +CRD716 +Calvin Laurenson +Cameron +Cameron Kaiser +Carolinabanana <140120812+Carolinabanana@users.noreply.github.com> +Casey Primozic +Casey Primozic +CausalLM <148736309+CausalLM@users.noreply.github.com> +Cebtenzzre +Chad Brewbaker +Chao Jiang +Cheng Shao +Chris Elrod +Chris Kuehl +Christian Demsar +Christian Demsar 
+Christian Falch <875252+chrfalch@users.noreply.github.com> +Christian Kögler +Christian Zhou-Zheng <59622928+christianazinn@users.noreply.github.com> +Clark Saben <76020733+csaben@users.noreply.github.com> +Clint Herron +CrispStrobe <154636388+CrispStrobe@users.noreply.github.com> +Cuong Trinh Manh +DAN™ +Damian Stewart +Dane Madsen +DaniAndTheWeb <57776841+DaniAndTheWeb@users.noreply.github.com> +Daniel Bevenius +Daniel Drake +Daniel Hiltgen +Daniel Illescas Romero +Daniele <57776841+daniandtheweb@users.noreply.github.com> +DannyDaemonic +Dat Quoc Nguyen <2412555+datquocnguyen@users.noreply.github.com> +Dave +Dave Airlie +Dave Airlie +Dave Della Costa +David Friehs +David Kennedy +David Pflug +David Renshaw +David Sommers <12738+databyte@users.noreply.github.com> +David Yang +Dawid Potocki +Dawid Wysocki <62249621+TortillaZHawaii@users.noreply.github.com> +Dean +Deins +Deven Mistry <31466137+deven367@users.noreply.github.com> +Didzis Gosko +Djip007 +Don Mahurin +DooWoong Lee (David) +Doomsdayrs <38189170+Doomsdayrs@users.noreply.github.com> +Douglas Hanley +Dr. Tom Murphy VII Ph.D <499244+tom7@users.noreply.github.com> +Ebey Abraham +Ed Lee +Ed Lepedus +Eddie-Wang +Edward Taylor +Elaine +Elbios <141279586+Elbios@users.noreply.github.com> +Elton Kola +Engininja2 <139037756+Engininja2@users.noreply.github.com> +Equim +Eric Sommerlade +Eric Zhang <34133756+EZForever@users.noreply.github.com> +Erik Garrison +Erik Scholz +Ettore Di Giacinto +Evan Jones +Evan Miller +Eve <139727413+netrunnereve@users.noreply.github.com> +Evgeny Kurnevsky +Ewout ter Hoeven +ExtReMLapin <3909752+ExtReMLapin@users.noreply.github.com> +FK +Fabian +Fabio R. Sluzala +Faez Shakil +FantasyGmm <16450052+FantasyGmm@users.noreply.github.com> +Fattire <528174+fat-tire@users.noreply.github.com> +Felix +Finn Voorhees +Firat +Folko-Ven <71110216+Folko-Ven@users.noreply.github.com> +Foul-Tarnished <107711110+Foul-Tarnished@users.noreply.github.com> +Francisco Melo <43780565+francis2tm@users.noreply.github.com> +Frank Mai +FrankHB +Fred Douglas <43351173+fredlas@users.noreply.github.com> +Frederik Vogel +Gabe Goodhart +GainLee +Galunid +Gary Linscott +Gary Mulder +Gavin Zhao +Genkagaku.GPT +Georgi Gerganov +Gilad S +Giuseppe Scrivano +GiviMAD +Govlzkoy +Guillaume "Vermeille" Sanchez +Guillaume Wenzek +Guoteng <32697156+SolenoidWGT@users.noreply.github.com> +Gustavo Rocha Dias <91472747+gustrd@users.noreply.github.com> +Haggai Nuchi +Halalaluyafail3 <55773281+Halalaluyafail3@users.noreply.github.com> +Hamdoud Hakem <90524568+hamdoudhakem@users.noreply.github.com> +HanishKVC +Haohui Mai +Haoxiang Fei +Harald Fernengel +Hatsune Miku <129688334+at8u@users.noreply.github.com> +HatsuneMikuUwU33 <173229399+HatsuneMikuUwU33@users.noreply.github.com> +Henk Poley +Henri Vasserman +Henrik Forstén +Herman Semenov +Hesen Peng +Hoang Nguyen +Hong Bo PENG +Hongyu Ouyang <96765450+casavaca@users.noreply.github.com> +Howard Su +Hua Jiang +Huawei Lin +Hugo Roussel +Ian Bull +Ian Bull +Ian Scrivener +Ido S +IgnacioFDM +Igor Okulist +Ikko Eltociear Ashimine +Ilya Kurdyukov <59548320+ilyakurdyukov@users.noreply.github.com> +Ionoclast Laboratories +Isaac McFadyen +IsaacDynamo <61521674+IsaacDynamo@users.noreply.github.com> +Ivan Komarov +Ivan Stepanov +JH23X <165871467+JH23X@users.noreply.github.com> +Jack Mousseau +JackJollimore <130917767+JackJollimore@users.noreply.github.com> +Jaemin Son +Jag Chadha +Jakub N +James A Capozzoli <157492257+jac-jim@users.noreply.github.com> +James Reynolds +Jan Boon +Jan Boon +Jan Ploski +Jannis Schönleber +Jared 
Van Bortel +Jared Van Bortel +Jason McCartney +Jean-Christophe Hoelt +Jean-Michaël Celerier +Jed Fox +Jeffrey Quesnelle +Jesse Jojo Johnson +Jeximo +Jhen-Jie Hong +Jiahao Li +Jian Liao +JidongZhang-THU <1119708529@qq.com> +Jinwoo Jeong <33892306+williamjeong2@users.noreply.github.com> +Jiří Podivín <66251151+jpodivin@users.noreply.github.com> +Jiří Sejkora +Joan Fontanals +Joan Fontanals +Johan +Johannes Gäßler +Johannes Rudolph +John <78893154+cmp-nct@users.noreply.github.com> +John Balis +John Smith <67539080+kingsidelee@users.noreply.github.com> +JohnnyB +Jonas Wunderlich <32615971+jonas-w@users.noreply.github.com> +Jorge A <161275481+jorgealias@users.noreply.github.com> +Jose Maldonado <63384398+yukiteruamano@users.noreply.github.com> +Joseph Stahl <1269177+josephst@users.noreply.github.com> +Josh Ramer +Joyce +Juan Calderon-Perez <835733+gaby@users.noreply.github.com> +Judd +Julius Arkenberg +Jun Jie <71215065+junnjiee16@users.noreply.github.com> +Junyang Lin +Juraj Bednar +Justin Parker +Justin Suess +Justina Cho +Justine Tunney +Justine Tunney +Juuso Alasuutari +KASR +Kamil Tomšík +Karsten Weiss +Karthick +Karthik Kumar Viswanathan <195178+guilt@users.noreply.github.com> +Karthik Sethuraman +Kasumi <90275229+kasumi-1@users.noreply.github.com> +Kawrakow <48489457+ikawrakow@users.noreply.github.com> +Keiichi Tabata +Kenvix ⭐ +Kerfuffle <44031344+KerfuffleV2@users.noreply.github.com> +Kevin Gibbons +Kevin Ji <1146876+kevinji@users.noreply.github.com> +Kevin Kwok +Kevin Lo +Kolen Cheung +Konstantin Herud +Konstantin Zhuravlyov +Kunshang Ji +Kyle Liang +Kyle Mistele +Kylin <56434533+KyL0N@users.noreply.github.com> +Lars Grammel +Laura +Lee <44310445+lx200916@users.noreply.github.com> +Lee Drake +Leng Yue +Leon Knauer +LeonEricsson <70749762+LeonEricsson@users.noreply.github.com> +Leonardo Neumann +Li Tan +Linwei Wang +LoganDark +LostRuins <39025047+LostRuins@users.noreply.github.com> +Luciano +Luo Tian +Lyle Dean +M. Yusuf Sarıgöz +Maarten ter Huurne +Mack Straight +Maël Kerbiriou +MaggotHATE +Manuel <44313466+makuche@users.noreply.github.com> +Marc Köhlbrugge +Marco Matthies <71844+marcom@users.noreply.github.com> +Marcus Dunn <51931484+MarcusDunn@users.noreply.github.com> +Marian Cepok +Mark Fairbairn +Marko Tasic +Markus Tavenrath +Martin Delille +Martin Krasser +Martin Schwaighofer +Marvin Gießing +Masaya, Kato <62578291+msy-kato@users.noreply.github.com> +MasterYi1024 <39848311+MasterYi1024@users.noreply.github.com> +Mateusz Charytoniuk +Matheus C. França +Matheus Gabriel Alves Silva +Mathieu Nayrolles +Mathijs de Bruin +Matt Clayton <156335168+mattjcly@users.noreply.github.com> +Matt Pulver +Matteo Boschini <12133566+mbosc@users.noreply.github.com> +Mattheus Chediak +Matthew Tejo +Matvey Soloviev +Max Krasnyansky +Max Krasnyansky +Maxime <672982+maximegmd@users.noreply.github.com> +Maximilian Winter +Meng Zhang +Meng, Hengyu +Merrick Christensen +Michael Coppola +Michael Hueschen +Michael Kesper +Michael Klimenko +Michael Podvitskiy +Michael Potter +Michael de Gans +Michaël de Vries +Mihai +Mike +Mikko Juola +Minsoo Cheong <54794500+mscheong01@users.noreply.github.com> +Mirko185 +Mirror Azure <54669636+MirrorAzure@users.noreply.github.com> +Miwa / Ensan <63481257+ensan-hcl@users.noreply.github.com> +Mohammadreza Hendiani +Mohammadreza Hendiani +Murilo Santana +Musab Gultekin +Nam D. 
Tran <42194884+namtranase@users.noreply.github.com> +Nathan Epstein +NawafAlansari <72708095+NawafAlansari@users.noreply.github.com> +Nebula +Neo Zhang <14088817+arthw@users.noreply.github.com> +Neo Zhang +Neo Zhang Jianyu +Neuman Vong +Nexesenex <124105151+Nexesenex@users.noreply.github.com> +Niall Coates <1349685+Niall-@users.noreply.github.com> +Nicolai Weitkemper +Nicolás Pérez +Nigel Bosch +Niklas Korz +Nikolas <127742645+nneubacher@users.noreply.github.com> +Nindaleth +Oleksandr Nikitin +Oleksii Maryshchenko +Olivier Chafik +Ondřej Čertík +Ouadie EL FAROUKI +Patrice Ferlet +Paul Tsochantaris +Pavol Rusnak +Pedro Cuenca +Peter Sugihara +Phil H <5756783+phiharri@users.noreply.github.com> +Philip Taron +Phillip Kravtsov +Pierre Alexandre SCHEMBRI +Pierrick Hymbert +Przemysław Pawełczyk +Qin Yue Chen <71813199+chenqiny@users.noreply.github.com> +Qingyou Meng +Qu Zongfu <43257352+yancaoweidaode@users.noreply.github.com> +RJ Adriaansen +Radoslav Gerganov +Radosław Gryta +Rahul Vivek Nair <68507071+RahulVivekNair@users.noreply.github.com> +Raj Hammeer Singh Hada +Ralph Soika +Rand Xie +Randall Fitzgerald +Reinforce-II +Ren Xuancheng +Rene Leonhardt <65483435+reneleonhardt@users.noreply.github.com> +RhinoDevel +Riceball LEE +Richard Kiss +Richard Roberson +Rick G <26732651+TheFlipbook@users.noreply.github.com> +Rickard Edén +Rickard Hallerbäck +Rickey Bowers Jr +Riley Stewart +Rinne +Rinne +Robert Brisita <986796+rbrisita@users.noreply.github.com> +Robert Sung-wook Shin +Robey Holderith +Robyn +Roger Meier +Roland <14355895+rbur0425@users.noreply.github.com> +Romain D <90720+Artefact2@users.noreply.github.com> +Romain Neutron +Roman Parykin +Ron Evans +Ron Jailall +Ronny Brendel +Ronsor +Rowan Hart +Rune <43761327+Rune-AI@users.noreply.github.com> +Ryan Landay +Ryder Wishart +Ryuei +Rőczey Barnabás <31726601+An0nie@users.noreply.github.com> +SakuraUmi +Salvador E. 
Tropea +Sam Spilsbury +Sami Farin <3876865+Safari77@users.noreply.github.com> +Samuel Maynard +Sang-Kil Park +Seb C <47074056+Sebby37@users.noreply.github.com> +Sebastián A +SebastianApel <13675545+SebastianApel@users.noreply.github.com> +Senemu <10880819+Senemu@users.noreply.github.com> +Sergey Alirzaev +Sergio López +Sertaç Özercan <852750+sozercan@users.noreply.github.com> +SeungWon Jeong <65549245+redlion0929@users.noreply.github.com> +ShadovvBeast +Shakhar Dasgupta +Shangning Xu <32517059+xushangning@users.noreply.github.com> +Shijie <821898965@qq.com> +Shintarou Okada +Shouzheng Liu <61452103+lshzh-ww@users.noreply.github.com> +Shouzheng Liu +Shuichi Tsutsumi +Sigbjørn Skjæret +Simon Willison +Siwen Yu +Sky Yan +Slaren <2141330+slaren@users.noreply.github.com> +Slava Primenko +SoftwareRenderer <138734813+SoftwareRenderer@users.noreply.github.com> +Someone +Someone Serge +Sourab Mangrulkar <13534540+pacman100@users.noreply.github.com> +Spencer Sutton +Srihari-mcw <96763064+Srihari-mcw@users.noreply.github.com> +Srinivas Billa +Stefan Sydow +Steffen Röcker +Stephan Walter +Stephen Nichols +Steve Grubb +Steven Prichard +Steven Roussey +Steward Garcia <57494570+FSSRepo@users.noreply.github.com> +Suaj Carrot <72162667+SuajCarrot@users.noreply.github.com> +SuperUserNameMan +Tai Duc Nguyen +Taikono-Himazin +Tameem <113388789+AhmadTameem@users.noreply.github.com> +Tamotsu Takahashi +Thái Hoàng Tâm <75922889+RoyalHeart@users.noreply.github.com> +Thatcher Chamberlin +Theia Vogel +Thérence <13496987+Royalphax@users.noreply.github.com> +Thibault Terrasson +Thomas Klausner +Tim Miller +Timmy Knight +Timothy Cronin <40186632+4imothy@users.noreply.github.com> +Ting Lou +Ting Sun +Tobias Lütke +Tom C +Tom Jobbins <784313+TheBloke@users.noreply.github.com> +Tomas +Tomáš Pazdiora +Tristan Druyen +Tristan Ross +Tungsten842 <886724vf@anonaddy.me> +Tungsten842 +Tushar +UEXTM.com <84163508+uextm@users.noreply.github.com> +Ulrich Drepper +Uzo Nweke +Vaibhav Srivastav +Val Kharitonov +Valentin Konovalov +Valentyn Bezshapkin <61702053+valentynbez@users.noreply.github.com> +Victor Nogueira +Victor Z. 
Peng +Vlad +Vladimir +Vladimir Malyutin +Vladimir Zorin +Volodymyr Vitvitskyi <72226+signalpillar@users.noreply.github.com> +WangHaoranRobin <56047610+WangHaoranRobin@users.noreply.github.com> +Weird Constructor +Welby Seely +Wentai Zhang +WillCorticesAI <150854901+WillCorticesAI@users.noreply.github.com> +William Tambellini +Willy Tarreau +Wouter <9594229+DifferentialityDevelopment@users.noreply.github.com> +Wu Jian Ping +Wu Jian Ping +Xiake Sun +Xiang (Kevin) Li +Xiao-Yong Jin +XiaotaoChen +Xiaoyi Chen +Xingchen Song(宋星辰) +Xuan Son Nguyen +Yann Follet <131855179+YannFollet@users.noreply.github.com> +Yaroslav +Yazan Agha-Schrader +Yiming Cui +Yishuo Wang +Yueh-Po Peng <94939112+y10ab1@users.noreply.github.com> +Yui +Yusuf Kağan Hanoğlu +Yuval Peled <31162840+Yuval-Peled@users.noreply.github.com> +ZHAOKAI WANG +Zane Shannon +Zay <95888118+isaiahbjork@users.noreply.github.com> +Zenix +Zhang Peiyuan +Zheng.Deng <32841220+dengzheng-cloud@users.noreply.github.com> +ZhouYuChen +Ziad Ben Hadj-Alouane +Ziang Wu <97337387+ZiangWu-77@users.noreply.github.com> +Zsapi +a-n-n-a-l-e-e <150648636+a-n-n-a-l-e-e@users.noreply.github.com> +adel boussaken +afrideva <95653597+afrideva@users.noreply.github.com> +agray3 +akawrykow <142945436+akawrykow@users.noreply.github.com> +alexpinel <93524949+alexpinel@users.noreply.github.com> +alonfaraj +alwqx +amd-lalithnc +andrijdavid +anon998 <131767832+anon998@users.noreply.github.com> +anzz1 +apaz +apcameron <37645737+apcameron@users.noreply.github.com> +arch-btw <57669023+arch-btw@users.noreply.github.com> +arcrank +arlo-phoenix <140345165+arlo-phoenix@users.noreply.github.com> +at8u <129688334+at8u@users.noreply.github.com> +automaticcat +bandoti <141645996+bandoti@users.noreply.github.com> +beiller +bhubbb <79117352+bhubbb@users.noreply.github.com> +bmwl +bobqianic <129547291+bobqianic@users.noreply.github.com> +bryanSwk <93190252+bryanSwk@users.noreply.github.com> +bsilvereagle +bssrdf +byte-6174 <88070277+byte-6174@users.noreply.github.com> +cebtenzzre +chaihahaha +chiranko <96988916+chiranko@users.noreply.github.com> +clibdev <52199778+clibdev@users.noreply.github.com> +clyang +cocktailpeanut <121128867+cocktailpeanut@users.noreply.github.com> +coezbek +comex +compilade <113953597+compilade@users.noreply.github.com> +compilade +cpumaxx <163466046+cpumaxx@users.noreply.github.com> +crasm +crasm +daboe01 +david raistrick +ddh0 +ddpasa <112642920+ddpasa@users.noreply.github.com> +deepdiffuser <112834445+deepdiffuser@users.noreply.github.com> +divinity76 +dm4 +dotpy314 <33351922+dotpy314@users.noreply.github.com> +drbh +ds5t5 <145942675+ds5t5@users.noreply.github.com> +dylan +eastriver +ebraminio +eiery <19350831+eiery@users.noreply.github.com> +eric8607242 +fairydreaming <166155368+fairydreaming@users.noreply.github.com> +fraxy-v <65565042+fraxy-v@users.noreply.github.com> +github-actions[bot] +gliptic +goerch +grahameth <96447521+grahameth@users.noreply.github.com> +gwjr <502526+gwjr@users.noreply.github.com> +h-h-h-h <13482553+h-h-h-h@users.noreply.github.com> +hankcs +hoangmit +hongbo.mo <352280764@qq.com> +hopkins385 <98618192+hopkins385@users.noreply.github.com> +howlger +howlger +hutli <6594598+hutli@users.noreply.github.com> +hutli +hutli +hxer7963 +hydai +iSma +iacore <74560659+iacore@users.noreply.github.com> +igarnier +intelmatt <61025942+intelmatt@users.noreply.github.com> +iohub +jacobi petrucciani <8117202+jpetrucciani@users.noreply.github.com> +jaime-m-p <167997752+jaime-m-p@users.noreply.github.com> +jameswu2014 <545426914@qq.com> +jiez 
<373447296@qq.com> +jneem +joecryptotoo <80373433+joecryptotoo@users.noreply.github.com> +johnson442 <56517414+johnson442@users.noreply.github.com> +jojorne +jon-chuang <9093549+jon-chuang@users.noreply.github.com> +jp-x-g +jukofyork <69222624+jukofyork@users.noreply.github.com> +junchao-loongson <68935141+junchao-loongson@users.noreply.github.com> +jwj7140 <32943891+jwj7140@users.noreply.github.com> +k.h.lai +kaizau +kalomaze <66376113+kalomaze@users.noreply.github.com> +kang +katsu560 <118887472+katsu560@users.noreply.github.com> +kchro3 <62481661+kchro3@users.noreply.github.com> +khimaros +kiltyj +klosax <131523366+klosax@users.noreply.github.com> +kunal-vaishnavi <115581922+kunal-vaishnavi@users.noreply.github.com> +kunnis +kuronekosaiko +kuvaus <22169537+kuvaus@users.noreply.github.com> +kwin1412 <42286931+kwin1412@users.noreply.github.com> +l3utterfly +ldwang +le.chang +leejet +limitedAtonement +liuwei-git <14815172+liuwei-git@users.noreply.github.com> +lon <114724657+longregen@users.noreply.github.com> +loonerin <132926317+loonerin@users.noreply.github.com> +luoyu-intel +m3ndax +maddes8cht <55592906+maddes8cht@users.noreply.github.com> +makomk +manikbhandari +maor-ps <154728172+maor-ps@users.noreply.github.com> +mdrokz +mgroeber9110 <45620825+mgroeber9110@users.noreply.github.com> +minarchist +mj-shifu <77107165+mj-shifu@users.noreply.github.com> +mmyjona +momonga <115213907+mmnga@users.noreply.github.com> +moritzbrantner <31051084+moritzbrantner@users.noreply.github.com> +mzcu +nanahi <130121847+na-na-hi@users.noreply.github.com> +ngc92 <7938269+ngc92@users.noreply.github.com> +nhamanasu <45545786+nhamanasu@users.noreply.github.com> +niansa/tuxifan +niansa/tuxifan +nickp27 +ningshanwutuobang +nold +nopperl <54780682+nopperl@users.noreply.github.com> +nusu-github <29514220+nusu-github@users.noreply.github.com> +olexiyb +omahs <73983677+omahs@users.noreply.github.com> +oobabooga <112222186+oobabooga@users.noreply.github.com> +opparco +ostix360 <55257054+ostix360@users.noreply.github.com> +pengxin99 +perserk +pmysl +postmasters +pudepiedj +qingfengfenga <41416092+qingfengfenga@users.noreply.github.com> +qouoq +qunash +rabidcopy +rankaiyx +rhjdvsgsgks <26178113+rhjdvsgsgks@users.noreply.github.com> +rhuddleston +rimoliga <53384203+rimoliga@users.noreply.github.com> +runfuture +sandyiscool +sasha0552 +semidark +sharpHL <132747147+sharpHL@users.noreply.github.com> +shibe2 +singularity <12184989+singularity-s0@users.noreply.github.com> +sjinzh +sjxx <63994076+ylsdamxssjxxdd@users.noreply.github.com> +slaren <2141330+slaren@users.noreply.github.com> +slaren +snadampal <87143774+snadampal@users.noreply.github.com> +staviq +stduhpf +strawberrymelonpanda <152940198+strawberrymelonpanda@users.noreply.github.com> +swittk +takov751 <40316768+takov751@users.noreply.github.com> +tarcey +texmex76 <40733439+texmex76@users.noreply.github.com> +thement <40525767+thement@users.noreply.github.com> +tjohnman +tslmy +ubik2 +uint256_t +uint256_t +unbounded +valiray <133289098+valiray@users.noreply.github.com> +vik +viric +vodkaslime <646329483@qq.com> +vvhg1 <94630311+vvhg1@users.noreply.github.com> +vxiiduu <73044267+vxiiduu@users.noreply.github.com> +wbpxre150 <100937007+wbpxre150@users.noreply.github.com> +whoreson <139810751+whoreson@users.noreply.github.com> +woachk <24752637+woachk@users.noreply.github.com> +wonjun Jang +woodx <124784234+woodx9@users.noreply.github.com> +wzy <32936898+Freed-Wu@users.noreply.github.com> +xaedes +xaedes +xloem <0xloem@gmail.com> +yangli2 +yuiseki +zakkor +zhangkaihuo 
+zhouwg <6889919+zhouwg@users.noreply.github.com> +zhouwg +zrm +Ștefan-Gabriel Muscalu +源文雨 <41315874+fumiama@users.noreply.github.com> +Нияз Гарифзянов <112617865+garrnizon@users.noreply.github.com> diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..ef0932a7b9277966fac399fcaefc72c4b3d71a52 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,216 @@ +cmake_minimum_required(VERSION 3.14) # for add_link_options and implicit target directories. +project("llama.cpp" C CXX) +include(CheckIncludeFileCXX) + +#set(CMAKE_WARN_DEPRECATED YES) +set(CMAKE_WARN_UNUSED_CLI YES) + +set(CMAKE_EXPORT_COMPILE_COMMANDS ON) + +if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE) + set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo") +endif() + +# Add path to modules +list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/") + +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin) + +if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR) + set(LLAMA_STANDALONE ON) + + include(git-vars) + + # configure project version + # TODO +else() + set(LLAMA_STANDALONE OFF) +endif() + +if (EMSCRIPTEN) + set(BUILD_SHARED_LIBS_DEFAULT OFF) + + option(LLAMA_WASM_SINGLE_FILE "llama: embed WASM inside the generated llama.js" ON) +else() + if (MINGW) + set(BUILD_SHARED_LIBS_DEFAULT OFF) + else() + set(BUILD_SHARED_LIBS_DEFAULT ON) + endif() +endif() + +option(BUILD_SHARED_LIBS "build shared libraries" ${BUILD_SHARED_LIBS_DEFAULT}) + +if (WIN32) + add_compile_definitions(_CRT_SECURE_NO_WARNINGS) +endif() + +# +# option list +# + +# debug +option(LLAMA_ALL_WARNINGS "llama: enable all compiler warnings" ON) +option(LLAMA_ALL_WARNINGS_3RD_PARTY "llama: enable all compiler warnings in 3rd party libs" OFF) + +# build +option(LLAMA_FATAL_WARNINGS "llama: enable -Werror flag" OFF) + +# sanitizers +option(LLAMA_SANITIZE_THREAD "llama: enable thread sanitizer" OFF) +option(LLAMA_SANITIZE_ADDRESS "llama: enable address sanitizer" OFF) +option(LLAMA_SANITIZE_UNDEFINED "llama: enable undefined sanitizer" OFF) + +# utils +option(LLAMA_BUILD_COMMON "llama: build common utils library" ${LLAMA_STANDALONE}) + +# extra artifacts +option(LLAMA_BUILD_TESTS "llama: build tests" ${LLAMA_STANDALONE}) +option(LLAMA_BUILD_EXAMPLES "llama: build examples" ${LLAMA_STANDALONE}) +option(LLAMA_BUILD_SERVER "llama: build server example" ${LLAMA_STANDALONE}) + +# 3rd party libs +option(LLAMA_CURL "llama: use libcurl to download model from an URL" OFF) + +# Required for relocatable CMake package +include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/build-info.cmake) + +# override ggml options +set(GGML_SANITIZE_THREAD ${LLAMA_SANITIZE_THREAD}) +set(GGML_SANITIZE_ADDRESS ${LLAMA_SANITIZE_ADDRESS}) +set(GGML_SANITIZE_UNDEFINED ${LLAMA_SANITIZE_UNDEFINED}) +set(GGML_ALL_WARNINGS ${LLAMA_ALL_WARNINGS}) +set(GGML_FATAL_WARNINGS ${LLAMA_FATAL_WARNINGS}) + +# change the default for these ggml options +if (NOT DEFINED GGML_LLAMAFILE) + set(GGML_LLAMAFILE_DEFAULT ON) +endif() + +if (NOT DEFINED GGML_AMX) + set(GGML_AMX ON) +endif() + +if (NOT DEFINED GGML_CUDA_GRAPHS) + set(GGML_CUDA_GRAPHS_DEFAULT ON) +endif() + +# transition helpers +function (llama_option_depr TYPE OLD NEW) + if (${OLD}) + message(${TYPE} "${OLD} is deprecated and will be removed in the future.\nUse ${NEW} instead\n") + set(${NEW} ON PARENT_SCOPE) + endif() +endfunction() + +llama_option_depr(FATAL_ERROR LLAMA_CUBLAS GGML_CUDA) 
+llama_option_depr(WARNING LLAMA_CUDA GGML_CUDA) +llama_option_depr(WARNING LLAMA_KOMPUTE GGML_KOMPUTE) +llama_option_depr(WARNING LLAMA_METAL GGML_METAL) +llama_option_depr(WARNING LLAMA_METAL_EMBED_LIBRARY GGML_METAL_EMBED_LIBRARY) +llama_option_depr(WARNING LLAMA_NATIVE GGML_NATIVE) +llama_option_depr(WARNING LLAMA_RPC GGML_RPC) +llama_option_depr(WARNING LLAMA_SYCL GGML_SYCL) +llama_option_depr(WARNING LLAMA_SYCL_F16 GGML_SYCL_F16) +llama_option_depr(WARNING LLAMA_CANN GGML_CANN) + +# +# build the library +# + +if (NOT TARGET ggml) + add_subdirectory(ggml) + # ... otherwise assume ggml is added by a parent CMakeLists.txt +endif() +add_subdirectory(src) + +# +# install +# + +include(GNUInstallDirs) +include(CMakePackageConfigHelpers) + +set(LLAMA_BUILD_NUMBER ${BUILD_NUMBER}) +set(LLAMA_BUILD_COMMIT ${BUILD_COMMIT}) +set(LLAMA_INSTALL_VERSION 0.0.${BUILD_NUMBER}) + +set(LLAMA_INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_INCLUDEDIR} CACHE PATH "Location of header files") +set(LLAMA_LIB_INSTALL_DIR ${CMAKE_INSTALL_LIBDIR} CACHE PATH "Location of library files") +set(LLAMA_BIN_INSTALL_DIR ${CMAKE_INSTALL_BINDIR} CACHE PATH "Location of binary files") + + +# At the moment some compile definitions are placed within the ggml/src +# directory but not exported on the `ggml` target. This could be improved by +# determining _precisely_ which defines are necessary for the llama-config +# package. +# +set(GGML_TRANSIENT_DEFINES) +get_target_property(GGML_DIRECTORY ggml SOURCE_DIR) +get_directory_property(GGML_DIR_DEFINES DIRECTORY ${GGML_DIRECTORY} COMPILE_DEFINITIONS) +if (GGML_DIR_DEFINES) + list(APPEND GGML_TRANSIENT_DEFINES ${GGML_DIR_DEFINES}) +endif() +get_target_property(GGML_TARGET_DEFINES ggml COMPILE_DEFINITIONS) +if (GGML_TARGET_DEFINES) + list(APPEND GGML_TRANSIENT_DEFINES ${GGML_TARGET_DEFINES}) +endif() +get_target_property(GGML_LINK_LIBRARIES ggml LINK_LIBRARIES) + +set_target_properties(llama PROPERTIES PUBLIC_HEADER ${CMAKE_CURRENT_SOURCE_DIR}/include/llama.h) +install(TARGETS llama LIBRARY PUBLIC_HEADER) + +configure_package_config_file( + ${CMAKE_CURRENT_SOURCE_DIR}/cmake/llama-config.cmake.in + ${CMAKE_CURRENT_BINARY_DIR}/llama-config.cmake + INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/llama + PATH_VARS LLAMA_INCLUDE_INSTALL_DIR + LLAMA_LIB_INSTALL_DIR + LLAMA_BIN_INSTALL_DIR ) + +write_basic_package_version_file( + ${CMAKE_CURRENT_BINARY_DIR}/llama-version.cmake + VERSION ${LLAMA_INSTALL_VERSION} + COMPATIBILITY SameMajorVersion) + +install(FILES ${CMAKE_CURRENT_BINARY_DIR}/llama-config.cmake + ${CMAKE_CURRENT_BINARY_DIR}/llama-version.cmake + DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/llama) + +install( + FILES convert_hf_to_gguf.py + PERMISSIONS + OWNER_READ + OWNER_WRITE + OWNER_EXECUTE + GROUP_READ + GROUP_EXECUTE + WORLD_READ + WORLD_EXECUTE + DESTINATION ${CMAKE_INSTALL_BINDIR}) + +configure_file(cmake/llama.pc.in + "${CMAKE_CURRENT_BINARY_DIR}/llama.pc" + @ONLY) + +install(FILES "${CMAKE_CURRENT_BINARY_DIR}/llama.pc" + DESTINATION lib/pkgconfig) + +# +# utils, programs, examples and tests +# + +if (LLAMA_BUILD_COMMON) + add_subdirectory(common) +endif() + +if (LLAMA_BUILD_COMMON AND LLAMA_BUILD_TESTS AND NOT CMAKE_JS_VERSION) + include(CTest) + add_subdirectory(tests) +endif() + +if (LLAMA_BUILD_COMMON AND LLAMA_BUILD_EXAMPLES) + add_subdirectory(examples) + add_subdirectory(pocs) +endif() diff --git a/CMakePresets.json b/CMakePresets.json new file mode 100644 index 0000000000000000000000000000000000000000..ae45d60af49d9360a12a515c545257638a6f047a --- /dev/null +++ 
b/CMakePresets.json @@ -0,0 +1,81 @@ +{ + "version": 4, + "configurePresets": [ + { + "name": "base", + "hidden": true, + "generator": "Ninja", + "binaryDir": "${sourceDir}/build-${presetName}", + "cacheVariables": { + "CMAKE_EXPORT_COMPILE_COMMANDS": "ON", + "CMAKE_INSTALL_RPATH": "$ORIGIN;$ORIGIN/.." + } + }, + { + "name": "sycl-base", + "hidden": true, + "generator": "Ninja", + "binaryDir": "${sourceDir}/build-${presetName}", + "cacheVariables": { + "CMAKE_EXPORT_COMPILE_COMMANDS": "ON", + "CMAKE_CXX_COMPILER": "icx", + "CMAKE_C_COMPILER": "cl", + "GGML_SYCL": "ON", + "CMAKE_INSTALL_RPATH": "$ORIGIN;$ORIGIN/.." + } + }, + { "name": "debug", "hidden": true, "cacheVariables": { "CMAKE_BUILD_TYPE": "Debug" } }, + { "name": "release", "hidden": true, "cacheVariables": { "CMAKE_BUILD_TYPE": "Release" } }, + { "name": "reldbg", "hidden": true, "cacheVariables": { "CMAKE_BUILD_TYPE": "RelWithDebInfo" } }, + { "name": "static", "hidden": true, "cacheVariables": { "GGML_STATIC": "ON" } }, + { "name": "sycl_f16", "hidden": true, "cacheVariables": { "GGML_SYCL_F16": "ON" } }, + + { + "name": "arm64-windows-msvc", "hidden": true, + "architecture": { "value": "arm64", "strategy": "external" }, + "toolset": { "value": "host=x64", "strategy": "external" }, + "cacheVariables": { + "CMAKE_TOOLCHAIN_FILE": "${sourceDir}/cmake/arm64-windows-msvc.cmake" + } + }, + + { + "name": "arm64-windows-llvm", "hidden": true, + "architecture": { "value": "arm64", "strategy": "external" }, + "toolset": { "value": "host=x64", "strategy": "external" }, + "cacheVariables": { + "CMAKE_TOOLCHAIN_FILE": "${sourceDir}/cmake/arm64-windows-llvm.cmake" + } + }, + + { + "name": "arm64-apple-clang", "hidden": true, + "architecture": { "value": "arm64", "strategy": "external" }, + "toolset": { "value": "host=x64", "strategy": "external" }, + "cacheVariables": { + "CMAKE_TOOLCHAIN_FILE": "${sourceDir}/cmake/arm64-apple-clang.cmake" + } + }, + + { "name": "arm64-windows-llvm-debug" , "inherits": [ "base", "arm64-windows-llvm", "debug" ] }, + { "name": "arm64-windows-llvm-release", "inherits": [ "base", "arm64-windows-llvm", "reldbg" ] }, + { "name": "arm64-windows-llvm+static-release", "inherits": [ "base", "arm64-windows-llvm", "reldbg", "static" ] }, + + { "name": "arm64-apple-clang-debug" , "inherits": [ "base", "arm64-apple-clang", "debug" ] }, + { "name": "arm64-apple-clang-release" , "inherits": [ "base", "arm64-apple-clang", "reldbg" ] }, + { "name": "arm64-apple-clang+static-release" , "inherits": [ "base", "arm64-apple-clang", "reldbg", "static" ] }, + + { "name": "arm64-windows-msvc-debug" , "inherits": [ "base", "arm64-windows-msvc", "debug" ] }, + { "name": "arm64-windows-msvc-release", "inherits": [ "base", "arm64-windows-msvc", "reldbg" ] }, + { "name": "arm64-windows-msvc+static-release", "inherits": [ "base", "arm64-windows-msvc", "reldbg", "static" ] }, + + { "name": "x64-windows-msvc-debug" , "inherits": [ "base", "debug" ] }, + { "name": "x64-windows-msvc-release", "inherits": [ "base", "reldbg" ] }, + { "name": "x64-windows-msvc+static-release", "inherits": [ "base", "reldbg", "static" ] }, + + { "name": "x64-windows-sycl-debug" , "inherits": [ "sycl-base", "debug" ] }, + { "name": "x64-windows-sycl-debug-f16", "inherits": [ "sycl-base", "debug", "sycl_f16" ] }, + { "name": "x64-windows-sycl-release", "inherits": [ "sycl-base", "release" ] }, + { "name": "x64-windows-sycl-release-f16", "inherits": [ "sycl-base", "release", "sycl_f16" ] } + ] +} diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 
index 0000000000000000000000000000000000000000..4c882c254cac502ef76bf6684eb5a12d71979f9a
--- /dev/null
+++ b/CONTRIBUTING.md
@@ -0,0 +1,33 @@
+# Pull requests (for contributors)
+
+- Test your changes:
+  - Using the commands in the [`tests`](tests) folder. For instance, running the `./tests/test-backend-ops` command tests different backend implementations of the `ggml` library
+  - Execute [the full CI locally on your machine](ci/README.md) before publishing
+- Optionally rate the complexity of your PR (i.e. `Review Complexity : Low`, `Review Complexity : Medium`, `Review Complexity : High`). This makes it easier for maintainers to triage the PRs
+- Consider allowing write access to your branch for faster reviews, as reviewers can push commits directly
+- If your PR becomes stale, don't hesitate to ping the maintainers in the comments
+
+# Pull requests (for collaborators)
+
+- Squash-merge PRs
+- Use the following format for the squashed commit title: `<module> : <commit title> (#<issue_number>)`. For example: `utils : fix typo in utils.py (#1234)`
+- Optionally pick a `<module>` from here: https://github.com/ggerganov/llama.cpp/wiki/Modules
+
+# Coding guidelines
+
+- Avoid adding third-party dependencies, extra files, extra headers, etc.
+- Always consider cross-compatibility with other operating systems and architectures
+- Avoid fancy-looking modern STL constructs, use basic `for` loops, avoid templates, keep it simple
+- There are no strict rules for the code style, but try to follow the patterns in the code (indentation, spaces, etc.). Vertical alignment makes things more readable and easier to batch edit
+- Clean-up any trailing whitespaces, use 4 spaces for indentation, brackets on the same line, `void * ptr`, `int & a`
+- Naming usually optimizes for common prefix (see https://github.com/ggerganov/ggml/pull/302#discussion_r1243240963)
+- Tensors store data in row-major order. We refer to dimension 0 as columns, 1 as rows, 2 as matrices
+- Matrix multiplication is unconventional: [`C = ggml_mul_mat(ctx, A, B)`](https://github.com/ggerganov/llama.cpp/blob/880e352277fc017df4d5794f0c21c44e1eae2b84/ggml.h#L1058-L1064) means $C^T = A B^T \Leftrightarrow C = B A^T.$ A concrete shape sketch follows right after this file
+
+![matmul](media/matmul.png)
+
+# Resources
+
+The Github issues, PRs and discussions contain a lot of information that can be useful to get familiar with the codebase. For convenience, some of the more important information is referenced from Github projects:
+
+https://github.com/ggerganov/llama.cpp/projects
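The matrix-multiplication bullet above is easiest to see with concrete shapes. The following minimal sketch is an editorial illustration (not a file from this diff) that assumes only the public `ggml.h` API: for `C = ggml_mul_mat(ctx, A, B)`, `A` and `B` must agree in dimension 0 (the shared inner dimension), and the result has `ne = [A->ne[1], B->ne[1]]`:

```cpp
// Editorial sketch of the ggml_mul_mat shape convention (not part of this diff).
// In ggml, a 2-D tensor of shape [ne0, ne1] has ne0 "columns" and ne1 "rows",
// stored in row-major order, matching the coding guidelines above.
#include "ggml.h"

#include <cassert>

int main(void) {
    struct ggml_init_params params = {
        /*.mem_size   =*/ 16*1024*1024,
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ false,
    };
    struct ggml_context * ctx = ggml_init(params);

    // A: k columns x m rows, B: k columns x n rows (dimension 0 must match)
    const int64_t k = 4, m = 3, n = 2;
    struct ggml_tensor * A = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, k, m);
    struct ggml_tensor * B = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, k, n);

    // C^T = A B^T, i.e. C = B A^T  ->  C: m columns x n rows
    struct ggml_tensor * C = ggml_mul_mat(ctx, A, B);
    assert(C->ne[0] == m && C->ne[1] == n);

    ggml_free(ctx);
    return 0;
}
```

The `/*.mem_size =*/` initializer comments and the `type * name` pointer spacing deliberately mirror the code-style bullets in CONTRIBUTING.md above.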
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000000000000000000000000000000000000..acb96ce78e0486e9dc7602cdcdd2f491c34f335a
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2023-2024 The ggml authors
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..eb1da90f11a658d12ba73b184e829a4641c5ec01
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,1702 @@
+# Define the default target now so that it is always the first target
+BUILD_TARGETS = \
+	libllava.a \
+	llama-batched \
+	llama-batched-bench \
+	llama-bench \
+	llama-cli \
+	llama-convert-llama2c-to-ggml \
+	llama-embedding \
+	llama-eval-callback \
+	llama-export-lora \
+	llama-gbnf-validator \
+	llama-gguf \
+	llama-gguf-hash \
+	llama-gguf-split \
+	llama-gritlm \
+	llama-imatrix \
+	llama-infill \
+	llama-llava-cli \
+	llama-minicpmv-cli \
+	llama-lookahead \
+	llama-lookup \
+	llama-lookup-create \
+	llama-lookup-merge \
+	llama-lookup-stats \
+	llama-parallel \
+	llama-passkey \
+	llama-perplexity \
+	llama-q8dot \
+	llama-quantize \
+	llama-quantize-stats \
+	llama-retrieval \
+	llama-save-load-state \
+	llama-server \
+	llama-simple \
+	llama-simple-chat \
+	llama-speculative \
+	llama-tokenize \
+	llama-vdot \
+	llama-cvector-generator \
+	llama-gen-docs \
+	tests/test-c.o
+
+# Binaries only useful for tests
+TEST_TARGETS = \
+	tests/test-arg-parser \
+	tests/test-autorelease \
+	tests/test-backend-ops \
+	tests/test-chat-template \
+	tests/test-double-float \
+	tests/test-grad0 \
+	tests/test-grammar-integration \
+	tests/test-grammar-parser \
+	tests/test-json-schema-to-grammar \
+	tests/test-llama-grammar \
+	tests/test-log \
+	tests/test-model-load-cancel \
+	tests/test-quantize-fns \
+	tests/test-quantize-perf \
+	tests/test-rope \
+	tests/test-sampling \
+	tests/test-tokenizer-0 \
+	tests/test-tokenizer-1-bpe \
+	tests/test-tokenizer-1-spm
+# tests/test-opt \
+
+# Legacy build targets that were renamed in #7809, but should still be removed when the project is cleaned
+LEGACY_TARGETS_CLEAN = main quantize quantize-stats perplexity imatrix embedding vdot q8dot convert-llama2c-to-ggml \
+	simple batched batched-bench save-load-state server gguf gguf-split eval-callback llama-bench libllava.a llava-cli baby-llama \
+	retrieval speculative infill tokenize parallel export-lora lookahead lookup passkey gritlm
+
+# Legacy build targets that were renamed in #7809, but we want to build binaries for them that output a deprecation warning if people try to use them.
+# We don't want to clutter things too much, so we only build replacements for the most commonly used binaries.
+LEGACY_TARGETS_BUILD = main quantize perplexity embedding server
+
+# Deprecation aliases
+ifdef LLAMA_CUBLAS
+$(error LLAMA_CUBLAS is removed. Use GGML_CUDA instead.)
+endif
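# [Editorial sketch - not part of the original Makefile] Each block below maps a
# deprecated LLAMA_* variable onto its GGML_* successor and sets DEPRECATE_WARNING,
# which causes the deprecation notice to be printed after the build info. For example
# (assuming a CUDA toolchain is installed):
#
#     make LLAMA_CUDA=1    # still builds with CUDA, behaves like GGML_CUDA=1,
#                          # and prints the "DEPRECATION WARNING" summary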
+
+ifdef LLAMA_CUDA
+GGML_CUDA := 1
+DEPRECATE_WARNING := 1
+endif
+
+ifdef LLAMA_KOMPUTE
+GGML_KOMPUTE := 1
+DEPRECATE_WARNING := 1
+endif
+
+ifdef LLAMA_METAL
+GGML_METAL := 1
+DEPRECATE_WARNING := 1
+endif
+
+ifdef LLAMA_RPC
+GGML_RPC := 1
+DEPRECATE_WARNING := 1
+endif
+
+ifdef LLAMA_SYCL
+GGML_SYCL := 1
+DEPRECATE_WARNING := 1
+endif
+
+ifdef LLAMA_SYCL_F16
+GGML_SYCL_F16 := 1
+DEPRECATE_WARNING := 1
+endif
+
+ifdef LLAMA_OPENBLAS
+GGML_OPENBLAS := 1
+DEPRECATE_WARNING := 1
+endif
+
+ifdef LLAMA_OPENBLAS64
+GGML_OPENBLAS64 := 1
+DEPRECATE_WARNING := 1
+endif
+
+ifdef LLAMA_BLIS
+GGML_BLIS := 1
+DEPRECATE_WARNING := 1
+endif
+
+ifdef LLAMA_NO_LLAMAFILE
+GGML_NO_LLAMAFILE := 1
+DEPRECATE_WARNING := 1
+endif
+
+ifdef LLAMA_NO_ACCELERATE
+GGML_NO_ACCELERATE := 1
+DEPRECATE_WARNING := 1
+endif
+
+ifdef LLAMA_NO_OPENMP
+GGML_NO_OPENMP := 1
+DEPRECATE_WARNING := 1
+endif
+
+ifdef LLAMA_NO_METAL
+GGML_NO_METAL := 1
+DEPRECATE_WARNING := 1
+endif
+
+ifdef LLAMA_DISABLE_LOGS
+REMOVE_WARNING := 1
+endif
+
+ifdef LLAMA_SERVER_VERBOSE
+REMOVE_WARNING := 1
+endif
+
+ifndef UNAME_S
+UNAME_S := $(shell uname -s)
+endif
+
+ifndef UNAME_P
+UNAME_P := $(shell uname -p)
+endif
+
+ifndef UNAME_M
+UNAME_M := $(shell uname -m)
+endif
+
+# In GNU make, the default CXX is g++ instead of c++. Fix that so that users
+# of non-gcc compilers don't have to provide a g++ alias or wrapper.
+DEFCC := cc
+DEFCXX := c++
+ifeq ($(origin CC),default)
+CC := $(DEFCC)
+endif
+ifeq ($(origin CXX),default)
+CXX := $(DEFCXX)
+endif
+
+# Mac OS + Arm can report x86_64
+# ref: https://github.com/ggerganov/whisper.cpp/issues/66#issuecomment-1282546789
+ifeq ($(UNAME_S),Darwin)
+    ifndef GGML_NO_METAL
+        GGML_METAL := 1
+    endif
+
+    GGML_NO_OPENMP := 1
+
+    ifneq ($(UNAME_P),arm)
+        SYSCTL_M := $(shell sysctl -n hw.optional.arm64 2>/dev/null)
+        ifeq ($(SYSCTL_M),1)
+            # UNAME_P := arm
+            # UNAME_M := arm64
+            warn := $(warning Your arch is announced as x86_64, but it seems to actually be ARM64. Not fixing that can lead to bad performance. For more info see: https://github.com/ggerganov/whisper.cpp/issues/66\#issuecomment-1282546789)
+        endif
+    endif
+endif
+
+ifdef GGML_METAL
+    GGML_METAL_EMBED_LIBRARY := 1
+endif
+
+ifdef GGML_RPC
+    BUILD_TARGETS += rpc-server
+endif
+
+ifdef GGML_VULKAN
+    BUILD_TARGETS += vulkan-shaders-gen
+endif
+
+default: $(BUILD_TARGETS) $(LEGACY_TARGETS_BUILD)
+
+test: $(TEST_TARGETS)
+	@failures=0; \
+	for test_target in $(TEST_TARGETS); do \
+		if [ "$$test_target" = "tests/test-tokenizer-0" ]; then \
+			./$$test_target $(CURDIR)/models/ggml-vocab-llama-spm.gguf; \
+			./$$test_target $(CURDIR)/models/ggml-vocab-llama-bpe.gguf; \
+			./$$test_target $(CURDIR)/models/ggml-vocab-phi-3.gguf; \
+			./$$test_target $(CURDIR)/models/ggml-vocab-falcon.gguf; \
+			./$$test_target $(CURDIR)/models/ggml-vocab-bert-bge.gguf; \
+			./$$test_target $(CURDIR)/models/ggml-vocab-starcoder.gguf; \
+			./$$test_target $(CURDIR)/models/ggml-vocab-gpt-2.gguf; \
+			./$$test_target $(CURDIR)/models/ggml-vocab-refact.gguf; \
+		elif [ "$$test_target" = "tests/test-tokenizer-1-spm" ]; then \
+			continue; \
+		elif [ "$$test_target" = "tests/test-tokenizer-1-bpe" ]; then \
+			continue; \
+		else \
+			echo "Running test $$test_target..."; \
+			./$$test_target; \
+		fi; \
+		if [ $$?
-ne 0 ]; then \ + printf 'Test %s FAILED!\n\n' $$test_target; \ + failures=$$(( failures + 1 )); \ + else \ + printf 'Test %s passed.\n\n' $$test_target; \ + fi; \ + done; \ + if [ $$failures -gt 0 ]; then \ + printf '\n%s tests failed.\n' $$failures; \ + exit 1; \ + fi + @echo 'All tests passed.' + +all: $(BUILD_TARGETS) $(TEST_TARGETS) $(LEGACY_TARGETS_BUILD) + +ifdef RISCV_CROSS_COMPILE +CC := riscv64-unknown-linux-gnu-gcc +CXX := riscv64-unknown-linux-gnu-g++ +endif + +# +# Compile flags +# + +# keep standard at C11 and C++11 +MK_CPPFLAGS = -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon +MK_CFLAGS = -std=c11 -fPIC +MK_CXXFLAGS = -std=c++11 -fPIC +MK_NVCCFLAGS = -std=c++11 + +ifdef LLAMA_NO_CCACHE +GGML_NO_CCACHE := 1 +DEPRECATE_WARNING := 1 +endif + +ifndef GGML_NO_CCACHE +CCACHE := $(shell which ccache) +ifdef CCACHE +export CCACHE_SLOPPINESS = time_macros +$(info I ccache found, compilation results will be cached. Disable with GGML_NO_CCACHE.) +CC := $(CCACHE) $(CC) +CXX := $(CCACHE) $(CXX) +else +$(info I ccache not found. Consider installing it for faster compilation.) +endif # CCACHE +endif # GGML_NO_CCACHE + +# clock_gettime came in POSIX.1b (1993) +# CLOCK_MONOTONIC came in POSIX.1-2001 / SUSv3 as optional +# posix_memalign came in POSIX.1-2001 / SUSv3 +# M_PI is an XSI extension since POSIX.1-2001 / SUSv3, came in XPG1 (1985) +MK_CPPFLAGS += -D_XOPEN_SOURCE=600 + +# Somehow in OpenBSD whenever POSIX conformance is specified +# some string functions rely on locale_t availability, +# which was introduced in POSIX.1-2008, forcing us to go higher +ifeq ($(UNAME_S),OpenBSD) + MK_CPPFLAGS += -U_XOPEN_SOURCE -D_XOPEN_SOURCE=700 +endif + +# Data types, macros and functions related to controlling CPU affinity and +# some memory allocation are available on Linux through GNU extensions in libc +ifeq ($(UNAME_S),Linux) + MK_CPPFLAGS += -D_GNU_SOURCE +endif + +# RLIMIT_MEMLOCK came in BSD, is not specified in POSIX.1, +# and on macOS its availability depends on enabling Darwin extensions +# similarly on DragonFly, enabling BSD extensions is necessary +ifeq ($(UNAME_S),Darwin) + MK_CPPFLAGS += -D_DARWIN_C_SOURCE +endif +ifeq ($(UNAME_S),DragonFly) + MK_CPPFLAGS += -D__BSD_VISIBLE +endif + +# alloca is a non-standard interface that is not visible on BSDs when +# POSIX conformance is specified, but not all of them provide a clean way +# to enable it in such cases +ifeq ($(UNAME_S),FreeBSD) + MK_CPPFLAGS += -D__BSD_VISIBLE +endif +ifeq ($(UNAME_S),NetBSD) + MK_CPPFLAGS += -D_NETBSD_SOURCE +endif +ifeq ($(UNAME_S),OpenBSD) + MK_CPPFLAGS += -D_BSD_SOURCE +endif + +ifdef GGML_SCHED_MAX_COPIES + MK_CPPFLAGS += -DGGML_SCHED_MAX_COPIES=$(GGML_SCHED_MAX_COPIES) +endif + +ifdef LLAMA_DEBUG + MK_CFLAGS += -O0 -g + MK_CXXFLAGS += -O0 -g + MK_LDFLAGS += -g + MK_NVCCFLAGS += -O0 -g + + ifeq ($(UNAME_S),Linux) + MK_CPPFLAGS += -D_GLIBCXX_ASSERTIONS + endif +else + MK_CPPFLAGS += -DNDEBUG + MK_CFLAGS += -O3 -g + MK_CXXFLAGS += -O3 -g + MK_NVCCFLAGS += -O3 -g +endif + +ifdef LLAMA_SANITIZE_THREAD + MK_CFLAGS += -fsanitize=thread -g + MK_CXXFLAGS += -fsanitize=thread -g + MK_LDFLAGS += -fsanitize=thread -g +endif + +ifdef LLAMA_SANITIZE_ADDRESS + MK_CFLAGS += -fsanitize=address -fno-omit-frame-pointer -g + MK_CXXFLAGS += -fsanitize=address -fno-omit-frame-pointer -g + MK_LDFLAGS += -fsanitize=address -fno-omit-frame-pointer -g +endif + +ifdef LLAMA_SANITIZE_UNDEFINED + MK_CFLAGS += -fsanitize=undefined -g + MK_CXXFLAGS += -fsanitize=undefined -g + MK_LDFLAGS += -fsanitize=undefined -g +endif + +ifdef 
LLAMA_SERVER_SSL
+	MK_CPPFLAGS += -DCPPHTTPLIB_OPENSSL_SUPPORT
+	MK_LDFLAGS += -lssl -lcrypto
+endif
+
+# warnings
+WARN_FLAGS = \
+	-Wall \
+	-Wextra \
+	-Wpedantic \
+	-Wcast-qual \
+	-Wno-unused-function
+
+MK_CFLAGS += \
+	$(WARN_FLAGS) \
+	-Wshadow \
+	-Wstrict-prototypes \
+	-Wpointer-arith \
+	-Wmissing-prototypes \
+	-Werror=implicit-int \
+	-Werror=implicit-function-declaration
+
+MK_CXXFLAGS += \
+	$(WARN_FLAGS) \
+	-Wmissing-declarations \
+	-Wmissing-noreturn
+
+ifeq ($(LLAMA_FATAL_WARNINGS),1)
+	MK_CFLAGS += -Werror
+	MK_CXXFLAGS += -Werror
+endif
+
+# this version of Apple ld64 is buggy
+ifneq '' '$(findstring dyld-1015.7,$(shell $(CC) $(LDFLAGS) -Wl,-v 2>&1))'
+	MK_CPPFLAGS += -DHAVE_BUGGY_APPLE_LINKER
+endif
+
+# OS specific
+# TODO: support Windows
+ifneq '' '$(filter $(UNAME_S),Linux Darwin FreeBSD NetBSD OpenBSD Haiku)'
+	MK_CFLAGS += -pthread
+	MK_CXXFLAGS += -pthread
+endif
+
+# detect Windows
+ifneq ($(findstring _NT,$(UNAME_S)),)
+	_WIN32 := 1
+endif
+
+# library name prefix
+ifneq ($(_WIN32),1)
+	LIB_PRE := lib
+endif
+
+# Dynamic Shared Object extension
+ifneq ($(_WIN32),1)
+	DSO_EXT := .so
+else
+	DSO_EXT := .dll
+endif
+
+# Windows Sockets 2 (Winsock) for network-capable apps
+ifeq ($(_WIN32),1)
+	LWINSOCK2 := -lws2_32
+endif
+
+ifdef LLAMA_GPROF
+	MK_CFLAGS += -pg
+	MK_CXXFLAGS += -pg
+endif
+
+# Architecture specific
+# TODO: probably these flags need to be tweaked on some architectures
+#       feel free to update the Makefile for your architecture and send a pull request or issue
+
+ifndef RISCV_CROSS_COMPILE
+
+ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686 amd64))
+	# Use all CPU extensions that are available:
+	MK_CFLAGS += -march=native -mtune=native
+	HOST_CXXFLAGS += -march=native -mtune=native
+
+	# Usage: AVX only
+	#MK_CFLAGS += -mfma -mf16c -mavx
+	#MK_CXXFLAGS += -mfma -mf16c -mavx
+
+	# Usage: SSSE3 only (note: SSSE3 is not the same as SSE3!)
+	#MK_CFLAGS += -mssse3
+	#MK_CXXFLAGS += -mssse3
+endif
+
+ifneq '' '$(findstring mingw,$(shell $(CC) -dumpmachine))'
+	# The stack is only 16-byte aligned on Windows, so don't let gcc emit aligned moves.
+	# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=54412
+	# https://github.com/ggerganov/llama.cpp/issues/2922
+	MK_CFLAGS += -Xassembler -muse-unaligned-vector-move
+	MK_CXXFLAGS += -Xassembler -muse-unaligned-vector-move
+
+	# Target Windows 8 for PrefetchVirtualMemory
+	MK_CPPFLAGS += -D_WIN32_WINNT=0x602
+endif
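# [Editorial sketch - not part of the original Makefile] The x86-64 branch above tunes
# for the build host via -march=native. For a portable binary, comment out the
# -march=native/-mtune=native pair and enable one of the commented alternatives, e.g.
# the AVX-only flags:
#
#     MK_CFLAGS   += -mfma -mf16c -mavx
#     MK_CXXFLAGS += -mfma -mf16c -mavx
#
# The result then runs on any CPU with AVX support, not only the machine that built it.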
+ifneq ($(filter aarch64%,$(UNAME_M)),)
+	# Apple M1, M2, etc.
+	# Raspberry Pi 3, 4, Zero 2 (64-bit)
+	# Nvidia Jetson
+	MK_CFLAGS += -mcpu=native
+	MK_CXXFLAGS += -mcpu=native
+	JETSON_RELEASE_INFO = $(shell jetson_release)
+	ifdef JETSON_RELEASE_INFO
+		ifneq ($(filter TX2%,$(JETSON_RELEASE_INFO)),)
+			JETSON_EOL_MODULE_DETECT = 1
+			CC = aarch64-unknown-linux-gnu-gcc
+			CXX = aarch64-unknown-linux-gnu-g++
+		endif
+	endif
+endif
+
+ifneq ($(filter armv6%,$(UNAME_M)),)
+	# Raspberry Pi 1, Zero
+	MK_CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access
+	MK_CXXFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access
+endif
+
+ifneq ($(filter armv7%,$(UNAME_M)),)
+	# Raspberry Pi 2
+	MK_CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations
+	MK_CXXFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations
+endif
+
+ifneq ($(filter armv8%,$(UNAME_M)),)
+	# Raspberry Pi 3, 4, Zero 2 (32-bit)
+	MK_CFLAGS += -mfp16-format=ieee -mno-unaligned-access
+	MK_CXXFLAGS += -mfp16-format=ieee -mno-unaligned-access
+endif
+
+ifneq ($(filter ppc64%,$(UNAME_M)),)
+	POWER9_M := $(shell grep "POWER9" /proc/cpuinfo)
+	ifneq (,$(findstring POWER9,$(POWER9_M)))
+		MK_CFLAGS += -mcpu=power9
+		MK_CXXFLAGS += -mcpu=power9
+	endif
+endif
+
+ifneq ($(filter ppc64le%,$(UNAME_M)),)
+	MK_CFLAGS += -mcpu=powerpc64le
+	MK_CXXFLAGS += -mcpu=powerpc64le
+	CUDA_POWER_ARCH = 1
+endif
+
+ifneq ($(filter loongarch64%,$(UNAME_M)),)
+	MK_CFLAGS += -mlasx
+	MK_CXXFLAGS += -mlasx
+endif
+
+ifneq ($(filter riscv64%,$(UNAME_M)),)
+	MK_CFLAGS += -march=rv64gcv -mabi=lp64d
+	MK_CXXFLAGS += -march=rv64gcv -mabi=lp64d
+endif
+
+else # RISC-V CROSS COMPILATION
+	MK_CFLAGS += -march=rv64gcv -mabi=lp64d
+	MK_CXXFLAGS += -march=rv64gcv -mabi=lp64d
+endif
+
+ifndef GGML_NO_ACCELERATE
+	# Mac OS - include Accelerate framework.
+ # `-framework Accelerate` works both with Apple Silicon and Mac Intel + ifeq ($(UNAME_S),Darwin) + MK_CPPFLAGS += -DGGML_USE_ACCELERATE -DGGML_USE_BLAS + MK_CPPFLAGS += -DACCELERATE_NEW_LAPACK + MK_CPPFLAGS += -DACCELERATE_LAPACK_ILP64 + MK_LDFLAGS += -framework Accelerate + OBJ_GGML += ggml/src/ggml-blas.o + endif +endif # GGML_NO_ACCELERATE + +ifdef GGML_MUSA + CC := clang + CXX := clang++ + GGML_CUDA := 1 + MK_CPPFLAGS += -DGGML_USE_MUSA +endif + +ifndef GGML_NO_OPENMP + MK_CPPFLAGS += -DGGML_USE_OPENMP + MK_CFLAGS += -fopenmp + MK_CXXFLAGS += -fopenmp + ifdef GGML_MUSA + MK_CPPFLAGS += -I/usr/lib/llvm-10/include/openmp + MK_LDFLAGS += -L/usr/lib/llvm-10/lib + endif # GGML_MUSA +endif # GGML_NO_OPENMP + +ifdef GGML_OPENBLAS + MK_CPPFLAGS += -DGGML_USE_BLAS $(shell pkg-config --cflags-only-I openblas) + MK_CFLAGS += $(shell pkg-config --cflags-only-other openblas) + MK_LDFLAGS += $(shell pkg-config --libs openblas) + OBJ_GGML += ggml/src/ggml-blas.o +endif # GGML_OPENBLAS + +ifdef GGML_OPENBLAS64 + MK_CPPFLAGS += -DGGML_USE_BLAS $(shell pkg-config --cflags-only-I openblas64) + MK_CFLAGS += $(shell pkg-config --cflags-only-other openblas64) + MK_LDFLAGS += $(shell pkg-config --libs openblas64) + OBJ_GGML += ggml/src/ggml-blas.o +endif # GGML_OPENBLAS64 + +ifdef GGML_BLIS + MK_CPPFLAGS += -DGGML_USE_BLAS -DGGML_BLAS_USE_BLIS -I/usr/local/include/blis -I/usr/include/blis + MK_LDFLAGS += -lblis -L/usr/local/lib + OBJ_GGML += ggml/src/ggml-blas.o +endif # GGML_BLIS + +ifdef GGML_NVPL + MK_CPPFLAGS += -DGGML_USE_BLAS -DGGML_BLAS_USE_NVPL -DNVPL_ILP64 -I/usr/local/include/nvpl_blas -I/usr/include/nvpl_blas + MK_LDFLAGS += -L/usr/local/lib -lnvpl_blas_core -lnvpl_blas_ilp64_gomp + OBJ_GGML += ggml/src/ggml-blas.o +endif # GGML_NVPL + +ifndef GGML_NO_LLAMAFILE + MK_CPPFLAGS += -DGGML_USE_LLAMAFILE + OBJ_GGML += ggml/src/llamafile/sgemm.o +endif + +ifndef GGML_NO_AMX + MK_CPPFLAGS += -DGGML_USE_AMX + OBJ_GGML += ggml/src/ggml-amx.o ggml/src/ggml-amx/mmq.o +endif + +ifdef GGML_RPC + MK_CPPFLAGS += -DGGML_USE_RPC + OBJ_GGML += ggml/src/ggml-rpc.o +endif # GGML_RPC + +OBJ_CUDA_TMPL = $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/fattn-wmma*.cu)) +OBJ_CUDA_TMPL += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/mmq*.cu)) + +ifdef GGML_CUDA_FA_ALL_QUANTS + OBJ_CUDA_TMPL += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/fattn-vec*.cu)) +else + OBJ_CUDA_TMPL += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/fattn-vec*q4_0-q4_0.cu)) + OBJ_CUDA_TMPL += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/fattn-vec*q8_0-q8_0.cu)) + OBJ_CUDA_TMPL += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/fattn-vec*f16-f16.cu)) +endif # GGML_CUDA_FA_ALL_QUANTS + +ifdef GGML_CUDA + ifdef GGML_MUSA + ifneq ('', '$(wildcard /opt/musa)') + CUDA_PATH ?= /opt/musa + else + CUDA_PATH ?= /usr/local/musa + endif + + MK_CPPFLAGS += -DGGML_USE_CUDA -I$(CUDA_PATH)/include + MK_LDFLAGS += -lmusa -lmublas -lmusart -lpthread -ldl -lrt -L$(CUDA_PATH)/lib -L/usr/lib64 + MK_NVCCFLAGS += -x musa -mtgpu --cuda-gpu-arch=mp_21 --cuda-gpu-arch=mp_22 + else + ifneq ('', '$(wildcard /opt/cuda)') + CUDA_PATH ?= /opt/cuda + else + CUDA_PATH ?= /usr/local/cuda + endif + + MK_CPPFLAGS += -DGGML_USE_CUDA -DGGML_CUDA_USE_GRAPHS -I$(CUDA_PATH)/include -I$(CUDA_PATH)/targets/$(UNAME_M)-linux/include + MK_LDFLAGS += -lcuda -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L$(CUDA_PATH)/lib64 -L/usr/lib64 
-L$(CUDA_PATH)/targets/$(UNAME_M)-linux/lib -L$(CUDA_PATH)/lib64/stubs -L/usr/lib/wsl/lib + MK_NVCCFLAGS += -use_fast_math + endif # GGML_MUSA + + OBJ_GGML += ggml/src/ggml-cuda.o + OBJ_GGML += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/*.cu)) + OBJ_GGML += $(OBJ_CUDA_TMPL) + +ifdef LLAMA_FATAL_WARNINGS + MK_NVCCFLAGS += -Werror all-warnings +endif # LLAMA_FATAL_WARNINGS + +ifndef GGML_MUSA +ifndef JETSON_EOL_MODULE_DETECT + MK_NVCCFLAGS += --forward-unknown-to-host-compiler +endif # JETSON_EOL_MODULE_DETECT +endif # GGML_MUSA + +ifdef LLAMA_DEBUG + MK_NVCCFLAGS += -lineinfo +endif # LLAMA_DEBUG + +ifdef GGML_CUDA_DEBUG + MK_NVCCFLAGS += --device-debug +endif # GGML_CUDA_DEBUG + +ifdef GGML_CUDA_NVCC + NVCC = $(CCACHE) $(GGML_CUDA_NVCC) +else + ifdef GGML_MUSA + NVCC = $(CCACHE) mcc + else + NVCC = $(CCACHE) nvcc + endif # GGML_MUSA +endif # GGML_CUDA_NVCC + +ifdef CUDA_DOCKER_ARCH + MK_NVCCFLAGS += -Wno-deprecated-gpu-targets -arch=$(CUDA_DOCKER_ARCH) +else ifndef CUDA_POWER_ARCH + MK_NVCCFLAGS += -arch=native +endif # CUDA_DOCKER_ARCH + +ifdef GGML_CUDA_FORCE_DMMV + MK_NVCCFLAGS += -DGGML_CUDA_FORCE_DMMV +endif # GGML_CUDA_FORCE_DMMV + +ifdef GGML_CUDA_FORCE_MMQ + MK_NVCCFLAGS += -DGGML_CUDA_FORCE_MMQ +endif # GGML_CUDA_FORCE_MMQ + +ifdef GGML_CUDA_FORCE_CUBLAS + MK_NVCCFLAGS += -DGGML_CUDA_FORCE_CUBLAS +endif # GGML_CUDA_FORCE_CUBLAS + +ifdef GGML_CUDA_DMMV_X + MK_NVCCFLAGS += -DGGML_CUDA_DMMV_X=$(GGML_CUDA_DMMV_X) +else + MK_NVCCFLAGS += -DGGML_CUDA_DMMV_X=32 +endif # GGML_CUDA_DMMV_X + +ifdef GGML_CUDA_MMV_Y + MK_NVCCFLAGS += -DGGML_CUDA_MMV_Y=$(GGML_CUDA_MMV_Y) +else ifdef GGML_CUDA_DMMV_Y + MK_NVCCFLAGS += -DGGML_CUDA_MMV_Y=$(GGML_CUDA_DMMV_Y) # for backwards compatibility +else + MK_NVCCFLAGS += -DGGML_CUDA_MMV_Y=1 +endif # GGML_CUDA_MMV_Y + +ifdef GGML_CUDA_F16 + MK_NVCCFLAGS += -DGGML_CUDA_F16 +endif # GGML_CUDA_F16 + +ifdef GGML_CUDA_DMMV_F16 + MK_NVCCFLAGS += -DGGML_CUDA_F16 +endif # GGML_CUDA_DMMV_F16 + +ifdef GGML_CUDA_KQUANTS_ITER + MK_NVCCFLAGS += -DK_QUANTS_PER_ITERATION=$(GGML_CUDA_KQUANTS_ITER) +else + MK_NVCCFLAGS += -DK_QUANTS_PER_ITERATION=2 +endif + +ifdef GGML_CUDA_PEER_MAX_BATCH_SIZE + MK_NVCCFLAGS += -DGGML_CUDA_PEER_MAX_BATCH_SIZE=$(GGML_CUDA_PEER_MAX_BATCH_SIZE) +else + MK_NVCCFLAGS += -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 +endif # GGML_CUDA_PEER_MAX_BATCH_SIZE + +ifdef GGML_CUDA_NO_PEER_COPY + MK_NVCCFLAGS += -DGGML_CUDA_NO_PEER_COPY +endif # GGML_CUDA_NO_PEER_COPY + +ifdef GGML_CUDA_CCBIN + MK_NVCCFLAGS += -ccbin $(GGML_CUDA_CCBIN) +endif # GGML_CUDA_CCBIN + +ifdef GGML_CUDA_FA_ALL_QUANTS + MK_NVCCFLAGS += -DGGML_CUDA_FA_ALL_QUANTS +endif # GGML_CUDA_FA_ALL_QUANTS + +ifdef JETSON_EOL_MODULE_DETECT +define NVCC_COMPILE + $(NVCC) -I. 
-Icommon -D_XOPEN_SOURCE=600 -D_GNU_SOURCE -DNDEBUG -DGGML_USE_CUDA -I/usr/local/cuda/include -I/opt/cuda/include -I/usr/local/cuda/targets/aarch64-linux/include -std=c++11 -O3 $(NVCCFLAGS) $(CPPFLAGS) -Xcompiler "$(CUDA_CXXFLAGS)" -c $< -o $@
+endef # NVCC_COMPILE
+else
+	ifdef GGML_MUSA
+define NVCC_COMPILE
+	$(NVCC) $(NVCCFLAGS) $(CPPFLAGS) -c $< -o $@
+endef # NVCC_COMPILE
+	else
+define NVCC_COMPILE
+	$(NVCC) $(NVCCFLAGS) $(CPPFLAGS) -Xcompiler "$(CUDA_CXXFLAGS)" -c $< -o $@
+endef # NVCC_COMPILE
+	endif # GGML_MUSA
+endif # JETSON_EOL_MODULE_DETECT
+
+ggml/src/ggml-cuda/%.o: \
+	ggml/src/ggml-cuda/%.cu \
+	ggml/include/ggml.h \
+	ggml/src/ggml-common.h \
+	ggml/src/ggml-cuda/common.cuh
+	$(NVCC_COMPILE)
+
+ggml/src/ggml-cuda.o: \
+	ggml/src/ggml-cuda.cu \
+	ggml/include/ggml-cuda.h \
+	ggml/include/ggml.h \
+	ggml/include/ggml-backend.h \
+	ggml/src/ggml-backend-impl.h \
+	ggml/src/ggml-common.h \
+	$(wildcard ggml/src/ggml-cuda/*.cuh)
+	$(NVCC_COMPILE)
+endif # GGML_CUDA
+
+ifdef GGML_VULKAN
+	MK_CPPFLAGS += -DGGML_USE_VULKAN
+	MK_LDFLAGS += $(shell pkg-config --libs vulkan)
+	OBJ_GGML += ggml/src/ggml-vulkan.o ggml/src/ggml-vulkan-shaders.o
+
+ifdef GGML_VULKAN_CHECK_RESULTS
+	MK_CPPFLAGS += -DGGML_VULKAN_CHECK_RESULTS
+endif
+
+ifdef GGML_VULKAN_DEBUG
+	MK_CPPFLAGS += -DGGML_VULKAN_DEBUG
+endif
+
+ifdef GGML_VULKAN_MEMORY_DEBUG
+	MK_CPPFLAGS += -DGGML_VULKAN_MEMORY_DEBUG
+endif
+
+ifdef GGML_VULKAN_PERF
+	MK_CPPFLAGS += -DGGML_VULKAN_PERF
+endif
+
+ifdef GGML_VULKAN_VALIDATE
+	MK_CPPFLAGS += -DGGML_VULKAN_VALIDATE
+endif
+
+ifdef GGML_VULKAN_RUN_TESTS
+	MK_CPPFLAGS += -DGGML_VULKAN_RUN_TESTS
+endif
+
+GLSLC_CMD = glslc
+_ggml_vk_genshaders_cmd = $(shell pwd)/vulkan-shaders-gen
+_ggml_vk_header = ggml/src/ggml-vulkan-shaders.hpp
+_ggml_vk_source = ggml/src/ggml-vulkan-shaders.cpp
+_ggml_vk_input_dir = ggml/src/vulkan-shaders
+_ggml_vk_shader_deps = $(wildcard $(_ggml_vk_input_dir)/*.comp)
+
+ggml/src/ggml-vulkan.o: ggml/src/ggml-vulkan.cpp ggml/include/ggml-vulkan.h $(_ggml_vk_header) $(_ggml_vk_source)
+	$(CXX) $(CXXFLAGS) $(shell pkg-config --cflags vulkan) -c $< -o $@
+
+$(_ggml_vk_header): $(_ggml_vk_source)
+
+$(_ggml_vk_source): $(_ggml_vk_shader_deps) vulkan-shaders-gen
+	$(_ggml_vk_genshaders_cmd) \
+		--glslc $(GLSLC_CMD) \
+		--input-dir $(_ggml_vk_input_dir) \
+		--target-hpp $(_ggml_vk_header) \
+		--target-cpp $(_ggml_vk_source)
+
+vulkan-shaders-gen: ggml/src/vulkan-shaders/vulkan-shaders-gen.cpp
+	$(CXX) $(CXXFLAGS) -o $@ $(LDFLAGS) ggml/src/vulkan-shaders/vulkan-shaders-gen.cpp
+
+endif # GGML_VULKAN
+
+ifdef GGML_HIPBLAS
+	ifeq ($(wildcard /opt/rocm),)
+		ROCM_PATH ?= /usr
+		AMDGPU_TARGETS ?= $(shell $(shell which amdgpu-arch))
+	else
+		ROCM_PATH ?= /opt/rocm
+		AMDGPU_TARGETS ?= $(shell $(ROCM_PATH)/llvm/bin/amdgpu-arch)
+	endif
+
+	GGML_CUDA_DMMV_X ?= 32
+	GGML_CUDA_MMV_Y ?= 1
+	GGML_CUDA_KQUANTS_ITER ?= 2
+
+	MK_CPPFLAGS += -DGGML_USE_HIPBLAS -DGGML_USE_CUDA
+
+ifdef GGML_HIP_UMA
+	MK_CPPFLAGS += -DGGML_HIP_UMA
+endif # GGML_HIP_UMA
+
+	MK_LDFLAGS += -L$(ROCM_PATH)/lib -Wl,-rpath=$(ROCM_PATH)/lib
+	MK_LDFLAGS += -L$(ROCM_PATH)/lib64 -Wl,-rpath=$(ROCM_PATH)/lib64
+	MK_LDFLAGS += -lhipblas -lamdhip64 -lrocblas
+
+	HIPCC ?= $(CCACHE) $(ROCM_PATH)/bin/hipcc
+
+	HIPFLAGS += $(addprefix --offload-arch=,$(AMDGPU_TARGETS))
+	HIPFLAGS += -DGGML_CUDA_DMMV_X=$(GGML_CUDA_DMMV_X)
+	HIPFLAGS += -DGGML_CUDA_MMV_Y=$(GGML_CUDA_MMV_Y)
+	HIPFLAGS += -DK_QUANTS_PER_ITERATION=$(GGML_CUDA_KQUANTS_ITER)
+
+ifdef GGML_CUDA_FORCE_DMMV
+	HIPFLAGS += -DGGML_CUDA_FORCE_DMMV
+endif #
GGML_CUDA_FORCE_DMMV + +ifdef GGML_CUDA_FORCE_MMQ + HIPFLAGS += -DGGML_CUDA_FORCE_MMQ +endif # GGML_CUDA_FORCE_MMQ + +ifdef GGML_CUDA_FORCE_CUBLAS + HIPFLAGS += -DGGML_CUDA_FORCE_CUBLAS +endif # GGML_CUDA_FORCE_CUBLAS + +ifdef GGML_CUDA_NO_PEER_COPY + HIPFLAGS += -DGGML_CUDA_NO_PEER_COPY +endif # GGML_CUDA_NO_PEER_COPY + + OBJ_GGML += ggml/src/ggml-cuda.o + OBJ_GGML += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/*.cu)) + OBJ_GGML += $(OBJ_CUDA_TMPL) + +ggml/src/ggml-cuda.o: \ + ggml/src/ggml-cuda.cu \ + ggml/include/ggml-cuda.h \ + ggml/include/ggml.h \ + ggml/include/ggml-backend.h \ + ggml/src/ggml-backend-impl.h \ + ggml/src/ggml-common.h \ + $(wildcard ggml/src/ggml-cuda/*.cuh) + $(HIPCC) $(CXXFLAGS) $(HIPFLAGS) -x hip -c -o $@ $< + +ggml/src/ggml-cuda/%.o: \ + ggml/src/ggml-cuda/%.cu \ + ggml/include/ggml.h \ + ggml/src/ggml-common.h \ + ggml/src/ggml-cuda/common.cuh + $(HIPCC) $(CXXFLAGS) $(HIPFLAGS) -x hip -c -o $@ $< +endif # GGML_HIPBLAS + +ifdef GGML_METAL + MK_CPPFLAGS += -DGGML_USE_METAL + MK_LDFLAGS += -framework Foundation -framework Metal -framework MetalKit + OBJ_GGML += ggml/src/ggml-metal.o +ifdef GGML_METAL_NDEBUG + MK_CPPFLAGS += -DGGML_METAL_NDEBUG +endif +ifdef GGML_METAL_EMBED_LIBRARY + MK_CPPFLAGS += -DGGML_METAL_EMBED_LIBRARY + OBJ_GGML += ggml/src/ggml-metal-embed.o +endif +endif # GGML_METAL + +ifdef GGML_METAL +ggml/src/ggml-metal.o: \ + ggml/src/ggml-metal.m \ + ggml/include/ggml-metal.h \ + ggml/include/ggml.h + $(CC) $(CFLAGS) -c $< -o $@ + +ifdef GGML_METAL_EMBED_LIBRARY +ggml/src/ggml-metal-embed.o: \ + ggml/src/ggml-metal.metal \ + ggml/src/ggml-common.h + @echo "Embedding Metal library" + @sed -e '/#include "ggml-common.h"/r ggml/src/ggml-common.h' -e '/#include "ggml-common.h"/d' < ggml/src/ggml-metal.metal > ggml/src/ggml-metal-embed.metal + $(eval TEMP_ASSEMBLY=$(shell mktemp -d)) + @echo ".section __DATA, __ggml_metallib" > $(TEMP_ASSEMBLY)/ggml-metal-embed.s + @echo ".globl _ggml_metallib_start" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s + @echo "_ggml_metallib_start:" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s + @echo ".incbin \"ggml/src/ggml-metal-embed.metal\"" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s + @echo ".globl _ggml_metallib_end" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s + @echo "_ggml_metallib_end:" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s + $(CC) $(CFLAGS) -c $(TEMP_ASSEMBLY)/ggml-metal-embed.s -o $@ + @rm -f ${TEMP_ASSEMBLY}/ggml-metal-embed.s + @rmdir ${TEMP_ASSEMBLY} +endif +endif # GGML_METAL + +OBJ_GGML += \ + ggml/src/ggml.o \ + ggml/src/ggml-cpu.o \ + ggml/src/ggml-alloc.o \ + ggml/src/ggml-backend.o \ + ggml/src/ggml-quants.o \ + ggml/src/ggml-aarch64.o + +OBJ_LLAMA = \ + src/llama.o \ + src/llama-vocab.o \ + src/llama-grammar.o \ + src/llama-sampling.o \ + src/unicode.o \ + src/unicode-data.o + +OBJ_COMMON = \ + common/common.o \ + common/arg.o \ + common/log.o \ + common/console.o \ + common/ngram-cache.o \ + common/sampling.o \ + common/build-info.o \ + common/json-schema-to-grammar.o + +OBJ_ALL = $(OBJ_GGML) $(OBJ_LLAMA) $(OBJ_COMMON) + +LIB_GGML = $(LIB_PRE)ggml$(DSO_EXT) +LIB_GGML_S = $(LIB_PRE)ggml.a + +LIB_LLAMA = $(LIB_PRE)llama$(DSO_EXT) +LIB_LLAMA_S = $(LIB_PRE)llama.a + +LIB_COMMON = $(LIB_PRE)common$(DSO_EXT) +LIB_COMMON_S = $(LIB_PRE)common.a + +LIB_ALL = $(LIB_GGML) $(LIB_LLAMA) $(LIB_COMMON) +LIB_ALL_S = $(LIB_GGML_S) $(LIB_LLAMA_S) $(LIB_COMMON_S) + +GF_CC := $(CC) +include scripts/get-flags.mk + +# combine build flags with cmdline overrides +override CPPFLAGS := $(MK_CPPFLAGS) $(CPPFLAGS) +override CFLAGS := $(CPPFLAGS) 
$(MK_CFLAGS) $(GF_CFLAGS) $(CFLAGS) +BASE_CXXFLAGS := $(MK_CXXFLAGS) $(CXXFLAGS) +override CXXFLAGS := $(BASE_CXXFLAGS) $(HOST_CXXFLAGS) $(GF_CXXFLAGS) $(CPPFLAGS) +override NVCCFLAGS := $(MK_NVCCFLAGS) $(NVCCFLAGS) +override LDFLAGS := $(MK_LDFLAGS) $(LDFLAGS) + +# identify CUDA host compiler +ifdef GGML_CUDA +GF_CC := $(NVCC) $(NVCCFLAGS) 2>/dev/null .c -Xcompiler +include scripts/get-flags.mk +CUDA_CXXFLAGS := $(BASE_CXXFLAGS) $(GF_CXXFLAGS) -Wno-pedantic +endif + +ifdef LLAMA_CURL +override CXXFLAGS := $(CXXFLAGS) -DLLAMA_USE_CURL +override LDFLAGS := $(LDFLAGS) -lcurl +endif + +# +# Print build information +# + +$(info I llama.cpp build info: ) +$(info I UNAME_S: $(UNAME_S)) +$(info I UNAME_P: $(UNAME_P)) +$(info I UNAME_M: $(UNAME_M)) +$(info I CFLAGS: $(CFLAGS)) +$(info I CXXFLAGS: $(CXXFLAGS)) +$(info I NVCCFLAGS: $(NVCCFLAGS)) +$(info I LDFLAGS: $(LDFLAGS)) +$(info I CC: $(shell $(CC) --version | head -n 1)) +$(info I CXX: $(shell $(CXX) --version | head -n 1)) +ifdef GGML_CUDA +$(info I NVCC: $(shell $(NVCC) --version | tail -n 1)) +CUDA_VERSION := $(shell $(NVCC) --version | grep -oP 'release (\K[0-9]+\.[0-9])') +ifndef GGML_MUSA +ifeq ($(shell awk -v "v=$(CUDA_VERSION)" 'BEGIN { print (v < 11.7) }'),1) + +ifndef CUDA_DOCKER_ARCH +ifndef CUDA_POWER_ARCH +$(error I ERROR: For CUDA versions < 11.7 a target CUDA architecture must be explicitly provided via environment variable CUDA_DOCKER_ARCH, e.g. by running "export CUDA_DOCKER_ARCH=compute_XX" on Unix-like systems, where XX is the minimum compute capability that the code needs to run on. A list of compute capabilities can be found here: https://developer.nvidia.com/cuda-gpus ) +endif # CUDA_POWER_ARCH +endif # CUDA_DOCKER_ARCH + +endif # eq ($(shell awk -v "v=$(CUDA_VERSION)" 'BEGIN { print (v < 11.7) }'),1) +endif # GGML_MUSA +endif # GGML_CUDA +$(info ) + +ifdef DEPRECATE_WARNING +$(info !!! DEPRECATION WARNING !!!) +$(info The following LLAMA_ options are deprecated and will be removed in the future. Use the GGML_ prefix instead) +$(info - LLAMA_CUDA) +$(info - LLAMA_METAL) +$(info - LLAMA_METAL_EMBED_LIBRARY) +$(info - LLAMA_OPENMP) +$(info - LLAMA_RPC) +$(info - LLAMA_SYCL) +$(info - LLAMA_SYCL_F16) +$(info - LLAMA_OPENBLAS) +$(info - LLAMA_OPENBLAS64) +$(info - LLAMA_BLIS) +$(info - LLAMA_NO_LLAMAFILE) +$(info - LLAMA_NO_ACCELERATE) +$(info - LLAMA_NO_OPENMP) +$(info - LLAMA_NO_METAL) +$(info - LLAMA_NO_CCACHE) +$(info ) +endif + +ifdef REMOVE_WARNING +$(info !!! REMOVAL WARNING !!!) 
+$(info The following LLAMA_ options have been removed and are no longer supported) +$(info - LLAMA_DISABLE_LOGS (https://github.com/ggerganov/llama.cpp/pull/9418)) +$(info - LLAMA_SERVER_VERBOSE (https://github.com/ggerganov/llama.cpp/pull/9418)) +$(info ) +endif + +# +# Build libraries +# + +# ggml + +ggml/src/ggml.o: \ + ggml/src/ggml.c \ + ggml/include/ggml.h + $(CC) $(CFLAGS) -c $< -o $@ + +ggml/src/ggml-cpu.o: \ + ggml/src/ggml-cpu.c \ + ggml/include/ggml.h \ + ggml/src/ggml-common.h + $(CC) $(CFLAGS) -c $< -o $@ + +ggml/src/ggml-alloc.o: \ + ggml/src/ggml-alloc.c \ + ggml/include/ggml.h \ + ggml/include/ggml-alloc.h + $(CC) $(CFLAGS) -c $< -o $@ + +ggml/src/ggml-backend.o: \ + ggml/src/ggml-backend.cpp \ + ggml/src/ggml-backend-impl.h \ + ggml/include/ggml.h \ + ggml/include/ggml-backend.h + $(CXX) $(CXXFLAGS) -c $< -o $@ + +ggml/src/ggml-quants.o: \ + ggml/src/ggml-quants.c \ + ggml/include/ggml.h \ + ggml/src/ggml-quants.h \ + ggml/src/ggml-common.h + $(CC) $(CFLAGS) -c $< -o $@ + +ggml/src/ggml-aarch64.o: \ + ggml/src/ggml-aarch64.c \ + ggml/include/ggml.h \ + ggml/src/ggml-aarch64.h \ + ggml/src/ggml-common.h + $(CC) $(CFLAGS) -c $< -o $@ + +ggml/src/ggml-blas.o: \ + ggml/src/ggml-blas.cpp \ + ggml/include/ggml-blas.h + $(CXX) $(CXXFLAGS) -c $< -o $@ + +ifndef GGML_NO_LLAMAFILE +ggml/src/llamafile/sgemm.o: \ + ggml/src/llamafile/sgemm.cpp \ + ggml/src/llamafile/sgemm.h \ + ggml/include/ggml.h + $(CXX) $(CXXFLAGS) -c $< -o $@ +endif # GGML_NO_LLAMAFILE + +ifndef GGML_NO_AMX +ggml/src/ggml-amx.o: \ + ggml/src/ggml-amx.cpp \ + ggml/include/ggml-amx.h + $(CXX) $(CXXFLAGS) -c $< -o $@ + +ggml/src/ggml-amx/mmq.o: \ + ggml/src/ggml-amx/mmq.cpp \ + ggml/src/ggml-amx/mmq.h \ + ggml/include/ggml.h + $(CXX) $(CXXFLAGS) -c $< -o $@ +endif + +ifdef GGML_RPC +ggml/src/ggml-rpc.o: \ + ggml/src/ggml-rpc.cpp \ + ggml/include/ggml-rpc.h + $(CXX) $(CXXFLAGS) -c $< -o $@ +endif # GGML_RPC + +$(LIB_GGML): \ + $(OBJ_GGML) + $(CXX) $(CXXFLAGS) -shared -fPIC -o $@ $^ $(LDFLAGS) + +$(LIB_GGML_S): \ + $(OBJ_GGML) + ar rcs $(LIB_GGML_S) $^ + +# llama + +src/unicode.o: \ + src/unicode.cpp \ + src/unicode.h + $(CXX) $(CXXFLAGS) -c $< -o $@ + +src/unicode-data.o: \ + src/unicode-data.cpp \ + src/unicode-data.h + $(CXX) $(CXXFLAGS) -c $< -o $@ + +src/llama.o: \ + src/llama.cpp \ + src/llama-impl.h \ + src/llama-vocab.h \ + src/llama-grammar.h \ + src/llama-sampling.h \ + src/unicode.h \ + include/llama.h \ + ggml/include/ggml-cuda.h \ + ggml/include/ggml-metal.h \ + ggml/include/ggml.h \ + ggml/include/ggml-alloc.h \ + ggml/include/ggml-backend.h + $(CXX) $(CXXFLAGS) -c $< -o $@ + +src/llama-vocab.o: \ + src/llama-vocab.cpp \ + src/llama-vocab.h \ + src/llama-impl.h \ + include/llama.h + $(CXX) $(CXXFLAGS) -c $< -o $@ + +src/llama-grammar.o: \ + src/llama-grammar.cpp \ + src/llama-grammar.h \ + src/llama-impl.h \ + src/llama-vocab.h \ + src/llama-sampling.h \ + include/llama.h + $(CXX) $(CXXFLAGS) -c $< -o $@ + +src/llama-sampling.o: \ + src/llama-sampling.cpp \ + src/llama-sampling.h \ + src/llama-impl.h \ + include/llama.h + $(CXX) $(CXXFLAGS) -c $< -o $@ + +$(LIB_LLAMA): \ + $(OBJ_LLAMA) \ + $(LIB_GGML) + $(CXX) $(CXXFLAGS) -shared -fPIC -o $@ $^ $(LDFLAGS) + +$(LIB_LLAMA_S): \ + $(OBJ_LLAMA) + ar rcs $(LIB_LLAMA_S) $^ + +# common + +common/common.o: \ + common/common.cpp \ + common/common.h \ + common/console.h \ + common/sampling.h \ + common/json.hpp \ + common/json-schema-to-grammar.h \ + include/llama.h + $(CXX) $(CXXFLAGS) -c $< -o $@ + +common/arg.o: \ + common/arg.cpp \ + common/arg.h + $(CXX) 
$(CXXFLAGS) -c $< -o $@ + +common/log.o: \ + common/log.cpp \ + common/log.h + $(CXX) $(CXXFLAGS) -c $< -o $@ + +common/sampling.o: \ + common/sampling.cpp \ + common/sampling.h \ + include/llama.h + $(CXX) $(CXXFLAGS) -c $< -o $@ + +common/console.o: \ + common/console.cpp \ + common/console.h + $(CXX) $(CXXFLAGS) -c $< -o $@ + +common/json-schema-to-grammar.o: \ + common/json-schema-to-grammar.cpp \ + common/json-schema-to-grammar.h + $(CXX) $(CXXFLAGS) -c $< -o $@ + +common/ngram-cache.o: \ + common/ngram-cache.cpp \ + common/ngram-cache.h + $(CXX) $(CXXFLAGS) -c $< -o $@ + +$(LIB_COMMON): \ + $(OBJ_COMMON) \ + $(LIB_LLAMA) \ + $(LIB_GGML) + $(CXX) $(CXXFLAGS) -shared -fPIC -o $@ $^ $(LDFLAGS) + +$(LIB_COMMON_S): \ + $(OBJ_COMMON) + ar rcs $(LIB_COMMON_S) $^ + +clean: + rm -vrf *.dot $(BUILD_TARGETS) $(TEST_TARGETS) + rm -rvf src/*.o + rm -rvf tests/*.o + rm -rvf examples/*.o + rm -rvf common/*.o + rm -rvf *.a + rm -rvf *.dll + rm -rvf *.so + rm -rvf *.dot + rm -rvf ggml/*.a + rm -rvf ggml/*.dll + rm -rvf ggml/*.so + rm -vrf ggml/src/*.o + rm -rvf ggml/src/llamafile/*.o + rm -rvf common/build-info.cpp + rm -vrf ggml/src/ggml-metal-embed.metal + rm -vrf ggml/src/ggml-cuda/*.o + rm -vrf ggml/src/ggml-cuda/template-instances/*.o + rm -vrf ggml/src/ggml-amx/*.o + rm -rvf $(BUILD_TARGETS) + rm -rvf $(TEST_TARGETS) + rm -f vulkan-shaders-gen ggml/src/ggml-vulkan-shaders.hpp ggml/src/ggml-vulkan-shaders.cpp + rm -rvf $(LEGACY_TARGETS_CLEAN) + find examples pocs -type f -name "*.o" -delete + +# +# Examples +# + +# $< is the first prerequisite, i.e. the source file. +# Explicitly compile this to an object file so that it can be cached with ccache. +# The source file is then filtered out from $^ (the list of all prerequisites) and the object file is added instead. + +# Helper function that replaces .c, .cpp, and .cu file endings with .o: +GET_OBJ_FILE = $(patsubst %.c,%.o,$(patsubst %.cpp,%.o,$(patsubst %.cu,%.o,$(1)))) + +llama-cli: examples/main/main.cpp \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + @echo + @echo '==== Run ./llama-cli -h for help. 
====' + @echo + +llama-infill: examples/infill/infill.cpp \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +llama-simple: examples/simple/simple.cpp \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +llama-simple-chat: examples/simple-chat/simple-chat.cpp \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +llama-tokenize: examples/tokenize/tokenize.cpp \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +llama-batched: examples/batched/batched.cpp \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +llama-batched-bench: examples/batched-bench/batched-bench.cpp \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +llama-quantize: examples/quantize/quantize.cpp \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +llama-quantize-stats: examples/quantize-stats/quantize-stats.cpp \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +llama-perplexity: examples/perplexity/perplexity.cpp \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +llama-imatrix: examples/imatrix/imatrix.cpp \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +llama-embedding: examples/embedding/embedding.cpp \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +llama-gritlm: examples/gritlm/gritlm.cpp \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +llama-save-load-state: examples/save-load-state/save-load-state.cpp \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +llama-gguf: examples/gguf/gguf.cpp \ + $(OBJ_GGML) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +examples/gguf-hash/deps/sha1/sha1.o: \ + examples/gguf-hash/deps/sha1/sha1.c + $(CC) $(CFLAGS) -Iexamples/gguf-hash/deps -c $< -o $@ + +examples/gguf-hash/deps/xxhash/xxhash.o: \ + examples/gguf-hash/deps/xxhash/xxhash.c + $(CC) $(CFLAGS) -Iexamples/gguf-hash/deps -c $< -o $@ + +examples/gguf-hash/deps/sha256/sha256.o: \ + examples/gguf-hash/deps/sha256/sha256.c + $(CC) $(CFLAGS) -Iexamples/gguf-hash/deps -c $< -o $@ + +llama-gguf-hash: examples/gguf-hash/gguf-hash.cpp examples/gguf-hash/deps/sha1/sha1.o examples/gguf-hash/deps/xxhash/xxhash.o examples/gguf-hash/deps/sha256/sha256.o\ + 
$(OBJ_ALL) + $(CXX) $(CXXFLAGS) -Iexamples/gguf-hash/deps -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +llama-gguf-split: examples/gguf-split/gguf-split.cpp \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +llama-eval-callback: examples/eval-callback/eval-callback.cpp \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +llama-cvector-generator: examples/cvector-generator/cvector-generator.cpp \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +llama-convert-llama2c-to-ggml: examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +llama-bench: examples/llama-bench/llama-bench.cpp \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +llama-export-lora: examples/export-lora/export-lora.cpp \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +llama-retrieval: examples/retrieval/retrieval.cpp \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +llama-speculative: examples/speculative/speculative.cpp \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +llama-parallel: examples/parallel/parallel.cpp \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +llama-lookahead: examples/lookahead/lookahead.cpp \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +llama-lookup: examples/lookup/lookup.cpp \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +llama-lookup-create: examples/lookup/lookup-create.cpp \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +llama-lookup-merge: examples/lookup/lookup-merge.cpp \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +llama-lookup-stats: examples/lookup/lookup-stats.cpp \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +llama-passkey: examples/passkey/passkey.cpp \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +llama-gbnf-validator: examples/gbnf-validator/gbnf-validator.cpp \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + 
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +ifdef GGML_RPC +rpc-server: examples/rpc/rpc-server.cpp \ + $(OBJ_GGML) + $(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS) +endif # GGML_RPC + +llama-server: \ + examples/server/server.cpp \ + examples/server/utils.hpp \ + examples/server/httplib.h \ + examples/server/colorthemes.css.hpp \ + examples/server/style.css.hpp \ + examples/server/theme-beeninorder.css.hpp \ + examples/server/theme-ketivah.css.hpp \ + examples/server/theme-mangotango.css.hpp \ + examples/server/theme-playground.css.hpp \ + examples/server/theme-polarnight.css.hpp \ + examples/server/theme-snowstorm.css.hpp \ + examples/server/index.html.hpp \ + examples/server/index-new.html.hpp \ + examples/server/index.js.hpp \ + examples/server/completion.js.hpp \ + examples/server/system-prompts.js.hpp \ + examples/server/prompt-formats.js.hpp \ + examples/server/json-schema-to-grammar.mjs.hpp \ + examples/server/loading.html.hpp \ + common/json.hpp \ + common/stb_image.h \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h %.hpp $<,$^) -Iexamples/server $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) $(LWINSOCK2) + +# Portable equivalent of `cd examples/server/public && xxd -i $(notdir $<) ../$(notdir $<).hpp`: +examples/server/%.hpp: examples/server/public/% Makefile + @( export NAME=$(subst .,_,$(subst -,_,$(notdir $<))) && \ + echo "unsigned char $${NAME}[] = {" && \ + cat $< | od -v -t x1 -An | sed -E 's/([0-9a-fA-F]+)/0x\1, /g' && \ + echo "};" && \ + echo "unsigned int $${NAME}_len = $(shell cat $< | wc -c );" \ + ) > $@ + +llama-gen-docs: examples/gen-docs/gen-docs.cpp \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +libllava.a: examples/llava/llava.cpp \ + examples/llava/llava.h \ + examples/llava/clip.cpp \ + examples/llava/clip.h \ + common/stb_image.h \ + common/base64.hpp \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) -static -fPIC -c $< -o $@ -Wno-cast-qual + +llama-llava-cli: examples/llava/llava-cli.cpp \ + examples/llava/llava.cpp \ + examples/llava/llava.h \ + examples/llava/clip.cpp \ + examples/llava/clip.h \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) $< $(filter-out %.h $<,$^) -o $@ $(LDFLAGS) -Wno-cast-qual + +llama-minicpmv-cli: examples/llava/minicpmv-cli.cpp \ + examples/llava/llava.cpp \ + examples/llava/llava.h \ + examples/llava/clip.cpp \ + examples/llava/clip.h \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) $< $(filter-out %.h $<,$^) -o $@ $(LDFLAGS) -Wno-cast-qual + +ifeq ($(UNAME_S),Darwin) +swift: examples/batched.swift + (cd examples/batched.swift; make build) +endif + +common/build-info.cpp: $(wildcard .git/index) scripts/build-info.sh + @sh scripts/build-info.sh "$(CC)" > $@.tmp + @if ! 
cmp -s $@.tmp $@; then \ + mv $@.tmp $@; \ + else \ + rm $@.tmp; \ + fi + +common/build-info.o: common/build-info.cpp + $(CXX) $(CXXFLAGS) -c $(filter-out %.h,$^) -o $@ + +# +# Tests +# + +tests: $(TEST_TARGETS) + +tests/test-arg-parser: tests/test-arg-parser.cpp \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +tests/test-llama-grammar: tests/test-llama-grammar.cpp \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +tests/test-log: tests/test-log.cpp \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +tests/test-grammar-parser: tests/test-grammar-parser.cpp \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +tests/test-grammar-integration: tests/test-grammar-integration.cpp \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +tests/test-double-float: tests/test-double-float.cpp + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +tests/test-json-schema-to-grammar: tests/test-json-schema-to-grammar.cpp \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) -Iexamples/server -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +tests/test-grad0: tests/test-grad0.cpp \ + $(OBJ_GGML) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +tests/test-opt: tests/test-opt.cpp \ + $(OBJ_GGML) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +tests/test-quantize-fns: tests/test-quantize-fns.cpp \ + $(OBJ_GGML) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +tests/test-quantize-perf: tests/test-quantize-perf.cpp \ + $(OBJ_GGML) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +tests/test-sampling: tests/test-sampling.cpp \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +tests/test-tokenizer-0: tests/test-tokenizer-0.cpp \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +tests/test-tokenizer-1-bpe: tests/test-tokenizer-1-bpe.cpp \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +tests/test-tokenizer-1-spm: tests/test-tokenizer-1-spm.cpp \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +tests/test-rope: tests/test-rope.cpp ggml/src/ggml.o \ + $(OBJ_GGML) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) 
$(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +tests/test-c.o: tests/test-c.c include/llama.h + $(CC) $(CFLAGS) -c $(filter-out %.h,$^) -o $@ + +tests/test-backend-ops: tests/test-backend-ops.cpp \ + $(OBJ_GGML) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +tests/test-model-load-cancel: tests/test-model-load-cancel.cpp tests/get-model.cpp \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +tests/test-autorelease: tests/test-autorelease.cpp tests/get-model.cpp \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +tests/test-chat-template: tests/test-chat-template.cpp \ + $(OBJ_ALL) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +# +# PoCs +# + +llama-vdot: pocs/vdot/vdot.cpp ggml/src/ggml.o \ + $(OBJ_GGML) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +llama-q8dot: pocs/vdot/q8dot.cpp ggml/src/ggml.o \ + $(OBJ_GGML) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +# +# Deprecated binaries that we want to keep around long enough for people to migrate to the new filenames; then they can be removed. +# +# Mark legacy binary targets as .PHONY so that they are always checked. +.PHONY: main quantize perplexity embedding server + +# Define the object file target +examples/deprecation-warning/deprecation-warning.o: examples/deprecation-warning/deprecation-warning.cpp + $(CXX) $(CXXFLAGS) -c $< -o $@ + +# NOTE: We currently always build the deprecation-warning `main` and `server` binaries to help users migrate. +# Eventually we will want to remove these targets from building all the time. +main: examples/deprecation-warning/deprecation-warning.o + $(CXX) $(CXXFLAGS) $< -o $@ $(LDFLAGS) + @echo "NOTICE: The 'main' binary is deprecated. Please use 'llama-cli' instead." + +server: examples/deprecation-warning/deprecation-warning.o + $(CXX) $(CXXFLAGS) $< -o $@ $(LDFLAGS) + @echo "NOTICE: The 'server' binary is deprecated. Please use 'llama-server' instead." + +quantize: examples/deprecation-warning/deprecation-warning.o +ifneq (,$(wildcard quantize)) + $(CXX) $(CXXFLAGS) $< -o $@ $(LDFLAGS) + @echo "#########" + @echo "WARNING: The 'quantize' binary is deprecated. Please use 'llama-quantize' instead." + @echo " Remove the 'quantize' binary to remove this warning." + @echo "#########" +endif + +perplexity: examples/deprecation-warning/deprecation-warning.o +ifneq (,$(wildcard perplexity)) + $(CXX) $(CXXFLAGS) $< -o $@ $(LDFLAGS) + @echo "#########" + @echo "WARNING: The 'perplexity' binary is deprecated. Please use 'llama-perplexity' instead." + @echo " Remove the 'perplexity' binary to remove this warning." + @echo "#########" +endif + +embedding: examples/deprecation-warning/deprecation-warning.o +ifneq (,$(wildcard embedding)) + $(CXX) $(CXXFLAGS) $< -o $@ $(LDFLAGS) + @echo "#########" + @echo "WARNING: The 'embedding' binary is deprecated. Please use 'llama-embedding' instead." + @echo " Remove the 'embedding' binary to remove this warning." 
+ @echo "#########" +endif diff --git a/Package.swift b/Package.swift new file mode 100644 index 0000000000000000000000000000000000000000..d3661d13c80f2fc0ac2045101964b02dbdb27460 --- /dev/null +++ b/Package.swift @@ -0,0 +1,80 @@ +// swift-tools-version:5.5 + +import PackageDescription + +var sources = [ + "src/llama.cpp", + "src/llama-vocab.cpp", + "src/llama-grammar.cpp", + "src/llama-sampling.cpp", + "src/unicode.cpp", + "src/unicode-data.cpp", + "ggml/src/ggml.c", + "ggml/src/ggml-cpu.c", + "ggml/src/ggml-alloc.c", + "ggml/src/ggml-backend.cpp", + "ggml/src/ggml-quants.c", + "ggml/src/ggml-aarch64.c", +] + +var resources: [Resource] = [] +var linkerSettings: [LinkerSetting] = [] +var cSettings: [CSetting] = [ + .unsafeFlags(["-Wno-shorten-64-to-32", "-O3", "-DNDEBUG"]), + .unsafeFlags(["-fno-objc-arc"]), + // NOTE: NEW_LAPACK will required iOS version 16.4+ + // We should consider add this in the future when we drop support for iOS 14 + // (ref: ref: https://developer.apple.com/documentation/accelerate/1513264-cblas_sgemm?language=objc) + // .define("ACCELERATE_NEW_LAPACK"), + // .define("ACCELERATE_LAPACK_ILP64") +] + +#if canImport(Darwin) +sources.append("ggml/src/ggml-metal.m") +resources.append(.process("ggml/src/ggml-metal.metal")) +linkerSettings.append(.linkedFramework("Accelerate")) +cSettings.append( + contentsOf: [ + .define("GGML_USE_ACCELERATE"), + .define("GGML_USE_METAL") + ] +) +#endif + +#if os(Linux) + cSettings.append(.define("_GNU_SOURCE")) +#endif + +let package = Package( + name: "llama", + platforms: [ + .macOS(.v12), + .iOS(.v14), + .watchOS(.v4), + .tvOS(.v14) + ], + products: [ + .library(name: "llama", targets: ["llama"]), + ], + targets: [ + .target( + name: "llama", + path: ".", + exclude: [ + "cmake", + "examples", + "scripts", + "models", + "tests", + "CMakeLists.txt", + "Makefile" + ], + sources: sources, + resources: resources, + publicHeadersPath: "spm-headers", + cSettings: cSettings, + linkerSettings: linkerSettings + ) + ], + cxxLanguageStandard: .cxx11 +) diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 0000000000000000000000000000000000000000..f4322c6ee4d18c3fb9f694fcc01f2be8640adb50 --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,67 @@ +# Security Policy + + - [**Using llama.cpp securely**](#using-llamacpp-securely) + - [Untrusted models](#untrusted-models) + - [Untrusted inputs](#untrusted-inputs) + - [Data privacy](#data-privacy) + - [Untrusted environments or networks](#untrusted-environments-or-networks) + - [Multi-Tenant environments](#multi-tenant-environments) + - [**Reporting a vulnerability**](#reporting-a-vulnerability) + +## Using llama.cpp securely + +### Untrusted models +Be careful when running untrusted models. This classification includes models created by unknown developers or utilizing data obtained from unknown sources. + +*Always execute untrusted models within a secure, isolated environment such as a sandbox* (e.g., containers, virtual machines). This helps protect your system from potentially malicious code. + +> [!NOTE] +> The trustworthiness of a model is not binary. You must always determine the proper level of caution depending on the specific model and how it matches your use case and risk tolerance. + +### Untrusted inputs + +Some models accept various input formats (text, images, audio, etc.). The libraries converting these inputs have varying security levels, so it's crucial to isolate the model and carefully pre-process inputs to mitigate script injection risks. 
+ +For maximum security when handling untrusted inputs, you may need to employ the following: + +* Sandboxing: Isolate the environment where the inference happens. +* Pre-analysis: Check how the model performs by default when exposed to prompt injection (e.g. using [fuzzing for prompt injection](https://github.com/FonduAI/awesome-prompt-injection?tab=readme-ov-file#tools)). This will give you an idea of how much hardening the following measures will require. +* Updates: Keep both LLaMA C++ and your libraries updated with the latest security patches. +* Input Sanitization: Before feeding data to the model, sanitize inputs rigorously. This involves techniques such as: + * Validation: Enforce strict rules on allowed characters and data types. + * Filtering: Remove potentially malicious scripts or code fragments. + * Encoding: Convert special characters into safe representations. + * Verification: Run tooling that identifies potential script injections (e.g. [models that detect prompt injection attempts](https://python.langchain.com/docs/guides/safety/hugging_face_prompt_injection)). + +### Data privacy + +To protect sensitive data from potential leaks or unauthorized access, it is crucial to sandbox the model execution. This means running the model in a secure, isolated environment, which helps mitigate many attack vectors. + +### Untrusted environments or networks + +If you can't run your models in a secure and isolated environment, or if they must be exposed to an untrusted network, make sure to take the following security precautions: +* Confirm the hash of any downloaded artifact (e.g. pre-trained model weights) matches a known-good value +* Encrypt your data if sending it over the network. + +### Multi-Tenant environments + +If you intend to run multiple models in parallel with shared memory, it is your responsibility to ensure the models do not interact or access each other's data. The primary areas of concern are tenant isolation, resource allocation, model sharing and hardware attacks. + +1. Tenant Isolation: Models should run separately with strong isolation methods to prevent unwanted data access. Separating networks is crucial for isolation, as it prevents unauthorized access to data or models, and stops malicious users from sending graphs to be executed under another tenant's identity. + +2. Resource Allocation: A denial of service caused by one model can impact the overall system health. Implement safeguards like rate limits, access controls, and health monitoring. + +3. Model Sharing: In a multi-tenant model-sharing design, tenants and users must understand the security risks of running code provided by others. Since there are no reliable methods to detect malicious models, sandboxing the model execution is the recommended approach to mitigate the risk. + +4. Hardware Attacks: GPUs or TPUs can also be attacked. [Research](https://scholar.google.com/scholar?q=gpu+side+channel) has shown that side-channel attacks on GPUs are possible, which can leak data from other models or processes running on the same system at the same time. + +## Reporting a vulnerability + +Be aware that none of the topics under [Using llama.cpp securely](#using-llamacpp-securely) are considered vulnerabilities of LLaMA C++. + + +However, if you have discovered a security vulnerability in this project, please report it privately. **Do not disclose it as a public issue.** This gives us time to work with you to fix the issue before public exposure, reducing the chance that the exploit will be used before a patch is released. 
+ +Please disclose it as a private [security advisory](https://github.com/ggerganov/llama.cpp/security/advisories/new). + +This project is maintained by a team of volunteers on a reasonable-effort basis. As such, please give us at least 90 days to work on a fix before public exposure. diff --git a/ci/README.md b/ci/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4064705190697a7da430e1b85863afe3c71ea123 --- /dev/null +++ b/ci/README.md @@ -0,0 +1,29 @@ +# CI + +In addition to [GitHub Actions](https://github.com/ggerganov/llama.cpp/actions), `llama.cpp` uses a custom CI framework: + +https://github.com/ggml-org/ci + +It monitors the `master` branch for new commits and runs the +[ci/run.sh](https://github.com/ggerganov/llama.cpp/blob/master/ci/run.sh) script on dedicated cloud instances. This allows us +to execute heavier workloads than just using GitHub Actions. Over time, the cloud instances will be scaled +to cover various hardware architectures, including GPU and Apple Silicon instances. + +Collaborators can optionally trigger the CI run by adding the `ggml-ci` keyword to their commit message. +Only the branches of this repo are monitored for this keyword. + +It is good practice, before publishing changes, to execute the full CI locally on your machine: + +```bash +mkdir tmp + +# CPU-only build +bash ./ci/run.sh ./tmp/results ./tmp/mnt + +# with CUDA support +GG_BUILD_CUDA=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt + +# with SYCL support +source /opt/intel/oneapi/setvars.sh +GG_BUILD_SYCL=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt +``` diff --git a/ci/run.sh b/ci/run.sh new file mode 100644 index 0000000000000000000000000000000000000000..21b62dd1ef73367bb7fc69ad34d23367149bb5d5 --- /dev/null +++ b/ci/run.sh @@ -0,0 +1,851 @@ +#!/bin/bash +# +# sample usage: +# +# mkdir tmp +# +# # CPU-only build +# bash ./ci/run.sh ./tmp/results ./tmp/mnt +# +# # with CUDA support +# GG_BUILD_CUDA=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt +# +# # with SYCL support +# GG_BUILD_SYCL=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt +# +# # with VULKAN support +# GG_BUILD_VULKAN=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt +# + +if [ -z "$2" ]; then + echo "usage: $0 <output-dir> <mnt-dir>" + exit 1 +fi + +mkdir -p "$1" +mkdir -p "$2" + +OUT=$(realpath "$1") +MNT=$(realpath "$2") + +rm -f "$OUT"/*.log +rm -f "$OUT"/*.exit +rm -f "$OUT"/*.md + +sd=`dirname $0` +cd $sd/../ +SRC=`pwd` + +CMAKE_EXTRA="-DLLAMA_FATAL_WARNINGS=ON" + +if [ ! -z ${GG_BUILD_METAL} ]; then + CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_METAL=ON" +fi + +if [ ! -z ${GG_BUILD_CUDA} ]; then + CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_CUDA=ON -DCMAKE_CUDA_ARCHITECTURES=native" +fi + +if [ ! -z ${GG_BUILD_SYCL} ]; then + if [ -z ${ONEAPI_ROOT} ]; then + echo "ONEAPI_ROOT not detected, please install the oneAPI base toolkit and enable it by:" + echo "source /opt/intel/oneapi/setvars.sh" + exit 1 + fi + + CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_SYCL=1 -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON" +fi + +if [ ! -z ${GG_BUILD_VULKAN} ]; then + CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_VULKAN=1" +fi +## helpers + +# download a file if it does not exist or if it is outdated +function gg_wget { + local out=$1 + local url=$2 + + local cwd=`pwd` + + mkdir -p $out + cd $out + + # should not re-download if file is the same + wget -nv -N $url + + cd $cwd +} + +function gg_printf { + printf -- "$@" >> $OUT/README.md +} + +function gg_run { + ci=$1 + + set -o pipefail + set -x + + gg_run_$ci | tee $OUT/$ci.log + cur=$? 
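+    # record the step's exit status; gg_sum_* reads it back from $OUT/$ci.exit for the report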
+ echo "$cur" > $OUT/$ci.exit + + set +x + set +o pipefail + + gg_sum_$ci + + ret=$((ret | cur)) +} + +## ci + +# ctest_debug + +function gg_run_ctest_debug { + cd ${SRC} + + rm -rf build-ci-debug && mkdir build-ci-debug && cd build-ci-debug + + set -e + + # Check cmake, make and ctest are installed + gg_check_build_requirements + + (time cmake -DCMAKE_BUILD_TYPE=Debug ${CMAKE_EXTRA} .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log + (time make -j$(nproc) ) 2>&1 | tee -a $OUT/${ci}-make.log + + (time ctest --output-on-failure -L main -E test-opt ) 2>&1 | tee -a $OUT/${ci}-ctest.log + + set +e +} + +function gg_sum_ctest_debug { + gg_printf '### %s\n\n' "${ci}" + + gg_printf 'Runs ctest in debug mode\n' + gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)" + gg_printf '```\n' + gg_printf '%s\n' "$(cat $OUT/${ci}-ctest.log)" + gg_printf '```\n' + gg_printf '\n' +} + +# ctest_release + +function gg_run_ctest_release { + cd ${SRC} + + rm -rf build-ci-release && mkdir build-ci-release && cd build-ci-release + + set -e + + # Check cmake, make and ctest are installed + gg_check_build_requirements + + (time cmake -DCMAKE_BUILD_TYPE=Release ${CMAKE_EXTRA} .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log + (time make -j$(nproc) ) 2>&1 | tee -a $OUT/${ci}-make.log + + if [ -z ${GG_BUILD_LOW_PERF} ]; then + (time ctest --output-on-failure -L main ) 2>&1 | tee -a $OUT/${ci}-ctest.log + else + (time ctest --output-on-failure -L main -E test-opt ) 2>&1 | tee -a $OUT/${ci}-ctest.log + fi + + set +e +} + +function gg_sum_ctest_release { + gg_printf '### %s\n\n' "${ci}" + + gg_printf 'Runs ctest in release mode\n' + gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)" + gg_printf '```\n' + gg_printf '%s\n' "$(cat $OUT/${ci}-ctest.log)" + gg_printf '```\n' +} + +# test_scripts_debug + +function gg_run_test_scripts_debug { + cd ${SRC} + + set -e + + (cd ./examples/gguf-split && time bash tests.sh "$SRC/build-ci-debug/bin" "$MNT/models") 2>&1 | tee -a $OUT/${ci}-scripts.log + (cd ./examples/quantize && time bash tests.sh "$SRC/build-ci-debug/bin" "$MNT/models") 2>&1 | tee -a $OUT/${ci}-scripts.log + + set +e +} + +function gg_sum_test_scripts_debug { + gg_printf '### %s\n\n' "${ci}" + + gg_printf 'Runs test scripts in debug mode\n' + gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)" + gg_printf '```\n' + gg_printf '%s\n' "$(cat $OUT/${ci}-scripts.log)" + gg_printf '```\n' + gg_printf '\n' +} + +# test_scripts_release + +function gg_run_test_scripts_release { + cd ${SRC} + + set -e + + (cd ./examples/gguf-split && time bash tests.sh "$SRC/build-ci-release/bin" "$MNT/models") 2>&1 | tee -a $OUT/${ci}-scripts.log + (cd ./examples/quantize && time bash tests.sh "$SRC/build-ci-release/bin" "$MNT/models") 2>&1 | tee -a $OUT/${ci}-scripts.log + + set +e +} + +function gg_sum_test_scripts_release { + gg_printf '### %s\n\n' "${ci}" + + gg_printf 'Runs test scripts in release mode\n' + gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)" + gg_printf '```\n' + gg_printf '%s\n' "$(cat $OUT/${ci}-scripts.log)" + gg_printf '```\n' + gg_printf '\n' +} + +function gg_get_model { + local gguf_0="$MNT/models/pythia/1.4B/ggml-model-f16.gguf" + local gguf_1="$MNT/models/pythia/2.8B/ggml-model-f16.gguf" + local gguf_2="$MNT/models/open-llama/7B-v2/ggml-model-f16.gguf" + if [[ -s $gguf_0 ]]; then + echo -n "$gguf_0" + elif [[ -s $gguf_1 ]]; then + echo -n "$gguf_1" + elif [[ -s $gguf_2 ]]; then + echo -n "$gguf_2" + else + echo >&2 "No model found. Can't run gg_run_ctest_with_model." 
+ exit 1 + fi +} + +function gg_run_ctest_with_model_debug { + cd ${SRC} + + local model; model=$(gg_get_model) + cd build-ci-debug + set -e + (LLAMACPP_TEST_MODELFILE="$model" time ctest --output-on-failure -L model) 2>&1 | tee -a $OUT/${ci}-ctest.log + set +e + cd .. +} + +function gg_run_ctest_with_model_release { + cd ${SRC} + + local model; model=$(gg_get_model) + cd build-ci-release + set -e + (LLAMACPP_TEST_MODELFILE="$model" time ctest --output-on-failure -L model) 2>&1 | tee -a $OUT/${ci}-ctest.log + set +e + cd .. +} + +function gg_sum_ctest_with_model_debug { + gg_printf '### %s\n\n' "${ci}" + + gg_printf 'Runs ctest with model files in debug mode\n' + gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)" + gg_printf '```\n' + gg_printf '%s\n' "$(cat $OUT/${ci}-ctest.log)" + gg_printf '```\n' +} + +function gg_sum_ctest_with_model_release { + gg_printf '### %s\n\n' "${ci}" + + gg_printf 'Runs ctest with model files in release mode\n' + gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)" + gg_printf '```\n' + gg_printf '%s\n' "$(cat $OUT/${ci}-ctest.log)" + gg_printf '```\n' +} + +# open_llama_7b_v2 + +function gg_run_open_llama_7b_v2 { + cd ${SRC} + + gg_wget models-mnt/open-llama/7B-v2/ https://huggingface.co/openlm-research/open_llama_7b_v2/raw/main/config.json + gg_wget models-mnt/open-llama/7B-v2/ https://huggingface.co/openlm-research/open_llama_7b_v2/resolve/main/tokenizer.model + gg_wget models-mnt/open-llama/7B-v2/ https://huggingface.co/openlm-research/open_llama_7b_v2/raw/main/tokenizer_config.json + gg_wget models-mnt/open-llama/7B-v2/ https://huggingface.co/openlm-research/open_llama_7b_v2/raw/main/special_tokens_map.json + gg_wget models-mnt/open-llama/7B-v2/ https://huggingface.co/openlm-research/open_llama_7b_v2/raw/main/pytorch_model.bin.index.json + gg_wget models-mnt/open-llama/7B-v2/ https://huggingface.co/openlm-research/open_llama_7b_v2/resolve/main/pytorch_model-00001-of-00002.bin + gg_wget models-mnt/open-llama/7B-v2/ https://huggingface.co/openlm-research/open_llama_7b_v2/resolve/main/pytorch_model-00002-of-00002.bin + gg_wget models-mnt/open-llama/7B-v2/ https://huggingface.co/openlm-research/open_llama_7b_v2/raw/main/generation_config.json + + gg_wget models-mnt/wikitext/ https://huggingface.co/datasets/ggml-org/ci/resolve/main/wikitext-2-raw-v1.zip + unzip -o models-mnt/wikitext/wikitext-2-raw-v1.zip -d models-mnt/wikitext/ + + path_models="../models-mnt/open-llama/7B-v2" + path_wiki="../models-mnt/wikitext/wikitext-2-raw" + + rm -rf build-ci-release && mkdir build-ci-release && cd build-ci-release + + set -e + + (time cmake -DCMAKE_BUILD_TYPE=Release ${CMAKE_EXTRA} .. 
) 2>&1 | tee -a $OUT/${ci}-cmake.log + (time make -j$(nproc) ) 2>&1 | tee -a $OUT/${ci}-make.log + + python3 ../examples/convert_legacy_llama.py ${path_models} --outfile ${path_models}/ggml-model-f16.gguf + + model_f16="${path_models}/ggml-model-f16.gguf" + model_q8_0="${path_models}/ggml-model-q8_0.gguf" + model_q4_0="${path_models}/ggml-model-q4_0.gguf" + model_q4_1="${path_models}/ggml-model-q4_1.gguf" + model_q5_0="${path_models}/ggml-model-q5_0.gguf" + model_q5_1="${path_models}/ggml-model-q5_1.gguf" + model_q2_k="${path_models}/ggml-model-q2_k.gguf" + model_q3_k="${path_models}/ggml-model-q3_k.gguf" + model_q4_k="${path_models}/ggml-model-q4_k.gguf" + model_q5_k="${path_models}/ggml-model-q5_k.gguf" + model_q6_k="${path_models}/ggml-model-q6_k.gguf" + + wiki_test="${path_wiki}/wiki.test.raw" + + ./bin/llama-quantize ${model_f16} ${model_q8_0} q8_0 + ./bin/llama-quantize ${model_f16} ${model_q4_0} q4_0 + ./bin/llama-quantize ${model_f16} ${model_q4_1} q4_1 + ./bin/llama-quantize ${model_f16} ${model_q5_0} q5_0 + ./bin/llama-quantize ${model_f16} ${model_q5_1} q5_1 + ./bin/llama-quantize ${model_f16} ${model_q2_k} q2_k + ./bin/llama-quantize ${model_f16} ${model_q3_k} q3_k + ./bin/llama-quantize ${model_f16} ${model_q4_k} q4_k + ./bin/llama-quantize ${model_f16} ${model_q5_k} q5_k + ./bin/llama-quantize ${model_f16} ${model_q6_k} q6_k + + (time ./bin/llama-cli --model ${model_f16} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log + (time ./bin/llama-cli --model ${model_q8_0} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log + (time ./bin/llama-cli --model ${model_q4_0} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_0.log + (time ./bin/llama-cli --model ${model_q4_1} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_1.log + (time ./bin/llama-cli --model ${model_q5_0} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_0.log + (time ./bin/llama-cli --model ${model_q5_1} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_1.log + (time ./bin/llama-cli --model ${model_q2_k} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q2_k.log + (time ./bin/llama-cli --model ${model_q3_k} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q3_k.log + (time ./bin/llama-cli --model ${model_q4_k} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_k.log + (time ./bin/llama-cli --model ${model_q5_k} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_k.log + (time ./bin/llama-cli --model ${model_q6_k} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q6_k.log + + (time ./bin/llama-perplexity --model ${model_f16} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log + (time ./bin/llama-perplexity --model ${model_q8_0} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log + (time ./bin/llama-perplexity --model 
${model_q4_0} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_0.log + (time ./bin/llama-perplexity --model ${model_q4_1} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_1.log + (time ./bin/llama-perplexity --model ${model_q5_0} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_0.log + (time ./bin/llama-perplexity --model ${model_q5_1} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_1.log + (time ./bin/llama-perplexity --model ${model_q2_k} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q2_k.log + (time ./bin/llama-perplexity --model ${model_q3_k} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q3_k.log + (time ./bin/llama-perplexity --model ${model_q4_k} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_k.log + (time ./bin/llama-perplexity --model ${model_q5_k} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_k.log + (time ./bin/llama-perplexity --model ${model_q6_k} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q6_k.log + + (time ./bin/llama-imatrix --model ${model_f16} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-imatrix.log + + (time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 10 -c 0 ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log + (time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 10 -c 0 -fa ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log + (time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 99 -c 0 ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log + (time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 99 -c 0 -fa ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log + + function check_ppl { + # take the last decimal number in the log line as the perplexity and fail above the 20.0 sanity threshold + qnt="$1" + ppl=$(echo "$2" | grep -oE "[0-9]+\.[0-9]+" | tail -n 1) + + if [ $(echo "$ppl > 20.0" | bc) -eq 1 ]; then + printf ' - %s @ %s (FAIL: ppl > 20.0)\n' "$qnt" "$ppl" + return 20 + fi + + printf ' - %s @ %s OK\n' "$qnt" "$ppl" + return 0 + } + + check_ppl "f16" "$(cat $OUT/${ci}-tg-f16.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log + check_ppl "q8_0" "$(cat $OUT/${ci}-tg-q8_0.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log + check_ppl "q4_0" "$(cat $OUT/${ci}-tg-q4_0.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log + check_ppl "q4_1" "$(cat $OUT/${ci}-tg-q4_1.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log + check_ppl "q5_0" "$(cat $OUT/${ci}-tg-q5_0.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log + check_ppl "q5_1" "$(cat $OUT/${ci}-tg-q5_1.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log + check_ppl "q2_k" "$(cat $OUT/${ci}-tg-q2_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log + check_ppl "q3_k" "$(cat $OUT/${ci}-tg-q3_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log + check_ppl "q4_k" "$(cat $OUT/${ci}-tg-q4_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log + check_ppl "q5_k" "$(cat $OUT/${ci}-tg-q5_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log + check_ppl "q6_k" "$(cat $OUT/${ci}-tg-q6_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log + + cat $OUT/${ci}-imatrix.log | grep "Final" >> $OUT/${ci}-imatrix-sum.log + + set +e +} + +function gg_sum_open_llama_7b_v2 { + gg_printf '### %s\n\n' "${ci}" + + gg_printf 'OpenLLaMA 7B-v2:\n' + gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)" + gg_printf '- perplexity:\n%s\n' "$(cat $OUT/${ci}-ppl.log)" + gg_printf 
'- imatrix:\n```\n%s\n```\n' "$(cat $OUT/${ci}-imatrix-sum.log)" + gg_printf '- f16: \n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-f16.log)" + gg_printf '- q8_0:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q8_0.log)" + gg_printf '- q4_0:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q4_0.log)" + gg_printf '- q4_1:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q4_1.log)" + gg_printf '- q5_0:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q5_0.log)" + gg_printf '- q5_1:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q5_1.log)" + gg_printf '- q2_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q2_k.log)" + gg_printf '- q3_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q3_k.log)" + gg_printf '- q4_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q4_k.log)" + gg_printf '- q5_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q5_k.log)" + gg_printf '- q6_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q6_k.log)" + gg_printf '- save-load-state: \n```\n%s\n```\n' "$(cat $OUT/${ci}-save-load-state.log)" +} + +# pythia_1.4b + +function gg_run_pythia_1_4b { + cd ${SRC} + + gg_wget models-mnt/pythia/1.4B/ https://huggingface.co/EleutherAI/pythia-1.4b/raw/main/config.json + gg_wget models-mnt/pythia/1.4B/ https://huggingface.co/EleutherAI/pythia-1.4b/raw/main/tokenizer.json + gg_wget models-mnt/pythia/1.4B/ https://huggingface.co/EleutherAI/pythia-1.4b/raw/main/tokenizer_config.json + gg_wget models-mnt/pythia/1.4B/ https://huggingface.co/EleutherAI/pythia-1.4b/raw/main/special_tokens_map.json + gg_wget models-mnt/pythia/1.4B/ https://huggingface.co/EleutherAI/pythia-1.4b/resolve/main/pytorch_model.bin + + gg_wget models-mnt/wikitext/ https://huggingface.co/datasets/ggml-org/ci/resolve/main/wikitext-2-raw-v1.zip + unzip -o models-mnt/wikitext/wikitext-2-raw-v1.zip -d models-mnt/wikitext/ + head -n 60 models-mnt/wikitext/wikitext-2-raw/wiki.test.raw > models-mnt/wikitext/wikitext-2-raw/wiki.test-60.raw + + path_models="../models-mnt/pythia/1.4B" + path_wiki="../models-mnt/wikitext/wikitext-2-raw" + + rm -rf build-ci-release && mkdir build-ci-release && cd build-ci-release + + set -e + + (time cmake -DCMAKE_BUILD_TYPE=Release ${CMAKE_EXTRA} .. 
) 2>&1 | tee -a $OUT/${ci}-cmake.log + (time make -j$(nproc) ) 2>&1 | tee -a $OUT/${ci}-make.log + + python3 ../convert_hf_to_gguf.py ${path_models} --outfile ${path_models}/ggml-model-f16.gguf + + model_f16="${path_models}/ggml-model-f16.gguf" + model_q8_0="${path_models}/ggml-model-q8_0.gguf" + model_q4_0="${path_models}/ggml-model-q4_0.gguf" + model_q4_1="${path_models}/ggml-model-q4_1.gguf" + model_q5_0="${path_models}/ggml-model-q5_0.gguf" + model_q5_1="${path_models}/ggml-model-q5_1.gguf" + model_q2_k="${path_models}/ggml-model-q2_k.gguf" + model_q3_k="${path_models}/ggml-model-q3_k.gguf" + model_q4_k="${path_models}/ggml-model-q4_k.gguf" + model_q5_k="${path_models}/ggml-model-q5_k.gguf" + model_q6_k="${path_models}/ggml-model-q6_k.gguf" + + wiki_test_60="${path_wiki}/wiki.test-60.raw" + + ./bin/llama-quantize ${model_f16} ${model_q8_0} q8_0 + ./bin/llama-quantize ${model_f16} ${model_q4_0} q4_0 + ./bin/llama-quantize ${model_f16} ${model_q4_1} q4_1 + ./bin/llama-quantize ${model_f16} ${model_q5_0} q5_0 + ./bin/llama-quantize ${model_f16} ${model_q5_1} q5_1 + ./bin/llama-quantize ${model_f16} ${model_q2_k} q2_k + ./bin/llama-quantize ${model_f16} ${model_q3_k} q3_k + ./bin/llama-quantize ${model_f16} ${model_q4_k} q4_k + ./bin/llama-quantize ${model_f16} ${model_q5_k} q5_k + ./bin/llama-quantize ${model_f16} ${model_q6_k} q6_k + + (time ./bin/llama-cli --model ${model_f16} -ngl 99 -c 0 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log + (time ./bin/llama-cli --model ${model_q8_0} -ngl 99 -c 0 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log + (time ./bin/llama-cli --model ${model_q4_0} -ngl 99 -c 0 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_0.log + (time ./bin/llama-cli --model ${model_q4_1} -ngl 99 -c 0 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_1.log + (time ./bin/llama-cli --model ${model_q5_0} -ngl 99 -c 0 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_0.log + (time ./bin/llama-cli --model ${model_q5_1} -ngl 99 -c 0 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_1.log + (time ./bin/llama-cli --model ${model_q2_k} -ngl 99 -c 0 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q2_k.log + (time ./bin/llama-cli --model ${model_q3_k} -ngl 99 -c 0 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q3_k.log + (time ./bin/llama-cli --model ${model_q4_k} -ngl 99 -c 0 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_k.log + (time ./bin/llama-cli --model ${model_q5_k} -ngl 99 -c 0 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_k.log + (time ./bin/llama-cli --model ${model_q6_k} -ngl 99 -c 0 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q6_k.log + + (time ./bin/llama-perplexity --model ${model_f16} -f ${wiki_test_60} -ngl 99 -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log + (time ./bin/llama-perplexity --model ${model_q8_0} -f ${wiki_test_60} -ngl 99 -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log + (time ./bin/llama-perplexity --model ${model_q4_0} -f ${wiki_test_60} -ngl 99 -c 128 -b 128 --chunks 1 ) 2>&1 | tee 
-a $OUT/${ci}-tg-q4_0.log + (time ./bin/llama-perplexity --model ${model_q4_1} -f ${wiki_test_60} -ngl 99 -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_1.log + (time ./bin/llama-perplexity --model ${model_q5_0} -f ${wiki_test_60} -ngl 99 -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_0.log + (time ./bin/llama-perplexity --model ${model_q5_1} -f ${wiki_test_60} -ngl 99 -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_1.log + (time ./bin/llama-perplexity --model ${model_q2_k} -f ${wiki_test_60} -ngl 99 -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q2_k.log + (time ./bin/llama-perplexity --model ${model_q3_k} -f ${wiki_test_60} -ngl 99 -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q3_k.log + (time ./bin/llama-perplexity --model ${model_q4_k} -f ${wiki_test_60} -ngl 99 -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_k.log + (time ./bin/llama-perplexity --model ${model_q5_k} -f ${wiki_test_60} -ngl 99 -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_k.log + (time ./bin/llama-perplexity --model ${model_q6_k} -f ${wiki_test_60} -ngl 99 -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q6_k.log + + (time ./bin/llama-imatrix --model ${model_f16} -f ${wiki_test_60} -ngl 99 -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-imatrix.log + + (time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 99 -c 0 ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log + (time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 99 -c 0 -fa ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log + + function check_ppl { + qnt="$1" + ppl=$(echo "$2" | grep -oE "[0-9]+\.[0-9]+" | tail -n 1) + + if [ $(echo "$ppl > 20.0" | bc) -eq 1 ]; then + printf ' - %s @ %s (FAIL: ppl > 20.0)\n' "$qnt" "$ppl" + return 20 + fi + + printf ' - %s @ %s OK\n' "$qnt" "$ppl" + return 0 + } + + check_ppl "f16" "$(cat $OUT/${ci}-tg-f16.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log + check_ppl "q8_0" "$(cat $OUT/${ci}-tg-q8_0.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log + check_ppl "q4_0" "$(cat $OUT/${ci}-tg-q4_0.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log + check_ppl "q4_1" "$(cat $OUT/${ci}-tg-q4_1.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log + check_ppl "q5_0" "$(cat $OUT/${ci}-tg-q5_0.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log + check_ppl "q5_1" "$(cat $OUT/${ci}-tg-q5_1.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log + #check_ppl "q2_k" "$(cat $OUT/${ci}-tg-q2_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log # note: ppl > 20.0 for this quant and model + check_ppl "q3_k" "$(cat $OUT/${ci}-tg-q3_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log + check_ppl "q4_k" "$(cat $OUT/${ci}-tg-q4_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log + check_ppl "q5_k" "$(cat $OUT/${ci}-tg-q5_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log + check_ppl "q6_k" "$(cat $OUT/${ci}-tg-q6_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log + + cat $OUT/${ci}-imatrix.log | grep "Final" >> $OUT/${ci}-imatrix-sum.log + + set +e +} + +function gg_sum_pythia_1_4b { + gg_printf '### %s\n\n' "${ci}" + + gg_printf 'Pythia 1.4B:\n' + gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)" + gg_printf '- perplexity:\n%s\n' "$(cat $OUT/${ci}-ppl.log)" + gg_printf '- imatrix:\n```\n%s\n```\n' "$(cat $OUT/${ci}-imatrix-sum.log)" + gg_printf '- f16: \n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-f16.log)" + gg_printf '- q8_0:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q8_0.log)" + gg_printf '- q4_0:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q4_0.log)" + gg_printf '- 
q4_1:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q4_1.log)" + gg_printf '- q5_0:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q5_0.log)" + gg_printf '- q5_1:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q5_1.log)" + gg_printf '- q2_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q2_k.log)" + gg_printf '- q3_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q3_k.log)" + gg_printf '- q4_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q4_k.log)" + gg_printf '- q5_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q5_k.log)" + gg_printf '- q6_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q6_k.log)" + gg_printf '- save-load-state: \n```\n%s\n```\n' "$(cat $OUT/${ci}-save-load-state.log)" +} + +# pythia_2_8b + +function gg_run_pythia_2_8b { + cd ${SRC} + + gg_wget models-mnt/pythia/2.8B/ https://huggingface.co/EleutherAI/pythia-2.8b/raw/main/config.json + gg_wget models-mnt/pythia/2.8B/ https://huggingface.co/EleutherAI/pythia-2.8b/raw/main/tokenizer.json + gg_wget models-mnt/pythia/2.8B/ https://huggingface.co/EleutherAI/pythia-2.8b/raw/main/tokenizer_config.json + gg_wget models-mnt/pythia/2.8B/ https://huggingface.co/EleutherAI/pythia-2.8b/raw/main/special_tokens_map.json + gg_wget models-mnt/pythia/2.8B/ https://huggingface.co/EleutherAI/pythia-2.8b/resolve/main/pytorch_model.bin + + gg_wget models-mnt/wikitext/ https://huggingface.co/datasets/ggml-org/ci/resolve/main/wikitext-2-raw-v1.zip + unzip -o models-mnt/wikitext/wikitext-2-raw-v1.zip -d models-mnt/wikitext/ + + path_models="../models-mnt/pythia/2.8B" + path_wiki="../models-mnt/wikitext/wikitext-2-raw" + + rm -rf build-ci-release && mkdir build-ci-release && cd build-ci-release + + set -e + + (time cmake -DCMAKE_BUILD_TYPE=Release ${CMAKE_EXTRA} .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log + (time make -j$(nproc) ) 2>&1 | tee -a $OUT/${ci}-make.log + + python3 ../convert_hf_to_gguf.py ${path_models} --outfile ${path_models}/ggml-model-f16.gguf + + model_f16="${path_models}/ggml-model-f16.gguf" + model_q8_0="${path_models}/ggml-model-q8_0.gguf" + model_q4_0="${path_models}/ggml-model-q4_0.gguf" + model_q4_1="${path_models}/ggml-model-q4_1.gguf" + model_q5_0="${path_models}/ggml-model-q5_0.gguf" + model_q5_1="${path_models}/ggml-model-q5_1.gguf" + model_q2_k="${path_models}/ggml-model-q2_k.gguf" + model_q3_k="${path_models}/ggml-model-q3_k.gguf" + model_q4_k="${path_models}/ggml-model-q4_k.gguf" + model_q5_k="${path_models}/ggml-model-q5_k.gguf" + model_q6_k="${path_models}/ggml-model-q6_k.gguf" + + wiki_test="${path_wiki}/wiki.test.raw" + + ./bin/llama-quantize ${model_f16} ${model_q8_0} q8_0 + ./bin/llama-quantize ${model_f16} ${model_q4_0} q4_0 + ./bin/llama-quantize ${model_f16} ${model_q4_1} q4_1 + ./bin/llama-quantize ${model_f16} ${model_q5_0} q5_0 + ./bin/llama-quantize ${model_f16} ${model_q5_1} q5_1 + ./bin/llama-quantize ${model_f16} ${model_q2_k} q2_k + ./bin/llama-quantize ${model_f16} ${model_q3_k} q3_k + ./bin/llama-quantize ${model_f16} ${model_q4_k} q4_k + ./bin/llama-quantize ${model_f16} ${model_q5_k} q5_k + ./bin/llama-quantize ${model_f16} ${model_q6_k} q6_k + + (time ./bin/llama-cli --model ${model_f16} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log + (time ./bin/llama-cli --model ${model_q8_0} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log + (time ./bin/llama-cli --model ${model_q4_0} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_0.log + (time 
./bin/llama-cli --model ${model_q4_1} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_1.log + (time ./bin/llama-cli --model ${model_q5_0} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_0.log + (time ./bin/llama-cli --model ${model_q5_1} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_1.log + (time ./bin/llama-cli --model ${model_q2_k} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q2_k.log + (time ./bin/llama-cli --model ${model_q3_k} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q3_k.log + (time ./bin/llama-cli --model ${model_q4_k} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_k.log + (time ./bin/llama-cli --model ${model_q5_k} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_k.log + (time ./bin/llama-cli --model ${model_q6_k} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q6_k.log + + (time ./bin/llama-perplexity --model ${model_f16} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log + (time ./bin/llama-perplexity --model ${model_q8_0} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log + (time ./bin/llama-perplexity --model ${model_q4_0} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_0.log + (time ./bin/llama-perplexity --model ${model_q4_1} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_1.log + (time ./bin/llama-perplexity --model ${model_q5_0} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_0.log + (time ./bin/llama-perplexity --model ${model_q5_1} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_1.log + (time ./bin/llama-perplexity --model ${model_q2_k} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q2_k.log + (time ./bin/llama-perplexity --model ${model_q3_k} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q3_k.log + (time ./bin/llama-perplexity --model ${model_q4_k} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_k.log + (time ./bin/llama-perplexity --model ${model_q5_k} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_k.log + (time ./bin/llama-perplexity --model ${model_q6_k} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q6_k.log + + (time ./bin/llama-imatrix --model ${model_f16} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-imatrix.log + + (time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 10 -c 0 ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log + (time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 10 -c 0 -fa ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log + (time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 99 -c 0 ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log + (time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 99 -c 0 -fa ) 2>&1 | tee 
-a $OUT/${ci}-save-load-state.log + + function check_ppl { + qnt="$1" + ppl=$(echo "$2" | grep -oE "[0-9]+\.[0-9]+" | tail -n 1) + + if [ $(echo "$ppl > 20.0" | bc) -eq 1 ]; then + printf ' - %s @ %s (FAIL: ppl > 20.0)\n' "$qnt" "$ppl" + return 20 + fi + + printf ' - %s @ %s OK\n' "$qnt" "$ppl" + return 0 + } + + check_ppl "f16" "$(cat $OUT/${ci}-tg-f16.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log + check_ppl "q8_0" "$(cat $OUT/${ci}-tg-q8_0.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log + check_ppl "q4_0" "$(cat $OUT/${ci}-tg-q4_0.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log + check_ppl "q4_1" "$(cat $OUT/${ci}-tg-q4_1.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log + check_ppl "q5_0" "$(cat $OUT/${ci}-tg-q5_0.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log + check_ppl "q5_1" "$(cat $OUT/${ci}-tg-q5_1.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log + #check_ppl "q2_k" "$(cat $OUT/${ci}-tg-q2_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log # note: ppl > 20.0 for this quant and model + check_ppl "q3_k" "$(cat $OUT/${ci}-tg-q3_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log + check_ppl "q4_k" "$(cat $OUT/${ci}-tg-q4_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log + check_ppl "q5_k" "$(cat $OUT/${ci}-tg-q5_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log + check_ppl "q6_k" "$(cat $OUT/${ci}-tg-q6_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log + + cat $OUT/${ci}-imatrix.log | grep "Final" >> $OUT/${ci}-imatrix-sum.log + + set +e +} + +function gg_sum_pythia_2_8b { + gg_printf '### %s\n\n' "${ci}" + + gg_printf 'Pythia 2.8B:\n' + gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)" + gg_printf '- perplexity:\n%s\n' "$(cat $OUT/${ci}-ppl.log)" + gg_printf '- imatrix:\n```\n%s\n```\n' "$(cat $OUT/${ci}-imatrix-sum.log)" + gg_printf '- f16: \n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-f16.log)" + gg_printf '- q8_0:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q8_0.log)" + gg_printf '- q4_0:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q4_0.log)" + gg_printf '- q4_1:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q4_1.log)" + gg_printf '- q5_0:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q5_0.log)" + gg_printf '- q5_1:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q5_1.log)" + gg_printf '- q2_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q2_k.log)" + gg_printf '- q3_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q3_k.log)" + gg_printf '- q4_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q4_k.log)" + gg_printf '- q5_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q5_k.log)" + gg_printf '- q6_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q6_k.log)" + gg_printf '- save-load-state: \n```\n%s\n```\n' "$(cat $OUT/${ci}-save-load-state.log)" +} + +# bge-small + +function gg_run_embd_bge_small { + cd ${SRC} + + gg_wget models-mnt/bge-small/ https://huggingface.co/BAAI/bge-small-en-v1.5/raw/main/config.json + gg_wget models-mnt/bge-small/ https://huggingface.co/BAAI/bge-small-en-v1.5/raw/main/tokenizer.json + gg_wget models-mnt/bge-small/ https://huggingface.co/BAAI/bge-small-en-v1.5/raw/main/tokenizer_config.json + gg_wget models-mnt/bge-small/ https://huggingface.co/BAAI/bge-small-en-v1.5/raw/main/special_tokens_map.json + gg_wget models-mnt/bge-small/ https://huggingface.co/BAAI/bge-small-en-v1.5/resolve/main/pytorch_model.bin + gg_wget models-mnt/bge-small/ https://huggingface.co/BAAI/bge-small-en-v1.5/raw/main/sentence_bert_config.json + gg_wget models-mnt/bge-small/ https://huggingface.co/BAAI/bge-small-en-v1.5/raw/main/vocab.txt + gg_wget models-mnt/bge-small/ https://huggingface.co/BAAI/bge-small-en-v1.5/raw/main/modules.json + 
gg_wget models-mnt/bge-small/ https://huggingface.co/BAAI/bge-small-en-v1.5/raw/main/config.json + + gg_wget models-mnt/bge-small/1_Pooling https://huggingface.co/BAAI/bge-small-en-v1.5/raw/main/1_Pooling/config.json + + path_models="../models-mnt/bge-small" + + rm -rf build-ci-release && mkdir build-ci-release && cd build-ci-release + + set -e + + (time cmake -DCMAKE_BUILD_TYPE=Release ${CMAKE_EXTRA} .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log + (time make -j$(nproc) ) 2>&1 | tee -a $OUT/${ci}-make.log + + python3 ../convert_hf_to_gguf.py ${path_models} --outfile ${path_models}/ggml-model-f16.gguf + + model_f16="${path_models}/ggml-model-f16.gguf" + model_q8_0="${path_models}/ggml-model-q8_0.gguf" + + ./bin/llama-quantize ${model_f16} ${model_q8_0} q8_0 + + (time ./bin/llama-embedding --model ${model_f16} -p "I believe the meaning of life is" -ngl 99 -c 0 ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log + (time ./bin/llama-embedding --model ${model_q8_0} -p "I believe the meaning of life is" -ngl 99 -c 0 ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log + + set +e +} + +function gg_sum_embd_bge_small { + gg_printf '### %s\n\n' "${ci}" + + gg_printf 'BGE Small (BERT):\n' + gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)" + gg_printf '- f16: \n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-f16.log)" + gg_printf '- q8_0:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q8_0.log)" +} + +# rerank_tiny + +function gg_run_rerank_tiny { + cd ${SRC} + + gg_wget models-mnt/rerank-tiny/ https://huggingface.co/jinaai/jina-reranker-v1-tiny-en/raw/main/config.json + gg_wget models-mnt/rerank-tiny/ https://huggingface.co/jinaai/jina-reranker-v1-tiny-en/raw/main/tokenizer.json + gg_wget models-mnt/rerank-tiny/ https://huggingface.co/jinaai/jina-reranker-v1-tiny-en/raw/main/tokenizer_config.json + gg_wget models-mnt/rerank-tiny/ https://huggingface.co/jinaai/jina-reranker-v1-tiny-en/raw/main/special_tokens_map.json + gg_wget models-mnt/rerank-tiny/ https://huggingface.co/jinaai/jina-reranker-v1-tiny-en/resolve/main/pytorch_model.bin + gg_wget models-mnt/rerank-tiny/ https://huggingface.co/jinaai/jina-reranker-v1-tiny-en/raw/main/sentence_bert_config.json + gg_wget models-mnt/rerank-tiny/ https://huggingface.co/jinaai/jina-reranker-v1-tiny-en/raw/main/vocab.txt + gg_wget models-mnt/rerank-tiny/ https://huggingface.co/jinaai/jina-reranker-v1-tiny-en/raw/main/modules.json + gg_wget models-mnt/rerank-tiny/ https://huggingface.co/jinaai/jina-reranker-v1-tiny-en/raw/main/config.json + + gg_wget models-mnt/rerank-tiny/1_Pooling https://huggingface.co/jinaai/jina-reranker-v1-tiny-en/raw/main/1_Pooling/config.json + + path_models="../models-mnt/rerank-tiny" + + rm -rf build-ci-release && mkdir build-ci-release && cd build-ci-release + + set -e + + (time cmake -DCMAKE_BUILD_TYPE=Release ${CMAKE_EXTRA} .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log + (time make -j$(nproc) ) 2>&1 | tee -a $OUT/${ci}-make.log + + python3 ../convert_hf_to_gguf.py ${path_models} --outfile ${path_models}/ggml-model-f16.gguf + + model_f16="${path_models}/ggml-model-f16.gguf" + + # for this model, the SEP token is "" + (time ./bin/llama-embedding --model ${model_f16} -p "what is panda?hi\nwhat is panda?it's a bear\nwhat is panda?The giant panda (Ailuropoda melanoleuca), sometimes called a panda bear or simply panda, is a bear species endemic to China." 
-ngl 99 -c 0 --pooling rank --embd-normalize -1 --verbose-prompt) 2>&1 | tee -a $OUT/${ci}-rk-f16.log + + # sample output + # rerank score 0: 0.029 + # rerank score 1: 0.029 + # rerank score 2: 0.135 + + # check that the score is in the range [$3, $4] + function check_score { + qnt="$1" + score=$(echo "$2" | grep -oE "[0-9]+\.[0-9]+" | tail -n 1) + + if [ $(echo "$score < $3" | bc) -eq 1 ] || [ $(echo "$score > $4" | bc) -eq 1 ]; then + printf ' - %s @ %s (FAIL: score not in range [%s, %s])\n' "$qnt" "$score" "$3" "$4" + return 20 + fi + + printf ' - %s @ %s OK\n' "$qnt" "$score" + return 0 + } + + check_score "rerank score 0" "$(cat $OUT/${ci}-rk-f16.log | grep "rerank score 0")" "0.00" "0.05" | tee -a $OUT/${ci}-rk-f16.log + check_score "rerank score 1" "$(cat $OUT/${ci}-rk-f16.log | grep "rerank score 1")" "0.00" "0.05" | tee -a $OUT/${ci}-rk-f16.log + check_score "rerank score 2" "$(cat $OUT/${ci}-rk-f16.log | grep "rerank score 2")" "0.10" "0.30" | tee -a $OUT/${ci}-rk-f16.log + + set +e +} + +function gg_sum_rerank_tiny { + gg_printf '### %s\n\n' "${ci}" + + gg_printf 'Rerank Tiny (Jina):\n' + gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)" + gg_printf '- f16: \n```\n%s\n```\n' "$(cat $OUT/${ci}-rk-f16.log)" +} + +function gg_check_build_requirements { + if ! command -v cmake &> /dev/null; then + gg_printf 'cmake not found, please install' + fi + + if ! command -v make &> /dev/null; then + gg_printf 'make not found, please install' + fi + + if ! command -v ctest &> /dev/null; then + gg_printf 'ctest not found, please install' + fi +} + +## main + +export LLAMA_LOG_PREFIX=1 +export LLAMA_LOG_TIMESTAMPS=1 + +if [ -z ${GG_BUILD_LOW_PERF} ]; then + # Create symlink: ./llama.cpp/models-mnt -> $MNT/models/models-mnt + rm -rf ${SRC}/models-mnt + mnt_models=${MNT}/models + mkdir -p ${mnt_models} + ln -sfn ${mnt_models} ${SRC}/models-mnt + + # Create a fresh python3 venv and enter it + python3 -m venv "$MNT/venv" + source "$MNT/venv/bin/activate" + + pip install -r ${SRC}/requirements.txt --disable-pip-version-check + pip install --editable gguf-py --disable-pip-version-check +fi + +ret=0 + +test $ret -eq 0 && gg_run ctest_debug +test $ret -eq 0 && gg_run ctest_release + +if [ -z ${GG_BUILD_LOW_PERF} ]; then + test $ret -eq 0 && gg_run embd_bge_small + test $ret -eq 0 && gg_run rerank_tiny + + if [ -z ${GG_BUILD_CLOUD} ] || [ ${GG_BUILD_EXTRA_TESTS_0} ]; then + test $ret -eq 0 && gg_run test_scripts_debug + test $ret -eq 0 && gg_run test_scripts_release + fi + + if [ -z ${GG_BUILD_VRAM_GB} ] || [ ${GG_BUILD_VRAM_GB} -ge 8 ]; then + if [ -z ${GG_BUILD_CUDA} ] && [ -z ${GG_BUILD_VULKAN} ]; then + test $ret -eq 0 && gg_run pythia_1_4b + else + test $ret -eq 0 && gg_run pythia_2_8b + #test $ret -eq 0 && gg_run open_llama_7b_v2 + fi + test $ret -eq 0 && gg_run ctest_with_model_debug + test $ret -eq 0 && gg_run ctest_with_model_release + fi +fi + +exit $ret diff --git a/cmake/arm64-apple-clang.cmake b/cmake/arm64-apple-clang.cmake new file mode 100644 index 0000000000000000000000000000000000000000..5fcd2882afc9d96ccc1c28396b91e5e46c93f66c --- /dev/null +++ b/cmake/arm64-apple-clang.cmake @@ -0,0 +1,16 @@ +set( CMAKE_SYSTEM_NAME Darwin ) +set( CMAKE_SYSTEM_PROCESSOR arm64 ) + +set( target arm64-apple-darwin-macho ) + +set( CMAKE_C_COMPILER clang ) +set( CMAKE_CXX_COMPILER clang++ ) + +set( CMAKE_C_COMPILER_TARGET ${target} ) +set( CMAKE_CXX_COMPILER_TARGET ${target} ) + +set( arch_c_flags "-march=armv8.4-a -fvectorize -ffp-model=fast -fno-finite-math-only" ) +set( warn_c_flags "-Wno-format 
-Wno-unused-variable -Wno-unused-function" ) + +set( CMAKE_C_FLAGS_INIT "${arch_c_flags} ${warn_c_flags}" ) +set( CMAKE_CXX_FLAGS_INIT "${arch_c_flags} ${warn_c_flags}" ) diff --git a/cmake/arm64-windows-llvm.cmake b/cmake/arm64-windows-llvm.cmake new file mode 100644 index 0000000000000000000000000000000000000000..8023796800683240e32048180a1eb874ddab1963 --- /dev/null +++ b/cmake/arm64-windows-llvm.cmake @@ -0,0 +1,16 @@ +set( CMAKE_SYSTEM_NAME Windows ) +set( CMAKE_SYSTEM_PROCESSOR arm64 ) + +set( target arm64-pc-windows-msvc ) + +set( CMAKE_C_COMPILER clang ) +set( CMAKE_CXX_COMPILER clang++ ) + +set( CMAKE_C_COMPILER_TARGET ${target} ) +set( CMAKE_CXX_COMPILER_TARGET ${target} ) + +set( arch_c_flags "-march=armv8.7-a -fvectorize -ffp-model=fast -fno-finite-math-only" ) +set( warn_c_flags "-Wno-format -Wno-unused-variable -Wno-unused-function -Wno-gnu-zero-variadic-macro-arguments" ) + +set( CMAKE_C_FLAGS_INIT "${arch_c_flags} ${warn_c_flags}" ) +set( CMAKE_CXX_FLAGS_INIT "${arch_c_flags} ${warn_c_flags}" ) diff --git a/cmake/arm64-windows-msvc.cmake b/cmake/arm64-windows-msvc.cmake new file mode 100644 index 0000000000000000000000000000000000000000..c77631420ce848d051eb4ef774313964dd5636ed --- /dev/null +++ b/cmake/arm64-windows-msvc.cmake @@ -0,0 +1,6 @@ +set( CMAKE_SYSTEM_NAME Windows ) +set( CMAKE_SYSTEM_PROCESSOR arm64 ) + +set( target arm64-pc-windows-msvc ) +set( CMAKE_C_COMPILER_TARGET ${target} ) +set( CMAKE_CXX_COMPILER_TARGET ${target} ) diff --git a/cmake/build-info.cmake b/cmake/build-info.cmake new file mode 100644 index 0000000000000000000000000000000000000000..ea3dc55c83439a91c9289760bc17b1b0dcd69aa1 --- /dev/null +++ b/cmake/build-info.cmake @@ -0,0 +1,58 @@ +set(BUILD_NUMBER 0) +set(BUILD_COMMIT "unknown") +set(BUILD_COMPILER "unknown") +set(BUILD_TARGET "unknown") + +# Look for git +find_package(Git) +if(NOT Git_FOUND) + find_program(GIT_EXECUTABLE NAMES git git.exe) + if(GIT_EXECUTABLE) + set(Git_FOUND TRUE) + message(STATUS "Found Git: ${GIT_EXECUTABLE}") + else() + message(WARNING "Git not found. 
Build info will not be accurate.") + endif() +endif() + +# Get the commit count and hash +if(Git_FOUND) + execute_process( + COMMAND ${GIT_EXECUTABLE} rev-parse --short HEAD + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + OUTPUT_VARIABLE HEAD + OUTPUT_STRIP_TRAILING_WHITESPACE + RESULT_VARIABLE RES + ) + if (RES EQUAL 0) + set(BUILD_COMMIT ${HEAD}) + endif() + execute_process( + COMMAND ${GIT_EXECUTABLE} rev-list --count HEAD + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + OUTPUT_VARIABLE COUNT + OUTPUT_STRIP_TRAILING_WHITESPACE + RESULT_VARIABLE RES + ) + if (RES EQUAL 0) + set(BUILD_NUMBER ${COUNT}) + endif() +endif() + +if(MSVC) + set(BUILD_COMPILER "${CMAKE_C_COMPILER_ID} ${CMAKE_C_COMPILER_VERSION}") + set(BUILD_TARGET ${CMAKE_VS_PLATFORM_NAME}) +else() + execute_process( + COMMAND sh -c "$@ --version | head -1" _ ${CMAKE_C_COMPILER} + OUTPUT_VARIABLE OUT + OUTPUT_STRIP_TRAILING_WHITESPACE + ) + set(BUILD_COMPILER ${OUT}) + execute_process( + COMMAND ${CMAKE_C_COMPILER} -dumpmachine + OUTPUT_VARIABLE OUT + OUTPUT_STRIP_TRAILING_WHITESPACE + ) + set(BUILD_TARGET ${OUT}) +endif() diff --git a/cmake/git-vars.cmake b/cmake/git-vars.cmake new file mode 100644 index 0000000000000000000000000000000000000000..1a4c24ebf6adeb1126e626f56de601621179353d --- /dev/null +++ b/cmake/git-vars.cmake @@ -0,0 +1,22 @@ +find_package(Git) + +# the commit's SHA1 +execute_process(COMMAND + "${GIT_EXECUTABLE}" describe --match=NeVeRmAtCh --always --abbrev=8 + WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}" + OUTPUT_VARIABLE GIT_SHA1 + ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) + +# the date of the commit +execute_process(COMMAND + "${GIT_EXECUTABLE}" log -1 --format=%ad --date=local + WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}" + OUTPUT_VARIABLE GIT_DATE + ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) + +# the subject of the commit +execute_process(COMMAND + "${GIT_EXECUTABLE}" log -1 --format=%s + WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}" + OUTPUT_VARIABLE GIT_COMMIT_SUBJECT + ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) diff --git a/cmake/llama-config.cmake.in b/cmake/llama-config.cmake.in new file mode 100644 index 0000000000000000000000000000000000000000..f072b76a39d2e1589b328aa106e32285f61f4d7f --- /dev/null +++ b/cmake/llama-config.cmake.in @@ -0,0 +1,90 @@ +set(LLAMA_VERSION @LLAMA_INSTALL_VERSION@) +set(LLAMA_BUILD_COMMIT @LLAMA_BUILD_COMMIT@) +set(LLAMA_BUILD_NUMBER @LLAMA_BUILD_NUMBER@) +set(LLAMA_SHARED_LIB @BUILD_SHARED_LIBS@) + +set(GGML_BLAS @GGML_BLAS@) +set(GGML_CUDA @GGML_CUDA@) +set(GGML_METAL @GGML_METAL@) +set(GGML_HIPBLAS @GGML_HIPBLAS@) +set(GGML_ACCELERATE @GGML_ACCELERATE@) +set(GGML_VULKAN @GGML_VULKAN@) +set(GGML_VULKAN_CHECK_RESULTS @GGML_VULKAN_CHECK_RESULTS@) +set(GGML_VULKAN_DEBUG @GGML_VULKAN_DEBUG@) +set(GGML_VULKAN_MEMORY_DEBUG @GGML_VULKAN_MEMORY_DEBUG@) +set(GGML_VULKAN_VALIDATE @GGML_VULKAN_VALIDATE@) +set(GGML_SYCL @GGML_SYCL@) +set(GGML_OPENMP @GGML_OPENMP@) + +@PACKAGE_INIT@ + +set_and_check(LLAMA_INCLUDE_DIR "@PACKAGE_LLAMA_INCLUDE_INSTALL_DIR@") +set_and_check(LLAMA_LIB_DIR "@PACKAGE_LLAMA_LIB_INSTALL_DIR@") +set_and_check(LLAMA_BIN_DIR "@PACKAGE_LLAMA_BIN_INSTALL_DIR@") + +# Ensure transient dependencies satisfied + +find_package(Threads REQUIRED) + +if (APPLE AND GGML_ACCELERATE) + find_library(ACCELERATE_FRAMEWORK Accelerate REQUIRED) +endif() + +if (GGML_BLAS) + find_package(BLAS REQUIRED) +endif() + +if (GGML_CUDA) + find_package(CUDAToolkit REQUIRED) +endif() + +if (GGML_METAL) + find_library(FOUNDATION_LIBRARY Foundation REQUIRED) + find_library(METAL_FRAMEWORK Metal 
REQUIRED) + find_library(METALKIT_FRAMEWORK MetalKit REQUIRED) +endif() + +if (GGML_VULKAN) + find_package(Vulkan REQUIRED) +endif() + +if (GGML_HIPBLAS) + find_package(hip REQUIRED) + find_package(hipblas REQUIRED) + find_package(rocblas REQUIRED) +endif() + +if (GGML_SYCL) + find_package(IntelSYCL REQUIRED) + find_package(MKL REQUIRED) +endif() + +if (GGML_OPENMP) + find_package(OpenMP REQUIRED) +endif() + + +find_library(ggml_LIBRARY ggml + REQUIRED + HINTS ${LLAMA_LIB_DIR}) + +find_library(llama_LIBRARY llama + REQUIRED + HINTS ${LLAMA_LIB_DIR}) + +set(_llama_link_deps "${ggml_LIBRARY}" "@GGML_LINK_LIBRARIES@") +set(_llama_transient_defines "@GGML_TRANSIENT_DEFINES@") + +add_library(llama UNKNOWN IMPORTED) + +set_target_properties(llama + PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES "${LLAMA_INCLUDE_DIR}" + INTERFACE_LINK_LIBRARIES "${_llama_link_deps}" + INTERFACE_COMPILE_DEFINITIONS "${_llama_transient_defines}" + IMPORTED_LINK_INTERFACE_LANGUAGES "CXX" + IMPORTED_LOCATION "${llama_LIBRARY}" + INTERFACE_COMPILE_FEATURES cxx_std_11 + POSITION_INDEPENDENT_CODE ON ) + +check_required_components(Llama) diff --git a/cmake/llama.pc.in b/cmake/llama.pc.in new file mode 100644 index 0000000000000000000000000000000000000000..326acbb6108fd02e13787371d6dbefcd511c97f8 --- /dev/null +++ b/cmake/llama.pc.in @@ -0,0 +1,10 @@ +prefix=@CMAKE_INSTALL_PREFIX@ +exec_prefix=${prefix} +libdir=${exec_prefix}/lib +includedir=${prefix}/include + +Name: llama +Description: Port of Facebook's LLaMA model in C/C++ +Version: @PROJECT_VERSION@ +Libs: -L${libdir} -lllama +Cflags: -I${includedir} diff --git a/common/CMakeLists.txt b/common/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..5ab1ffa1922aa9a69aa1ae99aace11c309f13c1d --- /dev/null +++ b/common/CMakeLists.txt @@ -0,0 +1,88 @@ +# common + +find_package(Threads REQUIRED) + +# Build info header +# + +if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/../.git") + set(GIT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../.git") + + # Is git submodule + if(NOT IS_DIRECTORY "${GIT_DIR}") + file(READ ${GIT_DIR} REAL_GIT_DIR_LINK) + string(REGEX REPLACE "gitdir: (.*)\n$" "\\1" REAL_GIT_DIR ${REAL_GIT_DIR_LINK}) + string(FIND "${REAL_GIT_DIR}" "/" SLASH_POS) + if (SLASH_POS EQUAL 0) + set(GIT_DIR "${REAL_GIT_DIR}") + else() + set(GIT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../${REAL_GIT_DIR}") + endif() + endif() + + if(EXISTS "${GIT_DIR}/index") + set(GIT_INDEX "${GIT_DIR}/index") + else() + message(WARNING "Git index not found in git repository.") + set(GIT_INDEX "") + endif() +else() + message(WARNING "Git repository not found; to enable automatic generation of build info, make sure Git is installed and the project is a Git repository.") + set(GIT_INDEX "") +endif() + +# Add a custom command to rebuild build-info.cpp when .git/index changes +add_custom_command( + OUTPUT "${CMAKE_CURRENT_SOURCE_DIR}/build-info.cpp" + COMMENT "Generating build details from Git" + COMMAND ${CMAKE_COMMAND} -DMSVC=${MSVC} -DCMAKE_C_COMPILER_VERSION=${CMAKE_C_COMPILER_VERSION} + -DCMAKE_C_COMPILER_ID=${CMAKE_C_COMPILER_ID} -DCMAKE_VS_PLATFORM_NAME=${CMAKE_VS_PLATFORM_NAME} + -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} -P "${CMAKE_CURRENT_SOURCE_DIR}/cmake/build-info-gen-cpp.cmake" + WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/.." 
+ DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/build-info.cpp.in" ${GIT_INDEX} + VERBATIM +) +set(TARGET build_info) +add_library(${TARGET} OBJECT build-info.cpp) +if (BUILD_SHARED_LIBS) + set_target_properties(${TARGET} PROPERTIES POSITION_INDEPENDENT_CODE ON) +endif() + +set(TARGET common) + +add_library(${TARGET} STATIC + arg.cpp + arg.h + base64.hpp + common.cpp + common.h + console.cpp + console.h + json-schema-to-grammar.cpp + json.hpp + log.cpp + log.h + ngram-cache.cpp + ngram-cache.h + sampling.cpp + sampling.h + ) + +if (BUILD_SHARED_LIBS) + set_target_properties(${TARGET} PROPERTIES POSITION_INDEPENDENT_CODE ON) +endif() + +set(LLAMA_COMMON_EXTRA_LIBS build_info) + +# Use curl to download model url +if (LLAMA_CURL) + find_package(CURL REQUIRED) + add_definitions(-DLLAMA_USE_CURL) + include_directories(${CURL_INCLUDE_DIRS}) + find_library(CURL_LIBRARY curl REQUIRED) + set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} ${CURL_LIBRARY}) +endif () + +target_include_directories(${TARGET} PUBLIC .) +target_compile_features (${TARGET} PUBLIC cxx_std_11) +target_link_libraries (${TARGET} PRIVATE ${LLAMA_COMMON_EXTRA_LIBS} PUBLIC llama Threads::Threads) diff --git a/common/arg.cpp b/common/arg.cpp new file mode 100644 index 0000000000000000000000000000000000000000..7c5c5e5cd5b887f71549e734c816bd573d2003d8 --- /dev/null +++ b/common/arg.cpp @@ -0,0 +1,2052 @@ +#include "arg.h" + +#include "log.h" +#include "sampling.h" + +#include <algorithm> +#include <cstdlib> +#include <fstream> +#include <numeric> +#include <sstream> +#include <string> +#include <thread> +#include <unordered_map> +#include <vector> + +#include "json-schema-to-grammar.h" + +using json = nlohmann::ordered_json; + +common_arg & common_arg::set_examples(std::initializer_list<enum llama_example> examples) { + this->examples = std::move(examples); + return *this; +} + +common_arg & common_arg::set_env(const char * env) { + help = help + "\n(env: " + env + ")"; + this->env = env; + return *this; +} + +common_arg & common_arg::set_sparam() { + is_sparam = true; + return *this; +} + +bool common_arg::in_example(enum llama_example ex) { + return examples.find(ex) != examples.end(); +} + +bool common_arg::get_value_from_env(std::string & output) { + if (env == nullptr) return false; + char * value = std::getenv(env); + if (value) { + output = value; + return true; + } + return false; +} + +bool common_arg::has_value_from_env() { + return env != nullptr && std::getenv(env); +} + +static std::vector<std::string> break_str_into_lines(std::string input, size_t max_char_per_line) { + std::vector<std::string> result; + std::istringstream iss(input); + std::string line; + auto add_line = [&](const std::string& l) { + if (l.length() <= max_char_per_line) { + result.push_back(l); + } else { + std::istringstream line_stream(l); + std::string word, current_line; + while (line_stream >> word) {
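+ // note: "!current_line.empty()" converts to 1 exactly when a joining space would be added before "word", + // so the wrap check below measures the line as it would look after appending the word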
" " : "") + word; + } + } + if (!current_line.empty()) result.push_back(current_line); + } + }; + while (std::getline(iss, line)) { + add_line(line); + } + return result; +} + +std::string common_arg::to_string() { + // params for printing to console + const static int n_leading_spaces = 40; + const static int n_char_per_line_help = 70; // TODO: detect this based on current console + std::string leading_spaces(n_leading_spaces, ' '); + + std::ostringstream ss; + for (const auto arg : args) { + if (arg == args.front()) { + if (args.size() == 1) { + ss << arg; + } else { + // first arg is usually abbreviation, we need padding to make it more beautiful + auto tmp = std::string(arg) + ", "; + auto spaces = std::string(std::max(0, 7 - (int)tmp.size()), ' '); + ss << tmp << spaces; + } + } else { + ss << arg << (arg != args.back() ? ", " : ""); + } + } + if (value_hint) ss << " " << value_hint; + if (value_hint_2) ss << " " << value_hint_2; + if (ss.tellp() > n_leading_spaces - 3) { + // current line is too long, add new line + ss << "\n" << leading_spaces; + } else { + // padding between arg and help, same line + ss << std::string(leading_spaces.size() - ss.tellp(), ' '); + } + const auto help_lines = break_str_into_lines(help, n_char_per_line_help); + for (const auto & line : help_lines) { + ss << (&line == &help_lines.front() ? "" : leading_spaces) << line << "\n"; + } + return ss.str(); +} + +// +// utils +// + +static void common_params_handle_model_default(common_params & params) { + if (!params.hf_repo.empty()) { + // short-hand to avoid specifying --hf-file -> default it to --model + if (params.hf_file.empty()) { + if (params.model.empty()) { + throw std::invalid_argument("error: --hf-repo requires either --hf-file or --model\n"); + } + params.hf_file = params.model; + } else if (params.model.empty()) { + params.model = fs_get_cache_file(string_split(params.hf_file, '/').back()); + } + } else if (!params.model_url.empty()) { + if (params.model.empty()) { + auto f = string_split(params.model_url, '#').front(); + f = string_split(f, '?').front(); + params.model = fs_get_cache_file(string_split(f, '/').back()); + } + } else if (params.model.empty()) { + params.model = DEFAULT_MODEL_PATH; + } +} + +// +// CLI argument parsing functions +// + +static bool common_params_parse_ex(int argc, char ** argv, common_params_context & ctx_arg) { + std::string arg; + const std::string arg_prefix = "--"; + common_params & params = ctx_arg.params; + + std::unordered_map arg_to_options; + for (auto & opt : ctx_arg.options) { + for (const auto & arg : opt.args) { + arg_to_options[arg] = &opt; + } + } + + // handle environment variables + for (auto & opt : ctx_arg.options) { + std::string value; + if (opt.get_value_from_env(value)) { + try { + if (opt.handler_void && (value == "1" || value == "true")) { + opt.handler_void(params); + } + if (opt.handler_int) { + opt.handler_int(params, std::stoi(value)); + } + if (opt.handler_string) { + opt.handler_string(params, value); + continue; + } + } catch (std::exception & e) { + throw std::invalid_argument(string_format( + "error while handling environment variable \"%s\": %s\n\n", opt.env, e.what())); + } + } + } + + // handle command line arguments + auto check_arg = [&](int i) { + if (i+1 >= argc) { + throw std::invalid_argument("expected value for argument"); + } + }; + + for (int i = 1; i < argc; i++) { + const std::string arg_prefix = "--"; + + std::string arg = argv[i]; + if (arg.compare(0, arg_prefix.size(), arg_prefix) == 0) { + std::replace(arg.begin(), 
+ if (arg.compare(0, arg_prefix.size(), arg_prefix) == 0) { + std::replace(arg.begin(), arg.end(), '_', '-'); + } + if (arg_to_options.find(arg) == arg_to_options.end()) { + throw std::invalid_argument(string_format("error: invalid argument: %s", arg.c_str())); + } + auto opt = *arg_to_options[arg]; + if (opt.has_value_from_env()) { + fprintf(stderr, "warn: %s environment variable is set, but will be overwritten by command line argument %s\n", opt.env, arg.c_str()); + } + try { + if (opt.handler_void) { + opt.handler_void(params); + continue; + } + + // arg with single value + check_arg(i); + std::string val = argv[++i]; + if (opt.handler_int) { + opt.handler_int(params, std::stoi(val)); + continue; + } + if (opt.handler_string) { + opt.handler_string(params, val); + continue; + } + + // arg with 2 values + check_arg(i); + std::string val2 = argv[++i]; + if (opt.handler_str_str) { + opt.handler_str_str(params, val, val2); + continue; + } + } catch (std::exception & e) { + throw std::invalid_argument(string_format( + "error while handling argument \"%s\": %s\n\n" + "usage:\n%s\n\nto show complete usage, run with -h", + arg.c_str(), e.what(), arg_to_options[arg]->to_string().c_str())); + } + } + + postprocess_cpu_params(params.cpuparams, nullptr); + postprocess_cpu_params(params.cpuparams_batch, &params.cpuparams); + postprocess_cpu_params(params.draft_cpuparams, &params.cpuparams); + postprocess_cpu_params(params.draft_cpuparams_batch, &params.cpuparams_batch); + + if (params.prompt_cache_all && (params.interactive || params.interactive_first)) { + throw std::invalid_argument("error: --prompt-cache-all not supported in interactive mode yet\n"); + } + + common_params_handle_model_default(params); + + if (params.escape) { + string_process_escapes(params.prompt); + string_process_escapes(params.input_prefix); + string_process_escapes(params.input_suffix); + for (auto & antiprompt : params.antiprompt) { + string_process_escapes(antiprompt); + } + for (auto & seq_breaker : params.sparams.dry_sequence_breakers) { + string_process_escapes(seq_breaker); + } + } + + if (!params.kv_overrides.empty()) { + params.kv_overrides.emplace_back(); + params.kv_overrides.back().key[0] = 0; + } + + if (params.reranking && params.embedding) { + throw std::invalid_argument("error: either --embedding or --reranking can be specified, but not both"); + } + + return true; +} + +static void common_params_print_usage(common_params_context & ctx_arg) { + auto print_options = [](std::vector<common_arg *> & options) { + for (common_arg * opt : options) { + printf("%s", opt->to_string().c_str()); + } + }; + + std::vector<common_arg *> common_options; + std::vector<common_arg *> sparam_options; + std::vector<common_arg *> specific_options; + for (auto & opt : ctx_arg.options) { + // in case multiple LLAMA_EXAMPLE_* are set, we prioritize the LLAMA_EXAMPLE_* matching current example + if (opt.is_sparam) { + sparam_options.push_back(&opt); + } else if (opt.in_example(ctx_arg.ex)) { + specific_options.push_back(&opt); + } else { + common_options.push_back(&opt); + } + } + printf("----- common params -----\n\n"); + print_options(common_options); + printf("\n\n----- sampling params -----\n\n"); + print_options(sparam_options); + // TODO: maybe convert enum llama_example to string + printf("\n\n----- example-specific params -----\n\n"); + print_options(specific_options); +} + +bool common_params_parse(int argc, char ** argv, common_params & params, llama_example ex, void(*print_usage)(int, char **)) { + auto ctx_arg = common_params_parser_init(params, ex, print_usage); + const common_params params_org = ctx_arg.params; // the example can modify the default params + + try { + if 
(!common_params_parse_ex(argc, argv, ctx_arg)) { + ctx_arg.params = params_org; + return false; + } + if (ctx_arg.params.usage) { + common_params_print_usage(ctx_arg); + if (ctx_arg.print_usage) { + ctx_arg.print_usage(argc, argv); + } + exit(0); + } + } catch (const std::invalid_argument & ex) { + fprintf(stderr, "%s\n", ex.what()); + ctx_arg.params = params_org; + return false; + } + + return true; +} + +common_params_context common_params_parser_init(common_params & params, llama_example ex, void(*print_usage)(int, char **)) { + common_params_context ctx_arg(params); + ctx_arg.print_usage = print_usage; + ctx_arg.ex = ex; + + std::string sampler_type_chars; + std::string sampler_type_names; + for (const auto & sampler : params.sparams.samplers) { + sampler_type_chars += common_sampler_type_to_chr(sampler); + sampler_type_names += common_sampler_type_to_str(sampler) + ";"; + } + sampler_type_names.pop_back(); + + + /** + * filter options by example + * rules: + * - all examples inherit options from LLAMA_EXAMPLE_COMMON + * - if LLAMA_EXAMPLE_* is set (other than COMMON), we only show the option in the corresponding example + * - if both {LLAMA_EXAMPLE_COMMON, LLAMA_EXAMPLE_*,} are set, we will prioritize the LLAMA_EXAMPLE_* matching current example + */ + auto add_opt = [&](common_arg arg) { + if (arg.in_example(ex) || arg.in_example(LLAMA_EXAMPLE_COMMON)) { + ctx_arg.options.push_back(std::move(arg)); + } + }; + + + add_opt(common_arg( + {"-h", "--help", "--usage"}, + "print usage and exit", + [](common_params & params) { + params.usage = true; + } + )); + add_opt(common_arg( + {"--version"}, + "show version and build info", + [](common_params &) { + fprintf(stderr, "version: %d (%s)\n", LLAMA_BUILD_NUMBER, LLAMA_COMMIT); + fprintf(stderr, "built with %s for %s\n", LLAMA_COMPILER, LLAMA_BUILD_TARGET); + exit(0); + } + )); + add_opt(common_arg( + {"--verbose-prompt"}, + string_format("print a verbose prompt before generation (default: %s)", params.verbose_prompt ? "true" : "false"), + [](common_params & params) { + params.verbose_prompt = true; + } + )); + add_opt(common_arg( + {"--no-display-prompt"}, + string_format("don't print prompt at generation (default: %s)", !params.display_prompt ? "true" : "false"), + [](common_params & params) { + params.display_prompt = false; + } + ).set_examples({LLAMA_EXAMPLE_MAIN})); + add_opt(common_arg( + {"-co", "--color"}, + string_format("colorise output to distinguish prompt and user input from generations (default: %s)", params.use_color ? 
"true" : "false"), + [](common_params & params) { + params.use_color = true; + } + ).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_INFILL, LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_LOOKUP})); + add_opt(common_arg( + {"-t", "--threads"}, "N", + string_format("number of threads to use during generation (default: %d)", params.cpuparams.n_threads), + [](common_params & params, int value) { + params.cpuparams.n_threads = value; + if (params.cpuparams.n_threads <= 0) { + params.cpuparams.n_threads = std::thread::hardware_concurrency(); + } + } + ).set_env("LLAMA_ARG_THREADS")); + add_opt(common_arg( + {"-tb", "--threads-batch"}, "N", + "number of threads to use during batch and prompt processing (default: same as --threads)", + [](common_params & params, int value) { + params.cpuparams_batch.n_threads = value; + if (params.cpuparams_batch.n_threads <= 0) { + params.cpuparams_batch.n_threads = std::thread::hardware_concurrency(); + } + } + )); + add_opt(common_arg( + {"-td", "--threads-draft"}, "N", + "number of threads to use during generation (default: same as --threads)", + [](common_params & params, int value) { + params.draft_cpuparams.n_threads = value; + if (params.draft_cpuparams.n_threads <= 0) { + params.draft_cpuparams.n_threads = std::thread::hardware_concurrency(); + } + } + ).set_examples({LLAMA_EXAMPLE_SPECULATIVE})); + add_opt(common_arg( + {"-tbd", "--threads-batch-draft"}, "N", + "number of threads to use during batch and prompt processing (default: same as --threads-draft)", + [](common_params & params, int value) { + params.draft_cpuparams_batch.n_threads = value; + if (params.draft_cpuparams_batch.n_threads <= 0) { + params.draft_cpuparams_batch.n_threads = std::thread::hardware_concurrency(); + } + } + ).set_examples({LLAMA_EXAMPLE_SPECULATIVE})); + add_opt(common_arg( + {"-C", "--cpu-mask"}, "M", + "CPU affinity mask: arbitrarily long hex. Complements cpu-range (default: \"\")", + [](common_params & params, const std::string & mask) { + params.cpuparams.mask_valid = true; + if (!parse_cpu_mask(mask, params.cpuparams.cpumask)) { + throw std::invalid_argument("invalid cpumask"); + } + } + )); + add_opt(common_arg( + {"-Cr", "--cpu-range"}, "lo-hi", + "range of CPUs for affinity. Complements --cpu-mask", + [](common_params & params, const std::string & range) { + params.cpuparams.mask_valid = true; + if (!parse_cpu_range(range, params.cpuparams.cpumask)) { + throw std::invalid_argument("invalid range"); + } + } + )); + add_opt(common_arg( + {"--cpu-strict"}, "<0|1>", + string_format("use strict CPU placement (default: %u)\n", (unsigned) params.cpuparams.strict_cpu), + [](common_params & params, const std::string & value) { + params.cpuparams.strict_cpu = std::stoul(value); + } + )); + add_opt(common_arg( + {"--prio"}, "N", + string_format("set process/thread priority : 0-normal, 1-medium, 2-high, 3-realtime (default: %d)\n", params.cpuparams.priority), + [](common_params & params, int prio) { + if (prio < 0 || prio > 3) { + throw std::invalid_argument("invalid value"); + } + params.cpuparams.priority = (enum ggml_sched_priority) prio; + } + )); + add_opt(common_arg( + {"--poll"}, "<0...100>", + string_format("use polling level to wait for work (0 - no polling, default: %u)\n", (unsigned) params.cpuparams.poll), + [](common_params & params, const std::string & value) { + params.cpuparams.poll = std::stoul(value); + } + )); + add_opt(common_arg( + {"-Cb", "--cpu-mask-batch"}, "M", + "CPU affinity mask: arbitrarily long hex. 
Complements cpu-range-batch (default: same as --cpu-mask)", + [](common_params & params, const std::string & mask) { + params.cpuparams_batch.mask_valid = true; + if (!parse_cpu_mask(mask, params.cpuparams_batch.cpumask)) { + throw std::invalid_argument("invalid cpumask"); + } + } + )); + add_opt(common_arg( + {"-Crb", "--cpu-range-batch"}, "lo-hi", + "ranges of CPUs for affinity. Complements --cpu-mask-batch", + [](common_params & params, const std::string & range) { + params.cpuparams_batch.mask_valid = true; + if (!parse_cpu_range(range, params.cpuparams_batch.cpumask)) { + throw std::invalid_argument("invalid range"); + } + } + )); + add_opt(common_arg( + {"--cpu-strict-batch"}, "<0|1>", + "use strict CPU placement (default: same as --cpu-strict)", + [](common_params & params, int value) { + params.cpuparams_batch.strict_cpu = value; + } + )); + add_opt(common_arg( + {"--prio-batch"}, "N", + string_format("set process/thread priority : 0-normal, 1-medium, 2-high, 3-realtime (default: %d)\n", params.cpuparams_batch.priority), + [](common_params & params, int prio) { + if (prio < 0 || prio > 3) { + throw std::invalid_argument("invalid value"); + } + params.cpuparams_batch.priority = (enum ggml_sched_priority) prio; + } + )); + add_opt(common_arg( + {"--poll-batch"}, "<0|1>", + "use polling to wait for work (default: same as --poll)", + [](common_params & params, int value) { + params.cpuparams_batch.poll = value; + } + )); + add_opt(common_arg( + {"-Cd", "--cpu-mask-draft"}, "M", + "Draft model CPU affinity mask. Complements cpu-range-draft (default: same as --cpu-mask)", + [](common_params & params, const std::string & mask) { + params.draft_cpuparams.mask_valid = true; + if (!parse_cpu_mask(mask, params.draft_cpuparams.cpumask)) { + throw std::invalid_argument("invalid cpumask"); + } + } + ).set_examples({LLAMA_EXAMPLE_SPECULATIVE})); + add_opt(common_arg( + {"-Crd", "--cpu-range-draft"}, "lo-hi", + "Ranges of CPUs for affinity. Complements --cpu-mask-draft", + [](common_params & params, const std::string & range) { + params.draft_cpuparams.mask_valid = true; + if (!parse_cpu_range(range, params.draft_cpuparams.cpumask)) { + throw std::invalid_argument("invalid range"); + } + } + ).set_examples({LLAMA_EXAMPLE_SPECULATIVE})); + add_opt(common_arg( + {"--cpu-strict-draft"}, "<0|1>", + "Use strict CPU placement for draft model (default: same as --cpu-strict)", + [](common_params & params, int value) { + params.draft_cpuparams.strict_cpu = value; + } + ).set_examples({LLAMA_EXAMPLE_SPECULATIVE})); + add_opt(common_arg( + {"--prio-draft"}, "N", + string_format("set draft process/thread priority : 0-normal, 1-medium, 2-high, 3-realtime (default: %d)\n", params.draft_cpuparams.priority), + [](common_params & params, int prio) { + if (prio < 0 || prio > 3) { + throw std::invalid_argument("invalid value"); + } + params.draft_cpuparams.priority = (enum ggml_sched_priority) prio; + } + ).set_examples({LLAMA_EXAMPLE_SPECULATIVE})); + add_opt(common_arg( + {"--poll-draft"}, "<0|1>", + "Use polling to wait for draft model work (default: same as --poll)", + [](common_params & params, int value) { + params.draft_cpuparams.poll = value; + } + ).set_examples({LLAMA_EXAMPLE_SPECULATIVE}));
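+ // note: draft CPU params left unset are derived from the corresponding main/batch params by postprocess_cpu_params() (see common_params_parse_ex above)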
+ add_opt(common_arg( + {"-Cbd", "--cpu-mask-batch-draft"}, "M", + "Draft model CPU affinity mask. Complements cpu-range-batch-draft (default: same as --cpu-mask)", + [](common_params & params, const std::string & mask) { + params.draft_cpuparams_batch.mask_valid = true; + if (!parse_cpu_mask(mask, params.draft_cpuparams_batch.cpumask)) { + throw std::invalid_argument("invalid cpumask"); + } + } + ).set_examples({LLAMA_EXAMPLE_SPECULATIVE})); + add_opt(common_arg( + {"-Crbd", "--cpu-range-batch-draft"}, "lo-hi", + "Ranges of CPUs for affinity. Complements --cpu-mask-batch-draft", + [](common_params & params, const std::string & range) { + params.draft_cpuparams_batch.mask_valid = true; + if (!parse_cpu_range(range, params.draft_cpuparams_batch.cpumask)) { + throw std::invalid_argument("invalid range"); + } + } + ).set_examples({LLAMA_EXAMPLE_SPECULATIVE})); + add_opt(common_arg( + {"--cpu-strict-batch-draft"}, "<0|1>", + "Use strict CPU placement for draft model (default: same as --cpu-strict-draft)", + [](common_params & params, int value) { + params.draft_cpuparams_batch.strict_cpu = value; + } + ).set_examples({LLAMA_EXAMPLE_SPECULATIVE})); + add_opt(common_arg( + {"--prio-batch-draft"}, "N", + string_format("set draft process/thread priority : 0-normal, 1-medium, 2-high, 3-realtime (default: %d)\n", params.draft_cpuparams_batch.priority), + [](common_params & params, int prio) { + if (prio < 0 || prio > 3) { + throw std::invalid_argument("invalid value"); + } + params.draft_cpuparams_batch.priority = (enum ggml_sched_priority) prio; + } + ).set_examples({LLAMA_EXAMPLE_SPECULATIVE})); + add_opt(common_arg( + {"--poll-batch-draft"}, "<0|1>", + "Use polling to wait for draft model work (default: same as --poll-draft)", + [](common_params & params, int value) { + params.draft_cpuparams_batch.poll = value; + } + ).set_examples({LLAMA_EXAMPLE_SPECULATIVE})); + add_opt(common_arg( + {"--draft"}, "N", + string_format("number of tokens to draft for speculative decoding (default: %d)", params.n_draft), + [](common_params & params, int value) { + params.n_draft = value; + } + ).set_examples({LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_LOOKUP})); + add_opt(common_arg( + {"-ps", "--p-split"}, "N", + string_format("speculative decoding split probability (default: %.1f)", (double)params.p_split), + [](common_params & params, const std::string & value) { + params.p_split = std::stof(value); + } + ).set_examples({LLAMA_EXAMPLE_SPECULATIVE})); + add_opt(common_arg( + {"-lcs", "--lookup-cache-static"}, "FNAME", + "path to static lookup cache to use for lookup decoding (not updated by generation)", + [](common_params & params, const std::string & value) { + params.lookup_cache_static = value; + } + ).set_examples({LLAMA_EXAMPLE_LOOKUP})); + add_opt(common_arg( + {"-lcd", "--lookup-cache-dynamic"}, "FNAME", + "path to dynamic lookup cache to use for lookup decoding (updated by generation)", + [](common_params & params, const std::string & value) { + params.lookup_cache_dynamic = value; + } + ).set_examples({LLAMA_EXAMPLE_LOOKUP})); + add_opt(common_arg( + {"-c", "--ctx-size"}, "N", + string_format("size of the prompt context (default: %d, 0 = loaded from model)", params.n_ctx), + [](common_params & params, int value) { + params.n_ctx = value; + } + ).set_env("LLAMA_ARG_CTX_SIZE")); + add_opt(common_arg( + {"-n", "--predict", "--n-predict"}, "N", + string_format("number of tokens to predict (default: %d, -1 = infinity, -2 = until context filled)", params.n_predict), + [](common_params & params, int value) { + params.n_predict = value; + } + ).set_env("LLAMA_ARG_N_PREDICT"));
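+ // options registered with .set_env() can also be driven by an environment variable; a command-line flag takes precedence, and a warning is printed when both are set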
"N", + string_format("logical maximum batch size (default: %d)", params.n_batch), + [](common_params & params, int value) { + params.n_batch = value; + } + ).set_env("LLAMA_ARG_BATCH")); + add_opt(common_arg( + {"-ub", "--ubatch-size"}, "N", + string_format("physical maximum batch size (default: %d)", params.n_ubatch), + [](common_params & params, int value) { + params.n_ubatch = value; + } + ).set_env("LLAMA_ARG_UBATCH")); + add_opt(common_arg( + {"--keep"}, "N", + string_format("number of tokens to keep from the initial prompt (default: %d, -1 = all)", params.n_keep), + [](common_params & params, int value) { + params.n_keep = value; + } + )); + add_opt(common_arg( + {"--no-context-shift"}, + string_format("disables context shift on inifinite text generation (default: %s)", params.ctx_shift ? "disabled" : "enabled"), + [](common_params & params) { + params.ctx_shift = false; + } + ).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_NO_CONTEXT_SHIFT")); + add_opt(common_arg( + {"--chunks"}, "N", + string_format("max number of chunks to process (default: %d, -1 = all)", params.n_chunks), + [](common_params & params, int value) { + params.n_chunks = value; + } + ).set_examples({LLAMA_EXAMPLE_IMATRIX, LLAMA_EXAMPLE_PERPLEXITY, LLAMA_EXAMPLE_RETRIEVAL})); + add_opt(common_arg( + {"-fa", "--flash-attn"}, + string_format("enable Flash Attention (default: %s)", params.flash_attn ? "enabled" : "disabled"), + [](common_params & params) { + params.flash_attn = true; + } + ).set_env("LLAMA_ARG_FLASH_ATTN")); + add_opt(common_arg( + {"-p", "--prompt"}, "PROMPT", + ex == LLAMA_EXAMPLE_MAIN + ? "prompt to start generation with\nif -cnv is set, this will be used as system prompt" + : "prompt to start generation with", + [](common_params & params, const std::string & value) { + params.prompt = value; + } + )); + add_opt(common_arg( + {"--no-perf"}, + string_format("disable internal libllama performance timings (default: %s)", params.no_perf ? 
"true" : "false"), + [](common_params & params) { + params.no_perf = true; + params.sparams.no_perf = true; + } + ).set_env("LLAMA_ARG_NO_PERF")); + add_opt(common_arg( + {"-f", "--file"}, "FNAME", + "a file containing the prompt (default: none)", + [](common_params & params, const std::string & value) { + std::ifstream file(value); + if (!file) { + throw std::runtime_error(string_format("error: failed to open file '%s'\n", value.c_str())); + } + // store the external file name in params + params.prompt_file = value; + std::copy(std::istreambuf_iterator(file), std::istreambuf_iterator(), back_inserter(params.prompt)); + if (!params.prompt.empty() && params.prompt.back() == '\n') { + params.prompt.pop_back(); + } + } + )); + add_opt(common_arg( + {"--in-file"}, "FNAME", + "an input file (repeat to specify multiple files)", + [](common_params & params, const std::string & value) { + std::ifstream file(value); + if (!file) { + throw std::runtime_error(string_format("error: failed to open file '%s'\n", value.c_str())); + } + params.in_files.push_back(value); + } + ).set_examples({LLAMA_EXAMPLE_IMATRIX})); + add_opt(common_arg( + {"-bf", "--binary-file"}, "FNAME", + "binary file containing the prompt (default: none)", + [](common_params & params, const std::string & value) { + std::ifstream file(value, std::ios::binary); + if (!file) { + throw std::runtime_error(string_format("error: failed to open file '%s'\n", value.c_str())); + } + // store the external file name in params + params.prompt_file = value; + std::ostringstream ss; + ss << file.rdbuf(); + params.prompt = ss.str(); + fprintf(stderr, "Read %zu bytes from binary file %s\n", params.prompt.size(), value.c_str()); + } + )); + add_opt(common_arg( + {"-e", "--escape"}, + string_format("process escapes sequences (\\n, \\r, \\t, \\', \\\", \\\\) (default: %s)", params.escape ? "true" : "false"), + [](common_params & params) { + params.escape = true; + } + )); + add_opt(common_arg( + {"--no-escape"}, + "do not process escape sequences", + [](common_params & params) { + params.escape = false; + } + )); + add_opt(common_arg( + {"-ptc", "--print-token-count"}, "N", + string_format("print token count every N tokens (default: %d)", params.n_print), + [](common_params & params, int value) { + params.n_print = value; + } + ).set_examples({LLAMA_EXAMPLE_MAIN})); + add_opt(common_arg( + {"--prompt-cache"}, "FNAME", + "file to cache prompt state for faster startup (default: none)", + [](common_params & params, const std::string & value) { + params.path_prompt_cache = value; + } + ).set_examples({LLAMA_EXAMPLE_MAIN})); + add_opt(common_arg( + {"--prompt-cache-all"}, + "if specified, saves user input and generations to cache as well\n", + [](common_params & params) { + params.prompt_cache_all = true; + } + ).set_examples({LLAMA_EXAMPLE_MAIN})); + add_opt(common_arg( + {"--prompt-cache-ro"}, + "if specified, uses the prompt cache but does not update it", + [](common_params & params) { + params.prompt_cache_ro = true; + } + ).set_examples({LLAMA_EXAMPLE_MAIN})); + add_opt(common_arg( + {"-r", "--reverse-prompt"}, "PROMPT", + "halt generation at PROMPT, return control in interactive mode\n", + [](common_params & params, const std::string & value) { + params.antiprompt.emplace_back(value); + } + ).set_examples({LLAMA_EXAMPLE_MAIN})); + add_opt(common_arg( + {"-sp", "--special"}, + string_format("special tokens output enabled (default: %s)", params.special ? 
"true" : "false"), + [](common_params & params) { + params.special = true; + } + ).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER})); + add_opt(common_arg( + {"-cnv", "--conversation"}, + string_format( + "run in conversation mode:\n" + "- does not print special tokens and suffix/prefix\n" + "- interactive mode is also enabled\n" + "(default: %s)", + params.conversation ? "true" : "false" + ), + [](common_params & params) { + params.conversation = true; + } + ).set_examples({LLAMA_EXAMPLE_MAIN})); + add_opt(common_arg( + {"-i", "--interactive"}, + string_format("run in interactive mode (default: %s)", params.interactive ? "true" : "false"), + [](common_params & params) { + params.interactive = true; + } + ).set_examples({LLAMA_EXAMPLE_MAIN})); + add_opt(common_arg( + {"-if", "--interactive-first"}, + string_format("run in interactive mode and wait for input right away (default: %s)", params.interactive_first ? "true" : "false"), + [](common_params & params) { + params.interactive_first = true; + } + ).set_examples({LLAMA_EXAMPLE_MAIN})); + add_opt(common_arg( + {"-mli", "--multiline-input"}, + "allows you to write or paste multiple lines without ending each in '\\'", + [](common_params & params) { + params.multiline_input = true; + } + ).set_examples({LLAMA_EXAMPLE_MAIN})); + add_opt(common_arg( + {"--in-prefix-bos"}, + "prefix BOS to user inputs, preceding the `--in-prefix` string", + [](common_params & params) { + params.input_prefix_bos = true; + params.enable_chat_template = false; + } + ).set_examples({LLAMA_EXAMPLE_MAIN})); + add_opt(common_arg( + {"--in-prefix"}, "STRING", + "string to prefix user inputs with (default: empty)", + [](common_params & params, const std::string & value) { + params.input_prefix = value; + params.enable_chat_template = false; + } + ).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_INFILL})); + add_opt(common_arg( + {"--in-suffix"}, "STRING", + "string to suffix after user inputs with (default: empty)", + [](common_params & params, const std::string & value) { + params.input_suffix = value; + params.enable_chat_template = false; + } + ).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_INFILL})); + add_opt(common_arg( + {"--no-warmup"}, + "skip warming up the model with an empty run", + [](common_params & params) { + params.warmup = false; + } + ).set_examples({LLAMA_EXAMPLE_MAIN})); + add_opt(common_arg( + {"--spm-infill"}, + string_format( + "use Suffix/Prefix/Middle pattern for infill (instead of Prefix/Suffix/Middle) as some models prefer this. (default: %s)", + params.spm_infill ? 
"enabled" : "disabled" + ), + [](common_params & params) { + params.spm_infill = true; + } + ).set_examples({LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_INFILL})); + add_opt(common_arg( + {"--samplers"}, "SAMPLERS", + string_format("samplers that will be used for generation in the order, separated by \';\'\n(default: %s)", sampler_type_names.c_str()), + [](common_params & params, const std::string & value) { + const auto sampler_names = string_split(value, ';'); + params.sparams.samplers = common_sampler_types_from_names(sampler_names, true); + } + ).set_sparam()); + add_opt(common_arg( + {"-s", "--seed"}, "SEED", + string_format("RNG seed (default: %d, use random seed for %d)", params.sparams.seed, LLAMA_DEFAULT_SEED), + [](common_params & params, const std::string & value) { + params.sparams.seed = std::stoul(value); + } + ).set_sparam()); + add_opt(common_arg( + {"--sampling-seq"}, "SEQUENCE", + string_format("simplified sequence for samplers that will be used (default: %s)", sampler_type_chars.c_str()), + [](common_params & params, const std::string & value) { + params.sparams.samplers = common_sampler_types_from_chars(value); + } + ).set_sparam()); + add_opt(common_arg( + {"--ignore-eos"}, + "ignore end of stream token and continue generating (implies --logit-bias EOS-inf)", + [](common_params & params) { + params.sparams.ignore_eos = true; + } + ).set_sparam()); + add_opt(common_arg( + {"--penalize-nl"}, + string_format("penalize newline tokens (default: %s)", params.sparams.penalize_nl ? "true" : "false"), + [](common_params & params) { + params.sparams.penalize_nl = true; + } + ).set_sparam()); + add_opt(common_arg( + {"--temp"}, "N", + string_format("temperature (default: %.1f)", (double)params.sparams.temp), + [](common_params & params, const std::string & value) { + params.sparams.temp = std::stof(value); + params.sparams.temp = std::max(params.sparams.temp, 0.0f); + } + ).set_sparam()); + add_opt(common_arg( + {"--top-k"}, "N", + string_format("top-k sampling (default: %d, 0 = disabled)", params.sparams.top_k), + [](common_params & params, int value) { + params.sparams.top_k = value; + } + ).set_sparam()); + add_opt(common_arg( + {"--top-p"}, "N", + string_format("top-p sampling (default: %.1f, 1.0 = disabled)", (double)params.sparams.top_p), + [](common_params & params, const std::string & value) { + params.sparams.top_p = std::stof(value); + } + ).set_sparam()); + add_opt(common_arg( + {"--min-p"}, "N", + string_format("min-p sampling (default: %.1f, 0.0 = disabled)", (double)params.sparams.min_p), + [](common_params & params, const std::string & value) { + params.sparams.min_p = std::stof(value); + } + ).set_sparam()); + add_opt(common_arg( + {"--xtc-probability"}, "N", + string_format("xtc probability (default: %.1f, 0.0 = disabled)", (double)params.sparams.xtc_probability), + [](common_params & params, const std::string & value) { + params.sparams.xtc_probability = std::stof(value); + } + ).set_sparam()); + add_opt(common_arg( + {"--xtc-threshold"}, "N", + string_format("xtc threshold (default: %.1f, 1.0 = disabled)", (double)params.sparams.xtc_threshold), + [](common_params & params, const std::string & value) { + params.sparams.xtc_threshold = std::stof(value); + } + ).set_sparam()); + add_opt(common_arg( + {"--typical"}, "N", + string_format("locally typical sampling, parameter p (default: %.1f, 1.0 = disabled)", (double)params.sparams.typ_p), + [](common_params & params, const std::string & value) { + params.sparams.typ_p = std::stof(value); + } + ).set_sparam()); + 
add_opt(common_arg( + {"--repeat-last-n"}, "N", + string_format("last n tokens to consider for penalties (default: %d, 0 = disabled, -1 = ctx_size)", params.sparams.penalty_last_n), + [](common_params & params, int value) { + params.sparams.penalty_last_n = value; + params.sparams.n_prev = std::max(params.sparams.n_prev, params.sparams.penalty_last_n); + } + ).set_sparam()); + add_opt(common_arg( + {"--repeat-penalty"}, "N", + string_format("penalize repeat sequence of tokens (default: %.1f, 1.0 = disabled)", (double)params.sparams.penalty_repeat), + [](common_params & params, const std::string & value) { + params.sparams.penalty_repeat = std::stof(value); + } + ).set_sparam()); + add_opt(common_arg( + {"--presence-penalty"}, "N", + string_format("repeat alpha presence penalty (default: %.1f, 0.0 = disabled)", (double)params.sparams.penalty_present), + [](common_params & params, const std::string & value) { + params.sparams.penalty_present = std::stof(value); + } + ).set_sparam()); + add_opt(common_arg( + {"--frequency-penalty"}, "N", + string_format("repeat alpha frequency penalty (default: %.1f, 0.0 = disabled)", (double)params.sparams.penalty_freq), + [](common_params & params, const std::string & value) { + params.sparams.penalty_freq = std::stof(value); + } + ).set_sparam()); + add_opt(common_arg( + {"--dry-multiplier"}, "N", + string_format("set DRY sampling multiplier (default: %.1f, 0.0 = disabled)", (double)params.sparams.dry_multiplier), + [](common_params & params, const std::string & value) { + params.sparams.dry_multiplier = std::stof(value); + } + ).set_sparam()); + add_opt(common_arg( + {"--dry-base"}, "N", + string_format("set DRY sampling base value (default: %.2f)", (double)params.sparams.dry_base), + [](common_params & params, const std::string & value) { + float potential_base = std::stof(value); + if (potential_base >= 1.0f) { + params.sparams.dry_base = potential_base; + } + } + ).set_sparam()); + add_opt(common_arg( + {"--dry-allowed-length"}, "N", + string_format("set allowed length for DRY sampling (default: %d)", params.sparams.dry_allowed_length), + [](common_params & params, int value) { + params.sparams.dry_allowed_length = value; + } + ).set_sparam()); + add_opt(common_arg( + {"--dry-penalty-last-n"}, "N", + string_format("set DRY penalty for the last n tokens (default: %d, 0 = disable, -1 = context size)", params.sparams.dry_penalty_last_n), + [](common_params & params, int value) { + params.sparams.dry_penalty_last_n = value; + } + ).set_sparam()); + add_opt(common_arg( + {"--dry-sequence-breaker"}, "STRING", + string_format("add sequence breaker for DRY sampling, clearing out default breakers (%s) in the process; use \"none\" to not use any sequence breakers\n", + params.sparams.dry_sequence_breakers.empty() ? "none" : + std::accumulate(std::next(params.sparams.dry_sequence_breakers.begin()), + params.sparams.dry_sequence_breakers.end(), + std::string("'") + (params.sparams.dry_sequence_breakers[0] == "\n" ? "\\n" : params.sparams.dry_sequence_breakers[0]) + "'", + [](const std::string & a, const std::string & b) { + std::string formatted_b = (b == "\n") ? 
"\\n" : b; + return a + ", '" + formatted_b + "'"; + }).c_str()), + [](common_params & params, const std::string & value) { + static bool defaults_cleared = false; + + if (!defaults_cleared) { + params.sparams.dry_sequence_breakers.clear(); + defaults_cleared = true; + } + + if (value == "none") { + params.sparams.dry_sequence_breakers.clear(); + } else { + params.sparams.dry_sequence_breakers.emplace_back(value); + } + } + ).set_sparam()); + add_opt(common_arg( + {"--dynatemp-range"}, "N", + string_format("dynamic temperature range (default: %.1f, 0.0 = disabled)", (double)params.sparams.dynatemp_range), + [](common_params & params, const std::string & value) { + params.sparams.dynatemp_range = std::stof(value); + } + ).set_sparam()); + add_opt(common_arg( + {"--dynatemp-exp"}, "N", + string_format("dynamic temperature exponent (default: %.1f)", (double)params.sparams.dynatemp_exponent), + [](common_params & params, const std::string & value) { + params.sparams.dynatemp_exponent = std::stof(value); + } + ).set_sparam()); + add_opt(common_arg( + {"--mirostat"}, "N", + string_format("use Mirostat sampling.\nTop K, Nucleus and Locally Typical samplers are ignored if used.\n" + "(default: %d, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)", params.sparams.mirostat), + [](common_params & params, int value) { + params.sparams.mirostat = value; + } + ).set_sparam()); + add_opt(common_arg( + {"--mirostat-lr"}, "N", + string_format("Mirostat learning rate, parameter eta (default: %.1f)", (double)params.sparams.mirostat_eta), + [](common_params & params, const std::string & value) { + params.sparams.mirostat_eta = std::stof(value); + } + ).set_sparam()); + add_opt(common_arg( + {"--mirostat-ent"}, "N", + string_format("Mirostat target entropy, parameter tau (default: %.1f)", (double)params.sparams.mirostat_tau), + [](common_params & params, const std::string & value) { + params.sparams.mirostat_tau = std::stof(value); + } + ).set_sparam()); + add_opt(common_arg( + {"-l", "--logit-bias"}, "TOKEN_ID(+/-)BIAS", + "modifies the likelihood of token appearing in the completion,\n" + "i.e. `--logit-bias 15043+1` to increase likelihood of token ' Hello',\n" + "or `--logit-bias 15043-1` to decrease likelihood of token ' Hello'", + [](common_params & params, const std::string & value) { + std::stringstream ss(value); + llama_token key; + char sign; + std::string value_str; + try { + if (ss >> key && ss >> sign && std::getline(ss, value_str) && (sign == '+' || sign == '-')) { + const float bias = std::stof(value_str) * ((sign == '-') ? 
-1.0f : 1.0f); + params.sparams.logit_bias.push_back({key, bias}); + } else { + throw std::invalid_argument("invalid input format"); + } + } catch (const std::exception&) { + throw std::invalid_argument("invalid input format"); + } + } + ).set_sparam()); + add_opt(common_arg( + {"--grammar"}, "GRAMMAR", + string_format("BNF-like grammar to constrain generations (see samples in grammars/ dir) (default: '%s')", params.sparams.grammar.c_str()), + [](common_params & params, const std::string & value) { + params.sparams.grammar = value; + } + ).set_sparam()); + add_opt(common_arg( + {"--grammar-file"}, "FNAME", + "file to read grammar from", + [](common_params & params, const std::string & value) { + std::ifstream file(value); + if (!file) { + throw std::runtime_error(string_format("error: failed to open file '%s'\n", value.c_str())); + } + std::copy( + std::istreambuf_iterator<char>(file), + std::istreambuf_iterator<char>(), + std::back_inserter(params.sparams.grammar) + ); + } + ).set_sparam()); + add_opt(common_arg( + {"-j", "--json-schema"}, "SCHEMA", + "JSON schema to constrain generations (https://json-schema.org/), e.g. `{}` for any JSON object\nFor schemas w/ external $refs, use --grammar + examples/json_schema_to_grammar.py instead", + [](common_params & params, const std::string & value) { + params.sparams.grammar = json_schema_to_grammar(json::parse(value)); + } + ).set_sparam()); + add_opt(common_arg( + {"--pooling"}, "{none,mean,cls,last,rank}", + "pooling type for embeddings, use model default if unspecified", + [](common_params & params, const std::string & value) { + /**/ if (value == "none") { params.pooling_type = LLAMA_POOLING_TYPE_NONE; } + else if (value == "mean") { params.pooling_type = LLAMA_POOLING_TYPE_MEAN; } + else if (value == "cls") { params.pooling_type = LLAMA_POOLING_TYPE_CLS; } + else if (value == "last") { params.pooling_type = LLAMA_POOLING_TYPE_LAST; } + else if (value == "rank") { params.pooling_type = LLAMA_POOLING_TYPE_RANK; } + else { throw std::invalid_argument("invalid value"); } + } + ).set_examples({LLAMA_EXAMPLE_EMBEDDING, LLAMA_EXAMPLE_RETRIEVAL, LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_POOLING")); + add_opt(common_arg( + {"--attention"}, "{causal,non-causal}", + "attention type for embeddings, use model default if unspecified", + [](common_params & params, const std::string & value) { + /**/ if (value == "causal") { params.attention_type = LLAMA_ATTENTION_TYPE_CAUSAL; } + else if (value == "non-causal") { params.attention_type = LLAMA_ATTENTION_TYPE_NON_CAUSAL; } + else { throw std::invalid_argument("invalid value"); } + } + ).set_examples({LLAMA_EXAMPLE_EMBEDDING})); + add_opt(common_arg( + {"--rope-scaling"}, "{none,linear,yarn}", + "RoPE frequency scaling method, defaults to linear unless specified by the model", + [](common_params & params, const std::string & value) { + /**/ if (value == "none") { params.rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_NONE; } + else if (value == "linear") { params.rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_LINEAR; } + else if (value == "yarn") { params.rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_YARN; } + else { throw std::invalid_argument("invalid value"); } + } + ).set_env("LLAMA_ARG_ROPE_SCALING_TYPE")); + add_opt(common_arg( + {"--rope-scale"}, "N", + "RoPE context scaling factor, expands context by a factor of N", + [](common_params & params, const std::string & value) { + params.rope_freq_scale = 1.0f / std::stof(value); + } + ).set_env("LLAMA_ARG_ROPE_SCALE")); + add_opt(common_arg( + {"--rope-freq-base"}, "N", + 
"RoPE base frequency, used by NTK-aware scaling (default: loaded from model)", + [](common_params & params, const std::string & value) { + params.rope_freq_base = std::stof(value); + } + ).set_env("LLAMA_ARG_ROPE_FREQ_BASE")); + add_opt(common_arg( + {"--rope-freq-scale"}, "N", + "RoPE frequency scaling factor, expands context by a factor of 1/N", + [](common_params & params, const std::string & value) { + params.rope_freq_scale = std::stof(value); + } + ).set_env("LLAMA_ARG_ROPE_FREQ_SCALE")); + add_opt(common_arg( + {"--yarn-orig-ctx"}, "N", + string_format("YaRN: original context size of model (default: %d = model training context size)", params.yarn_orig_ctx), + [](common_params & params, int value) { + params.yarn_orig_ctx = value; + } + ).set_env("LLAMA_ARG_YARN_ORIG_CTX")); + add_opt(common_arg( + {"--yarn-ext-factor"}, "N", + string_format("YaRN: extrapolation mix factor (default: %.1f, 0.0 = full interpolation)", (double)params.yarn_ext_factor), + [](common_params & params, const std::string & value) { + params.yarn_ext_factor = std::stof(value); + } + ).set_env("LLAMA_ARG_YARN_EXT_FACTOR")); + add_opt(common_arg( + {"--yarn-attn-factor"}, "N", + string_format("YaRN: scale sqrt(t) or attention magnitude (default: %.1f)", (double)params.yarn_attn_factor), + [](common_params & params, const std::string & value) { + params.yarn_attn_factor = std::stof(value); + } + ).set_env("LLAMA_ARG_YARN_ATTN_FACTOR")); + add_opt(common_arg( + {"--yarn-beta-slow"}, "N", + string_format("YaRN: high correction dim or alpha (default: %.1f)", (double)params.yarn_beta_slow), + [](common_params & params, const std::string & value) { + params.yarn_beta_slow = std::stof(value); + } + ).set_env("LLAMA_ARG_YARN_BETA_SLOW")); + add_opt(common_arg( + {"--yarn-beta-fast"}, "N", + string_format("YaRN: low correction dim or beta (default: %.1f)", (double)params.yarn_beta_fast), + [](common_params & params, const std::string & value) { + params.yarn_beta_fast = std::stof(value); + } + ).set_env("LLAMA_ARG_YARN_BETA_FAST")); + add_opt(common_arg( + {"-gan", "--grp-attn-n"}, "N", + string_format("group-attention factor (default: %d)", params.grp_attn_n), + [](common_params & params, int value) { + params.grp_attn_n = value; + } + ).set_env("LLAMA_ARG_GRP_ATTN_N").set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_PASSKEY})); + add_opt(common_arg( + {"-gaw", "--grp-attn-w"}, "N", + string_format("group-attention width (default: %d)", params.grp_attn_w), + [](common_params & params, int value) { + params.grp_attn_w = value; + } + ).set_env("LLAMA_ARG_GRP_ATTN_W").set_examples({LLAMA_EXAMPLE_MAIN})); + add_opt(common_arg( + {"-dkvc", "--dump-kv-cache"}, + "verbose print of the KV cache", + [](common_params & params) { + params.dump_kv_cache = true; + } + )); + add_opt(common_arg( + {"-nkvo", "--no-kv-offload"}, + "disable KV offload", + [](common_params & params) { + params.no_kv_offload = true; + } + ).set_env("LLAMA_ARG_NO_KV_OFFLOAD")); + add_opt(common_arg( + {"-ctk", "--cache-type-k"}, "TYPE", + string_format("KV cache data type for K (default: %s)", params.cache_type_k.c_str()), + [](common_params & params, const std::string & value) { + // TODO: get the type right here + params.cache_type_k = value; + } + ).set_env("LLAMA_ARG_CACHE_TYPE_K")); + add_opt(common_arg( + {"-ctv", "--cache-type-v"}, "TYPE", + string_format("KV cache data type for V (default: %s)", params.cache_type_v.c_str()), + [](common_params & params, const std::string & value) { + // TODO: get the type right here + params.cache_type_v = value; + } 
+ ).set_env("LLAMA_ARG_CACHE_TYPE_V")); + add_opt(common_arg( + {"--perplexity", "--all-logits"}, + string_format("return logits for all tokens in the batch (default: %s)", params.logits_all ? "true" : "false"), + [](common_params & params) { + params.logits_all = true; + } + ).set_examples({LLAMA_EXAMPLE_PERPLEXITY})); + add_opt(common_arg( + {"--hellaswag"}, + "compute HellaSwag score over random tasks from datafile supplied with -f", + [](common_params & params) { + params.hellaswag = true; + } + ).set_examples({LLAMA_EXAMPLE_PERPLEXITY})); + add_opt(common_arg( + {"--hellaswag-tasks"}, "N", + string_format("number of tasks to use when computing the HellaSwag score (default: %zu)", params.hellaswag_tasks), + [](common_params & params, int value) { + params.hellaswag_tasks = value; + } + ).set_examples({LLAMA_EXAMPLE_PERPLEXITY})); + add_opt(common_arg( + {"--winogrande"}, + "compute Winogrande score over random tasks from datafile supplied with -f", + [](common_params & params) { + params.winogrande = true; + } + ).set_examples({LLAMA_EXAMPLE_PERPLEXITY})); + add_opt(common_arg( + {"--winogrande-tasks"}, "N", + string_format("number of tasks to use when computing the Winogrande score (default: %zu)", params.winogrande_tasks), + [](common_params & params, int value) { + params.winogrande_tasks = value; + } + ).set_examples({LLAMA_EXAMPLE_PERPLEXITY})); + add_opt(common_arg( + {"--multiple-choice"}, + "compute multiple choice score over random tasks from datafile supplied with -f", + [](common_params & params) { + params.multiple_choice = true; + } + ).set_examples({LLAMA_EXAMPLE_PERPLEXITY})); + add_opt(common_arg( + {"--multiple-choice-tasks"}, "N", + string_format("number of tasks to use when computing the multiple choice score (default: %zu)", params.multiple_choice_tasks), + [](common_params & params, int value) { + params.multiple_choice_tasks = value; + } + ).set_examples({LLAMA_EXAMPLE_PERPLEXITY})); + add_opt(common_arg( + {"--kl-divergence"}, + "computes KL-divergence to logits provided via --kl-divergence-base", + [](common_params & params) { + params.kl_divergence = true; + } + ).set_examples({LLAMA_EXAMPLE_PERPLEXITY})); + add_opt(common_arg( + {"--save-all-logits", "--kl-divergence-base"}, "FNAME", + "set logits file", + [](common_params & params, const std::string & value) { + params.logits_file = value; + } + ).set_examples({LLAMA_EXAMPLE_PERPLEXITY})); + add_opt(common_arg( + {"--ppl-stride"}, "N", + string_format("stride for perplexity calculation (default: %d)", params.ppl_stride), + [](common_params & params, int value) { + params.ppl_stride = value; + } + ).set_examples({LLAMA_EXAMPLE_PERPLEXITY})); + add_opt(common_arg( + {"--ppl-output-type"}, "<0|1>", + string_format("output type for perplexity calculation (default: %d)", params.ppl_output_type), + [](common_params & params, int value) { + params.ppl_output_type = value; + } + ).set_examples({LLAMA_EXAMPLE_PERPLEXITY})); + add_opt(common_arg( + {"-dt", "--defrag-thold"}, "N", + string_format("KV cache defragmentation threshold (default: %.1f, < 0 - disabled)", (double)params.defrag_thold), + [](common_params & params, const std::string & value) { + params.defrag_thold = std::stof(value); + } + ).set_env("LLAMA_ARG_DEFRAG_THOLD")); + add_opt(common_arg( + {"-np", "--parallel"}, "N", + string_format("number of parallel sequences to decode (default: %d)", params.n_parallel), + [](common_params & params, int value) { + params.n_parallel = value; + } + ).set_env("LLAMA_ARG_N_PARALLEL")); + add_opt(common_arg( + {"-ns", 
"--sequences"}, "N", + string_format("number of sequences to decode (default: %d)", params.n_sequences), + [](common_params & params, int value) { + params.n_sequences = value; + } + ).set_examples({LLAMA_EXAMPLE_PARALLEL})); + add_opt(common_arg( + {"-cb", "--cont-batching"}, + string_format("enable continuous batching (a.k.a dynamic batching) (default: %s)", params.cont_batching ? "enabled" : "disabled"), + [](common_params & params) { + params.cont_batching = true; + } + ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_CONT_BATCHING")); + add_opt(common_arg( + {"-nocb", "--no-cont-batching"}, + "disable continuous batching", + [](common_params & params) { + params.cont_batching = false; + } + ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_NO_CONT_BATCHING")); + add_opt(common_arg( + {"--mmproj"}, "FILE", + "path to a multimodal projector file for LLaVA. see examples/llava/README.md", + [](common_params & params, const std::string & value) { + params.mmproj = value; + } + ).set_examples({LLAMA_EXAMPLE_LLAVA})); + add_opt(common_arg( + {"--image"}, "FILE", + "path to an image file. use with multimodal models. Specify multiple times for batching", + [](common_params & params, const std::string & value) { + params.image.emplace_back(value); + } + ).set_examples({LLAMA_EXAMPLE_LLAVA})); + if (llama_supports_rpc()) { + add_opt(common_arg( + {"--rpc"}, "SERVERS", + "comma separated list of RPC servers", + [](common_params & params, const std::string & value) { + params.rpc_servers = value; + } + ).set_env("LLAMA_ARG_RPC")); + } + add_opt(common_arg( + {"--mlock"}, + "force system to keep model in RAM rather than swapping or compressing", + [](common_params & params) { + params.use_mlock = true; + } + ).set_env("LLAMA_ARG_MLOCK")); + add_opt(common_arg( + {"--no-mmap"}, + "do not memory-map model (slower load but may reduce pageouts if not using mlock)", + [](common_params & params) { + params.use_mmap = false; + } + ).set_env("LLAMA_ARG_NO_MMAP")); + add_opt(common_arg( + {"--numa"}, "TYPE", + "attempt optimizations that help on some NUMA systems\n" + "- distribute: spread execution evenly over all nodes\n" + "- isolate: only spawn threads on CPUs on the node that execution started on\n" + "- numactl: use the CPU map provided by numactl\n" + "if run without this previously, it is recommended to drop the system page cache before using this\n" + "see https://github.com/ggerganov/llama.cpp/issues/1437", + [](common_params & params, const std::string & value) { + /**/ if (value == "distribute" || value == "") { params.numa = GGML_NUMA_STRATEGY_DISTRIBUTE; } + else if (value == "isolate") { params.numa = GGML_NUMA_STRATEGY_ISOLATE; } + else if (value == "numactl") { params.numa = GGML_NUMA_STRATEGY_NUMACTL; } + else { throw std::invalid_argument("invalid value"); } + } + ).set_env("LLAMA_ARG_NUMA")); + add_opt(common_arg( + {"-ngl", "--gpu-layers", "--n-gpu-layers"}, "N", + "number of layers to store in VRAM", + [](common_params & params, int value) { + params.n_gpu_layers = value; + if (!llama_supports_gpu_offload()) { + fprintf(stderr, "warning: not compiled with GPU offload support, --gpu-layers option will be ignored\n"); + fprintf(stderr, "warning: see main README.md for information on enabling GPU BLAS support\n"); + } + } + ).set_env("LLAMA_ARG_N_GPU_LAYERS")); + add_opt(common_arg( + {"-ngld", "--gpu-layers-draft", "--n-gpu-layers-draft"}, "N", + "number of layers to store in VRAM for the draft model", + [](common_params & params, int value) { + params.n_gpu_layers_draft 
= value; + if (!llama_supports_gpu_offload()) { + fprintf(stderr, "warning: not compiled with GPU offload support, --gpu-layers-draft option will be ignored\n"); + fprintf(stderr, "warning: see main README.md for information on enabling GPU BLAS support\n"); + } + } + ).set_examples({LLAMA_EXAMPLE_SPECULATIVE})); + add_opt(common_arg( + {"-sm", "--split-mode"}, "{none,layer,row}", + "how to split the model across multiple GPUs, one of:\n" + "- none: use one GPU only\n" + "- layer (default): split layers and KV across GPUs\n" + "- row: split rows across GPUs", + [](common_params & params, const std::string & value) { + std::string arg_next = value; + if (arg_next == "none") { + params.split_mode = LLAMA_SPLIT_MODE_NONE; + } else if (arg_next == "layer") { + params.split_mode = LLAMA_SPLIT_MODE_LAYER; + } else if (arg_next == "row") { +#ifdef GGML_USE_SYCL + fprintf(stderr, "warning: the [row] split mode is not yet supported by llama.cpp with SYCL, exiting\n"); + exit(1); +#endif // GGML_USE_SYCL + params.split_mode = LLAMA_SPLIT_MODE_ROW; + } else { + throw std::invalid_argument("invalid value"); + } + if (!llama_supports_gpu_offload()) { + fprintf(stderr, "warning: llama.cpp was compiled without support for GPU offload. Setting the split mode has no effect.\n"); + } + } + ).set_env("LLAMA_ARG_SPLIT_MODE")); + add_opt(common_arg( + {"-ts", "--tensor-split"}, "N0,N1,N2,...", + "fraction of the model to offload to each GPU, comma-separated list of proportions, e.g. 3,1", + [](common_params & params, const std::string & value) { + std::string arg_next = value; + + // split string by , and / + const std::regex regex{ R"([,/]+)" }; + std::sregex_token_iterator it{ arg_next.begin(), arg_next.end(), regex, -1 }; + std::vector<std::string> split_arg{ it, {} }; + if (split_arg.size() >= llama_max_devices()) { + throw std::invalid_argument( + string_format("got %d input configs, but system only has %d devices", (int)split_arg.size(), (int)llama_max_devices()) + ); + } + for (size_t i = 0; i < llama_max_devices(); ++i) { + if (i < split_arg.size()) { + params.tensor_split[i] = std::stof(split_arg[i]); + } else { + params.tensor_split[i] = 0.0f; + } + } + if (!llama_supports_gpu_offload()) { + fprintf(stderr, "warning: llama.cpp was compiled without support for GPU offload. Setting a tensor split has no effect.\n"); + } + } + ).set_env("LLAMA_ARG_TENSOR_SPLIT")); + add_opt(common_arg( + {"-mg", "--main-gpu"}, "INDEX", + string_format("the GPU to use for the model (with split-mode = none), or for intermediate results and KV (with split-mode = row) (default: %d)", params.main_gpu), + [](common_params & params, int value) { + params.main_gpu = value; + if (!llama_supports_gpu_offload()) { + fprintf(stderr, "warning: llama.cpp was compiled without support for GPU offload. Setting the main GPU has no effect.\n"); + } + } + ).set_env("LLAMA_ARG_MAIN_GPU")); + add_opt(common_arg( + {"--check-tensors"}, + string_format("check model tensor data for invalid values (default: %s)", params.check_tensors ? "true" : "false"), + [](common_params & params) { + params.check_tensors = true; + } + )); + add_opt(common_arg( + {"--override-kv"}, "KEY=TYPE:VALUE", + "advanced option to override model metadata by key. may be specified multiple times.\n" + "types: int, float, bool, str. 
example: --override-kv tokenizer.ggml.add_bos_token=bool:false", + [](common_params & params, const std::string & value) { + if (!string_parse_kv_override(value.c_str(), params.kv_overrides)) { + throw std::runtime_error(string_format("error: Invalid type for KV override: %s\n", value.c_str())); + } + } + )); + add_opt(common_arg( + {"--lora"}, "FNAME", + "path to LoRA adapter (can be repeated to use multiple adapters)", + [](common_params & params, const std::string & value) { + params.lora_adapters.push_back({ std::string(value), 1.0 }); + } + // we define this arg on both COMMON and EXPORT_LORA, so when showing help message of export-lora, it will be categorized as "example-specific" arg + ).set_examples({LLAMA_EXAMPLE_COMMON, LLAMA_EXAMPLE_EXPORT_LORA})); + add_opt(common_arg( + {"--lora-scaled"}, "FNAME", "SCALE", + "path to LoRA adapter with user defined scaling (can be repeated to use multiple adapters)", + [](common_params & params, const std::string & fname, const std::string & scale) { + params.lora_adapters.push_back({ fname, std::stof(scale) }); + } + // we define this arg on both COMMON and EXPORT_LORA, so when showing help message of export-lora, it will be categorized as "example-specific" arg + ).set_examples({LLAMA_EXAMPLE_COMMON, LLAMA_EXAMPLE_EXPORT_LORA})); + add_opt(common_arg( + {"--control-vector"}, "FNAME", + "add a control vector\nnote: this argument can be repeated to add multiple control vectors", + [](common_params & params, const std::string & value) { + params.control_vectors.push_back({ 1.0f, value, }); + } + )); + add_opt(common_arg( + {"--control-vector-scaled"}, "FNAME", "SCALE", + "add a control vector with user defined scaling SCALE\n" + "note: this argument can be repeated to add multiple scaled control vectors", + [](common_params & params, const std::string & fname, const std::string & scale) { + params.control_vectors.push_back({ std::stof(scale), fname }); + } + )); + add_opt(common_arg( + {"--control-vector-layer-range"}, "START", "END", + "layer range to apply the control vector(s) to, start and end inclusive", + [](common_params & params, const std::string & start, const std::string & end) { + params.control_vector_layer_start = std::stoi(start); + params.control_vector_layer_end = std::stoi(end); + } + )); + add_opt(common_arg( + {"-a", "--alias"}, "STRING", + "set alias for model name (to be used by REST API)", + [](common_params & params, const std::string & value) { + params.model_alias = value; + } + ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_ALIAS")); + add_opt(common_arg( + {"-m", "--model"}, "FNAME", + ex == LLAMA_EXAMPLE_EXPORT_LORA + ? 
std::string("model path from which to load base model") + : string_format( + "model path (default: `models/$filename` with filename from `--hf-file` " + "or `--model-url` if set, otherwise %s)", DEFAULT_MODEL_PATH + ), + [](common_params & params, const std::string & value) { + params.model = value; + } + ).set_examples({LLAMA_EXAMPLE_COMMON, LLAMA_EXAMPLE_EXPORT_LORA}).set_env("LLAMA_ARG_MODEL")); + add_opt(common_arg( + {"-md", "--model-draft"}, "FNAME", + "draft model for speculative decoding (default: unused)", + [](common_params & params, const std::string & value) { + params.model_draft = value; + } + ).set_examples({LLAMA_EXAMPLE_SPECULATIVE})); + add_opt(common_arg( + {"-mu", "--model-url"}, "MODEL_URL", + "model download url (default: unused)", + [](common_params & params, const std::string & value) { + params.model_url = value; + } + ).set_env("LLAMA_ARG_MODEL_URL")); + add_opt(common_arg( + {"-hfr", "--hf-repo"}, "REPO", + "Hugging Face model repository (default: unused)", + [](common_params & params, const std::string & value) { + params.hf_repo = value; + } + ).set_env("LLAMA_ARG_HF_REPO")); + add_opt(common_arg( + {"-hff", "--hf-file"}, "FILE", + "Hugging Face model file (default: unused)", + [](common_params & params, const std::string & value) { + params.hf_file = value; + } + ).set_env("LLAMA_ARG_HF_FILE")); + add_opt(common_arg( + {"-hft", "--hf-token"}, "TOKEN", + "Hugging Face access token (default: value from HF_TOKEN environment variable)", + [](common_params & params, const std::string & value) { + params.hf_token = value; + } + ).set_env("HF_TOKEN")); + add_opt(common_arg( + {"--context-file"}, "FNAME", + "file to load context from (repeat to specify multiple files)", + [](common_params & params, const std::string & value) { + std::ifstream file(value, std::ios::binary); + if (!file) { + throw std::runtime_error(string_format("error: failed to open file '%s'\n", value.c_str())); + } + params.context_files.push_back(value); + } + ).set_examples({LLAMA_EXAMPLE_RETRIEVAL})); + add_opt(common_arg( + {"--chunk-size"}, "N", + string_format("minimum length of embedded text chunks (default: %d)", params.chunk_size), + [](common_params & params, int value) { + params.chunk_size = value; + } + ).set_examples({LLAMA_EXAMPLE_RETRIEVAL})); + add_opt(common_arg( + {"--chunk-separator"}, "STRING", + string_format("separator between chunks (default: '%s')", params.chunk_separator.c_str()), + [](common_params & params, const std::string & value) { + params.chunk_separator = value; + } + ).set_examples({LLAMA_EXAMPLE_RETRIEVAL})); + add_opt(common_arg( + {"--junk"}, "N", + string_format("number of times to repeat the junk text (default: %d)", params.n_junk), + [](common_params & params, int value) { + params.n_junk = value; + } + ).set_examples({LLAMA_EXAMPLE_PASSKEY})); + add_opt(common_arg( + {"--pos"}, "N", + string_format("position of the passkey in the junk text (default: %d)", params.i_pos), + [](common_params & params, int value) { + params.i_pos = value; + } + ).set_examples({LLAMA_EXAMPLE_PASSKEY})); + add_opt(common_arg( + {"-o", "--output", "--output-file"}, "FNAME", + string_format("output file (default: '%s')", + ex == LLAMA_EXAMPLE_EXPORT_LORA + ? params.lora_outfile.c_str() + : ex == LLAMA_EXAMPLE_CVECTOR_GENERATOR + ? 
params.cvector_outfile.c_str() + : params.out_file.c_str()), + [](common_params & params, const std::string & value) { + params.out_file = value; + params.cvector_outfile = value; + params.lora_outfile = value; + } + ).set_examples({LLAMA_EXAMPLE_IMATRIX, LLAMA_EXAMPLE_CVECTOR_GENERATOR, LLAMA_EXAMPLE_EXPORT_LORA})); + add_opt(common_arg( + {"-ofreq", "--output-frequency"}, "N", + string_format("output the imatrix every N iterations (default: %d)", params.n_out_freq), + [](common_params & params, int value) { + params.n_out_freq = value; + } + ).set_examples({LLAMA_EXAMPLE_IMATRIX})); + add_opt(common_arg( + {"--save-frequency"}, "N", + string_format("save an imatrix copy every N iterations (default: %d)", params.n_save_freq), + [](common_params & params, int value) { + params.n_save_freq = value; + } + ).set_examples({LLAMA_EXAMPLE_IMATRIX})); + add_opt(common_arg( + {"--process-output"}, + string_format("collect data for the output tensor (default: %s)", params.process_output ? "true" : "false"), + [](common_params & params) { + params.process_output = true; + } + ).set_examples({LLAMA_EXAMPLE_IMATRIX})); + add_opt(common_arg( + {"--no-ppl"}, + string_format("do not compute perplexity (default: %s)", params.compute_ppl ? "true" : "false"), + [](common_params & params) { + params.compute_ppl = false; + } + ).set_examples({LLAMA_EXAMPLE_IMATRIX})); + add_opt(common_arg( + {"--chunk", "--from-chunk"}, "N", + string_format("start processing the input from chunk N (default: %d)", params.i_chunk), + [](common_params & params, int value) { + params.i_chunk = value; + } + ).set_examples({LLAMA_EXAMPLE_IMATRIX})); + add_opt(common_arg( + {"-pps"}, + string_format("whether the prompt is shared across parallel sequences (default: %s)", params.is_pp_shared ? "true" : "false"), + [](common_params & params) { + params.is_pp_shared = true; + } + ).set_examples({LLAMA_EXAMPLE_BENCH})); + add_opt(common_arg( + {"-npp"}, "n0,n1,...", + "number of prompt tokens", + [](common_params & params, const std::string & value) { + auto p = string_split<int>(value, ','); + params.n_pp.insert(params.n_pp.end(), p.begin(), p.end()); + } + ).set_examples({LLAMA_EXAMPLE_BENCH})); + add_opt(common_arg( + {"-ntg"}, "n0,n1,...", + "number of text generation tokens", + [](common_params & params, const std::string & value) { + auto p = string_split<int>(value, ','); + params.n_tg.insert(params.n_tg.end(), p.begin(), p.end()); + } + ).set_examples({LLAMA_EXAMPLE_BENCH})); + add_opt(common_arg( + {"-npl"}, "n0,n1,...", + "number of parallel prompts", + [](common_params & params, const std::string & value) { + auto p = string_split<int>(value, ','); + params.n_pl.insert(params.n_pl.end(), p.begin(), p.end()); + } + ).set_examples({LLAMA_EXAMPLE_BENCH})); + add_opt(common_arg( + {"--embd-normalize"}, "N", + string_format("normalisation for embeddings (default: %d) (-1=none, 0=max absolute int16, 1=taxicab, 2=euclidean, >2=p-norm)", params.embd_normalize), + [](common_params & params, int value) { + params.embd_normalize = value; + } + ).set_examples({LLAMA_EXAMPLE_EMBEDDING})); + add_opt(common_arg( + {"--embd-output-format"}, "FORMAT", + "empty = default, \"array\" = [[],[]...], \"json\" = openai style, \"json+\" = same \"json\" + cosine similarity matrix", + [](common_params & params, const std::string & value) { + params.embd_out = value; + } + ).set_examples({LLAMA_EXAMPLE_EMBEDDING})); + add_opt(common_arg( + {"--embd-separator"}, "STRING", + "separator of embeddings (default \\n) for example \"<#sep#>\"", + [](common_params & params, const 
std::string & value) { + params.embd_sep = value; + } + ).set_examples({LLAMA_EXAMPLE_EMBEDDING})); + add_opt(common_arg( + {"--host"}, "HOST", + string_format("ip address to listen on (default: %s)", params.hostname.c_str()), + [](common_params & params, const std::string & value) { + params.hostname = value; + } + ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_HOST")); + add_opt(common_arg( + {"--port"}, "PORT", + string_format("port to listen on (default: %d)", params.port), + [](common_params & params, int value) { + params.port = value; + } + ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_PORT")); + add_opt(common_arg( + {"--path"}, "PATH", + string_format("path to serve static files from (default: %s)", params.public_path.c_str()), + [](common_params & params, const std::string & value) { + params.public_path = value; + } + ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_STATIC_PATH")); + add_opt(common_arg( + {"--embedding", "--embeddings"}, + string_format("restrict to only support embedding use case; use only with dedicated embedding models (default: %s)", params.embedding ? "enabled" : "disabled"), + [](common_params & params) { + params.embedding = true; + } + ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_EMBEDDINGS")); + add_opt(common_arg( + {"--reranking", "--rerank"}, + string_format("enable reranking endpoint on server (default: %s)", params.reranking ? "enabled" : "disabled"), + [](common_params & params) { + params.reranking = true; + } + ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_RERANKING")); + add_opt(common_arg( + {"--api-key"}, "KEY", + "API key to use for authentication (default: none)", + [](common_params & params, const std::string & value) { + params.api_keys.push_back(value); + } + ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_API_KEY")); + add_opt(common_arg( + {"--api-key-file"}, "FNAME", + "path to file containing API keys (default: none)", + [](common_params & params, const std::string & value) { + std::ifstream key_file(value); + if (!key_file) { + throw std::runtime_error(string_format("error: failed to open file '%s'\n", value.c_str())); + } + std::string key; + while (std::getline(key_file, key)) { + if (!key.empty()) { + params.api_keys.push_back(key); + } + } + key_file.close(); + } + ).set_examples({LLAMA_EXAMPLE_SERVER})); + add_opt(common_arg( + {"--ssl-key-file"}, "FNAME", + "path to file containing a PEM-encoded SSL private key", + [](common_params & params, const std::string & value) { + params.ssl_file_key = value; + } + ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_SSL_KEY_FILE")); + add_opt(common_arg( + {"--ssl-cert-file"}, "FNAME", + "path to file containing a PEM-encoded SSL certificate", + [](common_params & params, const std::string & value) { + params.ssl_file_cert = value; + } + ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_SSL_CERT_FILE")); + add_opt(common_arg( + {"-to", "--timeout"}, "N", + string_format("server read/write timeout in seconds (default: %d)", params.timeout_read), + [](common_params & params, int value) { + params.timeout_read = value; + params.timeout_write = value; + } + ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_TIMEOUT")); + add_opt(common_arg( + {"--threads-http"}, "N", + string_format("number of threads used to process HTTP requests (default: %d)", params.n_threads_http), + [](common_params & params, int value) { + params.n_threads_http = value; + } + ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_THREADS_HTTP")); + 
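// Usage note (illustrative): options registered with .set_env(...) above can also be supplied through the environment, with explicit command-line flags taking precedence over environment values. Assuming a hypothetical model path /models/model.gguf, the following two server invocations are equivalent: + // + //   llama-server -m /models/model.gguf --host 0.0.0.0 --port 8080 + //   LLAMA_ARG_HOST=0.0.0.0 LLAMA_ARG_PORT=8080 llama-server -m /models/model.gguf + 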
add_opt(common_arg( + {"--cache-reuse"}, "N", + string_format("min chunk size to attempt reusing from the cache via KV shifting (default: %d)", params.n_cache_reuse), + [](common_params & params, int value) { + params.n_cache_reuse = value; + } + ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_CACHE_REUSE")); + add_opt(common_arg( + {"--metrics"}, + string_format("enable prometheus compatible metrics endpoint (default: %s)", params.endpoint_metrics ? "enabled" : "disabled"), + [](common_params & params) { + params.endpoint_metrics = true; + } + ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_ENDPOINT_METRICS")); + add_opt(common_arg( + {"--slots"}, + string_format("enable slots monitoring endpoint (default: %s)", params.endpoint_slots ? "enabled" : "disabled"), + [](common_params & params) { + params.endpoint_slots = true; + } + ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_ENDPOINT_SLOTS")); + add_opt(common_arg( + {"--props"}, + string_format("enable changing global properties via POST /props (default: %s)", params.endpoint_props ? "enabled" : "disabled"), + [](common_params & params) { + params.endpoint_props = true; + } + ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_ENDPOINT_PROPS")); + add_opt(common_arg( + {"--no-slots"}, + "disables slots monitoring endpoint", + [](common_params & params) { + params.endpoint_slots = false; + } + ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_NO_ENDPOINT_SLOTS")); + add_opt(common_arg( + {"--slot-save-path"}, "PATH", + "path to save slot kv cache (default: disabled)", + [](common_params & params, const std::string & value) { + params.slot_save_path = value; + // if doesn't end with DIRECTORY_SEPARATOR, add it + if (!params.slot_save_path.empty() && params.slot_save_path[params.slot_save_path.size() - 1] != DIRECTORY_SEPARATOR) { + params.slot_save_path += DIRECTORY_SEPARATOR; + } + } + ).set_examples({LLAMA_EXAMPLE_SERVER})); + add_opt(common_arg( + {"--chat-template"}, "JINJA_TEMPLATE", + "set custom jinja chat template (default: template taken from model's metadata)\n" + "if suffix/prefix are specified, the template will be disabled\n" + "only commonly used templates are accepted:\nhttps://github.com/ggerganov/llama.cpp/wiki/Templates-supported-by-llama_chat_apply_template", + [](common_params & params, const std::string & value) { + if (!common_chat_verify_template(value)) { + throw std::runtime_error(string_format( + "error: the supplied chat template is not supported: %s\n" + "note: llama.cpp does not use a jinja parser, only commonly used templates are supported\n", + value.c_str() + )); + } + params.chat_template = value; + } + ).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_CHAT_TEMPLATE")); + add_opt(common_arg( + {"-sps", "--slot-prompt-similarity"}, "SIMILARITY", + string_format("how much the prompt of a request must match the prompt of a slot in order to use that slot (default: %.2f, 0.0 = disabled)\n", params.slot_prompt_similarity), + [](common_params & params, const std::string & value) { + params.slot_prompt_similarity = std::stof(value); + } + ).set_examples({LLAMA_EXAMPLE_SERVER})); + add_opt(common_arg( + {"--lora-init-without-apply"}, + string_format("load LoRA adapters without applying them (apply later via POST /lora-adapters) (default: %s)", params.lora_init_without_apply ? 
"enabled" : "disabled"), + [](common_params & params) { + params.lora_init_without_apply = true; + } + ).set_examples({LLAMA_EXAMPLE_SERVER})); + add_opt(common_arg( + {"--simple-io"}, + "use basic IO for better compatibility in subprocesses and limited consoles", + [](common_params & params) { + params.simple_io = true; + } + ).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_INFILL})); + add_opt(common_arg( + {"-ld", "--logdir"}, "LOGDIR", + "path under which to save YAML logs (no logging if unset)", + [](common_params & params, const std::string & value) { + params.logdir = value; + + if (params.logdir.back() != DIRECTORY_SEPARATOR) { + params.logdir += DIRECTORY_SEPARATOR; + } + } + )); + add_opt(common_arg( + {"--positive-file"}, "FNAME", + string_format("positive prompts file, one prompt per line (default: '%s')", params.cvector_positive_file.c_str()), + [](common_params & params, const std::string & value) { + params.cvector_positive_file = value; + } + ).set_examples({LLAMA_EXAMPLE_CVECTOR_GENERATOR})); + add_opt(common_arg( + {"--negative-file"}, "FNAME", + string_format("negative prompts file, one prompt per line (default: '%s')", params.cvector_negative_file.c_str()), + [](common_params & params, const std::string & value) { + params.cvector_negative_file = value; + } + ).set_examples({LLAMA_EXAMPLE_CVECTOR_GENERATOR})); + add_opt(common_arg( + {"--pca-batch"}, "N", + string_format("batch size used for PCA. Larger batch runs faster, but uses more memory (default: %d)", params.n_pca_batch), + [](common_params & params, int value) { + params.n_pca_batch = value; + } + ).set_examples({LLAMA_EXAMPLE_CVECTOR_GENERATOR})); + add_opt(common_arg( + {"--pca-iter"}, "N", + string_format("number of iterations used for PCA (default: %d)", params.n_pca_iterations), + [](common_params & params, int value) { + params.n_pca_iterations = value; + } + ).set_examples({LLAMA_EXAMPLE_CVECTOR_GENERATOR})); + add_opt(common_arg( + {"--method"}, "{pca, mean}", + "dimensionality reduction method to be used (default: pca)", + [](common_params & params, const std::string & value) { + /**/ if (value == "pca") { params.cvector_dimre_method = DIMRE_METHOD_PCA; } + else if (value == "mean") { params.cvector_dimre_method = DIMRE_METHOD_MEAN; } + else { throw std::invalid_argument("invalid value"); } + } + ).set_examples({LLAMA_EXAMPLE_CVECTOR_GENERATOR})); + add_opt(common_arg( + {"--output-format"}, "{md,jsonl}", + "output format for batched-bench results (default: md)", + [](common_params & params, const std::string & value) { + /**/ if (value == "jsonl") { params.batched_bench_output_jsonl = true; } + else if (value == "md") { params.batched_bench_output_jsonl = false; } + else { std::invalid_argument("invalid value"); } + } + ).set_examples({LLAMA_EXAMPLE_BENCH})); + add_opt(common_arg( + {"--log-disable"}, + "Log disable", + [](common_params &) { + common_log_pause(common_log_main()); + } + )); + add_opt(common_arg( + {"--log-file"}, "FNAME", + "Log to file", + [](common_params &, const std::string & value) { + common_log_set_file(common_log_main(), value.c_str()); + } + )); + add_opt(common_arg( + {"--log-colors"}, + "Enable colored logging", + [](common_params &) { + common_log_set_colors(common_log_main(), true); + } + ).set_env("LLAMA_LOG_COLORS")); + add_opt(common_arg( + {"-v", "--verbose", "--log-verbose"}, + "Set verbosity level to infinity (i.e. 
log all messages, useful for debugging)", + [](common_params & params) { + params.verbosity = INT_MAX; + common_log_set_verbosity_thold(INT_MAX); + } + )); + add_opt(common_arg( + {"-lv", "--verbosity", "--log-verbosity"}, "N", + "Set the verbosity threshold. Messages with a higher verbosity will be ignored.", + [](common_params & params, int value) { + params.verbosity = value; + common_log_set_verbosity_thold(value); + } + ).set_env("LLAMA_LOG_VERBOSITY")); + add_opt(common_arg( + {"--log-prefix"}, + "Enable prefix in log messages", + [](common_params &) { + common_log_set_prefix(common_log_main(), true); + } + ).set_env("LLAMA_LOG_PREFIX")); + add_opt(common_arg( + {"--log-timestamps"}, + "Enable timestamps in log messages", + [](common_params &) { + common_log_set_timestamps(common_log_main(), true); + } + ).set_env("LLAMA_LOG_TIMESTAMPS")); + + return ctx_arg; +} diff --git a/common/arg.h b/common/arg.h new file mode 100644 index 0000000000000000000000000000000000000000..a6700d323cc14b54b4ef5b2445d29e4baec19594 --- /dev/null +++ b/common/arg.h @@ -0,0 +1,77 @@ +#pragma once + +#include "common.h" + +#include <set> +#include <string> +#include <vector> + +// +// CLI argument parsing +// + +struct common_arg { + std::set<enum llama_example> examples = {LLAMA_EXAMPLE_COMMON}; + std::vector<const char *> args; + const char * value_hint = nullptr; // help text or example for arg value + const char * value_hint_2 = nullptr; // for second arg value + const char * env = nullptr; + std::string help; + bool is_sparam = false; // is current arg a sampling param? + void (*handler_void) (common_params & params) = nullptr; + void (*handler_string) (common_params & params, const std::string &) = nullptr; + void (*handler_str_str)(common_params & params, const std::string &, const std::string &) = nullptr; + void (*handler_int) (common_params & params, int) = nullptr; + + common_arg( + const std::initializer_list<const char *> & args, + const char * value_hint, + const std::string & help, + void (*handler)(common_params & params, const std::string &) + ) : args(args), value_hint(value_hint), help(help), handler_string(handler) {} + + common_arg( + const std::initializer_list<const char *> & args, + const char * value_hint, + const std::string & help, + void (*handler)(common_params & params, int) + ) : args(args), value_hint(value_hint), help(help), handler_int(handler) {} + + common_arg( + const std::initializer_list<const char *> & args, + const std::string & help, + void (*handler)(common_params & params) + ) : args(args), help(help), handler_void(handler) {} + + // support 2 values for arg + common_arg( + const std::initializer_list<const char *> & args, + const char * value_hint, + const char * value_hint_2, + const std::string & help, + void (*handler)(common_params & params, const std::string &, const std::string &) + ) : args(args), value_hint(value_hint), value_hint_2(value_hint_2), help(help), handler_str_str(handler) {} + + common_arg & set_examples(std::initializer_list<enum llama_example> examples); + common_arg & set_env(const char * env); + common_arg & set_sparam(); + bool in_example(enum llama_example ex); + bool get_value_from_env(std::string & output); + bool has_value_from_env(); + std::string to_string(); +}; + +struct common_params_context { + enum llama_example ex = LLAMA_EXAMPLE_COMMON; + common_params & params; + std::vector<common_arg> options; + void(*print_usage)(int, char **) = nullptr; + common_params_context(common_params & params) : params(params) {} +}; + +// parse input arguments from CLI +// if one argument has invalid value, it will automatically display usage of the specific argument (and not the full usage 
message) +bool common_params_parse(int argc, char ** argv, common_params & params, llama_example ex, void(*print_usage)(int, char **) = nullptr); + +// function to be used by test-arg-parser +common_params_context common_params_parser_init(common_params & params, llama_example ex, void(*print_usage)(int, char **) = nullptr); diff --git a/common/base64.hpp b/common/base64.hpp new file mode 100644 index 0000000000000000000000000000000000000000..563247a6e5f7dba837c07a509026d8b36e61387c --- /dev/null +++ b/common/base64.hpp @@ -0,0 +1,392 @@ +/* +This is free and unencumbered software released into the public domain. + +Anyone is free to copy, modify, publish, use, compile, sell, or +distribute this software, either in source code form or as a compiled +binary, for any purpose, commercial or non-commercial, and by any +means. + +In jurisdictions that recognize copyright laws, the author or authors +of this software dedicate any and all copyright interest in the +software to the public domain. We make this dedication for the benefit +of the public at large and to the detriment of our heirs and +successors. We intend this dedication to be an overt act of +relinquishment in perpetuity of all present and future rights to this +software under copyright law. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +OTHER DEALINGS IN THE SOFTWARE. + +For more information, please refer to <https://unlicense.org> +*/ + +#ifndef PUBLIC_DOMAIN_BASE64_HPP_ +#define PUBLIC_DOMAIN_BASE64_HPP_ + +#include <cstdint> +#include <iterator> +#include <stdexcept> +#include <string> + +class base64_error : public std::runtime_error +{ +public: + using std::runtime_error::runtime_error; +}; + +class base64 +{ +public: + enum class alphabet + { + /** the alphabet is detected automatically */ + auto_, + /** the standard base64 alphabet is used */ + standard, + /** like `standard` except that the characters `+` and `/` are replaced by `-` and `_` respectively*/ + url_filename_safe + }; + + enum class decoding_behavior + { + /** if the input is not padded, the remaining bits are ignored */ + moderate, + /** if a padding character is encountered, decoding is finished */ + loose + }; + + /** + Encodes all the elements from `in_begin` to `in_end` to `out`. + + @warning The source and destination cannot overlap. The destination must be able to hold at least + `required_encode_size(std::distance(in_begin, in_end))`, otherwise the behavior depends on the output iterator. + + @tparam Input_iterator the source; the returned elements are cast to `std::uint8_t` and should not be greater than + 8 bits + @tparam Output_iterator the destination; the elements written to it are from the type `char` + @param in_begin the beginning of the source + @param in_end the ending of the source + @param out the destination iterator + @param alphabet which alphabet should be used + @returns the iterator to the next element past the last element copied + @throws see `Input_iterator` and `Output_iterator` + */ + template<typename Input_iterator, typename Output_iterator> + static Output_iterator encode(Input_iterator in_begin, Input_iterator in_end, Output_iterator out, + alphabet alphabet = alphabet::standard) + { + constexpr auto pad = '='; + const char* alpha = alphabet == alphabet::url_filename_safe + ? 
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_" + : "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; + + while (in_begin != in_end) { + std::uint8_t i0 = 0, i1 = 0, i2 = 0; + + // first character + i0 = static_cast(*in_begin); + ++in_begin; + + *out = alpha[i0 >> 2 & 0x3f]; + ++out; + + // part of first character and second + if (in_begin != in_end) { + i1 = static_cast(*in_begin); + ++in_begin; + + *out = alpha[((i0 & 0x3) << 4) | (i1 >> 4 & 0x0f)]; + ++out; + } else { + *out = alpha[(i0 & 0x3) << 4]; + ++out; + + // last padding + *out = pad; + ++out; + + // last padding + *out = pad; + ++out; + + break; + } + + // part of second character and third + if (in_begin != in_end) { + i2 = static_cast(*in_begin); + ++in_begin; + + *out = alpha[((i1 & 0xf) << 2) | (i2 >> 6 & 0x03)]; + ++out; + } else { + *out = alpha[(i1 & 0xf) << 2]; + ++out; + + // last padding + *out = pad; + ++out; + + break; + } + + // rest of third + *out = alpha[i2 & 0x3f]; + ++out; + } + + return out; + } + /** + Encodes a string. + + @param str the string that should be encoded + @param alphabet which alphabet should be used + @returns the encoded base64 string + @throws see base64::encode() + */ + static std::string encode(const std::string& str, alphabet alphabet = alphabet::standard) + { + std::string result; + + result.reserve(required_encode_size(str.length()) + 1); + + encode(str.begin(), str.end(), std::back_inserter(result), alphabet); + + return result; + } + /** + Encodes a char array. + + @param buffer the char array + @param size the size of the array + @param alphabet which alphabet should be used + @returns the encoded string + */ + static std::string encode(const char* buffer, std::size_t size, alphabet alphabet = alphabet::standard) + { + std::string result; + + result.reserve(required_encode_size(size) + 1); + + encode(buffer, buffer + size, std::back_inserter(result), alphabet); + + return result; + } + /** + Decodes all the elements from `in_begin` to `in_end` to `out`. `in_begin` may point to the same location as `out`, + in other words: inplace decoding is possible. + + @warning The destination must be able to hold at least `required_decode_size(std::distance(in_begin, in_end))`, + otherwise the behavior depends on the output iterator. 
+ + @tparam Input_iterator the source; the returned elements are cast to `char` + @tparam Output_iterator the destination; the elements written to it are from the type `std::uint8_t` + @param in_begin the beginning of the source + @param in_end the ending of the source + @param out the destination iterator + @param alphabet which alphabet should be used + @param behavior the behavior when an error was detected + @returns the iterator to the next element past the last element copied + @throws base64_error depending on the set behavior + @throws see `Input_iterator` and `Output_iterator` + */ + template<typename Input_iterator, typename Output_iterator> + static Output_iterator decode(Input_iterator in_begin, Input_iterator in_end, Output_iterator out, + alphabet alphabet = alphabet::auto_, + decoding_behavior behavior = decoding_behavior::moderate) + { + //constexpr auto pad = '='; + std::uint8_t last = 0; + auto bits = 0; + + while (in_begin != in_end) { + auto c = *in_begin; + ++in_begin; + + if (c == '=') { + break; + } + + auto part = _base64_value(alphabet, c); + + // enough bits for one byte + if (bits + 6 >= 8) { + *out = (last << (8 - bits)) | (part >> (bits - 2)); + ++out; + + bits -= 2; + } else { + bits += 6; + } + + last = part; + } + + // check padding + if (behavior != decoding_behavior::loose) { + while (in_begin != in_end) { + auto c = *in_begin; + ++in_begin; + + if (c != '=') { + throw base64_error("invalid base64 character."); + } + } + } + + return out; + } + /** + Decodes a string. + + @param str the base64 encoded string + @param alphabet which alphabet should be used + @param behavior the behavior when an error was detected + @returns the decoded string + @throws see base64::decode() + */ + static std::string decode(const std::string& str, alphabet alphabet = alphabet::auto_, + decoding_behavior behavior = decoding_behavior::moderate) + { + std::string result; + + result.reserve(max_decode_size(str.length())); + + decode(str.begin(), str.end(), std::back_inserter(result), alphabet, behavior); + + return result; + } + /** + Decodes a string. + + @param buffer the base64 encoded buffer + @param size the size of the buffer + @param alphabet which alphabet should be used + @param behavior the behavior when an error was detected + @returns the decoded string + @throws see base64::decode() + */ + static std::string decode(const char* buffer, std::size_t size, alphabet alphabet = alphabet::auto_, + decoding_behavior behavior = decoding_behavior::moderate) + { + std::string result; + + result.reserve(max_decode_size(size)); + + decode(buffer, buffer + size, std::back_inserter(result), alphabet, behavior); + + return result; + } + /** + Decodes a string inplace. + + @param[in,out] str the base64 encoded string + @param alphabet which alphabet should be used + @param behavior the behavior when an error was detected + @throws base64::decode_inplace() + */ + static void decode_inplace(std::string& str, alphabet alphabet = alphabet::auto_, + decoding_behavior behavior = decoding_behavior::moderate) + { + str.resize(decode(str.begin(), str.end(), str.begin(), alphabet, behavior) - str.begin()); + } + /** + Decodes a char array inplace. 
+
+     @param[in,out] str the string array
+     @param size the length of the array
+     @param alphabet which alphabet should be used
+     @param behavior the behavior when an error was detected
+     @returns the pointer to the next element past the last element decoded
+     @throws base64::decode_inplace()
+    */
+    static char* decode_inplace(char* str, std::size_t size, alphabet alphabet = alphabet::auto_,
+                                decoding_behavior behavior = decoding_behavior::moderate)
+    {
+        return decode(str, str + size, str, alphabet, behavior);
+    }
+    /**
+     Returns the required decoding size for a given size. The value is calculated with the following formula:
+
+     $$
+     \lceil \frac{size}{4} \rceil \cdot 3
+     $$
+
+     @param size the size of the encoded input
+     @returns the size of the resulting decoded buffer; this is the absolute maximum
+    */
+    static std::size_t max_decode_size(std::size_t size) noexcept
+    {
+        return (size / 4 + (size % 4 ? 1 : 0)) * 3;
+    }
+    /**
+     Returns the required encoding size for a given size. The value is calculated with the following formula:
+
+     $$
+     \lceil \frac{size}{3} \rceil \cdot 4
+     $$
+
+     @param size the size of the decoded input
+     @returns the size of the resulting encoded buffer
+    */
+    static std::size_t required_encode_size(std::size_t size) noexcept
+    {
+        return (size / 3 + (size % 3 ? 1 : 0)) * 4;
+    }
+
+private:
+    static std::uint8_t _base64_value(alphabet& alphabet, char c)
+    {
+        if (c >= 'A' && c <= 'Z') {
+            return c - 'A';
+        } else if (c >= 'a' && c <= 'z') {
+            return c - 'a' + 26;
+        } else if (c >= '0' && c <= '9') {
+            return c - '0' + 52;
+        }
+
+        // comes down to alphabet
+        if (alphabet == alphabet::standard) {
+            if (c == '+') {
+                return 62;
+            } else if (c == '/') {
+                return 63;
+            }
+        } else if (alphabet == alphabet::url_filename_safe) {
+            if (c == '-') {
+                return 62;
+            } else if (c == '_') {
+                return 63;
+            }
+        } // auto detect
+        else {
+            if (c == '+') {
+                alphabet = alphabet::standard;
+
+                return 62;
+            } else if (c == '/') {
+                alphabet = alphabet::standard;
+
+                return 63;
+            } else if (c == '-') {
+                alphabet = alphabet::url_filename_safe;
+
+                return 62;
+            } else if (c == '_') {
+                alphabet = alphabet::url_filename_safe;
+
+                return 63;
+            }
+        }
+
+        throw base64_error("invalid base64 character.");
+    }
+};
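+
+// Usage sketch (illustrative, not part of the original header): round-tripping
+// a short string through the interfaces declared above. The encoded literal is
+// what standard base64 produces for this input.
+//
+//     std::string enc = base64::encode(std::string("hello world"));
+//     // enc == "aGVsbG8gd29ybGQ="
+//     std::string dec = base64::decode(enc); // alphabet is auto-detected
+//     // dec == "hello world"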
+
+#endif // !PUBLIC_DOMAIN_BASE64_HPP_
diff --git a/common/build-info.cpp.in b/common/build-info.cpp.in
new file mode 100644
index 0000000000000000000000000000000000000000..0b945aa68fff3e77b881328e4fd63f6e2f9017ee
--- /dev/null
+++ b/common/build-info.cpp.in
@@ -0,0 +1,4 @@
+int LLAMA_BUILD_NUMBER = @BUILD_NUMBER@;
+char const *LLAMA_COMMIT = "@BUILD_COMMIT@";
+char const *LLAMA_COMPILER = "@BUILD_COMPILER@";
+char const *LLAMA_BUILD_TARGET = "@BUILD_TARGET@";
diff --git a/common/common.cpp b/common/common.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..c8cbaae11cf1c665984f9592cd5ac5f72677ae8d
--- /dev/null
+++ b/common/common.cpp
@@ -0,0 +1,2104 @@
+#if defined(_MSC_VER)
+#define _SILENCE_CXX17_CODECVT_HEADER_DEPRECATION_WARNING
+#endif
+
+#include "common.h"
+#include "log.h"
+// Change JSON_ASSERT from assert() to GGML_ASSERT:
+#define JSON_ASSERT GGML_ASSERT
+#include "json.hpp"
+#include "json-schema-to-grammar.h"
+#include "llama.h"
+
+#include <algorithm>
+#include <cinttypes>
+#include <climits>
+#include <cmath>
+#include <codecvt>
+#include <cstdarg>
+#include <cstring>
+#include <ctime>
+#include <fstream>
+#include <iostream>
+#include <iterator>
+#include <regex>
+#include <sstream>
+#include <string>
+#include <thread>
+#include <unordered_map>
+#include <unordered_set>
+#include <vector>
+
+#if defined(__APPLE__) && defined(__MACH__)
+#include <sys/types.h>
+#include <sys/sysctl.h>
+#endif
+
+#if defined(_WIN32)
+#define WIN32_LEAN_AND_MEAN
+#ifndef NOMINMAX
+#   define NOMINMAX
+#endif
+#include <locale>
+#include <windows.h>
+#include <fcntl.h>
+#include <io.h>
+#else
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#endif
+#if defined(LLAMA_USE_CURL)
+#include <curl/curl.h>
+#include <curl/easy.h>
+#include <future>
+#endif
+
+#if defined(_MSC_VER)
+#pragma warning(disable: 4244 4267) // possible loss of data
+#endif
+
+#if defined(LLAMA_USE_CURL)
+#ifdef __linux__
+#include <linux/limits.h>
+#elif defined(_WIN32)
+#define PATH_MAX MAX_PATH
+#else
+#include <sys/syslimits.h>
+#endif
+#define LLAMA_CURL_MAX_URL_LENGTH 2084 // Maximum URL Length in Chrome: 2083
+#endif // LLAMA_USE_CURL
+
+using json = nlohmann::ordered_json;
+
+//
+// CPU utils
+//
+
+int32_t cpu_get_num_physical_cores() {
+#ifdef __linux__
+    // enumerate the set of thread siblings, num entries is num cores
+    std::unordered_set<std::string> siblings;
+    for (uint32_t cpu=0; cpu < UINT32_MAX; ++cpu) {
+        std::ifstream thread_siblings("/sys/devices/system/cpu/cpu"
+            + std::to_string(cpu) + "/topology/thread_siblings");
+        if (!thread_siblings.is_open()) {
+            break; // no more cpus
+        }
+        std::string line;
+        if (std::getline(thread_siblings, line)) {
+            siblings.insert(line);
+        }
+    }
+    if (!siblings.empty()) {
+        return static_cast<int32_t>(siblings.size());
+    }
+#elif defined(__APPLE__) && defined(__MACH__)
+    int32_t num_physical_cores;
+    size_t len = sizeof(num_physical_cores);
+    int result = sysctlbyname("hw.perflevel0.physicalcpu", &num_physical_cores, &len, NULL, 0);
+    if (result == 0) {
+        return num_physical_cores;
+    }
+    result = sysctlbyname("hw.physicalcpu", &num_physical_cores, &len, NULL, 0);
+    if (result == 0) {
+        return num_physical_cores;
+    }
+#elif defined(_WIN32) && (_WIN32_WINNT >= 0x0601) && !defined(__MINGW64__) // windows 7 and later
+    // TODO: windows + arm64 + mingw64
+    unsigned int n_threads_win = std::thread::hardware_concurrency();
+    unsigned int default_threads = n_threads_win > 0 ? (n_threads_win <= 4 ? n_threads_win : n_threads_win / 2) : 4;
+
+    DWORD buffer_size = 0;
+    if (!GetLogicalProcessorInformationEx(RelationProcessorCore, nullptr, &buffer_size)) {
+        if (GetLastError() != ERROR_INSUFFICIENT_BUFFER) {
+            return default_threads;
+        }
+    }
+
+    std::vector<char> buffer(buffer_size);
+    if (!GetLogicalProcessorInformationEx(RelationProcessorCore, reinterpret_cast<PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX>(buffer.data()), &buffer_size)) {
+        return default_threads;
+    }
+
+    int32_t num_physical_cores = 0;
+    PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX info = reinterpret_cast<PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX>(buffer.data());
+    while (buffer_size > 0) {
+        if (info->Relationship == RelationProcessorCore) {
+            num_physical_cores += info->Processor.GroupCount;
+        }
+        buffer_size -= info->Size;
+        info = reinterpret_cast<PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX>(reinterpret_cast<char *>(info) + info->Size);
+    }
+
+    return num_physical_cores > 0 ? num_physical_cores : default_threads;
+#endif
+    unsigned int n_threads = std::thread::hardware_concurrency();
+    return n_threads > 0 ? (n_threads <= 4 ? n_threads : n_threads / 2) : 4;
+}
+
+#if defined(__x86_64__) && defined(__linux__) && !defined(__ANDROID__)
+#include <pthread.h>
+
+static void cpuid(unsigned leaf, unsigned subleaf,
+                  unsigned *eax, unsigned *ebx, unsigned *ecx, unsigned *edx) {
+    __asm__("movq\t%%rbx,%%rsi\n\t"
+            "cpuid\n\t"
+            "xchgq\t%%rbx,%%rsi"
+            : "=a"(*eax), "=S"(*ebx), "=c"(*ecx), "=d"(*edx)
+            : "0"(leaf), "2"(subleaf));
+}
+
+static int pin_cpu(int cpu) {
+    cpu_set_t mask;
+    CPU_ZERO(&mask);
+    CPU_SET(cpu, &mask);
+    return pthread_setaffinity_np(pthread_self(), sizeof(mask), &mask);
+}
+
+static bool is_hybrid_cpu(void) {
+    unsigned eax, ebx, ecx, edx;
+    cpuid(7, 0, &eax, &ebx, &ecx, &edx);
+    return !!(edx & (1u << 15));
+}
+
+static bool is_running_on_efficiency_core(void) {
+    unsigned eax, ebx, ecx, edx;
+    cpuid(0x1a, 0, &eax, &ebx, &ecx, &edx);
+    int intel_atom = 0x20;
+    int core_type = (eax & 0xff000000u) >> 24;
+    return core_type == intel_atom;
+}
+
+static int cpu_count_math_cpus(int n_cpu) {
+    int result = 0;
+    for (int cpu = 0; cpu < n_cpu; ++cpu) {
+        if (pin_cpu(cpu)) {
+            return -1;
+        }
+        if (is_running_on_efficiency_core()) {
+            continue; // efficiency cores harm lockstep threading
+        }
+        ++cpu; // hyperthreading isn't useful for linear algebra
+        ++result;
+    }
+    return result;
+}
+
+#endif // __x86_64__ && __linux__
+
+/**
+ * Returns number of CPUs on system that are useful for math.
+ */
+int32_t cpu_get_num_math() {
+#if defined(__x86_64__) && defined(__linux__) && !defined(__ANDROID__)
+    int n_cpu = sysconf(_SC_NPROCESSORS_ONLN);
+    if (n_cpu < 1) {
+        return cpu_get_num_physical_cores();
+    }
+    if (is_hybrid_cpu()) {
+        cpu_set_t affinity;
+        if (!pthread_getaffinity_np(pthread_self(), sizeof(affinity), &affinity)) {
+            int result = cpu_count_math_cpus(n_cpu);
+            pthread_setaffinity_np(pthread_self(), sizeof(affinity), &affinity);
+            if (result > 0) {
+                return result;
+            }
+        }
+    }
+#endif
+    return cpu_get_num_physical_cores();
+}
+
+// Helper for setting process priority
+
+#if defined(_WIN32)
+
+bool set_process_priority(enum ggml_sched_priority prio) {
+    if (prio == GGML_SCHED_PRIO_NORMAL) {
+        return true;
+    }
+
+    DWORD p = NORMAL_PRIORITY_CLASS;
+    switch (prio) {
+        case GGML_SCHED_PRIO_NORMAL:   p = NORMAL_PRIORITY_CLASS;       break;
+        case GGML_SCHED_PRIO_MEDIUM:   p = ABOVE_NORMAL_PRIORITY_CLASS; break;
+        case GGML_SCHED_PRIO_HIGH:     p = HIGH_PRIORITY_CLASS;         break;
+        case GGML_SCHED_PRIO_REALTIME: p = REALTIME_PRIORITY_CLASS;     break;
+    }
+
+    if (!SetPriorityClass(GetCurrentProcess(), p)) {
+        LOG_WRN("failed to set process priority class %d : (%d)\n", prio, (int) GetLastError());
+        return false;
+    }
+
+    return true;
+}
+
+#else // MacOS and POSIX
+#include <sys/types.h>
+#include <sys/resource.h>
+
+bool set_process_priority(enum ggml_sched_priority prio) {
+    if (prio == GGML_SCHED_PRIO_NORMAL) {
+        return true;
+    }
+
+    int p = 0;
+    switch (prio) {
+        case GGML_SCHED_PRIO_NORMAL:   p =   0; break;
+        case GGML_SCHED_PRIO_MEDIUM:   p =  -5; break;
+        case GGML_SCHED_PRIO_HIGH:     p = -10; break;
+        case GGML_SCHED_PRIO_REALTIME: p = -20; break;
+    }
+
+    // setpriority() returns 0 on success and -1 on error
+    if (setpriority(PRIO_PROCESS, 0, p) != 0) {
+        LOG_WRN("failed to set process priority %d : %s (%d)\n", prio, strerror(errno), errno);
+        return false;
+    }
+    return true;
+}
+
+#endif
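+
+// Usage sketch (illustrative, not in the original sources): raising the
+// scheduling priority before heavy compute. On the POSIX path above this maps
+// to nice values (-5/-10/-20), which may silently require elevated privileges.
+//
+//     if (!set_process_priority(GGML_SCHED_PRIO_HIGH)) {
+//         // keep going with the default priority
+//     }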
+
+//
+// CLI argument parsing
+//
+
+
+void postprocess_cpu_params(cpu_params& cpuparams, const cpu_params* role_model) {
+    int32_t n_set = 0;
+
+    if (cpuparams.n_threads < 0) {
+        // Assuming everything about cpuparams is invalid
+        if (role_model != nullptr) {
+            cpuparams = *role_model;
+        } else {
+            cpuparams.n_threads = cpu_get_num_math();
+        }
+    }
+
+    for (int32_t i = 0; i < GGML_MAX_N_THREADS; i++) {
+        if (cpuparams.cpumask[i]) {
+            n_set++;
+        }
+    }
+
+    if (n_set && n_set < cpuparams.n_threads) {
+        // Not enough set bits, may experience performance issues.
+        LOG_WRN("Not enough set bits in CPU mask (%d) to satisfy requested thread count: %d\n", n_set, cpuparams.n_threads);
+    }
+}
+
+bool parse_cpu_range(const std::string & range, bool (&boolmask)[GGML_MAX_N_THREADS]) {
+    size_t dash_loc = range.find('-');
+    if (dash_loc == std::string::npos) {
+        LOG_ERR("Format of CPU range is invalid! Expected [<start>]-[<end>].\n");
+        return false;
+    }
+
+    size_t start_i;
+    size_t end_i;
+
+    if (dash_loc == 0) {
+        start_i = 0;
+    } else {
+        start_i = std::stoull(range.substr(0, dash_loc));
+        if (start_i >= GGML_MAX_N_THREADS) {
+            LOG_ERR("Start index out of bounds!\n");
+            return false;
+        }
+    }
+
+    if (dash_loc == range.length() - 1) {
+        end_i = GGML_MAX_N_THREADS - 1;
+    } else {
+        end_i = std::stoull(range.substr(dash_loc + 1));
+        if (end_i >= GGML_MAX_N_THREADS) {
+            LOG_ERR("End index out of bounds!\n");
+            return false;
+        }
+    }
+
+    for (size_t i = start_i; i <= end_i; i++) {
+        boolmask[i] = true;
+    }
+
+    return true;
+}
+
+bool parse_cpu_mask(const std::string & mask, bool (&boolmask)[GGML_MAX_N_THREADS]) {
+    // Discard potential 0x prefix
+    size_t start_i = 0;
+    if (mask.length() >= 2 && mask.substr(0, 2) == "0x") {
+        start_i = 2;
+    }
+
+    size_t num_digits = mask.length() - start_i;
+    if (num_digits > 128) num_digits = 128;
+
+    size_t end_i = num_digits + start_i;
+
+    for (size_t i = start_i, n = (num_digits*4 - 1); i < end_i; i++, n-=4) {
+        char c = mask.at(i);
+        int8_t id = c;
+
+        if ((c >= '0' && c <= '9')) {
+            id -= '0';
+        } else if (c >= 'a' && c <= 'f') {
+            id -= 'a' - 10;
+        } else if (c >= 'A' && c <= 'F') {
+            id -= 'A' - 10;
+        } else {
+            LOG_ERR("Invalid hex character '%c' at position %d\n", c, int32_t(i));
+            return false;
+        }
+
+        boolmask[  n  ] = boolmask[  n  ] || ((id & 8) != 0);
+        boolmask[n - 1] = boolmask[n - 1] || ((id & 4) != 0);
+        boolmask[n - 2] = boolmask[n - 2] || ((id & 2) != 0);
+        boolmask[n - 3] = boolmask[n - 3] || ((id & 1) != 0);
+    }
+
+    return true;
+}
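+
+// Usage sketch (illustrative): both mask formats accepted above. "0x55" sets
+// every even CPU of the low byte; "2-5" sets CPUs 2 through 5 inclusive.
+//
+//     bool mask[GGML_MAX_N_THREADS] = { false };
+//     parse_cpu_mask("0x55", mask);  // CPUs 0, 2, 4, 6
+//     parse_cpu_range("2-5", mask);  // CPUs 2, 3, 4, 5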
+
+void common_init() {
+    llama_log_set([](ggml_log_level level, const char * text, void * /*user_data*/) {
+        if (LOG_DEFAULT_LLAMA <= common_log_verbosity_thold) {
+            common_log_add(common_log_main(), level, "%s", text);
+        }
+    }, NULL);
+
+#ifdef NDEBUG
+    const char * build_type = "";
+#else
+    const char * build_type = " (debug)";
+#endif
+
+    LOG_INF("build: %d (%s) with %s for %s%s\n", LLAMA_BUILD_NUMBER, LLAMA_COMMIT, LLAMA_COMPILER, LLAMA_BUILD_TARGET, build_type);
+}
+
+std::string common_params_get_system_info(const common_params & params) {
+    std::ostringstream os;
+
+    os << "system_info: n_threads = " << params.cpuparams.n_threads;
+    if (params.cpuparams_batch.n_threads != -1) {
+        os << " (n_threads_batch = " << params.cpuparams_batch.n_threads << ")";
+    }
+#if defined(_WIN32) && (_WIN32_WINNT >= 0x0601) && !defined(__MINGW64__) // windows 7 and later
+    // TODO: windows + arm64 + mingw64
+    DWORD logicalProcessorCount = GetActiveProcessorCount(ALL_PROCESSOR_GROUPS);
+    os << " / " << logicalProcessorCount << " | " << llama_print_system_info();
+#else
+    os << " / " << std::thread::hardware_concurrency() << " | " << llama_print_system_info();
+#endif
+
+    return os.str();
+}
+
+//
+// String utils
+//
+
+std::string string_format(const char * fmt, ...) {
+    va_list ap;
+    va_list ap2;
+    va_start(ap, fmt);
+    va_copy(ap2, ap);
+    int size = vsnprintf(NULL, 0, fmt, ap);
+    GGML_ASSERT(size >= 0 && size < INT_MAX); // NOLINT
+    std::vector<char> buf(size + 1);
+    int size2 = vsnprintf(buf.data(), size + 1, fmt, ap2);
+    GGML_ASSERT(size2 == size);
+    va_end(ap2);
+    va_end(ap);
+    return std::string(buf.data(), size);
+}
+
+std::string string_strip(const std::string & str) {
+    size_t start = 0;
+    size_t end = str.size();
+    while (start < end && std::isspace(str[start])) {
+        start++;
+    }
+    while (end > start && std::isspace(str[end - 1])) {
+        end--;
+    }
+    return str.substr(start, end - start);
+}
+
+std::string string_get_sortable_timestamp() {
+    using clock = std::chrono::system_clock;
+
+    const clock::time_point current_time = clock::now();
+    const time_t as_time_t = clock::to_time_t(current_time);
+    char timestamp_no_ns[100];
+    std::strftime(timestamp_no_ns, 100, "%Y_%m_%d-%H_%M_%S", std::localtime(&as_time_t));
+
+    const int64_t ns = std::chrono::duration_cast<std::chrono::nanoseconds>(
+        current_time.time_since_epoch() % 1000000000).count();
+    char timestamp_ns[11];
+    snprintf(timestamp_ns, 11, "%09" PRId64, ns);
+
+    return std::string(timestamp_no_ns) + "." + std::string(timestamp_ns);
+}
+
+void string_replace_all(std::string & s, const std::string & search, const std::string & replace) {
+    if (search.empty()) {
+        return;
+    }
+    std::string builder;
+    builder.reserve(s.length());
+    size_t pos = 0;
+    size_t last_pos = 0;
+    while ((pos = s.find(search, last_pos)) != std::string::npos) {
+        builder.append(s, last_pos, pos - last_pos);
+        builder.append(replace);
+        last_pos = pos + search.length();
+    }
+    builder.append(s, last_pos, std::string::npos);
+    s = std::move(builder);
+}
+
+std::string string_from(bool value) {
+    return value ? "true" : "false";
+}
+
+std::string string_from(const std::vector<int> & values) {
+    std::stringstream buf;
+
+    buf << "[ ";
+    bool first = true;
+    for (auto e : values) {
+        if (first) {
+            first = false;
+        } else {
+            buf << ", ";
+        }
+        buf << std::to_string(e);
+    }
+    buf << " ]";
+
+    return buf.str();
+}
+
+std::string string_from(const struct llama_context * ctx, const std::vector<llama_token> & tokens) {
+    std::stringstream buf;
+
+    buf << "[ ";
+
+    bool first = true;
+    for (const auto & token : tokens) {
+        if (!first) {
+            buf << ", ";
+        } else {
+            first = false;
+        }
+
+        auto detokenized = common_token_to_piece(ctx, token);
+
+        detokenized.erase(
+            std::remove_if(
+                detokenized.begin(),
+                detokenized.end(),
+                [](const unsigned char c) { return !std::isprint(c); }),
+            detokenized.end());
+
+        buf << "'" << detokenized << "'"
+            << ":" << std::to_string(token);
+    }
+
+    buf << " ]";
+
+    return buf.str();
+}
+
+std::string string_from(const struct llama_context * ctx, const struct llama_batch & batch) {
+    std::stringstream buf;
+
+    buf << "[ ";
+
+    bool first = true;
+    for (int i = 0; i < batch.n_tokens; ++i) {
+        if (!first) {
+            buf << ", ";
+        } else {
+            first = false;
+        }
+
+        auto detokenized = common_token_to_piece(ctx, batch.token[i]);
+
+        detokenized.erase(
+            std::remove_if(
+                detokenized.begin(),
+                detokenized.end(),
+                [](const unsigned char c) { return !std::isprint(c); }),
+            detokenized.end());
+
+        buf << "\n" << std::to_string(i)
+            << ":token '" << detokenized << "'"
+            << ":pos " << std::to_string(batch.pos[i])
+            << ":n_seq_id " << std::to_string(batch.n_seq_id[i])
+            << ":seq_id " << std::to_string(batch.seq_id[i][0])
+            << ":logits " << std::to_string(batch.logits[i]);
+    }
+
+    buf << " ]";
+
+    return buf.str();
+}
+
+void string_process_escapes(std::string & input) {
+    std::size_t input_len = input.length();
+    std::size_t output_idx = 0;
+
+    for (std::size_t input_idx = 0; input_idx < input_len; ++input_idx) {
+        if (input[input_idx] == '\\' && input_idx + 1 < input_len) {
+            switch (input[++input_idx]) {
+                case 'n':  input[output_idx++] = '\n'; break;
+                case 'r':  input[output_idx++] = '\r'; break;
+                case 't':  input[output_idx++] = '\t'; break;
+                case '\'': input[output_idx++] = '\''; break;
+                case '\"': input[output_idx++] = '\"'; break;
+                case '\\': input[output_idx++] = '\\'; break;
+                case 'x':
+                    // Handle \x12, etc
+                    if (input_idx + 2 < input_len) {
+                        const char x[3] = { input[input_idx + 1], input[input_idx + 2], 0 };
+                        char *err_p = nullptr;
+                        const long val = std::strtol(x, &err_p, 16);
+                        if (err_p == x + 2) {
+                            input_idx += 2;
+                            input[output_idx++] = char(val);
+                            break;
+                        }
+                    }
+                    // fall through
+                default:   input[output_idx++] = '\\';
+                           input[output_idx++] = input[input_idx]; break;
+            }
+        } else {
+            input[output_idx++] = input[input_idx];
+        }
+    }
+
+    input.resize(output_idx);
+}
+
+bool string_parse_kv_override(const char * data, std::vector<llama_model_kv_override> & overrides) {
+    const char * sep = strchr(data, '=');
+    if (sep == nullptr || sep - data >= 128) {
+        LOG_ERR("%s: malformed KV override '%s'\n", __func__, data);
+        return false;
+    }
+    llama_model_kv_override kvo;
+    std::strncpy(kvo.key, data, sep - data);
+    kvo.key[sep - data] = 0;
+    sep++;
+    if (strncmp(sep, "int:", 4) == 0) {
+        sep += 4;
+        kvo.tag = LLAMA_KV_OVERRIDE_TYPE_INT;
+        kvo.val_i64 = std::atol(sep);
+    } else if (strncmp(sep, "float:", 6) == 0) {
+        sep += 6;
+        kvo.tag = LLAMA_KV_OVERRIDE_TYPE_FLOAT;
+        kvo.val_f64 = std::atof(sep);
+    } else if (strncmp(sep, "bool:", 5) == 0) {
+        sep += 5;
+        kvo.tag = LLAMA_KV_OVERRIDE_TYPE_BOOL;
+        if (std::strcmp(sep, "true") == 0) {
+            kvo.val_bool = true;
+        } else if (std::strcmp(sep, "false") == 0) {
+            kvo.val_bool = false;
+        } else {
+            LOG_ERR("%s: invalid boolean value for KV override '%s'\n", __func__, data);
+            return false;
+        }
+    } else if (strncmp(sep, "str:", 4) == 0) {
+        sep += 4;
+        kvo.tag = LLAMA_KV_OVERRIDE_TYPE_STR;
+        if (strlen(sep) > 127) {
+            LOG_ERR("%s: malformed KV override '%s', value cannot exceed 127 chars\n", __func__, data);
+            return false;
+        }
+        strncpy(kvo.val_str, sep, 127);
+        kvo.val_str[127] = '\0';
+    } else {
+        LOG_ERR("%s: invalid type for KV override '%s'\n", __func__, data);
+        return false;
+    }
+    overrides.emplace_back(std::move(kvo));
+    return true;
+}
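+
+// Accepted "key=type:value" forms handled above (key names illustrative):
+//
+//     tokenizer.ggml.add_bos_token=bool:false
+//     some.int.key=int:42
+//     some.float.key=float:0.5
+//     some.str.key=str:hello       // value limited to 127 chars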
+
+//
+// Filesystem utils
+//
+
+// Validate if a filename is safe to use
+// To validate a full path, split the path by the OS-specific path separator, and validate each part with this function
+bool fs_validate_filename(const std::string & filename) {
+    if (!filename.length()) {
+        // Empty filename invalid
+        return false;
+    }
+    if (filename.length() > 255) {
+        // Limit at common largest possible filename on Linux filesystems
+        // to avoid unnecessary further validation
+        // (On systems with smaller limits it will be caught by the OS)
+        return false;
+    }
+
+    std::u32string filename_utf32;
+    try {
+        std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t> converter;
+        filename_utf32 = converter.from_bytes(filename);
+
+        // If the reverse conversion mismatches, it means overlong UTF-8 sequences were used,
+        // or invalid encodings were encountered. Reject such attempts
+        std::string filename_reencoded = converter.to_bytes(filename_utf32);
+        if (filename_reencoded != filename) {
+            return false;
+        }
+    } catch (const std::exception &) {
+        return false;
+    }
+
+    // Check for forbidden codepoints:
+    // - Control characters
+    // - Unicode equivalents of illegal characters
+    // - UTF-16 surrogate pairs
+    // - UTF-8 replacement character
+    // - Byte order mark (BOM)
+    // - Illegal characters: / \ : * ? " < > |
+    for (char32_t c : filename_utf32) {
+        if (c <= 0x1F // Control characters (C0)
+            || c == 0x7F // Control characters (DEL)
+            || (c >= 0x80 && c <= 0x9F) // Control characters (C1)
+            || c == 0xFF0E // Fullwidth Full Stop (period equivalent)
+            || c == 0x2215 // Division Slash (forward slash equivalent)
+            || c == 0x2216 // Set Minus (backslash equivalent)
+            || (c >= 0xD800 && c <= 0xDFFF) // UTF-16 surrogate pairs
+            || c == 0xFFFD // Replacement Character (UTF-8)
+            || c == 0xFEFF // Byte Order Mark (BOM)
+            || c == '/' || c == '\\' || c == ':' || c == '*' // Illegal characters
+            || c == '?' || c == '"' || c == '<' || c == '>' || c == '|') {
+            return false;
+        }
+    }
+
+    // Reject any leading or trailing ' ', or any trailing '.', these are stripped on Windows and will cause a different filename
+    // Unicode and other whitespace is not affected, only 0x20 space
+    if (filename.front() == ' ' || filename.back() == ' ' || filename.back() == '.') {
+        return false;
+    }
+
+    // Reject any ".." (currently stricter than necessary, it should be fine to just check for == ".." instead)
+    if (filename.find("..") != std::string::npos) {
+        return false;
+    }
+
+    // Reject "."
+    if (filename == ".") {
+        return false;
+    }
+
+    return true;
+}
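+
+// Illustrative outcomes of the checks above:
+//
+//     fs_validate_filename("model.gguf");  // true
+//     fs_validate_filename("..");          // false (path traversal)
+//     fs_validate_filename("foo/bar");     // false (illegal '/')
+//     fs_validate_filename("trailing. ");  // false (trailing space)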
+
+// returns true if successful, false otherwise
+bool fs_create_directory_with_parents(const std::string & path) {
+#ifdef _WIN32
+    std::wstring_convert<std::codecvt_utf8<wchar_t>> converter;
+    std::wstring wpath = converter.from_bytes(path);
+
+    // if the path already exists, check whether it's a directory
+    const DWORD attributes = GetFileAttributesW(wpath.c_str());
+    if ((attributes != INVALID_FILE_ATTRIBUTES) && (attributes & FILE_ATTRIBUTE_DIRECTORY)) {
+        return true;
+    }
+
+    size_t pos_slash = 0;
+
+    // process path from front to back, procedurally creating directories
+    while ((pos_slash = path.find('\\', pos_slash)) != std::string::npos) {
+        const std::wstring subpath = wpath.substr(0, pos_slash);
+        const wchar_t * test = subpath.c_str();
+
+        const bool success = CreateDirectoryW(test, NULL);
+        if (!success) {
+            const DWORD error = GetLastError();
+
+            // if the path already exists, ensure that it's a directory
+            if (error == ERROR_ALREADY_EXISTS) {
+                const DWORD attributes = GetFileAttributesW(subpath.c_str());
+                if (attributes == INVALID_FILE_ATTRIBUTES || !(attributes & FILE_ATTRIBUTE_DIRECTORY)) {
+                    return false;
+                }
+            } else {
+                return false;
+            }
+        }
+
+        pos_slash += 1;
+    }
+
+    return true;
+#else
+    // if the path already exists, check whether it's a directory
+    struct stat info;
+    if (stat(path.c_str(), &info) == 0) {
+        return S_ISDIR(info.st_mode);
+    }
+
+    size_t pos_slash = 1; // skip leading slashes for directory creation
+
+    // process path from front to back, procedurally creating directories
+    while ((pos_slash = path.find('/', pos_slash)) != std::string::npos) {
+        const std::string subpath = path.substr(0, pos_slash);
+        struct stat info;
+
+        // if the path already exists, ensure that it's a directory
+        if (stat(subpath.c_str(), &info) == 0) {
+            if (!S_ISDIR(info.st_mode)) {
+                return false;
+            }
+        } else {
+            // create parent directories
+            const int ret = mkdir(subpath.c_str(), 0755);
+            if (ret != 0) {
+                return false;
+            }
+        }
+
+        pos_slash += 1;
+    }
+
+    return true;
+#endif // _WIN32
+}
+
+std::string fs_get_cache_directory() {
+    std::string cache_directory = "";
+    auto ensure_trailing_slash = [](std::string p) {
+        // Make sure to add trailing slash
+        if (p.back() != DIRECTORY_SEPARATOR) {
+            p += DIRECTORY_SEPARATOR;
+        }
+        return p;
+    };
+    if (getenv("LLAMA_CACHE")) {
+        cache_directory = std::getenv("LLAMA_CACHE");
+    } else {
+#ifdef __linux__
+        if (std::getenv("XDG_CACHE_HOME")) {
+            cache_directory = std::getenv("XDG_CACHE_HOME");
+        } else {
+            cache_directory = std::getenv("HOME") + std::string("/.cache/");
+        }
+#elif defined(__APPLE__)
+        cache_directory = std::getenv("HOME") + std::string("/Library/Caches/");
+#elif defined(_WIN32)
+        cache_directory = std::getenv("LOCALAPPDATA");
+#endif // __linux__
+        cache_directory = ensure_trailing_slash(cache_directory);
+        cache_directory += "llama.cpp";
+    }
+    return ensure_trailing_slash(cache_directory);
+}
+
+std::string fs_get_cache_file(const std::string & filename) {
+    GGML_ASSERT(filename.find(DIRECTORY_SEPARATOR) == std::string::npos);
+    std::string cache_directory = fs_get_cache_directory();
+    const bool success = fs_create_directory_with_parents(cache_directory);
+    if (!success) {
+        throw std::runtime_error("failed to create cache directory: " + cache_directory);
+    }
+    return cache_directory + filename;
+}
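+
+// Resolution order implemented above (paths illustrative):
+//
+//     LLAMA_CACHE=/tmp/llm  -> /tmp/llm/
+//     else, on Linux:          $XDG_CACHE_HOME/llama.cpp/ or $HOME/.cache/llama.cpp/
+//     on macOS:                $HOME/Library/Caches/llama.cpp/
+//     on Windows:              %LOCALAPPDATA%\llama.cpp\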
+
+
+//
+// Model utils
+//
+struct common_init_result common_init_from_params(common_params & params) {
+    common_init_result iparams;
+    auto mparams = common_model_params_to_llama(params);
+
+    llama_model * model = nullptr;
+
+    if (!params.hf_repo.empty() && !params.hf_file.empty()) {
+        model = common_load_model_from_hf(params.hf_repo.c_str(), params.hf_file.c_str(), params.model.c_str(), params.hf_token.c_str(), mparams);
+    } else if (!params.model_url.empty()) {
+        model = common_load_model_from_url(params.model_url.c_str(), params.model.c_str(), params.hf_token.c_str(), mparams);
+    } else {
+        model = llama_load_model_from_file(params.model.c_str(), mparams);
+    }
+
+    if (model == NULL) {
+        LOG_ERR("%s: failed to load model '%s'\n", __func__, params.model.c_str());
+        return iparams;
+    }
+
+    if (params.reranking) {
+        bool ok = true;
+
+        if (llama_token_bos(model) == LLAMA_TOKEN_NULL) {
+            LOG_WRN("%s: warning: model does not have a BOS token, reranking will not work\n", __func__);
+            ok = false;
+        }
+
+        if (llama_token_eos(model) == LLAMA_TOKEN_NULL) {
+            LOG_WRN("%s: warning: model does not have an EOS token, reranking will not work\n", __func__);
+            ok = false;
+        }
+
+        if (llama_token_sep(model) == LLAMA_TOKEN_NULL) {
+            LOG_WRN("%s: warning: model does not have a SEP token, reranking will not work\n", __func__);
+            ok = false;
+        }
+
+        if (!ok) {
+            llama_free_model(model);
+
+            return iparams;
+        }
+    }
+
+    auto cparams = common_context_params_to_llama(params);
+
+    llama_context * lctx = llama_new_context_with_model(model, cparams);
+    if (lctx == NULL) {
+        LOG_ERR("%s: failed to create context with model '%s'\n", __func__, params.model.c_str());
+        llama_free_model(model);
+        return iparams;
+    }
+
+    if (!params.control_vectors.empty()) {
+        if (params.control_vector_layer_start <= 0) params.control_vector_layer_start = 1;
+        if (params.control_vector_layer_end   <= 0) params.control_vector_layer_end   = llama_n_layer(model);
+
+        const auto cvec = common_control_vector_load(params.control_vectors);
+        if (cvec.n_embd == -1) {
+            llama_free(lctx);
+            llama_free_model(model);
+
+            return iparams;
+        }
+
+        int err = llama_control_vector_apply(lctx,
+                                             cvec.data.data(),
+                                             cvec.data.size(),
+                                             cvec.n_embd,
+                                             params.control_vector_layer_start,
+                                             params.control_vector_layer_end);
+        if (err) {
+            llama_free(lctx);
+            llama_free_model(model);
+
+            return iparams;
+        }
+    }
+
+    // load and optionally apply lora adapters
+    for (auto & la : params.lora_adapters) {
+        common_lora_adapter_container loaded_la;
+        loaded_la.path = la.path;
+        loaded_la.scale = la.scale;
+        loaded_la.adapter = llama_lora_adapter_init(model, la.path.c_str());
+        if (loaded_la.adapter == nullptr) {
+            LOG_ERR("%s: failed to apply lora adapter '%s'\n", __func__, la.path.c_str());
+            llama_free(lctx);
+            llama_free_model(model);
+            return iparams;
+        }
+        iparams.lora_adapters.push_back(loaded_la); // copy to list of loaded adapters
+    }
+    if (!params.lora_init_without_apply) {
+        common_lora_adapters_apply(lctx, iparams.lora_adapters);
+    }
+
+    if (params.sparams.ignore_eos && llama_token_eos(model) == LLAMA_TOKEN_NULL) {
+        LOG_WRN("%s: warning: model does not have an EOS token, ignoring --ignore-eos\n", __func__);
+        params.sparams.ignore_eos = false;
+    }
+
+    if (params.warmup) {
+        LOG_WRN("%s: warming up the model with an empty run - please wait ... (--no-warmup to disable)\n", __func__);
+
+        std::vector<llama_token> tmp;
+        llama_token bos = llama_token_bos(model);
+        llama_token eos = llama_token_eos(model);
+        // some models (e.g. T5) don't have a BOS token
+        if (bos != LLAMA_TOKEN_NULL) {
+            tmp.push_back(bos);
+        }
+        if (eos != LLAMA_TOKEN_NULL) {
+            tmp.push_back(eos);
+        }
+        if (tmp.empty()) {
+            tmp.push_back(0);
+        }
+
+        if (llama_model_has_encoder(model)) {
+            llama_encode(lctx, llama_batch_get_one(tmp.data(), tmp.size()));
+            llama_token decoder_start_token_id = llama_model_decoder_start_token(model);
+            if (decoder_start_token_id == -1) {
+                decoder_start_token_id = bos;
+            }
+            tmp.clear();
+            tmp.push_back(decoder_start_token_id);
+        }
+        if (llama_model_has_decoder(model)) {
+            llama_decode(lctx, llama_batch_get_one(tmp.data(), std::min(tmp.size(), (size_t) params.n_batch)));
+        }
+        llama_kv_cache_clear(lctx);
+        llama_synchronize(lctx);
+        llama_perf_context_reset(lctx);
+    }
+
+    iparams.model   = model;
+    iparams.context = lctx;
+
+    return iparams;
+}
+
+void common_lora_adapters_apply(struct llama_context * ctx, std::vector<common_lora_adapter_container> & lora_adapters) {
+    llama_lora_adapter_clear(ctx);
+    for (auto & la : lora_adapters) {
+        if (la.scale != 0.0f) {
+            llama_lora_adapter_set(ctx, la.adapter, la.scale);
+        }
+    }
+}
+
+struct llama_model_params common_model_params_to_llama(const common_params & params) {
+    auto mparams = llama_model_default_params();
+
+    if (params.n_gpu_layers != -1) {
+        mparams.n_gpu_layers = params.n_gpu_layers;
+    }
+    mparams.rpc_servers   = params.rpc_servers.c_str();
+    mparams.main_gpu      = params.main_gpu;
+    mparams.split_mode    = params.split_mode;
+    mparams.tensor_split  = params.tensor_split;
+    mparams.use_mmap      = params.use_mmap;
+    mparams.use_mlock     = params.use_mlock;
+    mparams.check_tensors = params.check_tensors;
+    if (params.kv_overrides.empty()) {
+        mparams.kv_overrides = NULL;
+    } else {
+        GGML_ASSERT(params.kv_overrides.back().key[0] == 0 && "KV overrides not terminated with empty key");
+        mparams.kv_overrides = params.kv_overrides.data();
+    }
+
+    return mparams;
+}
== "q4_1") { + return GGML_TYPE_Q4_1; + } + if (s == "iq4_nl") { + return GGML_TYPE_IQ4_NL; + } + if (s == "q5_0") { + return GGML_TYPE_Q5_0; + } + if (s == "q5_1") { + return GGML_TYPE_Q5_1; + } + + throw std::runtime_error("Unsupported cache type: " + s); +} + +struct llama_context_params common_context_params_to_llama(const common_params & params) { + auto cparams = llama_context_default_params(); + + cparams.n_ctx = params.n_ctx; + cparams.n_seq_max = params.n_parallel; + cparams.n_batch = params.n_batch; + cparams.n_ubatch = params.n_ubatch; + cparams.n_threads = params.cpuparams.n_threads; + cparams.n_threads_batch = params.cpuparams_batch.n_threads == -1 ? + params.cpuparams.n_threads : params.cpuparams_batch.n_threads; + cparams.logits_all = params.logits_all; + cparams.embeddings = params.embedding; + cparams.rope_scaling_type = params.rope_scaling_type; + cparams.rope_freq_base = params.rope_freq_base; + cparams.rope_freq_scale = params.rope_freq_scale; + cparams.yarn_ext_factor = params.yarn_ext_factor; + cparams.yarn_attn_factor = params.yarn_attn_factor; + cparams.yarn_beta_fast = params.yarn_beta_fast; + cparams.yarn_beta_slow = params.yarn_beta_slow; + cparams.yarn_orig_ctx = params.yarn_orig_ctx; + cparams.pooling_type = params.pooling_type; + cparams.attention_type = params.attention_type; + cparams.defrag_thold = params.defrag_thold; + cparams.cb_eval = params.cb_eval; + cparams.cb_eval_user_data = params.cb_eval_user_data; + cparams.offload_kqv = !params.no_kv_offload; + cparams.flash_attn = params.flash_attn; + cparams.no_perf = params.no_perf; + + if (params.reranking) { + cparams.embeddings = true; + cparams.pooling_type = LLAMA_POOLING_TYPE_RANK; + } + + cparams.type_k = kv_cache_type_from_str(params.cache_type_k); + cparams.type_v = kv_cache_type_from_str(params.cache_type_v); + + return cparams; +} + +struct ggml_threadpool_params ggml_threadpool_params_from_cpu_params(const cpu_params & params) { + struct ggml_threadpool_params tpp; + + ggml_threadpool_params_init(&tpp, params.n_threads); // setup the defaults + + if (params.mask_valid) { + std::memcpy(&tpp.cpumask, ¶ms.cpumask, GGML_MAX_N_THREADS); + } + + tpp.prio = params.priority; + tpp.poll = params.poll; + tpp.strict_cpu = params.strict_cpu; + + return tpp; +} + +#ifdef LLAMA_USE_CURL + +#define CURL_MAX_RETRY 3 +#define CURL_RETRY_DELAY_SECONDS 2 + + +static bool starts_with(const std::string & str, const std::string & prefix) { + // While we wait for C++20's std::string::starts_with... 
+
+#ifdef LLAMA_USE_CURL
+
+#define CURL_MAX_RETRY 3
+#define CURL_RETRY_DELAY_SECONDS 2
+
+
+static bool starts_with(const std::string & str, const std::string & prefix) {
+    // While we wait for C++20's std::string::starts_with...
+    return str.rfind(prefix, 0) == 0;
+}
+
+static bool curl_perform_with_retry(const std::string& url, CURL* curl, int max_attempts, int retry_delay_seconds) {
+    int remaining_attempts = max_attempts;
+
+    while (remaining_attempts > 0) {
+        LOG_INF("%s: Trying to download from %s (attempt %d of %d)...\n", __func__ , url.c_str(), max_attempts - remaining_attempts + 1, max_attempts);
+
+        CURLcode res = curl_easy_perform(curl);
+        if (res == CURLE_OK) {
+            return true;
+        }
+
+        int exponential_backoff_delay = std::pow(retry_delay_seconds, max_attempts - remaining_attempts) * 1000;
+        LOG_WRN("%s: curl_easy_perform() failed: %s, retrying after %d milliseconds...\n", __func__, curl_easy_strerror(res), exponential_backoff_delay);
+
+        remaining_attempts--;
+        std::this_thread::sleep_for(std::chrono::milliseconds(exponential_backoff_delay));
+    }
+
+    LOG_ERR("%s: curl_easy_perform() failed after %d attempts\n", __func__, max_attempts);
+
+    return false;
+}
+
+static bool common_download_file(const std::string & url, const std::string & path, const std::string & hf_token) {
+
+    // Initialize libcurl
+    std::unique_ptr<CURL, decltype(&curl_easy_cleanup)> curl(curl_easy_init(), &curl_easy_cleanup);
+    if (!curl) {
+        LOG_ERR("%s: error initializing libcurl\n", __func__);
+        return false;
+    }
+
+    bool force_download = false;
+
+    // Set the URL, allow to follow http redirection
+    curl_easy_setopt(curl.get(), CURLOPT_URL, url.c_str());
+    curl_easy_setopt(curl.get(), CURLOPT_FOLLOWLOCATION, 1L);
+
+    // Check if hf-token or bearer-token was specified
+    if (!hf_token.empty()) {
+        std::string auth_header = "Authorization: Bearer ";
+        auth_header += hf_token.c_str();
+        struct curl_slist *http_headers = NULL;
+        http_headers = curl_slist_append(http_headers, auth_header.c_str());
+        curl_easy_setopt(curl.get(), CURLOPT_HTTPHEADER, http_headers);
+    }
+
+#if defined(_WIN32)
+    // CURLSSLOPT_NATIVE_CA tells libcurl to use standard certificate store of
+    // operating system. Currently implemented under MS-Windows.
+    curl_easy_setopt(curl.get(), CURLOPT_SSL_OPTIONS, CURLSSLOPT_NATIVE_CA);
+#endif
+
+    // Check if the file already exists locally
+    struct stat model_file_info;
+    auto file_exists = (stat(path.c_str(), &model_file_info) == 0);
+
+    // If the file exists, check its JSON metadata companion file.
+    std::string metadata_path = path + ".json";
+    nlohmann::json metadata;
+    std::string etag;
+    std::string last_modified;
+
+    if (file_exists) {
+        // Try and read the JSON metadata file (note: stream autoclosed upon exiting this block).
+        std::ifstream metadata_in(metadata_path);
+        if (metadata_in.good()) {
+            try {
+                metadata_in >> metadata;
+                LOG_INF("%s: previous metadata file found %s: %s\n", __func__, metadata_path.c_str(), metadata.dump().c_str());
+                if (metadata.contains("url") && metadata.at("url").is_string()) {
+                    auto previous_url = metadata.at("url").get<std::string>();
+                    if (previous_url != url) {
+                        LOG_ERR("%s: Model URL mismatch: %s != %s\n", __func__, url.c_str(), previous_url.c_str());
+                        return false;
+                    }
+                }
+                if (metadata.contains("etag") && metadata.at("etag").is_string()) {
+                    etag = metadata.at("etag");
+                }
+                if (metadata.contains("lastModified") && metadata.at("lastModified").is_string()) {
+                    last_modified = metadata.at("lastModified");
+                }
+            } catch (const nlohmann::json::exception & e) {
+                LOG_ERR("%s: error reading metadata file %s: %s\n", __func__, metadata_path.c_str(), e.what());
+                return false;
+            }
+        }
+    } else {
+        LOG_INF("%s: no previous model file found %s\n", __func__, path.c_str());
+    }
+
+    // Send a HEAD request to retrieve the etag and last-modified headers
+    struct common_load_model_from_url_headers {
+        std::string etag;
+        std::string last_modified;
+    };
+    common_load_model_from_url_headers headers;
+    {
+        typedef size_t(*CURLOPT_HEADERFUNCTION_PTR)(char *, size_t, size_t, void *);
+        auto header_callback = [](char * buffer, size_t /*size*/, size_t n_items, void * userdata) -> size_t {
+            common_load_model_from_url_headers *headers = (common_load_model_from_url_headers *) userdata;
+
+            static std::regex header_regex("([^:]+): (.*)\r\n");
+            static std::regex etag_regex("ETag", std::regex_constants::icase);
+            static std::regex last_modified_regex("Last-Modified", std::regex_constants::icase);
+
+            std::string header(buffer, n_items);
+            std::smatch match;
+            if (std::regex_match(header, match, header_regex)) {
+                const std::string & key = match[1];
+                const std::string & value = match[2];
+                if (std::regex_match(key, match, etag_regex)) {
+                    headers->etag = value;
+                } else if (std::regex_match(key, match, last_modified_regex)) {
+                    headers->last_modified = value;
+                }
+            }
+            return n_items;
+        };
+
+        curl_easy_setopt(curl.get(), CURLOPT_NOBODY, 1L); // will trigger the HEAD verb
+        curl_easy_setopt(curl.get(), CURLOPT_NOPROGRESS, 1L); // hide head request progress
+        curl_easy_setopt(curl.get(), CURLOPT_HEADERFUNCTION, static_cast<CURLOPT_HEADERFUNCTION_PTR>(header_callback));
+        curl_easy_setopt(curl.get(), CURLOPT_HEADERDATA, &headers);
+
+        bool was_perform_successful = curl_perform_with_retry(url, curl.get(), CURL_MAX_RETRY, CURL_RETRY_DELAY_SECONDS);
+        if (!was_perform_successful) {
+            return false;
+        }
+
+        long http_code = 0;
+        curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &http_code);
+        if (http_code != 200) {
+            // HEAD not supported, we don't know if the file has changed
+            // force trigger downloading
+            force_download = true;
+            LOG_ERR("%s: HEAD invalid http status code received: %ld\n", __func__, http_code);
+        }
+    }
+
+    bool should_download = !file_exists || force_download;
+    if (!should_download) {
+        if (!etag.empty() && etag != headers.etag) {
+            LOG_WRN("%s: ETag header is different (%s != %s): triggering a new download\n", __func__, etag.c_str(), headers.etag.c_str());
+            should_download = true;
+        } else if (!last_modified.empty() && last_modified != headers.last_modified) {
+            LOG_WRN("%s: Last-Modified header is different (%s != %s): triggering a new download\n", __func__, last_modified.c_str(), headers.last_modified.c_str());
+            should_download = true;
+        }
+    }
+    if (should_download) {
+        std::string path_temporary = path + ".downloadInProgress";
".downloadInProgress"; + if (file_exists) { + LOG_WRN("%s: deleting previous downloaded file: %s\n", __func__, path.c_str()); + if (remove(path.c_str()) != 0) { + LOG_ERR("%s: unable to delete file: %s\n", __func__, path.c_str()); + return false; + } + } + + // Set the output file + + struct FILE_deleter { + void operator()(FILE * f) const { + fclose(f); + } + }; + + std::unique_ptr outfile(fopen(path_temporary.c_str(), "wb")); + if (!outfile) { + LOG_ERR("%s: error opening local file for writing: %s\n", __func__, path.c_str()); + return false; + } + + typedef size_t(*CURLOPT_WRITEFUNCTION_PTR)(void * data, size_t size, size_t nmemb, void * fd); + auto write_callback = [](void * data, size_t size, size_t nmemb, void * fd) -> size_t { + return fwrite(data, size, nmemb, (FILE *)fd); + }; + curl_easy_setopt(curl.get(), CURLOPT_NOBODY, 0L); + curl_easy_setopt(curl.get(), CURLOPT_WRITEFUNCTION, static_cast(write_callback)); + curl_easy_setopt(curl.get(), CURLOPT_WRITEDATA, outfile.get()); + + // display download progress + curl_easy_setopt(curl.get(), CURLOPT_NOPROGRESS, 0L); + + // helper function to hide password in URL + auto llama_download_hide_password_in_url = [](const std::string & url) -> std::string { + std::size_t protocol_pos = url.find("://"); + if (protocol_pos == std::string::npos) { + return url; // Malformed URL + } + + std::size_t at_pos = url.find('@', protocol_pos + 3); + if (at_pos == std::string::npos) { + return url; // No password in URL + } + + return url.substr(0, protocol_pos + 3) + "********" + url.substr(at_pos); + }; + + // start the download + LOG_INF("%s: trying to download model from %s to %s (server_etag:%s, server_last_modified:%s)...\n", __func__, + llama_download_hide_password_in_url(url).c_str(), path.c_str(), headers.etag.c_str(), headers.last_modified.c_str()); + bool was_perform_successful = curl_perform_with_retry(url, curl.get(), CURL_MAX_RETRY, CURL_RETRY_DELAY_SECONDS); + if (!was_perform_successful) { + return false; + } + + long http_code = 0; + curl_easy_getinfo (curl.get(), CURLINFO_RESPONSE_CODE, &http_code); + if (http_code < 200 || http_code >= 400) { + LOG_ERR("%s: invalid http status code received: %ld\n", __func__, http_code); + return false; + } + + // Causes file to be closed explicitly here before we rename it. + outfile.reset(); + + // Write the updated JSON metadata file. 
+        metadata.update({
+            {"url", url},
+            {"etag", headers.etag},
+            {"lastModified", headers.last_modified}
+        });
+        std::ofstream(metadata_path) << metadata.dump(4);
+        LOG_INF("%s: file metadata saved: %s\n", __func__, metadata_path.c_str());
+
+        if (rename(path_temporary.c_str(), path.c_str()) != 0) {
+            LOG_ERR("%s: unable to rename file: %s to %s\n", __func__, path_temporary.c_str(), path.c_str());
+            return false;
+        }
+    }
+
+    return true;
+}
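+
+// The companion "<model path>.json" written above is plain JSON, e.g.
+// (values illustrative):
+//
+//     {
+//         "etag": "\"5a8c1...\"",
+//         "lastModified": "Tue, 01 Oct 2024 12:00:00 GMT",
+//         "url": "https://huggingface.co/ggml-org/models/resolve/main/tinyllama-1.1b/ggml-model-f16.gguf"
+//     }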
+
+struct llama_model * common_load_model_from_url(
+        const char * model_url,
+        const char * path_model,
+        const char * hf_token,
+        const struct llama_model_params & params) {
+    // Basic validation of the model_url
+    if (!model_url || strlen(model_url) == 0) {
+        LOG_ERR("%s: invalid model_url\n", __func__);
+        return NULL;
+    }
+
+    if (!common_download_file(model_url, path_model, hf_token)) {
+        return NULL;
+    }
+
+    // check for additional GGUFs split to download
+    int n_split = 0;
+    {
+        struct gguf_init_params gguf_params = {
+            /*.no_alloc = */ true,
+            /*.ctx      = */ NULL,
+        };
+        auto * ctx_gguf = gguf_init_from_file(path_model, gguf_params);
+        if (!ctx_gguf) {
+            LOG_ERR("\n%s: failed to load input GGUF from %s\n", __func__, path_model);
+            return NULL;
+        }
+
+        auto key_n_split = gguf_find_key(ctx_gguf, LLM_KV_SPLIT_COUNT);
+        if (key_n_split >= 0) {
+            n_split = gguf_get_val_u16(ctx_gguf, key_n_split);
+        }
+
+        gguf_free(ctx_gguf);
+    }
+
+    if (n_split > 1) {
+        char split_prefix[PATH_MAX] = {0};
+        char split_url_prefix[LLAMA_CURL_MAX_URL_LENGTH] = {0};
+
+        // Verify the first split file format
+        // and extract split URL and PATH prefixes
+        {
+            if (!llama_split_prefix(split_prefix, sizeof(split_prefix), path_model, 0, n_split)) {
+                LOG_ERR("\n%s: unexpected model file name: %s n_split=%d\n", __func__, path_model, n_split);
+                return NULL;
+            }
+
+            if (!llama_split_prefix(split_url_prefix, sizeof(split_url_prefix), model_url, 0, n_split)) {
+                LOG_ERR("\n%s: unexpected model url: %s n_split=%d\n", __func__, model_url, n_split);
+                return NULL;
+            }
+        }
+
+        // Prepare download in parallel
+        std::vector<std::future<bool>> futures_download;
+        for (int idx = 1; idx < n_split; idx++) {
+            futures_download.push_back(std::async(std::launch::async, [&split_prefix, &split_url_prefix, &n_split, hf_token](int download_idx) -> bool {
+                char split_path[PATH_MAX] = {0};
+                llama_split_path(split_path, sizeof(split_path), split_prefix, download_idx, n_split);
+
+                char split_url[LLAMA_CURL_MAX_URL_LENGTH] = {0};
+                llama_split_path(split_url, sizeof(split_url), split_url_prefix, download_idx, n_split);
+
+                return common_download_file(split_url, split_path, hf_token);
+            }, idx));
+        }
+
+        // Wait for all downloads to complete
+        for (auto & f : futures_download) {
+            if (!f.get()) {
+                return NULL;
+            }
+        }
+    }
+
+    return llama_load_model_from_file(path_model, params);
+}
+
+struct llama_model * common_load_model_from_hf(
+        const char * repo,
+        const char * model,
+        const char * path_model,
+        const char * hf_token,
+        const struct llama_model_params & params) {
+    // construct hugging face model url:
+    //
+    //  --repo ggml-org/models --file tinyllama-1.1b/ggml-model-f16.gguf
+    //    https://huggingface.co/ggml-org/models/resolve/main/tinyllama-1.1b/ggml-model-f16.gguf
+    //
+    //  --repo TheBloke/Mixtral-8x7B-v0.1-GGUF --file mixtral-8x7b-v0.1.Q4_K_M.gguf
+    //    https://huggingface.co/TheBloke/Mixtral-8x7B-v0.1-GGUF/resolve/main/mixtral-8x7b-v0.1.Q4_K_M.gguf
+    //
+
+    std::string model_url = "https://huggingface.co/";
+    model_url += repo;
+    model_url += "/resolve/main/";
+    model_url += model;
+
+    return common_load_model_from_url(model_url.c_str(), path_model, hf_token, params);
+}
+
+#else
+
+struct llama_model * common_load_model_from_url(
+        const char * /*model_url*/,
+        const char * /*path_model*/,
+        const char * /*hf_token*/,
+        const struct llama_model_params & /*params*/) {
+    LOG_WRN("%s: llama.cpp built without libcurl, downloading from an url not supported.\n", __func__);
+    return nullptr;
+}
+
+struct llama_model * common_load_model_from_hf(
+        const char * /*repo*/,
+        const char * /*model*/,
+        const char * /*path_model*/,
+        const char * /*hf_token*/,
+        const struct llama_model_params & /*params*/) {
+    LOG_WRN("%s: llama.cpp built without libcurl, downloading from Hugging Face not supported.\n", __func__);
+    return nullptr;
+}
+
+#endif // LLAMA_USE_CURL
+
+//
+// Batch utils
+//
+
+void common_batch_clear(struct llama_batch & batch) {
+    batch.n_tokens = 0;
+}
+
+void common_batch_add(
+                 struct llama_batch & batch,
+                        llama_token   id,
+                          llama_pos   pos,
+    const std::vector<llama_seq_id> & seq_ids,
+                               bool   logits) {
+    GGML_ASSERT(batch.seq_id[batch.n_tokens] && "llama_batch size exceeded");
+
+    batch.token   [batch.n_tokens] = id;
+    batch.pos     [batch.n_tokens] = pos;
+    batch.n_seq_id[batch.n_tokens] = seq_ids.size();
+    for (size_t i = 0; i < seq_ids.size(); ++i) {
+        batch.seq_id[batch.n_tokens][i] = seq_ids[i];
+    }
+    batch.logits  [batch.n_tokens] = logits;
+
+    batch.n_tokens++;
+}
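+
+// Usage sketch (illustrative): filling a batch with one sequence and requesting
+// logits only for the last token.
+//
+//     llama_batch batch = llama_batch_init(512, 0, 1);
+//     common_batch_clear(batch);
+//     for (size_t i = 0; i < tokens.size(); ++i) {
+//         common_batch_add(batch, tokens[i], (llama_pos) i, { 0 }, i == tokens.size() - 1);
+//     }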
+
+//
+// Vocab utils
+//
+
+std::vector<llama_token> common_tokenize(
+  const struct llama_context * ctx,
+           const std::string & text,
+                        bool   add_special,
+                        bool   parse_special) {
+    return common_tokenize(llama_get_model(ctx), text, add_special, parse_special);
+}
+
+std::vector<llama_token> common_tokenize(
+    const struct llama_model * model,
+           const std::string & text,
+                        bool   add_special,
+                        bool   parse_special) {
+    // upper limit for the number of tokens
+    int n_tokens = text.length() + 2 * add_special;
+    std::vector<llama_token> result(n_tokens);
+    n_tokens = llama_tokenize(model, text.data(), text.length(), result.data(), result.size(), add_special, parse_special);
+    if (n_tokens < 0) {
+        result.resize(-n_tokens);
+        int check = llama_tokenize(model, text.data(), text.length(), result.data(), result.size(), add_special, parse_special);
+        GGML_ASSERT(check == -n_tokens);
+    } else {
+        result.resize(n_tokens);
+    }
+    return result;
+}
+
+std::string common_token_to_piece(const struct llama_context * ctx, llama_token token, bool special) {
+    std::string piece;
+    piece.resize(piece.capacity()); // using string internal cache, 15 bytes + '\n'
+    const int n_chars = llama_token_to_piece(llama_get_model(ctx), token, &piece[0], piece.size(), 0, special);
+    if (n_chars < 0) {
+        piece.resize(-n_chars);
+        int check = llama_token_to_piece(llama_get_model(ctx), token, &piece[0], piece.size(), 0, special);
+        GGML_ASSERT(check == -n_chars);
+    }
+    else {
+        piece.resize(n_chars);
+    }
+
+    return piece;
+}
+
+std::string common_detokenize(llama_context * ctx, const std::vector<llama_token> & tokens, bool special) {
+    std::string text;
+    text.resize(std::max(text.capacity(), tokens.size()));
+    int32_t n_chars = llama_detokenize(llama_get_model(ctx), tokens.data(), (int32_t)tokens.size(), &text[0], (int32_t)text.size(), false, special);
+    if (n_chars < 0) {
+        text.resize(-n_chars);
+        n_chars = llama_detokenize(llama_get_model(ctx), tokens.data(), (int32_t)tokens.size(), &text[0], (int32_t)text.size(), false, special);
+        GGML_ASSERT(n_chars <= (int32_t)text.size()); // whitespace trimming is performed after per-token detokenization
+    }
+
+    text.resize(n_chars);
+
+    // NOTE: the original tokenizer decodes bytes after collecting the pieces.
+    return text;
+}
+
+//
+// Chat template utils
+//
+
+bool common_chat_verify_template(const std::string & tmpl) {
+    llama_chat_message chat[] = {{"user", "test"}};
+    int res = llama_chat_apply_template(nullptr, tmpl.c_str(), chat, 1, true, nullptr, 0);
+    return res >= 0;
+}
+
+std::string common_chat_apply_template(const struct llama_model * model,
+        const std::string & tmpl,
+        const std::vector<common_chat_msg> & msgs,
+        bool add_ass) {
+    int alloc_size = 0;
+    bool fallback = false; // indicate if we must fallback to default chatml
+    std::vector<llama_chat_message> chat;
+    for (auto & msg : msgs) {
+        chat.push_back({msg.role.c_str(), msg.content.c_str()});
+        alloc_size += (msg.role.size() + msg.content.size()) * 1.25;
+    }
+
+    const char * ptr_tmpl = tmpl.empty() ? nullptr : tmpl.c_str();
+    std::vector<char> buf(alloc_size);
+
+    // run the first time to get the total output length
+    int32_t res = llama_chat_apply_template(model, ptr_tmpl, chat.data(), chat.size(), add_ass, buf.data(), buf.size());
+
+    // error: chat template is not supported
+    if (res < 0) {
+        if (ptr_tmpl != nullptr) {
+            // if the custom "tmpl" is not supported, we throw an error
+            // this is a bit redundant (for good), since we're not sure if user validated the custom template with llama_chat_verify_template()
+            throw std::runtime_error("this custom template is not supported");
+        } else {
+            // If the built-in template is not supported, we default to chatml
+            res = llama_chat_apply_template(nullptr, "chatml", chat.data(), chat.size(), add_ass, buf.data(), buf.size());
+            fallback = true;
+        }
+    }
+
+    // if it turns out that our buffer is too small, we resize it
+    if ((size_t) res > buf.size()) {
+        buf.resize(res);
+        res = llama_chat_apply_template(
+            fallback ? nullptr : model,
+            fallback ? "chatml" : ptr_tmpl,
+            chat.data(), chat.size(), add_ass, buf.data(), buf.size());
+    }
+
+    std::string formatted_chat(buf.data(), res);
+    return formatted_chat;
+}
+
+std::string common_chat_format_single(const struct llama_model * model,
+        const std::string & tmpl,
+        const std::vector<common_chat_msg> & past_msg,
+        const common_chat_msg & new_msg,
+        bool add_ass) {
+    std::ostringstream ss;
+    auto fmt_past_msg = past_msg.empty() ? "" : common_chat_apply_template(model, tmpl, past_msg, false);
+    std::vector<common_chat_msg> chat_new(past_msg);
+    // if the past_msg ends with a newline, we must preserve it in the formatted version
+    if (add_ass && !fmt_past_msg.empty() && fmt_past_msg.back() == '\n') {
+        ss << "\n";
+    }
+    // format chat with new_msg
+    chat_new.push_back(new_msg);
+    auto fmt_new_msg = common_chat_apply_template(model, tmpl, chat_new, add_ass);
+    // get the diff part
+    ss << fmt_new_msg.substr(fmt_past_msg.size(), fmt_new_msg.size() - fmt_past_msg.size());
+    return ss.str();
+}
+
+std::string common_chat_format_example(const struct llama_model * model,
+        const std::string & tmpl) {
+    std::vector<common_chat_msg> msgs = {
+        {"system",    "You are a helpful assistant"},
+        {"user",      "Hello"},
+        {"assistant", "Hi there"},
+        {"user",      "How are you?"},
+    };
+    return common_chat_apply_template(model, tmpl, msgs, true);
+}
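+
+// Usage sketch (illustrative): format_single() returns only the delta that the
+// new message adds on top of the already-formatted history, which is what an
+// interactive session appends to its prompt.
+//
+//     std::vector<common_chat_msg> history = { {"user", "Hello"}, {"assistant", "Hi there"} };
+//     common_chat_msg next = {"user", "How are you?"};
+//     std::string delta = common_chat_format_single(model, tmpl, history, next, /*add_ass=*/true);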
+
+//
+// KV cache utils
+//
+
+void common_kv_cache_dump_view(const llama_kv_cache_view & view, int row_size) {
+    static const char slot_chars[] = ".123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+";
+
+    printf("=== Dumping KV cache. total cells %d, max sequences per cell %d, populated cells %d, total tokens in cache %d, largest empty slot=%d @ %d",
+        view.n_cells, view.n_seq_max, view.used_cells, view.token_count, view.max_contiguous, view.max_contiguous_idx);
+
+    llama_kv_cache_view_cell * c_curr = view.cells;
+    llama_seq_id * cs_curr = view.cells_sequences;
+
+    for (int i = 0; i < view.n_cells; i++, c_curr++, cs_curr += view.n_seq_max) {
+        if (i % row_size == 0) {
+            printf("\n%5d: ", i);
+        }
+        int seq_count = 0;
+        for (int j = 0; j < view.n_seq_max; j++) {
+            if (cs_curr[j] >= 0) { seq_count++; }
+        }
+        putchar(slot_chars[std::min(sizeof(slot_chars) - 2, size_t(seq_count))]);
+    }
+
+    printf("\n=== Done dumping\n");
+}
+
+void common_kv_cache_dump_view_seqs(const llama_kv_cache_view & view, int row_size) {
+    static const char slot_chars[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
+
+    printf("=== Dumping KV cache. total cells %d, max sequences per cell %d, populated cells %d, total tokens in cache %d, largest empty slot=%d @ %d\n",
+        view.n_cells, view.n_seq_max, view.used_cells, view.token_count, view.max_contiguous, view.max_contiguous_idx);
+
+    std::unordered_map<llama_seq_id, size_t> seqs;
+    llama_kv_cache_view_cell * c_curr = view.cells;
+    llama_seq_id * cs_curr = view.cells_sequences;
+
+    for (int i = 0; i < view.n_cells; i++, c_curr++, cs_curr += view.n_seq_max) {
+        for (int j = 0; j < view.n_seq_max; j++) {
+            if (cs_curr[j] < 0) { continue; }
+            if (seqs.find(cs_curr[j]) == seqs.end()) {
+                if (seqs.size() + 1 >= sizeof(slot_chars)) { break; }
+                const size_t sz = seqs.size();
+                seqs[cs_curr[j]] = sz;
+            }
+        }
+        if (seqs.size() + 1 >= sizeof(slot_chars)) { break; }
+    }
+
+    printf("=== Sequence legend: ");
+    for (const auto & it : seqs) {
+        printf("%zu=%d, ", it.second, it.first);
+    }
+    printf("'+'=other sequence ids");
+
+    c_curr = view.cells;
+    cs_curr = view.cells_sequences;
+    for (int i = 0; i < view.n_cells; i++, c_curr++, cs_curr += view.n_seq_max) {
+        if (i % row_size == 0) {
+            printf("\n%5d: ", i);
+        }
+        for (int j = 0; j < view.n_seq_max; j++) {
+            if (cs_curr[j] >= 0) {
+                const auto & it = seqs.find(cs_curr[j]);
+                putchar(it != seqs.end() ? int(slot_chars[it->second]) : '+');
+            } else {
+                putchar('.');
+            }
+        }
+        putchar(' ');
+    }
+
+    printf("\n=== Done dumping\n");
+}
+
+//
+// Embedding utils
+//
+
+void common_embd_normalize(const float * inp, float * out, int n, int embd_norm) {
+    double sum = 0.0;
+
+    switch (embd_norm) {
+        case -1: // no normalisation
+            sum = 1.0;
+            break;
+        case 0: // max absolute
+            for (int i = 0; i < n; i++) {
+                if (sum < std::abs(inp[i])) sum = std::abs(inp[i]);
+            }
+            sum /= 32760.0; // make an int16 range
+            break;
+        case 2: // euclidean
+            for (int i = 0; i < n; i++) {
+                sum += inp[i] * inp[i];
+            }
+            sum = std::sqrt(sum);
+            break;
+        default: // p-norm (euclidean is p-norm p=2)
+            for (int i = 0; i < n; i++) {
+                sum += std::pow(std::abs(inp[i]), embd_norm);
+            }
+            sum = std::pow(sum, 1.0 / embd_norm);
+            break;
+    }
+
+    const float norm = sum > 0.0 ? 1.0 / sum : 0.0f;
+
+    for (int i = 0; i < n; i++) {
+        out[i] = inp[i] * norm;
+    }
+}
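+
+// Worked example (illustrative): with embd_norm == 2 (euclidean) and
+// inp = {3, 4}, sum = sqrt(3*3 + 4*4) = 5, so out = {0.6, 0.8}.
+// embd_norm == -1 leaves the values untouched; embd_norm == 0 rescales so
+// that the largest magnitude maps to 32760 (an int16 range).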
+
+float common_embd_similarity_cos(const float * embd1, const float * embd2, int n){
+    double sum  = 0.0;
+    double sum1 = 0.0;
+    double sum2 = 0.0;
+
+    for (int i = 0; i < n; i++) {
+        sum  += embd1[i] * embd2[i];
+        sum1 += embd1[i] * embd1[i];
+        sum2 += embd2[i] * embd2[i];
+    }
+
+    // Handle the case where one or both vectors are zero vectors
+    if (sum1 == 0.0 || sum2 == 0.0) {
+        if (sum1 == 0.0 && sum2 == 0.0) {
+            return 1.0f; // two zero vectors are similar
+        }
+        return 0.0f;
+    }
+
+    return sum / (sqrt(sum1) * sqrt(sum2));
+}
+
+//
+// Control vector utils
+//
+
+static common_control_vector_data common_control_vector_load_one(const common_control_vector_load_info & load_info) {
+    common_control_vector_data result = { -1, {} };
+
+    ggml_context * ctx = nullptr;
+    struct gguf_init_params meta_gguf_params = {
+        /* .no_alloc = */ false,
+        /* .ctx      = */ &ctx,
+    };
+    struct gguf_context * ctx_gguf = gguf_init_from_file(load_info.fname.c_str(), meta_gguf_params);
+    if (!ctx_gguf) {
+        LOG_ERR("%s: failed to load control vector file from %s\n", __func__, load_info.fname.c_str());
+        return result;
+    }
+
+    int32_t n_tensors = gguf_get_n_tensors(ctx_gguf);
+    if (n_tensors == 0) {
+        LOG_WRN("%s: no direction tensors found in %s\n", __func__, load_info.fname.c_str());
+    }
+
+    for (int i = 0; i < n_tensors; i++) {
+        std::string name = gguf_get_tensor_name(ctx_gguf, i);
+
+        int layer_idx = -1;
+
+        // split on '.'
+        size_t dotpos = name.find('.');
+        if (dotpos != std::string::npos && name.substr(0, dotpos) == "direction") {
+            try {
+                layer_idx = std::stoi(name.substr(dotpos + 1));
+            } catch (...) {
+                layer_idx = -1;
+            }
+        }
+        if (layer_idx < 0) {
+            LOG_ERR("%s: invalid/unparsable direction tensor layer index in %s\n", __func__, load_info.fname.c_str());
+            result.n_embd = -1;
+            break;
+        } else if (layer_idx == 0) {
+            LOG_ERR("%s: invalid (zero) direction tensor layer index in %s\n", __func__, load_info.fname.c_str());
+            result.n_embd = -1;
+            break;
+        }
+
+        struct ggml_tensor * tensor = ggml_get_tensor(ctx, name.c_str());
+        if (tensor->type != GGML_TYPE_F32) {
+            LOG_ERR("%s: invalid (non-F32) direction tensor type in %s\n", __func__, load_info.fname.c_str());
+            result.n_embd = -1;
+            break;
+        }
+        if (ggml_n_dims(tensor) != 1) {
+            LOG_ERR("%s: invalid (non-1D) direction tensor shape in %s\n", __func__, load_info.fname.c_str());
+            result.n_embd = -1;
+            break;
+        }
+
+        if (result.n_embd == -1) {
+            result.n_embd = ggml_nelements(tensor);
+        } else if (ggml_nelements(tensor) != result.n_embd) {
+            LOG_ERR("%s: direction tensor in %s does not match previous dimensions\n", __func__, load_info.fname.c_str());
+            result.n_embd = -1;
+            break;
+        }
+
+        // extend if necessary - do not store data for layer 0 (it's not used)
+        result.data.resize(std::max(result.data.size(), static_cast<size_t>(result.n_embd * layer_idx)), 0.0f);
+
+        const float * src = (const float *) tensor->data;
+        float * dst = result.data.data() + result.n_embd * (layer_idx - 1); // layer 1 at [0]
+        for (int j = 0; j < result.n_embd; j++) {
+            dst[j] += src[j] * load_info.strength; // allows multiple directions for same layer in same file
+        }
+
+    }
+
+    if (result.n_embd == -1) {
+        LOG_WRN("%s: skipping %s due to invalid direction tensors\n", __func__, load_info.fname.c_str());
+        result.data.clear();
+    }
+
+    gguf_free(ctx_gguf);
+    ggml_free(ctx);
+
+    return result;
+}
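+
+// Expected GGUF layout consumed above (illustrative): one 1-D F32 tensor per
+// layer, named "direction.<layer>", all with the same n_embd. Layer indices
+// start at 1, and multiple entries for the same layer accumulate, scaled by
+// 'strength'.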
+    common_control_vector_data result = { -1, {} };
+
+    for (const auto & info : load_infos) {
+        auto cur = common_control_vector_load_one(info);
+
+        if (cur.n_embd == -1) {
+            result.n_embd = -1;
+            break;
+        }
+        if (result.n_embd != -1 && result.n_embd != cur.n_embd) {
+            LOG_ERR("%s: control vectors in %s do not match previous dimensions\n", __func__, info.fname.c_str());
+            result.n_embd = -1;
+            break;
+        }
+
+        if (result.n_embd == -1) {
+            result = std::move(cur);
+        } else {
+            result.data.resize(std::max(result.data.size(), cur.data.size()), 0.0f); // extend if necessary
+            for (size_t i = 0; i < cur.data.size(); i++) {
+                result.data[i] += cur.data[i];
+            }
+        }
+    }
+
+    if (result.n_embd == -1) {
+        LOG_ERR("%s: no valid control vector files passed\n", __func__);
+        result.data.clear();
+    }
+
+    return result;
+}
+
+//
+// YAML utils
+//
+
+void yaml_dump_vector_float(FILE * stream, const char * prop_name, const std::vector<float> & data) {
+    if (data.empty()) {
+        fprintf(stream, "%s:\n", prop_name);
+        return;
+    }
+
+    fprintf(stream, "%s: [", prop_name);
+    for (size_t i = 0; i < data.size() - 1; ++i) {
+        fprintf(stream, "%e, ", data[i]);
+    }
+    fprintf(stream, "%e]\n", data.back());
+}
+
+void yaml_dump_vector_int(FILE * stream, const char * prop_name, const std::vector<int> & data) {
+    if (data.empty()) {
+        fprintf(stream, "%s:\n", prop_name);
+        return;
+    }
+
+    fprintf(stream, "%s: [", prop_name);
+    for (size_t i = 0; i < data.size() - 1; ++i) {
+        fprintf(stream, "%d, ", data[i]);
+    }
+    fprintf(stream, "%d]\n", data.back());
+}
+
+void yaml_dump_string_multiline(FILE * stream, const char * prop_name, const char * data) {
+    std::string data_str(data == NULL ? "" : data);
+
+    if (data_str.empty()) {
+        fprintf(stream, "%s:\n", prop_name);
+        return;
+    }
+
+    size_t pos_start = 0;
+    size_t pos_found = 0;
+
+    if (std::isspace(data_str[0]) || std::isspace(data_str.back())) {
+        data_str = std::regex_replace(data_str, std::regex("\n"), "\\n");
+        data_str = std::regex_replace(data_str, std::regex("\""), "\\\"");
+        data_str = std::regex_replace(data_str, std::regex(R"(\\[^n"])"), R"(\$&)");
+        data_str = "\"" + data_str + "\"";
+        fprintf(stream, "%s: %s\n", prop_name, data_str.c_str());
+        return;
+    }
+
+    if (data_str.find('\n') == std::string::npos) {
+        fprintf(stream, "%s: %s\n", prop_name, data_str.c_str());
+        return;
+    }
+
+    fprintf(stream, "%s: |\n", prop_name);
+    while ((pos_found = data_str.find('\n', pos_start)) != std::string::npos) {
+        fprintf(stream, "  %s\n", data_str.substr(pos_start, pos_found-pos_start).c_str());
+        pos_start = pos_found + 1;
+    }
+    // also emit the trailing segment after the last newline, which the loop above skips
+    if (pos_start < data_str.size()) {
+        fprintf(stream, "  %s\n", data_str.substr(pos_start).c_str());
+    }
+}
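The helper above picks one of three YAML encodings: an empty value prints a bare key, a value with leading or trailing whitespace is escaped and quoted, and a value with embedded newlines becomes a block literal. An illustrative trace (expected output shown as comments):

    yaml_dump_string_multiline(stdout, "prompt", "hello\nworld");
    // prompt: |
    //   hello
    //   world
    yaml_dump_string_multiline(stdout, "note", " padded ");
    // note: " padded "   (whitespace at either end forces the quoted form)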
"true" : "false"); + fprintf(stream, "cpu_has_avx512_vnni: %s\n", ggml_cpu_has_avx512_vnni() ? "true" : "false"); + fprintf(stream, "cpu_has_cuda: %s\n", ggml_cpu_has_cuda() ? "true" : "false"); + fprintf(stream, "cpu_has_vulkan: %s\n", ggml_cpu_has_vulkan() ? "true" : "false"); + fprintf(stream, "cpu_has_kompute: %s\n", ggml_cpu_has_kompute() ? "true" : "false"); + fprintf(stream, "cpu_has_fma: %s\n", ggml_cpu_has_fma() ? "true" : "false"); + fprintf(stream, "cpu_has_gpublas: %s\n", ggml_cpu_has_gpublas() ? "true" : "false"); + fprintf(stream, "cpu_has_neon: %s\n", ggml_cpu_has_neon() ? "true" : "false"); + fprintf(stream, "cpu_has_sve: %s\n", ggml_cpu_has_sve() ? "true" : "false"); + fprintf(stream, "cpu_has_f16c: %s\n", ggml_cpu_has_f16c() ? "true" : "false"); + fprintf(stream, "cpu_has_fp16_va: %s\n", ggml_cpu_has_fp16_va() ? "true" : "false"); + fprintf(stream, "cpu_has_riscv_v: %s\n", ggml_cpu_has_riscv_v() ? "true" : "false"); + fprintf(stream, "cpu_has_wasm_simd: %s\n", ggml_cpu_has_wasm_simd() ? "true" : "false"); + fprintf(stream, "cpu_has_blas: %s\n", ggml_cpu_has_blas() ? "true" : "false"); + fprintf(stream, "cpu_has_sse3: %s\n", ggml_cpu_has_sse3() ? "true" : "false"); + fprintf(stream, "cpu_has_vsx: %s\n", ggml_cpu_has_vsx() ? "true" : "false"); + fprintf(stream, "cpu_has_matmul_int8: %s\n", ggml_cpu_has_matmul_int8() ? "true" : "false"); + +#ifdef NDEBUG + fprintf(stream, "debug: false\n"); +#else + fprintf(stream, "debug: true\n"); +#endif // NDEBUG + + fprintf(stream, "model_desc: %s\n", model_desc); + fprintf(stream, "n_vocab: %d # output size of the final layer, 32001 for some models\n", llama_n_vocab(llama_get_model(lctx))); + +#ifdef __OPTIMIZE__ + fprintf(stream, "optimize: true\n"); +#else + fprintf(stream, "optimize: false\n"); +#endif // __OPTIMIZE__ + + fprintf(stream, "time: %s\n", timestamp.c_str()); + + fprintf(stream, "\n"); + fprintf(stream, "###############\n"); + fprintf(stream, "# User Inputs #\n"); + fprintf(stream, "###############\n"); + fprintf(stream, "\n"); + + fprintf(stream, "alias: %s # default: unknown\n", params.model_alias.c_str()); + fprintf(stream, "batch_size: %d # default: 512\n", params.n_batch); + fprintf(stream, "chunks: %d # default: -1 (unlimited)\n", params.n_chunks); + fprintf(stream, "color: %s # default: false\n", params.use_color ? "true" : "false"); + fprintf(stream, "ctx_size: %d # default: 512\n", params.n_ctx); + fprintf(stream, "dry_allowed_length: %d # default: 2\n", sparams.dry_allowed_length); + fprintf(stream, "dry_base: %.2f # default: 1.75\n", sparams.dry_base); + fprintf(stream, "dry_multiplier: %.1f # default: 0.0\n", sparams.dry_multiplier); + fprintf(stream, "dry_penalty_last_n: %d # default: -1 (0 = disable, -1 = context size)\n", sparams.dry_penalty_last_n); + fprintf(stream, "escape: %s # default: false\n", params.escape ? "true" : "false"); + fprintf(stream, "file: # never logged, see prompt instead. Can still be specified for input.\n"); + fprintf(stream, "frequency_penalty: %f # default: 0.0 \n", sparams.penalty_freq); + yaml_dump_string_multiline(stream, "grammar", sparams.grammar.c_str()); + fprintf(stream, "grammar-file: # never logged, see grammar instead. Can still be specified for input.\n"); + fprintf(stream, "hellaswag: %s # default: false\n", params.hellaswag ? "true" : "false"); + fprintf(stream, "hellaswag_tasks: %zu # default: 400\n", params.hellaswag_tasks); + fprintf(stream, "ignore_eos: %s # default: false\n", sparams.ignore_eos ? 
"true" : "false"); + + yaml_dump_string_multiline(stream, "in_prefix", params.input_prefix.c_str()); + fprintf(stream, "in_prefix_bos: %s # default: false\n", params.input_prefix_bos ? "true" : "false"); + yaml_dump_string_multiline(stream, "in_suffix", params.input_prefix.c_str()); + fprintf(stream, "interactive: %s # default: false\n", params.interactive ? "true" : "false"); + fprintf(stream, "interactive_first: %s # default: false\n", params.interactive_first ? "true" : "false"); + fprintf(stream, "keep: %d # default: 0\n", params.n_keep); + fprintf(stream, "logdir: %s # default: unset (no logging)\n", params.logdir.c_str()); + + fprintf(stream, "logit_bias:\n"); + for (const auto & logit_bias : sparams.logit_bias) { + fprintf(stream, " %d: %f", logit_bias.token, logit_bias.bias); + } + + fprintf(stream, "lora:\n"); + for (auto & la : params.lora_adapters) { + if (la.scale == 1.0f) { + fprintf(stream, " - %s\n", la.path.c_str()); + } + } + fprintf(stream, "lora_scaled:\n"); + for (auto & la : params.lora_adapters) { + if (la.scale != 1.0f) { + fprintf(stream, " - %s: %f\n", la.path.c_str(), la.scale); + } + } + fprintf(stream, "lora_init_without_apply: %s # default: false\n", params.lora_init_without_apply ? "true" : "false"); + fprintf(stream, "main_gpu: %d # default: 0\n", params.main_gpu); + fprintf(stream, "min_keep: %d # default: 0 (disabled)\n", sparams.min_keep); + fprintf(stream, "mirostat: %d # default: 0 (disabled)\n", sparams.mirostat); + fprintf(stream, "mirostat_ent: %f # default: 5.0\n", sparams.mirostat_tau); + fprintf(stream, "mirostat_lr: %f # default: 0.1\n", sparams.mirostat_eta); + fprintf(stream, "mlock: %s # default: false\n", params.use_mlock ? "true" : "false"); + fprintf(stream, "model: %s # default: %s\n", params.model.c_str(), DEFAULT_MODEL_PATH); + fprintf(stream, "model_draft: %s # default:\n", params.model_draft.c_str()); + fprintf(stream, "multiline_input: %s # default: false\n", params.multiline_input ? "true" : "false"); + fprintf(stream, "n_gpu_layers: %d # default: -1\n", params.n_gpu_layers); + fprintf(stream, "n_predict: %d # default: -1 (unlimited)\n", params.n_predict); + fprintf(stream, "n_probs: %d # only used by server binary, default: 0\n", sparams.n_probs); + fprintf(stream, "no_mmap: %s # default: false\n", !params.use_mmap ? "true" : "false"); + fprintf(stream, "penalize_nl: %s # default: false\n", sparams.penalize_nl ? "true" : "false"); + fprintf(stream, "ppl_output_type: %d # default: 0\n", params.ppl_output_type); + fprintf(stream, "ppl_stride: %d # default: 0\n", params.ppl_stride); + fprintf(stream, "presence_penalty: %f # default: 0.0\n", sparams.penalty_present); + yaml_dump_string_multiline(stream, "prompt", params.prompt.c_str()); + fprintf(stream, "prompt_cache: %s\n", params.path_prompt_cache.c_str()); + fprintf(stream, "prompt_cache_all: %s # default: false\n", params.prompt_cache_all ? "true" : "false"); + fprintf(stream, "prompt_cache_ro: %s # default: false\n", params.prompt_cache_ro ? 
"true" : "false"); + yaml_dump_vector_int(stream, "prompt_tokens", prompt_tokens); + fprintf(stream, "repeat_penalty: %f # default: 1.1\n", sparams.penalty_repeat); + + fprintf(stream, "reverse_prompt:\n"); + for (std::string ap : params.antiprompt) { + size_t pos = 0; + while ((pos = ap.find('\n', pos)) != std::string::npos) { + ap.replace(pos, 1, "\\n"); + pos += 1; + } + + fprintf(stream, " - %s\n", ap.c_str()); + } + + fprintf(stream, "rope_freq_base: %f # default: 10000.0\n", params.rope_freq_base); + fprintf(stream, "rope_freq_scale: %f # default: 1.0\n", params.rope_freq_scale); + fprintf(stream, "simple_io: %s # default: false\n", params.simple_io ? "true" : "false"); + fprintf(stream, "cont_batching: %s # default: false\n", params.cont_batching ? "true" : "false"); + fprintf(stream, "flash_attn: %s # default: false\n", params.flash_attn ? "true" : "false"); + fprintf(stream, "temp: %f # default: 0.8\n", sparams.temp); + + const std::vector tensor_split_vector(params.tensor_split, params.tensor_split + llama_max_devices()); + yaml_dump_vector_float(stream, "tensor_split", tensor_split_vector); + + fprintf(stream, "threads: %d # default: %u\n", params.cpuparams.n_threads, std::thread::hardware_concurrency()); + fprintf(stream, "top_k: %d # default: 40\n", sparams.top_k); + fprintf(stream, "top_p: %f # default: 0.95\n", sparams.top_p); + fprintf(stream, "min_p: %f # default: 0.0\n", sparams.min_p); + fprintf(stream, "xtc_probability: %f # default: 0.0\n", sparams.xtc_probability); + fprintf(stream, "xtc_threshold: %f # default: 0.1\n", sparams.xtc_threshold); + fprintf(stream, "typ_p: %f # default: 1.0\n", sparams.typ_p); + fprintf(stream, "verbose_prompt: %s # default: false\n", params.verbose_prompt ? "true" : "false"); + fprintf(stream, "display_prompt: %s # default: true\n", params.display_prompt ? "true" : "false"); +} diff --git a/common/common.h b/common/common.h new file mode 100644 index 0000000000000000000000000000000000000000..727f85baa8c24bd394b31b3023ffd9ff6173b436 --- /dev/null +++ b/common/common.h @@ -0,0 +1,598 @@ +// Various helper functions and utilities + +#pragma once + +#include "llama.h" + +#include +#include +#include + +#ifdef _WIN32 +#define DIRECTORY_SEPARATOR '\\' +#else +#define DIRECTORY_SEPARATOR '/' +#endif // _WIN32 + +#define die(msg) do { fputs("error: " msg "\n", stderr); exit(1); } while (0) +#define die_fmt(fmt, ...) do { fprintf(stderr, "error: " fmt "\n", __VA_ARGS__); exit(1); } while (0) + +#define print_build_info() do { \ + fprintf(stderr, "%s: build = %d (%s)\n", __func__, LLAMA_BUILD_NUMBER, LLAMA_COMMIT); \ + fprintf(stderr, "%s: built with %s for %s\n", __func__, LLAMA_COMPILER, LLAMA_BUILD_TARGET); \ +} while(0) + +#define DEFAULT_MODEL_PATH "models/7B/ggml-model-f16.gguf" + +struct common_lora_adapter_info { + std::string path; + float scale; +}; + +struct common_lora_adapter_container : common_lora_adapter_info { + struct llama_lora_adapter * adapter; +}; + +// build info +extern int LLAMA_BUILD_NUMBER; +extern char const * LLAMA_COMMIT; +extern char const * LLAMA_COMPILER; +extern char const * LLAMA_BUILD_TARGET; + +struct common_control_vector_load_info; + +// +// CPU utils +// + +struct cpu_params { + int n_threads = -1; + bool cpumask[GGML_MAX_N_THREADS] = {false}; // CPU affinity mask. 
+ bool mask_valid = false; // Default: any CPU + enum ggml_sched_priority priority = GGML_SCHED_PRIO_NORMAL; // Scheduling prio : (0 - normal, 1 - medium, 2 - high, 3 - realtime) + bool strict_cpu = false; // Use strict CPU placement + uint32_t poll = 50; // Polling (busywait) level (0 - no polling, 100 - mostly polling) +}; + +int32_t cpu_get_num_physical_cores(); +int32_t cpu_get_num_math(); + +// +// Common params +// + +enum llama_example { + LLAMA_EXAMPLE_COMMON, + LLAMA_EXAMPLE_SPECULATIVE, + LLAMA_EXAMPLE_MAIN, + LLAMA_EXAMPLE_INFILL, + LLAMA_EXAMPLE_EMBEDDING, + LLAMA_EXAMPLE_PERPLEXITY, + LLAMA_EXAMPLE_RETRIEVAL, + LLAMA_EXAMPLE_PASSKEY, + LLAMA_EXAMPLE_IMATRIX, + LLAMA_EXAMPLE_BENCH, + LLAMA_EXAMPLE_SERVER, + LLAMA_EXAMPLE_CVECTOR_GENERATOR, + LLAMA_EXAMPLE_EXPORT_LORA, + LLAMA_EXAMPLE_LLAVA, + LLAMA_EXAMPLE_LOOKUP, + LLAMA_EXAMPLE_PARALLEL, + + LLAMA_EXAMPLE_COUNT, +}; + +enum common_sampler_type { + COMMON_SAMPLER_TYPE_NONE = 0, + COMMON_SAMPLER_TYPE_DRY = 1, + COMMON_SAMPLER_TYPE_TOP_K = 2, + COMMON_SAMPLER_TYPE_TOP_P = 3, + COMMON_SAMPLER_TYPE_MIN_P = 4, + //COMMON_SAMPLER_TYPE_TFS_Z = 5, + COMMON_SAMPLER_TYPE_TYPICAL_P = 6, + COMMON_SAMPLER_TYPE_TEMPERATURE = 7, + COMMON_SAMPLER_TYPE_XTC = 8, + COMMON_SAMPLER_TYPE_INFILL = 9, +}; + +// dimensionality reduction methods, used by cvector-generator +enum dimre_method { + DIMRE_METHOD_PCA, + DIMRE_METHOD_MEAN, +}; + +// sampler parameters +struct common_sampler_params { + uint32_t seed = LLAMA_DEFAULT_SEED; // the seed used to initialize llama_sampler + + int32_t n_prev = 64; // number of previous tokens to remember + int32_t n_probs = 0; // if greater than 0, output the probabilities of top n_probs tokens. + int32_t min_keep = 0; // 0 = disabled, otherwise samplers should return at least min_keep tokens + int32_t top_k = 40; // <= 0 to use vocab size + float top_p = 0.95f; // 1.0 = disabled + float min_p = 0.05f; // 0.0 = disabled + float xtc_probability = 0.00f; // 0.0 = disabled + float xtc_threshold = 0.10f; // > 0.5 disables XTC + float typ_p = 1.00f; // typical_p, 1.0 = disabled + float temp = 0.80f; // <= 0.0 to sample greedily, 0.0 to not output probabilities + float dynatemp_range = 0.00f; // 0.0 = disabled + float dynatemp_exponent = 1.00f; // controls how entropy maps to temperature in dynamic temperature sampler + int32_t penalty_last_n = 64; // last n tokens to penalize (0 = disable penalty, -1 = context size) + float penalty_repeat = 1.00f; // 1.0 = disabled + float penalty_freq = 0.00f; // 0.0 = disabled + float penalty_present = 0.00f; // 0.0 = disabled + float dry_multiplier = 0.0f; // 0.0 = disabled; DRY repetition penalty for tokens extending repetition: + float dry_base = 1.75f; // 0.0 = disabled; multiplier * base ^ (length of sequence before token - allowed length) + int32_t dry_allowed_length = 2; // tokens extending repetitions beyond this receive penalty + int32_t dry_penalty_last_n = -1; // how many tokens to scan for repetitions (0 = disable penalty, -1 = context size) + int32_t mirostat = 0; // 0 = disabled, 1 = mirostat, 2 = mirostat 2.0 + float mirostat_tau = 5.00f; // target entropy + float mirostat_eta = 0.10f; // learning rate + bool penalize_nl = false; // consider newlines as a repeatable token + bool ignore_eos = false; + bool no_perf = false; // disable performance metrics + + std::vector dry_sequence_breakers = {"\n", ":", "\"", "*"}; // default sequence breakers for DRY + + + std::vector samplers = { + COMMON_SAMPLER_TYPE_DRY, + COMMON_SAMPLER_TYPE_TOP_K, + COMMON_SAMPLER_TYPE_TYPICAL_P, + 
COMMON_SAMPLER_TYPE_TOP_P, + COMMON_SAMPLER_TYPE_MIN_P, + COMMON_SAMPLER_TYPE_XTC, + COMMON_SAMPLER_TYPE_TEMPERATURE, + }; + + std::string grammar; // optional BNF-like grammar to constrain sampling + + std::vector logit_bias; // logit biases to apply + + // print the parameters into a string + std::string print() const; +}; + +struct common_params { + int32_t n_predict = -1; // new tokens to predict + int32_t n_ctx = 4096; // context size + int32_t n_batch = 2048; // logical batch size for prompt processing (must be >=32 to use BLAS) + int32_t n_ubatch = 512; // physical batch size for prompt processing (must be >=32 to use BLAS) + int32_t n_keep = 0; // number of tokens to keep from initial prompt + int32_t n_draft = 5; // number of tokens to draft during speculative decoding + int32_t n_chunks = -1; // max number of chunks to process (-1 = unlimited) + int32_t n_parallel = 1; // number of parallel sequences to decode + int32_t n_sequences = 1; // number of sequences to decode + float p_split = 0.1f; // speculative decoding split probability + int32_t n_gpu_layers = -1; // number of layers to store in VRAM (-1 - use default) + int32_t n_gpu_layers_draft = -1; // number of layers to store in VRAM for the draft model (-1 - use default) + int32_t main_gpu = 0; // the GPU that is used for scratch and small tensors + float tensor_split[128] = {0}; // how split tensors should be distributed across GPUs + int32_t grp_attn_n = 1; // group-attention factor + int32_t grp_attn_w = 512; // group-attention width + int32_t n_print = -1; // print token count every n tokens (-1 = disabled) + float rope_freq_base = 0.0f; // RoPE base frequency + float rope_freq_scale = 0.0f; // RoPE frequency scaling factor + float yarn_ext_factor = -1.0f; // YaRN extrapolation mix factor + float yarn_attn_factor = 1.0f; // YaRN magnitude scaling factor + float yarn_beta_fast = 32.0f; // YaRN low correction dim + float yarn_beta_slow = 1.0f; // YaRN high correction dim + int32_t yarn_orig_ctx = 0; // YaRN original context length + float defrag_thold = -1.0f; // KV cache defragmentation threshold + + struct cpu_params cpuparams; + struct cpu_params cpuparams_batch; + struct cpu_params draft_cpuparams; + struct cpu_params draft_cpuparams_batch; + + ggml_backend_sched_eval_callback cb_eval = nullptr; + void * cb_eval_user_data = nullptr; + + ggml_numa_strategy numa = GGML_NUMA_STRATEGY_DISABLED; + + enum llama_split_mode split_mode = LLAMA_SPLIT_MODE_LAYER; // how to split the model across GPUs + enum llama_rope_scaling_type rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED; + enum llama_pooling_type pooling_type = LLAMA_POOLING_TYPE_UNSPECIFIED; // pooling type for embeddings + enum llama_attention_type attention_type = LLAMA_ATTENTION_TYPE_UNSPECIFIED; // attention type for embeddings + + struct common_sampler_params sparams; + + std::string model = ""; // model path // NOLINT + std::string model_draft = ""; // draft model for speculative decoding // NOLINT + std::string model_alias = "unknown"; // model alias // NOLINT + std::string model_url = ""; // model url to download // NOLINT + std::string hf_token = ""; // HF token // NOLINT + std::string hf_repo = ""; // HF repo // NOLINT + std::string hf_file = ""; // HF file // NOLINT + std::string prompt = ""; // NOLINT + std::string prompt_file = ""; // store the external prompt file name // NOLINT + std::string path_prompt_cache = ""; // path to file for saving/loading prompt eval state // NOLINT + std::string input_prefix = ""; // string to prefix user inputs with // 
NOLINT + std::string input_suffix = ""; // string to suffix user inputs with // NOLINT + std::string logdir = ""; // directory in which to save YAML log files // NOLINT + std::string lookup_cache_static = ""; // path of static ngram cache file for lookup decoding // NOLINT + std::string lookup_cache_dynamic = ""; // path of dynamic ngram cache file for lookup decoding // NOLINT + std::string logits_file = ""; // file for saving *all* logits // NOLINT + std::string rpc_servers = ""; // comma separated list of RPC servers // NOLINT + + std::vector in_files; // all input files + std::vector antiprompt; // strings upon which more user input is prompted (a.k.a. reverse prompts) + std::vector kv_overrides; + + bool lora_init_without_apply = false; // only load lora to memory, but do not apply it to ctx (user can manually apply lora later using llama_lora_adapter_apply) + std::vector lora_adapters; // lora adapter path with user defined scale + + std::vector control_vectors; // control vector with user defined scale + + int32_t verbosity = 0; + int32_t control_vector_layer_start = -1; // layer range for control vector + int32_t control_vector_layer_end = -1; // layer range for control vector + + int32_t ppl_stride = 0; // stride for perplexity calculations. If left at 0, the pre-existing approach will be used. + int32_t ppl_output_type = 0; // = 0 -> ppl output is as usual, = 1 -> ppl output is num_tokens, ppl, one per line + // (which is more convenient to use for plotting) + // + bool hellaswag = false; // compute HellaSwag score over random tasks from datafile supplied in prompt + size_t hellaswag_tasks = 400; // number of tasks to use when computing the HellaSwag score + + bool winogrande = false; // compute Winogrande score over random tasks from datafile supplied in prompt + size_t winogrande_tasks = 0; // number of tasks to use when computing the Winogrande score. If 0, all tasks will be computed + + bool multiple_choice = false; // compute TruthfulQA score over random tasks from datafile supplied in prompt + size_t multiple_choice_tasks = 0; // number of tasks to use when computing the TruthfulQA score. 
If 0, all tasks will be computed
+
+    bool kl_divergence = false; // compute KL divergence
+
+    bool usage             = false; // print usage
+    bool use_color         = false; // use color to distinguish generations and inputs
+    bool special           = false; // enable special token output
+    bool interactive       = false; // interactive mode
+    bool interactive_first = false; // wait for user input immediately
+    bool conversation      = false; // conversation mode (does not print special tokens and suffix/prefix)
+    bool prompt_cache_all  = false; // save user input and generations to prompt cache
+    bool prompt_cache_ro   = false; // open the prompt cache read-only and do not update it
+
+    bool escape            = true;  // escape "\n", "\r", "\t", "\'", "\"", and "\\"
+    bool multiline_input   = false; // reverse the usage of `\`
+    bool simple_io         = false; // improves compatibility with subprocesses and limited consoles
+    bool cont_batching     = true;  // insert new sequences for decoding on-the-fly
+    bool flash_attn        = false; // flash attention
+    bool no_perf           = false; // disable performance metrics
+    bool ctx_shift         = true;  // context shift on infinite text generation
+
+    bool input_prefix_bos  = false; // prefix BOS to user inputs, preceding input_prefix
+    bool logits_all        = false; // return logits for all tokens in the batch
+    bool use_mmap          = true;  // use mmap for faster loads
+    bool use_mlock         = false; // use mlock to keep model in memory
+    bool verbose_prompt    = false; // print prompt tokens before generation
+    bool display_prompt    = true;  // print prompt before generation
+    bool dump_kv_cache     = false; // dump the KV cache contents for debugging purposes
+    bool no_kv_offload     = false; // disable KV offloading
+    bool warmup            = true;  // warmup run
+    bool check_tensors     = false; // validate tensor data
+
+    std::string cache_type_k = "f16"; // KV cache data type for the K
+    std::string cache_type_v = "f16"; // KV cache data type for the V
+
+    // multimodal models (see examples/llava)
+    std::string mmproj = "";        // path to multimodal projector // NOLINT
+    std::vector<std::string> image; // path to image file(s)
+
+    // embedding
+    bool embedding         = false; // get only sentence embedding
+    int32_t embd_normalize = 2;     // normalisation for embeddings (-1=none, 0=max absolute int16, 1=taxicab, 2=euclidean, >2=p-norm)
+    std::string embd_out   = "";    // empty = default, "array" = [[],[]...], "json" = openai style, "json+" = same "json" + cosine similarity matrix
+    std::string embd_sep   = "\n";  // separator of embeddings
+    bool reranking         = false; // enable reranking support on server
+
+    // server params
+    int32_t port           = 8080;         // server listens on this network port
+    int32_t timeout_read   = 600;          // http read timeout in seconds
+    int32_t timeout_write  = timeout_read; // http write timeout in seconds
+    int32_t n_threads_http = -1;           // number of threads to process HTTP requests (TODO: support threadpool)
+    int32_t n_cache_reuse  = 0;            // min chunk size to reuse from the cache via KV shifting
+
+    std::string hostname      = "127.0.0.1";
+    std::string public_path   = ""; // NOLINT
+    std::string chat_template = ""; // NOLINT
+    bool enable_chat_template = true;
+
+    std::vector<std::string> api_keys;
+
+    std::string ssl_file_key  = ""; // NOLINT
+    std::string ssl_file_cert = ""; // NOLINT
+
+    // "advanced" endpoints are disabled by default for better security
+    bool webui            = true;
+    bool endpoint_slots   = false;
+    bool endpoint_props   = false; // only control POST requests, not GET
+    bool endpoint_metrics = false;
+
+    bool log_json = false;
+
+    std::string slot_save_path;
+
+    float slot_prompt_similarity = 0.5f;
+
+    //
batched-bench params + bool is_pp_shared = false; + + std::vector n_pp; + std::vector n_tg; + std::vector n_pl; + + // retrieval params + std::vector context_files; // context files to embed + + int32_t chunk_size = 64; // chunk size for context embedding + + std::string chunk_separator = "\n"; // chunk separator for context embedding + + // passkey params + int32_t n_junk = 250; // number of times to repeat the junk text + int32_t i_pos = -1; // position of the passkey in the junk text + + // imatrix params + std::string out_file = "imatrix.dat"; // save the resulting imatrix to this file + + int32_t n_out_freq = 10; // output the imatrix every n_out_freq iterations + int32_t n_save_freq = 0; // save the imatrix every n_save_freq iterations + int32_t i_chunk = 0; // start processing from this chunk + + bool process_output = false; // collect data for the output tensor + bool compute_ppl = true; // whether to compute perplexity + + // cvector-generator params + int n_pca_batch = 100; + int n_pca_iterations = 1000; + dimre_method cvector_dimre_method = DIMRE_METHOD_PCA; + std::string cvector_outfile = "control_vector.gguf"; + std::string cvector_positive_file = "examples/cvector-generator/positive.txt"; + std::string cvector_negative_file = "examples/cvector-generator/negative.txt"; + + bool spm_infill = false; // suffix/prefix/middle pattern for infill + + std::string lora_outfile = "ggml-lora-merged-f16.gguf"; + + // batched-bench params + bool batched_bench_output_jsonl = false; +}; + +// call once at the start of a program if it uses libcommon +// initializes the logging system and prints info about the build +void common_init(); + +std::string common_params_get_system_info(const common_params & params); + +bool parse_cpu_range(const std::string & range, bool(&boolmask)[GGML_MAX_N_THREADS]); +bool parse_cpu_mask(const std::string & mask, bool(&boolmask)[GGML_MAX_N_THREADS]); +void postprocess_cpu_params(cpu_params & cpuparams, const cpu_params * role_model = nullptr); +bool set_process_priority(enum ggml_sched_priority prio); + +// +// String utils +// + +#ifdef __GNUC__ +#ifdef __MINGW32__ +#define LLAMA_COMMON_ATTRIBUTE_FORMAT(...) __attribute__((format(gnu_printf, __VA_ARGS__))) +#else +#define LLAMA_COMMON_ATTRIBUTE_FORMAT(...) __attribute__((format(printf, __VA_ARGS__))) +#endif +#else +#define LLAMA_COMMON_ATTRIBUTE_FORMAT(...) 
+#endif
+
+LLAMA_COMMON_ATTRIBUTE_FORMAT(1, 2)
+std::string string_format(const char * fmt, ...);
+
+std::string string_strip(const std::string & str);
+std::string string_get_sortable_timestamp();
+
+void string_replace_all(std::string & s, const std::string & search, const std::string & replace);
+
+template<typename T>
+static std::vector<T> string_split(const std::string & str, char delim) {
+    static_assert(!std::is_same<T, std::string>::value, "Please use the specialized version for std::string");
+    std::vector<T> values;
+    std::istringstream str_stream(str);
+    std::string token;
+    while (std::getline(str_stream, token, delim)) {
+        T value;
+        std::istringstream token_stream(token);
+        token_stream >> value;
+        values.push_back(value);
+    }
+    return values;
+}
+
+template<>
+std::vector<std::string> string_split<std::string>(const std::string & input, char separator)
+{
+    std::vector<std::string> parts;
+    size_t begin_pos = 0;
+    size_t separator_pos = input.find(separator);
+    while (separator_pos != std::string::npos) {
+        std::string part = input.substr(begin_pos, separator_pos - begin_pos);
+        parts.emplace_back(part);
+        begin_pos = separator_pos + 1;
+        separator_pos = input.find(separator, begin_pos);
+    }
+    parts.emplace_back(input.substr(begin_pos, separator_pos - begin_pos));
+    return parts;
+}
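For reference, a short sketch of how the two overloads above behave (the input values are made up):

    std::vector<int> xs = string_split<int>("1,2,3", ',');                  // {1, 2, 3}, each token parsed via operator>>
    std::vector<std::string> ss = string_split<std::string>("a,,b", ',');   // {"a", "", "b"}, empty fields preserved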
+
+bool string_parse_kv_override(const char * data, std::vector<llama_model_kv_override> & overrides);
+void string_process_escapes(std::string & input);
+
+std::string string_from(bool value);
+std::string string_from(const std::vector<int> & values);
+std::string string_from(const struct llama_context * ctx, const std::vector<llama_token> & tokens);
+std::string string_from(const struct llama_context * ctx, const struct llama_batch & batch);
+
+//
+// Filesystem utils
+//
+
+bool fs_validate_filename(const std::string & filename);
+bool fs_create_directory_with_parents(const std::string & path);
+
+std::string fs_get_cache_directory();
+std::string fs_get_cache_file(const std::string & filename);
+
+//
+// Model utils
+//
+
+struct common_init_result {
+    struct llama_model   * model   = nullptr;
+    struct llama_context * context = nullptr;
+    std::vector<common_lora_adapter_container> lora_adapters;
+};
+
+struct common_init_result common_init_from_params(common_params & params);
+
+struct llama_model_params     common_model_params_to_llama  (const common_params & params);
+struct llama_context_params   common_context_params_to_llama(const common_params & params);
+struct ggml_threadpool_params ggml_threadpool_params_from_cpu_params(const cpu_params & params);
+
+struct llama_model * common_load_model_from_url(const char * model_url, const char * path_model, const char * hf_token, const struct llama_model_params & params);
+struct llama_model * common_load_model_from_hf(const char * repo, const char * file, const char * path_model, const char * hf_token, const struct llama_model_params & params);
+
+// clear LoRA adapters from context, then apply new list of adapters
+void common_lora_adapters_apply(struct llama_context * ctx, std::vector<common_lora_adapter_container> & lora_adapters);
+
+// Batch utils
+
+void common_batch_clear(struct llama_batch & batch);
+
+void common_batch_add(
+    struct llama_batch & batch,
+    llama_token id,
+    llama_pos pos,
+    const std::vector<llama_seq_id> & seq_ids,
+    bool logits);
+
+//
+// Vocab utils
+//
+
+// tokenizes a string into a vector of tokens
+// should work similar to Python's `tokenizer.encode`
+std::vector<llama_token> common_tokenize(
+    const struct llama_context * ctx,
+    const std::string & text,
+    bool add_special,
+    bool parse_special = false);
+
+std::vector<llama_token> common_tokenize(
+    const struct llama_model * model,
+    const std::string & text,
+    bool add_special,
+    bool parse_special = false);
+
+// tokenizes a token into a piece, optionally renders special/control tokens
+// should work similar to Python's `tokenizer.id_to_piece`
+std::string common_token_to_piece(
+    const struct llama_context * ctx,
+    llama_token token,
+    bool special = true);
+
+// detokenizes a vector of tokens into a string
+// should work similar to Python's `tokenizer.decode`
+// optionally renders special/control tokens
+std::string common_detokenize(
+    llama_context * ctx,
+    const std::vector<llama_token> & tokens,
+    bool special = true);
+
+//
+// Chat template utils
+//
+
+// same with llama_chat_message, but uses std::string
+struct common_chat_msg {
+    std::string role;
+    std::string content;
+};
+
+// Check if the template supplied via "--chat-template" is supported or not. Returns true if it's valid
+bool common_chat_verify_template(const std::string & tmpl);
+
+// CPP wrapper for llama_chat_apply_template
+// If the built-in template is not supported, we default to chatml
+// If the custom "tmpl" is not supported, we throw an error
+std::string common_chat_apply_template(const struct llama_model * model,
+        const std::string & tmpl,
+        const std::vector<common_chat_msg> & chat,
+        bool add_ass);
+
+// Format single message, while taking into account the position of that message in chat history
+std::string common_chat_format_single(const struct llama_model * model,
+        const std::string & tmpl,
+        const std::vector<common_chat_msg> & past_msg,
+        const common_chat_msg & new_msg,
+        bool add_ass);
+
+// Returns an example of formatted chat
+std::string common_chat_format_example(const struct llama_model * model,
+        const std::string & tmpl);
+
+//
+// KV cache utils
+//
+
+// Dump the KV cache view with the number of sequences per cell.
+void common_kv_cache_dump_view(const llama_kv_cache_view & view, int row_size = 80);
+
+// Dump the KV cache view showing individual sequences in each cell (long output).
+void common_kv_cache_dump_view_seqs(const llama_kv_cache_view & view, int row_size = 40);
+
+//
+// Embedding utils
+//
+
+void common_embd_normalize(const float * inp, float * out, int n, int embd_norm = 2);
+
+float common_embd_similarity_cos(const float * embd1, const float * embd2, int n);
+
+//
+// Control vector utils
+//
+
+struct common_control_vector_data {
+    int n_embd;
+
+    // stores data for layers [1, n_layer] where n_layer = data.size() / n_embd
+    std::vector<float> data;
+};
+
+struct common_control_vector_load_info {
+    float strength;
+
+    std::string fname;
+};
+
+// Load control vectors, scale each by strength, and add them together.
+// On error, returns {-1, empty}
+common_control_vector_data common_control_vector_load(const std::vector<common_control_vector_load_info> & load_infos);
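A usage sketch for the loader declared above (file names and strengths are hypothetical):

    std::vector<common_control_vector_load_info> infos = {
        { /* strength */  1.0f, /* fname */ "direction_a.gguf" },
        { /* strength */ -0.4f, /* fname */ "direction_b.gguf" }, // negative strength subtracts this direction
    };
    common_control_vector_data cv = common_control_vector_load(infos);
    if (cv.n_embd == -1) {
        // no valid control vector files - handle the error
    }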
+
+//
+// Split utils
+//
+
+static const char * const LLM_KV_SPLIT_NO            = "split.no";
+static const char * const LLM_KV_SPLIT_COUNT         = "split.count";
+static const char * const LLM_KV_SPLIT_TENSORS_COUNT = "split.tensors.count";
+
+//
+// YAML utils
+//
+
+void yaml_dump_vector_float    (FILE * stream, const char * prop_name, const std::vector<float> & data);
+void yaml_dump_vector_int      (FILE * stream, const char * prop_name, const std::vector<int> & data);
+void yaml_dump_string_multiline(FILE * stream, const char * prop_name, const char * data);
+
+void yaml_dump_non_result_info(
+    FILE * stream, const common_params & params, const llama_context * lctx,
+    const std::string & timestamp, const std::vector<llama_token> & prompt_tokens, const char * model_desc);
diff --git a/common/console.cpp b/common/console.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..078a8d678d933bec79eda408ff45858758c16f37
--- /dev/null
+++ b/common/console.cpp
@@ -0,0 +1,504 @@
+#include "console.h"
+#include <vector>
+#include <iostream>
+
+#if defined(_WIN32)
+#define WIN32_LEAN_AND_MEAN
+#ifndef NOMINMAX
+#define NOMINMAX
+#endif
+#include <windows.h>
+#include <fcntl.h>
+#include <io.h>
+#ifndef ENABLE_VIRTUAL_TERMINAL_PROCESSING
+#define ENABLE_VIRTUAL_TERMINAL_PROCESSING 0x0004
+#endif
+#else
+#include <climits>
+#include <sys/ioctl.h>
+#include <unistd.h>
+#include <wchar.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <termios.h>
+#endif
+
+#define ANSI_COLOR_RED     "\x1b[31m"
+#define ANSI_COLOR_GREEN   "\x1b[32m"
+#define ANSI_COLOR_YELLOW  "\x1b[33m"
+#define ANSI_COLOR_BLUE    "\x1b[34m"
+#define ANSI_COLOR_MAGENTA "\x1b[35m"
+#define ANSI_COLOR_CYAN    "\x1b[36m"
+#define ANSI_COLOR_RESET   "\x1b[0m"
+#define ANSI_BOLD          "\x1b[1m"
+
+namespace console {
+
+    //
+    // Console state
+    //
+
+    static bool      advanced_display = false;
+    static bool      simple_io        = true;
+    static display_t current_display  = reset;
+
+    static FILE* out = stdout;
+
+#if defined (_WIN32)
+    static void* hConsole;
+#else
+    static FILE*   tty = nullptr;
+    static termios initial_state;
+#endif
+
+    //
+    // Init and cleanup
+    //
+
+    void init(bool use_simple_io, bool use_advanced_display) {
+        advanced_display = use_advanced_display;
+        simple_io = use_simple_io;
+#if defined(_WIN32)
+        // Windows-specific console initialization
+        DWORD dwMode = 0;
+        hConsole = GetStdHandle(STD_OUTPUT_HANDLE);
+        if (hConsole == INVALID_HANDLE_VALUE || !GetConsoleMode(hConsole, &dwMode)) {
+            hConsole = GetStdHandle(STD_ERROR_HANDLE);
+            if (hConsole != INVALID_HANDLE_VALUE && (!GetConsoleMode(hConsole, &dwMode))) {
+                hConsole = nullptr;
+                simple_io = true;
+            }
+        }
+        if (hConsole) {
+            // Check conditions combined to reduce nesting
+            if (advanced_display && !(dwMode & ENABLE_VIRTUAL_TERMINAL_PROCESSING) &&
+                !SetConsoleMode(hConsole, dwMode | ENABLE_VIRTUAL_TERMINAL_PROCESSING)) {
+                advanced_display = false;
+            }
+            // Set console output codepage to UTF8
+            SetConsoleOutputCP(CP_UTF8);
+        }
+        HANDLE hConIn = GetStdHandle(STD_INPUT_HANDLE);
+        if (hConIn != INVALID_HANDLE_VALUE && GetConsoleMode(hConIn, &dwMode)) {
+            // Set console input codepage to UTF16
+            _setmode(_fileno(stdin), _O_WTEXT);
+
+            // Set ICANON (ENABLE_LINE_INPUT) and ECHO (ENABLE_ECHO_INPUT)
+            if (simple_io) {
+                dwMode |= ENABLE_LINE_INPUT | ENABLE_ECHO_INPUT;
+            } else {
+                dwMode &= ~(ENABLE_LINE_INPUT | ENABLE_ECHO_INPUT);
+            }
+            if (!SetConsoleMode(hConIn, dwMode)) {
+                simple_io = true;
+            }
+        }
+        if (simple_io) {
+            _setmode(_fileno(stdin), _O_U8TEXT);
+        }
+#else
+        // POSIX-specific console initialization
+        if
(!simple_io) { + struct termios new_termios; + tcgetattr(STDIN_FILENO, &initial_state); + new_termios = initial_state; + new_termios.c_lflag &= ~(ICANON | ECHO); + new_termios.c_cc[VMIN] = 1; + new_termios.c_cc[VTIME] = 0; + tcsetattr(STDIN_FILENO, TCSANOW, &new_termios); + + tty = fopen("/dev/tty", "w+"); + if (tty != nullptr) { + out = tty; + } + } + + setlocale(LC_ALL, ""); +#endif + } + + void cleanup() { + // Reset console display + set_display(reset); + +#if !defined(_WIN32) + // Restore settings on POSIX systems + if (!simple_io) { + if (tty != nullptr) { + out = stdout; + fclose(tty); + tty = nullptr; + } + tcsetattr(STDIN_FILENO, TCSANOW, &initial_state); + } +#endif + } + + // + // Display and IO + // + + // Keep track of current display and only emit ANSI code if it changes + void set_display(display_t display) { + if (advanced_display && current_display != display) { + fflush(stdout); + switch(display) { + case reset: + fprintf(out, ANSI_COLOR_RESET); + break; + case prompt: + fprintf(out, ANSI_COLOR_YELLOW); + break; + case user_input: + fprintf(out, ANSI_BOLD ANSI_COLOR_GREEN); + break; + case error: + fprintf(out, ANSI_BOLD ANSI_COLOR_RED); + } + current_display = display; + fflush(out); + } + } + + static char32_t getchar32() { +#if defined(_WIN32) + HANDLE hConsole = GetStdHandle(STD_INPUT_HANDLE); + wchar_t high_surrogate = 0; + + while (true) { + INPUT_RECORD record; + DWORD count; + if (!ReadConsoleInputW(hConsole, &record, 1, &count) || count == 0) { + return WEOF; + } + + if (record.EventType == KEY_EVENT && record.Event.KeyEvent.bKeyDown) { + wchar_t wc = record.Event.KeyEvent.uChar.UnicodeChar; + if (wc == 0) { + continue; + } + + if ((wc >= 0xD800) && (wc <= 0xDBFF)) { // Check if wc is a high surrogate + high_surrogate = wc; + continue; + } + if ((wc >= 0xDC00) && (wc <= 0xDFFF)) { // Check if wc is a low surrogate + if (high_surrogate != 0) { // Check if we have a high surrogate + return ((high_surrogate - 0xD800) << 10) + (wc - 0xDC00) + 0x10000; + } + } + + high_surrogate = 0; // Reset the high surrogate + return static_cast(wc); + } + } +#else + wchar_t wc = getwchar(); + if (static_cast(wc) == WEOF) { + return WEOF; + } + +#if WCHAR_MAX == 0xFFFF + if ((wc >= 0xD800) && (wc <= 0xDBFF)) { // Check if wc is a high surrogate + wchar_t low_surrogate = getwchar(); + if ((low_surrogate >= 0xDC00) && (low_surrogate <= 0xDFFF)) { // Check if the next wchar is a low surrogate + return (static_cast(wc & 0x03FF) << 10) + (low_surrogate & 0x03FF) + 0x10000; + } + } + if ((wc >= 0xD800) && (wc <= 0xDFFF)) { // Invalid surrogate pair + return 0xFFFD; // Return the replacement character U+FFFD + } +#endif + + return static_cast(wc); +#endif + } + + static void pop_cursor() { +#if defined(_WIN32) + if (hConsole != NULL) { + CONSOLE_SCREEN_BUFFER_INFO bufferInfo; + GetConsoleScreenBufferInfo(hConsole, &bufferInfo); + + COORD newCursorPosition = bufferInfo.dwCursorPosition; + if (newCursorPosition.X == 0) { + newCursorPosition.X = bufferInfo.dwSize.X - 1; + newCursorPosition.Y -= 1; + } else { + newCursorPosition.X -= 1; + } + + SetConsoleCursorPosition(hConsole, newCursorPosition); + return; + } +#endif + putc('\b', out); + } + + static int estimateWidth(char32_t codepoint) { +#if defined(_WIN32) + (void)codepoint; + return 1; +#else + return wcwidth(codepoint); +#endif + } + + static int put_codepoint(const char* utf8_codepoint, size_t length, int expectedWidth) { +#if defined(_WIN32) + CONSOLE_SCREEN_BUFFER_INFO bufferInfo; + if (!GetConsoleScreenBufferInfo(hConsole, 
&bufferInfo)) { + // go with the default + return expectedWidth; + } + COORD initialPosition = bufferInfo.dwCursorPosition; + DWORD nNumberOfChars = length; + WriteConsole(hConsole, utf8_codepoint, nNumberOfChars, &nNumberOfChars, NULL); + + CONSOLE_SCREEN_BUFFER_INFO newBufferInfo; + GetConsoleScreenBufferInfo(hConsole, &newBufferInfo); + + // Figure out our real position if we're in the last column + if (utf8_codepoint[0] != 0x09 && initialPosition.X == newBufferInfo.dwSize.X - 1) { + DWORD nNumberOfChars; + WriteConsole(hConsole, &" \b", 2, &nNumberOfChars, NULL); + GetConsoleScreenBufferInfo(hConsole, &newBufferInfo); + } + + int width = newBufferInfo.dwCursorPosition.X - initialPosition.X; + if (width < 0) { + width += newBufferInfo.dwSize.X; + } + return width; +#else + // We can trust expectedWidth if we've got one + if (expectedWidth >= 0 || tty == nullptr) { + fwrite(utf8_codepoint, length, 1, out); + return expectedWidth; + } + + fputs("\033[6n", tty); // Query cursor position + int x1; + int y1; + int x2; + int y2; + int results = 0; + results = fscanf(tty, "\033[%d;%dR", &y1, &x1); + + fwrite(utf8_codepoint, length, 1, tty); + + fputs("\033[6n", tty); // Query cursor position + results += fscanf(tty, "\033[%d;%dR", &y2, &x2); + + if (results != 4) { + return expectedWidth; + } + + int width = x2 - x1; + if (width < 0) { + // Calculate the width considering text wrapping + struct winsize w; + ioctl(STDOUT_FILENO, TIOCGWINSZ, &w); + width += w.ws_col; + } + return width; +#endif + } + + static void replace_last(char ch) { +#if defined(_WIN32) + pop_cursor(); + put_codepoint(&ch, 1, 1); +#else + fprintf(out, "\b%c", ch); +#endif + } + + static void append_utf8(char32_t ch, std::string & out) { + if (ch <= 0x7F) { + out.push_back(static_cast(ch)); + } else if (ch <= 0x7FF) { + out.push_back(static_cast(0xC0 | ((ch >> 6) & 0x1F))); + out.push_back(static_cast(0x80 | (ch & 0x3F))); + } else if (ch <= 0xFFFF) { + out.push_back(static_cast(0xE0 | ((ch >> 12) & 0x0F))); + out.push_back(static_cast(0x80 | ((ch >> 6) & 0x3F))); + out.push_back(static_cast(0x80 | (ch & 0x3F))); + } else if (ch <= 0x10FFFF) { + out.push_back(static_cast(0xF0 | ((ch >> 18) & 0x07))); + out.push_back(static_cast(0x80 | ((ch >> 12) & 0x3F))); + out.push_back(static_cast(0x80 | ((ch >> 6) & 0x3F))); + out.push_back(static_cast(0x80 | (ch & 0x3F))); + } else { + // Invalid Unicode code point + } + } + + // Helper function to remove the last UTF-8 character from a string + static void pop_back_utf8_char(std::string & line) { + if (line.empty()) { + return; + } + + size_t pos = line.length() - 1; + + // Find the start of the last UTF-8 character (checking up to 4 bytes back) + for (size_t i = 0; i < 3 && pos > 0; ++i, --pos) { + if ((line[pos] & 0xC0) != 0x80) { + break; // Found the start of the character + } + } + line.erase(pos); + } + + static bool readline_advanced(std::string & line, bool multiline_input) { + if (out != stdout) { + fflush(stdout); + } + + line.clear(); + std::vector widths; + bool is_special_char = false; + bool end_of_stream = false; + + char32_t input_char; + while (true) { + fflush(out); // Ensure all output is displayed before waiting for input + input_char = getchar32(); + + if (input_char == '\r' || input_char == '\n') { + break; + } + + if (input_char == (char32_t) WEOF || input_char == 0x04 /* Ctrl+D*/) { + end_of_stream = true; + break; + } + + if (is_special_char) { + set_display(user_input); + replace_last(line.back()); + is_special_char = false; + } + + if (input_char == '\033') 
{ // Escape sequence + char32_t code = getchar32(); + if (code == '[' || code == 0x1B) { + // Discard the rest of the escape sequence + while ((code = getchar32()) != (char32_t) WEOF) { + if ((code >= 'A' && code <= 'Z') || (code >= 'a' && code <= 'z') || code == '~') { + break; + } + } + } + } else if (input_char == 0x08 || input_char == 0x7F) { // Backspace + if (!widths.empty()) { + int count; + do { + count = widths.back(); + widths.pop_back(); + // Move cursor back, print space, and move cursor back again + for (int i = 0; i < count; i++) { + replace_last(' '); + pop_cursor(); + } + pop_back_utf8_char(line); + } while (count == 0 && !widths.empty()); + } + } else { + int offset = line.length(); + append_utf8(input_char, line); + int width = put_codepoint(line.c_str() + offset, line.length() - offset, estimateWidth(input_char)); + if (width < 0) { + width = 0; + } + widths.push_back(width); + } + + if (!line.empty() && (line.back() == '\\' || line.back() == '/')) { + set_display(prompt); + replace_last(line.back()); + is_special_char = true; + } + } + + bool has_more = multiline_input; + if (is_special_char) { + replace_last(' '); + pop_cursor(); + + char last = line.back(); + line.pop_back(); + if (last == '\\') { + line += '\n'; + fputc('\n', out); + has_more = !has_more; + } else { + // llama will just eat the single space, it won't act as a space + if (line.length() == 1 && line.back() == ' ') { + line.clear(); + pop_cursor(); + } + has_more = false; + } + } else { + if (end_of_stream) { + has_more = false; + } else { + line += '\n'; + fputc('\n', out); + } + } + + fflush(out); + return has_more; + } + + static bool readline_simple(std::string & line, bool multiline_input) { +#if defined(_WIN32) + std::wstring wline; + if (!std::getline(std::wcin, wline)) { + // Input stream is bad or EOF received + line.clear(); + GenerateConsoleCtrlEvent(CTRL_C_EVENT, 0); + return false; + } + + int size_needed = WideCharToMultiByte(CP_UTF8, 0, &wline[0], (int)wline.size(), NULL, 0, NULL, NULL); + line.resize(size_needed); + WideCharToMultiByte(CP_UTF8, 0, &wline[0], (int)wline.size(), &line[0], size_needed, NULL, NULL); +#else + if (!std::getline(std::cin, line)) { + // Input stream is bad or EOF received + line.clear(); + return false; + } +#endif + if (!line.empty()) { + char last = line.back(); + if (last == '/') { // Always return control on '/' symbol + line.pop_back(); + return false; + } + if (last == '\\') { // '\\' changes the default action + line.pop_back(); + multiline_input = !multiline_input; + } + } + line += '\n'; + + // By default, continue input if multiline_input is set + return multiline_input; + } + + bool readline(std::string & line, bool multiline_input) { + set_display(user_input); + + if (simple_io) { + return readline_simple(line, multiline_input); + } + return readline_advanced(line, multiline_input); + } + +} diff --git a/common/console.h b/common/console.h new file mode 100644 index 0000000000000000000000000000000000000000..ec175269b9d8af48803d0b6e618d008a9ab99b4d --- /dev/null +++ b/common/console.h @@ -0,0 +1,19 @@ +// Console functions + +#pragma once + +#include + +namespace console { + enum display_t { + reset = 0, + prompt, + user_input, + error + }; + + void init(bool use_simple_io, bool use_advanced_display); + void cleanup(); + void set_display(display_t display); + bool readline(std::string & line, bool multiline_input); +} diff --git a/common/json-schema-to-grammar.cpp b/common/json-schema-to-grammar.cpp new file mode 100644 index 
0000000000000000000000000000000000000000..dadc18c8b352f678326c8c8da6a506dfdcca05a4
--- /dev/null
+++ b/common/json-schema-to-grammar.cpp
@@ -0,0 +1,1045 @@
+#include "json-schema-to-grammar.h"
+#include <algorithm>
+#include <fstream>
+#include <map>
+#include <regex>
+#include <sstream>
+#include <string>
+#include <unordered_map>
+#include <unordered_set>
+#include <vector>
+
+using json = nlohmann::ordered_json;
+
+template <typename Iterator>
+static std::string join(Iterator begin, Iterator end, const std::string & separator);
+
+static std::string repeat(const std::string & str, size_t n);
+
+static std::string build_repetition(const std::string & item_rule, int min_items, int max_items, const std::string & separator_rule = "") {
+    auto has_max = max_items != std::numeric_limits<int>::max();
+
+    if (min_items == 0 && max_items == 1) {
+        return item_rule + "?";
+    }
+
+    if (separator_rule.empty()) {
+        if (min_items == 1 && !has_max) {
+            return item_rule + "+";
+        } else if (min_items == 0 && !has_max) {
+            return item_rule + "*";
+        } else {
+            return item_rule + "{" + std::to_string(min_items) + "," + (has_max ? std::to_string(max_items) : "") + "}";
+        }
+    }
+
+    auto result = item_rule + " " + build_repetition("(" + separator_rule + " " + item_rule + ")", min_items == 0 ? 0 : min_items - 1, has_max ? max_items - 1 : max_items);
+    if (min_items == 0) {
+        result = "(" + result + ")?";
+    }
+    return result;
+}
+
+/* Minimalistic replacement for std::string_view, which is only available from C++17 onwards */
+class string_view {
+    const std::string & _str;
+    const size_t _start;
+    const size_t _end;
+public:
+    string_view(const std::string & str, size_t start = 0, size_t end = std::string::npos) : _str(str), _start(start), _end(end == std::string::npos ? str.length() : end) {}
+
+    size_t size() const {
+        return _end - _start;
+    }
+
+    size_t length() const {
+        return size();
+    }
+
+    operator std::string() const {
+        return str();
+    }
+
+    std::string str() const {
+        return _str.substr(_start, _end - _start);
+    }
+
+    string_view substr(size_t pos, size_t len = std::string::npos) const {
+        return string_view(_str, _start + pos, len == std::string::npos ?
_end : _start + pos + len);
+    }
+
+    char operator[](size_t pos) const {
+        auto index = _start + pos;
+        if (index >= _end) {
+            throw std::out_of_range("string_view index out of range");
+        }
+        return _str[_start + pos];
+    }
+
+    bool operator==(const string_view & other) const {
+        std::string this_str = *this;
+        std::string other_str = other;
+        return this_str == other_str;
+    }
+};
+
+static void _build_min_max_int(int min_value, int max_value, std::stringstream & out, int decimals_left = 16, bool top_level = true) {
+    auto has_min = min_value != std::numeric_limits<int>::min();
+    auto has_max = max_value != std::numeric_limits<int>::max();
+
+    auto digit_range = [&](char from, char to) {
+        out << "[";
+        if (from == to) {
+            out << from;
+        } else {
+            out << from << "-" << to;
+        }
+        out << "]";
+    };
+    auto more_digits = [&](int min_digits, int max_digits) {
+        out << "[0-9]";
+        if (min_digits == max_digits && min_digits == 1) {
+            return;
+        }
+        out << "{";
+        out << min_digits;
+        if (max_digits != min_digits) {
+            out << ",";
+            if (max_digits != std::numeric_limits<int>::max()) {
+                out << max_digits;
+            }
+        }
+        out << "}";
+    };
+    std::function<void(const string_view &, const string_view &)> uniform_range =
+        [&](const string_view & from, const string_view & to) {
+            size_t i = 0;
+            while (i < from.length() && i < to.length() && from[i] == to[i]) {
+                i++;
+            }
+            if (i > 0) {
+                out << "\"" << from.substr(0, i).str() << "\"";
+            }
+            if (i < from.length() && i < to.length()) {
+                if (i > 0) {
+                    out << " ";
+                }
+                auto sub_len = from.length() - i - 1;
+                if (sub_len > 0) {
+                    auto from_sub = from.substr(i + 1);
+                    auto to_sub = to.substr(i + 1);
+                    auto sub_zeros = repeat("0", sub_len);
+                    auto sub_nines = repeat("9", sub_len);
+
+                    auto to_reached = false;
+                    out << "(";
+                    if (from_sub == sub_zeros) {
+                        digit_range(from[i], to[i] - 1);
+                        out << " ";
+                        more_digits(sub_len, sub_len);
+                    } else {
+                        out << "[" << from[i] << "] ";
+                        out << "(";
+                        uniform_range(from_sub, sub_nines);
+                        out << ")";
+                        if (from[i] < to[i] - 1) {
+                            out << " | ";
+                            if (to_sub == sub_nines) {
+                                digit_range(from[i] + 1, to[i]);
+                                to_reached = true;
+                            } else {
+                                digit_range(from[i] + 1, to[i] - 1);
+                            }
+                            out << " ";
+                            more_digits(sub_len, sub_len);
+                        }
+                    }
+                    if (!to_reached) {
+                        out << " | ";
+                        digit_range(to[i], to[i]);
+                        out << " ";
+                        uniform_range(sub_zeros, to_sub);
+                    }
+                    out << ")";
+                } else {
+                    out << "[" << from[i] << "-" << to[i] << "]";
+                }
+            }
+        };
+
+    if (has_min && has_max) {
+        if (min_value < 0 && max_value < 0) {
+            out << "\"-\" (";
+            _build_min_max_int(-max_value, -min_value, out, decimals_left, /* top_level= */ true);
+            out << ")";
+            return;
+        }
+
+        if (min_value < 0) {
+            out << "\"-\" (";
+            _build_min_max_int(0, -min_value, out, decimals_left, /* top_level= */ true);
+            out << ") | ";
+            min_value = 0;
+        }
+
+        auto min_s = std::to_string(min_value);
+        auto max_s = std::to_string(max_value);
+        auto min_digits = min_s.length();
+        auto max_digits = max_s.length();
+
+        for (auto digits = min_digits; digits < max_digits; digits++) {
+            uniform_range(min_s, repeat("9", digits));
+            min_s = "1" + repeat("0", digits);
+            out << " | ";
+        }
+        uniform_range(min_s, max_s);
+        return;
+    }
+
+    auto less_decimals = std::max(decimals_left - 1, 1);
+
+    if (has_min) {
+        if (min_value < 0) {
+            out << "\"-\" (";
+            _build_min_max_int(std::numeric_limits<int>::min(), -min_value, out, decimals_left, /* top_level= */ false);
+            out << ") | [0] | [1-9] ";
+            more_digits(0, decimals_left - 1);
+        } else if (min_value == 0) {
+            if (top_level) {
+                out << "[0] | [1-9] ";
+                more_digits(0, less_decimals);
+            } else {
+                more_digits(1, decimals_left);
+            }
+        } else if (min_value <= 9) {
+            char c = '0' + min_value;
+            auto range_start = top_level ? '1' : '0';
+            if (c > range_start) {
+                digit_range(range_start, c - 1);
+                out << " ";
+                more_digits(1, less_decimals);
+                out << " | ";
+            }
+            digit_range(c, '9');
+            out << " ";
+            more_digits(0, less_decimals);
+        } else {
+            auto min_s = std::to_string(min_value);
+            auto len = min_s.length();
+            auto c = min_s[0];
+
+            if (c > '1') {
+                digit_range(top_level ? '1' : '0', c - 1);
+                out << " ";
+                more_digits(len, less_decimals);
+                out << " | ";
+            }
+            digit_range(c, c);
+            out << " (";
+            _build_min_max_int(std::stoi(min_s.substr(1)), std::numeric_limits<int>::max(), out, less_decimals, /* top_level= */ false);
+            out << ")";
+            if (c < '9') {
+                out << " | ";
+                digit_range(c + 1, '9');
+                out << " ";
+                more_digits(len - 1, less_decimals);
+            }
+        }
+        return;
+    }
+
+    if (has_max) {
+        if (max_value >= 0) {
+            if (top_level) {
+                out << "\"-\" [1-9] ";
+                more_digits(0, less_decimals);
+                out << " | ";
+            }
+            _build_min_max_int(0, max_value, out, decimals_left, /* top_level= */ true);
+        } else {
+            out << "\"-\" (";
+            _build_min_max_int(-max_value, std::numeric_limits<int>::max(), out, decimals_left, /* top_level= */ false);
+            out << ")";
+        }
+        return;
+    }
+
+    throw std::runtime_error("At least one of min_value or max_value must be set");
+}
+
+const std::string SPACE_RULE = "| \" \" | \"\\n\" [ \\t]{0,20}";
+
+struct BuiltinRule {
+    std::string content;
+    std::vector<std::string> deps;
+};
+
+std::unordered_map<std::string, BuiltinRule> PRIMITIVE_RULES = {
+    {"boolean", {"(\"true\" | \"false\") space", {}}},
+    {"decimal-part", {"[0-9]{1,16}", {}}},
+    {"integral-part", {"[0] | [1-9] [0-9]{0,15}", {}}},
+    {"number", {"(\"-\"? integral-part) (\".\" decimal-part)? ([eE] [-+]? integral-part)? space", {"integral-part", "decimal-part"}}},
+    {"integer", {"(\"-\"? integral-part) space", {"integral-part"}}},
+    {"value", {"object | array | string | number | boolean | null", {"object", "array", "string", "number", "boolean", "null"}}},
+    {"object", {"\"{\" space ( string \":\" space value (\",\" space string \":\" space value)* )? \"}\" space", {"string", "value"}}},
+    {"array", {"\"[\" space ( value (\",\" space value)* )? \"]\" space", {"value"}}},
+    {"uuid", {"\"\\\"\" [0-9a-fA-F]{8} \"-\" [0-9a-fA-F]{4} \"-\" [0-9a-fA-F]{4} \"-\" [0-9a-fA-F]{4} \"-\" [0-9a-fA-F]{12} \"\\\"\" space", {}}},
+    {"char", {"[^\"\\\\\\x7F\\x00-\\x1F] | [\\\\] ([\"\\\\bfnrt] | \"u\" [0-9a-fA-F]{4})", {}}},
+    {"string", {"\"\\\"\" char* \"\\\"\" space", {"char"}}},
+    {"null", {"\"null\" space", {}}},
+};
+
+std::unordered_map<std::string, BuiltinRule> STRING_FORMAT_RULES = {
+    {"date", {"[0-9]{4} \"-\" ( \"0\" [1-9] | \"1\" [0-2] ) \"-\" ( \"0\" [1-9] | [1-2] [0-9] | \"3\" [0-1] )", {}}},
+    {"time", {"([01] [0-9] | \"2\" [0-3]) \":\" [0-5] [0-9] \":\" [0-5] [0-9] ( \".\" [0-9]{3} )? ( \"Z\" | ( \"+\" | \"-\" ) ( [01] [0-9] | \"2\" [0-3] ) \":\" [0-5] [0-9] )", {}}},
+    {"date-time", {"date \"T\" time", {"date", "time"}}},
+    {"date-string", {"\"\\\"\" date \"\\\"\" space", {"date"}}},
+    {"time-string", {"\"\\\"\" time \"\\\"\" space", {"time"}}},
+    {"date-time-string", {"\"\\\"\" date-time \"\\\"\" space", {"date-time"}}}
+};
+
+static bool is_reserved_name(const std::string & name) {
+    static std::unordered_set<std::string> RESERVED_NAMES;
+    if (RESERVED_NAMES.empty()) {
+        RESERVED_NAMES.insert("root");
+        for (const auto & p : PRIMITIVE_RULES) RESERVED_NAMES.insert(p.first);
+        for (const auto & p : STRING_FORMAT_RULES) RESERVED_NAMES.insert(p.first);
+    }
+    return RESERVED_NAMES.find(name) != RESERVED_NAMES.end();
+}
+
+std::regex INVALID_RULE_CHARS_RE("[^a-zA-Z0-9-]+");
+std::regex GRAMMAR_LITERAL_ESCAPE_RE("[\r\n\"]");
+std::regex GRAMMAR_RANGE_LITERAL_ESCAPE_RE("[\r\n\"\\]\\-\\\\]");
+std::unordered_map<char, std::string> GRAMMAR_LITERAL_ESCAPES = {
+    {'\r', "\\r"}, {'\n', "\\n"}, {'"', "\\\""}, {'-', "\\-"}, {']', "\\]"}
+};
+
+std::unordered_set<char> NON_LITERAL_SET = {'|', '.', '(', ')', '[', ']', '{', '}', '*', '+', '?'};
+std::unordered_set<char> ESCAPED_IN_REGEXPS_BUT_NOT_IN_LITERALS = {'^', '$', '.', '[', ']', '(', ')', '|', '{', '}', '*', '+', '?'};
+
+template <typename Iterator>
+std::string join(Iterator begin, Iterator end, const std::string & separator) {
+    std::ostringstream result;
+    if (begin != end) {
+        result << *begin;
+        for (Iterator it = begin + 1; it != end; ++it) {
+            result << separator << *it;
+        }
+    }
+    return result.str();
+}
+
+static std::vector<std::string> split(const std::string & str, const std::string & delimiter) {
+    std::vector<std::string> tokens;
+    size_t start = 0;
+    size_t end = str.find(delimiter);
+
+    while (end != std::string::npos) {
+        tokens.push_back(str.substr(start, end - start));
+        start = end + delimiter.length();
+        end = str.find(delimiter, start);
+    }
+
+    tokens.push_back(str.substr(start));
+
+    return tokens;
+}
+
+static std::string repeat(const std::string & str, size_t n) {
+    if (n == 0) {
+        return "";
+    }
+
+    std::string result;
+    result.reserve(str.length() * n);
+
+    for (size_t i = 0; i < n; ++i) {
+        result += str;
+    }
+
+    return result;
+}
+
+static std::string replacePattern(const std::string & input, const std::regex & regex, const std::function<std::string(const std::smatch &)> & replacement) {
+    std::smatch match;
+    std::string result;
+
+    std::string::const_iterator searchStart(input.cbegin());
+    std::string::const_iterator searchEnd(input.cend());
+
+    while (std::regex_search(searchStart, searchEnd, match, regex)) {
+        result.append(searchStart, searchStart + match.position());
+        result.append(replacement(match));
+        searchStart = match.suffix().first;
+    }
+
+    result.append(searchStart, searchEnd);
+
+    return result;
+}
+
+static std::string format_literal(const std::string & literal) {
+    std::string escaped = replacePattern(literal, GRAMMAR_LITERAL_ESCAPE_RE, [&](const std::smatch & match) {
+        char c = match.str()[0];
+        return GRAMMAR_LITERAL_ESCAPES.at(c);
+    });
+    return "\"" + escaped + "\"";
+}
+
+class SchemaConverter {
+private:
+    std::function<json(const std::string &)> _fetch_json;
+    bool _dotall;
+    std::map<std::string, std::string> _rules;
+    std::unordered_map<std::string, json> _refs;
+    std::unordered_set<std::string> _refs_being_resolved;
+    std::vector<std::string> _errors;
+    std::vector<std::string> _warnings;
+
+    std::string _add_rule(const std::string & name, const std::string & rule) {
+        std::string esc_name = regex_replace(name, INVALID_RULE_CHARS_RE, "-");
+        if (_rules.find(esc_name) == _rules.end() || _rules[esc_name] == rule) {
+            _rules[esc_name] = rule;
+            return esc_name;
+        } else {
+            int i = 0;
+            while (_rules.find(esc_name + std::to_string(i)) != _rules.end() && _rules[esc_name + std::to_string(i)] != rule) {
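+                // A rule with this name but different content already exists, so probe
+                // esc_name0, esc_name1, ... until a free slot or an identical rule is found.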
+                i++;
+            }
+            std::string key = esc_name + std::to_string(i);
+            _rules[key] = rule;
+            return key;
+        }
+    }
+
+    std::string _generate_union_rule(const std::string & name, const std::vector<json> & alt_schemas) {
+        std::vector<std::string> rules;
+        for (size_t i = 0; i < alt_schemas.size(); i++) {
+            rules.push_back(visit(alt_schemas[i], name + (name.empty() ? "alternative-" : "-") + std::to_string(i)));
+        }
+        return join(rules.begin(), rules.end(), " | ");
+    }
+
+    std::string _visit_pattern(const std::string & pattern, const std::string & name) {
+        if (!(pattern.front() == '^' && pattern.back() == '$')) {
+            _errors.push_back("Pattern must start with '^' and end with '$'");
+            return "";
+        }
+        std::string sub_pattern = pattern.substr(1, pattern.length() - 2);
+        std::unordered_map<std::string, std::string> sub_rule_ids;
+
+        size_t i = 0;
+        size_t length = sub_pattern.length();
+
+        using literal_or_rule = std::pair<std::string, bool>;
+        auto to_rule = [&](const literal_or_rule & ls) {
+            auto is_literal = ls.second;
+            auto s = ls.first;
+            return is_literal ? "\"" + s + "\"" : s;
+        };
+        std::function<literal_or_rule()> transform = [&]() -> literal_or_rule {
+            size_t start = i;
+            std::vector<literal_or_rule> seq;
+
+            auto get_dot = [&]() {
+                std::string rule;
+                if (_dotall) {
+                    rule = "[\\U00000000-\\U0010FFFF]";
+                } else {
+                    rule = "[^\\x0A\\x0D]";
+                }
+                return _add_rule("dot", rule);
+            };
+
+            // Joins the sequence, merging consecutive literals together.
+            auto join_seq = [&]() {
+                std::vector<literal_or_rule> ret;
+
+                std::string literal;
+                auto flush_literal = [&]() {
+                    if (literal.empty()) {
+                        return false;
+                    }
+                    ret.emplace_back(literal, true);
+                    literal.clear();
+                    return true;
+                };
+
+                for (const auto & item : seq) {
+                    auto is_literal = item.second;
+                    if (is_literal) {
+                        literal += item.first;
+                    } else {
+                        flush_literal();
+                        ret.push_back(item);
+                    }
+                }
+                flush_literal();
+
+                std::vector<std::string> results;
+                for (const auto & item : ret) {
+                    results.push_back(to_rule(item));
+                }
+                return std::make_pair(join(results.begin(), results.end(), " "), false);
+            };
+
+            while (i < length) {
+                char c = sub_pattern[i];
+                if (c == '.') {
+                    seq.emplace_back(get_dot(), false);
+                    i++;
+                } else if (c == '(') {
+                    i++;
+                    if (i < length) {
+                        if (sub_pattern[i] == '?') {
+                            _warnings.push_back("Unsupported pattern syntax");
+                        }
+                    }
+                    seq.emplace_back("(" + to_rule(transform()) + ")", false);
+                } else if (c == ')') {
+                    i++;
+                    if (start > 0 && sub_pattern[start - 1] != '(') {
+                        _errors.push_back("Unbalanced parentheses");
+                    }
+                    return join_seq();
+                } else if (c == '[') {
+                    std::string square_brackets = std::string(1, c);
+                    i++;
+                    while (i < length && sub_pattern[i] != ']') {
+                        if (sub_pattern[i] == '\\') {
+                            square_brackets += sub_pattern.substr(i, 2);
+                            i += 2;
+                        } else {
+                            square_brackets += sub_pattern[i];
+                            i++;
+                        }
+                    }
+                    if (i >= length) {
+                        _errors.push_back("Unbalanced square brackets");
+                    }
+                    square_brackets += ']';
+                    i++;
+                    seq.emplace_back(square_brackets, false);
+                } else if (c == '|') {
+                    seq.emplace_back("|", false);
+                    i++;
+                } else if (c == '*' || c == '+' || c == '?') {
+                    seq.back() = std::make_pair(to_rule(seq.back()) + c, false);
+                    i++;
+                } else if (c == '{') {
+                    std::string curly_brackets = std::string(1, c);
+                    i++;
+                    while (i < length && sub_pattern[i] != '}') {
+                        curly_brackets += sub_pattern[i];
+                        i++;
+                    }
+                    if (i >= length) {
+                        _errors.push_back("Unbalanced curly brackets");
+                    }
+                    curly_brackets += '}';
+                    i++;
+                    auto nums = split(curly_brackets.substr(1, curly_brackets.length() - 2), ",");
+                    int min_times = 0;
+                    int max_times = std::numeric_limits<int>::max();
+                    try {
+                        if (nums.size() == 1) {
+                            min_times = max_times = std::stoi(nums[0]);
+                        } else if (nums.size() != 2) {
+                            _errors.push_back("Wrong number of values in curly brackets");
+                        } else {
+                            if (!nums[0].empty()) {
+                                min_times = std::stoi(nums[0]);
+                            }
+                            if (!nums[1].empty()) {
+                                max_times = std::stoi(nums[1]);
+                            }
+                        }
+                    } catch (const std::invalid_argument & e) {
+                        _errors.push_back("Invalid number in curly brackets");
+                        return std::make_pair("", false);
+                    }
+                    auto & last = seq.back();
+                    auto & sub = last.first;
+                    auto sub_is_literal = last.second;
+
+                    if (!sub_is_literal) {
+                        std::string & sub_id = sub_rule_ids[sub];
+                        if (sub_id.empty()) {
+                            sub_id = _add_rule(name + "-" + std::to_string(sub_rule_ids.size()), sub);
+                        }
+                        sub = sub_id;
+                    }
+                    seq.back().first = build_repetition(
+                        sub_is_literal ? "\"" + sub + "\"" : sub,
+                        min_times,
+                        max_times,
+                        ""
+                    );
+                    seq.back().second = false;
+                } else {
+                    std::string literal;
+                    auto is_non_literal = [&](char c) {
+                        return NON_LITERAL_SET.find(c) != NON_LITERAL_SET.end();
+                    };
+                    while (i < length) {
+                        if (sub_pattern[i] == '\\' && i < length - 1) {
+                            char next = sub_pattern[i + 1];
+                            if (ESCAPED_IN_REGEXPS_BUT_NOT_IN_LITERALS.find(next) != ESCAPED_IN_REGEXPS_BUT_NOT_IN_LITERALS.end()) {
+                                i++;
+                                literal += sub_pattern[i];
+                                i++;
+                            } else {
+                                literal += sub_pattern.substr(i, 2);
+                                i += 2;
+                            }
+                        } else if (sub_pattern[i] == '"') {
+                            literal += "\\\"";
+                            i++;
+                        } else if (!is_non_literal(sub_pattern[i]) &&
+                                (i == length - 1 || literal.empty() || sub_pattern[i + 1] == '.' || !is_non_literal(sub_pattern[i + 1]))) {
+                            literal += sub_pattern[i];
+                            i++;
+                        } else {
+                            break;
+                        }
+                    }
+                    if (!literal.empty()) {
+                        seq.emplace_back(literal, true);
+                    }
+                }
+            }
+            return join_seq();
+        };
+        return _add_rule(name, "\"\\\"\" (" + to_rule(transform()) + ") \"\\\"\" space");
+    }
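+
+    // Illustrative note: the rule returned above always has the shape
+    //   "\"" ( <translated pattern> ) "\"" space
+    // i.e. the translated pattern body wrapped in literal double quotes. For a
+    // hypothetical schema {"type": "string", "pattern": "^ab*$"} this comes out
+    // roughly as: root ::= "\"" ("a" "b"*) "\"" space, while unsupported regex
+    // constructs are reported through _errors/_warnings rather than mistranslated.
+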
+    /*
+        Returns a rule that matches a JSON string that is none of the provided strings
+
+        not_strings({"a"})
+            -> ["] ( [a] char+ | [^"a] char* )? ["] space
+        not_strings({"and", "also"})
+            -> ["] ( [a] ([l] ([s] ([o] char+ | [^"o] char*) | [^"s] char*) | [n] ([d] char+ | [^"d] char*) | [^"ln] char*) | [^"a] char* )? ["] space
+    */
+    std::string _not_strings(const std::vector<std::string> & strings) {
+
+        struct TrieNode {
+            std::map<char, TrieNode> children;
+            bool is_end_of_string;
+
+            TrieNode() : is_end_of_string(false) {}
+
+            void insert(const std::string & string) {
+                auto node = this;
+                for (char c : string) {
+                    node = &node->children[c];
+                }
+                node->is_end_of_string = true;
+            }
+        };
+
+        TrieNode trie;
+        for (const auto & s : strings) {
+            trie.insert(s);
+        }
+
+        std::string char_rule = _add_primitive("char", PRIMITIVE_RULES.at("char"));
+        std::ostringstream out;
+        out << "[\"] ( ";
+        std::function<void(const TrieNode &)> visit = [&](const TrieNode & node) {
+            std::ostringstream rejects;
+            auto first = true;
+            for (const auto & kv : node.children) {
+                rejects << kv.first;
+                if (first) {
+                    first = false;
+                } else {
+                    out << " | ";
+                }
+                out << "[" << kv.first << "]";
+                if (!kv.second.children.empty()) {
+                    out << " (";
+                    visit(kv.second);
+                    out << ")";
+                } else if (kv.second.is_end_of_string) {
+                    out << " " << char_rule << "+";
+                }
+            }
+            if (!node.children.empty()) {
+                if (!first) {
+                    out << " | ";
+                }
+                out << "[^\"" << rejects.str() << "] " << char_rule << "*";
+            }
+        };
+        visit(trie);
+
+        out << " )";
+        if (!trie.is_end_of_string) {
+            out << "?";
+        }
+        out << " [\"] space";
+        return out.str();
+    }
+
+    std::string _resolve_ref(const std::string & ref) {
+        std::string ref_name = ref.substr(ref.find_last_of('/') + 1);
+        if (_rules.find(ref_name) == _rules.end() && _refs_being_resolved.find(ref) == _refs_being_resolved.end()) {
+            _refs_being_resolved.insert(ref);
+            json resolved = _refs[ref];
+            ref_name = visit(resolved, ref_name);
+            _refs_being_resolved.erase(ref);
+        }
+        return ref_name;
+    }
+
+    std::string _build_object_rule(
+        const std::vector<std::pair<std::string, json>> & properties,
+        const std::unordered_set<std::string> & required,
+        const std::string & name,
+        const json & additional_properties)
+    {
+        std::vector<std::string> required_props;
+        std::vector<std::string> optional_props;
+        std::unordered_map<std::string, std::string> prop_kv_rule_names;
+        std::vector<std::string> prop_names;
+        for (const auto & kv : properties) {
+            const auto & prop_name = kv.first;
+            const auto & prop_schema = kv.second;
+
+            std::string prop_rule_name = visit(prop_schema, name + (name.empty() ? "" : "-") + prop_name);
+            prop_kv_rule_names[prop_name] = _add_rule(
+                name + (name.empty() ? "" : "-") + prop_name + "-kv",
+                format_literal(json(prop_name).dump()) + " space \":\" space " + prop_rule_name
+            );
+            if (required.find(prop_name) != required.end()) {
+                required_props.push_back(prop_name);
+            } else {
+                optional_props.push_back(prop_name);
+            }
+            prop_names.push_back(prop_name);
+        }
+        if ((additional_properties.is_boolean() && additional_properties.get<bool>()) || additional_properties.is_object()) {
+            std::string sub_name = name + (name.empty() ? "" : "-") + "additional";
+            std::string value_rule =
+                additional_properties.is_object() ? visit(additional_properties, sub_name + "-value")
+                : _add_primitive("value", PRIMITIVE_RULES.at("value"));
+
+            auto key_rule =
+                prop_names.empty() ?
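+                    // With no declared property names, any JSON string may serve as a key;
+                    // otherwise _not_strings() constrains keys to avoid the declared names.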
+                    _add_primitive("string", PRIMITIVE_RULES.at("string"))
+                    : _add_rule(sub_name + "-k", _not_strings(prop_names));
+            std::string kv_rule = _add_rule(sub_name + "-kv", key_rule + " \":\" space " + value_rule);
+            prop_kv_rule_names["*"] = kv_rule;
+            optional_props.push_back("*");
+        }
+
+        std::string rule = "\"{\" space ";
+        for (size_t i = 0; i < required_props.size(); i++) {
+            if (i > 0) {
+                rule += " \",\" space ";
+            }
+            rule += prop_kv_rule_names[required_props[i]];
+        }
+
+        if (!optional_props.empty()) {
+            rule += " (";
+            if (!required_props.empty()) {
+                rule += " \",\" space ( ";
+            }
+
+            std::function<std::string(const std::vector<std::string> &, bool)> get_recursive_refs = [&](const std::vector<std::string> & ks, bool first_is_optional) {
+                std::string res;
+                if (ks.empty()) {
+                    return res;
+                }
+                std::string k = ks[0];
+                std::string kv_rule_name = prop_kv_rule_names[k];
+                std::string comma_ref = "( \",\" space " + kv_rule_name + " )";
+                if (first_is_optional) {
+                    res = comma_ref + (k == "*" ? "*" : "?");
+                } else {
+                    res = kv_rule_name + (k == "*" ? " " + comma_ref + "*" : "");
+                }
+                if (ks.size() > 1) {
+                    res += " " + _add_rule(
+                        name + (name.empty() ? "" : "-") + k + "-rest",
+                        get_recursive_refs(std::vector<std::string>(ks.begin() + 1, ks.end()), true)
+                    );
+                }
+                return res;
+            };
+
+            for (size_t i = 0; i < optional_props.size(); i++) {
+                if (i > 0) {
+                    rule += " | ";
+                }
+                rule += get_recursive_refs(std::vector<std::string>(optional_props.begin() + i, optional_props.end()), false);
+            }
+            if (!required_props.empty()) {
+                rule += " )";
+            }
+            rule += " )?";
+        }
+
+        rule += " \"}\" space";
+
+        return rule;
+    }
+
+    std::string _add_primitive(const std::string & name, const BuiltinRule & rule) {
+        auto n = _add_rule(name, rule.content);
+        for (const auto & dep : rule.deps) {
+            BuiltinRule dep_rule;
+            auto it = PRIMITIVE_RULES.find(dep);
+            if (it == PRIMITIVE_RULES.end()) {
+                it = STRING_FORMAT_RULES.find(dep);
+                if (it == STRING_FORMAT_RULES.end()) {
+                    _errors.push_back("Rule " + dep + " not known");
+                    continue;
+                }
+            }
+            if (_rules.find(dep) == _rules.end()) {
+                _add_primitive(dep, it->second);
+            }
+        }
+        return n;
+    }
+
+public:
+    SchemaConverter(
+        const std::function<json(const std::string &)> & fetch_json,
+        bool dotall)
+        : _fetch_json(fetch_json), _dotall(dotall)
+    {
+        _rules["space"] = SPACE_RULE;
+    }
+
+    void resolve_refs(json & schema, const std::string & url) {
+        /*
+         * Resolves all $ref fields in the given schema, fetching any remote schemas,
+         * replacing each $ref with an absolute reference URL, and populating _refs with
+         * the respective referenced (sub)schema dictionaries.
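+         *
+         * Illustrative example (the names are hypothetical): with url == "input", a local
+         * reference {"$ref": "#/definitions/foo"} becomes {"$ref": "input#/definitions/foo"},
+         * and _refs["input#/definitions/foo"] is set to schema["definitions"]["foo"].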
+ */ + std::function visit_refs = [&](json & n) { + if (n.is_array()) { + for (auto & x : n) { + visit_refs(x); + } + } else if (n.is_object()) { + if (n.contains("$ref")) { + std::string ref = n["$ref"]; + if (_refs.find(ref) == _refs.end()) { + json target; + if (ref.find("https://") == 0) { + std::string base_url = ref.substr(0, ref.find('#')); + auto it = _refs.find(base_url); + if (it != _refs.end()) { + target = it->second; + } else { + // Fetch the referenced schema and resolve its refs + auto referenced = _fetch_json(ref); + resolve_refs(referenced, base_url); + _refs[base_url] = referenced; + } + if (ref.find('#') == std::string::npos || ref.substr(ref.find('#') + 1).empty()) { + return; + } + } else if (ref.find("#/") == 0) { + target = schema; + n["$ref"] = url + ref; + ref = url + ref; + } else { + _errors.push_back("Unsupported ref: " + ref); + return; + } + std::string pointer = ref.substr(ref.find('#') + 1); + std::vector tokens = split(pointer, "/"); + for (size_t i = 1; i < tokens.size(); ++i) { + std::string sel = tokens[i]; + if (target.is_null() || !target.contains(sel)) { + _errors.push_back("Error resolving ref " + ref + ": " + sel + " not in " + target.dump()); + return; + } + target = target[sel]; + } + _refs[ref] = target; + } + } else { + for (auto & kv : n.items()) { + visit_refs(kv.value()); + } + } + } + }; + + visit_refs(schema); + } + + std::string _generate_constant_rule(const json & value) { + return format_literal(value.dump()); + } + + std::string visit(const json & schema, const std::string & name) { + json schema_type = schema.contains("type") ? schema["type"] : json(); + std::string schema_format = schema.contains("format") ? schema["format"].get() : ""; + std::string rule_name = is_reserved_name(name) ? name + "-" : name.empty() ? "root" : name; + + if (schema.contains("$ref")) { + return _add_rule(rule_name, _resolve_ref(schema["$ref"])); + } else if (schema.contains("oneOf") || schema.contains("anyOf")) { + std::vector alt_schemas = schema.contains("oneOf") ? schema["oneOf"].get>() : schema["anyOf"].get>(); + return _add_rule(rule_name, _generate_union_rule(name, alt_schemas)); + } else if (schema_type.is_array()) { + std::vector schema_types; + for (const auto & t : schema_type) { + json schema_copy(schema); + schema_copy["type"] = t; + schema_types.push_back(schema_copy); + } + return _add_rule(rule_name, _generate_union_rule(name, schema_types)); + } else if (schema.contains("const")) { + return _add_rule(rule_name, _generate_constant_rule(schema["const"]) + " space"); + } else if (schema.contains("enum")) { + std::vector enum_values; + for (const auto & v : schema["enum"]) { + enum_values.push_back(_generate_constant_rule(v)); + } + return _add_rule(rule_name, "(" + join(enum_values.begin(), enum_values.end(), " | ") + ") space"); + } else if ((schema_type.is_null() || schema_type == "object") + && (schema.contains("properties") || + (schema.contains("additionalProperties") && schema["additionalProperties"] != true))) { + std::unordered_set required; + if (schema.contains("required") && schema["required"].is_array()) { + for (const auto & item : schema["required"]) { + if (item.is_string()) { + required.insert(item.get()); + } + } + } + std::vector> properties; + if (schema.contains("properties")) { + for (const auto & prop : schema["properties"].items()) { + properties.emplace_back(prop.key(), prop.value()); + } + } + return _add_rule(rule_name, + _build_object_rule( + properties, required, name, + schema.contains("additionalProperties") ? 
schema["additionalProperties"] : json())); + } else if ((schema_type.is_null() || schema_type == "object") && schema.contains("allOf")) { + std::unordered_set required; + std::vector> properties; + std::string hybrid_name = name; + std::function add_component = [&](const json & comp_schema, bool is_required) { + if (comp_schema.contains("$ref")) { + add_component(_refs[comp_schema["$ref"]], is_required); + } else if (comp_schema.contains("properties")) { + for (const auto & prop : comp_schema["properties"].items()) { + properties.emplace_back(prop.key(), prop.value()); + if (is_required) { + required.insert(prop.key()); + } + } + } else { + // todo warning + } + }; + for (auto & t : schema["allOf"]) { + if (t.contains("anyOf")) { + for (auto & tt : t["anyOf"]) { + add_component(tt, false); + } + } else { + add_component(t, true); + } + } + return _add_rule(rule_name, _build_object_rule(properties, required, hybrid_name, json())); + } else if ((schema_type.is_null() || schema_type == "array") && (schema.contains("items") || schema.contains("prefixItems"))) { + json items = schema.contains("items") ? schema["items"] : schema["prefixItems"]; + if (items.is_array()) { + std::string rule = "\"[\" space "; + for (size_t i = 0; i < items.size(); i++) { + if (i > 0) { + rule += " \",\" space "; + } + rule += visit(items[i], name + (name.empty() ? "" : "-") + "tuple-" + std::to_string(i)); + } + rule += " \"]\" space"; + return _add_rule(rule_name, rule); + } else { + std::string item_rule_name = visit(items, name + (name.empty() ? "" : "-") + "item"); + int min_items = schema.contains("minItems") ? schema["minItems"].get() : 0; + json max_items_json = schema.contains("maxItems") ? schema["maxItems"] : json(); + int max_items = max_items_json.is_number_integer() ? max_items_json.get() : std::numeric_limits::max(); + + return _add_rule(rule_name, "\"[\" space " + build_repetition(item_rule_name, min_items, max_items, "\",\" space") + " \"]\" space"); + } + } else if ((schema_type.is_null() || schema_type == "string") && schema.contains("pattern")) { + return _visit_pattern(schema["pattern"], rule_name); + } else if ((schema_type.is_null() || schema_type == "string") && std::regex_match(schema_format, std::regex("^uuid[1-5]?$"))) { + return _add_primitive(rule_name == "root" ? "root" : schema_format, PRIMITIVE_RULES.at("uuid")); + } else if ((schema_type.is_null() || schema_type == "string") && STRING_FORMAT_RULES.find(schema_format + "-string") != STRING_FORMAT_RULES.end()) { + auto prim_name = schema_format + "-string"; + return _add_rule(rule_name, _add_primitive(prim_name, STRING_FORMAT_RULES.at(prim_name))); + } else if (schema_type == "string" && (schema.contains("minLength") || schema.contains("maxLength"))) { + std::string char_rule = _add_primitive("char", PRIMITIVE_RULES.at("char")); + int min_len = schema.contains("minLength") ? schema["minLength"].get() : 0; + int max_len = schema.contains("maxLength") ? 
schema["maxLength"].get() : std::numeric_limits::max(); + return _add_rule(rule_name, "\"\\\"\" " + build_repetition(char_rule, min_len, max_len) + " \"\\\"\" space"); + } else if (schema_type == "integer" && (schema.contains("minimum") || schema.contains("exclusiveMinimum") || schema.contains("maximum") || schema.contains("exclusiveMaximum"))) { + int min_value = std::numeric_limits::min(); + int max_value = std::numeric_limits::max(); + if (schema.contains("minimum")) { + min_value = schema["minimum"].get(); + } else if (schema.contains("exclusiveMinimum")) { + min_value = schema["exclusiveMinimum"].get() + 1; + } + if (schema.contains("maximum")) { + max_value = schema["maximum"].get(); + } else if (schema.contains("exclusiveMaximum")) { + max_value = schema["exclusiveMaximum"].get() - 1; + } + std::stringstream out; + out << "("; + _build_min_max_int(min_value, max_value, out); + out << ") space"; + return _add_rule(rule_name, out.str()); + } else if (schema.empty() || schema_type == "object") { + return _add_rule(rule_name, _add_primitive("object", PRIMITIVE_RULES.at("object"))); + } else { + if (!schema_type.is_string() || PRIMITIVE_RULES.find(schema_type.get()) == PRIMITIVE_RULES.end()) { + _errors.push_back("Unrecognized schema: " + schema.dump()); + return ""; + } + // TODO: support minimum, maximum, exclusiveMinimum, exclusiveMaximum at least for zero + return _add_primitive(rule_name == "root" ? "root" : schema_type.get(), PRIMITIVE_RULES.at(schema_type.get())); + } + } + + void check_errors() { + if (!_errors.empty()) { + throw std::runtime_error("JSON schema conversion failed:\n" + join(_errors.begin(), _errors.end(), "\n")); + } + if (!_warnings.empty()) { + fprintf(stderr, "WARNING: JSON schema conversion was incomplete: %s\n", join(_warnings.begin(), _warnings.end(), "; ").c_str()); + } + } + + std::string format_grammar() { + std::stringstream ss; + for (const auto & kv : _rules) { + ss << kv.first << " ::= " << kv.second << std::endl; + } + return ss.str(); + } +}; + +std::string json_schema_to_grammar(const json & schema) { + SchemaConverter converter([](const std::string &) { return json::object(); }, /* dotall= */ false); + auto copy = schema; + converter.resolve_refs(copy, "input"); + converter.visit(copy, ""); + converter.check_errors(); + return converter.format_grammar(); +} diff --git a/common/json-schema-to-grammar.h b/common/json-schema-to-grammar.h new file mode 100644 index 0000000000000000000000000000000000000000..41623b34645287ab14a24bcb729c79141129cf77 --- /dev/null +++ b/common/json-schema-to-grammar.h @@ -0,0 +1,8 @@ +#pragma once + +#include "ggml.h" +// Change JSON_ASSERT from assert() to GGML_ASSERT: +#define JSON_ASSERT GGML_ASSERT +#include "json.hpp" + +std::string json_schema_to_grammar(const nlohmann::ordered_json& schema); diff --git a/common/json.hpp b/common/json.hpp new file mode 100644 index 0000000000000000000000000000000000000000..a858728c4ceb8a7048da422a96d38d61d3062d62 --- /dev/null +++ b/common/json.hpp @@ -0,0 +1,24766 @@ +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.3 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann +// SPDX-License-Identifier: MIT + +/****************************************************************************\ + * Note on documentation: The source files contain links to the online * + * documentation of the public API at https://json.nlohmann.me. 
This URL * + * contains the most recent documentation and should also be applicable to * + * previous versions; documentation for deprecated functions is not * + * removed, but marked deprecated. See "Generate documentation" section in * + * file docs/README.md. * +\****************************************************************************/ + +#ifndef INCLUDE_NLOHMANN_JSON_HPP_ +#define INCLUDE_NLOHMANN_JSON_HPP_ + +#include // all_of, find, for_each +#include // nullptr_t, ptrdiff_t, size_t +#include // hash, less +#include // initializer_list +#ifndef JSON_NO_IO + #include // istream, ostream +#endif // JSON_NO_IO +#include // random_access_iterator_tag +#include // unique_ptr +#include // string, stoi, to_string +#include // declval, forward, move, pair, swap +#include // vector + +// #include +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.3 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann +// SPDX-License-Identifier: MIT + + + +#include + +// #include +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.3 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann +// SPDX-License-Identifier: MIT + + + +// This file contains all macro definitions affecting or depending on the ABI + +#ifndef JSON_SKIP_LIBRARY_VERSION_CHECK + #if defined(NLOHMANN_JSON_VERSION_MAJOR) && defined(NLOHMANN_JSON_VERSION_MINOR) && defined(NLOHMANN_JSON_VERSION_PATCH) + #if NLOHMANN_JSON_VERSION_MAJOR != 3 || NLOHMANN_JSON_VERSION_MINOR != 11 || NLOHMANN_JSON_VERSION_PATCH != 3 + #warning "Already included a different version of the library!" 
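+            /* Illustrative note: this fires when another copy of nlohmann/json was
+               included first with NLOHMANN_JSON_VERSION_* macros that disagree with
+               this copy's 3.11.3. */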
+ #endif + #endif +#endif + +#define NLOHMANN_JSON_VERSION_MAJOR 3 // NOLINT(modernize-macro-to-enum) +#define NLOHMANN_JSON_VERSION_MINOR 11 // NOLINT(modernize-macro-to-enum) +#define NLOHMANN_JSON_VERSION_PATCH 3 // NOLINT(modernize-macro-to-enum) + +#ifndef JSON_DIAGNOSTICS + #define JSON_DIAGNOSTICS 0 +#endif + +#ifndef JSON_USE_LEGACY_DISCARDED_VALUE_COMPARISON + #define JSON_USE_LEGACY_DISCARDED_VALUE_COMPARISON 0 +#endif + +#if JSON_DIAGNOSTICS + #define NLOHMANN_JSON_ABI_TAG_DIAGNOSTICS _diag +#else + #define NLOHMANN_JSON_ABI_TAG_DIAGNOSTICS +#endif + +#if JSON_USE_LEGACY_DISCARDED_VALUE_COMPARISON + #define NLOHMANN_JSON_ABI_TAG_LEGACY_DISCARDED_VALUE_COMPARISON _ldvcmp +#else + #define NLOHMANN_JSON_ABI_TAG_LEGACY_DISCARDED_VALUE_COMPARISON +#endif + +#ifndef NLOHMANN_JSON_NAMESPACE_NO_VERSION + #define NLOHMANN_JSON_NAMESPACE_NO_VERSION 0 +#endif + +// Construct the namespace ABI tags component +#define NLOHMANN_JSON_ABI_TAGS_CONCAT_EX(a, b) json_abi ## a ## b +#define NLOHMANN_JSON_ABI_TAGS_CONCAT(a, b) \ + NLOHMANN_JSON_ABI_TAGS_CONCAT_EX(a, b) + +#define NLOHMANN_JSON_ABI_TAGS \ + NLOHMANN_JSON_ABI_TAGS_CONCAT( \ + NLOHMANN_JSON_ABI_TAG_DIAGNOSTICS, \ + NLOHMANN_JSON_ABI_TAG_LEGACY_DISCARDED_VALUE_COMPARISON) + +// Construct the namespace version component +#define NLOHMANN_JSON_NAMESPACE_VERSION_CONCAT_EX(major, minor, patch) \ + _v ## major ## _ ## minor ## _ ## patch +#define NLOHMANN_JSON_NAMESPACE_VERSION_CONCAT(major, minor, patch) \ + NLOHMANN_JSON_NAMESPACE_VERSION_CONCAT_EX(major, minor, patch) + +#if NLOHMANN_JSON_NAMESPACE_NO_VERSION +#define NLOHMANN_JSON_NAMESPACE_VERSION +#else +#define NLOHMANN_JSON_NAMESPACE_VERSION \ + NLOHMANN_JSON_NAMESPACE_VERSION_CONCAT(NLOHMANN_JSON_VERSION_MAJOR, \ + NLOHMANN_JSON_VERSION_MINOR, \ + NLOHMANN_JSON_VERSION_PATCH) +#endif + +// Combine namespace components +#define NLOHMANN_JSON_NAMESPACE_CONCAT_EX(a, b) a ## b +#define NLOHMANN_JSON_NAMESPACE_CONCAT(a, b) \ + NLOHMANN_JSON_NAMESPACE_CONCAT_EX(a, b) + +#ifndef NLOHMANN_JSON_NAMESPACE +#define NLOHMANN_JSON_NAMESPACE \ + nlohmann::NLOHMANN_JSON_NAMESPACE_CONCAT( \ + NLOHMANN_JSON_ABI_TAGS, \ + NLOHMANN_JSON_NAMESPACE_VERSION) +#endif + +#ifndef NLOHMANN_JSON_NAMESPACE_BEGIN +#define NLOHMANN_JSON_NAMESPACE_BEGIN \ + namespace nlohmann \ + { \ + inline namespace NLOHMANN_JSON_NAMESPACE_CONCAT( \ + NLOHMANN_JSON_ABI_TAGS, \ + NLOHMANN_JSON_NAMESPACE_VERSION) \ + { +#endif + +#ifndef NLOHMANN_JSON_NAMESPACE_END +#define NLOHMANN_JSON_NAMESPACE_END \ + } /* namespace (inline namespace) NOLINT(readability/namespace) */ \ + } // namespace nlohmann +#endif + +// #include +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.3 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann +// SPDX-License-Identifier: MIT + + + +#include // transform +#include // array +#include // forward_list +#include // inserter, front_inserter, end +#include // map +#include // string +#include // tuple, make_tuple +#include // is_arithmetic, is_same, is_enum, underlying_type, is_convertible +#include // unordered_map +#include // pair, declval +#include // valarray + +// #include +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.3 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann +// SPDX-License-Identifier: MIT + + + +#include // nullptr_t +#include // exception +#if 
JSON_DIAGNOSTICS + #include // accumulate +#endif +#include // runtime_error +#include // to_string +#include // vector + +// #include +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.3 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann +// SPDX-License-Identifier: MIT + + + +#include // array +#include // size_t +#include // uint8_t +#include // string + +// #include +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.3 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann +// SPDX-License-Identifier: MIT + + + +#include // declval, pair +// #include +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.3 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann +// SPDX-License-Identifier: MIT + + + +#include + +// #include +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.3 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann +// SPDX-License-Identifier: MIT + + + +// #include + + +NLOHMANN_JSON_NAMESPACE_BEGIN +namespace detail +{ + +template struct make_void +{ + using type = void; +}; +template using void_t = typename make_void::type; + +} // namespace detail +NLOHMANN_JSON_NAMESPACE_END + + +NLOHMANN_JSON_NAMESPACE_BEGIN +namespace detail +{ + +// https://en.cppreference.com/w/cpp/experimental/is_detected +struct nonesuch +{ + nonesuch() = delete; + ~nonesuch() = delete; + nonesuch(nonesuch const&) = delete; + nonesuch(nonesuch const&&) = delete; + void operator=(nonesuch const&) = delete; + void operator=(nonesuch&&) = delete; +}; + +template class Op, + class... Args> +struct detector +{ + using value_t = std::false_type; + using type = Default; +}; + +template class Op, class... Args> +struct detector>, Op, Args...> +{ + using value_t = std::true_type; + using type = Op; +}; + +template class Op, class... Args> +using is_detected = typename detector::value_t; + +template class Op, class... Args> +struct is_detected_lazy : is_detected { }; + +template class Op, class... Args> +using detected_t = typename detector::type; + +template class Op, class... Args> +using detected_or = detector; + +template class Op, class... Args> +using detected_or_t = typename detected_or::type; + +template class Op, class... Args> +using is_detected_exact = std::is_same>; + +template class Op, class... 
Args> +using is_detected_convertible = + std::is_convertible, To>; + +} // namespace detail +NLOHMANN_JSON_NAMESPACE_END + +// #include + + +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.3 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann +// SPDX-FileCopyrightText: 2016-2021 Evan Nemerson +// SPDX-License-Identifier: MIT + +/* Hedley - https://nemequ.github.io/hedley + * Created by Evan Nemerson + */ + +#if !defined(JSON_HEDLEY_VERSION) || (JSON_HEDLEY_VERSION < 15) +#if defined(JSON_HEDLEY_VERSION) + #undef JSON_HEDLEY_VERSION +#endif +#define JSON_HEDLEY_VERSION 15 + +#if defined(JSON_HEDLEY_STRINGIFY_EX) + #undef JSON_HEDLEY_STRINGIFY_EX +#endif +#define JSON_HEDLEY_STRINGIFY_EX(x) #x + +#if defined(JSON_HEDLEY_STRINGIFY) + #undef JSON_HEDLEY_STRINGIFY +#endif +#define JSON_HEDLEY_STRINGIFY(x) JSON_HEDLEY_STRINGIFY_EX(x) + +#if defined(JSON_HEDLEY_CONCAT_EX) + #undef JSON_HEDLEY_CONCAT_EX +#endif +#define JSON_HEDLEY_CONCAT_EX(a,b) a##b + +#if defined(JSON_HEDLEY_CONCAT) + #undef JSON_HEDLEY_CONCAT +#endif +#define JSON_HEDLEY_CONCAT(a,b) JSON_HEDLEY_CONCAT_EX(a,b) + +#if defined(JSON_HEDLEY_CONCAT3_EX) + #undef JSON_HEDLEY_CONCAT3_EX +#endif +#define JSON_HEDLEY_CONCAT3_EX(a,b,c) a##b##c + +#if defined(JSON_HEDLEY_CONCAT3) + #undef JSON_HEDLEY_CONCAT3 +#endif +#define JSON_HEDLEY_CONCAT3(a,b,c) JSON_HEDLEY_CONCAT3_EX(a,b,c) + +#if defined(JSON_HEDLEY_VERSION_ENCODE) + #undef JSON_HEDLEY_VERSION_ENCODE +#endif +#define JSON_HEDLEY_VERSION_ENCODE(major,minor,revision) (((major) * 1000000) + ((minor) * 1000) + (revision)) + +#if defined(JSON_HEDLEY_VERSION_DECODE_MAJOR) + #undef JSON_HEDLEY_VERSION_DECODE_MAJOR +#endif +#define JSON_HEDLEY_VERSION_DECODE_MAJOR(version) ((version) / 1000000) + +#if defined(JSON_HEDLEY_VERSION_DECODE_MINOR) + #undef JSON_HEDLEY_VERSION_DECODE_MINOR +#endif +#define JSON_HEDLEY_VERSION_DECODE_MINOR(version) (((version) % 1000000) / 1000) + +#if defined(JSON_HEDLEY_VERSION_DECODE_REVISION) + #undef JSON_HEDLEY_VERSION_DECODE_REVISION +#endif +#define JSON_HEDLEY_VERSION_DECODE_REVISION(version) ((version) % 1000) + +#if defined(JSON_HEDLEY_GNUC_VERSION) + #undef JSON_HEDLEY_GNUC_VERSION +#endif +#if defined(__GNUC__) && defined(__GNUC_PATCHLEVEL__) + #define JSON_HEDLEY_GNUC_VERSION JSON_HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__) +#elif defined(__GNUC__) + #define JSON_HEDLEY_GNUC_VERSION JSON_HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, 0) +#endif + +#if defined(JSON_HEDLEY_GNUC_VERSION_CHECK) + #undef JSON_HEDLEY_GNUC_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_GNUC_VERSION) + #define JSON_HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_GNUC_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_MSVC_VERSION) + #undef JSON_HEDLEY_MSVC_VERSION +#endif +#if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 140000000) && !defined(__ICL) + #define JSON_HEDLEY_MSVC_VERSION JSON_HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 10000000, (_MSC_FULL_VER % 10000000) / 100000, (_MSC_FULL_VER % 100000) / 100) +#elif defined(_MSC_FULL_VER) && !defined(__ICL) + #define JSON_HEDLEY_MSVC_VERSION JSON_HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 1000000, (_MSC_FULL_VER % 1000000) / 10000, (_MSC_FULL_VER % 10000) / 10) +#elif defined(_MSC_VER) && !defined(__ICL) + #define JSON_HEDLEY_MSVC_VERSION 
JSON_HEDLEY_VERSION_ENCODE(_MSC_VER / 100, _MSC_VER % 100, 0) +#endif + +#if defined(JSON_HEDLEY_MSVC_VERSION_CHECK) + #undef JSON_HEDLEY_MSVC_VERSION_CHECK +#endif +#if !defined(JSON_HEDLEY_MSVC_VERSION) + #define JSON_HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (0) +#elif defined(_MSC_VER) && (_MSC_VER >= 1400) + #define JSON_HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_FULL_VER >= ((major * 10000000) + (minor * 100000) + (patch))) +#elif defined(_MSC_VER) && (_MSC_VER >= 1200) + #define JSON_HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_FULL_VER >= ((major * 1000000) + (minor * 10000) + (patch))) +#else + #define JSON_HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_VER >= ((major * 100) + (minor))) +#endif + +#if defined(JSON_HEDLEY_INTEL_VERSION) + #undef JSON_HEDLEY_INTEL_VERSION +#endif +#if defined(__INTEL_COMPILER) && defined(__INTEL_COMPILER_UPDATE) && !defined(__ICL) + #define JSON_HEDLEY_INTEL_VERSION JSON_HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, __INTEL_COMPILER_UPDATE) +#elif defined(__INTEL_COMPILER) && !defined(__ICL) + #define JSON_HEDLEY_INTEL_VERSION JSON_HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, 0) +#endif + +#if defined(JSON_HEDLEY_INTEL_VERSION_CHECK) + #undef JSON_HEDLEY_INTEL_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_INTEL_VERSION) + #define JSON_HEDLEY_INTEL_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_INTEL_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_INTEL_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_INTEL_CL_VERSION) + #undef JSON_HEDLEY_INTEL_CL_VERSION +#endif +#if defined(__INTEL_COMPILER) && defined(__INTEL_COMPILER_UPDATE) && defined(__ICL) + #define JSON_HEDLEY_INTEL_CL_VERSION JSON_HEDLEY_VERSION_ENCODE(__INTEL_COMPILER, __INTEL_COMPILER_UPDATE, 0) +#endif + +#if defined(JSON_HEDLEY_INTEL_CL_VERSION_CHECK) + #undef JSON_HEDLEY_INTEL_CL_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_INTEL_CL_VERSION) + #define JSON_HEDLEY_INTEL_CL_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_INTEL_CL_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_INTEL_CL_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_PGI_VERSION) + #undef JSON_HEDLEY_PGI_VERSION +#endif +#if defined(__PGI) && defined(__PGIC__) && defined(__PGIC_MINOR__) && defined(__PGIC_PATCHLEVEL__) + #define JSON_HEDLEY_PGI_VERSION JSON_HEDLEY_VERSION_ENCODE(__PGIC__, __PGIC_MINOR__, __PGIC_PATCHLEVEL__) +#endif + +#if defined(JSON_HEDLEY_PGI_VERSION_CHECK) + #undef JSON_HEDLEY_PGI_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_PGI_VERSION) + #define JSON_HEDLEY_PGI_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_PGI_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_PGI_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_SUNPRO_VERSION) + #undef JSON_HEDLEY_SUNPRO_VERSION +#endif +#if defined(__SUNPRO_C) && (__SUNPRO_C > 0x1000) + #define JSON_HEDLEY_SUNPRO_VERSION JSON_HEDLEY_VERSION_ENCODE((((__SUNPRO_C >> 16) & 0xf) * 10) + ((__SUNPRO_C >> 12) & 0xf), (((__SUNPRO_C >> 8) & 0xf) * 10) + ((__SUNPRO_C >> 4) & 0xf), (__SUNPRO_C & 0xf) * 10) +#elif defined(__SUNPRO_C) + #define JSON_HEDLEY_SUNPRO_VERSION JSON_HEDLEY_VERSION_ENCODE((__SUNPRO_C >> 8) & 0xf, (__SUNPRO_C >> 4) & 0xf, (__SUNPRO_C) & 0xf) +#elif defined(__SUNPRO_CC) && (__SUNPRO_CC > 0x1000) + #define JSON_HEDLEY_SUNPRO_VERSION JSON_HEDLEY_VERSION_ENCODE((((__SUNPRO_CC >> 16) & 0xf) * 10) + 
((__SUNPRO_CC >> 12) & 0xf), (((__SUNPRO_CC >> 8) & 0xf) * 10) + ((__SUNPRO_CC >> 4) & 0xf), (__SUNPRO_CC & 0xf) * 10) +#elif defined(__SUNPRO_CC) + #define JSON_HEDLEY_SUNPRO_VERSION JSON_HEDLEY_VERSION_ENCODE((__SUNPRO_CC >> 8) & 0xf, (__SUNPRO_CC >> 4) & 0xf, (__SUNPRO_CC) & 0xf) +#endif + +#if defined(JSON_HEDLEY_SUNPRO_VERSION_CHECK) + #undef JSON_HEDLEY_SUNPRO_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_SUNPRO_VERSION) + #define JSON_HEDLEY_SUNPRO_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_SUNPRO_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_SUNPRO_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_EMSCRIPTEN_VERSION) + #undef JSON_HEDLEY_EMSCRIPTEN_VERSION +#endif +#if defined(__EMSCRIPTEN__) + #define JSON_HEDLEY_EMSCRIPTEN_VERSION JSON_HEDLEY_VERSION_ENCODE(__EMSCRIPTEN_major__, __EMSCRIPTEN_minor__, __EMSCRIPTEN_tiny__) +#endif + +#if defined(JSON_HEDLEY_EMSCRIPTEN_VERSION_CHECK) + #undef JSON_HEDLEY_EMSCRIPTEN_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_EMSCRIPTEN_VERSION) + #define JSON_HEDLEY_EMSCRIPTEN_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_EMSCRIPTEN_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_EMSCRIPTEN_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_ARM_VERSION) + #undef JSON_HEDLEY_ARM_VERSION +#endif +#if defined(__CC_ARM) && defined(__ARMCOMPILER_VERSION) + #define JSON_HEDLEY_ARM_VERSION JSON_HEDLEY_VERSION_ENCODE(__ARMCOMPILER_VERSION / 1000000, (__ARMCOMPILER_VERSION % 1000000) / 10000, (__ARMCOMPILER_VERSION % 10000) / 100) +#elif defined(__CC_ARM) && defined(__ARMCC_VERSION) + #define JSON_HEDLEY_ARM_VERSION JSON_HEDLEY_VERSION_ENCODE(__ARMCC_VERSION / 1000000, (__ARMCC_VERSION % 1000000) / 10000, (__ARMCC_VERSION % 10000) / 100) +#endif + +#if defined(JSON_HEDLEY_ARM_VERSION_CHECK) + #undef JSON_HEDLEY_ARM_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_ARM_VERSION) + #define JSON_HEDLEY_ARM_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_ARM_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_ARM_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_IBM_VERSION) + #undef JSON_HEDLEY_IBM_VERSION +#endif +#if defined(__ibmxl__) + #define JSON_HEDLEY_IBM_VERSION JSON_HEDLEY_VERSION_ENCODE(__ibmxl_version__, __ibmxl_release__, __ibmxl_modification__) +#elif defined(__xlC__) && defined(__xlC_ver__) + #define JSON_HEDLEY_IBM_VERSION JSON_HEDLEY_VERSION_ENCODE(__xlC__ >> 8, __xlC__ & 0xff, (__xlC_ver__ >> 8) & 0xff) +#elif defined(__xlC__) + #define JSON_HEDLEY_IBM_VERSION JSON_HEDLEY_VERSION_ENCODE(__xlC__ >> 8, __xlC__ & 0xff, 0) +#endif + +#if defined(JSON_HEDLEY_IBM_VERSION_CHECK) + #undef JSON_HEDLEY_IBM_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_IBM_VERSION) + #define JSON_HEDLEY_IBM_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_IBM_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_IBM_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_TI_VERSION) + #undef JSON_HEDLEY_TI_VERSION +#endif +#if \ + defined(__TI_COMPILER_VERSION__) && \ + ( \ + defined(__TMS470__) || defined(__TI_ARM__) || \ + defined(__MSP430__) || \ + defined(__TMS320C2000__) \ + ) +#if (__TI_COMPILER_VERSION__ >= 16000000) + #define JSON_HEDLEY_TI_VERSION JSON_HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif +#endif + +#if 
defined(JSON_HEDLEY_TI_VERSION_CHECK) + #undef JSON_HEDLEY_TI_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_TI_VERSION) + #define JSON_HEDLEY_TI_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_TI_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_TI_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_TI_CL2000_VERSION) + #undef JSON_HEDLEY_TI_CL2000_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__TMS320C2000__) + #define JSON_HEDLEY_TI_CL2000_VERSION JSON_HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(JSON_HEDLEY_TI_CL2000_VERSION_CHECK) + #undef JSON_HEDLEY_TI_CL2000_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_TI_CL2000_VERSION) + #define JSON_HEDLEY_TI_CL2000_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_TI_CL2000_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_TI_CL2000_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_TI_CL430_VERSION) + #undef JSON_HEDLEY_TI_CL430_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__MSP430__) + #define JSON_HEDLEY_TI_CL430_VERSION JSON_HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(JSON_HEDLEY_TI_CL430_VERSION_CHECK) + #undef JSON_HEDLEY_TI_CL430_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_TI_CL430_VERSION) + #define JSON_HEDLEY_TI_CL430_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_TI_CL430_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_TI_CL430_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_TI_ARMCL_VERSION) + #undef JSON_HEDLEY_TI_ARMCL_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && (defined(__TMS470__) || defined(__TI_ARM__)) + #define JSON_HEDLEY_TI_ARMCL_VERSION JSON_HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(JSON_HEDLEY_TI_ARMCL_VERSION_CHECK) + #undef JSON_HEDLEY_TI_ARMCL_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_TI_ARMCL_VERSION) + #define JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_TI_ARMCL_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_TI_CL6X_VERSION) + #undef JSON_HEDLEY_TI_CL6X_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__TMS320C6X__) + #define JSON_HEDLEY_TI_CL6X_VERSION JSON_HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(JSON_HEDLEY_TI_CL6X_VERSION_CHECK) + #undef JSON_HEDLEY_TI_CL6X_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_TI_CL6X_VERSION) + #define JSON_HEDLEY_TI_CL6X_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_TI_CL6X_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_TI_CL6X_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_TI_CL7X_VERSION) + #undef JSON_HEDLEY_TI_CL7X_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__C7000__) + #define JSON_HEDLEY_TI_CL7X_VERSION JSON_HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) 
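+/* Worked example of the encoding shared by all of these version macros:
+   JSON_HEDLEY_VERSION_ENCODE(3,11,3) == 3*1000000 + 11*1000 + 3 == 3011003, and
+   JSON_HEDLEY_VERSION_DECODE_MAJOR(3011003) == 3011003 / 1000000 == 3. */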
+#endif + +#if defined(JSON_HEDLEY_TI_CL7X_VERSION_CHECK) + #undef JSON_HEDLEY_TI_CL7X_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_TI_CL7X_VERSION) + #define JSON_HEDLEY_TI_CL7X_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_TI_CL7X_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_TI_CL7X_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_TI_CLPRU_VERSION) + #undef JSON_HEDLEY_TI_CLPRU_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__PRU__) + #define JSON_HEDLEY_TI_CLPRU_VERSION JSON_HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(JSON_HEDLEY_TI_CLPRU_VERSION_CHECK) + #undef JSON_HEDLEY_TI_CLPRU_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_TI_CLPRU_VERSION) + #define JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_TI_CLPRU_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_CRAY_VERSION) + #undef JSON_HEDLEY_CRAY_VERSION +#endif +#if defined(_CRAYC) + #if defined(_RELEASE_PATCHLEVEL) + #define JSON_HEDLEY_CRAY_VERSION JSON_HEDLEY_VERSION_ENCODE(_RELEASE_MAJOR, _RELEASE_MINOR, _RELEASE_PATCHLEVEL) + #else + #define JSON_HEDLEY_CRAY_VERSION JSON_HEDLEY_VERSION_ENCODE(_RELEASE_MAJOR, _RELEASE_MINOR, 0) + #endif +#endif + +#if defined(JSON_HEDLEY_CRAY_VERSION_CHECK) + #undef JSON_HEDLEY_CRAY_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_CRAY_VERSION) + #define JSON_HEDLEY_CRAY_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_CRAY_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_CRAY_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_IAR_VERSION) + #undef JSON_HEDLEY_IAR_VERSION +#endif +#if defined(__IAR_SYSTEMS_ICC__) + #if __VER__ > 1000 + #define JSON_HEDLEY_IAR_VERSION JSON_HEDLEY_VERSION_ENCODE((__VER__ / 1000000), ((__VER__ / 1000) % 1000), (__VER__ % 1000)) + #else + #define JSON_HEDLEY_IAR_VERSION JSON_HEDLEY_VERSION_ENCODE(__VER__ / 100, __VER__ % 100, 0) + #endif +#endif + +#if defined(JSON_HEDLEY_IAR_VERSION_CHECK) + #undef JSON_HEDLEY_IAR_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_IAR_VERSION) + #define JSON_HEDLEY_IAR_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_IAR_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_IAR_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_TINYC_VERSION) + #undef JSON_HEDLEY_TINYC_VERSION +#endif +#if defined(__TINYC__) + #define JSON_HEDLEY_TINYC_VERSION JSON_HEDLEY_VERSION_ENCODE(__TINYC__ / 1000, (__TINYC__ / 100) % 10, __TINYC__ % 100) +#endif + +#if defined(JSON_HEDLEY_TINYC_VERSION_CHECK) + #undef JSON_HEDLEY_TINYC_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_TINYC_VERSION) + #define JSON_HEDLEY_TINYC_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_TINYC_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_TINYC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_DMC_VERSION) + #undef JSON_HEDLEY_DMC_VERSION +#endif +#if defined(__DMC__) + #define JSON_HEDLEY_DMC_VERSION JSON_HEDLEY_VERSION_ENCODE(__DMC__ >> 8, (__DMC__ >> 4) & 0xf, __DMC__ & 0xf) +#endif + +#if defined(JSON_HEDLEY_DMC_VERSION_CHECK) + #undef JSON_HEDLEY_DMC_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_DMC_VERSION) + #define JSON_HEDLEY_DMC_VERSION_CHECK(major,minor,patch) 
(JSON_HEDLEY_DMC_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_DMC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_COMPCERT_VERSION) + #undef JSON_HEDLEY_COMPCERT_VERSION +#endif +#if defined(__COMPCERT_VERSION__) + #define JSON_HEDLEY_COMPCERT_VERSION JSON_HEDLEY_VERSION_ENCODE(__COMPCERT_VERSION__ / 10000, (__COMPCERT_VERSION__ / 100) % 100, __COMPCERT_VERSION__ % 100) +#endif + +#if defined(JSON_HEDLEY_COMPCERT_VERSION_CHECK) + #undef JSON_HEDLEY_COMPCERT_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_COMPCERT_VERSION) + #define JSON_HEDLEY_COMPCERT_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_COMPCERT_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_COMPCERT_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_PELLES_VERSION) + #undef JSON_HEDLEY_PELLES_VERSION +#endif +#if defined(__POCC__) + #define JSON_HEDLEY_PELLES_VERSION JSON_HEDLEY_VERSION_ENCODE(__POCC__ / 100, __POCC__ % 100, 0) +#endif + +#if defined(JSON_HEDLEY_PELLES_VERSION_CHECK) + #undef JSON_HEDLEY_PELLES_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_PELLES_VERSION) + #define JSON_HEDLEY_PELLES_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_PELLES_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_PELLES_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_MCST_LCC_VERSION) + #undef JSON_HEDLEY_MCST_LCC_VERSION +#endif +#if defined(__LCC__) && defined(__LCC_MINOR__) + #define JSON_HEDLEY_MCST_LCC_VERSION JSON_HEDLEY_VERSION_ENCODE(__LCC__ / 100, __LCC__ % 100, __LCC_MINOR__) +#endif + +#if defined(JSON_HEDLEY_MCST_LCC_VERSION_CHECK) + #undef JSON_HEDLEY_MCST_LCC_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_MCST_LCC_VERSION) + #define JSON_HEDLEY_MCST_LCC_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_MCST_LCC_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_MCST_LCC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_GCC_VERSION) + #undef JSON_HEDLEY_GCC_VERSION +#endif +#if \ + defined(JSON_HEDLEY_GNUC_VERSION) && \ + !defined(__clang__) && \ + !defined(JSON_HEDLEY_INTEL_VERSION) && \ + !defined(JSON_HEDLEY_PGI_VERSION) && \ + !defined(JSON_HEDLEY_ARM_VERSION) && \ + !defined(JSON_HEDLEY_CRAY_VERSION) && \ + !defined(JSON_HEDLEY_TI_VERSION) && \ + !defined(JSON_HEDLEY_TI_ARMCL_VERSION) && \ + !defined(JSON_HEDLEY_TI_CL430_VERSION) && \ + !defined(JSON_HEDLEY_TI_CL2000_VERSION) && \ + !defined(JSON_HEDLEY_TI_CL6X_VERSION) && \ + !defined(JSON_HEDLEY_TI_CL7X_VERSION) && \ + !defined(JSON_HEDLEY_TI_CLPRU_VERSION) && \ + !defined(__COMPCERT__) && \ + !defined(JSON_HEDLEY_MCST_LCC_VERSION) + #define JSON_HEDLEY_GCC_VERSION JSON_HEDLEY_GNUC_VERSION +#endif + +#if defined(JSON_HEDLEY_GCC_VERSION_CHECK) + #undef JSON_HEDLEY_GCC_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_GCC_VERSION) + #define JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_GCC_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_HAS_ATTRIBUTE) + #undef JSON_HEDLEY_HAS_ATTRIBUTE +#endif +#if \ + defined(__has_attribute) && \ + ( \ + (!defined(JSON_HEDLEY_IAR_VERSION) || JSON_HEDLEY_IAR_VERSION_CHECK(8,5,9)) \ + ) +# define JSON_HEDLEY_HAS_ATTRIBUTE(attribute) __has_attribute(attribute) +#else +# define JSON_HEDLEY_HAS_ATTRIBUTE(attribute) (0) +#endif + +#if 
defined(JSON_HEDLEY_GNUC_HAS_ATTRIBUTE) + #undef JSON_HEDLEY_GNUC_HAS_ATTRIBUTE +#endif +#if defined(__has_attribute) + #define JSON_HEDLEY_GNUC_HAS_ATTRIBUTE(attribute,major,minor,patch) JSON_HEDLEY_HAS_ATTRIBUTE(attribute) +#else + #define JSON_HEDLEY_GNUC_HAS_ATTRIBUTE(attribute,major,minor,patch) JSON_HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(JSON_HEDLEY_GCC_HAS_ATTRIBUTE) + #undef JSON_HEDLEY_GCC_HAS_ATTRIBUTE +#endif +#if defined(__has_attribute) + #define JSON_HEDLEY_GCC_HAS_ATTRIBUTE(attribute,major,minor,patch) JSON_HEDLEY_HAS_ATTRIBUTE(attribute) +#else + #define JSON_HEDLEY_GCC_HAS_ATTRIBUTE(attribute,major,minor,patch) JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(JSON_HEDLEY_HAS_CPP_ATTRIBUTE) + #undef JSON_HEDLEY_HAS_CPP_ATTRIBUTE +#endif +#if \ + defined(__has_cpp_attribute) && \ + defined(__cplusplus) && \ + (!defined(JSON_HEDLEY_SUNPRO_VERSION) || JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,15,0)) + #define JSON_HEDLEY_HAS_CPP_ATTRIBUTE(attribute) __has_cpp_attribute(attribute) +#else + #define JSON_HEDLEY_HAS_CPP_ATTRIBUTE(attribute) (0) +#endif + +#if defined(JSON_HEDLEY_HAS_CPP_ATTRIBUTE_NS) + #undef JSON_HEDLEY_HAS_CPP_ATTRIBUTE_NS +#endif +#if !defined(__cplusplus) || !defined(__has_cpp_attribute) + #define JSON_HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) (0) +#elif \ + !defined(JSON_HEDLEY_PGI_VERSION) && \ + !defined(JSON_HEDLEY_IAR_VERSION) && \ + (!defined(JSON_HEDLEY_SUNPRO_VERSION) || JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,15,0)) && \ + (!defined(JSON_HEDLEY_MSVC_VERSION) || JSON_HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define JSON_HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) JSON_HEDLEY_HAS_CPP_ATTRIBUTE(ns::attribute) +#else + #define JSON_HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) (0) +#endif + +#if defined(JSON_HEDLEY_GNUC_HAS_CPP_ATTRIBUTE) + #undef JSON_HEDLEY_GNUC_HAS_CPP_ATTRIBUTE +#endif +#if defined(__has_cpp_attribute) && defined(__cplusplus) + #define JSON_HEDLEY_GNUC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) __has_cpp_attribute(attribute) +#else + #define JSON_HEDLEY_GNUC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) JSON_HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(JSON_HEDLEY_GCC_HAS_CPP_ATTRIBUTE) + #undef JSON_HEDLEY_GCC_HAS_CPP_ATTRIBUTE +#endif +#if defined(__has_cpp_attribute) && defined(__cplusplus) + #define JSON_HEDLEY_GCC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) __has_cpp_attribute(attribute) +#else + #define JSON_HEDLEY_GCC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(JSON_HEDLEY_HAS_BUILTIN) + #undef JSON_HEDLEY_HAS_BUILTIN +#endif +#if defined(__has_builtin) + #define JSON_HEDLEY_HAS_BUILTIN(builtin) __has_builtin(builtin) +#else + #define JSON_HEDLEY_HAS_BUILTIN(builtin) (0) +#endif + +#if defined(JSON_HEDLEY_GNUC_HAS_BUILTIN) + #undef JSON_HEDLEY_GNUC_HAS_BUILTIN +#endif +#if defined(__has_builtin) + #define JSON_HEDLEY_GNUC_HAS_BUILTIN(builtin,major,minor,patch) __has_builtin(builtin) +#else + #define JSON_HEDLEY_GNUC_HAS_BUILTIN(builtin,major,minor,patch) JSON_HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(JSON_HEDLEY_GCC_HAS_BUILTIN) + #undef JSON_HEDLEY_GCC_HAS_BUILTIN +#endif +#if defined(__has_builtin) + #define JSON_HEDLEY_GCC_HAS_BUILTIN(builtin,major,minor,patch) __has_builtin(builtin) +#else + #define JSON_HEDLEY_GCC_HAS_BUILTIN(builtin,major,minor,patch) JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(JSON_HEDLEY_HAS_FEATURE) + #undef 
JSON_HEDLEY_HAS_FEATURE +#endif +#if defined(__has_feature) + #define JSON_HEDLEY_HAS_FEATURE(feature) __has_feature(feature) +#else + #define JSON_HEDLEY_HAS_FEATURE(feature) (0) +#endif + +#if defined(JSON_HEDLEY_GNUC_HAS_FEATURE) + #undef JSON_HEDLEY_GNUC_HAS_FEATURE +#endif +#if defined(__has_feature) + #define JSON_HEDLEY_GNUC_HAS_FEATURE(feature,major,minor,patch) __has_feature(feature) +#else + #define JSON_HEDLEY_GNUC_HAS_FEATURE(feature,major,minor,patch) JSON_HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(JSON_HEDLEY_GCC_HAS_FEATURE) + #undef JSON_HEDLEY_GCC_HAS_FEATURE +#endif +#if defined(__has_feature) + #define JSON_HEDLEY_GCC_HAS_FEATURE(feature,major,minor,patch) __has_feature(feature) +#else + #define JSON_HEDLEY_GCC_HAS_FEATURE(feature,major,minor,patch) JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(JSON_HEDLEY_HAS_EXTENSION) + #undef JSON_HEDLEY_HAS_EXTENSION +#endif +#if defined(__has_extension) + #define JSON_HEDLEY_HAS_EXTENSION(extension) __has_extension(extension) +#else + #define JSON_HEDLEY_HAS_EXTENSION(extension) (0) +#endif + +#if defined(JSON_HEDLEY_GNUC_HAS_EXTENSION) + #undef JSON_HEDLEY_GNUC_HAS_EXTENSION +#endif +#if defined(__has_extension) + #define JSON_HEDLEY_GNUC_HAS_EXTENSION(extension,major,minor,patch) __has_extension(extension) +#else + #define JSON_HEDLEY_GNUC_HAS_EXTENSION(extension,major,minor,patch) JSON_HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(JSON_HEDLEY_GCC_HAS_EXTENSION) + #undef JSON_HEDLEY_GCC_HAS_EXTENSION +#endif +#if defined(__has_extension) + #define JSON_HEDLEY_GCC_HAS_EXTENSION(extension,major,minor,patch) __has_extension(extension) +#else + #define JSON_HEDLEY_GCC_HAS_EXTENSION(extension,major,minor,patch) JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(JSON_HEDLEY_HAS_DECLSPEC_ATTRIBUTE) + #undef JSON_HEDLEY_HAS_DECLSPEC_ATTRIBUTE +#endif +#if defined(__has_declspec_attribute) + #define JSON_HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) __has_declspec_attribute(attribute) +#else + #define JSON_HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) (0) +#endif + +#if defined(JSON_HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE) + #undef JSON_HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE +#endif +#if defined(__has_declspec_attribute) + #define JSON_HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) __has_declspec_attribute(attribute) +#else + #define JSON_HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) JSON_HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(JSON_HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE) + #undef JSON_HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE +#endif +#if defined(__has_declspec_attribute) + #define JSON_HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) __has_declspec_attribute(attribute) +#else + #define JSON_HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(JSON_HEDLEY_HAS_WARNING) + #undef JSON_HEDLEY_HAS_WARNING +#endif +#if defined(__has_warning) + #define JSON_HEDLEY_HAS_WARNING(warning) __has_warning(warning) +#else + #define JSON_HEDLEY_HAS_WARNING(warning) (0) +#endif + +#if defined(JSON_HEDLEY_GNUC_HAS_WARNING) + #undef JSON_HEDLEY_GNUC_HAS_WARNING +#endif +#if defined(__has_warning) + #define JSON_HEDLEY_GNUC_HAS_WARNING(warning,major,minor,patch) __has_warning(warning) +#else + #define JSON_HEDLEY_GNUC_HAS_WARNING(warning,major,minor,patch) JSON_HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if 
defined(JSON_HEDLEY_GCC_HAS_WARNING) + #undef JSON_HEDLEY_GCC_HAS_WARNING +#endif +#if defined(__has_warning) + #define JSON_HEDLEY_GCC_HAS_WARNING(warning,major,minor,patch) __has_warning(warning) +#else + #define JSON_HEDLEY_GCC_HAS_WARNING(warning,major,minor,patch) JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \ + defined(__clang__) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(3,0,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) || \ + JSON_HEDLEY_PGI_VERSION_CHECK(18,4,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,7,0) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(2,0,1) || \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,1,0) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,0,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + JSON_HEDLEY_CRAY_VERSION_CHECK(5,0,0) || \ + JSON_HEDLEY_TINYC_VERSION_CHECK(0,9,17) || \ + JSON_HEDLEY_SUNPRO_VERSION_CHECK(8,0,0) || \ + (JSON_HEDLEY_IBM_VERSION_CHECK(10,1,0) && defined(__C99_PRAGMA_OPERATOR)) + #define JSON_HEDLEY_PRAGMA(value) _Pragma(#value) +#elif JSON_HEDLEY_MSVC_VERSION_CHECK(15,0,0) + #define JSON_HEDLEY_PRAGMA(value) __pragma(value) +#else + #define JSON_HEDLEY_PRAGMA(value) +#endif + +#if defined(JSON_HEDLEY_DIAGNOSTIC_PUSH) + #undef JSON_HEDLEY_DIAGNOSTIC_PUSH +#endif +#if defined(JSON_HEDLEY_DIAGNOSTIC_POP) + #undef JSON_HEDLEY_DIAGNOSTIC_POP +#endif +#if defined(__clang__) + #define JSON_HEDLEY_DIAGNOSTIC_PUSH _Pragma("clang diagnostic push") + #define JSON_HEDLEY_DIAGNOSTIC_POP _Pragma("clang diagnostic pop") +#elif JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define JSON_HEDLEY_DIAGNOSTIC_PUSH _Pragma("warning(push)") + #define JSON_HEDLEY_DIAGNOSTIC_POP _Pragma("warning(pop)") +#elif JSON_HEDLEY_GCC_VERSION_CHECK(4,6,0) + #define JSON_HEDLEY_DIAGNOSTIC_PUSH _Pragma("GCC diagnostic push") + #define JSON_HEDLEY_DIAGNOSTIC_POP _Pragma("GCC diagnostic pop") +#elif \ + JSON_HEDLEY_MSVC_VERSION_CHECK(15,0,0) || \ + JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) + #define JSON_HEDLEY_DIAGNOSTIC_PUSH __pragma(warning(push)) + #define JSON_HEDLEY_DIAGNOSTIC_POP __pragma(warning(pop)) +#elif JSON_HEDLEY_ARM_VERSION_CHECK(5,6,0) + #define JSON_HEDLEY_DIAGNOSTIC_PUSH _Pragma("push") + #define JSON_HEDLEY_DIAGNOSTIC_POP _Pragma("pop") +#elif \ + JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,4,0) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(8,1,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) + #define JSON_HEDLEY_DIAGNOSTIC_PUSH _Pragma("diag_push") + #define JSON_HEDLEY_DIAGNOSTIC_POP _Pragma("diag_pop") +#elif JSON_HEDLEY_PELLES_VERSION_CHECK(2,90,0) + #define JSON_HEDLEY_DIAGNOSTIC_PUSH _Pragma("warning(push)") + #define JSON_HEDLEY_DIAGNOSTIC_POP _Pragma("warning(pop)") +#else + #define JSON_HEDLEY_DIAGNOSTIC_PUSH + #define JSON_HEDLEY_DIAGNOSTIC_POP +#endif + +/* JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_ is for + HEDLEY INTERNAL USE ONLY. API subject to change without notice. 
*/ +#if defined(JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_) + #undef JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_ +#endif +#if defined(__cplusplus) +# if JSON_HEDLEY_HAS_WARNING("-Wc++98-compat") +# if JSON_HEDLEY_HAS_WARNING("-Wc++17-extensions") +# if JSON_HEDLEY_HAS_WARNING("-Wc++1z-extensions") +# define JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ + JSON_HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ + _Pragma("clang diagnostic ignored \"-Wc++17-extensions\"") \ + _Pragma("clang diagnostic ignored \"-Wc++1z-extensions\"") \ + xpr \ + JSON_HEDLEY_DIAGNOSTIC_POP +# else +# define JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ + JSON_HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ + _Pragma("clang diagnostic ignored \"-Wc++17-extensions\"") \ + xpr \ + JSON_HEDLEY_DIAGNOSTIC_POP +# endif +# else +# define JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ + JSON_HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ + xpr \ + JSON_HEDLEY_DIAGNOSTIC_POP +# endif +# endif +#endif +#if !defined(JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(x) x +#endif + +#if defined(JSON_HEDLEY_CONST_CAST) + #undef JSON_HEDLEY_CONST_CAST +#endif +#if defined(__cplusplus) +# define JSON_HEDLEY_CONST_CAST(T, expr) (const_cast<T>(expr)) +#elif \ + JSON_HEDLEY_HAS_WARNING("-Wcast-qual") || \ + JSON_HEDLEY_GCC_VERSION_CHECK(4,6,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define JSON_HEDLEY_CONST_CAST(T, expr) (__extension__ ({ \ + JSON_HEDLEY_DIAGNOSTIC_PUSH \ + JSON_HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL \ + ((T) (expr)); \ + JSON_HEDLEY_DIAGNOSTIC_POP \ + })) +#else +# define JSON_HEDLEY_CONST_CAST(T, expr) ((T) (expr)) +#endif + +#if defined(JSON_HEDLEY_REINTERPRET_CAST) + #undef JSON_HEDLEY_REINTERPRET_CAST +#endif +#if defined(__cplusplus) + #define JSON_HEDLEY_REINTERPRET_CAST(T, expr) (reinterpret_cast<T>(expr)) +#else + #define JSON_HEDLEY_REINTERPRET_CAST(T, expr) ((T) (expr)) +#endif + +#if defined(JSON_HEDLEY_STATIC_CAST) + #undef JSON_HEDLEY_STATIC_CAST +#endif +#if defined(__cplusplus) + #define JSON_HEDLEY_STATIC_CAST(T, expr) (static_cast<T>(expr)) +#else + #define JSON_HEDLEY_STATIC_CAST(T, expr) ((T) (expr)) +#endif + +#if defined(JSON_HEDLEY_CPP_CAST) + #undef JSON_HEDLEY_CPP_CAST +#endif +#if defined(__cplusplus) +# if JSON_HEDLEY_HAS_WARNING("-Wold-style-cast") +# define JSON_HEDLEY_CPP_CAST(T, expr) \ + JSON_HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wold-style-cast\"") \ + ((T) (expr)) \ + JSON_HEDLEY_DIAGNOSTIC_POP +# elif JSON_HEDLEY_IAR_VERSION_CHECK(8,3,0) +# define JSON_HEDLEY_CPP_CAST(T, expr) \ + JSON_HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("diag_suppress=Pe137") \ + JSON_HEDLEY_DIAGNOSTIC_POP +# else +# define JSON_HEDLEY_CPP_CAST(T, expr) ((T) (expr)) +# endif +#else +# define JSON_HEDLEY_CPP_CAST(T, expr) (expr) +#endif + +#if defined(JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED) + #undef JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED +#endif +#if JSON_HEDLEY_HAS_WARNING("-Wdeprecated-declarations") + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("clang diagnostic ignored \"-Wdeprecated-declarations\"") +#elif JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("warning(disable:1478 1786)") +#elif JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED 
__pragma(warning(disable:1478 1786)) +#elif JSON_HEDLEY_PGI_VERSION_CHECK(20,7,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1216,1444,1445") +#elif JSON_HEDLEY_PGI_VERSION_CHECK(17,10,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1444") +#elif JSON_HEDLEY_GCC_VERSION_CHECK(4,3,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") +#elif JSON_HEDLEY_MSVC_VERSION_CHECK(15,0,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED __pragma(warning(disable:4996)) +#elif JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1444") +#elif \ + JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1291,1718") +#elif JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) && !defined(__cplusplus) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("error_messages(off,E_DEPRECATED_ATT,E_DEPRECATED_ATT_MESS)") +#elif JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) && defined(__cplusplus) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("error_messages(off,symdeprecated,symdeprecated2)") +#elif JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress=Pe1444,Pe1215") +#elif JSON_HEDLEY_PELLES_VERSION_CHECK(2,90,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("warn(disable:2241)") +#else + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED +#endif + +#if defined(JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS) + #undef JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS +#endif +#if JSON_HEDLEY_HAS_WARNING("-Wunknown-pragmas") + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("clang diagnostic ignored \"-Wunknown-pragmas\"") +#elif JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("warning(disable:161)") +#elif JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS __pragma(warning(disable:161)) +#elif JSON_HEDLEY_PGI_VERSION_CHECK(17,10,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 1675") +#elif JSON_HEDLEY_GCC_VERSION_CHECK(4,3,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("GCC diagnostic ignored \"-Wunknown-pragmas\"") +#elif JSON_HEDLEY_MSVC_VERSION_CHECK(15,0,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS __pragma(warning(disable:4068)) +#elif \ + JSON_HEDLEY_TI_VERSION_CHECK(16,9,0) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 163") +#elif 
JSON_HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 163") +#elif JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress=Pe161") +#elif JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 161") +#else + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS +#endif + +#if defined(JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES) + #undef JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES +#endif +#if JSON_HEDLEY_HAS_WARNING("-Wunknown-attributes") + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("clang diagnostic ignored \"-Wunknown-attributes\"") +#elif JSON_HEDLEY_GCC_VERSION_CHECK(4,6,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") +#elif JSON_HEDLEY_INTEL_VERSION_CHECK(17,0,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("warning(disable:1292)") +#elif JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES __pragma(warning(disable:1292)) +#elif JSON_HEDLEY_MSVC_VERSION_CHECK(19,0,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES __pragma(warning(disable:5030)) +#elif JSON_HEDLEY_PGI_VERSION_CHECK(20,7,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097,1098") +#elif JSON_HEDLEY_PGI_VERSION_CHECK(17,10,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097") +#elif JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("error_messages(off,attrskipunsup)") +#elif \ + JSON_HEDLEY_TI_VERSION_CHECK(18,1,0) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(8,3,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1173") +#elif JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress=Pe1097") +#elif JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097") +#else + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES +#endif + +#if defined(JSON_HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL) + #undef JSON_HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL +#endif +#if JSON_HEDLEY_HAS_WARNING("-Wcast-qual") + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("clang diagnostic ignored \"-Wcast-qual\"") +#elif JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("warning(disable:2203 2331)") +#elif JSON_HEDLEY_GCC_VERSION_CHECK(3,0,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("GCC diagnostic ignored \"-Wcast-qual\"") +#else + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL +#endif + +#if defined(JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION) + #undef JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION +#endif +#if JSON_HEDLEY_HAS_WARNING("-Wunused-function") + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("clang diagnostic ignored \"-Wunused-function\"") +#elif JSON_HEDLEY_GCC_VERSION_CHECK(3,4,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("GCC diagnostic ignored \"-Wunused-function\"") +#elif 
JSON_HEDLEY_MSVC_VERSION_CHECK(1,0,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION __pragma(warning(disable:4505)) +#elif JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("diag_suppress 3142") +#else + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION +#endif + +#if defined(JSON_HEDLEY_DEPRECATED) + #undef JSON_HEDLEY_DEPRECATED +#endif +#if defined(JSON_HEDLEY_DEPRECATED_FOR) + #undef JSON_HEDLEY_DEPRECATED_FOR +#endif +#if \ + JSON_HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ + JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) + #define JSON_HEDLEY_DEPRECATED(since) __declspec(deprecated("Since " # since)) + #define JSON_HEDLEY_DEPRECATED_FOR(since, replacement) __declspec(deprecated("Since " #since "; use " #replacement)) +#elif \ + (JSON_HEDLEY_HAS_EXTENSION(attribute_deprecated_with_message) && !defined(JSON_HEDLEY_IAR_VERSION)) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(4,5,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(5,6,0) || \ + JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) || \ + JSON_HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + JSON_HEDLEY_TI_VERSION_CHECK(18,1,0) || \ + JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(18,1,0) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(8,3,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_DEPRECATED(since) __attribute__((__deprecated__("Since " #since))) + #define JSON_HEDLEY_DEPRECATED_FOR(since, replacement) __attribute__((__deprecated__("Since " #since "; use " #replacement))) +#elif defined(__cplusplus) && (__cplusplus >= 201402L) + #define JSON_HEDLEY_DEPRECATED(since) JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[deprecated("Since " #since)]]) + #define JSON_HEDLEY_DEPRECATED_FOR(since, replacement) JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[deprecated("Since " #since "; use " #replacement)]]) +#elif \ + JSON_HEDLEY_HAS_ATTRIBUTE(deprecated) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + JSON_HEDLEY_IAR_VERSION_CHECK(8,10,0) + #define JSON_HEDLEY_DEPRECATED(since) __attribute__((__deprecated__)) + #define JSON_HEDLEY_DEPRECATED_FOR(since, replacement) __attribute__((__deprecated__)) +#elif \ + JSON_HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + JSON_HEDLEY_PELLES_VERSION_CHECK(6,50,0) || \ + JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) + #define JSON_HEDLEY_DEPRECATED(since) __declspec(deprecated) + #define JSON_HEDLEY_DEPRECATED_FOR(since, replacement) __declspec(deprecated) +#elif JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) + #define JSON_HEDLEY_DEPRECATED(since) _Pragma("deprecated") + #define JSON_HEDLEY_DEPRECATED_FOR(since, replacement) 
_Pragma("deprecated") +#else + #define JSON_HEDLEY_DEPRECATED(since) + #define JSON_HEDLEY_DEPRECATED_FOR(since, replacement) +#endif + +#if defined(JSON_HEDLEY_UNAVAILABLE) + #undef JSON_HEDLEY_UNAVAILABLE +#endif +#if \ + JSON_HEDLEY_HAS_ATTRIBUTE(warning) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(4,3,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_UNAVAILABLE(available_since) __attribute__((__warning__("Not available until " #available_since))) +#else + #define JSON_HEDLEY_UNAVAILABLE(available_since) +#endif + +#if defined(JSON_HEDLEY_WARN_UNUSED_RESULT) + #undef JSON_HEDLEY_WARN_UNUSED_RESULT +#endif +#if defined(JSON_HEDLEY_WARN_UNUSED_RESULT_MSG) + #undef JSON_HEDLEY_WARN_UNUSED_RESULT_MSG +#endif +#if \ + JSON_HEDLEY_HAS_ATTRIBUTE(warn_unused_result) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + (JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,15,0) && defined(__cplusplus)) || \ + JSON_HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_WARN_UNUSED_RESULT __attribute__((__warn_unused_result__)) + #define JSON_HEDLEY_WARN_UNUSED_RESULT_MSG(msg) __attribute__((__warn_unused_result__)) +#elif (JSON_HEDLEY_HAS_CPP_ATTRIBUTE(nodiscard) >= 201907L) + #define JSON_HEDLEY_WARN_UNUSED_RESULT JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) + #define JSON_HEDLEY_WARN_UNUSED_RESULT_MSG(msg) JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard(msg)]]) +#elif JSON_HEDLEY_HAS_CPP_ATTRIBUTE(nodiscard) + #define JSON_HEDLEY_WARN_UNUSED_RESULT JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) + #define JSON_HEDLEY_WARN_UNUSED_RESULT_MSG(msg) JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) +#elif defined(_Check_return_) /* SAL */ + #define JSON_HEDLEY_WARN_UNUSED_RESULT _Check_return_ + #define JSON_HEDLEY_WARN_UNUSED_RESULT_MSG(msg) _Check_return_ +#else + #define JSON_HEDLEY_WARN_UNUSED_RESULT + #define JSON_HEDLEY_WARN_UNUSED_RESULT_MSG(msg) +#endif + +#if defined(JSON_HEDLEY_SENTINEL) + #undef JSON_HEDLEY_SENTINEL +#endif +#if \ + JSON_HEDLEY_HAS_ATTRIBUTE(sentinel) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(5,4,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_SENTINEL(position) __attribute__((__sentinel__(position))) +#else + #define JSON_HEDLEY_SENTINEL(position) +#endif + +#if defined(JSON_HEDLEY_NO_RETURN) + #undef JSON_HEDLEY_NO_RETURN +#endif +#if JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) + #define JSON_HEDLEY_NO_RETURN __noreturn +#elif \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_NO_RETURN 
__attribute__((__noreturn__)) +#elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L + #define JSON_HEDLEY_NO_RETURN _Noreturn +#elif defined(__cplusplus) && (__cplusplus >= 201103L) + #define JSON_HEDLEY_NO_RETURN JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[noreturn]]) +#elif \ + JSON_HEDLEY_HAS_ATTRIBUTE(noreturn) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(3,2,0) || \ + JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + JSON_HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + JSON_HEDLEY_IAR_VERSION_CHECK(8,10,0) + #define JSON_HEDLEY_NO_RETURN __attribute__((__noreturn__)) +#elif JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) + #define JSON_HEDLEY_NO_RETURN _Pragma("does_not_return") +#elif \ + JSON_HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) + #define JSON_HEDLEY_NO_RETURN __declspec(noreturn) +#elif JSON_HEDLEY_TI_CL6X_VERSION_CHECK(6,0,0) && defined(__cplusplus) + #define JSON_HEDLEY_NO_RETURN _Pragma("FUNC_NEVER_RETURNS;") +#elif JSON_HEDLEY_COMPCERT_VERSION_CHECK(3,2,0) + #define JSON_HEDLEY_NO_RETURN __attribute((noreturn)) +#elif JSON_HEDLEY_PELLES_VERSION_CHECK(9,0,0) + #define JSON_HEDLEY_NO_RETURN __declspec(noreturn) +#else + #define JSON_HEDLEY_NO_RETURN +#endif + +#if defined(JSON_HEDLEY_NO_ESCAPE) + #undef JSON_HEDLEY_NO_ESCAPE +#endif +#if JSON_HEDLEY_HAS_ATTRIBUTE(noescape) + #define JSON_HEDLEY_NO_ESCAPE __attribute__((__noescape__)) +#else + #define JSON_HEDLEY_NO_ESCAPE +#endif + +#if defined(JSON_HEDLEY_UNREACHABLE) + #undef JSON_HEDLEY_UNREACHABLE +#endif +#if defined(JSON_HEDLEY_UNREACHABLE_RETURN) + #undef JSON_HEDLEY_UNREACHABLE_RETURN +#endif +#if defined(JSON_HEDLEY_ASSUME) + #undef JSON_HEDLEY_ASSUME +#endif +#if \ + JSON_HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) + #define JSON_HEDLEY_ASSUME(expr) __assume(expr) +#elif JSON_HEDLEY_HAS_BUILTIN(__builtin_assume) + #define JSON_HEDLEY_ASSUME(expr) __builtin_assume(expr) +#elif \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) + #if defined(__cplusplus) + #define JSON_HEDLEY_ASSUME(expr) std::_nassert(expr) + #else + #define JSON_HEDLEY_ASSUME(expr) _nassert(expr) + #endif +#endif +#if \ + (JSON_HEDLEY_HAS_BUILTIN(__builtin_unreachable) && (!defined(JSON_HEDLEY_ARM_VERSION))) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(4,5,0) || \ + JSON_HEDLEY_PGI_VERSION_CHECK(18,10,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_IBM_VERSION_CHECK(13,1,5) || \ + JSON_HEDLEY_CRAY_VERSION_CHECK(10,0,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_UNREACHABLE() __builtin_unreachable() +#elif defined(JSON_HEDLEY_ASSUME) + #define JSON_HEDLEY_UNREACHABLE() JSON_HEDLEY_ASSUME(0) +#endif +#if 
!defined(JSON_HEDLEY_ASSUME) + #if defined(JSON_HEDLEY_UNREACHABLE) + #define JSON_HEDLEY_ASSUME(expr) JSON_HEDLEY_STATIC_CAST(void, ((expr) ? 1 : (JSON_HEDLEY_UNREACHABLE(), 1))) + #else + #define JSON_HEDLEY_ASSUME(expr) JSON_HEDLEY_STATIC_CAST(void, expr) + #endif +#endif +#if defined(JSON_HEDLEY_UNREACHABLE) + #if \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) + #define JSON_HEDLEY_UNREACHABLE_RETURN(value) return (JSON_HEDLEY_STATIC_CAST(void, JSON_HEDLEY_ASSUME(0)), (value)) + #else + #define JSON_HEDLEY_UNREACHABLE_RETURN(value) JSON_HEDLEY_UNREACHABLE() + #endif +#else + #define JSON_HEDLEY_UNREACHABLE_RETURN(value) return (value) +#endif +#if !defined(JSON_HEDLEY_UNREACHABLE) + #define JSON_HEDLEY_UNREACHABLE() JSON_HEDLEY_ASSUME(0) +#endif + +JSON_HEDLEY_DIAGNOSTIC_PUSH +#if JSON_HEDLEY_HAS_WARNING("-Wpedantic") + #pragma clang diagnostic ignored "-Wpedantic" +#endif +#if JSON_HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") && defined(__cplusplus) + #pragma clang diagnostic ignored "-Wc++98-compat-pedantic" +#endif +#if JSON_HEDLEY_GCC_HAS_WARNING("-Wvariadic-macros",4,0,0) + #if defined(__clang__) + #pragma clang diagnostic ignored "-Wvariadic-macros" + #elif defined(JSON_HEDLEY_GCC_VERSION) + #pragma GCC diagnostic ignored "-Wvariadic-macros" + #endif +#endif +#if defined(JSON_HEDLEY_NON_NULL) + #undef JSON_HEDLEY_NON_NULL +#endif +#if \ + JSON_HEDLEY_HAS_ATTRIBUTE(nonnull) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) + #define JSON_HEDLEY_NON_NULL(...) __attribute__((__nonnull__(__VA_ARGS__))) +#else + #define JSON_HEDLEY_NON_NULL(...) +#endif +JSON_HEDLEY_DIAGNOSTIC_POP + +#if defined(JSON_HEDLEY_PRINTF_FORMAT) + #undef JSON_HEDLEY_PRINTF_FORMAT +#endif +#if defined(__MINGW32__) && JSON_HEDLEY_GCC_HAS_ATTRIBUTE(format,4,4,0) && !defined(__USE_MINGW_ANSI_STDIO) + #define JSON_HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(ms_printf, string_idx, first_to_check))) +#elif defined(__MINGW32__) && JSON_HEDLEY_GCC_HAS_ATTRIBUTE(format,4,4,0) && defined(__USE_MINGW_ANSI_STDIO) + #define JSON_HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(gnu_printf, string_idx, first_to_check))) +#elif \ + JSON_HEDLEY_HAS_ATTRIBUTE(format) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(5,6,0) || \ + JSON_HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(__printf__, string_idx, first_to_check))) +#elif JSON_HEDLEY_PELLES_VERSION_CHECK(6,0,0) + #define JSON_HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) 
__declspec(vaformat(printf,string_idx,first_to_check)) +#else + #define JSON_HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) +#endif + +#if defined(JSON_HEDLEY_CONSTEXPR) + #undef JSON_HEDLEY_CONSTEXPR +#endif +#if defined(__cplusplus) + #if __cplusplus >= 201103L + #define JSON_HEDLEY_CONSTEXPR JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(constexpr) + #endif +#endif +#if !defined(JSON_HEDLEY_CONSTEXPR) + #define JSON_HEDLEY_CONSTEXPR +#endif + +#if defined(JSON_HEDLEY_PREDICT) + #undef JSON_HEDLEY_PREDICT +#endif +#if defined(JSON_HEDLEY_LIKELY) + #undef JSON_HEDLEY_LIKELY +#endif +#if defined(JSON_HEDLEY_UNLIKELY) + #undef JSON_HEDLEY_UNLIKELY +#endif +#if defined(JSON_HEDLEY_UNPREDICTABLE) + #undef JSON_HEDLEY_UNPREDICTABLE +#endif +#if JSON_HEDLEY_HAS_BUILTIN(__builtin_unpredictable) + #define JSON_HEDLEY_UNPREDICTABLE(expr) __builtin_unpredictable((expr)) +#endif +#if \ + (JSON_HEDLEY_HAS_BUILTIN(__builtin_expect_with_probability) && !defined(JSON_HEDLEY_PGI_VERSION)) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(9,0,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define JSON_HEDLEY_PREDICT(expr, value, probability) __builtin_expect_with_probability( (expr), (value), (probability)) +# define JSON_HEDLEY_PREDICT_TRUE(expr, probability) __builtin_expect_with_probability(!!(expr), 1 , (probability)) +# define JSON_HEDLEY_PREDICT_FALSE(expr, probability) __builtin_expect_with_probability(!!(expr), 0 , (probability)) +# define JSON_HEDLEY_LIKELY(expr) __builtin_expect (!!(expr), 1 ) +# define JSON_HEDLEY_UNLIKELY(expr) __builtin_expect (!!(expr), 0 ) +#elif \ + (JSON_HEDLEY_HAS_BUILTIN(__builtin_expect) && !defined(JSON_HEDLEY_INTEL_CL_VERSION)) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(3,0,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + (JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,15,0) && defined(__cplusplus)) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + JSON_HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,7,0) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(3,1,0) || \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,1,0) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + JSON_HEDLEY_TINYC_VERSION_CHECK(0,9,27) || \ + JSON_HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define JSON_HEDLEY_PREDICT(expr, expected, probability) \ + (((probability) >= 0.9) ? __builtin_expect((expr), (expected)) : (JSON_HEDLEY_STATIC_CAST(void, expected), (expr))) +# define JSON_HEDLEY_PREDICT_TRUE(expr, probability) \ + (__extension__ ({ \ + double hedley_probability_ = (probability); \ + ((hedley_probability_ >= 0.9) ? __builtin_expect(!!(expr), 1) : ((hedley_probability_ <= 0.1) ? __builtin_expect(!!(expr), 0) : !!(expr))); \ + })) +# define JSON_HEDLEY_PREDICT_FALSE(expr, probability) \ + (__extension__ ({ \ + double hedley_probability_ = (probability); \ + ((hedley_probability_ >= 0.9) ? __builtin_expect(!!(expr), 0) : ((hedley_probability_ <= 0.1) ? 
__builtin_expect(!!(expr), 1) : !!(expr))); \ + })) +# define JSON_HEDLEY_LIKELY(expr) __builtin_expect(!!(expr), 1) +# define JSON_HEDLEY_UNLIKELY(expr) __builtin_expect(!!(expr), 0) +#else +# define JSON_HEDLEY_PREDICT(expr, expected, probability) (JSON_HEDLEY_STATIC_CAST(void, expected), (expr)) +# define JSON_HEDLEY_PREDICT_TRUE(expr, probability) (!!(expr)) +# define JSON_HEDLEY_PREDICT_FALSE(expr, probability) (!!(expr)) +# define JSON_HEDLEY_LIKELY(expr) (!!(expr)) +# define JSON_HEDLEY_UNLIKELY(expr) (!!(expr)) +#endif +#if !defined(JSON_HEDLEY_UNPREDICTABLE) + #define JSON_HEDLEY_UNPREDICTABLE(expr) JSON_HEDLEY_PREDICT(expr, 1, 0.5) +#endif + +#if defined(JSON_HEDLEY_MALLOC) + #undef JSON_HEDLEY_MALLOC +#endif +#if \ + JSON_HEDLEY_HAS_ATTRIBUTE(malloc) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + JSON_HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ + JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_MALLOC __attribute__((__malloc__)) +#elif JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) + #define JSON_HEDLEY_MALLOC _Pragma("returns_new_memory") +#elif \ + JSON_HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ + JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) + #define JSON_HEDLEY_MALLOC __declspec(restrict) +#else + #define JSON_HEDLEY_MALLOC +#endif + +#if defined(JSON_HEDLEY_PURE) + #undef JSON_HEDLEY_PURE +#endif +#if \ + JSON_HEDLEY_HAS_ATTRIBUTE(pure) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(2,96,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + JSON_HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + JSON_HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define JSON_HEDLEY_PURE __attribute__((__pure__)) +#elif JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) +# define JSON_HEDLEY_PURE _Pragma("does_not_write_global_data") +#elif defined(__cplusplus) && \ + ( \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(2,0,1) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) 
|| \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) \ + ) +# define JSON_HEDLEY_PURE _Pragma("FUNC_IS_PURE;") +#else +# define JSON_HEDLEY_PURE +#endif + +#if defined(JSON_HEDLEY_CONST) + #undef JSON_HEDLEY_CONST +#endif +#if \ + JSON_HEDLEY_HAS_ATTRIBUTE(const) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(2,5,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + JSON_HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + JSON_HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_CONST __attribute__((__const__)) +#elif \ + JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) + #define JSON_HEDLEY_CONST _Pragma("no_side_effect") +#else + #define JSON_HEDLEY_CONST JSON_HEDLEY_PURE +#endif + +#if defined(JSON_HEDLEY_RESTRICT) + #undef JSON_HEDLEY_RESTRICT +#endif +#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && !defined(__cplusplus) + #define JSON_HEDLEY_RESTRICT restrict +#elif \ + JSON_HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + JSON_HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + JSON_HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + JSON_HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,2,4) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(8,1,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + (JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus)) || \ + JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) || \ + defined(__clang__) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_RESTRICT __restrict +#elif JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,3,0) && !defined(__cplusplus) + #define JSON_HEDLEY_RESTRICT _Restrict +#else + #define JSON_HEDLEY_RESTRICT +#endif + +#if defined(JSON_HEDLEY_INLINE) + #undef JSON_HEDLEY_INLINE +#endif +#if \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \ + (defined(__cplusplus) && (__cplusplus >= 199711L)) + #define JSON_HEDLEY_INLINE inline +#elif \ + defined(JSON_HEDLEY_GCC_VERSION) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(6,2,0) + #define JSON_HEDLEY_INLINE __inline__ +#elif \ + JSON_HEDLEY_MSVC_VERSION_CHECK(12,0,0) || \ + JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,1,0) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(3,1,0) || \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_INLINE __inline +#else + #define 
JSON_HEDLEY_INLINE +#endif + +#if defined(JSON_HEDLEY_ALWAYS_INLINE) + #undef JSON_HEDLEY_ALWAYS_INLINE +#endif +#if \ + JSON_HEDLEY_HAS_ATTRIBUTE(always_inline) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + JSON_HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + JSON_HEDLEY_IAR_VERSION_CHECK(8,10,0) +# define JSON_HEDLEY_ALWAYS_INLINE __attribute__((__always_inline__)) JSON_HEDLEY_INLINE +#elif \ + JSON_HEDLEY_MSVC_VERSION_CHECK(12,0,0) || \ + JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define JSON_HEDLEY_ALWAYS_INLINE __forceinline +#elif defined(__cplusplus) && \ + ( \ + JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) \ + ) +# define JSON_HEDLEY_ALWAYS_INLINE _Pragma("FUNC_ALWAYS_INLINE;") +#elif JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define JSON_HEDLEY_ALWAYS_INLINE _Pragma("inline=forced") +#else +# define JSON_HEDLEY_ALWAYS_INLINE JSON_HEDLEY_INLINE +#endif + +#if defined(JSON_HEDLEY_NEVER_INLINE) + #undef JSON_HEDLEY_NEVER_INLINE +#endif +#if \ + JSON_HEDLEY_HAS_ATTRIBUTE(noinline) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + JSON_HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + JSON_HEDLEY_IAR_VERSION_CHECK(8,10,0) + #define JSON_HEDLEY_NEVER_INLINE __attribute__((__noinline__)) +#elif \ + JSON_HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) + #define JSON_HEDLEY_NEVER_INLINE __declspec(noinline) +#elif JSON_HEDLEY_PGI_VERSION_CHECK(10,2,0) + #define JSON_HEDLEY_NEVER_INLINE _Pragma("noinline") +#elif JSON_HEDLEY_TI_CL6X_VERSION_CHECK(6,0,0) && 
defined(__cplusplus) + #define JSON_HEDLEY_NEVER_INLINE _Pragma("FUNC_CANNOT_INLINE;") +#elif JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) + #define JSON_HEDLEY_NEVER_INLINE _Pragma("inline=never") +#elif JSON_HEDLEY_COMPCERT_VERSION_CHECK(3,2,0) + #define JSON_HEDLEY_NEVER_INLINE __attribute((noinline)) +#elif JSON_HEDLEY_PELLES_VERSION_CHECK(9,0,0) + #define JSON_HEDLEY_NEVER_INLINE __declspec(noinline) +#else + #define JSON_HEDLEY_NEVER_INLINE +#endif + +#if defined(JSON_HEDLEY_PRIVATE) + #undef JSON_HEDLEY_PRIVATE +#endif +#if defined(JSON_HEDLEY_PUBLIC) + #undef JSON_HEDLEY_PUBLIC +#endif +#if defined(JSON_HEDLEY_IMPORT) + #undef JSON_HEDLEY_IMPORT +#endif +#if defined(_WIN32) || defined(__CYGWIN__) +# define JSON_HEDLEY_PRIVATE +# define JSON_HEDLEY_PUBLIC __declspec(dllexport) +# define JSON_HEDLEY_IMPORT __declspec(dllimport) +#else +# if \ + JSON_HEDLEY_HAS_ATTRIBUTE(visibility) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + JSON_HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + ( \ + defined(__TI_EABI__) && \ + ( \ + (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) \ + ) \ + ) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define JSON_HEDLEY_PRIVATE __attribute__((__visibility__("hidden"))) +# define JSON_HEDLEY_PUBLIC __attribute__((__visibility__("default"))) +# else +# define JSON_HEDLEY_PRIVATE +# define JSON_HEDLEY_PUBLIC +# endif +# define JSON_HEDLEY_IMPORT extern +#endif + +#if defined(JSON_HEDLEY_NO_THROW) + #undef JSON_HEDLEY_NO_THROW +#endif +#if \ + JSON_HEDLEY_HAS_ATTRIBUTE(nothrow) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_NO_THROW __attribute__((__nothrow__)) +#elif \ + JSON_HEDLEY_MSVC_VERSION_CHECK(13,1,0) || \ + JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) + #define JSON_HEDLEY_NO_THROW __declspec(nothrow) +#else + #define JSON_HEDLEY_NO_THROW +#endif + +#if defined(JSON_HEDLEY_FALL_THROUGH) + #undef JSON_HEDLEY_FALL_THROUGH +#endif +#if \ + JSON_HEDLEY_HAS_ATTRIBUTE(fallthrough) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(7,0,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_FALL_THROUGH __attribute__((__fallthrough__)) +#elif JSON_HEDLEY_HAS_CPP_ATTRIBUTE_NS(clang,fallthrough) + #define JSON_HEDLEY_FALL_THROUGH JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[clang::fallthrough]]) +#elif JSON_HEDLEY_HAS_CPP_ATTRIBUTE(fallthrough) + #define JSON_HEDLEY_FALL_THROUGH JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[fallthrough]]) +#elif defined(__fallthrough) /* SAL */ + #define JSON_HEDLEY_FALL_THROUGH __fallthrough +#else + #define JSON_HEDLEY_FALL_THROUGH +#endif + +#if defined(JSON_HEDLEY_RETURNS_NON_NULL) + #undef JSON_HEDLEY_RETURNS_NON_NULL +#endif +#if \ + JSON_HEDLEY_HAS_ATTRIBUTE(returns_nonnull) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_RETURNS_NON_NULL __attribute__((__returns_nonnull__)) +#elif defined(_Ret_notnull_) /* SAL */ + #define JSON_HEDLEY_RETURNS_NON_NULL _Ret_notnull_ +#else + #define JSON_HEDLEY_RETURNS_NON_NULL +#endif + +#if defined(JSON_HEDLEY_ARRAY_PARAM) + #undef JSON_HEDLEY_ARRAY_PARAM +#endif +#if \ + defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) 
&& \ + !defined(__STDC_NO_VLA__) && \ + !defined(__cplusplus) && \ + !defined(JSON_HEDLEY_PGI_VERSION) && \ + !defined(JSON_HEDLEY_TINYC_VERSION) + #define JSON_HEDLEY_ARRAY_PARAM(name) (name) +#else + #define JSON_HEDLEY_ARRAY_PARAM(name) +#endif + +#if defined(JSON_HEDLEY_IS_CONSTANT) + #undef JSON_HEDLEY_IS_CONSTANT +#endif +#if defined(JSON_HEDLEY_REQUIRE_CONSTEXPR) + #undef JSON_HEDLEY_REQUIRE_CONSTEXPR +#endif +/* JSON_HEDLEY_IS_CONSTEXPR_ is for + HEDLEY INTERNAL USE ONLY. API subject to change without notice. */ +#if defined(JSON_HEDLEY_IS_CONSTEXPR_) + #undef JSON_HEDLEY_IS_CONSTEXPR_ +#endif +#if \ + JSON_HEDLEY_HAS_BUILTIN(__builtin_constant_p) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_TINYC_VERSION_CHECK(0,9,19) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + JSON_HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + (JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) && !defined(__cplusplus)) || \ + JSON_HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_IS_CONSTANT(expr) __builtin_constant_p(expr) +#endif +#if !defined(__cplusplus) +# if \ + JSON_HEDLEY_HAS_BUILTIN(__builtin_types_compatible_p) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + JSON_HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(5,4,0) || \ + JSON_HEDLEY_TINYC_VERSION_CHECK(0,9,24) +#if defined(__INTPTR_TYPE__) + #define JSON_HEDLEY_IS_CONSTEXPR_(expr) __builtin_types_compatible_p(__typeof__((1 ? (void*) ((__INTPTR_TYPE__) ((expr) * 0)) : (int*) 0)), int*) +#else + #include <stdint.h> + #define JSON_HEDLEY_IS_CONSTEXPR_(expr) __builtin_types_compatible_p(__typeof__((1 ? (void*) ((intptr_t) ((expr) * 0)) : (int*) 0)), int*) +#endif +# elif \ + ( \ + defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && \ + !defined(JSON_HEDLEY_SUNPRO_VERSION) && \ + !defined(JSON_HEDLEY_PGI_VERSION) && \ + !defined(JSON_HEDLEY_IAR_VERSION)) || \ + (JSON_HEDLEY_HAS_EXTENSION(c_generic_selections) && !defined(JSON_HEDLEY_IAR_VERSION)) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(17,0,0) || \ + JSON_HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(5,3,0) +#if defined(__INTPTR_TYPE__) + #define JSON_HEDLEY_IS_CONSTEXPR_(expr) _Generic((1 ? (void*) ((__INTPTR_TYPE__) ((expr) * 0)) : (int*) 0), int*: 1, void*: 0) +#else + #include <stdint.h> + #define JSON_HEDLEY_IS_CONSTEXPR_(expr) _Generic((1 ? (void*) ((intptr_t) ((expr) * 0)) : (int*) 0), int*: 1, void*: 0) +#endif +# elif \ + defined(JSON_HEDLEY_GCC_VERSION) || \ + defined(JSON_HEDLEY_INTEL_VERSION) || \ + defined(JSON_HEDLEY_TINYC_VERSION) || \ + defined(JSON_HEDLEY_TI_ARMCL_VERSION) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(18,12,0) || \ + defined(JSON_HEDLEY_TI_CL2000_VERSION) || \ + defined(JSON_HEDLEY_TI_CL6X_VERSION) || \ + defined(JSON_HEDLEY_TI_CL7X_VERSION) || \ + defined(JSON_HEDLEY_TI_CLPRU_VERSION) || \ + defined(__clang__) +# define JSON_HEDLEY_IS_CONSTEXPR_(expr) ( \ + sizeof(void) != \ + sizeof(*( \ + 1 ? \ + ((void*) ((expr) * 0L) ) : \ +((struct { char v[sizeof(void) * 2]; } *) 1) \ + ) \ + ) \ + ) +# endif +#endif +#if defined(JSON_HEDLEY_IS_CONSTEXPR_) + #if !defined(JSON_HEDLEY_IS_CONSTANT) + #define JSON_HEDLEY_IS_CONSTANT(expr) JSON_HEDLEY_IS_CONSTEXPR_(expr) + #endif + #define JSON_HEDLEY_REQUIRE_CONSTEXPR(expr) (JSON_HEDLEY_IS_CONSTEXPR_(expr) ? 
(expr) : (-1)) +#else + #if !defined(JSON_HEDLEY_IS_CONSTANT) + #define JSON_HEDLEY_IS_CONSTANT(expr) (0) + #endif + #define JSON_HEDLEY_REQUIRE_CONSTEXPR(expr) (expr) +#endif + +#if defined(JSON_HEDLEY_BEGIN_C_DECLS) + #undef JSON_HEDLEY_BEGIN_C_DECLS +#endif +#if defined(JSON_HEDLEY_END_C_DECLS) + #undef JSON_HEDLEY_END_C_DECLS +#endif +#if defined(JSON_HEDLEY_C_DECL) + #undef JSON_HEDLEY_C_DECL +#endif +#if defined(__cplusplus) + #define JSON_HEDLEY_BEGIN_C_DECLS extern "C" { + #define JSON_HEDLEY_END_C_DECLS } + #define JSON_HEDLEY_C_DECL extern "C" +#else + #define JSON_HEDLEY_BEGIN_C_DECLS + #define JSON_HEDLEY_END_C_DECLS + #define JSON_HEDLEY_C_DECL +#endif + +#if defined(JSON_HEDLEY_STATIC_ASSERT) + #undef JSON_HEDLEY_STATIC_ASSERT +#endif +#if \ + !defined(__cplusplus) && ( \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ + (JSON_HEDLEY_HAS_FEATURE(c_static_assert) && !defined(JSON_HEDLEY_INTEL_CL_VERSION)) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(6,0,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + defined(_Static_assert) \ + ) +# define JSON_HEDLEY_STATIC_ASSERT(expr, message) _Static_assert(expr, message) +#elif \ + (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ + JSON_HEDLEY_MSVC_VERSION_CHECK(16,0,0) || \ + JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define JSON_HEDLEY_STATIC_ASSERT(expr, message) JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(static_assert(expr, message)) +#else +# define JSON_HEDLEY_STATIC_ASSERT(expr, message) +#endif + +#if defined(JSON_HEDLEY_NULL) + #undef JSON_HEDLEY_NULL +#endif +#if defined(__cplusplus) + #if __cplusplus >= 201103L + #define JSON_HEDLEY_NULL JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(nullptr) + #elif defined(NULL) + #define JSON_HEDLEY_NULL NULL + #else + #define JSON_HEDLEY_NULL JSON_HEDLEY_STATIC_CAST(void*, 0) + #endif +#elif defined(NULL) + #define JSON_HEDLEY_NULL NULL +#else + #define JSON_HEDLEY_NULL ((void*) 0) +#endif + +#if defined(JSON_HEDLEY_MESSAGE) + #undef JSON_HEDLEY_MESSAGE +#endif +#if JSON_HEDLEY_HAS_WARNING("-Wunknown-pragmas") +# define JSON_HEDLEY_MESSAGE(msg) \ + JSON_HEDLEY_DIAGNOSTIC_PUSH \ + JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \ + JSON_HEDLEY_PRAGMA(message msg) \ + JSON_HEDLEY_DIAGNOSTIC_POP +#elif \ + JSON_HEDLEY_GCC_VERSION_CHECK(4,4,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define JSON_HEDLEY_MESSAGE(msg) JSON_HEDLEY_PRAGMA(message msg) +#elif JSON_HEDLEY_CRAY_VERSION_CHECK(5,0,0) +# define JSON_HEDLEY_MESSAGE(msg) JSON_HEDLEY_PRAGMA(_CRI message msg) +#elif JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define JSON_HEDLEY_MESSAGE(msg) JSON_HEDLEY_PRAGMA(message(msg)) +#elif JSON_HEDLEY_PELLES_VERSION_CHECK(2,0,0) +# define JSON_HEDLEY_MESSAGE(msg) JSON_HEDLEY_PRAGMA(message(msg)) +#else +# define JSON_HEDLEY_MESSAGE(msg) +#endif + +#if defined(JSON_HEDLEY_WARNING) + #undef JSON_HEDLEY_WARNING +#endif +#if JSON_HEDLEY_HAS_WARNING("-Wunknown-pragmas") +# define JSON_HEDLEY_WARNING(msg) \ + JSON_HEDLEY_DIAGNOSTIC_PUSH \ + JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \ + JSON_HEDLEY_PRAGMA(clang warning msg) \ + JSON_HEDLEY_DIAGNOSTIC_POP +#elif \ + JSON_HEDLEY_GCC_VERSION_CHECK(4,8,0) || \ + JSON_HEDLEY_PGI_VERSION_CHECK(18,4,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define JSON_HEDLEY_WARNING(msg) JSON_HEDLEY_PRAGMA(GCC warning msg) +#elif \ + JSON_HEDLEY_MSVC_VERSION_CHECK(15,0,0) || \ + JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define JSON_HEDLEY_WARNING(msg) JSON_HEDLEY_PRAGMA(message(msg)) +#else +# define 
JSON_HEDLEY_WARNING(msg) JSON_HEDLEY_MESSAGE(msg) +#endif + +#if defined(JSON_HEDLEY_REQUIRE) + #undef JSON_HEDLEY_REQUIRE +#endif +#if defined(JSON_HEDLEY_REQUIRE_MSG) + #undef JSON_HEDLEY_REQUIRE_MSG +#endif +#if JSON_HEDLEY_HAS_ATTRIBUTE(diagnose_if) +# if JSON_HEDLEY_HAS_WARNING("-Wgcc-compat") +# define JSON_HEDLEY_REQUIRE(expr) \ + JSON_HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wgcc-compat\"") \ + __attribute__((diagnose_if(!(expr), #expr, "error"))) \ + JSON_HEDLEY_DIAGNOSTIC_POP +# define JSON_HEDLEY_REQUIRE_MSG(expr,msg) \ + JSON_HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wgcc-compat\"") \ + __attribute__((diagnose_if(!(expr), msg, "error"))) \ + JSON_HEDLEY_DIAGNOSTIC_POP +# else +# define JSON_HEDLEY_REQUIRE(expr) __attribute__((diagnose_if(!(expr), #expr, "error"))) +# define JSON_HEDLEY_REQUIRE_MSG(expr,msg) __attribute__((diagnose_if(!(expr), msg, "error"))) +# endif +#else +# define JSON_HEDLEY_REQUIRE(expr) +# define JSON_HEDLEY_REQUIRE_MSG(expr,msg) +#endif + +#if defined(JSON_HEDLEY_FLAGS) + #undef JSON_HEDLEY_FLAGS +#endif +#if JSON_HEDLEY_HAS_ATTRIBUTE(flag_enum) && (!defined(__cplusplus) || JSON_HEDLEY_HAS_WARNING("-Wbitfield-enum-conversion")) + #define JSON_HEDLEY_FLAGS __attribute__((__flag_enum__)) +#else + #define JSON_HEDLEY_FLAGS +#endif + +#if defined(JSON_HEDLEY_FLAGS_CAST) + #undef JSON_HEDLEY_FLAGS_CAST +#endif +#if JSON_HEDLEY_INTEL_VERSION_CHECK(19,0,0) +# define JSON_HEDLEY_FLAGS_CAST(T, expr) (__extension__ ({ \ + JSON_HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("warning(disable:188)") \ + ((T) (expr)); \ + JSON_HEDLEY_DIAGNOSTIC_POP \ + })) +#else +# define JSON_HEDLEY_FLAGS_CAST(T, expr) JSON_HEDLEY_STATIC_CAST(T, expr) +#endif + +#if defined(JSON_HEDLEY_EMPTY_BASES) + #undef JSON_HEDLEY_EMPTY_BASES +#endif +#if \ + (JSON_HEDLEY_MSVC_VERSION_CHECK(19,0,23918) && !JSON_HEDLEY_MSVC_VERSION_CHECK(20,0,0)) || \ + JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) + #define JSON_HEDLEY_EMPTY_BASES __declspec(empty_bases) +#else + #define JSON_HEDLEY_EMPTY_BASES +#endif + +/* Remaining macros are deprecated. 
+*/
+
+#if defined(JSON_HEDLEY_GCC_NOT_CLANG_VERSION_CHECK)
+    #undef JSON_HEDLEY_GCC_NOT_CLANG_VERSION_CHECK
+#endif
+#if defined(__clang__)
+    #define JSON_HEDLEY_GCC_NOT_CLANG_VERSION_CHECK(major,minor,patch) (0)
+#else
+    #define JSON_HEDLEY_GCC_NOT_CLANG_VERSION_CHECK(major,minor,patch) JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch)
+#endif
+
+#if defined(JSON_HEDLEY_CLANG_HAS_ATTRIBUTE)
+    #undef JSON_HEDLEY_CLANG_HAS_ATTRIBUTE
+#endif
+#define JSON_HEDLEY_CLANG_HAS_ATTRIBUTE(attribute) JSON_HEDLEY_HAS_ATTRIBUTE(attribute)
+
+#if defined(JSON_HEDLEY_CLANG_HAS_CPP_ATTRIBUTE)
+    #undef JSON_HEDLEY_CLANG_HAS_CPP_ATTRIBUTE
+#endif
+#define JSON_HEDLEY_CLANG_HAS_CPP_ATTRIBUTE(attribute) JSON_HEDLEY_HAS_CPP_ATTRIBUTE(attribute)
+
+#if defined(JSON_HEDLEY_CLANG_HAS_BUILTIN)
+    #undef JSON_HEDLEY_CLANG_HAS_BUILTIN
+#endif
+#define JSON_HEDLEY_CLANG_HAS_BUILTIN(builtin) JSON_HEDLEY_HAS_BUILTIN(builtin)
+
+#if defined(JSON_HEDLEY_CLANG_HAS_FEATURE)
+    #undef JSON_HEDLEY_CLANG_HAS_FEATURE
+#endif
+#define JSON_HEDLEY_CLANG_HAS_FEATURE(feature) JSON_HEDLEY_HAS_FEATURE(feature)
+
+#if defined(JSON_HEDLEY_CLANG_HAS_EXTENSION)
+    #undef JSON_HEDLEY_CLANG_HAS_EXTENSION
+#endif
+#define JSON_HEDLEY_CLANG_HAS_EXTENSION(extension) JSON_HEDLEY_HAS_EXTENSION(extension)
+
+#if defined(JSON_HEDLEY_CLANG_HAS_DECLSPEC_DECLSPEC_ATTRIBUTE)
+    #undef JSON_HEDLEY_CLANG_HAS_DECLSPEC_DECLSPEC_ATTRIBUTE
+#endif
+#define JSON_HEDLEY_CLANG_HAS_DECLSPEC_ATTRIBUTE(attribute) JSON_HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute)
+
+#if defined(JSON_HEDLEY_CLANG_HAS_WARNING)
+    #undef JSON_HEDLEY_CLANG_HAS_WARNING
+#endif
+#define JSON_HEDLEY_CLANG_HAS_WARNING(warning) JSON_HEDLEY_HAS_WARNING(warning)
+
+#endif /* !defined(JSON_HEDLEY_VERSION) || (JSON_HEDLEY_VERSION < X) */
+
+
+// This file contains all internal macro definitions (except those affecting ABI)
+// You MUST include macro_unscope.hpp at the end of json.hpp to undef all of them
+
+// #include
+
+
+// exclude unsupported compilers
+#if !defined(JSON_SKIP_UNSUPPORTED_COMPILER_CHECK)
+    #if defined(__clang__)
+        #if (__clang_major__ * 10000 + __clang_minor__ * 100 + __clang_patchlevel__) < 30400
+            #error "unsupported Clang version - see https://github.com/nlohmann/json#supported-compilers"
+        #endif
+    #elif defined(__GNUC__) && !(defined(__ICC) || defined(__INTEL_COMPILER))
+        #if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) < 40800
+            #error "unsupported GCC version - see https://github.com/nlohmann/json#supported-compilers"
+        #endif
+    #endif
+#endif
+
+// C++ language standard detection
+// if the user manually specified the used c++ version this is skipped
+#if !defined(JSON_HAS_CPP_20) && !defined(JSON_HAS_CPP_17) && !defined(JSON_HAS_CPP_14) && !defined(JSON_HAS_CPP_11)
+    #if (defined(__cplusplus) && __cplusplus >= 202002L) || (defined(_MSVC_LANG) && _MSVC_LANG >= 202002L)
+        #define JSON_HAS_CPP_20
+        #define JSON_HAS_CPP_17
+        #define JSON_HAS_CPP_14
+    #elif (defined(__cplusplus) && __cplusplus >= 201703L) || (defined(_HAS_CXX17) && _HAS_CXX17 == 1) // fix for issue #464
+        #define JSON_HAS_CPP_17
+        #define JSON_HAS_CPP_14
+    #elif (defined(__cplusplus) && __cplusplus >= 201402L) || (defined(_HAS_CXX14) && _HAS_CXX14 == 1)
+        #define JSON_HAS_CPP_14
+    #endif
+    // the cpp 11 flag is always specified because it is the minimal required version
+    #define JSON_HAS_CPP_11
+#endif
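+
+// A small illustrative sketch (not upstream code): since the detection above
+// is skipped as soon as the user pre-defines one of these flags, a build can
+// pin the language standard manually before including the header
+// (JSON_HAS_CPP_11 is the minimal version and is otherwise always defined):
+//
+//     #define JSON_HAS_CPP_11
+//     #define JSON_HAS_CPP_14   // force the C++14 code paths
+//     #include <nlohmann/json.hpp>   // or the vendored path to json.hpp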
+
+#ifdef __has_include
+    #if __has_include(<version>)
+        #include <version>
+    #endif
+#endif
+
+#if !defined(JSON_HAS_FILESYSTEM) && !defined(JSON_HAS_EXPERIMENTAL_FILESYSTEM)
+    #ifdef JSON_HAS_CPP_17
+        #if defined(__cpp_lib_filesystem)
+            #define JSON_HAS_FILESYSTEM 1
+        #elif defined(__cpp_lib_experimental_filesystem)
+            #define JSON_HAS_EXPERIMENTAL_FILESYSTEM 1
+        #elif !defined(__has_include)
+            #define JSON_HAS_EXPERIMENTAL_FILESYSTEM 1
+        #elif __has_include(<filesystem>)
+            #define JSON_HAS_FILESYSTEM 1
+        #elif __has_include(<experimental/filesystem>)
+            #define JSON_HAS_EXPERIMENTAL_FILESYSTEM 1
+        #endif
+
+        // std::filesystem does not work on MinGW GCC 8: https://sourceforge.net/p/mingw-w64/bugs/737/
+        #if defined(__MINGW32__) && defined(__GNUC__) && __GNUC__ == 8
+            #undef JSON_HAS_FILESYSTEM
+            #undef JSON_HAS_EXPERIMENTAL_FILESYSTEM
+        #endif
+
+        // no filesystem support before GCC 8: https://en.cppreference.com/w/cpp/compiler_support
+        #if defined(__GNUC__) && !defined(__clang__) && __GNUC__ < 8
+            #undef JSON_HAS_FILESYSTEM
+            #undef JSON_HAS_EXPERIMENTAL_FILESYSTEM
+        #endif
+
+        // no filesystem support before Clang 7: https://en.cppreference.com/w/cpp/compiler_support
+        #if defined(__clang_major__) && __clang_major__ < 7
+            #undef JSON_HAS_FILESYSTEM
+            #undef JSON_HAS_EXPERIMENTAL_FILESYSTEM
+        #endif
+
+        // no filesystem support before MSVC 19.14: https://en.cppreference.com/w/cpp/compiler_support
+        #if defined(_MSC_VER) && _MSC_VER < 1914
+            #undef JSON_HAS_FILESYSTEM
+            #undef JSON_HAS_EXPERIMENTAL_FILESYSTEM
+        #endif
+
+        // no filesystem support before iOS 13
+        #if defined(__IPHONE_OS_VERSION_MIN_REQUIRED) && __IPHONE_OS_VERSION_MIN_REQUIRED < 130000
+            #undef JSON_HAS_FILESYSTEM
+            #undef JSON_HAS_EXPERIMENTAL_FILESYSTEM
+        #endif
+
+        // no filesystem support before macOS Catalina
+        #if defined(__MAC_OS_X_VERSION_MIN_REQUIRED) && __MAC_OS_X_VERSION_MIN_REQUIRED < 101500
+            #undef JSON_HAS_FILESYSTEM
+            #undef JSON_HAS_EXPERIMENTAL_FILESYSTEM
+        #endif
+    #endif
+#endif
+
+#ifndef JSON_HAS_EXPERIMENTAL_FILESYSTEM
+    #define JSON_HAS_EXPERIMENTAL_FILESYSTEM 0
+#endif
+
+#ifndef JSON_HAS_FILESYSTEM
+    #define JSON_HAS_FILESYSTEM 0
+#endif
+
+#ifndef JSON_HAS_THREE_WAY_COMPARISON
+    #if defined(__cpp_impl_three_way_comparison) && __cpp_impl_three_way_comparison >= 201907L \
+        && defined(__cpp_lib_three_way_comparison) && __cpp_lib_three_way_comparison >= 201907L
+        #define JSON_HAS_THREE_WAY_COMPARISON 1
+    #else
+        #define JSON_HAS_THREE_WAY_COMPARISON 0
+    #endif
+#endif
+
+#ifndef JSON_HAS_RANGES
+    // ranges header shipping in GCC 11.1.0 (released 2021-04-27) has syntax error
+    #if defined(__GLIBCXX__) && __GLIBCXX__ == 20210427
+        #define JSON_HAS_RANGES 0
+    #elif defined(__cpp_lib_ranges)
+        #define JSON_HAS_RANGES 1
+    #else
+        #define JSON_HAS_RANGES 0
+    #endif
+#endif
+
+#ifndef JSON_HAS_STATIC_RTTI
+    #if !defined(_HAS_STATIC_RTTI) || _HAS_STATIC_RTTI != 0
+        #define JSON_HAS_STATIC_RTTI 1
+    #else
+        #define JSON_HAS_STATIC_RTTI 0
+    #endif
+#endif
+
+#ifdef JSON_HAS_CPP_17
+    #define JSON_INLINE_VARIABLE inline
+#else
+    #define JSON_INLINE_VARIABLE
+#endif
+
+#if JSON_HEDLEY_HAS_ATTRIBUTE(no_unique_address)
+    #define JSON_NO_UNIQUE_ADDRESS [[no_unique_address]]
+#else
+    #define JSON_NO_UNIQUE_ADDRESS
+#endif
+
+// disable documentation warnings on clang
+#if defined(__clang__)
+    #pragma clang diagnostic push
+    #pragma clang diagnostic ignored "-Wdocumentation"
+    #pragma clang diagnostic ignored "-Wdocumentation-unknown-command"
+#endif
+
+// allow disabling exceptions
+#if (defined(__cpp_exceptions) || defined(__EXCEPTIONS) || defined(_CPPUNWIND)) && !defined(JSON_NOEXCEPTION)
+    #define JSON_THROW(exception) throw exception
+    #define JSON_TRY try
+    #define JSON_CATCH(exception) catch(exception)
+    #define JSON_INTERNAL_CATCH(exception) catch(exception)
+#else
+    #include <cstdlib>
+    #define JSON_THROW(exception) std::abort()
+    #define JSON_TRY if(true)
+    #define JSON_CATCH(exception) if(false)
+    #define JSON_INTERNAL_CATCH(exception) if(false)
+#endif
+
+// override exception macros
+#if defined(JSON_THROW_USER)
+    #undef JSON_THROW
+    #define JSON_THROW JSON_THROW_USER
+#endif
+#if defined(JSON_TRY_USER)
+    #undef JSON_TRY
+    #define JSON_TRY JSON_TRY_USER
+#endif
+#if defined(JSON_CATCH_USER)
+    #undef JSON_CATCH
+    #define JSON_CATCH JSON_CATCH_USER
+    #undef JSON_INTERNAL_CATCH
+    #define JSON_INTERNAL_CATCH JSON_CATCH_USER
+#endif
+#if defined(JSON_INTERNAL_CATCH_USER)
+    #undef JSON_INTERNAL_CATCH
+    #define JSON_INTERNAL_CATCH JSON_INTERNAL_CATCH_USER
+#endif
+
+// allow overriding assert
+#if !defined(JSON_ASSERT)
+    #include <cassert> // assert
+    #define JSON_ASSERT(x) assert(x)
+#endif
+
+// allow to access some private functions (needed by the test suite)
+#if defined(JSON_TESTS_PRIVATE)
+    #define JSON_PRIVATE_UNLESS_TESTED public
+#else
+    #define JSON_PRIVATE_UNLESS_TESTED private
+#endif
+
+/*!
+@brief macro to briefly define a mapping between an enum and JSON
+@def NLOHMANN_JSON_SERIALIZE_ENUM
+@since version 3.4.0
+*/
+#define NLOHMANN_JSON_SERIALIZE_ENUM(ENUM_TYPE, ...) \
+    template<typename BasicJsonType> \
+    inline void to_json(BasicJsonType& j, const ENUM_TYPE& e) \
+    { \
+        static_assert(std::is_enum<ENUM_TYPE>::value, #ENUM_TYPE " must be an enum!"); \
+        static const std::pair<ENUM_TYPE, BasicJsonType> m[] = __VA_ARGS__; \
+        auto it = std::find_if(std::begin(m), std::end(m), \
+                               [e](const std::pair<ENUM_TYPE, BasicJsonType>& ej_pair) -> bool \
+        { \
+            return ej_pair.first == e; \
+        }); \
+        j = ((it != std::end(m)) ? it : std::begin(m))->second; \
+    } \
+    template<typename BasicJsonType> \
+    inline void from_json(const BasicJsonType& j, ENUM_TYPE& e) \
+    { \
+        static_assert(std::is_enum<ENUM_TYPE>::value, #ENUM_TYPE " must be an enum!"); \
+        static const std::pair<ENUM_TYPE, BasicJsonType> m[] = __VA_ARGS__; \
+        auto it = std::find_if(std::begin(m), std::end(m), \
+                               [&j](const std::pair<ENUM_TYPE, BasicJsonType>& ej_pair) -> bool \
+        { \
+            return ej_pair.second == j; \
+        }); \
+        e = ((it != std::end(m)) ? it : std::begin(m))->first; \
+    }
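+
+// Illustrative usage sketch (TaskState and its values are hypothetical, not
+// part of this header):
+//
+//     enum TaskState { TS_STOPPED, TS_RUNNING, TS_INVALID = -1 };
+//     NLOHMANN_JSON_SERIALIZE_ENUM(TaskState, {
+//         {TS_INVALID, nullptr},
+//         {TS_STOPPED, "stopped"},
+//         {TS_RUNNING, "running"},
+//     })
+//
+//     // nlohmann::json j = TS_RUNNING;               // j == "running"
+//     // j = "stopped"; auto s = j.get<TaskState>();  // s == TS_STOPPED
+//
+// Values without a mapping fall back to the first pair, so the
+// TS_INVALID/null entry acts as a default in both directions.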
+
+// Ugly macros to avoid uglier copy-paste when specializing basic_json. They
+// may be removed in the future once the class is split.
+
+#define NLOHMANN_BASIC_JSON_TPL_DECLARATION \
+    template<template<typename, typename, typename...> class ObjectType, \
+             template<typename, typename...> class ArrayType, \
+             class StringType, class BooleanType, class NumberIntegerType, \
+             class NumberUnsignedType, class NumberFloatType, \
+             template<typename> class AllocatorType, \
+             template<typename, typename = void> class JSONSerializer, \
+             class BinaryType, \
+             class CustomBaseClass>
+
+#define NLOHMANN_BASIC_JSON_TPL \
+    basic_json<ObjectType, ArrayType, StringType, BooleanType, \
+    NumberIntegerType, NumberUnsignedType, NumberFloatType, \
+    AllocatorType, JSONSerializer, BinaryType, CustomBaseClass>
+
+// Macros to simplify conversion from/to types
+
+#define NLOHMANN_JSON_EXPAND( x ) x
+#define NLOHMANN_JSON_GET_MACRO(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16, _17, _18, _19, _20, _21, _22, _23, _24, _25, _26, _27, _28, _29, _30, _31, _32, _33, _34, _35, _36, _37, _38, _39, _40, _41, _42, _43, _44, _45, _46, _47, _48, _49, _50, _51, _52, _53, _54, _55, _56, _57, _58, _59, _60, _61, _62, _63, _64, NAME,...) NAME
+#define NLOHMANN_JSON_PASTE(...)
NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_GET_MACRO(__VA_ARGS__, \ + NLOHMANN_JSON_PASTE64, \ + NLOHMANN_JSON_PASTE63, \ + NLOHMANN_JSON_PASTE62, \ + NLOHMANN_JSON_PASTE61, \ + NLOHMANN_JSON_PASTE60, \ + NLOHMANN_JSON_PASTE59, \ + NLOHMANN_JSON_PASTE58, \ + NLOHMANN_JSON_PASTE57, \ + NLOHMANN_JSON_PASTE56, \ + NLOHMANN_JSON_PASTE55, \ + NLOHMANN_JSON_PASTE54, \ + NLOHMANN_JSON_PASTE53, \ + NLOHMANN_JSON_PASTE52, \ + NLOHMANN_JSON_PASTE51, \ + NLOHMANN_JSON_PASTE50, \ + NLOHMANN_JSON_PASTE49, \ + NLOHMANN_JSON_PASTE48, \ + NLOHMANN_JSON_PASTE47, \ + NLOHMANN_JSON_PASTE46, \ + NLOHMANN_JSON_PASTE45, \ + NLOHMANN_JSON_PASTE44, \ + NLOHMANN_JSON_PASTE43, \ + NLOHMANN_JSON_PASTE42, \ + NLOHMANN_JSON_PASTE41, \ + NLOHMANN_JSON_PASTE40, \ + NLOHMANN_JSON_PASTE39, \ + NLOHMANN_JSON_PASTE38, \ + NLOHMANN_JSON_PASTE37, \ + NLOHMANN_JSON_PASTE36, \ + NLOHMANN_JSON_PASTE35, \ + NLOHMANN_JSON_PASTE34, \ + NLOHMANN_JSON_PASTE33, \ + NLOHMANN_JSON_PASTE32, \ + NLOHMANN_JSON_PASTE31, \ + NLOHMANN_JSON_PASTE30, \ + NLOHMANN_JSON_PASTE29, \ + NLOHMANN_JSON_PASTE28, \ + NLOHMANN_JSON_PASTE27, \ + NLOHMANN_JSON_PASTE26, \ + NLOHMANN_JSON_PASTE25, \ + NLOHMANN_JSON_PASTE24, \ + NLOHMANN_JSON_PASTE23, \ + NLOHMANN_JSON_PASTE22, \ + NLOHMANN_JSON_PASTE21, \ + NLOHMANN_JSON_PASTE20, \ + NLOHMANN_JSON_PASTE19, \ + NLOHMANN_JSON_PASTE18, \ + NLOHMANN_JSON_PASTE17, \ + NLOHMANN_JSON_PASTE16, \ + NLOHMANN_JSON_PASTE15, \ + NLOHMANN_JSON_PASTE14, \ + NLOHMANN_JSON_PASTE13, \ + NLOHMANN_JSON_PASTE12, \ + NLOHMANN_JSON_PASTE11, \ + NLOHMANN_JSON_PASTE10, \ + NLOHMANN_JSON_PASTE9, \ + NLOHMANN_JSON_PASTE8, \ + NLOHMANN_JSON_PASTE7, \ + NLOHMANN_JSON_PASTE6, \ + NLOHMANN_JSON_PASTE5, \ + NLOHMANN_JSON_PASTE4, \ + NLOHMANN_JSON_PASTE3, \ + NLOHMANN_JSON_PASTE2, \ + NLOHMANN_JSON_PASTE1)(__VA_ARGS__)) +#define NLOHMANN_JSON_PASTE2(func, v1) func(v1) +#define NLOHMANN_JSON_PASTE3(func, v1, v2) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE2(func, v2) +#define NLOHMANN_JSON_PASTE4(func, v1, v2, v3) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE3(func, v2, v3) +#define NLOHMANN_JSON_PASTE5(func, v1, v2, v3, v4) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE4(func, v2, v3, v4) +#define NLOHMANN_JSON_PASTE6(func, v1, v2, v3, v4, v5) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE5(func, v2, v3, v4, v5) +#define NLOHMANN_JSON_PASTE7(func, v1, v2, v3, v4, v5, v6) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE6(func, v2, v3, v4, v5, v6) +#define NLOHMANN_JSON_PASTE8(func, v1, v2, v3, v4, v5, v6, v7) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE7(func, v2, v3, v4, v5, v6, v7) +#define NLOHMANN_JSON_PASTE9(func, v1, v2, v3, v4, v5, v6, v7, v8) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE8(func, v2, v3, v4, v5, v6, v7, v8) +#define NLOHMANN_JSON_PASTE10(func, v1, v2, v3, v4, v5, v6, v7, v8, v9) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE9(func, v2, v3, v4, v5, v6, v7, v8, v9) +#define NLOHMANN_JSON_PASTE11(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE10(func, v2, v3, v4, v5, v6, v7, v8, v9, v10) +#define NLOHMANN_JSON_PASTE12(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE11(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11) +#define NLOHMANN_JSON_PASTE13(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE12(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12) +#define NLOHMANN_JSON_PASTE14(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, 
v13) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE13(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13) +#define NLOHMANN_JSON_PASTE15(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE14(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14) +#define NLOHMANN_JSON_PASTE16(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE15(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15) +#define NLOHMANN_JSON_PASTE17(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE16(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16) +#define NLOHMANN_JSON_PASTE18(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE17(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17) +#define NLOHMANN_JSON_PASTE19(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE18(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18) +#define NLOHMANN_JSON_PASTE20(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE19(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19) +#define NLOHMANN_JSON_PASTE21(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE20(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20) +#define NLOHMANN_JSON_PASTE22(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE21(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21) +#define NLOHMANN_JSON_PASTE23(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE22(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22) +#define NLOHMANN_JSON_PASTE24(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE23(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23) +#define NLOHMANN_JSON_PASTE25(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE24(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24) +#define NLOHMANN_JSON_PASTE26(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE25(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25) +#define NLOHMANN_JSON_PASTE27(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26) NLOHMANN_JSON_PASTE2(func, v1) 
NLOHMANN_JSON_PASTE26(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26) +#define NLOHMANN_JSON_PASTE28(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE27(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27) +#define NLOHMANN_JSON_PASTE29(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE28(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28) +#define NLOHMANN_JSON_PASTE30(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE29(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29) +#define NLOHMANN_JSON_PASTE31(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE30(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30) +#define NLOHMANN_JSON_PASTE32(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE31(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31) +#define NLOHMANN_JSON_PASTE33(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE32(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32) +#define NLOHMANN_JSON_PASTE34(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE33(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33) +#define NLOHMANN_JSON_PASTE35(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE34(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34) +#define NLOHMANN_JSON_PASTE36(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE35(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, 
v26, v27, v28, v29, v30, v31, v32, v33, v34, v35) +#define NLOHMANN_JSON_PASTE37(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE36(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36) +#define NLOHMANN_JSON_PASTE38(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE37(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37) +#define NLOHMANN_JSON_PASTE39(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE38(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38) +#define NLOHMANN_JSON_PASTE40(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE39(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39) +#define NLOHMANN_JSON_PASTE41(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE40(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40) +#define NLOHMANN_JSON_PASTE42(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE41(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41) +#define NLOHMANN_JSON_PASTE43(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE42(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42) +#define NLOHMANN_JSON_PASTE44(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43) 
NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE43(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43) +#define NLOHMANN_JSON_PASTE45(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE44(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44) +#define NLOHMANN_JSON_PASTE46(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE45(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45) +#define NLOHMANN_JSON_PASTE47(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE46(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46) +#define NLOHMANN_JSON_PASTE48(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE47(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47) +#define NLOHMANN_JSON_PASTE49(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE48(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48) +#define NLOHMANN_JSON_PASTE50(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE49(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49) +#define NLOHMANN_JSON_PASTE51(func, v1, v2, v3, v4, v5, 
v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE50(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50) +#define NLOHMANN_JSON_PASTE52(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE51(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51) +#define NLOHMANN_JSON_PASTE53(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE52(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52) +#define NLOHMANN_JSON_PASTE54(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE53(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53) +#define NLOHMANN_JSON_PASTE55(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE54(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54) +#define NLOHMANN_JSON_PASTE56(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE55(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55) +#define 
NLOHMANN_JSON_PASTE57(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE56(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56) +#define NLOHMANN_JSON_PASTE58(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE57(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57) +#define NLOHMANN_JSON_PASTE59(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE58(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58) +#define NLOHMANN_JSON_PASTE60(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58, v59) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE59(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58, v59) +#define NLOHMANN_JSON_PASTE61(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58, v59, v60) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE60(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58, v59, v60) +#define NLOHMANN_JSON_PASTE62(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, 
v54, v55, v56, v57, v58, v59, v60, v61) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE61(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58, v59, v60, v61) +#define NLOHMANN_JSON_PASTE63(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58, v59, v60, v61, v62) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE62(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58, v59, v60, v61, v62) +#define NLOHMANN_JSON_PASTE64(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58, v59, v60, v61, v62, v63) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE63(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58, v59, v60, v61, v62, v63) + +#define NLOHMANN_JSON_TO(v1) nlohmann_json_j[#v1] = nlohmann_json_t.v1; +#define NLOHMANN_JSON_FROM(v1) nlohmann_json_j.at(#v1).get_to(nlohmann_json_t.v1); +#define NLOHMANN_JSON_FROM_WITH_DEFAULT(v1) nlohmann_json_t.v1 = nlohmann_json_j.value(#v1, nlohmann_json_default_obj.v1); + +/*! +@brief macro +@def NLOHMANN_DEFINE_TYPE_INTRUSIVE +@since version 3.9.0 +*/ +#define NLOHMANN_DEFINE_TYPE_INTRUSIVE(Type, ...) \ + friend void to_json(nlohmann::json& nlohmann_json_j, const Type& nlohmann_json_t) { NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_TO, __VA_ARGS__)) } \ + friend void from_json(const nlohmann::json& nlohmann_json_j, Type& nlohmann_json_t) { NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_FROM, __VA_ARGS__)) } + +#define NLOHMANN_DEFINE_TYPE_INTRUSIVE_WITH_DEFAULT(Type, ...) \ + friend void to_json(nlohmann::json& nlohmann_json_j, const Type& nlohmann_json_t) { NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_TO, __VA_ARGS__)) } \ + friend void from_json(const nlohmann::json& nlohmann_json_j, Type& nlohmann_json_t) { const Type nlohmann_json_default_obj{}; NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_FROM_WITH_DEFAULT, __VA_ARGS__)) } + +#define NLOHMANN_DEFINE_TYPE_INTRUSIVE_ONLY_SERIALIZE(Type, ...) \ + friend void to_json(nlohmann::json& nlohmann_json_j, const Type& nlohmann_json_t) { NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_TO, __VA_ARGS__)) } + +/*! +@brief macro +@def NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE +@since version 3.9.0 +*/ +#define NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE(Type, ...) 
\
+    inline void to_json(nlohmann::json& nlohmann_json_j, const Type& nlohmann_json_t) { NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_TO, __VA_ARGS__)) } \
+    inline void from_json(const nlohmann::json& nlohmann_json_j, Type& nlohmann_json_t) { NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_FROM, __VA_ARGS__)) }
+
+#define NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE_ONLY_SERIALIZE(Type, ...) \
+    inline void to_json(nlohmann::json& nlohmann_json_j, const Type& nlohmann_json_t) { NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_TO, __VA_ARGS__)) }
+
+#define NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE_WITH_DEFAULT(Type, ...) \
+    inline void to_json(nlohmann::json& nlohmann_json_j, const Type& nlohmann_json_t) { NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_TO, __VA_ARGS__)) } \
+    inline void from_json(const nlohmann::json& nlohmann_json_j, Type& nlohmann_json_t) { const Type nlohmann_json_default_obj{}; NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_FROM_WITH_DEFAULT, __VA_ARGS__)) }
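+
+// Illustrative usage sketch (ns::person is a hypothetical type):
+//
+//     namespace ns {
+//         struct person { std::string name; int age; };
+//         NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE(person, name, age)
+//     }
+//
+//     // nlohmann::json j = ns::person{"Ada", 36};  // {"age":36,"name":"Ada"}
+//     // auto p = j.get<ns::person>();              // round-trips
+//
+// The _WITH_DEFAULT variant additionally value-initializes a default object,
+// so missing keys fall back to that default instead of throwing.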
+
+// inspired from https://stackoverflow.com/a/26745591
+// allows to call any std function as if (e.g. with begin):
+// using std::begin; begin(x);
+//
+// it allows using the detected idiom to retrieve the return type
+// of such an expression
+#define NLOHMANN_CAN_CALL_STD_FUNC_IMPL(std_name) \
+    namespace detail { \
+    using std::std_name; \
+    \
+    template<typename... T> \
+    using result_of_##std_name = decltype(std_name(std::declval<T>()...)); \
+    } \
+    \
+    namespace detail2 { \
+    struct std_name##_tag \
+    { \
+    }; \
+    \
+    template<typename... T> \
+    std_name##_tag std_name(T&&...); \
+    \
+    template<typename... T> \
+    using result_of_##std_name = decltype(std_name(std::declval<T>()...)); \
+    \
+    template<typename... T> \
+    struct would_call_std_##std_name \
+    { \
+        static constexpr auto const value = ::nlohmann::detail:: \
+                is_detected_exact<detail2::std_name##_tag, detail2::result_of_##std_name, T...>::value; \
+    }; \
+    } /* namespace detail2 */ \
+    \
+    template<typename... T> \
+    struct would_call_std_##std_name : detail2::would_call_std_##std_name<T...> \
+    { \
+    }
+
+#ifndef JSON_USE_IMPLICIT_CONVERSIONS
+    #define JSON_USE_IMPLICIT_CONVERSIONS 1
+#endif
+
+#if JSON_USE_IMPLICIT_CONVERSIONS
+    #define JSON_EXPLICIT
+#else
+    #define JSON_EXPLICIT explicit
+#endif
+
+#ifndef JSON_DISABLE_ENUM_SERIALIZATION
+    #define JSON_DISABLE_ENUM_SERIALIZATION 0
+#endif
+
+#ifndef JSON_USE_GLOBAL_UDLS
+    #define JSON_USE_GLOBAL_UDLS 1
+#endif
+
+#if JSON_HAS_THREE_WAY_COMPARISON
+    #include <compare> // partial_ordering
+#endif
+
+NLOHMANN_JSON_NAMESPACE_BEGIN
+namespace detail
+{
+
+///////////////////////////
+// JSON type enumeration //
+///////////////////////////
+
+/*!
+@brief the JSON type enumeration
+
+This enumeration collects the different JSON types. It is internally used to
+distinguish the stored values, and the functions @ref basic_json::is_null(),
+@ref basic_json::is_object(), @ref basic_json::is_array(),
+@ref basic_json::is_string(), @ref basic_json::is_boolean(),
+@ref basic_json::is_number() (with @ref basic_json::is_number_integer(),
+@ref basic_json::is_number_unsigned(), and @ref basic_json::is_number_float()),
+@ref basic_json::is_discarded(), @ref basic_json::is_primitive(), and
+@ref basic_json::is_structured() rely on it.
+
+@note There are three enumeration entries (number_integer, number_unsigned, and
+number_float), because the library distinguishes these three types for numbers:
+@ref basic_json::number_unsigned_t is used for unsigned integers,
+@ref basic_json::number_integer_t is used for signed integers, and
+@ref basic_json::number_float_t is used for floating-point numbers or to
+approximate integers which do not fit in the limits of their respective type.
+
+@sa see @ref basic_json::basic_json(const value_t value_type) -- create a JSON
+value with the default value for a given type
+
+@since version 1.0.0
+*/
+enum class value_t : std::uint8_t
+{
+    null,             ///< null value
+    object,           ///< object (unordered set of name/value pairs)
+    array,            ///< array (ordered collection of values)
+    string,           ///< string value
+    boolean,          ///< boolean value
+    number_integer,   ///< number value (signed integer)
+    number_unsigned,  ///< number value (unsigned integer)
+    number_float,     ///< number value (floating-point)
+    binary,           ///< binary array (ordered collection of bytes)
+    discarded         ///< discarded by the parser callback function
+};
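+
+// Illustrative sketch of the mapping (basic_json::type() returns this
+// enumeration):
+//
+//     nlohmann::json(nullptr).type() == value_t::null
+//     nlohmann::json(true).type()    == value_t::boolean
+//     nlohmann::json(42).type()      == value_t::number_integer
+//     nlohmann::json(42u).type()     == value_t::number_unsigned
+//     nlohmann::json(42.0).type()    == value_t::number_float
+//     nlohmann::json("hi").type()    == value_t::string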
+
+/*!
+@brief comparison operator for JSON types
+
+Returns an ordering that is similar to Python:
+- order: null < boolean < number < object < array < string < binary
+- furthermore, each type is not smaller than itself
+- discarded values are not comparable
+- binary is represented as a b"" string in python and directly comparable to a
+  string; however, making a binary array directly comparable with a string would
+  be surprising behavior in a JSON file.
+
+@since version 1.0.0
+*/
+#if JSON_HAS_THREE_WAY_COMPARISON
+    inline std::partial_ordering operator<=>(const value_t lhs, const value_t rhs) noexcept // *NOPAD*
+#else
+    inline bool operator<(const value_t lhs, const value_t rhs) noexcept
+#endif
+{
+    static constexpr std::array<std::uint8_t, 9> order = {{
+            0 /* null */, 3 /* object */, 4 /* array */, 5 /* string */,
+            1 /* boolean */, 2 /* integer */, 2 /* unsigned */, 2 /* float */,
+            6 /* binary */
+        }
+    };
+
+    const auto l_index = static_cast<std::size_t>(lhs);
+    const auto r_index = static_cast<std::size_t>(rhs);
+#if JSON_HAS_THREE_WAY_COMPARISON
+    if (l_index < order.size() && r_index < order.size())
+    {
+        return order[l_index] <=> order[r_index]; // *NOPAD*
+    }
+    return std::partial_ordering::unordered;
+#else
+    return l_index < order.size() && r_index < order.size() && order[l_index] < order[r_index];
+#endif
+}
+
+// GCC selects the built-in operator< over an operator rewritten from
+// a user-defined spaceship operator
+// Clang, MSVC, and ICC select the rewritten candidate
+// (see GCC bug https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105200)
+#if JSON_HAS_THREE_WAY_COMPARISON && defined(__GNUC__)
+inline bool operator<(const value_t lhs, const value_t rhs) noexcept
+{
+    return std::is_lt(lhs <=> rhs); // *NOPAD*
+}
+#endif
+
+} // namespace detail
+NLOHMANN_JSON_NAMESPACE_END
+
+// #include
+//     __ _____ _____ _____
+//  __|  |   __|     |   | |  JSON for Modern C++
+// |  |  |__   |  |  | | | |  version 3.11.3
+// |_____|_____|_____|_|___|  https://github.com/nlohmann/json
+//
+// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann
+// SPDX-License-Identifier: MIT
+
+
+
+// #include
+
+
+NLOHMANN_JSON_NAMESPACE_BEGIN
+namespace detail
+{
+
+/*!
+@brief replace all occurrences of a substring by another string
+
+@param[in,out] s  the string to manipulate; changed so that all
+                  occurrences of @a f are replaced with @a t
+@param[in]     f  the substring to replace with @a t
+@param[in]     t  the string to replace @a f
+
+@pre The search string @a f must not be empty. **This precondition is
+enforced with an assertion.**
+
+@since version 2.0.0
+*/
+template<typename StringType>
+inline void replace_substring(StringType& s, const StringType& f,
+                              const StringType& t)
+{
+    JSON_ASSERT(!f.empty());
+    for (auto pos = s.find(f);                // find first occurrence of f
+            pos != StringType::npos;          // make sure f was found
+            s.replace(pos, f.size(), t),      // replace with t, and
+            pos = s.find(f, pos + t.size()))  // find next occurrence of f
+    {}
+}
+
+/*!
+ * @brief string escaping as described in RFC 6901 (Sect. 4)
+ * @param[in] s string to escape
+ * @return    escaped string
+ *
+ * Note the order of escaping "~" to "~0" and "/" to "~1" is important.
+ */
+template<typename StringType>
+inline StringType escape(StringType s)
+{
+    replace_substring(s, StringType{"~"}, StringType{"~0"});
+    replace_substring(s, StringType{"/"}, StringType{"~1"});
+    return s;
+}
+
+/*!
+ * @brief string unescaping as described in RFC 6901 (Sect. 4)
+ * @param[in] s string to unescape
+ * @return    unescaped string
+ *
+ * Note the order of escaping "~1" to "/" and "~0" to "~" is important.
+ */
+template<typename StringType>
+static void unescape(StringType& s)
+{
+    replace_substring(s, StringType{"~1"}, StringType{"/"});
+    replace_substring(s, StringType{"~0"}, StringType{"~"});
+}
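+
+// Example sketch (illustrative): escaping "~" before "/" means the two
+// replacements cannot interfere with each other:
+//
+//     escape(std::string("a/b~c")) == "a~1b~0c"
+//     std::string s{"a~1b~0c"}; unescape(s);   // s == "a/b~c"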
+
+} // namespace detail
+NLOHMANN_JSON_NAMESPACE_END
+
+// #include
+//     __ _____ _____ _____
+//  __|  |   __|     |   | |  JSON for Modern C++
+// |  |  |__   |  |  | | | |  version 3.11.3
+// |_____|_____|_____|_|___|  https://github.com/nlohmann/json
+//
+// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann
+// SPDX-License-Identifier: MIT
+
+
+
+#include <cstddef> // size_t
+
+// #include
+
+
+NLOHMANN_JSON_NAMESPACE_BEGIN
+namespace detail
+{
+
+/// struct to capture the start position of the current token
+struct position_t
+{
+    /// the total number of characters read
+    std::size_t chars_read_total = 0;
+    /// the number of characters read in the current line
+    std::size_t chars_read_current_line = 0;
+    /// the number of lines read
+    std::size_t lines_read = 0;
+
+    /// conversion to size_t to preserve SAX interface
+    constexpr operator size_t() const
+    {
+        return chars_read_total;
+    }
+};
+
+} // namespace detail
+NLOHMANN_JSON_NAMESPACE_END
+
+// #include
+
+// #include
+//     __ _____ _____ _____
+//  __|  |   __|     |   | |  JSON for Modern C++
+// |  |  |__   |  |  | | | |  version 3.11.3
+// |_____|_____|_____|_|___|  https://github.com/nlohmann/json
+//
+// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann
+// SPDX-FileCopyrightText: 2018 The Abseil Authors
+// SPDX-License-Identifier: MIT
+
+
+
+#include <array> // array
+#include <cstddef> // size_t
+#include <type_traits> // conditional, enable_if, false_type, integral_constant, is_constructible, is_integral, is_same, remove_cv, remove_reference, true_type
+#include <utility> // index_sequence, make_index_sequence, index_sequence_for
+
+// #include
+
+
+NLOHMANN_JSON_NAMESPACE_BEGIN
+namespace detail
+{
+
+template<typename T>
+using uncvref_t = typename std::remove_cv<typename std::remove_reference<T>::type>::type;
+
+#ifdef JSON_HAS_CPP_14
+
+// the following utilities are natively available in C++14
+using std::enable_if_t;
+using std::index_sequence;
+using std::make_index_sequence;
+using std::index_sequence_for;
+
+#else
+
+// alias templates to reduce boilerplate
+template<bool B, typename T = void>
+using enable_if_t = typename std::enable_if<B, T>::type;
+
+// The following code is taken from https://github.com/abseil/abseil-cpp/blob/10cb35e459f5ecca5b2ff107635da0bfa41011b4/absl/utility/utility.h
+// which is part of Google Abseil (https://github.com/abseil/abseil-cpp), licensed under the Apache License 2.0.
+
+//// START OF CODE FROM GOOGLE ABSEIL
+
+// integer_sequence
+//
+// Class template representing a compile-time integer sequence. An instantiation
+// of `integer_sequence<T, Ints...>` has a sequence of integers encoded in its
+// type through its template arguments (which is a common need when
+// working with C++11 variadic templates). `absl::integer_sequence` is designed
+// to be a drop-in replacement for C++14's `std::integer_sequence`.
+//
+// Example:
+//
+//   template< class T, T... Ints >
+//   void user_function(integer_sequence<T, Ints...>);
+//
+//   int main()
+//   {
+//     // user_function's `T` will be deduced to `int` and `Ints...`
+//     // will be deduced to `0, 1, 2, 3, 4`.
+//     user_function(make_integer_sequence<int, 5>());
+//   }
+template <typename T, T... Ints>
+struct integer_sequence
+{
+    using value_type = T;
+    static constexpr std::size_t size() noexcept
+    {
+        return sizeof...(Ints);
+    }
+};
+
+// index_sequence
+//
+// A helper template for an `integer_sequence` of `size_t`,
+// `absl::index_sequence` is designed to be a drop-in replacement for C++14's
+// `std::index_sequence`.
+template <size_t... Ints>
+using index_sequence = integer_sequence<size_t, Ints...>;
+
+namespace utility_internal
+{
+
+template <typename Seq, size_t SeqSize, size_t Rem>
+struct Extend;
+
+// Note that SeqSize == sizeof...(Ints). It's passed explicitly for efficiency.
+template <typename T, T... Ints, size_t SeqSize>
+struct Extend<integer_sequence<T, Ints...>, SeqSize, 0>
+{
+    using type = integer_sequence < T, Ints..., (Ints + SeqSize)... >;
+};
+
+template <typename T, T... Ints, size_t SeqSize>
+struct Extend<integer_sequence<T, Ints...>, SeqSize, 1>
+{
+    using type = integer_sequence < T, Ints..., (Ints + SeqSize)..., 2 * SeqSize >;
+};
+
+// Recursion helper for 'make_integer_sequence<T, N>'.
+// 'Gen<T, N>::type' is an alias for 'integer_sequence<T, 0, 1, ... N-1>'.
+template <typename T, size_t N>
+struct Gen
+{
+    using type =
+        typename Extend < typename Gen < T, N / 2 >::type, N / 2, N % 2 >::type;
+};
+
+template <typename T>
+struct Gen<T, 0>
+{
+    using type = integer_sequence<T>;
+};
+
+} // namespace utility_internal
+
+// Compile-time sequences of integers
+
+// make_integer_sequence
+//
+// This template alias is equivalent to
+// `integer_sequence<int, 0, 1, ..., N-1>`, and is designed to be a drop-in
+// replacement for C++14's `std::make_integer_sequence`.
+template <typename T, T N>
+using make_integer_sequence = typename utility_internal::Gen<T, N>::type;
+
+// make_index_sequence
+//
+// This template alias is equivalent to `index_sequence<0, 1, ..., N-1>`,
+// and is designed to be a drop-in replacement for C++14's
+// `std::make_index_sequence`.
+template <size_t N>
+using make_index_sequence = make_integer_sequence<size_t, N>;
+
+// index_sequence_for
+//
+// Converts a typename pack into an index sequence of the same length, and
+// is designed to be a drop-in replacement for C++14's
+// `std::index_sequence_for()`
+template <typename... Ts>
+using index_sequence_for = make_index_sequence<sizeof...(Ts)>;
+
+//// END OF CODE FROM GOOGLE ABSEIL
+
+#endif
+
+// dispatch utility (taken from ranges-v3)
+template<unsigned N> struct priority_tag : priority_tag < N - 1 > {};
+template<> struct priority_tag<0> {};
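+
+// Illustrative sketch of the dispatch idiom: priority_tag<1> derives from
+// priority_tag<0>, so the overload taking the higher tag wins whenever it is
+// viable (impl is a hypothetical name):
+//
+//     template<typename T> int impl(T&&, priority_tag<1>);  // preferred
+//     template<typename T> int impl(T&&, priority_tag<0>);  // fallback
+//     // impl(x, priority_tag<1> {});  // tries the <1> overload first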
+
+// taken from ranges-v3
+template<typename T>
+struct static_const
+{
+    static JSON_INLINE_VARIABLE constexpr T value{};
+};
+
+#ifndef JSON_HAS_CPP_17
+    template<typename T>
+    constexpr T static_const<T>::value;
+#endif
+
+template<typename T, typename... Args>
+inline constexpr std::array<T, sizeof...(Args)> make_array(Args&& ... args)
+{
+    return std::array<T, sizeof...(Args)> {{static_cast<T>(std::forward<Args>(args))...}};
+}
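+
+// Illustrative sketch: the element count is deduced from the argument pack,
+// so the following yields a std::array<int, 3>:
+//
+//     constexpr auto a = make_array<int>(1, 2, 3);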
+
+} // namespace detail
+NLOHMANN_JSON_NAMESPACE_END
+
+// #include
+//     __ _____ _____ _____
+//  __|  |   __|     |   | |  JSON for Modern C++
+// |  |  |__   |  |  | | | |  version 3.11.3
+// |_____|_____|_____|_|___|  https://github.com/nlohmann/json
+//
+// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann
+// SPDX-License-Identifier: MIT
+
+
+
+#include <limits> // numeric_limits
+#include <type_traits> // false_type, is_constructible, is_integral, is_same, true_type
+#include <utility> // declval
+#include <tuple> // tuple
+#include <string> // char_traits
+
+// #include
+//     __ _____ _____ _____
+//  __|  |   __|     |   | |  JSON for Modern C++
+// |  |  |__   |  |  | | | |  version 3.11.3
+// |_____|_____|_____|_|___|  https://github.com/nlohmann/json
+//
+// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann
+// SPDX-License-Identifier: MIT
+
+
+
+#include <iterator> // random_access_iterator_tag
+
+// #include
+
+// #include
+
+// #include
+
+
+NLOHMANN_JSON_NAMESPACE_BEGIN
+namespace detail
+{
+
+template<typename It, typename = void>
+struct iterator_types {};
+
+template<typename It>
+struct iterator_types <
+    It,
+    void_t<typename It::difference_type, typename It::value_type, typename It::pointer,
+    typename It::reference, typename It::iterator_category >>
+{
+    using difference_type = typename It::difference_type;
+    using value_type = typename It::value_type;
+    using pointer = typename It::pointer;
+    using reference = typename It::reference;
+    using iterator_category = typename It::iterator_category;
+};
+
+// This is required as some compilers implement std::iterator_traits in a way that
+// doesn't work with SFINAE. See https://github.com/nlohmann/json/issues/1341.
+template<typename T, typename = void>
+struct iterator_traits
+{
+};
+
+template<typename T>
+struct iterator_traits < T, enable_if_t < !std::is_pointer<T>::value >>
+    : iterator_types<T>
+{
+};
+
+template<typename T>
+struct iterator_traits<T*, enable_if_t<std::is_object<T>::value>>
+{
+    using iterator_category = std::random_access_iterator_tag;
+    using value_type = T;
+    using difference_type = ptrdiff_t;
+    using pointer = T*;
+    using reference = T&;
+};
+
+} // namespace detail
+NLOHMANN_JSON_NAMESPACE_END
+
+// #include
+
+// #include
+//     __ _____ _____ _____
+//  __|  |   __|     |   | |  JSON for Modern C++
+// |  |  |__   |  |  | | | |  version 3.11.3
+// |_____|_____|_____|_|___|  https://github.com/nlohmann/json
+//
+// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann
+// SPDX-License-Identifier: MIT
+
+
+
+// #include
+
+
+NLOHMANN_JSON_NAMESPACE_BEGIN
+
+NLOHMANN_CAN_CALL_STD_FUNC_IMPL(begin);
+
+NLOHMANN_JSON_NAMESPACE_END
+
+// #include
+//     __ _____ _____ _____
+//  __|  |   __|     |   | |  JSON for Modern C++
+// |  |  |__   |  |  | | | |  version 3.11.3
+// |_____|_____|_____|_|___|  https://github.com/nlohmann/json
+//
+// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann
+// SPDX-License-Identifier: MIT
+
+
+
+// #include
+
+
+NLOHMANN_JSON_NAMESPACE_BEGIN
+
+NLOHMANN_CAN_CALL_STD_FUNC_IMPL(end);
+
+NLOHMANN_JSON_NAMESPACE_END
+
+// #include
+
+// #include
+
+// #include
+//     __ _____ _____ _____
+//  __|  |   __|     |   | |  JSON for Modern C++
+// |  |  |__   |  |  | | | |  version 3.11.3
+// |_____|_____|_____|_|___|  https://github.com/nlohmann/json
+//
+// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann
+// SPDX-License-Identifier: MIT
+
+#ifndef INCLUDE_NLOHMANN_JSON_FWD_HPP_
+    #define INCLUDE_NLOHMANN_JSON_FWD_HPP_
+
+    #include <cstdint> // int64_t, uint64_t
+    #include <map> // map
+    #include <memory> // allocator
+    #include <string> // string
+    #include <vector> // vector
+
+    // #include
+
+
+    /*!
+    @brief namespace for Niels Lohmann
+    @see https://github.com/nlohmann
+    @since version 1.0.0
+    */
+    NLOHMANN_JSON_NAMESPACE_BEGIN
+
+    /*!
+    @brief default JSONSerializer template argument
+
+    This serializer ignores the template arguments and uses ADL
+    ([argument-dependent lookup](https://en.cppreference.com/w/cpp/language/adl))
+    for serialization.
+    */
+    template<typename T = void, typename SFINAE = void>
+    struct adl_serializer;
+
+    /// a class to store JSON values
+    /// @sa https://json.nlohmann.me/api/basic_json/
+    template<template<typename U, typename V, typename... Args> class ObjectType =
+             std::map,
+             template<typename U, typename... Args> class ArrayType = std::vector,
+             class StringType = std::string, class BooleanType = bool,
+             class NumberIntegerType = std::int64_t,
+             class NumberUnsignedType = std::uint64_t,
+             class NumberFloatType = double,
+             template<typename U> class AllocatorType = std::allocator,
+             template<typename T, typename SFINAE = void> class JSONSerializer =
+             adl_serializer,
+             class BinaryType = std::vector<std::uint8_t>, // cppcheck-suppress syntaxError
+             class CustomBaseClass = void>
+    class basic_json;
+
+    /// @brief JSON Pointer defines a string syntax for identifying a specific value within a JSON document
+    /// @sa https://json.nlohmann.me/api/json_pointer/
+    template<typename RefStringType>
+    class json_pointer;
+
+    /*!
+    @brief default specialization
+    @sa https://json.nlohmann.me/api/json/
+    */
+    using json = basic_json<>;
+
+    /// @brief a minimal map-like container that preserves insertion order
+    /// @sa https://json.nlohmann.me/api/ordered_map/
+    template<class Key, class T, class IgnoredLess = std::less<Key>,
+             class Allocator = std::allocator<std::pair<const Key, T>>>
+    struct ordered_map;
+
+    /// @brief specialization that maintains the insertion order of object keys
+    /// @sa https://json.nlohmann.me/api/ordered_json/
+    using ordered_json = basic_json<nlohmann::ordered_map>;
+
+    NLOHMANN_JSON_NAMESPACE_END
+
+#endif  // INCLUDE_NLOHMANN_JSON_FWD_HPP_
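+
+// Illustrative sketch of the difference between the two specializations
+// (key order on serialization):
+//
+//     nlohmann::ordered_json oj; oj["b"] = 1; oj["a"] = 2;
+//     // oj.dump() == R"({"b":1,"a":2})"  -- insertion order preserved
+//
+//     nlohmann::json j; j["b"] = 1; j["a"] = 2;
+//     // j.dump()  == R"({"a":2,"b":1})"  -- keys sorted by std::map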
+
+template<typename> struct is_basic_json : std::false_type {};
+
+NLOHMANN_BASIC_JSON_TPL_DECLARATION
+struct is_basic_json<NLOHMANN_BASIC_JSON_TPL> : std::true_type {};
+
+// used by exceptions create() member functions
+// true_type for pointer to possibly cv-qualified basic_json or std::nullptr_t
+// false_type otherwise
+template<typename BasicJsonContext>
+struct is_basic_json_context :
+    std::integral_constant < bool,
+    is_basic_json<typename std::remove_cv<typename std::remove_pointer<BasicJsonContext>::type>::type>::value
+    || std::is_same<BasicJsonContext, std::nullptr_t>::value >
+{};
+
+//////////////////////
+// json_ref helpers //
+//////////////////////
+
+template<typename>
+class json_ref;
+
+template<typename>
+struct is_json_ref : std::false_type {};
+
+template<typename T>
+struct is_json_ref<json_ref<T>> : std::true_type {};
+
+//////////////////////////
+// aliases for detected //
+//////////////////////////
+
+template<typename T>
+using mapped_type_t = typename T::mapped_type;
+
+template<typename T>
+using key_type_t = typename T::key_type;
+
+template<typename T>
+using value_type_t = typename T::value_type;
+
+template<typename T>
+using difference_type_t = typename T::difference_type;
+
+template<typename T>
+using pointer_t = typename T::pointer;
+
+template<typename T>
+using reference_t = typename T::reference;
+
+template<typename T>
+using iterator_category_t = typename T::iterator_category;
+
+template<typename T, typename... Args>
+using to_json_function = decltype(T::to_json(std::declval<Args>()...));
+
+template<typename T, typename... Args>
+using from_json_function = decltype(T::from_json(std::declval<Args>()...));
+
+template<typename T, typename U>
+using get_template_function = decltype(std::declval<T>().template get<U>());
+
+// trait checking if JSONSerializer<T>::from_json(json const&, udt&) exists
+template<typename BasicJsonType, typename T, typename = void>
+struct has_from_json : std::false_type {};
+
+// trait checking if j.get<T> is valid
+// use this trait instead of std::is_constructible or std::is_convertible,
+// both rely on, or make use of implicit conversions, and thus fail when T
+// has several constructors/operator= (see https://github.com/nlohmann/json/issues/958)
+template<typename BasicJsonType, typename T>
+struct is_getable
+{
+    static constexpr bool value = is_detected<get_template_function, const BasicJsonType&, T>::value;
+};
+
+template<typename BasicJsonType, typename T>
+struct has_from_json < BasicJsonType, T, enable_if_t < !is_basic_json<T>::value >>
+{
+    using serializer = typename BasicJsonType::template json_serializer<T, void>;
+
+    static constexpr bool value =
+        is_detected_exact<void, from_json_function, serializer,
+        const BasicJsonType&, T&>::value;
+};
+
+// This trait checks if JSONSerializer<T>::from_json(json const&) exists
+// this overload is used for non-default-constructible user-defined-types
+template<typename BasicJsonType, typename T, typename = void>
+struct has_non_default_from_json : std::false_type {};
+
+template<typename BasicJsonType, typename T>
+struct has_non_default_from_json < BasicJsonType, T, enable_if_t < !is_basic_json<T>::value >>
+{
+    using serializer = typename BasicJsonType::template json_serializer<T, void>;
+
+    static constexpr bool value =
+        is_detected_exact<T, from_json_function, serializer,
+        const BasicJsonType&>::value;
+};
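+
+// Illustrative note (editorial sketch, not part of the library): the two
+// from_json detectors above distinguish serializers of the shapes
+//
+//   static void from_json(const BasicJsonType&, T&);  // has_from_json
+//   static T    from_json(const BasicJsonType&);      // has_non_default_from_json
+//
+// the second shape is the hook used by non-default-constructible types.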
+
+// This trait checks if BasicJsonType::json_serializer<T>::to_json exists
+// Do not evaluate the trait when T is a basic_json type, to avoid template instantiation infinite recursion.
+template<typename BasicJsonType, typename T, typename = void>
+struct has_to_json : std::false_type {};
+
+template<typename BasicJsonType, typename T>
+struct has_to_json < BasicJsonType, T, enable_if_t < !is_basic_json<T>::value >>
+{
+    using serializer = typename BasicJsonType::template json_serializer<T, void>;
+
+    static constexpr bool value =
+        is_detected_exact<void, to_json_function, serializer, BasicJsonType&,
+        T>::value;
+};
+
+template<typename T>
+using detect_key_compare = typename T::key_compare;
+
+template<typename T>
+struct has_key_compare : std::integral_constant<bool, is_detected<detect_key_compare, T>::value> {};
+
+// obtains the actual object key comparator
+template<typename BasicJsonType>
+struct actual_object_comparator
+{
+    using object_t = typename BasicJsonType::object_t;
+    using object_comparator_t = typename BasicJsonType::default_object_comparator_t;
+    using type = typename std::conditional < has_key_compare<object_t>::value,
+          typename object_t::key_compare, object_comparator_t>::type;
+};
+
+template<typename BasicJsonType>
+using actual_object_comparator_t = typename actual_object_comparator<BasicJsonType>::type;
+
+/////////////////
+// char_traits //
+/////////////////
+
+// Primary template of char_traits calls std char_traits
+template<typename T>
+struct char_traits : std::char_traits<T>
+{};
+
+// Explicitly define char traits for unsigned char since it is not standard
+template<>
+struct char_traits<unsigned char> : std::char_traits<char>
+{
+    using char_type = unsigned char;
+    using int_type = uint64_t;
+
+    // Redefine to_int_type function
+    static int_type to_int_type(char_type c) noexcept
+    {
+        return static_cast<int_type>(c);
+    }
+
+    static char_type to_char_type(int_type i) noexcept
+    {
+        return static_cast<char_type>(i);
+    }
+
+    static constexpr int_type eof() noexcept
+    {
+        return static_cast<int_type>(EOF);
+    }
+};
+
+// Explicitly define char traits for signed char since it is not standard
+template<>
+struct char_traits<signed char> : std::char_traits<char>
+{
+    using char_type = signed char;
+    using int_type = uint64_t;
+
+    // Redefine to_int_type function
+    static int_type to_int_type(char_type c) noexcept
+    {
+        return static_cast<int_type>(c);
+    }
+
+    static char_type to_char_type(int_type i) noexcept
+    {
+        return static_cast<char_type>(i);
+    }
+
+    static constexpr int_type eof() noexcept
+    {
+        return static_cast<int_type>(EOF);
+    }
+};
+
+///////////////////
+// is_ functions //
+///////////////////
+
+// https://en.cppreference.com/w/cpp/types/conjunction
+template<class...> struct conjunction : std::true_type { };
+template<class B> struct conjunction<B> : B { };
+template<class B, class... Bn>
+struct conjunction<B, Bn...>
+: std::conditional<static_cast<bool>(B::value), conjunction<Bn...>, B>::type {};
+
+// https://en.cppreference.com/w/cpp/types/negation
+template<class B> struct negation : std::integral_constant < bool, !B::value > { };
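+
+// Illustrative note (editorial sketch, hypothetical example, not part of the
+// library): this conjunction short-circuits instantiation; once some B::value
+// is false, the remaining Bn are never instantiated. E.g. with a
+// declared-but-undefined template
+//
+//   template<typename> struct Undefined;                  // hypothetical
+//   conjunction<std::false_type, Undefined<int>>::value   // still compiles
+//
+// because the false branch stops the recursion before Undefined<int> is used.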
+
+// Reimplementation of is_constructible and is_default_constructible, due to them being broken for
+// std::pair and std::tuple until LWG 2367 fix (see https://cplusplus.github.io/LWG/lwg-defects.html#2367).
+// This causes compile errors in e.g. clang 3.5 or gcc 4.9.
+template<typename T>
+struct is_default_constructible : std::is_default_constructible<T> {};
+
+template<typename T1, typename T2>
+struct is_default_constructible<std::pair<T1, T2>>
+    : conjunction<is_default_constructible<T1>, is_default_constructible<T2>> {};
+
+template<typename T1, typename T2>
+struct is_default_constructible<const std::pair<T1, T2>>
+    : conjunction<is_default_constructible<T1>, is_default_constructible<T2>> {};
+
+template<typename... Ts>
+struct is_default_constructible<std::tuple<Ts...>>
+    : conjunction<is_default_constructible<Ts>...> {};
+
+template<typename... Ts>
+struct is_default_constructible<const std::tuple<Ts...>>
+    : conjunction<is_default_constructible<Ts>...> {};
+
+template<typename T, typename... Args>
+struct is_constructible : std::is_constructible<T, Args...> {};
+
+template<typename T1, typename T2>
+struct is_constructible<std::pair<T1, T2>> : is_default_constructible<std::pair<T1, T2>> {};
+
+template<typename T1, typename T2>
+struct is_constructible<const std::pair<T1, T2>> : is_default_constructible<const std::pair<T1, T2>> {};
+
+template<typename... Ts>
+struct is_constructible<std::tuple<Ts...>> : is_default_constructible<std::tuple<Ts...>> {};
+
+template<typename... Ts>
+struct is_constructible<const std::tuple<Ts...>> : is_default_constructible<const std::tuple<Ts...>> {};
+
+template<typename T, typename = void>
+struct is_iterator_traits : std::false_type {};
+
+template<typename T>
+struct is_iterator_traits<iterator_traits<T>>
+{
+  private:
+    using traits = iterator_traits<T>;
+
+  public:
+    static constexpr auto value =
+        is_detected<value_type_t, traits>::value &&
+        is_detected<difference_type_t, traits>::value &&
+        is_detected<pointer_t, traits>::value &&
+        is_detected<iterator_category_t, traits>::value &&
+        is_detected<reference_t, traits>::value;
+};
+
+template<typename T>
+struct is_range
+{
+  private:
+    using t_ref = typename std::add_lvalue_reference<T>::type;
+
+    using iterator = detected_t<result_of_begin, t_ref>;
+    using sentinel = detected_t<result_of_end, t_ref>;
+
+    // to be 100% correct, it should use https://en.cppreference.com/w/cpp/iterator/input_or_output_iterator
+    // and https://en.cppreference.com/w/cpp/iterator/sentinel_for
+    // but reimplementing these would be too much work, as a lot of other concepts are used underneath
+    static constexpr auto is_iterator_begin =
+        is_iterator_traits<iterator_traits<iterator>>::value;
+
+  public:
+    static constexpr bool value = !std::is_same<iterator, nonesuch>::value && !std::is_same<sentinel, nonesuch>::value && is_iterator_begin;
+};
+
+template<typename R>
+using iterator_t = enable_if_t<is_range<R>::value, result_of_begin<decltype(std::declval<R&>())>>;
+
+template<typename T>
+using range_value_t = value_type_t<iterator_traits<iterator_t<T>>>;
+
+// The following implementation of is_complete_type is taken from
+// https://blogs.msdn.microsoft.com/vcblog/2015/12/02/partial-support-for-expression-sfinae-in-vs-2015-update-1/
+// and is written by Xiang Fan who agreed to using it in this library.
+
+template<typename T, typename = void>
+struct is_complete_type : std::false_type {};
+
+template<typename T>
+struct is_complete_type<T, decltype(void(sizeof(T)))> : std::true_type {};
+
+template<typename BasicJsonType, typename CompatibleObjectType,
+         typename = void>
+struct is_compatible_object_type_impl : std::false_type {};
+
+template<typename BasicJsonType, typename CompatibleObjectType>
+struct is_compatible_object_type_impl <
+    BasicJsonType, CompatibleObjectType,
+    enable_if_t < is_detected<mapped_type_t, CompatibleObjectType>::value&&
+    is_detected<key_type_t, CompatibleObjectType>::value >>
+{
+    using object_t = typename BasicJsonType::object_t;
+
+    // macOS's is_constructible does not play well with nonesuch...
+    static constexpr bool value =
+        is_constructible<typename object_t::key_type,
+        typename CompatibleObjectType::key_type>::value &&
+        is_constructible<typename object_t::mapped_type,
+        typename CompatibleObjectType::mapped_type>::value;
+};
+
+template<typename BasicJsonType, typename CompatibleObjectType>
+struct is_compatible_object_type
+    : is_compatible_object_type_impl<BasicJsonType, CompatibleObjectType> {};
+
+template<typename BasicJsonType, typename ConstructibleObjectType,
+         typename = void>
+struct is_constructible_object_type_impl : std::false_type {};
+
+template<typename BasicJsonType, typename ConstructibleObjectType>
+struct is_constructible_object_type_impl <
+    BasicJsonType, ConstructibleObjectType,
+    enable_if_t < is_detected<mapped_type_t, ConstructibleObjectType>::value&&
+    is_detected<key_type_t, ConstructibleObjectType>::value >>
+{
+    using object_t = typename BasicJsonType::object_t;
+
+    static constexpr bool value =
+        (is_default_constructible<ConstructibleObjectType>::value &&
+         (std::is_move_assignable<ConstructibleObjectType>::value ||
+          std::is_copy_assignable<ConstructibleObjectType>::value) &&
+         (is_constructible<typename ConstructibleObjectType::key_type,
+          typename object_t::key_type>::value &&
+          std::is_same <
+          typename object_t::mapped_type,
+          typename ConstructibleObjectType::mapped_type >::value)) ||
+        (has_from_json<BasicJsonType,
+         typename ConstructibleObjectType::mapped_type>::value ||
+         has_non_default_from_json <
+         BasicJsonType,
+         typename ConstructibleObjectType::mapped_type >::value);
+};
+
+template<typename BasicJsonType, typename ConstructibleObjectType>
+struct is_constructible_object_type
+    : is_constructible_object_type_impl<BasicJsonType,
+      ConstructibleObjectType> {};
+
+template<typename BasicJsonType, typename CompatibleStringType>
+struct is_compatible_string_type
+{
+    static constexpr auto value =
+        is_constructible<typename BasicJsonType::string_t, CompatibleStringType>::value;
+};
+
+template<typename BasicJsonType, typename ConstructibleStringType>
+struct is_constructible_string_type
+{
+    // launder type through decltype() to fix compilation failure on ICPC
+#ifdef __INTEL_COMPILER
+    using laundered_type = decltype(std::declval<ConstructibleStringType>());
+#else
+    using laundered_type = ConstructibleStringType;
+#endif
+
+    static constexpr auto value =
+        conjunction <
+        is_constructible<laundered_type, typename BasicJsonType::string_t>,
+        is_detected_exact<typename BasicJsonType::string_t::value_type,
+        value_type_t, laundered_type >>::value;
+};
+
+template<typename BasicJsonType, typename CompatibleArrayType, typename = void>
+struct is_compatible_array_type_impl : std::false_type {};
+
+template<typename BasicJsonType, typename CompatibleArrayType>
+struct is_compatible_array_type_impl <
+    BasicJsonType, CompatibleArrayType,
+    enable_if_t <
+    is_detected<iterator_t, CompatibleArrayType>::value&&
+    is_iterator_traits<iterator_traits<detected_t<iterator_t, CompatibleArrayType>>>::value&&
+// special case for types like std::filesystem::path whose iterator's value_type are themselves
+// c.f. https://github.com/nlohmann/json/pull/3073
+    !std::is_same<CompatibleArrayType, detected_t<range_value_t, CompatibleArrayType>>::value >>
+{
+    static constexpr bool value =
+        is_constructible<BasicJsonType,
+        range_value_t<CompatibleArrayType>>::value;
+};
+
+template<typename BasicJsonType, typename CompatibleArrayType>
+struct is_compatible_array_type
+    : is_compatible_array_type_impl<BasicJsonType, CompatibleArrayType> {};
+
+template<typename BasicJsonType, typename ConstructibleArrayType, typename = void>
+struct is_constructible_array_type_impl : std::false_type {};
+
+template<typename BasicJsonType, typename ConstructibleArrayType>
+struct is_constructible_array_type_impl <
+    BasicJsonType, ConstructibleArrayType,
+    enable_if_t<std::is_same<ConstructibleArrayType,
+    typename BasicJsonType::value_type>::value >>
+    : std::true_type {};
+
+template<typename BasicJsonType, typename ConstructibleArrayType>
+struct is_constructible_array_type_impl <
+    BasicJsonType, ConstructibleArrayType,
+    enable_if_t < !std::is_same<ConstructibleArrayType,
+    typename BasicJsonType::value_type>::value&&
+    !is_compatible_string_type<BasicJsonType, ConstructibleArrayType>::value&&
+    is_default_constructible<ConstructibleArrayType>::value&&
+(std::is_move_assignable<ConstructibleArrayType>::value ||
+ std::is_copy_assignable<ConstructibleArrayType>::value)&&
+is_detected<iterator_t, ConstructibleArrayType>::value&&
+is_iterator_traits<iterator_traits<detected_t<iterator_t, ConstructibleArrayType>>>::value&&
+is_detected<range_value_t, ConstructibleArrayType>::value&&
+// special case for types like std::filesystem::path whose iterator's value_type are themselves
+// c.f. https://github.com/nlohmann/json/pull/3073
+!std::is_same<ConstructibleArrayType, detected_t<range_value_t, ConstructibleArrayType>>::value&&
+    is_complete_type <
+    detected_t<range_value_t, ConstructibleArrayType >>::value >>
+{
+    using value_type = range_value_t<ConstructibleArrayType>;
+
+    static constexpr bool value =
+        std::is_same<value_type,
+        typename BasicJsonType::array_t::value_type>::value ||
+        has_from_json<BasicJsonType,
+        value_type>::value ||
+        has_non_default_from_json <
+        BasicJsonType,
+        value_type >::value;
+};
+
+template<typename BasicJsonType, typename ConstructibleArrayType>
+struct is_constructible_array_type
+    : is_constructible_array_type_impl<BasicJsonType, ConstructibleArrayType> {};
+
+template<typename RealIntegerType, typename CompatibleNumberIntegerType,
+         typename = void>
+struct is_compatible_integer_type_impl : std::false_type {};
+
+template<typename RealIntegerType, typename CompatibleNumberIntegerType>
+struct is_compatible_integer_type_impl <
+    RealIntegerType, CompatibleNumberIntegerType,
+    enable_if_t < std::is_integral<RealIntegerType>::value&&
+    std::is_integral<CompatibleNumberIntegerType>::value&&
+    !std::is_same<bool, CompatibleNumberIntegerType>::value >>
+{
+    // is there an assert somewhere on overflows?
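+    //
+    // Illustrative note (editorial sketch, not part of the library): the
+    // signedness check below means e.g. a pairing like <int64_t, uint32_t>
+    // yields false (mixed signedness), while <int64_t, int32_t> yields true.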
+    using RealLimits = std::numeric_limits<RealIntegerType>;
+    using CompatibleLimits = std::numeric_limits<CompatibleNumberIntegerType>;
+
+    static constexpr auto value =
+        is_constructible<RealIntegerType,
+        CompatibleNumberIntegerType>::value &&
+        CompatibleLimits::is_integer &&
+        RealLimits::is_signed == CompatibleLimits::is_signed;
+};
+
+template<typename RealIntegerType, typename CompatibleNumberIntegerType>
+struct is_compatible_integer_type
+    : is_compatible_integer_type_impl<RealIntegerType,
+      CompatibleNumberIntegerType> {};
+
+template<typename BasicJsonType, typename CompatibleType, typename = void>
+struct is_compatible_type_impl: std::false_type {};
+
+template<typename BasicJsonType, typename CompatibleType>
+struct is_compatible_type_impl <
+    BasicJsonType, CompatibleType,
+    enable_if_t<is_complete_type<CompatibleType>::value >>
+{
+    static constexpr bool value =
+        has_to_json<BasicJsonType, CompatibleType>::value;
+};
+
+template<typename BasicJsonType, typename CompatibleType>
+struct is_compatible_type
+    : is_compatible_type_impl<BasicJsonType, CompatibleType> {};
+
+template<typename T1, typename T2>
+struct is_constructible_tuple : std::false_type {};
+
+template<typename T1, typename... Args>
+struct is_constructible_tuple<T1, std::tuple<Args...>> : conjunction<is_constructible<T1, Args>...> {};
+
+template<typename BasicJsonType, typename T>
+struct is_json_iterator_of : std::false_type {};
+
+template<typename BasicJsonType>
+struct is_json_iterator_of<BasicJsonType, typename BasicJsonType::iterator> : std::true_type {};
+
+template<typename BasicJsonType>
+struct is_json_iterator_of<BasicJsonType, typename BasicJsonType::const_iterator> : std::true_type
+{};
+
+// checks if a given type T is a template specialization of Primary
+template<template <typename...> class Primary, typename T>