Skriller0208 commited on
Commit
613af8d
·
verified ·
1 Parent(s): 4c0b97f

Upload 814 files

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .devops/cublas.Dockerfile +28 -0
  2. .devops/main-cuda.Dockerfile +40 -0
  3. .devops/main.Dockerfile +19 -0
  4. .github/workflows/bindings-go.yml +22 -0
  5. .github/workflows/bindings-ruby.yml.disabled +23 -0
  6. .github/workflows/build.yml +669 -0
  7. .github/workflows/docker.yml +57 -0
  8. .github/workflows/examples.yml +48 -0
  9. .gitignore +54 -0
  10. .gitmodules +0 -0
  11. AUTHORS +301 -0
  12. CMakeLists.txt +185 -0
  13. LICENSE +21 -0
  14. Makefile +2 -1
  15. Package.swift +60 -0
  16. README.md +829 -10
  17. README_sycl.md +249 -0
  18. bindings/CMakeLists.txt +19 -0
  19. bindings/go/.gitignore +2 -0
  20. bindings/go/LICENSE +21 -0
  21. bindings/go/Makefile +64 -0
  22. bindings/go/README.md +100 -0
  23. bindings/go/doc.go +5 -0
  24. bindings/go/examples/go-model-download/context.go +30 -0
  25. bindings/go/examples/go-model-download/main.go +208 -0
  26. bindings/go/examples/go-whisper/color.go +22 -0
  27. bindings/go/examples/go-whisper/flags.go +147 -0
  28. bindings/go/examples/go-whisper/main.go +43 -0
  29. bindings/go/examples/go-whisper/process.go +132 -0
  30. bindings/go/go.mod +16 -0
  31. bindings/go/go.sum +23 -0
  32. bindings/go/params.go +192 -0
  33. bindings/go/pkg/whisper/consts.go +28 -0
  34. bindings/go/pkg/whisper/context.go +331 -0
  35. bindings/go/pkg/whisper/context_test.go +55 -0
  36. bindings/go/pkg/whisper/doc.go +4 -0
  37. bindings/go/pkg/whisper/interface.go +102 -0
  38. bindings/go/pkg/whisper/model.go +101 -0
  39. bindings/go/samples/jfk.wav +0 -0
  40. bindings/go/whisper.go +468 -0
  41. bindings/go/whisper_test.go +113 -0
  42. bindings/java/.idea/uiDesigner.xml +124 -0
  43. bindings/java/README.md +71 -0
  44. bindings/java/build.gradle +133 -0
  45. bindings/java/gradle.properties +6 -0
  46. bindings/java/gradle/wrapper/gradle-wrapper.jar +0 -0
  47. bindings/java/gradle/wrapper/gradle-wrapper.properties +6 -0
  48. bindings/java/gradlew +244 -0
  49. bindings/java/gradlew.bat +92 -0
  50. bindings/java/settings.gradle +1 -0
.devops/cublas.Dockerfile ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ARG UBUNTU_VERSION=22.04
2
+
3
+ # This needs to generally match the container host's environment.
4
+ ARG CUDA_VERSION=11.7.1
5
+
6
+ # Target the CUDA build image
7
+ ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
8
+
9
+ FROM ${BASE_CUDA_DEV_CONTAINER} as build
10
+
11
+ # Unless otherwise specified, we make a fat build.
12
+ ARG CUDA_DOCKER_ARCH=all
13
+
14
+ RUN apt-get update && \
15
+ apt-get install -y build-essential git cmake
16
+
17
+ WORKDIR /app
18
+
19
+ COPY . .
20
+
21
+ # Set nvcc architecture
22
+ ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
23
+ # Enable cuBLAS
24
+ ENV GGML_CUDA=1
25
+
26
+ RUN make
27
+
28
+ ENTRYPOINT ["/app/main"]
.devops/main-cuda.Dockerfile ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ARG UBUNTU_VERSION=22.04
2
+ # This needs to generally match the container host's environment.
3
+ ARG CUDA_VERSION=12.3.1
4
+ # Target the CUDA build image
5
+ ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
6
+ # Target the CUDA runtime image
7
+ ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}
8
+
9
+ FROM ${BASE_CUDA_DEV_CONTAINER} AS build
10
+ WORKDIR /app
11
+
12
+ # Unless otherwise specified, we make a fat build.
13
+ ARG CUDA_DOCKER_ARCH=all
14
+ # Set nvcc architecture
15
+ ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
16
+ # Enable cuBLAS
17
+ ENV GGML_CUDA=1
18
+
19
+ RUN apt-get update && \
20
+ apt-get install -y build-essential \
21
+ && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*
22
+
23
+ # Ref: https://stackoverflow.com/a/53464012
24
+ ENV CUDA_MAIN_VERSION=12.3
25
+ ENV LD_LIBRARY_PATH /usr/local/cuda-${CUDA_MAIN_VERSION}/compat:$LD_LIBRARY_PATH
26
+
27
+ COPY .. .
28
+ RUN make
29
+
30
+ FROM ${BASE_CUDA_RUN_CONTAINER} AS runtime
31
+ ENV CUDA_MAIN_VERSION=12.3
32
+ ENV LD_LIBRARY_PATH /usr/local/cuda-${CUDA_MAIN_VERSION}/compat:$LD_LIBRARY_PATH
33
+ WORKDIR /app
34
+
35
+ RUN apt-get update && \
36
+ apt-get install -y curl ffmpeg \
37
+ && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*
38
+
39
+ COPY --from=build /app /app
40
+ ENTRYPOINT [ "bash", "-c" ]
.devops/main.Dockerfile ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM ubuntu:22.04 AS build
2
+ WORKDIR /app
3
+
4
+ RUN apt-get update && \
5
+ apt-get install -y build-essential \
6
+ && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*
7
+
8
+ COPY .. .
9
+ RUN make
10
+
11
+ FROM ubuntu:22.04 AS runtime
12
+ WORKDIR /app
13
+
14
+ RUN apt-get update && \
15
+ apt-get install -y curl ffmpeg \
16
+ && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*
17
+
18
+ COPY --from=build /app /app
19
+ ENTRYPOINT [ "bash", "-c" ]
.github/workflows/bindings-go.yml ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Bindings Tests (Go)
2
+ on:
3
+ push:
4
+ paths:
5
+ - bindings/go/**
6
+ - whisper.h
7
+ pull_request:
8
+ paths:
9
+ - bindings/go/**
10
+ - whisper.h
11
+
12
+ jobs:
13
+ ubuntu-latest:
14
+ runs-on: ubuntu-latest
15
+ steps:
16
+ - uses: actions/setup-go@v3
17
+ with:
18
+ go-version: '^1.19'
19
+ - uses: actions/checkout@v1
20
+ - run: |
21
+ cd bindings/go
22
+ make test
.github/workflows/bindings-ruby.yml.disabled ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # TODO: fix this workflow file, disabled for now
2
+ name: Bindings Tests (Ruby)
3
+ on:
4
+ push:
5
+ paths:
6
+ - bindings/ruby/**
7
+ - whisper.h
8
+ pull_request:
9
+ paths:
10
+ - bindings/ruby/**
11
+ - whisper.h
12
+
13
+ jobs:
14
+ ubuntu-latest:
15
+ runs-on: ubuntu-latest
16
+ steps:
17
+ - uses: ruby/setup-ruby@v1
18
+ with:
19
+ ruby-version: '3.0'
20
+ - uses: actions/checkout@v1
21
+ - run: |
22
+ cd bindings/ruby/ext
23
+ ruby extconf.rb && make
.github/workflows/build.yml ADDED
@@ -0,0 +1,669 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: CI
2
+ on: [push, pull_request]
3
+
4
+ env:
5
+ ubuntu_image: "ubuntu:22.04"
6
+
7
+ jobs:
8
+ ubuntu-latest:
9
+ runs-on: ubuntu-latest
10
+
11
+ strategy:
12
+ fail-fast: false
13
+ matrix:
14
+ arch: [linux/amd64, linux/arm64, linux/arm/v7, linux/ppc64le]
15
+
16
+ steps:
17
+ - name: Clone
18
+ uses: actions/checkout@v4
19
+
20
+ - name: Set up QEMU
21
+ uses: docker/setup-qemu-action@v3
22
+
23
+ - name: Build ${{ matrix.arch }}
24
+ run: |
25
+ docker run --platform ${{ matrix.arch }} --rm \
26
+ -v ${{ github.workspace }}:/workspace \
27
+ -w /workspace ${{ env.ubuntu_image }} /bin/sh -c '
28
+ set -e
29
+ apt update
30
+ apt install -y build-essential libsdl2-dev
31
+ make
32
+ make stream'
33
+
34
+ macOS-latest:
35
+ runs-on: macOS-latest
36
+
37
+ steps:
38
+ - name: Clone
39
+ uses: actions/checkout@v4
40
+
41
+ - name: Dependencies
42
+ run: |
43
+ brew update
44
+ brew install sdl2
45
+
46
+ - name: Build
47
+ run: |
48
+ make
49
+ make stream
50
+
51
+ freeBSD-latest:
52
+ runs-on: macos-12
53
+
54
+ steps:
55
+ - name: Clone
56
+ uses: actions/checkout@v4
57
+
58
+ - name: Build
59
+ uses: cross-platform-actions/action@v0.24.0
60
+ with:
61
+ operating_system: freebsd
62
+ version: '13.3'
63
+ run: |
64
+ sudo pkg update
65
+ sudo pkg install -y gmake sdl2
66
+ gmake
67
+ gmake stream
68
+
69
+ ubuntu-latest-gcc:
70
+ runs-on: ubuntu-latest
71
+
72
+ strategy:
73
+ fail-fast: false
74
+ matrix:
75
+ build: [Debug, Release]
76
+ arch: [linux/amd64, linux/arm64, linux/arm/v7, linux/ppc64le]
77
+
78
+ steps:
79
+ - name: Clone
80
+ uses: actions/checkout@v4
81
+
82
+ - name: Set up QEMU
83
+ uses: docker/setup-qemu-action@v3
84
+
85
+ - name: Build ${{ matrix.arch }}
86
+ run: |
87
+ docker run --platform ${{ matrix.arch }} --rm \
88
+ -v ${{ github.workspace }}:/workspace \
89
+ -w /workspace ${{ env.ubuntu_image }} /bin/sh -c '
90
+ set -e
91
+ apt update
92
+ apt install -y build-essential cmake libsdl2-dev
93
+ cmake . -DWHISPER_SDL2=ON -DCMAKE_BUILD_TYPE=${{ matrix.build }}
94
+ make
95
+ ctest -L gh --output-on-failure'
96
+
97
+ ubuntu-latest-clang:
98
+ runs-on: ubuntu-latest
99
+
100
+ strategy:
101
+ fail-fast: false
102
+ matrix:
103
+ build: [Debug, Release]
104
+ #arch: [linux/amd64, linux/arm64, linux/arm/v7, linux/ppc64le]
105
+ # TODO: arm/v7 disabled due to clang bug
106
+ # https://github.com/ggerganov/whisper.cpp/actions/runs/9657764109/job/26637633042?pr=2256#step:4:1990
107
+ arch: [linux/amd64, linux/arm64, linux/ppc64le]
108
+
109
+ steps:
110
+ - name: Clone
111
+ uses: actions/checkout@v4
112
+
113
+ - name: Set up QEMU
114
+ uses: docker/setup-qemu-action@v3
115
+
116
+ - name: Build ${{ matrix.arch }}
117
+ run: |
118
+ docker run --platform ${{ matrix.arch }} --rm \
119
+ -v ${{ github.workspace }}:/workspace \
120
+ -w /workspace ${{ env.ubuntu_image }} /bin/sh -c '
121
+ set -e
122
+ apt update
123
+ apt install -y clang build-essential cmake libsdl2-dev
124
+ cmake . -DWHISPER_SDL2=ON -DCMAKE_BUILD_TYPE=${{ matrix.build }} -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COMPILER=clang
125
+ make
126
+ ctest -L gh --output-on-failure'
127
+
128
+ ubuntu-latest-gcc-sanitized:
129
+ runs-on: ubuntu-latest
130
+
131
+ strategy:
132
+ fail-fast: false
133
+ matrix:
134
+ sanitizer: [ADDRESS, THREAD, UNDEFINED]
135
+ arch: [linux/amd64]
136
+
137
+ steps:
138
+ - name: Clone
139
+ uses: actions/checkout@v4
140
+
141
+ - name: Set up QEMU
142
+ uses: docker/setup-qemu-action@v3
143
+
144
+ - name: Build ${{ matrix.arch }}
145
+ run: |
146
+ docker run --platform ${{ matrix.arch }} --rm \
147
+ -v ${{ github.workspace }}:/workspace \
148
+ -w /workspace ${{ env.ubuntu_image }} /bin/sh -c '
149
+ set -e
150
+ apt update
151
+ apt install -y build-essential cmake
152
+ cmake . -DCMAKE_BUILD_TYPE=Debug -DWHISPER_SANITIZE_${{ matrix.sanitizer }}=ON
153
+ make
154
+ ctest -L gh --output-on-failure'
155
+
156
+ ubuntu-22-cmake-sycl:
157
+ runs-on: ubuntu-22.04
158
+
159
+ strategy:
160
+ fail-fast: false
161
+ matrix:
162
+ dwhisper_sycl: [ON]
163
+ dcmake_c_compiler: [icx]
164
+ dcmake_cxx_compiler: [icpx]
165
+ arch: [linux/amd64, linux/arm64, linux/arm/v7, linux/ppc64le]
166
+
167
+ continue-on-error: true
168
+
169
+ steps:
170
+ - name: Clone
171
+ uses: actions/checkout@v4
172
+
173
+ - name: add oneAPI to apt
174
+ shell: bash
175
+ run: |
176
+ cd /tmp
177
+ wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
178
+ sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
179
+ rm GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
180
+ sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main"
181
+
182
+ - name: install oneAPI dpcpp compiler
183
+ shell: bash
184
+ run: |
185
+ sudo apt update
186
+ sudo apt install intel-oneapi-compiler-dpcpp-cpp
187
+
188
+ - name: install oneAPI MKL library
189
+ shell: bash
190
+ run: |
191
+ sudo apt install intel-oneapi-mkl-devel
192
+
193
+ - name: Clone
194
+ id: checkout
195
+ uses: actions/checkout@v4
196
+
197
+ - name: Build
198
+ id: cmake_build
199
+ run: |
200
+ source /opt/intel/oneapi/setvars.sh
201
+ mkdir build
202
+ cd build
203
+ cmake -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ..
204
+ cmake --build . --config Release -j $(nproc)
205
+
206
+ ubuntu-22-cmake-sycl-fp16:
207
+ runs-on: ubuntu-22.04
208
+
209
+ strategy:
210
+ fail-fast: false
211
+ matrix:
212
+ dwhisper_sycl: [ON]
213
+ dcmake_c_compiler: [icx]
214
+ dcmake_cxx_compiler: [icpx]
215
+ arch: [linux/amd64, linux/arm64, linux/arm/v7, linux/ppc64le]
216
+
217
+ continue-on-error: true
218
+
219
+ steps:
220
+ - name: Clone
221
+ uses: actions/checkout@v4
222
+
223
+ - name: add oneAPI to apt
224
+ shell: bash
225
+ run: |
226
+ cd /tmp
227
+ wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
228
+ sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
229
+ rm GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
230
+ sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main"
231
+
232
+ - name: install oneAPI dpcpp compiler
233
+ shell: bash
234
+ run: |
235
+ sudo apt update
236
+ sudo apt install intel-oneapi-compiler-dpcpp-cpp
237
+
238
+ - name: install oneAPI MKL library
239
+ shell: bash
240
+ run: |
241
+ sudo apt install intel-oneapi-mkl-devel
242
+
243
+ - name: Clone
244
+ id: checkout
245
+ uses: actions/checkout@v4
246
+
247
+ - name: Build
248
+ id: cmake_build
249
+ run: |
250
+ source /opt/intel/oneapi/setvars.sh
251
+ mkdir build
252
+ cd build
253
+ cmake -DGGML_SYCL_F16=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ..
254
+ cmake --build . --config Release -j $(nproc)
255
+
256
+ windows-msys2:
257
+ runs-on: windows-latest
258
+
259
+ strategy:
260
+ fail-fast: false
261
+ matrix:
262
+ include:
263
+ - { sys: UCRT64, env: ucrt-x86_64, build: Release }
264
+ - { sys: CLANG64, env: clang-x86_64, build: Release }
265
+
266
+ steps:
267
+ - name: Clone
268
+ uses: actions/checkout@v4
269
+
270
+ - name: Setup ${{ matrix.sys }}
271
+ uses: msys2/setup-msys2@v2
272
+ with:
273
+ update: true
274
+ msystem: ${{matrix.sys}}
275
+ install: >-
276
+ base-devel
277
+ mingw-w64-${{matrix.env}}-toolchain
278
+ mingw-w64-${{matrix.env}}-cmake
279
+ mingw-w64-${{matrix.env}}-SDL2
280
+ mingw-w64-${{matrix.env}}-openblas
281
+
282
+ - name: Build using make
283
+ shell: msys2 {0}
284
+ run: |
285
+ make -j $(nproc)
286
+
287
+ - name: Clean after building using make
288
+ shell: msys2 {0}
289
+ run: |
290
+ make clean
291
+
292
+ - name: Build using make w/ OpenBLAS
293
+ shell: msys2 {0}
294
+ run: |
295
+ make GGML_OPENBLAS=1 -j $(nproc)
296
+
297
+ - name: Build using CMake
298
+ shell: msys2 {0}
299
+ run: |
300
+ cmake -B build
301
+ cmake --build build --config ${{ matrix.build }} -j $(nproc)
302
+
303
+ - name: Clean after building using CMake
304
+ shell: msys2 {0}
305
+ run: |
306
+ rm -rf build
307
+
308
+ - name: Build using CMake w/ OpenBLAS
309
+ shell: msys2 {0}
310
+ run: |
311
+ cmake -B build -DGGML_OPENBLAS=ON
312
+ cmake --build build --config ${{ matrix.build }} -j $(nproc)
313
+
314
+ windows:
315
+ runs-on: windows-latest
316
+
317
+ strategy:
318
+ matrix:
319
+ build: [Release]
320
+ arch: [Win32, x64]
321
+ sdl2: [ON]
322
+ include:
323
+ - arch: Win32
324
+ s2arc: x86
325
+ jnaPath: win32-x86
326
+ - arch: x64
327
+ s2arc: x64
328
+ jnaPath: win32-x86-64
329
+ - sdl2: ON
330
+ s2ver: 2.28.5
331
+
332
+ steps:
333
+ - name: Clone
334
+ uses: actions/checkout@v4
335
+
336
+ - name: Add msbuild to PATH
337
+ uses: microsoft/setup-msbuild@v2
338
+
339
+ - name: Fetch SDL2 and set SDL2_DIR
340
+ if: matrix.sdl2 == 'ON'
341
+ run: |
342
+ C:/msys64/usr/bin/wget.exe -qO sdl2.zip https://github.com/libsdl-org/SDL/releases/download/release-${{ matrix.s2ver }}/SDL2-devel-${{ matrix.s2ver }}-VC.zip
343
+ 7z x sdl2.zip
344
+ echo "SDL2_DIR=$env:GITHUB_WORKSPACE/SDL2-${{ matrix.s2ver }}/cmake" >> $env:GITHUB_ENV
345
+
346
+ - name: Configure
347
+ run: >
348
+ cmake -S . -B ./build -A ${{ matrix.arch }}
349
+ -DCMAKE_BUILD_TYPE=${{ matrix.build }}
350
+ -DWHISPER_SDL2=${{ matrix.sdl2 }}
351
+
352
+ - name: Build
353
+ run: |
354
+ cd ./build
355
+ msbuild ALL_BUILD.vcxproj -t:build -p:configuration=${{ matrix.build }} -p:platform=${{ matrix.arch }}
356
+
357
+ - name: Copy SDL2.dll
358
+ if: matrix.sdl2 == 'ON'
359
+ run: copy "$env:SDL2_DIR/../lib/${{ matrix.s2arc }}/SDL2.dll" build/bin/${{ matrix.build }}
360
+
361
+ - name: Upload dll
362
+ uses: actions/upload-artifact@v4
363
+ with:
364
+ name: ${{ matrix.jnaPath }}_whisper.dll
365
+ path: build/bin/${{ matrix.build }}/whisper.dll
366
+
367
+ - name: Upload binaries
368
+ if: matrix.sdl2 == 'ON'
369
+ uses: actions/upload-artifact@v4
370
+ with:
371
+ name: whisper-bin-${{ matrix.arch }}
372
+ path: build/bin/${{ matrix.build }}
373
+
374
+ windows-blas:
375
+ runs-on: windows-latest
376
+
377
+ strategy:
378
+ matrix:
379
+ build: [Release]
380
+ arch: [Win32, x64]
381
+ blas: [ON]
382
+ sdl2: [ON]
383
+ include:
384
+ - arch: Win32
385
+ obzip: https://github.com/OpenMathLib/OpenBLAS/releases/download/v0.3.25/OpenBLAS-0.3.25-x86.zip
386
+ s2arc: x86
387
+ - arch: x64
388
+ obzip: https://github.com/OpenMathLib/OpenBLAS/releases/download/v0.3.25/OpenBLAS-0.3.25-x64.zip
389
+ s2arc: x64
390
+ - sdl2: ON
391
+ s2ver: 2.28.5
392
+
393
+ steps:
394
+ - name: Clone
395
+ uses: actions/checkout@v4
396
+
397
+ - name: Add msbuild to PATH
398
+ uses: microsoft/setup-msbuild@v2
399
+
400
+ - name: Fetch OpenBLAS
401
+ if: matrix.blas == 'ON'
402
+ run: |
403
+ C:/msys64/usr/bin/wget.exe -qO blas.zip ${{ matrix.obzip }}
404
+ 7z x blas.zip -oblas -y
405
+ copy blas/include/cblas.h .
406
+ copy blas/include/openblas_config.h .
407
+ echo "OPENBLAS_PATH=$env:GITHUB_WORKSPACE/blas" >> $env:GITHUB_ENV
408
+
409
+ - name: Fetch SDL2 and set SDL2_DIR
410
+ if: matrix.sdl2 == 'ON'
411
+ run: |
412
+ C:/msys64/usr/bin/wget.exe -qO sdl2.zip https://github.com/libsdl-org/SDL/releases/download/release-${{ matrix.s2ver }}/SDL2-devel-${{ matrix.s2ver }}-VC.zip
413
+ 7z x sdl2.zip
414
+ echo "SDL2_DIR=$env:GITHUB_WORKSPACE/SDL2-${{ matrix.s2ver }}/cmake" >> $env:GITHUB_ENV
415
+
416
+ - name: Configure
417
+ run: >
418
+ cmake -S . -B ./build -A ${{ matrix.arch }}
419
+ -DCMAKE_BUILD_TYPE=${{ matrix.build }}
420
+ -DGGML_OPENBLAS=${{ matrix.blas }}
421
+ -DCMAKE_LIBRARY_PATH="$env:OPENBLAS_PATH/lib"
422
+ -DWHISPER_SDL2=${{ matrix.sdl2 }}
423
+
424
+ - name: Build
425
+ run: |
426
+ cd ./build
427
+ msbuild ALL_BUILD.vcxproj -t:build -p:configuration=${{ matrix.build }} -p:platform=${{ matrix.arch }}
428
+
429
+ - name: Copy libopenblas.dll
430
+ if: matrix.blas == 'ON'
431
+ run: copy "$env:OPENBLAS_PATH/bin/libopenblas.dll" build/bin/${{ matrix.build }}
432
+
433
+ - name: Copy SDL2.dll
434
+ if: matrix.sdl2 == 'ON'
435
+ run: copy "$env:SDL2_DIR/../lib/${{ matrix.s2arc }}/SDL2.dll" build/bin/${{ matrix.build }}
436
+
437
+ - name: Upload binaries
438
+ if: matrix.blas == 'ON' && matrix.sdl2 == 'ON'
439
+ uses: actions/upload-artifact@v4
440
+ with:
441
+ name: whisper-blas-bin-${{ matrix.arch }}
442
+ path: build/bin/${{ matrix.build }}
443
+
444
+ windows-cublas:
445
+ runs-on: windows-2019
446
+
447
+ strategy:
448
+ matrix:
449
+ build: [Release]
450
+ arch: [x64]
451
+ cublas: [ON]
452
+ sdl2: [ON]
453
+ cuda-toolkit: [12.2.0, 11.8.0]
454
+ include:
455
+ - arch: x64
456
+ s2arc: x64
457
+ - sdl2: ON
458
+ s2ver: 2.28.5
459
+
460
+ steps:
461
+ - name: Clone
462
+ uses: actions/checkout@v4
463
+
464
+ - name: Add msbuild to PATH
465
+ uses: microsoft/setup-msbuild@v2
466
+
467
+ - name: Install CUDA Toolkit
468
+ id: cuda-toolkit
469
+ uses: Jimver/cuda-toolkit@v0.2.15
470
+ with:
471
+ cuda: '${{ matrix.cuda-toolkit }}'
472
+
473
+ - name: Fetch SDL2 and set SDL2_DIR
474
+ if: matrix.sdl2 == 'ON'
475
+ run: |
476
+ C:/msys64/usr/bin/wget.exe -qO sdl2.zip https://github.com/libsdl-org/SDL/releases/download/release-${{ matrix.s2ver }}/SDL2-devel-${{ matrix.s2ver }}-VC.zip
477
+ 7z x sdl2.zip
478
+ echo "SDL2_DIR=$env:GITHUB_WORKSPACE/SDL2-${{ matrix.s2ver }}/cmake" >> $env:GITHUB_ENV
479
+
480
+ - name: Configure
481
+ run: >
482
+ cmake -S . -B ./build -A ${{ matrix.arch }}
483
+ -DCMAKE_BUILD_TYPE=${{ matrix.build }}
484
+ -DGGML_CUDA=${{ matrix.cublas }}
485
+ -DWHISPER_SDL2=${{ matrix.sdl2 }}
486
+
487
+ - name: Build ${{ matrix.cuda-toolkit }}
488
+ run: |
489
+ cd ./build
490
+ cmake --build . --config ${{ matrix.build }}
491
+
492
+ - name: Copy CUDA DLLs
493
+ run: >
494
+ Copy-Item -PassThru
495
+ -Path "${{ steps.cuda-toolkit.outputs.CUDA_PATH }}/bin/*.dll"
496
+ -Include cudart64_*,cublas64_*,cublasLt64_*
497
+ -Destination build/bin/${{ matrix.build }}
498
+
499
+ - name: Copy SDL2.dll
500
+ if: matrix.sdl2 == 'ON'
501
+ run: copy "$env:SDL2_DIR/../lib/${{ matrix.s2arc }}/SDL2.dll" build/bin/${{ matrix.build }}
502
+
503
+ - name: Upload binaries
504
+ if: matrix.sdl2 == 'ON'
505
+ uses: actions/upload-artifact@v4
506
+ with:
507
+ name: whisper-cublas-${{ matrix.cuda-toolkit }}-bin-${{ matrix.arch }}
508
+ path: build/bin/${{ matrix.build }}
509
+
510
+ emscripten:
511
+ runs-on: ubuntu-latest
512
+
513
+ strategy:
514
+ matrix:
515
+ build: [Release]
516
+
517
+ steps:
518
+ - name: Clone
519
+ uses: actions/checkout@v4
520
+
521
+ - name: Setup emsdk
522
+ uses: mymindstorm/setup-emsdk@v14
523
+
524
+ - name: Verify
525
+ run: emcc -v
526
+
527
+ - name: Build
528
+ run: |
529
+ emcmake cmake . -DCMAKE_BUILD_TYPE=${{ matrix.build }}
530
+ make
531
+
532
+ ios:
533
+ runs-on: macos-latest
534
+
535
+ strategy:
536
+ matrix:
537
+ build: [Release]
538
+
539
+ steps:
540
+ - name: Clone
541
+ uses: actions/checkout@v4
542
+
543
+ - name: Configure
544
+ run: |
545
+ cp models/for-tests-ggml-base.en.bin models/ggml-base.en.bin
546
+ mkdir models/ggml-base.en-encoder.mlmodelc
547
+
548
+ - name: Build objc example
549
+ run: xcodebuild -project examples/whisper.objc/whisper.objc.xcodeproj -scheme whisper.objc -configuration ${{ matrix.build }} -sdk iphonesimulator build
550
+
551
+ - name: Build swiftui example
552
+ run: xcodebuild -project examples/whisper.swiftui/whisper.swiftui.xcodeproj -scheme WhisperCppDemo -configuration ${{ matrix.build }} -sdk iphonesimulator build
553
+
554
+ android:
555
+ runs-on: ubuntu-latest
556
+
557
+ steps:
558
+ - name: Clone
559
+ uses: actions/checkout@v4
560
+ with:
561
+ path: whisper
562
+
563
+ - name: Clone
564
+ uses: actions/checkout@v4
565
+ with:
566
+ repository: ggerganov/ggml
567
+ path: ggml
568
+
569
+ - name: Install Java
570
+ uses: actions/setup-java@v4
571
+ with:
572
+ distribution: zulu
573
+ java-version: 21
574
+
575
+ - name: Setup Android SDK
576
+ uses: android-actions/setup-android@v3
577
+
578
+ - name: Build
579
+ run: |
580
+ cd whisper/examples/whisper.android
581
+ ./gradlew assembleRelease --no-daemon
582
+
583
+ - name: Build with external ggml
584
+ run: |
585
+ export PATH_TO_GGML=$PWD/ggml
586
+ cd whisper/examples/whisper.android
587
+ ./gradlew assembleRelease --no-daemon -PGGML_HOME=$PATH_TO_GGML
588
+
589
+ android_java:
590
+ runs-on: ubuntu-latest
591
+
592
+ steps:
593
+ - name: Clone
594
+ uses: actions/checkout@v4
595
+
596
+ - name: set up JDK 11
597
+ uses: actions/setup-java@v4
598
+ with:
599
+ java-version: '11'
600
+ distribution: 'temurin'
601
+ cache: gradle
602
+
603
+ - name: Setup Android SDK
604
+ uses: android-actions/setup-android@v3
605
+ with:
606
+ cmdline-tools-version: 9.0
607
+
608
+ - name: Build
609
+ run: |
610
+ cd examples/whisper.android.java
611
+ chmod +x ./gradlew
612
+ ./gradlew assembleRelease
613
+
614
+ # TODO: disabled because of following fail: https://github.com/ggerganov/whisper.cpp/actions/runs/9686220096/job/26735899598
615
+ # java:
616
+ # needs: [ 'windows' ]
617
+ # runs-on: windows-latest
618
+ # steps:
619
+ # - uses: actions/checkout@v4
620
+ #
621
+ # - name: Install Java
622
+ # uses: actions/setup-java@v4
623
+ # with:
624
+ # distribution: zulu
625
+ # java-version: 20
626
+ #
627
+ # - name: Download Windows lib
628
+ # uses: actions/download-artifact@v4
629
+ # with:
630
+ # name: win32-x86-64_whisper.dll
631
+ # path: bindings/java/build/generated/resources/main/win32-x86-64
632
+ #
633
+ # - name: Build
634
+ # run: |
635
+ # models\download-ggml-model.cmd tiny.en
636
+ # cd bindings/java
637
+ # chmod +x ./gradlew
638
+ # ./gradlew build
639
+ #
640
+ # - name: Upload jar
641
+ # uses: actions/upload-artifact@v4
642
+ # with:
643
+ # name: whispercpp.jar
644
+ # path: bindings/java/build/libs/whispercpp-*.jar
645
+ #
646
+ # - name: Publish package
647
+ # if: ${{ github.ref == 'refs/heads/master' }}
648
+ # uses: gradle/gradle-build-action@v2.4.2
649
+ # with:
650
+ # arguments: publish
651
+ # build-root-directory: bindings/java
652
+ # env:
653
+ # MAVEN_USERNAME: ${{ secrets.JIRA_USER }}
654
+ # MAVEN_PASSWORD: ${{ secrets.JIRA_PASS }}
655
+ # PGP_SECRET: ${{ secrets.GPG_PRIVATE_KEY }}
656
+ # PGP_PASSPHRASE: ${{ secrets.GPG_PASSPHRASE }}
657
+
658
+ quantize:
659
+ runs-on: ubuntu-latest
660
+
661
+ steps:
662
+ - name: Clone
663
+ uses: actions/checkout@v4
664
+
665
+ - name: Test quantize
666
+ run: |
667
+ ./models/download-ggml-model.sh tiny.en
668
+ make quantize
669
+ ./quantize models/ggml-tiny.en.bin models/ggml-tiny.en-q4_0.bin q4_0
.github/workflows/docker.yml ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Publish Docker image
2
+
3
+ on:
4
+ pull_request:
5
+ push:
6
+ branches:
7
+ - master
8
+
9
+ jobs:
10
+ push_to_registry:
11
+ name: Push Docker image to Docker Hub
12
+ if: github.event.pull_request.draft == false
13
+
14
+ runs-on: ubuntu-latest
15
+ env:
16
+ COMMIT_SHA: ${{ github.sha }}
17
+ strategy:
18
+ matrix:
19
+ config:
20
+ - { tag: "main", dockerfile: ".devops/main.Dockerfile", platform: "linux/amd64,linux/arm64" }
21
+ - { tag: "main-cuda", dockerfile: ".devops/main-cuda.Dockerfile", platform: "linux/amd64" }
22
+
23
+ steps:
24
+ - name: Check out the repo
25
+ uses: actions/checkout@v3
26
+
27
+ - name: Set up QEMU
28
+ uses: docker/setup-qemu-action@v3
29
+
30
+ - name: Set up Docker Buildx
31
+ uses: docker/setup-buildx-action@v3
32
+
33
+ - name: Log in to Docker Hub
34
+ uses: docker/login-action@v3
35
+ with:
36
+ registry: ghcr.io
37
+ username: ${{ github.repository_owner }}
38
+ password: ${{ secrets.GITHUB_TOKEN }}
39
+
40
+ - name: Build and push Docker image (versioned)
41
+ if: github.event_name == 'push'
42
+ uses: docker/build-push-action@v5
43
+ with:
44
+ context: .
45
+ push: true
46
+ platforms: ${{ matrix.config.platforms }}
47
+ tags: "ghcr.io/${{ github.repository }}:${{ matrix.config.tag }}-${{ env.COMMIT_SHA }}"
48
+ file: ${{ matrix.config.dockerfile }}
49
+
50
+ - name: Build and push Docker image (tagged)
51
+ uses: docker/build-push-action@v4
52
+ with:
53
+ context: .
54
+ push: ${{ github.event_name == 'push' }}
55
+ platforms: ${{ matrix.config.platforms }}
56
+ tags: "ghcr.io/${{ github.repository }}:${{ matrix.config.tag }}"
57
+ file: ${{ matrix.config.dockerfile }}
.github/workflows/examples.yml ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Examples Tests
2
+ on:
3
+ push:
4
+ paths:
5
+ - examples/addon.node/**
6
+ - whisper.h
7
+ pull_request:
8
+ paths:
9
+ - examples/addon.node/**
10
+ - whisper.h
11
+
12
+ jobs:
13
+ addon_node-ubuntu-latest:
14
+ runs-on: ubuntu-latest
15
+ strategy:
16
+ matrix:
17
+ node-version: [ 16.x, 18.x ]
18
+ steps:
19
+ - name: Clone
20
+ uses: actions/checkout@v1
21
+
22
+ - name: Dependencies
23
+ run: |
24
+ sudo apt-get update
25
+ sudo apt-get install build-essential
26
+ sudo apt-get install cmake
27
+ sudo apt-get install libsdl2-dev
28
+
29
+ - name: Use Node.js ${{ matrix.node-version }}
30
+ uses: actions/setup-node@v1
31
+ with:
32
+ node-version: ${{ matrix.node-version }}
33
+ cache: 'npm'
34
+
35
+ - name: Install package.json dependencies
36
+ working-directory: ./examples/addon.node
37
+ run: npm install
38
+
39
+ - name: Compile addon.node
40
+ run: npx cmake-js compile -T addon.node -B Release
41
+
42
+ - name: Download test model
43
+ run: |
44
+ bash ./models/download-ggml-model.sh base.en
45
+ - name: Test
46
+ run: |
47
+ cd examples/addon.node
48
+ npm run test
.gitignore ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.o
2
+ *.a
3
+ .cache/
4
+ .coreml/
5
+ .test/
6
+ .vs/
7
+ .vscode/
8
+ .DS_Store
9
+ .vimspector.json
10
+ /CMakeSettings.json
11
+
12
+ build/
13
+ build-*/
14
+
15
+ # SPM
16
+ .build/
17
+ .swiftpm
18
+ *.metallib
19
+
20
+ /main
21
+ /stream
22
+ /command
23
+ /talk
24
+ /talk-llama
25
+ /bench
26
+ /quantize
27
+ /server
28
+ /lsp
29
+
30
+ arm_neon.h
31
+ sync.sh
32
+ libwhisper.a
33
+ libwhisper.so
34
+ compile_commands.json
35
+
36
+ examples/arm_neon.h
37
+ examples/whisper.objc/whisper.objc.xcodeproj/xcshareddata
38
+ examples/whisper.objc/whisper.objc.xcodeproj/xcuserdata/
39
+ examples/whisper.objc/whisper.objc.xcodeproj/project.xcworkspace/xcuserdata
40
+
41
+ extra/bench-gg.txt
42
+
43
+ models/*.mlmodel
44
+ models/*.mlmodelc
45
+ models/*.mlpackage
46
+ bindings/java/.gradle/
47
+ bindings/java/.idea/
48
+ .idea/
49
+
50
+ benchmark_results.csv
51
+ cmake-build-debug/
52
+ .cxx/
53
+ .gradle/
54
+ local.properties
.gitmodules ADDED
File without changes
AUTHORS ADDED
@@ -0,0 +1,301 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # date: Tue Apr 9 20:27:03 EEST 2024
2
+ # this file is auto-generated by scripts/gen-authors.sh
3
+
4
+ 0/0 <zero@imaskeleton.me>
5
+ 0cc4m <picard12@live.de>
6
+ 0xsourcecode <134374803+0xsourcecode@users.noreply.github.com>
7
+ AT <manyoso@users.noreply.github.com>
8
+ Aarni Koskela <akx@iki.fi>
9
+ Aaron Pham <29749331+aarnphm@users.noreply.github.com>
10
+ Aaron Taylor <aaron@exphat.com>
11
+ Abhilash Majumder <30946547+abhilash1910@users.noreply.github.com>
12
+ Abitofevrything <54505189+abitofevrything@users.noreply.github.com>
13
+ AfryMask <AfryMask@163.com>
14
+ Ahmad Bilal <ahmad.bilal@empglabs.com>
15
+ AidanBeltonS <87009434+AidanBeltonS@users.noreply.github.com>
16
+ Akash Mahajan <akash7190@gmail.com>
17
+ Akash Mahajan <akashmjn@stanford.edu>
18
+ Al Hoang <3811822-hoanga@users.noreply.gitlab.com>
19
+ Alan <unknown>
20
+ Aleksander Andrzejewski <18704749+aleksanderandrzejewski@users.noreply.github.com>
21
+ Alex Azarov <alex@azarov.by>
22
+ Alex Bacart <13940752+alex-bacart@users.noreply.github.com>
23
+ Alex Evgrashin <aevgrashin@yandex.ru>
24
+ Alexandr Graschenkov <alexandr.graschenkov91@gmail.com>
25
+ Alexandru Mariuti <alex@mariuti.com>
26
+ Alexey Kharlamov <alexey@kharlamov.biz>
27
+ Alfredo Montesinos <alfredo.montesinos@g.austincc.edu>
28
+ Ali Alameh <ali.alameh@isae.edu.lb>
29
+ Ananta Bastola <anantarajbastola@gmail.com>
30
+ Andreu Huguet <andreuhuguet@gmail.com>
31
+ Andrew Huynh <a5thuynh@gmail.com>
32
+ Andrew S <andrews54757@gmail.com>
33
+ Andy Maloney <asmaloney@gmail.com>
34
+ Anton Kostin <masguit42@users.noreply.github.com>
35
+ Artyom Mezin <psycho.fading@gmail.com>
36
+ Asad Memon <asad.lionpk@gmail.com>
37
+ Ashraful Islam <ashraful.meche@gmail.com>
38
+ AsukaMinato <asukaminato@nyan.eu.org>
39
+ AustinMroz <austinmroz@utexas.edu>
40
+ Avik Sengupta <avik@sengupta.net>
41
+ Bader-eddine Ouaich <49657842+baderouaich@users.noreply.github.com>
42
+ Baffin Lee <baffinlee@gmail.com>
43
+ Ben Nortier <bjnortier@gmail.com>
44
+ Benjamin Heiniger <benjamin.heiniger@bluewin.ch>
45
+ Bo-Yi Wu <appleboy.tw@gmail.com>
46
+ Boris Bliznioukov <blib@mail.com>
47
+ Borislav Stanimirov <b.stanimirov@abv.bg>
48
+ Brad Murray <59848399+bradmurray-dt@users.noreply.github.com>
49
+ Brian Murray <brian@bmurray.ca>
50
+ CRD716 <crd716@gmail.com>
51
+ Canis Lupus <Canis-UK@users.noreply.github.com>
52
+ Carolinabanana <140120812+Carolinabanana@users.noreply.github.com>
53
+ ChangSeok Oh <shivamidow@users.noreply.github.com>
54
+ Chaoqun <27287694+OpenWaygate@users.noreply.github.com>
55
+ Chia-Hsiang Cheng <88014292+garychia@users.noreply.github.com>
56
+ Chidi Williams <williamschidi1@gmail.com>
57
+ Christian <12550267+iceychris@users.noreply.github.com>
58
+ Clifford Heath <clifford.heath@gmail.com>
59
+ Colin <github@whoisc.cc>
60
+ DGdev91 <DGdev91@users.noreply.github.com>
61
+ Damian Czaja <trojan295@protonmail.com>
62
+ Daniel Bevenius <daniel.bevenius@gmail.com>
63
+ David <dnhkng@gmail.com>
64
+ David Thorpe <djt@mutablelogic.com>
65
+ Davidson Francis <davidsondfgl@gmail.com>
66
+ Dener Stassun <denerstassun@gmail.com>
67
+ Didzis Gosko <didzis@users.noreply.github.com>
68
+ Digipom <admin@digipom.com>
69
+ Dimo <dimo@ieee.org>
70
+ Dody Suria Wijaya <dodysw@gmail.com>
71
+ Dr. Tom Murphy VII Ph.D <499244+tom7@users.noreply.github.com>
72
+ Duncan McConnell <ddmcconnell4@gmail.com>
73
+ Egor Egorov <me@egorfine.com>
74
+ Elkana Bardugo <ttv200@gmail.com>
75
+ Emmanuel Schmidbauer <eschmidbauer@gmail.com>
76
+ Engininja2 <139037756+Engininja2@users.noreply.github.com>
77
+ Eric Swanson <eswanson@alloscomp.com>
78
+ Eric Tendian <erictendian@gmail.com>
79
+ Erik Scholz <Green-Sky@users.noreply.github.com>
80
+ Evan Jones <evan.q.jones@gmail.com>
81
+ Evan Martin <evan.martin@gmail.com>
82
+ Eve <139727413+netrunnereve@users.noreply.github.com>
83
+ Evgeny Kuznetsov <evgeny@kuznetsov.md>
84
+ F1L1P <78918286+F1L1Pv2@users.noreply.github.com>
85
+ Fangjun Kuang <csukuangfj@gmail.com>
86
+ Felix <stenbackfelix@gmail.com>
87
+ Finn Voorhees <finnvoorhees@gmail.com>
88
+ FlippFuzz <41221030+FlippFuzz@users.noreply.github.com>
89
+ Gang Chen <goncha@gmail.com>
90
+ Gavin Cai <gavin1818@hotmail.com>
91
+ George Hindle <george@georgehindle.com>
92
+ Georgi Gerganov <ggerganov@gmail.com>
93
+ GitAritron <103900385+GitAritron@users.noreply.github.com>
94
+ GiviMAD <GiviMAD@users.noreply.github.com>
95
+ Gleicon Moraes <gleicon@gmail.com>
96
+ Gregor Jasny <gjasny@googlemail.com>
97
+ Guillaume Wenzek <gwenzek@users.noreply.github.com>
98
+ HY. Kelvin Lee <34256578+hykelvinlee42@users.noreply.github.com>
99
+ Halalaluyafail3 <55773281+Halalaluyafail3@users.noreply.github.com>
100
+ Hang <bebound@gmail.com>
101
+ Herman Semenov <GermanAizek@yandex.ru>
102
+ Hrishikesh Barman <geekodour@users.noreply.github.com>
103
+ Ian Bicking <ian@ianbicking.org>
104
+ Ian Bull <irbull@eclipsesource.com>
105
+ Ikko Ashimine <eltociear@gmail.com>
106
+ InconsolableCellist <23345188+InconsolableCellist@users.noreply.github.com>
107
+ Ismatulla Mansurov <47342870+sapoepsilon@users.noreply.github.com>
108
+ Ivan Gorin <ivangorin21@gmail.com>
109
+ JJ <103335846+computerscienceiscool@users.noreply.github.com>
110
+ Jack Mousseau <jmousseau@users.noreply.github.com>
111
+ JacobLinCool <jacoblincool@gmail.com>
112
+ Jakub Ráček <blizzcz@gmail.com>
113
+ Jared Van Bortel <jared@nomic.ai>
114
+ Jay Binks <jaybinks@gmail.com>
115
+ Jhen-Jie Hong <developer@jhen.me>
116
+ Jhen-Jie Hong <iainst0409@gmail.com>
117
+ JidongZhang-THU <1119708529@qq.com>
118
+ Jo Liss <joliss42@gmail.com>
119
+ Johan <jr.raffin@gmail.com>
120
+ Johannes Gäßler <johannesg@5d6.de>
121
+ John Balis <phobossystems@gmail.com>
122
+ Jonathan Soo <jcsoo@agora.com>
123
+ Jonno <1160532+razodactyl@users.noreply.github.com>
124
+ Joonas Pihlajamaa <joonas.pihlajamaa@iki.fi>
125
+ Jose <34888496+Jerry-Master@users.noreply.github.com>
126
+ Josh Bleecher Snyder <josharian@gmail.com>
127
+ Judd <foldl@users.noreply.github.com>
128
+ Jumper775 <78500318+jumpers775@users.noreply.github.com>
129
+ Justine Tunney <jtunney@gmail.com>
130
+ KP Kaiser <kirk@zothcorp.com>
131
+ Kamilake <exjang0@gmail.com>
132
+ Kartik Saranathan <278928+Kartiku@users.noreply.github.com>
133
+ Kasumi <90275229+kasumi-1@users.noreply.github.com>
134
+ Kawrakow <48489457+ikawrakow@users.noreply.github.com>
135
+ Kevin Brothaler <admin@digipom.com>
136
+ Konstantin Zhuravlyov <konstantin.zhuravlyov@amd.com>
137
+ Kreijstal <rainb@tfwno.gf>
138
+ Kylin <56434533+KyL0N@users.noreply.github.com>
139
+ LBlue <153975653+lbluep@users.noreply.github.com>
140
+ Larry Battle <larry.battle.tech@gmail.com>
141
+ Laytan Laats <laytanlaats@hotmail.com>
142
+ Leo Moll <leo.moll@yeasoft.com>
143
+ Lexevolution <31176843+Lexevolution@users.noreply.github.com>
144
+ LittleLoli <26589867+WhichWho@users.noreply.github.com>
145
+ Lucas Zanek <57494138+LucasZNK@users.noreply.github.com>
146
+ Luis Herrera <herrera-luis@users.noreply.github.com>
147
+ Lukas Rist <glaslos@gmail.com>
148
+ M. A. Ali <73258591+MightyStud@users.noreply.github.com>
149
+ M. Eren Akbiyik <erenakbiyik@gmail.com>
150
+ Maciek <maciek.mab122@gmail.com>
151
+ Marcin Mielniczuk <marmistrz.dev@zoho.eu>
152
+ Martin Warnaar <martinwarnaar@gmail.com>
153
+ Matheus de Sousa <23645013+keyehzy@users.noreply.github.com>
154
+ Mathijs de Bruin <mathijs@mathijsfietst.nl>
155
+ Matija Pevec <mightymatth@users.noreply.github.com>
156
+ Maximiliano Levi <8160966+maxilevi@users.noreply.github.com>
157
+ Meng, Hengyu <hengyu.meng@intel.com>
158
+ Michael Podvitskiy <podvitskiymichael@gmail.com>
159
+ Michael Rienstra <mrienstra@gmail.com>
160
+ Mikhail Grigorev <sleuthhound@gmail.com>
161
+ Mohammadreza Hendiani <hendiani.mohammadreza@gmail.com>
162
+ Mohit Agarwal <mohit@sdf.org>
163
+ Murilo Santana <mvrilo@gmail.com>
164
+ Neil Chudleigh <nchudleigh@users.noreply.github.com>
165
+ Neo Zhang Jianyu <jianyu.zhang@intel.com>
166
+ Neuman Vong <neuman.vong@gmail.com>
167
+ Nicholas Albion <nalbion@yahoo.com>
168
+ Niels Mayer <Niels.Mayer@gmail.com>
169
+ Okabintaro <103938900+Okabintaro@users.noreply.github.com>
170
+ Oleg Sidorov <me@whitebox.io>
171
+ Oleg Sidorov <oleg@sidorov.nl>
172
+ Ondrej Kokes <ondrej.kokes@gmail.com>
173
+ Ouadie EL FAROUKI <ouadie.elfarouki@codeplay.com>
174
+ Paul Tsochantaris <ptsochantaris@icloud.com>
175
+ Philipp Zabel <philipp.zabel@gmail.com>
176
+ Philippe Normand <phil@base-art.net>
177
+ Przemysław Pawełczyk <przemoc@gmail.com>
178
+ Qianhe Chen <54462604+chenqianhe@users.noreply.github.com>
179
+ Radosław Gryta <radek.gryta@gmail.com>
180
+ Reinforce-II <fate@eastal.com>
181
+ Reinis Muiznieks <muiznieks.reinis@gmail.com>
182
+ RelatedTitle <r3latedtitle@gmail.com>
183
+ RhinoDevel <RhinoDevel@users.noreply.github.com>
184
+ Rich Jones <miserlou@gmail.com>
185
+ Robin <robin.xw@hotmail.com>
186
+ Roddur Dasgupta <roddurd@gmail.com>
187
+ Roland Rabien <figbug@gmail.com>
188
+ Rotem Dan <rotemdan@gmail.com>
189
+ Ryan Hitchman <hitchmanr@gmail.com>
190
+ Ryan Metcalfe <107415876+RyanMetcalfeInt8@users.noreply.github.com>
191
+ RyanChang <ftes90015@gmail.com>
192
+ Sam <49637763+Onlyartist9@users.noreply.github.com>
193
+ Sam Pullara <spullara@gmail.com>
194
+ Sanchit Gandhi <93869735+sanchit-gandhi@users.noreply.github.com>
195
+ Sergio López <slp@sinrega.org>
196
+ Siddharth Ramakrishnan <srr2141@columbia.edu>
197
+ Simon Moisselin <simon.moisstoll@gmail.com>
198
+ Sindre Sorhus <sindresorhus@gmail.com>
199
+ Slava Primenko <primenko.s@gmail.com>
200
+ Syahmi Azhar <prsyahmi@gmail.com>
201
+ Syed Jafri <syedjafri97@gmail.com>
202
+ Sơn Phan Trung <phantrungson17@gmail.com>
203
+ Taisei Mima <bhbstar.me@gmail.com>
204
+ Takeshi Inoue <inoue.takeshi@gmail.com>
205
+ Tamotsu Takahashi <ttakah+github@gmail.com>
206
+ Taras Glek <taras@thegp.com>
207
+ Tauseef Mohiuddin <35351464+tauseefmohammed2@users.noreply.github.com>
208
+ Thijs Raymakers <thijs@raymakers.nl>
209
+ Thomas Fitzsimmons <fitzsim@fitzsim.org>
210
+ Tiago Fassoni <tiagofassoni@users.noreply.github.com>
211
+ Tienshiao Ma <tienshiao@tienshiao.org>
212
+ Timothy Cronin <40186632+4imothy@users.noreply.github.com>
213
+ Tobrun <tobrun.van.nuland@gmail.com>
214
+ Todd <taf2@users.noreply.github.com>
215
+ Tong Li <31761981+litongjava@users.noreply.github.com>
216
+ Topping1 <78745143+Topping1@users.noreply.github.com>
217
+ Travis Cline <travis.cline@gmail.com>
218
+ UEXTM.com <84163508+uextm@users.noreply.github.com>
219
+ Vadim Peretokin <vperetokin@hey.com>
220
+ Valentin Gosu <1454649+valenting@users.noreply.github.com>
221
+ Vulcan <93451215+trholding@users.noreply.github.com>
222
+ WhiteOlivierus <36532695+WhiteOlivierus@users.noreply.github.com>
223
+ Xiang (Kevin) Li <kevinli020508@gmail.com>
224
+ Xiao-Yong Jin <jinxiaoyong@gmail.com>
225
+ XiaotaoChen <chenxiaotao1234@gmail.com>
226
+ Yajing Tang <phillis@google.com>
227
+ Yang Shen <aplshenyang@gmail.com>
228
+ Yunès <jean.baptiste.yunes@free.fr>
229
+ ZaBlazzingZephyrus <119159668+blazingzephyr@users.noreply.github.com>
230
+ Zigfrid Zvezdin <ziggerZZ@gmail.com>
231
+ Zollner <24618122+Zolliner@users.noreply.github.com>
232
+ ai-at-home <149282006+ai-at-home@users.noreply.github.com>
233
+ alonfaraj <alonfaraj@gmail.com>
234
+ andypayne <apayne@gmail.com>
235
+ ardfork <134447697+ardfork@users.noreply.github.com>
236
+ automaticcat <daogiatuank54@gmail.com>
237
+ be-next <jerome.ramette@gmail.com>
238
+ bert hubert <bert@hubertnet.nl>
239
+ bmwl <brian.marshall@tolko.com>
240
+ bobqianic <129547291+bobqianic@users.noreply.github.com>
241
+ bocytko <bocytko+github@gmail.com>
242
+ boolemancer <48014766+boolemancer@users.noreply.github.com>
243
+ boolemancer <boolemancer@gmail.com>
244
+ bradmit <151883577+bradmit@users.noreply.github.com>
245
+ brunofaustino <b.fa.amorim@gmail.com>
246
+ bssrdf <merlintiger@hotmail.com>
247
+ byte-6174 <88070277+byte-6174@users.noreply.github.com>
248
+ cdosoftei <ciprian.dosoftei@gmail.com>
249
+ clach04 <Chris.Clark@actian.com>
250
+ compilade <113953597+compilade@users.noreply.github.com>
251
+ conradg <conradjgodfrey@gmail.com>
252
+ ddpasa <112642920+ddpasa@users.noreply.github.com>
253
+ denersc <denerstassun@gmail.com>
254
+ dscripka <dscripka@users.noreply.github.com>
255
+ duthils <duthils@duthils.net>
256
+ ecneladis <ecneladis@users.noreply.github.com>
257
+ faker <nspyia2002@gmail.com>
258
+ fitzsim <fitzsim@fitzsim.org>
259
+ fraxy-v <65565042+fraxy-v@users.noreply.github.com>
260
+ genevera (she/her) <genevera@users.noreply.github.com>
261
+ geniusnut <geniusnut@gmail.com>
262
+ greeshmay <greeshmay@gmail.com>
263
+ hydai <z54981220@gmail.com>
264
+ iamthad <thadeus.j.fleming@gmail.com>
265
+ james wolf <contractorwolf@hotmail.com>
266
+ joecryptotoo <80373433+joecryptotoo@users.noreply.github.com>
267
+ jorismertz <35079666+jorismertz@users.noreply.github.com>
268
+ junkfood <69683722+JunkFood02@users.noreply.github.com>
269
+ jwijffels <jwijffels@bnosac.be>
270
+ kamranjon <kamranjon@gmail.com>
271
+ katsu560 <katsu560oo-@docomo.ne.jp>
272
+ kennethge <57784063+kenneth-ge@users.noreply.github.com>
273
+ keyehzy <msamuel@aluno.puc-rio.br>
274
+ leejet <leejet714@gmail.com>
275
+ litong <31761981+litongjava@users.noreply.github.com>
276
+ lnyan <lkwq007@gmail.com>
277
+ m.bell <m.bell@techsmith.com>
278
+ mkiol <mkiol@users.noreply.github.com>
279
+ novag <7754358+novag@users.noreply.github.com>
280
+ pajowu <pajowu@pajowu.de>
281
+ polarmoon <90010972+polarmoon@users.noreply.github.com>
282
+ rlapray <lapray.romain@gmail.com>
283
+ sandrohanea <40202887+sandrohanea@users.noreply.github.com>
284
+ semiformal-net <84111142+semiformal-net@users.noreply.github.com>
285
+ shibukazu <61775791+shibukazu@users.noreply.github.com>
286
+ shikokuchuo <53399081+shikokuchuo@users.noreply.github.com>
287
+ slaren <slarengh@gmail.com>
288
+ slashlib <slashlib@users.noreply.github.com>
289
+ snadampal <87143774+snadampal@users.noreply.github.com>
290
+ st-gr <38470677+st-gr@users.noreply.github.com>
291
+ texmex76 <40733439+texmex76@users.noreply.github.com>
292
+ thefinaldegree <thefinaldegree@gmail.com>
293
+ trixirt <trix@redhat.com>
294
+ ulatekh <ulatekh@yahoo.com>
295
+ undef <undefdev@gmail.com>
296
+ venkr <venkateshrameshkumar+1@gmail.com>
297
+ vicalloy <zbirder@gmail.com>
298
+ xdrudis <xavierdrudis@yahoo.es>
299
+ zhouwg <6889919+zhouwg@users.noreply.github.com>
300
+ 布客飞龙 <562826179@qq.com>
301
+ Артём Земляк <azemlyak@smart-consulting.ru>
CMakeLists.txt ADDED
@@ -0,0 +1,185 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ cmake_minimum_required(VERSION 3.5) # for add_link_options and implicit target directories.
2
+ project("whisper.cpp" C CXX)
3
+ project("whisper.cpp" VERSION 1.6.2)
4
+ include(CheckIncludeFileCXX)
5
+
6
+ set(SOVERSION 1)
7
+
8
+ #set(CMAKE_WARN_DEPRECATED YES)
9
+ set(CMAKE_WARN_UNUSED_CLI YES)
10
+
11
+ set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
12
+
13
+ if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE)
14
+ set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE)
15
+ set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo")
16
+ endif()
17
+
18
+ # Add path to modules
19
+ list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/")
20
+
21
+ set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
22
+
23
+ if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
24
+ set(WHISPER_STANDALONE ON)
25
+
26
+ include(git-vars)
27
+
28
+ # configure project version
29
+ configure_file(${CMAKE_SOURCE_DIR}/bindings/javascript/package-tmpl.json ${CMAKE_SOURCE_DIR}/bindings/javascript/package.json @ONLY)
30
+ else()
31
+ set(WHISPER_STANDALONE OFF)
32
+ endif()
33
+
34
+ if (EMSCRIPTEN)
35
+ set(BUILD_SHARED_LIBS_DEFAULT OFF)
36
+
37
+ option(WHISPER_WASM_SINGLE_FILE "whisper: embed WASM inside the generated whisper.js" ON)
38
+
39
+ # TODO: without these, we get the following error:
40
+ # wasm-ld: error: --shared-memory is disallowed by whisper.cpp.o because it was not compiled with 'atomics' or 'bulk-memory' features.
41
+ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -pthread -s TOTAL_STACK=5242880")
42
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread -s TOTAL_STACK=5242880")
43
+ else()
44
+ if (MINGW)
45
+ set(BUILD_SHARED_LIBS_DEFAULT OFF)
46
+ else()
47
+ set(BUILD_SHARED_LIBS_DEFAULT ON)
48
+ endif()
49
+ endif()
50
+
51
+ option(BUILD_SHARED_LIBS "build shared libraries" ${BUILD_SHARED_LIBS_DEFAULT})
52
+
53
+ #
54
+ # option list
55
+ #
56
+
57
+ # general
58
+ option(WHISPER_CCACHE "whisper: use ccache if available" ON)
59
+
60
+ # debug
61
+ option(WHISPER_ALL_WARNINGS "whisper: enable all compiler warnings" ON)
62
+ option(WHISPER_ALL_WARNINGS_3RD_PARTY "whisper: enable all compiler warnings in 3rd party libs" OFF)
63
+
64
+ # build
65
+ option(WHISPER_FATAL_WARNINGS "whisper: enable -Werror flag" OFF)
66
+
67
+ # sanitizers
68
+ option(WHISPER_SANITIZE_THREAD "whisper: enable thread sanitizer" OFF)
69
+ option(WHISPER_SANITIZE_ADDRESS "whisper: enable address sanitizer" OFF)
70
+ option(WHISPER_SANITIZE_UNDEFINED "whisper: enable undefined sanitizer" OFF)
71
+
72
+ # extra artifacts
73
+ option(WHISPER_BUILD_TESTS "whisper: build tests" ${WHISPER_STANDALONE})
74
+ option(WHISPER_BUILD_EXAMPLES "whisper: build examples" ${WHISPER_STANDALONE})
75
+ option(WHISPER_BUILD_SERVER "whisper: build server example" ${WHISPER_STANDALONE})
76
+
77
+ # 3rd party libs
78
+ option(WHISPER_CURL "whisper: use libcurl to download model from an URL" OFF)
79
+ option(WHISPER_SDL2 "whisper: support for libSDL2" OFF)
80
+
81
+ if (CMAKE_SYSTEM_NAME MATCHES "Linux")
82
+ option(WHISPER_FFMPEG "whisper: support building and linking with ffmpeg libs (avcodec, swresample, ...)" OFF)
83
+ endif()
84
+
85
+ option(WHISPER_COREML "whisper: enable Core ML framework" OFF)
86
+ option(WHISPER_COREML_ALLOW_FALLBACK "whisper: allow non-CoreML fallback" OFF)
87
+ option(WHISPER_OPENVINO "whisper: support for OpenVINO" OFF)
88
+
89
+ # Required for relocatable CMake package
90
+ include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/build-info.cmake)
91
+
92
+ # override ggml options
93
+ set(GGML_CCACHE ${WHISPER_CCACHE})
94
+ set(GGML_SANITIZE_THREAD ${WHISPER_SANITIZE_THREAD})
95
+ set(GGML_SANITIZE_ADDRESS ${WHISPER_SANITIZE_ADDRESS})
96
+ set(GGML_SANITIZE_UNDEFINED ${WHISPER_SANITIZE_UNDEFINED})
97
+ set(GGML_ALL_WARNINGS ${WHISPER_ALL_WARNINGS})
98
+ set(GGML_FATAL_WARNINGS ${WHISPER_FATAL_WARNINGS})
99
+
100
+ # transition helpers
101
+ function (whisper_option_depr TYPE OLD NEW)
102
+ if (${OLD})
103
+ message(${TYPE} "${OLD} is deprecated and will be removed in the future.\nUse ${NEW} instead\n")
104
+ set(${NEW} ON)
105
+ endif()
106
+ endfunction()
107
+
108
+ whisper_option_depr(FATAL_ERROR WHISPER_CUBLAS GGML_CUDA)
109
+ whisper_option_depr(WARNING WHISPER_CUDA GGML_CUDA)
110
+ whisper_option_depr(WARNING WHISPER_KOMPUTE GGML_KOMPUTE)
111
+ whisper_option_depr(WARNING WHISPER_METAL GGML_METAL)
112
+ whisper_option_depr(WARNING WHISPER_METAL_EMBED_LIBRARY GGML_METAL_EMBED_LIBRARY)
113
+ whisper_option_depr(WARNING WHISPER_NATIVE GGML_NATIVE)
114
+ whisper_option_depr(WARNING WHISPER_OPENMP GGML_OPENMP)
115
+ whisper_option_depr(WARNING WHISPER_RPC GGML_RPC)
116
+ whisper_option_depr(WARNING WHISPER_SYCL GGML_SYCL)
117
+ whisper_option_depr(WARNING WHISPER_SYCL_F16 GGML_SYCL_F16)
118
+
119
+ #
120
+ # build the library
121
+ #
122
+
123
+ if (NOT TARGET ggml)
124
+ add_subdirectory(ggml)
125
+ # ... otherwise assume ggml is added by a parent CMakeLists.txt
126
+ endif()
127
+ add_subdirectory(src)
128
+
129
+ #
130
+ # install
131
+ #
132
+
133
+ include(GNUInstallDirs)
134
+ include(CMakePackageConfigHelpers)
135
+
136
+ set(WHISPER_BUILD_NUMBER ${BUILD_NUMBER})
137
+ set(WHISPER_BUILD_COMMIT ${BUILD_COMMIT})
138
+ set(WHISPER_INSTALL_VERSION ${CMAKE_PROJECT_VERSION})
139
+
140
+ set(WHISPER_INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_INCLUDEDIR} CACHE PATH "Location of header files")
141
+ set(WHISPER_LIB_INSTALL_DIR ${CMAKE_INSTALL_LIBDIR} CACHE PATH "Location of library files")
142
+ set(WHISPER_BIN_INSTALL_DIR ${CMAKE_INSTALL_BINDIR} CACHE PATH "Location of binary files")
143
+
144
+ get_directory_property(WHISPER_TRANSIENT_DEFINES COMPILE_DEFINITIONS)
145
+
146
+ set_target_properties(whisper PROPERTIES PUBLIC_HEADER ${CMAKE_CURRENT_SOURCE_DIR}/include/whisper.h)
147
+ install(TARGETS whisper LIBRARY PUBLIC_HEADER)
148
+
149
+ configure_package_config_file(
150
+ ${CMAKE_CURRENT_SOURCE_DIR}/cmake/whisper-config.cmake.in
151
+ ${CMAKE_CURRENT_BINARY_DIR}/whisper-config.cmake
152
+ INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/whisper
153
+ PATH_VARS
154
+ WHISPER_INCLUDE_INSTALL_DIR
155
+ WHISPER_LIB_INSTALL_DIR
156
+ WHISPER_BIN_INSTALL_DIR )
157
+
158
+ write_basic_package_version_file(
159
+ ${CMAKE_CURRENT_BINARY_DIR}/whisper-version.cmake
160
+ VERSION ${WHISPER_INSTALL_VERSION}
161
+ COMPATIBILITY SameMajorVersion)
162
+
163
+ install(FILES ${CMAKE_CURRENT_BINARY_DIR}/whisper-config.cmake
164
+ ${CMAKE_CURRENT_BINARY_DIR}/whisper-version.cmake
165
+ DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/whisper)
166
+
167
+ configure_file(cmake/whisper.pc.in
168
+ "${CMAKE_CURRENT_BINARY_DIR}/whisper.pc"
169
+ @ONLY)
170
+
171
+ install(FILES "${CMAKE_CURRENT_BINARY_DIR}/whisper.pc"
172
+ DESTINATION lib/pkgconfig)
173
+
174
+ #
175
+ # programs, examples and tests
176
+ #
177
+
178
+ if (WHISPER_BUILD_TESTS AND NOT CMAKE_JS_VERSION)
179
+ #include(CTest)
180
+ #add_subdirectory(tests)
181
+ endif ()
182
+
183
+ if (WHISPER_BUILD_EXAMPLES)
184
+ add_subdirectory(examples)
185
+ endif()
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2023-2024 The ggml authors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
Makefile CHANGED
@@ -971,7 +971,8 @@ $(LIB_WHISPER): \
971
  $(CXX) $(CXXFLAGS) -shared -fPIC -o $@ $^ $(LDFLAGS)
972
 
973
  $(LIB_WHISPER_S): \
974
- $(OBJ_WHISPER)
 
975
  ar rcs $(LIB_WHISPER_S) $^
976
 
977
  # common
 
971
  $(CXX) $(CXXFLAGS) -shared -fPIC -o $@ $^ $(LDFLAGS)
972
 
973
  $(LIB_WHISPER_S): \
974
+ $(OBJ_WHISPER) \
975
+ $(OBJ_GGML)
976
  ar rcs $(LIB_WHISPER_S) $^
977
 
978
  # common
Package.swift ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // swift-tools-version:5.5
2
+
3
+ import PackageDescription
4
+
5
+ let package = Package(
6
+ name: "whisper",
7
+ platforms: [
8
+ .macOS(.v12),
9
+ .iOS(.v14),
10
+ .watchOS(.v4),
11
+ .tvOS(.v14)
12
+ ],
13
+ products: [
14
+ .library(name: "whisper", targets: ["whisper"]),
15
+ ],
16
+ targets: [
17
+ .target(
18
+ name: "whisper",
19
+ path: ".",
20
+ exclude: [
21
+ "bindings",
22
+ "cmake",
23
+ "coreml",
24
+ "examples",
25
+ "extra",
26
+ "models",
27
+ "samples",
28
+ "tests",
29
+ "CMakeLists.txt",
30
+ "Makefile"
31
+ ],
32
+ sources: [
33
+ "ggml/src/ggml.c",
34
+ "src/whisper.cpp",
35
+ "ggml/src/ggml-aarch64.c",
36
+ "ggml/src/ggml-alloc.c",
37
+ "ggml/src/ggml-backend.c",
38
+ "ggml/src/ggml-quants.c",
39
+ "ggml/src/ggml-metal.m"
40
+ ],
41
+ resources: [.process("ggml-metal.metal")],
42
+ publicHeadersPath: "spm-headers",
43
+ cSettings: [
44
+ .unsafeFlags(["-Wno-shorten-64-to-32", "-O3", "-DNDEBUG"]),
45
+ .define("GGML_USE_ACCELERATE"),
46
+ .unsafeFlags(["-fno-objc-arc"]),
47
+ .define("GGML_USE_METAL")
48
+ // NOTE: NEW_LAPACK requires iOS version 16.4+
49
+ // We should consider adding this in the future when we drop support for iOS 14
50
+ // (ref: https://developer.apple.com/documentation/accelerate/1513264-cblas_sgemm?language=objc)
51
+ // .define("ACCELERATE_NEW_LAPACK"),
52
+ // .define("ACCELERATE_LAPACK_ILP64")
53
+ ],
54
+ linkerSettings: [
55
+ .linkedFramework("Accelerate")
56
+ ]
57
+ )
58
+ ],
59
+ cxxLanguageStandard: .cxx11
60
+ )
README.md CHANGED
@@ -1,13 +1,832 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
- title: AudioValidation
3
- emoji: 🐨
4
- colorFrom: indigo
5
- colorTo: gray
6
- sdk: streamlit
7
- sdk_version: 1.38.0
8
- app_file: app.py
9
- pinned: false
10
- license: apache-2.0
 
 
 
 
 
 
 
 
 
 
11
  ---
12
 
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # whisper.cpp
2
+
3
+ ![whisper.cpp](https://user-images.githubusercontent.com/1991296/235238348-05d0f6a4-da44-4900-a1de-d0707e75b763.jpeg)
4
+
5
+ [![Actions Status](https://github.com/ggerganov/whisper.cpp/workflows/CI/badge.svg)](https://github.com/ggerganov/whisper.cpp/actions)
6
+ [![License: MIT](https://img.shields.io/badge/license-MIT-blue.svg)](https://opensource.org/licenses/MIT)
7
+ [![Conan Center](https://shields.io/conan/v/whisper-cpp)](https://conan.io/center/whisper-cpp)
8
+ [![npm](https://img.shields.io/npm/v/whisper.cpp.svg)](https://www.npmjs.com/package/whisper.cpp/)
9
+
10
+ Stable: [v1.6.2](https://github.com/ggerganov/whisper.cpp/releases/tag/v1.6.2) / [Roadmap | F.A.Q.](https://github.com/ggerganov/whisper.cpp/discussions/126)
11
+
12
+ High-performance inference of [OpenAI's Whisper](https://github.com/openai/whisper) automatic speech recognition (ASR) model:
13
+
14
+ - Plain C/C++ implementation without dependencies
15
+ - Apple Silicon first-class citizen - optimized via ARM NEON, Accelerate framework, Metal and [Core ML](https://github.com/ggerganov/whisper.cpp#core-ml-support)
16
+ - AVX intrinsics support for x86 architectures
17
+ - VSX intrinsics support for POWER architectures
18
+ - Mixed F16 / F32 precision
19
+ - [4-bit and 5-bit integer quantization support](https://github.com/ggerganov/whisper.cpp#quantization)
20
+ - Zero memory allocations at runtime
21
+ - Support for CPU-only inference
22
+ - [Efficient GPU support for NVIDIA](https://github.com/ggerganov/whisper.cpp#nvidia-gpu-support-via-cublas)
23
+ - [OpenVINO Support](https://github.com/ggerganov/whisper.cpp#openvino-support)
24
+ - [C-style API](https://github.com/ggerganov/whisper.cpp/blob/master/whisper.h)
25
+
26
+ Supported platforms:
27
+
28
+ - [x] Mac OS (Intel and Arm)
29
+ - [x] [iOS](examples/whisper.objc)
30
+ - [x] [Android](examples/whisper.android)
31
+ - [x] [Java](bindings/java/README.md)
32
+ - [x] Linux / [FreeBSD](https://github.com/ggerganov/whisper.cpp/issues/56#issuecomment-1350920264)
33
+ - [x] [WebAssembly](examples/whisper.wasm)
34
+ - [x] Windows ([MSVC](https://github.com/ggerganov/whisper.cpp/blob/master/.github/workflows/build.yml#L117-L144) and [MinGW](https://github.com/ggerganov/whisper.cpp/issues/168))
35
+ - [x] [Raspberry Pi](https://github.com/ggerganov/whisper.cpp/discussions/166)
36
+ - [x] [Docker](https://github.com/ggerganov/whisper.cpp/pkgs/container/whisper.cpp)
37
+
38
+ The entire high-level implementation of the model is contained in [whisper.h](include/whisper.h) and [whisper.cpp](src/whisper.cpp).
39
+ The rest of the code is part of the [`ggml`](https://github.com/ggerganov/ggml) machine learning library.
40
+
41
+ Having such a lightweight implementation of the model makes it easy to integrate it in different platforms and applications.
42
+ As an example, here is a video of running the model on an iPhone 13 device - fully offline, on-device: [whisper.objc](examples/whisper.objc)
43
+
44
+ https://user-images.githubusercontent.com/1991296/197385372-962a6dea-bca1-4d50-bf96-1d8c27b98c81.mp4
45
+
46
+ You can also easily make your own offline voice assistant application: [command](examples/command)
47
+
48
+ https://user-images.githubusercontent.com/1991296/204038393-2f846eae-c255-4099-a76d-5735c25c49da.mp4
49
+
50
+ On Apple Silicon, the inference runs fully on the GPU via Metal:
51
+
52
+ https://github.com/ggerganov/whisper.cpp/assets/1991296/c82e8f86-60dc-49f2-b048-d2fdbd6b5225
53
+
54
+ Or you can even run it straight in the browser: [talk.wasm](examples/talk.wasm)
55
+
56
+ ## Implementation details
57
+
58
+ - The core tensor operations are implemented in C ([ggml.h](ggml/include/ggml.h) / [ggml.c](ggml/src/ggml.c))
59
+ - The transformer model and the high-level C-style API are implemented in C++ ([whisper.h](include/whisper.h) / [whisper.cpp](src/whisper.cpp))
60
+ - Sample usage is demonstrated in [main.cpp](examples/main)
61
+ - Sample real-time audio transcription from the microphone is demonstrated in [stream.cpp](examples/stream)
62
+ - Various other examples are available in the [examples](examples) folder
63
+
64
+ The tensor operators are optimized heavily for Apple silicon CPUs. Depending on the computation size, Arm Neon SIMD intrinsics or CBLAS Accelerate framework routines are used. The latter are especially effective for bigger sizes since the Accelerate framework utilizes the special-purpose AMX coprocessor available in modern Apple products.
65
+
66
+ ## Quick start
67
+
68
+ First clone the repository:
69
+
70
+ ```bash
71
+ git clone https://github.com/ggerganov/whisper.cpp.git
72
+ ```
73
+
74
+ Then, download one of the Whisper [models](models/README.md) converted in [`ggml` format](#ggml-format). For example:
75
+
76
+ ```bash
77
+ bash ./models/download-ggml-model.sh base.en
78
+ ```
79
+
80
+ Now build the [main](examples/main) example and transcribe an audio file like this:
81
+
82
+ ```bash
83
+ # build the main example
84
+ make
85
+
86
+ # transcribe an audio file
87
+ ./main -f samples/jfk.wav
88
+ ```
89
+
90
+ ---
91
+
92
+ For a quick demo, simply run `make base.en`:
93
+
94
+ ```text
95
+ $ make base.en
96
+
97
+ cc -I. -O3 -std=c11 -pthread -DGGML_USE_ACCELERATE -c ggml.c -o ggml.o
98
+ c++ -I. -I./examples -O3 -std=c++11 -pthread -c whisper.cpp -o whisper.o
99
+ c++ -I. -I./examples -O3 -std=c++11 -pthread examples/main/main.cpp whisper.o ggml.o -o main -framework Accelerate
100
+ ./main -h
101
+
102
+ usage: ./main [options] file0.wav file1.wav ...
103
+
104
+ options:
105
+ -h, --help [default] show this help message and exit
106
+ -t N, --threads N [4 ] number of threads to use during computation
107
+ -p N, --processors N [1 ] number of processors to use during computation
108
+ -ot N, --offset-t N [0 ] time offset in milliseconds
109
+ -on N, --offset-n N [0 ] segment index offset
110
+ -d N, --duration N [0 ] duration of audio to process in milliseconds
111
+ -mc N, --max-context N [-1 ] maximum number of text context tokens to store
112
+ -ml N, --max-len N [0 ] maximum segment length in characters
113
+ -sow, --split-on-word [false ] split on word rather than on token
114
+ -bo N, --best-of N [5 ] number of best candidates to keep
115
+ -bs N, --beam-size N [5 ] beam size for beam search
116
+ -wt N, --word-thold N [0.01 ] word timestamp probability threshold
117
+ -et N, --entropy-thold N [2.40 ] entropy threshold for decoder fail
118
+ -lpt N, --logprob-thold N [-1.00 ] log probability threshold for decoder fail
119
+ -debug, --debug-mode [false ] enable debug mode (eg. dump log_mel)
120
+ -tr, --translate [false ] translate from source language to english
121
+ -di, --diarize [false ] stereo audio diarization
122
+ -tdrz, --tinydiarize [false ] enable tinydiarize (requires a tdrz model)
123
+ -nf, --no-fallback [false ] do not use temperature fallback while decoding
124
+ -otxt, --output-txt [false ] output result in a text file
125
+ -ovtt, --output-vtt [false ] output result in a vtt file
126
+ -osrt, --output-srt [false ] output result in a srt file
127
+ -olrc, --output-lrc [false ] output result in a lrc file
128
+ -owts, --output-words [false ] output script for generating karaoke video
129
+ -fp, --font-path [/System/Library/Fonts/Supplemental/Courier New Bold.ttf] path to a monospace font for karaoke video
130
+ -ocsv, --output-csv [false ] output result in a CSV file
131
+ -oj, --output-json [false ] output result in a JSON file
132
+ -ojf, --output-json-full [false ] include more information in the JSON file
133
+ -of FNAME, --output-file FNAME [ ] output file path (without file extension)
134
+ -ps, --print-special [false ] print special tokens
135
+ -pc, --print-colors [false ] print colors
136
+ -pp, --print-progress [false ] print progress
137
+ -nt, --no-timestamps [false ] do not print timestamps
138
+ -l LANG, --language LANG [en ] spoken language ('auto' for auto-detect)
139
+ -dl, --detect-language [false ] exit after automatically detecting language
140
+ --prompt PROMPT [ ] initial prompt
141
+ -m FNAME, --model FNAME [models/ggml-base.en.bin] model path
142
+ -f FNAME, --file FNAME [ ] input WAV file path
143
+ -oved D, --ov-e-device DNAME [CPU ] the OpenVINO device used for encode inference
144
+ -ls, --log-score [false ] log best decoder scores of tokens
145
+ -ng, --no-gpu [false ] disable GPU
146
+
147
+
148
+ bash ./models/download-ggml-model.sh base.en
149
+ Downloading ggml model base.en ...
150
+ ggml-base.en.bin 100%[========================>] 141.11M 6.34MB/s in 24s
151
+ Done! Model 'base.en' saved in 'models/ggml-base.en.bin'
152
+ You can now use it like this:
153
+
154
+ $ ./main -m models/ggml-base.en.bin -f samples/jfk.wav
155
+
156
+
157
+ ===============================================
158
+ Running base.en on all samples in ./samples ...
159
+ ===============================================
160
+
161
+ ----------------------------------------------
162
+ [+] Running base.en on samples/jfk.wav ... (run 'ffplay samples/jfk.wav' to listen)
163
+ ----------------------------------------------
164
+
165
+ whisper_init_from_file: loading model from 'models/ggml-base.en.bin'
166
+ whisper_model_load: loading model
167
+ whisper_model_load: n_vocab = 51864
168
+ whisper_model_load: n_audio_ctx = 1500
169
+ whisper_model_load: n_audio_state = 512
170
+ whisper_model_load: n_audio_head = 8
171
+ whisper_model_load: n_audio_layer = 6
172
+ whisper_model_load: n_text_ctx = 448
173
+ whisper_model_load: n_text_state = 512
174
+ whisper_model_load: n_text_head = 8
175
+ whisper_model_load: n_text_layer = 6
176
+ whisper_model_load: n_mels = 80
177
+ whisper_model_load: f16 = 1
178
+ whisper_model_load: type = 2
179
+ whisper_model_load: mem required = 215.00 MB (+ 6.00 MB per decoder)
180
+ whisper_model_load: kv self size = 5.25 MB
181
+ whisper_model_load: kv cross size = 17.58 MB
182
+ whisper_model_load: adding 1607 extra tokens
183
+ whisper_model_load: model ctx = 140.60 MB
184
+ whisper_model_load: model size = 140.54 MB
185
+
186
+ system_info: n_threads = 4 / 10 | AVX = 0 | AVX2 = 0 | AVX512 = 0 | FMA = 0 | NEON = 1 | ARM_FMA = 1 | F16C = 0 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 0 | VSX = 0 |
187
+
188
+ main: processing 'samples/jfk.wav' (176000 samples, 11.0 sec), 4 threads, 1 processors, lang = en, task = transcribe, timestamps = 1 ...
189
+
190
+
191
+ [00:00:00.000 --> 00:00:11.000] And so my fellow Americans, ask not what your country can do for you, ask what you can do for your country.
192
+
193
+
194
+ whisper_print_timings: fallbacks = 0 p / 0 h
195
+ whisper_print_timings: load time = 113.81 ms
196
+ whisper_print_timings: mel time = 15.40 ms
197
+ whisper_print_timings: sample time = 11.58 ms / 27 runs ( 0.43 ms per run)
198
+ whisper_print_timings: encode time = 266.60 ms / 1 runs ( 266.60 ms per run)
199
+ whisper_print_timings: decode time = 66.11 ms / 27 runs ( 2.45 ms per run)
200
+ whisper_print_timings: total time = 476.31 ms
201
+ ```
202
+
203
+ The command downloads the `base.en` model converted to custom `ggml` format and runs the inference on all `.wav` samples in the folder `samples`.
204
+
205
+ For detailed usage instructions, run: `./main -h`
206
+
207
+ Note that the [main](examples/main) example currently runs only with 16-bit WAV files, so make sure to convert your input before running the tool.
208
+ For example, you can use `ffmpeg` like this:
209
+
210
+ ```bash
211
+ ffmpeg -i input.mp3 -ar 16000 -ac 1 -c:a pcm_s16le output.wav
212
+ ```
213
+
214
+ ## More audio samples
215
+
216
+ If you want some extra audio samples to play with, simply run:
217
+
218
+ ```
219
+ make samples
220
+ ```
221
+
222
+ This will download a few more audio files from Wikipedia and convert them to 16-bit WAV format via `ffmpeg`.
223
+
224
+ You can download and run the other models as follows:
225
+
226
+ ```
227
+ make tiny.en
228
+ make tiny
229
+ make base.en
230
+ make base
231
+ make small.en
232
+ make small
233
+ make medium.en
234
+ make medium
235
+ make large-v1
236
+ make large-v2
237
+ make large-v3
238
+ ```
239
+
240
+ ## Memory usage
241
+
242
+ | Model | Disk | Mem |
243
+ | ------ | ------- | ------- |
244
+ | tiny | 75 MiB | ~273 MB |
245
+ | base | 142 MiB | ~388 MB |
246
+ | small | 466 MiB | ~852 MB |
247
+ | medium | 1.5 GiB | ~2.1 GB |
248
+ | large | 2.9 GiB | ~3.9 GB |
249
+
250
+ ## Quantization
251
+
252
+ `whisper.cpp` supports integer quantization of the Whisper `ggml` models.
253
+ Quantized models require less memory and disk space and depending on the hardware can be processed more efficiently.
254
+
255
+ Here are the steps for creating and using a quantized model:
256
+
257
+ ```bash
258
+ # quantize a model with Q5_0 method
259
+ make quantize
260
+ ./quantize models/ggml-base.en.bin models/ggml-base.en-q5_0.bin q5_0
261
+
262
+ # run the examples as usual, specifying the quantized model file
263
+ ./main -m models/ggml-base.en-q5_0.bin ./samples/gb0.wav
264
+ ```
265
+
266
+ ## Core ML support
267
+
268
+ On Apple Silicon devices, the Encoder inference can be executed on the Apple Neural Engine (ANE) via Core ML. This can result in significant
269
+ speed-up - more than x3 faster compared with CPU-only execution. Here are the instructions for generating a Core ML model and using it with `whisper.cpp`:
270
+
271
+ - Install Python dependencies needed for the creation of the Core ML model:
272
+
273
+ ```bash
274
+ pip install ane_transformers
275
+ pip install openai-whisper
276
+ pip install coremltools
277
+ ```
278
+
279
+ - To ensure `coremltools` operates correctly, please confirm that [Xcode](https://developer.apple.com/xcode/) is installed and execute `xcode-select --install` to install the command-line tools.
280
+ - Python 3.10 is recommended.
281
+ - MacOS Sonoma (version 14) or newer is recommended, as older versions of MacOS might experience issues with transcription hallucination.
282
+ - [OPTIONAL] It is recommended to utilize a Python version management system, such as [Miniconda](https://docs.conda.io/en/latest/miniconda.html) for this step:
283
+ - To create an environment, use: `conda create -n py310-whisper python=3.10 -y`
284
+ - To activate the environment, use: `conda activate py310-whisper`
285
+
286
+ - Generate a Core ML model. For example, to generate a `base.en` model, use:
287
+
288
+ ```bash
289
+ ./models/generate-coreml-model.sh base.en
290
+ ```
291
+
292
+ This will generate the folder `models/ggml-base.en-encoder.mlmodelc`
293
+
294
+ - Build `whisper.cpp` with Core ML support:
295
+
296
+ ```bash
297
+ # using Makefile
298
+ make clean
299
+ WHISPER_COREML=1 make -j
300
+
301
+ # using CMake
302
+ cmake -B build -DWHISPER_COREML=1
303
+ cmake --build build -j --config Release
304
+ ```
305
+
306
+ - Run the examples as usual. For example:
307
+
308
+ ```text
309
+ $ ./main -m models/ggml-base.en.bin -f samples/jfk.wav
310
+
311
+ ...
312
+
313
+ whisper_init_state: loading Core ML model from 'models/ggml-base.en-encoder.mlmodelc'
314
+ whisper_init_state: first run on a device may take a while ...
315
+ whisper_init_state: Core ML model loaded
316
+
317
+ system_info: n_threads = 4 / 10 | AVX = 0 | AVX2 = 0 | AVX512 = 0 | FMA = 0 | NEON = 1 | ARM_FMA = 1 | F16C = 0 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 0 | VSX = 0 | COREML = 1 |
318
+
319
+ ...
320
+ ```
321
+
322
+ The first run on a device is slow, since the ANE service compiles the Core ML model to some device-specific format.
323
+ Next runs are faster.
324
+
325
+ For more information about the Core ML implementation please refer to PR [#566](https://github.com/ggerganov/whisper.cpp/pull/566).
326
+
327
+ ## OpenVINO support
328
+
329
+ On platforms that support [OpenVINO](https://github.com/openvinotoolkit/openvino), the Encoder inference can be executed
330
+ on OpenVINO-supported devices including x86 CPUs and Intel GPUs (integrated & discrete).
331
+
332
+ This can result in significant speedup in encoder performance. Here are the instructions for generating the OpenVINO model and using it with `whisper.cpp`:
333
+
334
+ - First, setup python virtual env. and install python dependencies. Python 3.10 is recommended.
335
+
336
+ Windows:
337
+
338
+ ```powershell
339
+ cd models
340
+ python -m venv openvino_conv_env
341
+ openvino_conv_env\Scripts\activate
342
+ python -m pip install --upgrade pip
343
+ pip install -r requirements-openvino.txt
344
+ ```
345
+
346
+ Linux and macOS:
347
+
348
+ ```bash
349
+ cd models
350
+ python3 -m venv openvino_conv_env
351
+ source openvino_conv_env/bin/activate
352
+ python -m pip install --upgrade pip
353
+ pip install -r requirements-openvino.txt
354
+ ```
355
+
356
+ - Generate an OpenVINO encoder model. For example, to generate a `base.en` model, use:
357
+
358
+ ```
359
+ python convert-whisper-to-openvino.py --model base.en
360
+ ```
361
+
362
+ This will produce ggml-base.en-encoder-openvino.xml/.bin IR model files. It's recommended to relocate these to the same folder as `ggml` models, as that
363
+ is the default location that the OpenVINO extension will search at runtime.
364
+
365
+ - Build `whisper.cpp` with OpenVINO support:
366
+
367
+ Download OpenVINO package from [release page](https://github.com/openvinotoolkit/openvino/releases). The recommended version to use is [2023.0.0](https://github.com/openvinotoolkit/openvino/releases/tag/2023.0.0).
368
+
369
+ After downloading & extracting package onto your development system, set up required environment by sourcing setupvars script. For example:
370
+
371
+ Linux:
372
+
373
+ ```bash
374
+ source /path/to/l_openvino_toolkit_ubuntu22_2023.0.0.10926.b4452d56304_x86_64/setupvars.sh
375
+ ```
376
+
377
+ Windows (cmd):
378
+
379
+ ```powershell
380
+ C:\Path\To\w_openvino_toolkit_windows_2023.0.0.10926.b4452d56304_x86_64\setupvars.bat
381
+ ```
382
+
383
+ And then build the project using cmake:
384
+
385
+ ```bash
386
+ cmake -B build -DWHISPER_OPENVINO=1
387
+ cmake --build build -j --config Release
388
+ ```
389
+
390
+ - Run the examples as usual. For example:
391
+
392
+ ```text
393
+ $ ./main -m models/ggml-base.en.bin -f samples/jfk.wav
394
+
395
+ ...
396
+
397
+ whisper_ctx_init_openvino_encoder: loading OpenVINO model from 'models/ggml-base.en-encoder-openvino.xml'
398
+ whisper_ctx_init_openvino_encoder: first run on a device may take a while ...
399
+ whisper_openvino_init: path_model = models/ggml-base.en-encoder-openvino.xml, device = GPU, cache_dir = models/ggml-base.en-encoder-openvino-cache
400
+ whisper_ctx_init_openvino_encoder: OpenVINO model loaded
401
+
402
+ system_info: n_threads = 4 / 8 | AVX = 1 | AVX2 = 1 | AVX512 = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 1 | VSX = 0 | COREML = 0 | OPENVINO = 1 |
403
+
404
+ ...
405
+ ```
406
+
407
+ The first time run on an OpenVINO device is slow, since the OpenVINO framework will compile the IR (Intermediate Representation) model to a device-specific 'blob'. This device-specific blob will get
408
+ cached for the next run.
409
+
410
+ For more information about the OpenVINO implementation please refer to PR [#1037](https://github.com/ggerganov/whisper.cpp/pull/1037).
411
+
412
+ ## NVIDIA GPU support
413
+
414
+ With NVIDIA cards the processing of the models is done efficiently on the GPU via cuBLAS and custom CUDA kernels.
415
+ First, make sure you have installed `cuda`: https://developer.nvidia.com/cuda-downloads
416
+
417
+ Now build `whisper.cpp` with CUDA support:
418
+
419
+ ```
420
+ make clean
421
+ GGML_CUDA=1 make -j
422
+ ```
423
+
424
+ ## BLAS CPU support via OpenBLAS
425
+
426
+ Encoder processing can be accelerated on the CPU via OpenBLAS.
427
+ First, make sure you have installed `openblas`: https://www.openblas.net/
428
+
429
+ Now build `whisper.cpp` with OpenBLAS support:
430
+
431
+ ```
432
+ make clean
433
+ GGML_OPENBLAS=1 make -j
434
+ ```
435
+
436
+ ## BLAS CPU support via Intel MKL
437
+
438
+ Encoder processing can be accelerated on the CPU via the BLAS compatible interface of Intel's Math Kernel Library.
439
+ First, make sure you have installed Intel's MKL runtime and development packages: https://www.intel.com/content/www/us/en/developer/tools/oneapi/onemkl-download.html
440
+
441
+ Now build `whisper.cpp` with Intel MKL BLAS support:
442
+
443
+ ```
444
+ source /opt/intel/oneapi/setvars.sh
445
+ mkdir build
446
+ cd build
447
+ cmake -DWHISPER_MKL=ON ..
448
+ WHISPER_MKL=1 make -j
449
+ ```
450
+
451
+ ## Docker
452
+
453
+ ### Prerequisites
454
+
455
+ - Docker must be installed and running on your system.
456
+ - Create a folder to store big models & intermediate files (ex. /whisper/models)
457
+
458
+ ### Images
459
+
460
+ We have two Docker images available for this project:
461
+
462
+ 1. `ghcr.io/ggerganov/whisper.cpp:main`: This image includes the main executable file as well as `curl` and `ffmpeg`. (platforms: `linux/amd64`, `linux/arm64`)
463
+ 2. `ghcr.io/ggerganov/whisper.cpp:main-cuda`: Same as `main` but compiled with CUDA support. (platforms: `linux/amd64`)
464
+
465
+ ### Usage
466
+
467
+ ```shell
468
+ # download model and persist it in a local folder
469
+ docker run -it --rm \
470
+ -v path/to/models:/models \
471
+ whisper.cpp:main "./models/download-ggml-model.sh base /models"
472
+ # transcribe an audio file
473
+ docker run -it --rm \
474
+ -v path/to/models:/models \
475
+ -v path/to/audios:/audios \
476
+ whisper.cpp:main "./main -m /models/ggml-base.bin -f /audios/jfk.wav"
477
+ # transcribe an audio file in samples folder
478
+ docker run -it --rm \
479
+ -v path/to/models:/models \
480
+ whisper.cpp:main "./main -m /models/ggml-base.bin -f ./samples/jfk.wav"
481
+ ```
482
+
483
+ ## Installing with Conan
484
+
485
+ You can install pre-built binaries for whisper.cpp or build it from source using [Conan](https://conan.io/). Use the following command:
486
+
487
+ ```
488
+ conan install --requires="whisper-cpp/[*]" --build=missing
489
+ ```
490
+
491
+ For detailed instructions on how to use Conan, please refer to the [Conan documentation](https://docs.conan.io/2/).
492
+
493
+ ## Limitations
494
+
495
+ - Inference only
496
+
497
+ ## Another example
498
+
499
+ Here is another example of transcribing a [3:24 min speech](https://upload.wikimedia.org/wikipedia/commons/1/1f/George_W_Bush_Columbia_FINAL.ogg)
500
+ in about half a minute on a MacBook M1 Pro, using `medium.en` model:
501
+
502
+ <details>
503
+ <summary>Expand to see the result</summary>
504
+
505
+ ```text
506
+ $ ./main -m models/ggml-medium.en.bin -f samples/gb1.wav -t 8
507
+
508
+ whisper_init_from_file: loading model from 'models/ggml-medium.en.bin'
509
+ whisper_model_load: loading model
510
+ whisper_model_load: n_vocab = 51864
511
+ whisper_model_load: n_audio_ctx = 1500
512
+ whisper_model_load: n_audio_state = 1024
513
+ whisper_model_load: n_audio_head = 16
514
+ whisper_model_load: n_audio_layer = 24
515
+ whisper_model_load: n_text_ctx = 448
516
+ whisper_model_load: n_text_state = 1024
517
+ whisper_model_load: n_text_head = 16
518
+ whisper_model_load: n_text_layer = 24
519
+ whisper_model_load: n_mels = 80
520
+ whisper_model_load: f16 = 1
521
+ whisper_model_load: type = 4
522
+ whisper_model_load: mem required = 1720.00 MB (+ 43.00 MB per decoder)
523
+ whisper_model_load: kv self size = 42.00 MB
524
+ whisper_model_load: kv cross size = 140.62 MB
525
+ whisper_model_load: adding 1607 extra tokens
526
+ whisper_model_load: model ctx = 1462.35 MB
527
+ whisper_model_load: model size = 1462.12 MB
528
+
529
+ system_info: n_threads = 8 / 10 | AVX = 0 | AVX2 = 0 | AVX512 = 0 | FMA = 0 | NEON = 1 | ARM_FMA = 1 | F16C = 0 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 0 | VSX = 0 |
530
+
531
+ main: processing 'samples/gb1.wav' (3179750 samples, 198.7 sec), 8 threads, 1 processors, lang = en, task = transcribe, timestamps = 1 ...
532
+
533
+
534
+ [00:00:00.000 --> 00:00:08.000] My fellow Americans, this day has brought terrible news and great sadness to our country.
535
+ [00:00:08.000 --> 00:00:17.000] At nine o'clock this morning, Mission Control in Houston lost contact with our Space Shuttle Columbia.
536
+ [00:00:17.000 --> 00:00:23.000] A short time later, debris was seen falling from the skies above Texas.
537
+ [00:00:23.000 --> 00:00:29.000] The Columbia's lost. There are no survivors.
538
+ [00:00:29.000 --> 00:00:32.000] On board was a crew of seven.
539
+ [00:00:32.000 --> 00:00:39.000] Colonel Rick Husband, Lieutenant Colonel Michael Anderson, Commander Laurel Clark,
540
+ [00:00:39.000 --> 00:00:48.000] Captain David Brown, Commander William McCool, Dr. Kultna Shavla, and Ilan Ramon,
541
+ [00:00:48.000 --> 00:00:52.000] a colonel in the Israeli Air Force.
542
+ [00:00:52.000 --> 00:00:58.000] These men and women assumed great risk in the service to all humanity.
543
+ [00:00:58.000 --> 00:01:03.000] In an age when space flight has come to seem almost routine,
544
+ [00:01:03.000 --> 00:01:07.000] it is easy to overlook the dangers of travel by rocket
545
+ [00:01:07.000 --> 00:01:12.000] and the difficulties of navigating the fierce outer atmosphere of the Earth.
546
+ [00:01:12.000 --> 00:01:18.000] These astronauts knew the dangers, and they faced them willingly,
547
+ [00:01:18.000 --> 00:01:23.000] knowing they had a high and noble purpose in life.
548
+ [00:01:23.000 --> 00:01:31.000] Because of their courage and daring and idealism, we will miss them all the more.
549
+ [00:01:31.000 --> 00:01:36.000] All Americans today are thinking as well of the families of these men and women
550
+ [00:01:36.000 --> 00:01:40.000] who have been given this sudden shock and grief.
551
+ [00:01:40.000 --> 00:01:45.000] You're not alone. Our entire nation grieves with you,
552
+ [00:01:45.000 --> 00:01:52.000] and those you love will always have the respect and gratitude of this country.
553
+ [00:01:52.000 --> 00:01:56.000] The cause in which they died will continue.
554
+ [00:01:56.000 --> 00:02:04.000] Mankind is led into the darkness beyond our world by the inspiration of discovery
555
+ [00:02:04.000 --> 00:02:11.000] and the longing to understand. Our journey into space will go on.
556
+ [00:02:11.000 --> 00:02:16.000] In the skies today, we saw destruction and tragedy.
557
+ [00:02:16.000 --> 00:02:22.000] Yet farther than we can see, there is comfort and hope.
558
+ [00:02:22.000 --> 00:02:29.000] In the words of the prophet Isaiah, "Lift your eyes and look to the heavens
559
+ [00:02:29.000 --> 00:02:35.000] who created all these. He who brings out the starry hosts one by one
560
+ [00:02:35.000 --> 00:02:39.000] and calls them each by name."
561
+ [00:02:39.000 --> 00:02:46.000] Because of His great power and mighty strength, not one of them is missing.
562
+ [00:02:46.000 --> 00:02:55.000] The same Creator who names the stars also knows the names of the seven souls we mourn today.
563
+ [00:02:55.000 --> 00:03:01.000] The crew of the shuttle Columbia did not return safely to earth,
564
+ [00:03:01.000 --> 00:03:05.000] yet we can pray that all are safely home.
565
+ [00:03:05.000 --> 00:03:13.000] May God bless the grieving families, and may God continue to bless America.
566
+ [00:03:13.000 --> 00:03:19.000] [Silence]
567
+
568
+
569
+ whisper_print_timings: fallbacks = 1 p / 0 h
570
+ whisper_print_timings: load time = 569.03 ms
571
+ whisper_print_timings: mel time = 146.85 ms
572
+ whisper_print_timings: sample time = 238.66 ms / 553 runs ( 0.43 ms per run)
573
+ whisper_print_timings: encode time = 18665.10 ms / 9 runs ( 2073.90 ms per run)
574
+ whisper_print_timings: decode time = 13090.93 ms / 549 runs ( 23.85 ms per run)
575
+ whisper_print_timings: total time = 32733.52 ms
576
+ ```
577
+
578
+ </details>
579
+
580
+ ## Real-time audio input example
581
+
582
+ This is a naive example of performing real-time inference on audio from your microphone.
583
+ The [stream](examples/stream) tool samples the audio every half a second and runs the transcription continuously.
584
+ More info is available in [issue #10](https://github.com/ggerganov/whisper.cpp/issues/10).
585
+
586
+ ```bash
587
+ make stream
588
+ ./stream -m ./models/ggml-base.en.bin -t 8 --step 500 --length 5000
589
+ ```
590
+
591
+ https://user-images.githubusercontent.com/1991296/194935793-76afede7-cfa8-48d8-a80f-28ba83be7d09.mp4
592
+
593
+ ## Confidence color-coding
594
+
595
+ Adding the `--print-colors` argument will print the transcribed text using an experimental color coding strategy
596
+ to highlight words with high or low confidence:
597
+
598
+ ```bash
599
+ ./main -m models/ggml-base.en.bin -f samples/gb0.wav --print-colors
600
+ ```
601
+
602
+ <img width="965" alt="image" src="https://user-images.githubusercontent.com/1991296/197356445-311c8643-9397-4e5e-b46e-0b4b4daa2530.png">
603
+
604
+ ## Controlling the length of the generated text segments (experimental)
605
+
606
+ For example, to limit the line length to a maximum of 16 characters, simply add `-ml 16`:
607
+
608
+ ```text
609
+ $ ./main -m ./models/ggml-base.en.bin -f ./samples/jfk.wav -ml 16
610
+
611
+ whisper_model_load: loading model from './models/ggml-base.en.bin'
612
+ ...
613
+ system_info: n_threads = 4 / 10 | AVX2 = 0 | AVX512 = 0 | NEON = 1 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 |
614
+
615
+ main: processing './samples/jfk.wav' (176000 samples, 11.0 sec), 4 threads, 1 processors, lang = en, task = transcribe, timestamps = 1 ...
616
+
617
+ [00:00:00.000 --> 00:00:00.850] And so my
618
+ [00:00:00.850 --> 00:00:01.590] fellow
619
+ [00:00:01.590 --> 00:00:04.140] Americans, ask
620
+ [00:00:04.140 --> 00:00:05.660] not what your
621
+ [00:00:05.660 --> 00:00:06.840] country can do
622
+ [00:00:06.840 --> 00:00:08.430] for you, ask
623
+ [00:00:08.430 --> 00:00:09.440] what you can do
624
+ [00:00:09.440 --> 00:00:10.020] for your
625
+ [00:00:10.020 --> 00:00:11.000] country.
626
+ ```
627
+
628
+ ## Word-level timestamp (experimental)
629
+
630
+ The `--max-len` argument can be used to obtain word-level timestamps. Simply use `-ml 1`:
631
+
632
+ ```text
633
+ $ ./main -m ./models/ggml-base.en.bin -f ./samples/jfk.wav -ml 1
634
+
635
+ whisper_model_load: loading model from './models/ggml-base.en.bin'
636
+ ...
637
+ system_info: n_threads = 4 / 10 | AVX2 = 0 | AVX512 = 0 | NEON = 1 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 |
638
+
639
+ main: processing './samples/jfk.wav' (176000 samples, 11.0 sec), 4 threads, 1 processors, lang = en, task = transcribe, timestamps = 1 ...
640
+
641
+ [00:00:00.000 --> 00:00:00.320]
642
+ [00:00:00.320 --> 00:00:00.370] And
643
+ [00:00:00.370 --> 00:00:00.690] so
644
+ [00:00:00.690 --> 00:00:00.850] my
645
+ [00:00:00.850 --> 00:00:01.590] fellow
646
+ [00:00:01.590 --> 00:00:02.850] Americans
647
+ [00:00:02.850 --> 00:00:03.300] ,
648
+ [00:00:03.300 --> 00:00:04.140] ask
649
+ [00:00:04.140 --> 00:00:04.990] not
650
+ [00:00:04.990 --> 00:00:05.410] what
651
+ [00:00:05.410 --> 00:00:05.660] your
652
+ [00:00:05.660 --> 00:00:06.260] country
653
+ [00:00:06.260 --> 00:00:06.600] can
654
+ [00:00:06.600 --> 00:00:06.840] do
655
+ [00:00:06.840 --> 00:00:07.010] for
656
+ [00:00:07.010 --> 00:00:08.170] you
657
+ [00:00:08.170 --> 00:00:08.190] ,
658
+ [00:00:08.190 --> 00:00:08.430] ask
659
+ [00:00:08.430 --> 00:00:08.910] what
660
+ [00:00:08.910 --> 00:00:09.040] you
661
+ [00:00:09.040 --> 00:00:09.320] can
662
+ [00:00:09.320 --> 00:00:09.440] do
663
+ [00:00:09.440 --> 00:00:09.760] for
664
+ [00:00:09.760 --> 00:00:10.020] your
665
+ [00:00:10.020 --> 00:00:10.510] country
666
+ [00:00:10.510 --> 00:00:11.000] .
667
+ ```
668
+
669
+ ## Speaker segmentation via tinydiarize (experimental)
670
+
671
+ More information about this approach is available here: https://github.com/ggerganov/whisper.cpp/pull/1058
672
+
673
+ Sample usage:
674
+
675
+ ```py
676
+ # download a tinydiarize compatible model
677
+ ./models/download-ggml-model.sh small.en-tdrz
678
+
679
+ # run as usual, adding the "-tdrz" command-line argument
680
+ ./main -f ./samples/a13.wav -m ./models/ggml-small.en-tdrz.bin -tdrz
681
+ ...
682
+ main: processing './samples/a13.wav' (480000 samples, 30.0 sec), 4 threads, 1 processors, lang = en, task = transcribe, tdrz = 1, timestamps = 1 ...
683
+ ...
684
+ [00:00:00.000 --> 00:00:03.800] Okay Houston, we've had a problem here. [SPEAKER_TURN]
685
+ [00:00:03.800 --> 00:00:06.200] This is Houston. Say again please. [SPEAKER_TURN]
686
+ [00:00:06.200 --> 00:00:08.260] Uh Houston we've had a problem.
687
+ [00:00:08.260 --> 00:00:11.320] We've had a main beam up on a volt. [SPEAKER_TURN]
688
+ [00:00:11.320 --> 00:00:13.820] Roger main beam interval. [SPEAKER_TURN]
689
+ [00:00:13.820 --> 00:00:15.100] Uh uh [SPEAKER_TURN]
690
+ [00:00:15.100 --> 00:00:18.020] So okay stand, by thirteen we're looking at it. [SPEAKER_TURN]
691
+ [00:00:18.020 --> 00:00:25.740] Okay uh right now uh Houston the uh voltage is uh is looking good um.
692
+ [00:00:27.620 --> 00:00:29.940] And we had a a pretty large bank or so.
693
+ ```
694
+
695
+ ## Karaoke-style movie generation (experimental)
696
+
697
+ The [main](examples/main) example provides support for output of karaoke-style movies, where the
698
+ currently pronounced word is highlighted. Use the `-owts` argument and run the generated bash script.
699
+ This requires to have `ffmpeg` installed.
700
+
701
+ Here are a few _"typical"_ examples:
702
+
703
+ ```bash
704
+ ./main -m ./models/ggml-base.en.bin -f ./samples/jfk.wav -owts
705
+ source ./samples/jfk.wav.wts
706
+ ffplay ./samples/jfk.wav.mp4
707
+ ```
708
+
709
+ https://user-images.githubusercontent.com/1991296/199337465-dbee4b5e-9aeb-48a3-b1c6-323ac4db5b2c.mp4
710
+
711
  ---
712
+
713
+ ```bash
714
+ ./main -m ./models/ggml-base.en.bin -f ./samples/mm0.wav -owts
715
+ source ./samples/mm0.wav.wts
716
+ ffplay ./samples/mm0.wav.mp4
717
+ ```
718
+
719
+ https://user-images.githubusercontent.com/1991296/199337504-cc8fd233-0cb7-4920-95f9-4227de3570aa.mp4
720
+
721
+ ---
722
+
723
+ ```bash
724
+ ./main -m ./models/ggml-base.en.bin -f ./samples/gb0.wav -owts
725
+ source ./samples/gb0.wav.wts
726
+ ffplay ./samples/gb0.wav.mp4
727
+ ```
728
+
729
+ https://user-images.githubusercontent.com/1991296/199337538-b7b0c7a3-2753-4a88-a0cd-f28a317987ba.mp4
730
+
731
  ---
732
 
733
+ ## Video comparison of different models
734
+
735
+ Use the [scripts/bench-wts.sh](https://github.com/ggerganov/whisper.cpp/blob/master/scripts/bench-wts.sh) script to generate a video in the following format:
736
+
737
+ ```bash
738
+ ./scripts/bench-wts.sh samples/jfk.wav
739
+ ffplay ./samples/jfk.wav.all.mp4
740
+ ```
741
+
742
+ https://user-images.githubusercontent.com/1991296/223206245-2d36d903-cf8e-4f09-8c3b-eb9f9c39d6fc.mp4
743
+
744
+ ---
745
+
746
+ ## Benchmarks
747
+
748
+ In order to have an objective comparison of the performance of the inference across different system configurations,
749
+ use the [bench](examples/bench) tool. The tool simply runs the Encoder part of the model and prints how much time it
750
+ took to execute it. The results are summarized in the following Github issue:
751
+
752
+ [Benchmark results](https://github.com/ggerganov/whisper.cpp/issues/89)
753
+
754
+ Additionally a script to run whisper.cpp with different models and audio files is provided [bench.py](scripts/bench.py).
755
+
756
+ You can run it with the following command, by default it will run against any standard model in the models folder.
757
+
758
+ ```bash
759
+ python3 scripts/bench.py -f samples/jfk.wav -t 2,4,8 -p 1,2
760
+ ```
761
+
762
+ It is written in python with the intention of being easy to modify and extend for your benchmarking use case.
763
+
764
+ It outputs a csv file with the results of the benchmarking.
765
+
766
+ ## `ggml` format
767
+
768
+ The original models are converted to a custom binary format. This allows packing everything needed into a single file:
769
+
770
+ - model parameters
771
+ - mel filters
772
+ - vocabulary
773
+ - weights
774
+
775
+ You can download the converted models using the [models/download-ggml-model.sh](models/download-ggml-model.sh) script
776
+ or manually from here:
777
+
778
+ - https://huggingface.co/ggerganov/whisper.cpp
779
+ - https://ggml.ggerganov.com
780
+
781
+ For more details, see the conversion script [models/convert-pt-to-ggml.py](models/convert-pt-to-ggml.py) or [models/README.md](models/README.md).
782
+
783
+ ## [Bindings](https://github.com/ggerganov/whisper.cpp/discussions/categories/bindings)
784
+
785
+ - [x] Rust: [tazz4843/whisper-rs](https://github.com/tazz4843/whisper-rs) | [#310](https://github.com/ggerganov/whisper.cpp/discussions/310)
786
+ - [x] JavaScript: [bindings/javascript](bindings/javascript) | [#309](https://github.com/ggerganov/whisper.cpp/discussions/309)
787
+ - React Native (iOS / Android): [whisper.rn](https://github.com/mybigday/whisper.rn)
788
+ - [x] Go: [bindings/go](bindings/go) | [#312](https://github.com/ggerganov/whisper.cpp/discussions/312)
789
+ - [x] Java:
790
+ - [GiviMAD/whisper-jni](https://github.com/GiviMAD/whisper-jni)
791
+ - [x] Ruby: [bindings/ruby](bindings/ruby) | [#507](https://github.com/ggerganov/whisper.cpp/discussions/507)
792
+ - [x] Objective-C / Swift: [ggerganov/whisper.spm](https://github.com/ggerganov/whisper.spm) | [#313](https://github.com/ggerganov/whisper.cpp/discussions/313)
793
+ - [exPHAT/SwiftWhisper](https://github.com/exPHAT/SwiftWhisper)
794
+ - [x] .NET: | [#422](https://github.com/ggerganov/whisper.cpp/discussions/422)
795
+ - [sandrohanea/whisper.net](https://github.com/sandrohanea/whisper.net)
796
+ - [NickDarvey/whisper](https://github.com/NickDarvey/whisper)
797
+ - [x] Python: | [#9](https://github.com/ggerganov/whisper.cpp/issues/9)
798
+ - [stlukey/whispercpp.py](https://github.com/stlukey/whispercpp.py) (Cython)
799
+ - [AIWintermuteAI/whispercpp](https://github.com/AIWintermuteAI/whispercpp) (Updated fork of aarnphm/whispercpp)
800
+ - [aarnphm/whispercpp](https://github.com/aarnphm/whispercpp) (Pybind11)
801
+ - [x] R: [bnosac/audio.whisper](https://github.com/bnosac/audio.whisper)
802
+ - [x] Unity: [Macoron/whisper.unity](https://github.com/Macoron/whisper.unity)
803
+
804
+ ## Examples
805
+
806
+ There are various examples of using the library for different projects in the [examples](examples) folder.
807
+ Some of the examples are even ported to run in the browser using WebAssembly. Check them out!
808
+
809
+ | Example | Web | Description |
810
+ | --------------------------------------------------- | ------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------- |
811
+ | [main](examples/main) | [whisper.wasm](examples/whisper.wasm) | Tool for translating and transcribing audio using Whisper |
812
+ | [bench](examples/bench) | [bench.wasm](examples/bench.wasm) | Benchmark the performance of Whisper on your machine |
813
+ | [stream](examples/stream) | [stream.wasm](examples/stream.wasm) | Real-time transcription of raw microphone capture |
814
+ | [command](examples/command) | [command.wasm](examples/command.wasm) | Basic voice assistant example for receiving voice commands from the mic |
815
+ | [wchess](examples/wchess) | [wchess.wasm](examples/wchess) | Voice-controlled chess |
816
+ | [talk](examples/talk) | [talk.wasm](examples/talk.wasm) | Talk with a GPT-2 bot |
817
+ | [talk-llama](examples/talk-llama) | | Talk with a LLaMA bot |
818
+ | [whisper.objc](examples/whisper.objc) | | iOS mobile application using whisper.cpp |
819
+ | [whisper.swiftui](examples/whisper.swiftui) | | SwiftUI iOS / macOS application using whisper.cpp |
820
+ | [whisper.android](examples/whisper.android) | | Android mobile application using whisper.cpp |
821
+ | [whisper.nvim](examples/whisper.nvim) | | Speech-to-text plugin for Neovim |
822
+ | [generate-karaoke.sh](examples/generate-karaoke.sh) | | Helper script to easily [generate a karaoke video](https://youtu.be/uj7hVta4blM) of raw audio capture |
823
+ | [livestream.sh](examples/livestream.sh) | | [Livestream audio transcription](https://github.com/ggerganov/whisper.cpp/issues/185) |
824
+ | [yt-wsp.sh](examples/yt-wsp.sh) | | Download + transcribe and/or translate any VOD [(original)](https://gist.github.com/DaniruKun/96f763ec1a037cc92fe1a059b643b818) |
825
+ | [server](examples/server) | | HTTP transcription server with OAI-like API |
826
+
827
+ ## [Discussions](https://github.com/ggerganov/whisper.cpp/discussions)
828
+
829
+ If you have any kind of feedback about this project feel free to use the Discussions section and open a new topic.
830
+ You can use the [Show and tell](https://github.com/ggerganov/whisper.cpp/discussions/categories/show-and-tell) category
831
+ to share your own projects that use `whisper.cpp`. If you have a question, make sure to check the
832
+ [Frequently asked questions (#126)](https://github.com/ggerganov/whisper.cpp/discussions/126) discussion.
README_sycl.md ADDED
@@ -0,0 +1,249 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # whisper.cpp for SYCL
2
+
3
+ [Background](#background)
4
+
5
+ [OS](#os)
6
+
7
+ [Intel GPU](#intel-gpu)
8
+
9
+ [Linux](#linux)
10
+
11
+ [Environment Variable](#environment-variable)
12
+
13
+ [Known Issue](#known-issue)
14
+
15
+ [Todo](#todo)
16
+
17
+ ## Background
18
+
19
+ SYCL is a higher-level programming model to improve programming productivity on various hardware accelerators — such as CPUs, GPUs, and FPGAs. It is a single-source embedded domain-specific language based on pure C++17.
20
+
21
+ oneAPI is a specification that is open and standards-based, supporting multiple architecture types including but not limited to GPU, CPU, and FPGA. The spec has both direct programming and API-based programming paradigms.
22
+
23
+ Intel uses the SYCL as direct programming language to support CPU, GPUs and FPGAs.
24
+
25
+ To avoid re-inventing the wheel, this code refers to other code paths in llama.cpp (like OpenBLAS, cuBLAS, CLBlast). We use an open-source tool [SYCLomatic](https://github.com/oneapi-src/SYCLomatic) (Commercial release [Intel® DPC++ Compatibility Tool](https://www.intel.com/content/www/us/en/developer/tools/oneapi/dpc-compatibility-tool.html)) to migrate to SYCL.
26
+
27
+ The whisper.cpp for SYCL is used to support Intel GPUs.
28
+
29
+ For Intel CPUs, we recommend using whisper.cpp for x86 (Intel MKL build).
30
+
31
+ ## OS
32
+
33
+ |OS|Status|Verified|
34
+ |-|-|-|
35
+ |Linux|Support|Ubuntu 22.04|
36
+ |Windows|Ongoing| |
37
+
38
+
39
+ ## Intel GPU
40
+
41
+ |Intel GPU| Status | Verified Model|
42
+ |-|-|-|
43
+ |Intel Data Center Max Series| Support| Max 1550|
44
+ |Intel Data Center Flex Series| Support| Flex 170|
45
+ |Intel Arc Series| Support| Arc 770|
46
+ |Intel built-in Arc GPU| Support| built-in Arc GPU in Meteor Lake|
47
+ |Intel iGPU| Support| iGPU in i5-1250P, i7-1165G7|
48
+
49
+
50
+ ## Linux
51
+
52
+ ### Setup Environment
53
+
54
+ 1. Install Intel GPU driver.
55
+
56
+ a. Please install Intel GPU driver by official guide: [Install GPU Drivers](https://dgpu-docs.intel.com/driver/installation.html).
57
+
58
+ Note: for iGPU, please install the client GPU driver.
59
+
60
+ b. Add user to group: video, render.
61
+
62
+ ```
63
+ sudo usermod -aG render username
64
+ sudo usermod -aG video username
65
+ ```
66
+
67
+ Note: re-login to enable it.
68
+
69
+ c. Check
70
+
71
+ ```
72
+ sudo apt install clinfo
73
+ sudo clinfo -l
74
+ ```
75
+
76
+ Output (example):
77
+
78
+ ```
79
+ Platform #0: Intel(R) OpenCL Graphics
80
+ `-- Device #0: Intel(R) Arc(TM) A770 Graphics
81
+
82
+
83
+ Platform #0: Intel(R) OpenCL HD Graphics
84
+ `-- Device #0: Intel(R) Iris(R) Xe Graphics [0x9a49]
85
+ ```
86
+
87
+ 2. Install the Intel® oneAPI Base Toolkit.
88
+
89
+
90
+ a. Please follow the procedure in [Get the Intel® oneAPI Base Toolkit](https://www.intel.com/content/www/us/en/developer/tools/oneapi/base-toolkit.html).
91
+
92
+ Recommend to install to default folder: **/opt/intel/oneapi**.
93
+
94
+ The following guide uses the default folder as an example. If you use a different folder, please adjust the paths in the guide accordingly.
95
+
96
+ b. Check
97
+
98
+ ```
99
+ source /opt/intel/oneapi/setvars.sh
100
+
101
+ sycl-ls
102
+ ```
103
+
104
+ There should be one or more level-zero devices. Like **[ext_oneapi_level_zero:gpu:0]**.
105
+
106
+ Output (example):
107
+ ```
108
+ [opencl:acc:0] Intel(R) FPGA Emulation Platform for OpenCL(TM), Intel(R) FPGA Emulation Device OpenCL 1.2 [2023.16.10.0.17_160000]
109
+ [opencl:cpu:1] Intel(R) OpenCL, 13th Gen Intel(R) Core(TM) i7-13700K OpenCL 3.0 (Build 0) [2023.16.10.0.17_160000]
110
+ [opencl:gpu:2] Intel(R) OpenCL Graphics, Intel(R) Arc(TM) A770 Graphics OpenCL 3.0 NEO [23.30.26918.50]
111
+ [ext_oneapi_level_zero:gpu:0] Intel(R) Level-Zero, Intel(R) Arc(TM) A770 Graphics 1.3 [1.3.26918]
112
+
113
+ ```
114
+
115
+ 3. Build locally:
116
+
117
+ ```
118
+ mkdir -p build
119
+ cd build
120
+ source /opt/intel/oneapi/setvars.sh
121
+
122
+ #for FP16
123
+ #cmake .. -DWHISPER_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DWHISPER_SYCL_F16=ON
124
+
125
+ #for FP32
126
+ cmake .. -DWHISPER_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
127
+
128
+ #build example/main only
129
+ #cmake --build . --config Release --target main
130
+
131
+ #build all binary
132
+ cmake --build . --config Release -v
133
+
134
+ ```
135
+
136
+ or
137
+
138
+ ```
139
+ ./examples/sycl/build.sh
140
+ ```
141
+
142
+ Note:
143
+
144
+ - By default, it will build all binary files. This takes more time. To reduce the build time, we recommend building **example/main** only.
145
+
146
+ ### Run
147
+
148
+ 1. Put model file to folder **models**
149
+
150
+ 2. Enable oneAPI running environment
151
+
152
+ ```
153
+ source /opt/intel/oneapi/setvars.sh
154
+ ```
155
+
156
+ 3. List device ID
157
+
158
+ Run without parameter:
159
+
160
+ ```
161
+ ./build/bin/ls-sycl-device
162
+
163
+ or
164
+
165
+ ./build/bin/main
166
+ ```
167
+
168
+ Check the ID in startup log, like:
169
+
170
+ ```
171
+ found 4 SYCL devices:
172
+ Device 0: Intel(R) Arc(TM) A770 Graphics, compute capability 1.3,
173
+ max compute_units 512, max work group size 1024, max sub group size 32, global mem size 16225243136
174
+ Device 1: Intel(R) FPGA Emulation Device, compute capability 1.2,
175
+ max compute_units 24, max work group size 67108864, max sub group size 64, global mem size 67065057280
176
+ Device 2: 13th Gen Intel(R) Core(TM) i7-13700K, compute capability 3.0,
177
+ max compute_units 24, max work group size 8192, max sub group size 64, global mem size 67065057280
178
+ Device 3: Intel(R) Arc(TM) A770 Graphics, compute capability 3.0,
179
+ max compute_units 512, max work group size 1024, max sub group size 32, global mem size 16225243136
180
+
181
+ ```
182
+
183
+ |Attribute|Note|
184
+ |-|-|
185
+ |compute capability 1.3|Level-zero running time, recommended |
186
+ |compute capability 3.0|OpenCL running time, slower than level-zero in most cases|
187
+
188
+ 4. Set device ID and execute whisper.cpp
189
+
190
+ Set device ID = 0 by **GGML_SYCL_DEVICE=0**
191
+
192
+ ```
193
+ GGML_SYCL_DEVICE=0 ./build/bin/main -m models/ggml-base.en.bin -f samples/jfk.wav
194
+ ```
195
+ or run by script:
196
+
197
+ ```
198
+ ./examples/sycl/run_whisper.sh
199
+ ```
200
+
201
+
202
+
203
+ 5. Check the device ID in output
204
+
205
+ Like:
206
+ ```
207
+ Using device **0** (Intel(R) Arc(TM) A770 Graphics) as main device
208
+ ```
209
+
210
+
211
+ ## Environment Variable
212
+
213
+ #### Build
214
+
215
+ |Name|Value|Function|
216
+ |-|-|-|
217
+ |WHISPER_SYCL|ON (mandatory)|Enable build with SYCL code path. <br>For FP32/FP16, WHISPER_SYCL=ON is mandatory.|
218
+ |WHISPER_SYCL_F16|ON (optional)|Enable FP16 build with SYCL code path. For FP32, do not set it.|
219
+ |CMAKE_C_COMPILER|icx|Use icx compiler for SYCL code path|
220
+ |CMAKE_CXX_COMPILER|icpx|use icpx for SYCL code path|
221
+
222
+ #### Running
223
+
224
+
225
+ |Name|Value|Function|
226
+ |-|-|-|
227
+ |GGML_SYCL_DEVICE|0 (default) or 1|Set the device id used. Check the device ids by default running output|
228
+ |GGML_SYCL_DEBUG|0 (default) or 1|Enable log function by macro: GGML_SYCL_DEBUG|
229
+
230
+ ## Known Issue
231
+
232
+ - Error: `error while loading shared libraries: libsycl.so.7: cannot open shared object file: No such file or directory`.
233
+
234
+ This happens when the oneAPI running environment has not been enabled.
235
+
236
+ Install oneAPI base toolkit and enable it by: `source /opt/intel/oneapi/setvars.sh`.
237
+
238
+
239
+ - Hang during startup
240
+
241
+ llama.cpp uses mmap as the default way to read the model file and copy it to the GPU. On some systems, memcpy may behave abnormally and block.
242
+
243
+ Solution: add **--no-mmap**.
244
+
245
+ ## Todo
246
+
247
+ - Support to build in Windows.
248
+
249
+ - Support multiple cards.
bindings/CMakeLists.txt ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ if (EMSCRIPTEN)
2
+ add_subdirectory(javascript)
3
+
4
+ add_custom_command(
5
+ OUTPUT ${CMAKE_CURRENT_SOURCE_DIR}/javascript/publish.log
6
+ DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/javascript/whisper.js
7
+ DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/javascript/libwhisper.worker.js
8
+ DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/javascript/package.json
9
+ WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/javascript
10
+ COMMAND npm publish
11
+ COMMAND touch publish.log
12
+ COMMENT "Publishing npm module v${PROJECT_VERSION}"
13
+ VERBATIM
14
+ )
15
+
16
+ add_custom_target(publish-npm
17
+ DEPENDS javascript/publish.log
18
+ )
19
+ endif()
bindings/go/.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ build
2
+ models
bindings/go/LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2022 David Thorpe
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
bindings/go/Makefile ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ifndef UNAME_S
2
+ UNAME_S := $(shell uname -s)
3
+ endif
4
+
5
+ ifndef UNAME_P
6
+ UNAME_P := $(shell uname -p)
7
+ endif
8
+
9
+ ifndef UNAME_M
10
+ UNAME_M := $(shell uname -m)
11
+ endif
12
+
13
+ GGML_METAL_PATH_RESOURCES := $(abspath ../..)
14
+ BUILD_DIR := build
15
+ MODELS_DIR := models
16
+ EXAMPLES_DIR := $(wildcard examples/*)
17
+ INCLUDE_PATH := $(abspath ../../include):$(abspath ../../ggml/include)
18
+ LIBRARY_PATH := $(abspath ../..)
19
+
20
+ ifeq ($(UNAME_S),Darwin)
21
+ EXT_LDFLAGS := -framework Foundation -framework Metal -framework MetalKit
22
+ endif
23
+
24
+ all: clean whisper examples
25
+
26
+ whisper: mkdir
27
+ @echo Build whisper
28
+ @${MAKE} -C ../.. libwhisper.a
29
+
30
+ test: model-small whisper modtidy
31
+ ifeq ($(UNAME_S),Darwin)
32
+ @C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} GGML_METAL_PATH_RESOURCES=${GGML_METAL_PATH_RESOURCES} go test -ldflags "-extldflags '$(EXT_LDFLAGS)'" -v .
33
+ @C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} GGML_METAL_PATH_RESOURCES=${GGML_METAL_PATH_RESOURCES} go test -ldflags "-extldflags '$(EXT_LDFLAGS)'" -v ./pkg/whisper/...
34
+ else
35
+ @C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} go test -v .
36
+ @C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} go test -v ./pkg/whisper/...
37
+ endif
38
+
39
+ examples: $(EXAMPLES_DIR)
40
+
41
+ model-small: mkdir examples/go-model-download
42
+ @${BUILD_DIR}/go-model-download -out models ggml-small.en.bin
43
+
44
+ $(EXAMPLES_DIR): mkdir whisper modtidy
45
+ @echo Build example $(notdir $@)
46
+ ifeq ($(UNAME_S),Darwin)
47
+ @C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} GGML_METAL_PATH_RESOURCES=${GGML_METAL_PATH_RESOURCES} go build ${BUILD_FLAGS} -ldflags "-extldflags '$(EXT_LDFLAGS)'" -o ${BUILD_DIR}/$(notdir $@) ./$@
48
+ else
49
+ @C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} go build ${BUILD_FLAGS} -o ${BUILD_DIR}/$(notdir $@) ./$@
50
+ endif
51
+
52
+ mkdir:
53
+ @echo Mkdir ${BUILD_DIR}
54
+ @install -d ${BUILD_DIR}
55
+ @echo Mkdir ${MODELS_DIR}
56
+ @install -d ${MODELS_DIR}
57
+
58
+ modtidy:
59
+ @go mod tidy
60
+
61
+ clean:
62
+ @echo Clean
63
+ @rm -fr $(BUILD_DIR)
64
+ @go clean
bindings/go/README.md ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Go bindings for Whisper
2
+
3
+ This package provides Go bindings for whisper.cpp. They have been tested on:
4
+
5
+ * Darwin (OS X) 12.6 on x86_64
6
+ * Debian Linux on arm64
7
+ * Fedora Linux on x86_64
8
+
9
+ The "low level" bindings are in the `bindings/go` directory and there is a more
10
+ Go-style package in the `bindings/go/pkg/whisper` directory. The simplest usage
11
+ is as follows:
12
+
13
+ ```go
14
+ import (
15
+ "github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
16
+ )
17
+
18
+ func main() {
19
+ var modelpath string // Path to the model
20
+ var samples []float32 // Samples to process
21
+
22
+ // Load the model
23
+ model, err := whisper.New(modelpath)
24
+ if err != nil {
25
+ panic(err)
26
+ }
27
+ defer model.Close()
28
+
29
+ // Process samples
30
+ context, err := model.NewContext()
31
+ if err != nil {
32
+ panic(err)
33
+ }
34
+ if err := context.Process(samples, nil, nil); err != nil {
35
+ return err
36
+ }
37
+
38
+ // Print out the results
39
+ for {
40
+ segment, err := context.NextSegment()
41
+ if err != nil {
42
+ break
43
+ }
44
+ fmt.Printf("[%6s->%6s] %s\n", segment.Start, segment.End, segment.Text)
45
+ }
46
+ }
47
+ ```
48
+
49
+ ## Building & Testing
50
+
51
+ In order to build, you need to have the Go compiler installed. You can get it from [here](https://golang.org/dl/). Run the tests with:
52
+
53
+ ```bash
54
+ git clone https://github.com/ggerganov/whisper.cpp.git
55
+ cd whisper.cpp/bindings/go
56
+ make test
57
+ ```
58
+
59
+ This will compile a static `libwhisper.a` in a `build` folder, download a model file, then run the tests. To build the examples:
60
+
61
+ ```bash
62
+ make examples
63
+ ```
64
+
65
+ The examples are placed in the `build` directory. Once built, you can download all the models with the following command:
66
+
67
+ ```bash
68
+ ./build/go-model-download -out models
69
+ ```
70
+
71
+ And you can then test a model against samples with the following command:
72
+
73
+ ```bash
74
+ ./build/go-whisper -model models/ggml-tiny.en.bin samples/jfk.wav
75
+ ```
76
+
77
+ ## Using the bindings
78
+
79
+ To use the bindings in your own software,
80
+
81
+ 1. Import `github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper` (or `github.com/ggerganov/whisper.cpp/bindings/go` into your package;
82
+ 2. Compile `libwhisper.a` (you can use `make whisper` in the `bindings/go` directory);
83
+ 3. Link your go binary against whisper by setting the environment variables `C_INCLUDE_PATH` and `LIBRARY_PATH`
84
+ to point to the `whisper.h` file directory and `libwhisper.a` file directory respectively.
85
+
86
+ Look at the `Makefile` in the `bindings/go` directory for an example.
87
+
88
+ The API Documentation:
89
+
90
+ * https://pkg.go.dev/github.com/ggerganov/whisper.cpp/bindings/go
91
+ * https://pkg.go.dev/github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper
92
+
93
+ Getting help:
94
+
95
+ * Follow the discussion for the go bindings [here](https://github.com/ggerganov/whisper.cpp/discussions/312)
96
+
97
+ ## License
98
+
99
+ The license for the Go bindings is the same as the license for the rest of the whisper.cpp project, which is the MIT License. See the `LICENSE` file for more details.
100
+
bindings/go/doc.go ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ /*
2
+ github.com/ggerganov/whisper.cpp/bindings/go
3
+ provides a speech-to-text service bindings for the Go programming language.
4
+ */
5
+ package whisper
bindings/go/examples/go-model-download/context.go ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ package main
2
+
3
+ import (
4
+ "context"
5
+ "os"
6
+ "os/signal"
7
+ )
8
+
9
+ // ContextForSignal returns a context object which is cancelled when a signal
10
+ // is received. It returns nil if no signal parameter is provided
11
+ func ContextForSignal(signals ...os.Signal) context.Context {
12
+ if len(signals) == 0 {
13
+ return nil
14
+ }
15
+
16
+ ch := make(chan os.Signal)
17
+ ctx, cancel := context.WithCancel(context.Background())
18
+
19
+ // Send message on channel when signal received
20
+ signal.Notify(ch, signals...)
21
+
22
+ // When any signal received, call cancel
23
+ go func() {
24
+ <-ch
25
+ cancel()
26
+ }()
27
+
28
+ // Return success
29
+ return ctx
30
+ }
bindings/go/examples/go-model-download/main.go ADDED
@@ -0,0 +1,208 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ package main
2
+
3
+ import (
4
+ "context"
5
+ "flag"
6
+ "fmt"
7
+ "io"
8
+ "net/http"
9
+ "net/url"
10
+ "os"
11
+ "path/filepath"
12
+ "syscall"
13
+ "time"
14
+ )
15
+
16
+ ///////////////////////////////////////////////////////////////////////////////
17
+ // CONSTANTS
18
+
19
+ const (
20
+ srcUrl = "https://huggingface.co/ggerganov/whisper.cpp/resolve/main" // The location of the models
21
+ srcExt = ".bin" // Filename extension
22
+ bufSize = 1024 * 64 // Size of the buffer used for downloading the model
23
+ )
24
+
25
+ var (
26
+ // The models which will be downloaded, if no model is specified as an argument
27
+ modelNames = []string{"ggml-tiny.en", "ggml-tiny", "ggml-base.en", "ggml-base", "ggml-small.en", "ggml-small", "ggml-medium.en", "ggml-medium", "ggml-large-v1", "ggml-large-v2", "ggml-large-v3"}
28
+ )
29
+
30
+ var (
31
+ // The output folder. When not set, use current working directory.
32
+ flagOut = flag.String("out", "", "Output folder")
33
+
34
+ // HTTP timeout parameter - will timeout if takes longer than this to download a model
35
+ flagTimeout = flag.Duration("timeout", 30*time.Minute, "HTTP timeout")
36
+
37
+ // Quiet parameter - will not print progress if set
38
+ flagQuiet = flag.Bool("quiet", false, "Quiet mode")
39
+ )
40
+
41
+ ///////////////////////////////////////////////////////////////////////////////
42
+ // MAIN
43
+
44
+ func main() {
45
+ flag.Usage = func() {
46
+ name := filepath.Base(flag.CommandLine.Name())
47
+ fmt.Fprintf(flag.CommandLine.Output(), "Usage: %s [options] <model>\n\n", name)
48
+ flag.PrintDefaults()
49
+ }
50
+ flag.Parse()
51
+
52
+ // Get output path
53
+ out, err := GetOut()
54
+ if err != nil {
55
+ fmt.Fprintln(os.Stderr, "Error:", err)
56
+ os.Exit(-1)
57
+ }
58
+
59
+ // Create context which quits on SIGINT or SIGQUIT
60
+ ctx := ContextForSignal(os.Interrupt, syscall.SIGQUIT)
61
+
62
+ // Progress filehandle
63
+ progress := os.Stdout
64
+ if *flagQuiet {
65
+ progress, err = os.Open(os.DevNull)
66
+ if err != nil {
67
+ fmt.Fprintln(os.Stderr, "Error:", err)
68
+ os.Exit(-1)
69
+ }
70
+ defer progress.Close()
71
+ }
72
+
73
+ // Download models - exit on error or interrupt
74
+ for _, model := range GetModels() {
75
+ url, err := URLForModel(model)
76
+ if err != nil {
77
+ fmt.Fprintln(os.Stderr, "Error:", err)
78
+ continue
79
+ } else if path, err := Download(ctx, progress, url, out); err == nil || err == io.EOF {
80
+ continue
81
+ } else if err == context.Canceled {
82
+ os.Remove(path)
83
+ fmt.Fprintln(progress, "\nInterrupted")
84
+ break
85
+ } else if err == context.DeadlineExceeded {
86
+ os.Remove(path)
87
+ fmt.Fprintln(progress, "Timeout downloading model")
88
+ continue
89
+ } else {
90
+ os.Remove(path)
91
+ fmt.Fprintln(os.Stderr, "Error:", err)
92
+ break
93
+ }
94
+ }
95
+ }
96
+
97
+ ///////////////////////////////////////////////////////////////////////////////
98
+ // PUBLIC METHODS
99
+
100
+ // GetOut returns the path to the output directory
101
+ func GetOut() (string, error) {
102
+ if *flagOut == "" {
103
+ return os.Getwd()
104
+ }
105
+ if info, err := os.Stat(*flagOut); err != nil {
106
+ return "", err
107
+ } else if !info.IsDir() {
108
+ return "", fmt.Errorf("not a directory: %s", info.Name())
109
+ } else {
110
+ return *flagOut, nil
111
+ }
112
+ }
113
+
114
+ // GetModels returns the list of models to download
115
+ func GetModels() []string {
116
+ if flag.NArg() == 0 {
117
+ return modelNames
118
+ } else {
119
+ return flag.Args()
120
+ }
121
+ }
122
+
123
+ // URLForModel returns the URL for the given model on huggingface.co
124
+ func URLForModel(model string) (string, error) {
125
+ if filepath.Ext(model) != srcExt {
126
+ model += srcExt
127
+ }
128
+ url, err := url.Parse(srcUrl)
129
+ if err != nil {
130
+ return "", err
131
+ } else {
132
+ url.Path = filepath.Join(url.Path, model)
133
+ }
134
+ return url.String(), nil
135
+ }
136
+
137
+ // Download downloads the model from the given URL to the given output directory
138
+ func Download(ctx context.Context, p io.Writer, model, out string) (string, error) {
139
+ // Create HTTP client
140
+ client := http.Client{
141
+ Timeout: *flagTimeout,
142
+ }
143
+
144
+ // Initiate the download
145
+ req, err := http.NewRequest("GET", model, nil)
146
+ if err != nil {
147
+ return "", err
148
+ }
149
+ resp, err := client.Do(req)
150
+ if err != nil {
151
+ return "", err
152
+ }
153
+ defer resp.Body.Close()
154
+ if resp.StatusCode != http.StatusOK {
155
+ return "", fmt.Errorf("%s: %s", model, resp.Status)
156
+ }
157
+
158
+ // If output file exists and is the same size as the model, skip
159
+ path := filepath.Join(out, filepath.Base(model))
160
+ if info, err := os.Stat(path); err == nil && info.Size() == resp.ContentLength {
161
+ fmt.Fprintln(p, "Skipping", model, "as it already exists")
162
+ return "", nil
163
+ }
164
+
165
+ // Create file
166
+ w, err := os.Create(path)
167
+ if err != nil {
168
+ return "", err
169
+ }
170
+ defer w.Close()
171
+
172
+ // Report
173
+ fmt.Fprintln(p, "Downloading", model, "to", out)
174
+
175
+ // Progressively download the model
176
+ data := make([]byte, bufSize)
177
+ count, pct := int64(0), int64(0)
178
+ ticker := time.NewTicker(5 * time.Second)
179
+ for {
180
+ select {
181
+ case <-ctx.Done():
182
+ // Cancelled, return error
183
+ return path, ctx.Err()
184
+ case <-ticker.C:
185
+ pct = DownloadReport(p, pct, count, resp.ContentLength)
186
+ default:
187
+ // Read body
188
+ n, err := resp.Body.Read(data)
189
+ if err != nil {
190
+ DownloadReport(p, pct, count, resp.ContentLength)
191
+ return path, err
192
+ } else if m, err := w.Write(data[:n]); err != nil {
193
+ return path, err
194
+ } else {
195
+ count += int64(m)
196
+ }
197
+ }
198
+ }
199
+ }
200
+
201
+ // Report periodically reports the download progress when percentage changes
202
+ func DownloadReport(w io.Writer, pct, count, total int64) int64 {
203
+ pct_ := count * 100 / total
204
+ if pct_ > pct {
205
+ fmt.Fprintf(w, " ...%d MB written (%d%%)\n", count/1e6, pct_)
206
+ }
207
+ return pct_
208
+ }
bindings/go/examples/go-whisper/color.go ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ package main
2
+
3
+ import "fmt"
4
+
5
+ ///////////////////////////////////////////////////////////////////////////////
6
+ // CONSTANTS
7
+
8
+ const (
9
+ Reset = "\033[0m"
10
+ RGBPrefix = "\033[38;5;" // followed by RGB values in decimal format separated by colons
11
+ RGBSuffix = "m"
12
+ )
13
+
14
+ ///////////////////////////////////////////////////////////////////////////////
15
+ // PUBLIC METHODS
16
+
17
+ // Colorize text with RGB values, from 0 to 23
18
+ func Colorize(text string, v int) string {
19
+ // https://en.wikipedia.org/wiki/ANSI_escape_code#8-bit
20
+ // Grayscale colors are in the range 232-255
21
+ return RGBPrefix + fmt.Sprint(v%24+232) + RGBSuffix + text + Reset
22
+ }
bindings/go/examples/go-whisper/flags.go ADDED
@@ -0,0 +1,147 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ package main
2
+
3
+ import (
4
+ "flag"
5
+ "fmt"
6
+ "strings"
7
+ "time"
8
+
9
+ // Packages
10
+ whisper "github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
11
+ )
12
+
13
+ ///////////////////////////////////////////////////////////////////////////////
14
+ // TYPES
15
+
16
+ type Flags struct {
17
+ *flag.FlagSet
18
+ }
19
+
20
+ ///////////////////////////////////////////////////////////////////////////////
21
+ // LIFECYCLE
22
+
23
+ func NewFlags(name string, args []string) (*Flags, error) {
24
+ flags := &Flags{
25
+ FlagSet: flag.NewFlagSet(name, flag.ContinueOnError),
26
+ }
27
+
28
+ // Register the command line arguments
29
+ registerFlags(flags)
30
+
31
+ // Parse command line
32
+ if err := flags.Parse(args); err != nil {
33
+ return nil, err
34
+ }
35
+
36
+ // Return success
37
+ return flags, nil
38
+ }
39
+
40
+ ///////////////////////////////////////////////////////////////////////////////
41
+ // PUBLIC METHODS
42
+
43
+ func (flags *Flags) GetModel() string {
44
+ return flags.Lookup("model").Value.String()
45
+ }
46
+
47
+ func (flags *Flags) GetLanguage() string {
48
+ return flags.Lookup("language").Value.String()
49
+ }
50
+
51
+ func (flags *Flags) IsTranslate() bool {
52
+ return flags.Lookup("translate").Value.(flag.Getter).Get().(bool)
53
+ }
54
+
55
+ func (flags *Flags) GetOffset() time.Duration {
56
+ return flags.Lookup("offset").Value.(flag.Getter).Get().(time.Duration)
57
+ }
58
+
59
+ func (flags *Flags) GetDuration() time.Duration {
60
+ return flags.Lookup("duration").Value.(flag.Getter).Get().(time.Duration)
61
+ }
62
+
63
+ func (flags *Flags) GetThreads() uint {
64
+ return flags.Lookup("threads").Value.(flag.Getter).Get().(uint)
65
+ }
66
+
67
+ func (flags *Flags) GetOut() string {
68
+ return strings.ToLower(flags.Lookup("out").Value.String())
69
+ }
70
+
71
+ func (flags *Flags) IsTokens() bool {
72
+ return flags.Lookup("tokens").Value.String() == "true"
73
+ }
74
+
75
+ func (flags *Flags) IsColorize() bool {
76
+ return flags.Lookup("colorize").Value.String() == "true"
77
+ }
78
+
79
+ func (flags *Flags) GetMaxLen() uint {
80
+ return flags.Lookup("max-len").Value.(flag.Getter).Get().(uint)
81
+ }
82
+
83
+ func (flags *Flags) GetMaxTokens() uint {
84
+ return flags.Lookup("max-tokens").Value.(flag.Getter).Get().(uint)
85
+ }
86
+
87
+ func (flags *Flags) GetWordThreshold() float32 {
88
+ return float32(flags.Lookup("word-thold").Value.(flag.Getter).Get().(float64))
89
+ }
90
+
91
+ func (flags *Flags) SetParams(context whisper.Context) error {
92
+ if lang := flags.GetLanguage(); lang != "" && lang != "auto" {
93
+ fmt.Fprintf(flags.Output(), "Setting language to %q\n", lang)
94
+ if err := context.SetLanguage(lang); err != nil {
95
+ return err
96
+ }
97
+ }
98
+ if flags.IsTranslate() && context.IsMultilingual() {
99
+ fmt.Fprintf(flags.Output(), "Setting translate to true\n")
100
+ context.SetTranslate(true)
101
+ }
102
+ if offset := flags.GetOffset(); offset != 0 {
103
+ fmt.Fprintf(flags.Output(), "Setting offset to %v\n", offset)
104
+ context.SetOffset(offset)
105
+ }
106
+ if duration := flags.GetDuration(); duration != 0 {
107
+ fmt.Fprintf(flags.Output(), "Setting duration to %v\n", duration)
108
+ context.SetDuration(duration)
109
+ }
110
+ if threads := flags.GetThreads(); threads != 0 {
111
+ fmt.Fprintf(flags.Output(), "Setting threads to %d\n", threads)
112
+ context.SetThreads(threads)
113
+ }
114
+ if max_len := flags.GetMaxLen(); max_len != 0 {
115
+ fmt.Fprintf(flags.Output(), "Setting max_segment_length to %d\n", max_len)
116
+ context.SetMaxSegmentLength(max_len)
117
+ }
118
+ if max_tokens := flags.GetMaxTokens(); max_tokens != 0 {
119
+ fmt.Fprintf(flags.Output(), "Setting max_tokens to %d\n", max_tokens)
120
+ context.SetMaxTokensPerSegment(max_tokens)
121
+ }
122
+ if word_threshold := flags.GetWordThreshold(); word_threshold != 0 {
123
+ fmt.Fprintf(flags.Output(), "Setting word_threshold to %f\n", word_threshold)
124
+ context.SetTokenThreshold(word_threshold)
125
+ }
126
+
127
+ // Return success
128
+ return nil
129
+ }
130
+
131
+ ///////////////////////////////////////////////////////////////////////////////
132
+ // PRIVATE METHODS
133
+
134
+ func registerFlags(flag *Flags) {
135
+ flag.String("model", "", "Path to the model file")
136
+ flag.String("language", "", "Spoken language")
137
+ flag.Bool("translate", false, "Translate from source language to english")
138
+ flag.Duration("offset", 0, "Time offset")
139
+ flag.Duration("duration", 0, "Duration of audio to process")
140
+ flag.Uint("threads", 0, "Number of threads to use")
141
+ flag.Uint("max-len", 0, "Maximum segment length in characters")
142
+ flag.Uint("max-tokens", 0, "Maximum tokens per segment")
143
+ flag.Float64("word-thold", 0, "Maximum segment score")
144
+ flag.Bool("tokens", false, "Display tokens")
145
+ flag.Bool("colorize", false, "Colorize tokens")
146
+ flag.String("out", "", "Output format (srt, none or leave as empty string)")
147
+ }
bindings/go/examples/go-whisper/main.go ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ package main
2
+
3
+ import (
4
+ "flag"
5
+ "fmt"
6
+ "os"
7
+ "path/filepath"
8
+
9
+ // Packages
10
+ whisper "github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
11
+ )
12
+
13
+ func main() {
14
+ flags, err := NewFlags(filepath.Base(os.Args[0]), os.Args[1:])
15
+ if err == flag.ErrHelp {
16
+ os.Exit(0)
17
+ } else if err != nil {
18
+ fmt.Fprintln(os.Stderr, err)
19
+ os.Exit(1)
20
+ } else if flags.GetModel() == "" {
21
+ fmt.Fprintln(os.Stderr, "Use -model flag to specify which model file to use")
22
+ os.Exit(1)
23
+ } else if flags.NArg() == 0 {
24
+ fmt.Fprintln(os.Stderr, "No input files specified")
25
+ os.Exit(1)
26
+ }
27
+
28
+ // Load model
29
+ model, err := whisper.New(flags.GetModel())
30
+ if err != nil {
31
+ fmt.Fprintln(os.Stderr, err)
32
+ os.Exit(1)
33
+ }
34
+ defer model.Close()
35
+
36
+ // Process files
37
+ for _, filename := range flags.Args() {
38
+ if err := Process(model, filename, flags); err != nil {
39
+ fmt.Fprintln(os.Stderr, err)
40
+ continue
41
+ }
42
+ }
43
+ }
bindings/go/examples/go-whisper/process.go ADDED
@@ -0,0 +1,132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ package main
2
+
3
+ import (
4
+ "fmt"
5
+ "io"
6
+ "os"
7
+ "time"
8
+
9
+ // Package imports
10
+ whisper "github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
11
+ wav "github.com/go-audio/wav"
12
+ )
13
+
14
+ func Process(model whisper.Model, path string, flags *Flags) error {
15
+ var data []float32
16
+
17
+ // Create processing context
18
+ context, err := model.NewContext()
19
+ if err != nil {
20
+ return err
21
+ }
22
+
23
+ // Set the parameters
24
+ if err := flags.SetParams(context); err != nil {
25
+ return err
26
+ }
27
+
28
+ fmt.Printf("\n%s\n", context.SystemInfo())
29
+
30
+ // Open the file
31
+ fmt.Fprintf(flags.Output(), "Loading %q\n", path)
32
+ fh, err := os.Open(path)
33
+ if err != nil {
34
+ return err
35
+ }
36
+ defer fh.Close()
37
+
38
+ // Decode the WAV file - load the full buffer
39
+ dec := wav.NewDecoder(fh)
40
+ if buf, err := dec.FullPCMBuffer(); err != nil {
41
+ return err
42
+ } else if dec.SampleRate != whisper.SampleRate {
43
+ return fmt.Errorf("unsupported sample rate: %d", dec.SampleRate)
44
+ } else if dec.NumChans != 1 {
45
+ return fmt.Errorf("unsupported number of channels: %d", dec.NumChans)
46
+ } else {
47
+ data = buf.AsFloat32Buffer().Data
48
+ }
49
+
50
+ // Segment callback when -tokens is specified
51
+ var cb whisper.SegmentCallback
52
+ if flags.IsTokens() {
53
+ cb = func(segment whisper.Segment) {
54
+ fmt.Fprintf(flags.Output(), "%02d [%6s->%6s] ", segment.Num, segment.Start.Truncate(time.Millisecond), segment.End.Truncate(time.Millisecond))
55
+ for _, token := range segment.Tokens {
56
+ if flags.IsColorize() && context.IsText(token) {
57
+ fmt.Fprint(flags.Output(), Colorize(token.Text, int(token.P*24.0)), " ")
58
+ } else {
59
+ fmt.Fprint(flags.Output(), token.Text, " ")
60
+ }
61
+ }
62
+ fmt.Fprintln(flags.Output(), "")
63
+ fmt.Fprintln(flags.Output(), "")
64
+ }
65
+ }
66
+
67
+ // Process the data
68
+ fmt.Fprintf(flags.Output(), " ...processing %q\n", path)
69
+ context.ResetTimings()
70
+ if err := context.Process(data, cb, nil); err != nil {
71
+ return err
72
+ }
73
+
74
+ context.PrintTimings()
75
+
76
+ // Print out the results
77
+ switch {
78
+ case flags.GetOut() == "srt":
79
+ return OutputSRT(os.Stdout, context)
80
+ case flags.GetOut() == "none":
81
+ return nil
82
+ default:
83
+ return Output(os.Stdout, context, flags.IsColorize())
84
+ }
85
+ }
86
+
87
+ // Output text as SRT file
88
+ func OutputSRT(w io.Writer, context whisper.Context) error {
89
+ n := 1
90
+ for {
91
+ segment, err := context.NextSegment()
92
+ if err == io.EOF {
93
+ return nil
94
+ } else if err != nil {
95
+ return err
96
+ }
97
+ fmt.Fprintln(w, n)
98
+ fmt.Fprintln(w, srtTimestamp(segment.Start), " --> ", srtTimestamp(segment.End))
99
+ fmt.Fprintln(w, segment.Text)
100
+ fmt.Fprintln(w, "")
101
+ n++
102
+ }
103
+ }
104
+
105
+ // Output text to terminal
106
+ func Output(w io.Writer, context whisper.Context, colorize bool) error {
107
+ for {
108
+ segment, err := context.NextSegment()
109
+ if err == io.EOF {
110
+ return nil
111
+ } else if err != nil {
112
+ return err
113
+ }
114
+ fmt.Fprintf(w, "[%6s->%6s]", segment.Start.Truncate(time.Millisecond), segment.End.Truncate(time.Millisecond))
115
+ if colorize {
116
+ for _, token := range segment.Tokens {
117
+ if !context.IsText(token) {
118
+ continue
119
+ }
120
+ fmt.Fprint(w, " ", Colorize(token.Text, int(token.P*24.0)))
121
+ }
122
+ fmt.Fprint(w, "\n")
123
+ } else {
124
+ fmt.Fprintln(w, " ", segment.Text)
125
+ }
126
+ }
127
+ }
128
+
129
+ // Return srtTimestamp
130
+ func srtTimestamp(t time.Duration) string {
131
+ return fmt.Sprintf("%02d:%02d:%02d,%03d", t/time.Hour, (t%time.Hour)/time.Minute, (t%time.Minute)/time.Second, (t%time.Second)/time.Millisecond)
132
+ }
bindings/go/go.mod ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ module github.com/ggerganov/whisper.cpp/bindings/go
2
+
3
+ go 1.19
4
+
5
+ require (
6
+ github.com/go-audio/wav v1.1.0
7
+ github.com/stretchr/testify v1.8.1
8
+ )
9
+
10
+ require (
11
+ github.com/davecgh/go-spew v1.1.1 // indirect
12
+ github.com/go-audio/audio v1.0.0 // indirect
13
+ github.com/go-audio/riff v1.0.0 // indirect
14
+ github.com/pmezard/go-difflib v1.0.0 // indirect
15
+ gopkg.in/yaml.v3 v3.0.1 // indirect
16
+ )
bindings/go/go.sum ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
2
+ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
3
+ github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
4
+ github.com/go-audio/audio v1.0.0 h1:zS9vebldgbQqktK4H0lUqWrG8P0NxCJVqcj7ZpNnwd4=
5
+ github.com/go-audio/audio v1.0.0/go.mod h1:6uAu0+H2lHkwdGsAY+j2wHPNPpPoeg5AaEFh9FlA+Zs=
6
+ github.com/go-audio/riff v1.0.0 h1:d8iCGbDvox9BfLagY94fBynxSPHO80LmZCaOsmKxokA=
7
+ github.com/go-audio/riff v1.0.0/go.mod h1:l3cQwc85y79NQFCRB7TiPoNiaijp6q8Z0Uv38rVG498=
8
+ github.com/go-audio/wav v1.1.0 h1:jQgLtbqBzY7G+BM8fXF7AHUk1uHUviWS4X39d5rsL2g=
9
+ github.com/go-audio/wav v1.1.0/go.mod h1:mpe9qfwbScEbkd8uybLuIpTgHyrISw/OTuvjUW2iGtE=
10
+ github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
11
+ github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
12
+ github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
13
+ github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
14
+ github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
15
+ github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
16
+ github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
17
+ github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk=
18
+ github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
19
+ gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
20
+ gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
21
+ gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
22
+ gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
23
+ gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
bindings/go/params.go ADDED
@@ -0,0 +1,192 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ package whisper
2
+
3
+ import (
4
+ "fmt"
5
+ )
6
+
7
+ ///////////////////////////////////////////////////////////////////////////////
8
+ // CGO
9
+
10
+ /*
11
+ #include <whisper.h>
12
+ */
13
+ import "C"
14
+
15
+ ///////////////////////////////////////////////////////////////////////////////
16
+ // PUBLIC METHODS
17
+
18
+ func (p *Params) SetTranslate(v bool) {
19
+ p.translate = toBool(v)
20
+ }
21
+
22
+ func (p *Params) SetSplitOnWord(v bool) {
23
+ p.split_on_word = toBool(v)
24
+ }
25
+
26
+ func (p *Params) SetNoContext(v bool) {
27
+ p.no_context = toBool(v)
28
+ }
29
+
30
+ func (p *Params) SetSingleSegment(v bool) {
31
+ p.single_segment = toBool(v)
32
+ }
33
+
34
+ func (p *Params) SetPrintSpecial(v bool) {
35
+ p.print_special = toBool(v)
36
+ }
37
+
38
+ func (p *Params) SetPrintProgress(v bool) {
39
+ p.print_progress = toBool(v)
40
+ }
41
+
42
+ func (p *Params) SetPrintRealtime(v bool) {
43
+ p.print_realtime = toBool(v)
44
+ }
45
+
46
+ func (p *Params) SetPrintTimestamps(v bool) {
47
+ p.print_timestamps = toBool(v)
48
+ }
49
+
50
+ // Set language id
51
+ func (p *Params) SetLanguage(lang int) error {
52
+ if lang == -1 {
53
+ p.language = nil
54
+ return nil
55
+ }
56
+ str := C.whisper_lang_str(C.int(lang))
57
+ if str == nil {
58
+ return ErrInvalidLanguage
59
+ } else {
60
+ p.language = str
61
+ }
62
+ return nil
63
+ }
64
+
65
+ // Get language id
66
+ func (p *Params) Language() int {
67
+ if p.language == nil {
68
+ return -1
69
+ }
70
+ return int(C.whisper_lang_id(p.language))
71
+ }
72
+
73
+ // Threads available
74
+ func (p *Params) Threads() int {
75
+ return int(p.n_threads)
76
+ }
77
+
78
+ // Set number of threads to use
79
+ func (p *Params) SetThreads(threads int) {
80
+ p.n_threads = C.int(threads)
81
+ }
82
+
83
+ // Set start offset in ms
84
+ func (p *Params) SetOffset(offset_ms int) {
85
+ p.offset_ms = C.int(offset_ms)
86
+ }
87
+
88
+ // Set audio duration to process in ms
89
+ func (p *Params) SetDuration(duration_ms int) {
90
+ p.duration_ms = C.int(duration_ms)
91
+ }
92
+
93
+ // Set timestamp token probability threshold (~0.01)
94
+ func (p *Params) SetTokenThreshold(t float32) {
95
+ p.thold_pt = C.float(t)
96
+ }
97
+
98
+ // Set timestamp token sum probability threshold (~0.01)
99
+ func (p *Params) SetTokenSumThreshold(t float32) {
100
+ p.thold_ptsum = C.float(t)
101
+ }
102
+
103
+ // Set max segment length in characters
104
+ func (p *Params) SetMaxSegmentLength(n int) {
105
+ p.max_len = C.int(n)
106
+ }
107
+
108
+ func (p *Params) SetTokenTimestamps(b bool) {
109
+ p.token_timestamps = toBool(b)
110
+ }
111
+
112
+ // Set max tokens per segment (0 = no limit)
113
+ func (p *Params) SetMaxTokensPerSegment(n int) {
114
+ p.max_tokens = C.int(n)
115
+ }
116
+
117
+ // Set audio encoder context
118
+ func (p *Params) SetAudioCtx(n int) {
119
+ p.audio_ctx = C.int(n)
120
+ }
121
+
122
+ func (p *Params) SetMaxContext(n int) {
123
+ p.n_max_text_ctx = C.int(n)
124
+ }
125
+
126
+ func (p *Params) SetBeamSize(n int) {
127
+ p.beam_search.beam_size = C.int(n)
128
+ }
129
+
130
+ func (p *Params) SetEntropyThold(t float32) {
131
+ p.entropy_thold = C.float(t)
132
+ }
133
+
134
+ // Set initial prompt
135
+ func (p *Params) SetInitialPrompt(prompt string) {
136
+ p.initial_prompt = C.CString(prompt)
137
+ }
138
+
139
+ ///////////////////////////////////////////////////////////////////////////////
140
+ // PRIVATE METHODS
141
+
142
+ func toBool(v bool) C.bool {
143
+ if v {
144
+ return C.bool(true)
145
+ }
146
+ return C.bool(false)
147
+ }
148
+
149
+ ///////////////////////////////////////////////////////////////////////////////
150
+ // STRINGIFY
151
+
152
+ func (p *Params) String() string {
153
+ str := "<whisper.params"
154
+ str += fmt.Sprintf(" strategy=%v", p.strategy)
155
+ str += fmt.Sprintf(" n_threads=%d", p.n_threads)
156
+ if p.language != nil {
157
+ str += fmt.Sprintf(" language=%s", C.GoString(p.language))
158
+ }
159
+ str += fmt.Sprintf(" n_max_text_ctx=%d", p.n_max_text_ctx)
160
+ str += fmt.Sprintf(" offset_ms=%d", p.offset_ms)
161
+ str += fmt.Sprintf(" duration_ms=%d", p.duration_ms)
162
+ str += fmt.Sprintf(" audio_ctx=%d", p.audio_ctx)
163
+ str += fmt.Sprintf(" initial_prompt=%s", C.GoString(p.initial_prompt))
164
+ str += fmt.Sprintf(" entropy_thold=%f", p.entropy_thold)
165
+ str += fmt.Sprintf(" beam_size=%d", p.beam_search.beam_size)
166
+ if p.translate {
167
+ str += " translate"
168
+ }
169
+ if p.no_context {
170
+ str += " no_context"
171
+ }
172
+ if p.single_segment {
173
+ str += " single_segment"
174
+ }
175
+ if p.print_special {
176
+ str += " print_special"
177
+ }
178
+ if p.print_progress {
179
+ str += " print_progress"
180
+ }
181
+ if p.print_realtime {
182
+ str += " print_realtime"
183
+ }
184
+ if p.print_timestamps {
185
+ str += " print_timestamps"
186
+ }
187
+ if p.token_timestamps {
188
+ str += " token_timestamps"
189
+ }
190
+
191
+ return str + ">"
192
+ }
bindings/go/pkg/whisper/consts.go ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ package whisper
2
+
3
+ import (
4
+ "errors"
5
+
6
+ // Bindings
7
+ whisper "github.com/ggerganov/whisper.cpp/bindings/go"
8
+ )
9
+
10
+ ///////////////////////////////////////////////////////////////////////////////
11
+ // ERRORS
12
+
13
+ var (
14
+ ErrUnableToLoadModel = errors.New("unable to load model")
15
+ ErrInternalAppError = errors.New("internal application error")
16
+ ErrProcessingFailed = errors.New("processing failed")
17
+ ErrUnsupportedLanguage = errors.New("unsupported language")
18
+ ErrModelNotMultilingual = errors.New("model is not multilingual")
19
+ )
20
+
21
+ ///////////////////////////////////////////////////////////////////////////////
22
+ // CONSTANTS
23
+
24
+ // SampleRate is the sample rate of the audio data.
25
+ const SampleRate = whisper.SampleRate
26
+
27
+ // SampleBits is the number of bits per sample.
28
+ const SampleBits = whisper.SampleBits
bindings/go/pkg/whisper/context.go ADDED
@@ -0,0 +1,331 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ package whisper
2
+
3
+ import (
4
+ "fmt"
5
+ "io"
6
+ "runtime"
7
+ "strings"
8
+ "time"
9
+
10
+ // Bindings
11
+ whisper "github.com/ggerganov/whisper.cpp/bindings/go"
12
+ )
13
+
14
+ ///////////////////////////////////////////////////////////////////////////////
15
+ // TYPES
16
+
17
+ type context struct {
18
+ n int
19
+ model *model
20
+ params whisper.Params
21
+ }
22
+
23
+ // Make sure context adheres to the interface
24
+ var _ Context = (*context)(nil)
25
+
26
+ ///////////////////////////////////////////////////////////////////////////////
27
+ // LIFECYCLE
28
+
29
+ func newContext(model *model, params whisper.Params) (Context, error) {
30
+ context := new(context)
31
+ context.model = model
32
+ context.params = params
33
+
34
+ // Return success
35
+ return context, nil
36
+ }
37
+
38
+ ///////////////////////////////////////////////////////////////////////////////
39
+ // PUBLIC METHODS
40
+
41
+ // Set the language to use for speech recognition.
42
+ func (context *context) SetLanguage(lang string) error {
43
+ if context.model.ctx == nil {
44
+ return ErrInternalAppError
45
+ }
46
+ if !context.model.IsMultilingual() {
47
+ return ErrModelNotMultilingual
48
+ }
49
+
50
+ if lang == "auto" {
51
+ context.params.SetLanguage(-1)
52
+ } else if id := context.model.ctx.Whisper_lang_id(lang); id < 0 {
53
+ return ErrUnsupportedLanguage
54
+ } else if err := context.params.SetLanguage(id); err != nil {
55
+ return err
56
+ }
57
+ // Return success
58
+ return nil
59
+ }
60
+
61
+ func (context *context) IsMultilingual() bool {
62
+ return context.model.IsMultilingual()
63
+ }
64
+
65
+ // Get language
66
+ func (context *context) Language() string {
67
+ id := context.params.Language()
68
+ if id == -1 {
69
+ return "auto"
70
+ }
71
+ return whisper.Whisper_lang_str(context.params.Language())
72
+ }
73
+
74
+ // Set translate flag
75
+ func (context *context) SetTranslate(v bool) {
76
+ context.params.SetTranslate(v)
77
+ }
78
+
79
+ func (context *context) SetSplitOnWord(v bool) {
80
+ context.params.SetSplitOnWord(v)
81
+ }
82
+
83
+ // Set number of threads to use
84
+ func (context *context) SetThreads(v uint) {
85
+ context.params.SetThreads(int(v))
86
+ }
87
+
88
+ // Set time offset
89
+ func (context *context) SetOffset(v time.Duration) {
90
+ context.params.SetOffset(int(v.Milliseconds()))
91
+ }
92
+
93
+ // Set duration of audio to process
94
+ func (context *context) SetDuration(v time.Duration) {
95
+ context.params.SetDuration(int(v.Milliseconds()))
96
+ }
97
+
98
+ // Set timestamp token probability threshold (~0.01)
99
+ func (context *context) SetTokenThreshold(t float32) {
100
+ context.params.SetTokenThreshold(t)
101
+ }
102
+
103
+ // Set timestamp token sum probability threshold (~0.01)
104
+ func (context *context) SetTokenSumThreshold(t float32) {
105
+ context.params.SetTokenSumThreshold(t)
106
+ }
107
+
108
+ // Set max segment length in characters
109
+ func (context *context) SetMaxSegmentLength(n uint) {
110
+ context.params.SetMaxSegmentLength(int(n))
111
+ }
112
+
113
+ // Set token timestamps flag
114
+ func (context *context) SetTokenTimestamps(b bool) {
115
+ context.params.SetTokenTimestamps(b)
116
+ }
117
+
118
+ // Set max tokens per segment (0 = no limit)
119
+ func (context *context) SetMaxTokensPerSegment(n uint) {
120
+ context.params.SetMaxTokensPerSegment(int(n))
121
+ }
122
+
123
+ // Set audio encoder context
124
+ func (context *context) SetAudioCtx(n uint) {
125
+ context.params.SetAudioCtx(int(n))
126
+ }
127
+
128
+ // Set maximum number of text context tokens to store
129
+ func (context *context) SetMaxContext(n int) {
130
+ context.params.SetMaxContext(n)
131
+ }
132
+
133
+ // Set Beam Size
134
+ func (context *context) SetBeamSize(n int) {
135
+ context.params.SetBeamSize(n)
136
+ }
137
+
138
+ // Set Entropy threshold
139
+ func (context *context) SetEntropyThold(t float32) {
140
+ context.params.SetEntropyThold(t)
141
+ }
142
+
143
+ // Set initial prompt
144
+ func (context *context) SetInitialPrompt(prompt string) {
145
+ context.params.SetInitialPrompt(prompt)
146
+ }
147
+
148
+ // ResetTimings resets the model timings. Should be called before processing
149
+ func (context *context) ResetTimings() {
150
+ context.model.ctx.Whisper_reset_timings()
151
+ }
152
+
153
+ // PrintTimings prints the model timings to stdout.
154
+ func (context *context) PrintTimings() {
155
+ context.model.ctx.Whisper_print_timings()
156
+ }
157
+
158
+ // SystemInfo returns the system information
159
+ func (context *context) SystemInfo() string {
160
+ return fmt.Sprintf("system_info: n_threads = %d / %d | %s\n",
161
+ context.params.Threads(),
162
+ runtime.NumCPU(),
163
+ whisper.Whisper_print_system_info(),
164
+ )
165
+ }
166
+
167
+ // Use mel data at offset_ms to try and auto-detect the spoken language
168
+ // Make sure to call whisper_pcm_to_mel() or whisper_set_mel() first.
169
+ // Returns the probabilities of all languages.
170
+ func (context *context) WhisperLangAutoDetect(offset_ms int, n_threads int) ([]float32, error) {
171
+ langProbs, err := context.model.ctx.Whisper_lang_auto_detect(offset_ms, n_threads)
172
+ if err != nil {
173
+ return nil, err
174
+ }
175
+ return langProbs, nil
176
+ }
177
+
178
+ // Process new sample data and return any errors
179
+ func (context *context) Process(
180
+ data []float32,
181
+ callNewSegment SegmentCallback,
182
+ callProgress ProgressCallback,
183
+ ) error {
184
+ if context.model.ctx == nil {
185
+ return ErrInternalAppError
186
+ }
187
+ // If the callback is defined then we force on single_segment mode
188
+ if callNewSegment != nil {
189
+ context.params.SetSingleSegment(true)
190
+ }
191
+
192
+ // We don't do parallel processing at the moment
193
+ processors := 0
194
+ if processors > 1 {
195
+ if err := context.model.ctx.Whisper_full_parallel(context.params, data, processors, nil, func(new int) {
196
+ if callNewSegment != nil {
197
+ num_segments := context.model.ctx.Whisper_full_n_segments()
198
+ s0 := num_segments - new
199
+ for i := s0; i < num_segments; i++ {
200
+ callNewSegment(toSegment(context.model.ctx, i))
201
+ }
202
+ }
203
+ }); err != nil {
204
+ return err
205
+ }
206
+ } else if err := context.model.ctx.Whisper_full(context.params, data, nil, func(new int) {
207
+ if callNewSegment != nil {
208
+ num_segments := context.model.ctx.Whisper_full_n_segments()
209
+ s0 := num_segments - new
210
+ for i := s0; i < num_segments; i++ {
211
+ callNewSegment(toSegment(context.model.ctx, i))
212
+ }
213
+ }
214
+ }, func(progress int) {
215
+ if callProgress != nil {
216
+ callProgress(progress)
217
+ }
218
+ }); err != nil {
219
+ return err
220
+ }
221
+
222
+ // Return success
223
+ return nil
224
+ }
225
+
226
+ // Return the next segment of tokens
227
+ func (context *context) NextSegment() (Segment, error) {
228
+ if context.model.ctx == nil {
229
+ return Segment{}, ErrInternalAppError
230
+ }
231
+ if context.n >= context.model.ctx.Whisper_full_n_segments() {
232
+ return Segment{}, io.EOF
233
+ }
234
+
235
+ // Populate result
236
+ result := toSegment(context.model.ctx, context.n)
237
+
238
+ // Increment the cursor
239
+ context.n++
240
+
241
+ // Return success
242
+ return result, nil
243
+ }
244
+
245
+ // Test for text tokens
246
+ func (context *context) IsText(t Token) bool {
247
+ switch {
248
+ case context.IsBEG(t):
249
+ return false
250
+ case context.IsSOT(t):
251
+ return false
252
+ case whisper.Token(t.Id) >= context.model.ctx.Whisper_token_eot():
253
+ return false
254
+ case context.IsPREV(t):
255
+ return false
256
+ case context.IsSOLM(t):
257
+ return false
258
+ case context.IsNOT(t):
259
+ return false
260
+ default:
261
+ return true
262
+ }
263
+ }
264
+
265
+ // Test for "begin" token
266
+ func (context *context) IsBEG(t Token) bool {
267
+ return whisper.Token(t.Id) == context.model.ctx.Whisper_token_beg()
268
+ }
269
+
270
+ // Test for "start of transcription" token
271
+ func (context *context) IsSOT(t Token) bool {
272
+ return whisper.Token(t.Id) == context.model.ctx.Whisper_token_sot()
273
+ }
274
+
275
+ // Test for "end of transcription" token
276
+ func (context *context) IsEOT(t Token) bool {
277
+ return whisper.Token(t.Id) == context.model.ctx.Whisper_token_eot()
278
+ }
279
+
280
+ // Test for "start of prev" token
281
+ func (context *context) IsPREV(t Token) bool {
282
+ return whisper.Token(t.Id) == context.model.ctx.Whisper_token_prev()
283
+ }
284
+
285
+ // Test for "start of lm" token
286
+ func (context *context) IsSOLM(t Token) bool {
287
+ return whisper.Token(t.Id) == context.model.ctx.Whisper_token_solm()
288
+ }
289
+
290
+ // Test for "No timestamps" token
291
+ func (context *context) IsNOT(t Token) bool {
292
+ return whisper.Token(t.Id) == context.model.ctx.Whisper_token_not()
293
+ }
294
+
295
+ // Test for token associated with a specific language
296
+ func (context *context) IsLANG(t Token, lang string) bool {
297
+ if id := context.model.ctx.Whisper_lang_id(lang); id >= 0 {
298
+ return whisper.Token(t.Id) == context.model.ctx.Whisper_token_lang(id)
299
+ } else {
300
+ return false
301
+ }
302
+ }
303
+
304
+ ///////////////////////////////////////////////////////////////////////////////
305
+ // PRIVATE METHODS
306
+
307
+ func toSegment(ctx *whisper.Context, n int) Segment {
308
+ return Segment{
309
+ Num: n,
310
+ Text: strings.TrimSpace(ctx.Whisper_full_get_segment_text(n)),
311
+ Start: time.Duration(ctx.Whisper_full_get_segment_t0(n)) * time.Millisecond * 10,
312
+ End: time.Duration(ctx.Whisper_full_get_segment_t1(n)) * time.Millisecond * 10,
313
+ Tokens: toTokens(ctx, n),
314
+ }
315
+ }
316
+
317
+ func toTokens(ctx *whisper.Context, n int) []Token {
318
+ result := make([]Token, ctx.Whisper_full_n_tokens(n))
319
+ for i := 0; i < len(result); i++ {
320
+ data := ctx.Whisper_full_get_token_data(n, i)
321
+
322
+ result[i] = Token{
323
+ Id: int(ctx.Whisper_full_get_token_id(n, i)),
324
+ Text: ctx.Whisper_full_get_token_text(n, i),
325
+ P: ctx.Whisper_full_get_token_p(n, i),
326
+ Start: time.Duration(data.T0()) * time.Millisecond * 10,
327
+ End: time.Duration(data.T1()) * time.Millisecond * 10,
328
+ }
329
+ }
330
+ return result
331
+ }
bindings/go/pkg/whisper/context_test.go ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ package whisper_test
2
+
3
+ import (
4
+ "os"
5
+ "testing"
6
+
7
+ // Packages
8
+ whisper "github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
9
+ assert "github.com/stretchr/testify/assert"
10
+ )
11
+
12
+ const (
13
+ ModelPath = "../../models/ggml-tiny.bin"
14
+ SamplePath = "../../samples/jfk.wav"
15
+ )
16
+
17
+ func Test_Whisper_000(t *testing.T) {
18
+ assert := assert.New(t)
19
+ if _, err := os.Stat(ModelPath); os.IsNotExist(err) {
20
+ t.Skip("Skipping test, model not found:", ModelPath)
21
+ }
22
+ if _, err := os.Stat(SamplePath); os.IsNotExist(err) {
23
+ t.Skip("Skipping test, sample not found:", SamplePath)
24
+ }
25
+
26
+ // Load model
27
+ model, err := whisper.New(ModelPath)
28
+ assert.NoError(err)
29
+ assert.NotNil(model)
30
+ assert.NoError(model.Close())
31
+
32
+ t.Log("languages=", model.Languages())
33
+ }
34
+
35
+ func Test_Whisper_001(t *testing.T) {
36
+ assert := assert.New(t)
37
+ if _, err := os.Stat(ModelPath); os.IsNotExist(err) {
38
+ t.Skip("Skipping test, model not found:", ModelPath)
39
+ }
40
+ if _, err := os.Stat(SamplePath); os.IsNotExist(err) {
41
+ t.Skip("Skipping test, sample not found:", SamplePath)
42
+ }
43
+
44
+ // Load model
45
+ model, err := whisper.New(ModelPath)
46
+ assert.NoError(err)
47
+ assert.NotNil(model)
48
+ defer model.Close()
49
+
50
+ // Get context for decoding
51
+ ctx, err := model.NewContext()
52
+ assert.NoError(err)
53
+ assert.NotNil(ctx)
54
+
55
+ }
bindings/go/pkg/whisper/doc.go ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ /*
2
+ This is the higher-level speech-to-text whisper.cpp API for go
3
+ */
4
+ package whisper
bindings/go/pkg/whisper/interface.go ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ package whisper
2
+
3
+ import (
4
+ "io"
5
+ "time"
6
+ )
7
+
8
+ ///////////////////////////////////////////////////////////////////////////////
9
+ // TYPES
10
+
11
+ // SegmentCallback is the callback function for processing segments in real
12
+ // time. It is called during the Process function
13
+ type SegmentCallback func(Segment)
14
+
15
+ // ProgressCallback is the callback function for reporting progress during
16
+ // processing. It is called during the Process function
17
+ type ProgressCallback func(int)
18
+
19
+ // Model is the interface to a whisper model. Create a new model with the
20
+ // function whisper.New(string)
21
+ type Model interface {
22
+ io.Closer
23
+
24
+ // Return a new speech-to-text context.
25
+ NewContext() (Context, error)
26
+
27
+ // Return true if the model is multilingual.
28
+ IsMultilingual() bool
29
+
30
+ // Return all languages supported.
31
+ Languages() []string
32
+ }
33
+
34
+ // Context is the speech recognition context.
35
+ type Context interface {
36
+ SetLanguage(string) error // Set the language to use for speech recognition, use "auto" for auto detect language.
37
+ SetTranslate(bool) // Set translate flag
38
+ IsMultilingual() bool // Return true if the model is multilingual.
39
+ Language() string // Get language
40
+
41
+ SetOffset(time.Duration) // Set offset
42
+ SetDuration(time.Duration) // Set duration
43
+ SetThreads(uint) // Set number of threads to use
44
+ SetSplitOnWord(bool) // Set split on word flag
45
+ SetTokenThreshold(float32) // Set timestamp token probability threshold
46
+ SetTokenSumThreshold(float32) // Set timestamp token sum probability threshold
47
+ SetMaxSegmentLength(uint) // Set max segment length in characters
48
+ SetTokenTimestamps(bool) // Set token timestamps flag
49
+ SetMaxTokensPerSegment(uint) // Set max tokens per segment (0 = no limit)
50
+ SetAudioCtx(uint) // Set audio encoder context
51
+ SetMaxContext(n int) // Set maximum number of text context tokens to store
52
+ SetBeamSize(n int) // Set Beam Size
53
+ SetEntropyThold(t float32) // Set Entropy threshold
54
+ SetInitialPrompt(prompt string) // Set initial prompt
55
+
56
+ // Process mono audio data and return any errors.
57
+ // If defined, newly generated segments are passed to the
58
+ // callback function during processing.
59
+ Process([]float32, SegmentCallback, ProgressCallback) error
60
+
61
+ // After process is called, return segments until the end of the stream
62
+ // is reached, when io.EOF is returned.
63
+ NextSegment() (Segment, error)
64
+
65
+ IsBEG(Token) bool // Test for "begin" token
66
+ IsSOT(Token) bool // Test for "start of transcription" token
67
+ IsEOT(Token) bool // Test for "end of transcription" token
68
+ IsPREV(Token) bool // Test for "start of prev" token
69
+ IsSOLM(Token) bool // Test for "start of lm" token
70
+ IsNOT(Token) bool // Test for "No timestamps" token
71
+ IsLANG(Token, string) bool // Test for token associated with a specific language
72
+ IsText(Token) bool // Test for text token
73
+
74
+ // Timings
75
+ PrintTimings()
76
+ ResetTimings()
77
+
78
+ SystemInfo() string
79
+ }
80
+
81
+ // Segment is the text result of a speech recognition.
82
+ type Segment struct {
83
+ // Segment Number
84
+ Num int
85
+
86
+ // Time beginning and end timestamps for the segment.
87
+ Start, End time.Duration
88
+
89
+ // The text of the segment.
90
+ Text string
91
+
92
+ // The tokens of the segment.
93
+ Tokens []Token
94
+ }
95
+
96
+ // Token is a text or special token
97
+ type Token struct {
98
+ Id int
99
+ Text string
100
+ P float32
101
+ Start, End time.Duration
102
+ }
bindings/go/pkg/whisper/model.go ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ package whisper
2
+
3
+ import (
4
+ "fmt"
5
+ "os"
6
+ "runtime"
7
+
8
+ // Bindings
9
+ whisper "github.com/ggerganov/whisper.cpp/bindings/go"
10
+ )
11
+
12
+ ///////////////////////////////////////////////////////////////////////////////
13
+ // TYPES
14
+
15
+ type model struct {
16
+ path string
17
+ ctx *whisper.Context
18
+ }
19
+
20
+ // Make sure model adheres to the interface
21
+ var _ Model = (*model)(nil)
22
+
23
+ ///////////////////////////////////////////////////////////////////////////////
24
+ // LIFECYCLE
25
+
26
+ func New(path string) (Model, error) {
27
+ model := new(model)
28
+ if _, err := os.Stat(path); err != nil {
29
+ return nil, err
30
+ } else if ctx := whisper.Whisper_init(path); ctx == nil {
31
+ return nil, ErrUnableToLoadModel
32
+ } else {
33
+ model.ctx = ctx
34
+ model.path = path
35
+ }
36
+
37
+ // Return success
38
+ return model, nil
39
+ }
40
+
41
+ func (model *model) Close() error {
42
+ if model.ctx != nil {
43
+ model.ctx.Whisper_free()
44
+ }
45
+
46
+ // Release resources
47
+ model.ctx = nil
48
+
49
+ // Return success
50
+ return nil
51
+ }
52
+
53
+ ///////////////////////////////////////////////////////////////////////////////
54
+ // STRINGIFY
55
+
56
+ func (model *model) String() string {
57
+ str := "<whisper.model"
58
+ if model.ctx != nil {
59
+ str += fmt.Sprintf(" model=%q", model.path)
60
+ }
61
+ return str + ">"
62
+ }
63
+
64
+ ///////////////////////////////////////////////////////////////////////////////
65
+ // PUBLIC METHODS
66
+
67
+ // Return true if model is multilingual (language and translation options are supported)
68
+ func (model *model) IsMultilingual() bool {
69
+ return model.ctx.Whisper_is_multilingual() != 0
70
+ }
71
+
72
+ // Return all recognized languages. Initially it is set to auto-detect
73
+ func (model *model) Languages() []string {
74
+ result := make([]string, 0, whisper.Whisper_lang_max_id())
75
+ for i := 0; i < whisper.Whisper_lang_max_id(); i++ {
76
+ str := whisper.Whisper_lang_str(i)
77
+ if model.ctx.Whisper_lang_id(str) >= 0 {
78
+ result = append(result, str)
79
+ }
80
+ }
81
+ return result
82
+ }
83
+
84
+ func (model *model) NewContext() (Context, error) {
85
+ if model.ctx == nil {
86
+ return nil, ErrInternalAppError
87
+ }
88
+
89
+ // Create new context
90
+ params := model.ctx.Whisper_full_default_params(whisper.SAMPLING_GREEDY)
91
+ params.SetTranslate(false)
92
+ params.SetPrintSpecial(false)
93
+ params.SetPrintProgress(false)
94
+ params.SetPrintRealtime(false)
95
+ params.SetPrintTimestamps(false)
96
+ params.SetThreads(runtime.NumCPU())
97
+ params.SetNoContext(true)
98
+
99
+ // Return new context
100
+ return newContext(model, params)
101
+ }
bindings/go/samples/jfk.wav ADDED
Binary file (352 kB). View file
 
bindings/go/whisper.go ADDED
@@ -0,0 +1,468 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ package whisper
2
+
3
+ import (
4
+ "errors"
5
+ "unsafe"
6
+ )
7
+
8
+ ///////////////////////////////////////////////////////////////////////////////
9
+ // CGO
10
+
11
+ /*
12
+ #cgo LDFLAGS: -lwhisper -lm -lstdc++ -fopenmp
13
+ #cgo darwin LDFLAGS: -framework Accelerate -framework Metal -framework Foundation -framework CoreGraphics
14
+ #include <whisper.h>
15
+ #include <stdlib.h>
16
+
17
+ extern void callNewSegment(void* user_data, int new);
18
+ extern void callProgress(void* user_data, int progress);
19
+ extern bool callEncoderBegin(void* user_data);
20
+
21
+ // Text segment callback
22
+ // Called on every newly generated text segment
23
+ // Use the whisper_full_...() functions to obtain the text segments
24
+ static void whisper_new_segment_cb(struct whisper_context* ctx, struct whisper_state* state, int n_new, void* user_data) {
25
+ if(user_data != NULL && ctx != NULL) {
26
+ callNewSegment(user_data, n_new);
27
+ }
28
+ }
29
+
30
+ // Progress callback
31
+ // Called on every newly generated text segment
32
+ // Use the whisper_full_...() functions to obtain the text segments
33
+ static void whisper_progress_cb(struct whisper_context* ctx, struct whisper_state* state, int progress, void* user_data) {
34
+ if(user_data != NULL && ctx != NULL) {
35
+ callProgress(user_data, progress);
36
+ }
37
+ }
38
+
39
+ // Encoder begin callback
40
+ // If not NULL, called before the encoder starts
41
+ // If it returns false, the computation is aborted
42
+ static bool whisper_encoder_begin_cb(struct whisper_context* ctx, struct whisper_state* state, void* user_data) {
43
+ if(user_data != NULL && ctx != NULL) {
44
+ return callEncoderBegin(user_data);
45
+ }
46
+ return false;
47
+ }
48
+
49
+ // Get default parameters and set callbacks
50
+ static struct whisper_full_params whisper_full_default_params_cb(struct whisper_context* ctx, enum whisper_sampling_strategy strategy) {
51
+ struct whisper_full_params params = whisper_full_default_params(strategy);
52
+ params.new_segment_callback = whisper_new_segment_cb;
53
+ params.new_segment_callback_user_data = (void*)(ctx);
54
+ params.encoder_begin_callback = whisper_encoder_begin_cb;
55
+ params.encoder_begin_callback_user_data = (void*)(ctx);
56
+ params.progress_callback = whisper_progress_cb;
57
+ params.progress_callback_user_data = (void*)(ctx);
58
+ return params;
59
+ }
60
+ */
61
+ import "C"
62
+
63
+ ///////////////////////////////////////////////////////////////////////////////
64
+ // TYPES
65
+
66
+ type (
67
+ Context C.struct_whisper_context
68
+ Token C.whisper_token
69
+ TokenData C.struct_whisper_token_data
70
+ SamplingStrategy C.enum_whisper_sampling_strategy
71
+ Params C.struct_whisper_full_params
72
+ )
73
+
74
+ ///////////////////////////////////////////////////////////////////////////////
75
+ // GLOBALS
76
+
77
+ const (
78
+ SAMPLING_GREEDY SamplingStrategy = C.WHISPER_SAMPLING_GREEDY
79
+ SAMPLING_BEAM_SEARCH SamplingStrategy = C.WHISPER_SAMPLING_BEAM_SEARCH
80
+ )
81
+
82
+ const (
83
+ SampleRate = C.WHISPER_SAMPLE_RATE // Expected sample rate, samples per second
84
+ SampleBits = uint16(unsafe.Sizeof(C.float(0))) * 8 // Sample size in bits
85
+ NumFFT = C.WHISPER_N_FFT
86
+ HopLength = C.WHISPER_HOP_LENGTH
87
+ ChunkSize = C.WHISPER_CHUNK_SIZE
88
+ )
89
+
90
+ var (
91
+ ErrTokenizerFailed = errors.New("whisper_tokenize failed")
92
+ ErrAutoDetectFailed = errors.New("whisper_lang_auto_detect failed")
93
+ ErrConversionFailed = errors.New("whisper_convert failed")
94
+ ErrInvalidLanguage = errors.New("invalid language")
95
+ )
96
+
97
+ ///////////////////////////////////////////////////////////////////////////////
98
+ // PUBLIC METHODS
99
+
100
+ // Allocates all memory needed for the model and loads the model from the given file.
101
+ // Returns NULL on failure.
102
+ func Whisper_init(path string) *Context {
103
+ cPath := C.CString(path)
104
+ defer C.free(unsafe.Pointer(cPath))
105
+ if ctx := C.whisper_init_from_file_with_params(cPath, C.whisper_context_default_params()); ctx != nil {
106
+ return (*Context)(ctx)
107
+ } else {
108
+ return nil
109
+ }
110
+ }
111
+
112
+ // Frees all memory allocated by the model.
113
+ func (ctx *Context) Whisper_free() {
114
+ C.whisper_free((*C.struct_whisper_context)(ctx))
115
+ }
116
+
117
+ // Convert RAW PCM audio to log mel spectrogram.
118
+ // The resulting spectrogram is stored inside the provided whisper context.
119
+ func (ctx *Context) Whisper_pcm_to_mel(data []float32, threads int) error {
120
+ if C.whisper_pcm_to_mel((*C.struct_whisper_context)(ctx), (*C.float)(&data[0]), C.int(len(data)), C.int(threads)) == 0 {
121
+ return nil
122
+ } else {
123
+ return ErrConversionFailed
124
+ }
125
+ }
126
+
127
+ // This can be used to set a custom log mel spectrogram inside the provided whisper context.
128
+ // Use this instead of whisper_pcm_to_mel() if you want to provide your own log mel spectrogram.
129
+ // n_mel must be 80
130
+ func (ctx *Context) Whisper_set_mel(data []float32, n_mel int) error {
131
+ if C.whisper_set_mel((*C.struct_whisper_context)(ctx), (*C.float)(&data[0]), C.int(len(data)), C.int(n_mel)) == 0 {
132
+ return nil
133
+ } else {
134
+ return ErrConversionFailed
135
+ }
136
+ }
137
+
138
+ // Run the Whisper encoder on the log mel spectrogram stored inside the provided whisper context.
139
+ // Make sure to call whisper_pcm_to_mel() or whisper_set_mel() first.
140
+ // offset can be used to specify the offset of the first frame in the spectrogram.
141
+ func (ctx *Context) Whisper_encode(offset, threads int) error {
142
+ if C.whisper_encode((*C.struct_whisper_context)(ctx), C.int(offset), C.int(threads)) == 0 {
143
+ return nil
144
+ } else {
145
+ return ErrConversionFailed
146
+ }
147
+ }
148
+
149
+ // Run the Whisper decoder to obtain the logits and probabilities for the next token.
150
+ // Make sure to call whisper_encode() first.
151
+ // tokens + n_tokens is the provided context for the decoder.
152
+ // n_past is the number of tokens to use from previous decoder calls.
153
+ func (ctx *Context) Whisper_decode(tokens []Token, past, threads int) error {
154
+ if C.whisper_decode((*C.struct_whisper_context)(ctx), (*C.whisper_token)(&tokens[0]), C.int(len(tokens)), C.int(past), C.int(threads)) == 0 {
155
+ return nil
156
+ } else {
157
+ return ErrConversionFailed
158
+ }
159
+ }
160
+
161
+ // Convert the provided text into tokens. The tokens pointer must be large enough to hold the resulting tokens.
162
+ // Returns the number of tokens on success
163
+ func (ctx *Context) Whisper_tokenize(text string, tokens []Token) (int, error) {
164
+ cText := C.CString(text)
165
+ defer C.free(unsafe.Pointer(cText))
166
+ if n := C.whisper_tokenize((*C.struct_whisper_context)(ctx), cText, (*C.whisper_token)(&tokens[0]), C.int(len(tokens))); n >= 0 {
167
+ return int(n), nil
168
+ } else {
169
+ return 0, ErrTokenizerFailed
170
+ }
171
+ }
172
+
173
+ // Return the id of the specified language, returns -1 if not found
174
+ // Examples:
175
+ //
176
+ // "de" -> 2
177
+ // "german" -> 2
178
+ func (ctx *Context) Whisper_lang_id(lang string) int {
179
+ return int(C.whisper_lang_id(C.CString(lang)))
180
+ }
181
+
182
+ // Largest language id (i.e. number of available languages - 1)
183
+ func Whisper_lang_max_id() int {
184
+ return int(C.whisper_lang_max_id())
185
+ }
186
+
187
+ // Return the short string of the specified language id (e.g. 2 -> "de"),
188
+ // returns empty string if not found
189
+ func Whisper_lang_str(id int) string {
190
+ return C.GoString(C.whisper_lang_str(C.int(id)))
191
+ }
192
+
193
+ // Use mel data at offset_ms to try and auto-detect the spoken language
194
+ // Make sure to call whisper_pcm_to_mel() or whisper_set_mel() first.
195
+ // Returns the probabilities of all languages.
196
+ // ref: https://github.com/openai/whisper/blob/main/whisper/decoding.py#L18-L69
197
+ func (ctx *Context) Whisper_lang_auto_detect(offset_ms, n_threads int) ([]float32, error) {
198
+ probs := make([]float32, Whisper_lang_max_id()+1)
199
+ if n := int(C.whisper_lang_auto_detect((*C.struct_whisper_context)(ctx), C.int(offset_ms), C.int(n_threads), (*C.float)(&probs[0]))); n < 0 {
200
+ return nil, ErrAutoDetectFailed
201
+ } else {
202
+ return probs, nil
203
+ }
204
+ }
205
+
206
+ func (ctx *Context) Whisper_n_len() int {
207
+ return int(C.whisper_n_len((*C.struct_whisper_context)(ctx)))
208
+ }
209
+
210
+ func (ctx *Context) Whisper_n_vocab() int {
211
+ return int(C.whisper_n_vocab((*C.struct_whisper_context)(ctx)))
212
+ }
213
+
214
+ func (ctx *Context) Whisper_n_text_ctx() int {
215
+ return int(C.whisper_n_text_ctx((*C.struct_whisper_context)(ctx)))
216
+ }
217
+
218
+ func (ctx *Context) Whisper_n_audio_ctx() int {
219
+ return int(C.whisper_n_audio_ctx((*C.struct_whisper_context)(ctx)))
220
+ }
221
+
222
+ func (ctx *Context) Whisper_is_multilingual() int {
223
+ return int(C.whisper_is_multilingual((*C.struct_whisper_context)(ctx)))
224
+ }
225
+
226
+ // The probabilities for the next token
227
+ //func (ctx *Whisper_context) Whisper_get_probs() []float32 {
228
+ // return (*[1 << 30]float32)(unsafe.Pointer(C.whisper_get_probs((*C.struct_whisper_context)(ctx))))[:ctx.Whisper_n_vocab()]
229
+ //}
230
+
231
+ // Token Id -> String. Uses the vocabulary in the provided context
232
+ func (ctx *Context) Whisper_token_to_str(token Token) string {
233
+ return C.GoString(C.whisper_token_to_str((*C.struct_whisper_context)(ctx), C.whisper_token(token)))
234
+ }
235
+
236
+ // Special tokens
237
+ func (ctx *Context) Whisper_token_eot() Token {
238
+ return Token(C.whisper_token_eot((*C.struct_whisper_context)(ctx)))
239
+ }
240
+
241
+ // Special tokens
242
+ func (ctx *Context) Whisper_token_sot() Token {
243
+ return Token(C.whisper_token_sot((*C.struct_whisper_context)(ctx)))
244
+ }
245
+
246
+ // Special tokens
247
+ func (ctx *Context) Whisper_token_prev() Token {
248
+ return Token(C.whisper_token_prev((*C.struct_whisper_context)(ctx)))
249
+ }
250
+
251
+ // Special tokens
252
+ func (ctx *Context) Whisper_token_solm() Token {
253
+ return Token(C.whisper_token_solm((*C.struct_whisper_context)(ctx)))
254
+ }
255
+
256
+ // Special tokens
257
+ func (ctx *Context) Whisper_token_not() Token {
258
+ return Token(C.whisper_token_not((*C.struct_whisper_context)(ctx)))
259
+ }
260
+
261
+ // Special tokens
262
+ func (ctx *Context) Whisper_token_beg() Token {
263
+ return Token(C.whisper_token_beg((*C.struct_whisper_context)(ctx)))
264
+ }
265
+
266
+ // Special tokens
267
+ func (ctx *Context) Whisper_token_lang(lang_id int) Token {
268
+ return Token(C.whisper_token_lang((*C.struct_whisper_context)(ctx), C.int(lang_id)))
269
+ }
270
+
271
+ // Task tokens
272
+ func (ctx *Context) Whisper_token_translate() Token {
273
+ return Token(C.whisper_token_translate((*C.struct_whisper_context)(ctx)))
274
+ }
275
+
276
+ // Task tokens
277
+ func (ctx *Context) Whisper_token_transcribe() Token {
278
+ return Token(C.whisper_token_transcribe((*C.struct_whisper_context)(ctx)))
279
+ }
280
+
281
+ // Performance information
282
+ func (ctx *Context) Whisper_print_timings() {
283
+ C.whisper_print_timings((*C.struct_whisper_context)(ctx))
284
+ }
285
+
286
+ // Performance information
287
+ func (ctx *Context) Whisper_reset_timings() {
288
+ C.whisper_reset_timings((*C.struct_whisper_context)(ctx))
289
+ }
290
+
291
+ // Print system information
292
+ func Whisper_print_system_info() string {
293
+ return C.GoString(C.whisper_print_system_info())
294
+ }
295
+
296
+ // Return default parameters for a strategy
297
+ func (ctx *Context) Whisper_full_default_params(strategy SamplingStrategy) Params {
298
+ // Get default parameters
299
+ return Params(C.whisper_full_default_params_cb((*C.struct_whisper_context)(ctx), C.enum_whisper_sampling_strategy(strategy)))
300
+ }
301
+
302
+ // Run the entire model: PCM -> log mel spectrogram -> encoder -> decoder -> text
303
+ // Uses the specified decoding strategy to obtain the text.
304
+ func (ctx *Context) Whisper_full(
305
+ params Params,
306
+ samples []float32,
307
+ encoderBeginCallback func() bool,
308
+ newSegmentCallback func(int),
309
+ progressCallback func(int),
310
+ ) error {
311
+ registerEncoderBeginCallback(ctx, encoderBeginCallback)
312
+ registerNewSegmentCallback(ctx, newSegmentCallback)
313
+ registerProgressCallback(ctx, progressCallback)
314
+ defer registerEncoderBeginCallback(ctx, nil)
315
+ defer registerNewSegmentCallback(ctx, nil)
316
+ defer registerProgressCallback(ctx, nil)
317
+ if C.whisper_full((*C.struct_whisper_context)(ctx), (C.struct_whisper_full_params)(params), (*C.float)(&samples[0]), C.int(len(samples))) == 0 {
318
+ return nil
319
+ } else {
320
+ return ErrConversionFailed
321
+ }
322
+ }
323
+
324
+ // Split the input audio in chunks and process each chunk separately using whisper_full()
325
+ // It seems this approach can offer some speedup in some cases.
326
+ // However, the transcription accuracy can be worse at the beginning and end of each chunk.
327
+ func (ctx *Context) Whisper_full_parallel(params Params, samples []float32, processors int, encoderBeginCallback func() bool, newSegmentCallback func(int)) error {
328
+ registerEncoderBeginCallback(ctx, encoderBeginCallback)
329
+ registerNewSegmentCallback(ctx, newSegmentCallback)
330
+ defer registerEncoderBeginCallback(ctx, nil)
331
+ defer registerNewSegmentCallback(ctx, nil)
332
+
333
+ if C.whisper_full_parallel((*C.struct_whisper_context)(ctx), (C.struct_whisper_full_params)(params), (*C.float)(&samples[0]), C.int(len(samples)), C.int(processors)) == 0 {
334
+ return nil
335
+ } else {
336
+ return ErrConversionFailed
337
+ }
338
+ }
339
+
340
+ // Return the id of the autodetected language, returns -1 if not found
341
+ // Added to whisper.cpp in
342
+ // https://github.com/ggerganov/whisper.cpp/commit/a1c1583cc7cd8b75222857afc936f0638c5683d6
343
+ //
344
+ // Examples:
345
+ //
346
+ // "de" -> 2
347
+ // "german" -> 2
348
+ func (ctx *Context) Whisper_full_lang_id() int {
349
+ return int(C.whisper_full_lang_id((*C.struct_whisper_context)(ctx)))
350
+ }
351
+
352
+ // Number of generated text segments.
353
+ // A segment can be a few words, a sentence, or even a paragraph.
354
+ func (ctx *Context) Whisper_full_n_segments() int {
355
+ return int(C.whisper_full_n_segments((*C.struct_whisper_context)(ctx)))
356
+ }
357
+
358
+ // Get the start and end time of the specified segment.
359
+ func (ctx *Context) Whisper_full_get_segment_t0(segment int) int64 {
360
+ return int64(C.whisper_full_get_segment_t0((*C.struct_whisper_context)(ctx), C.int(segment)))
361
+ }
362
+
363
+ // Get the start and end time of the specified segment.
364
+ func (ctx *Context) Whisper_full_get_segment_t1(segment int) int64 {
365
+ return int64(C.whisper_full_get_segment_t1((*C.struct_whisper_context)(ctx), C.int(segment)))
366
+ }
367
+
368
+ // Get the text of the specified segment.
369
+ func (ctx *Context) Whisper_full_get_segment_text(segment int) string {
370
+ return C.GoString(C.whisper_full_get_segment_text((*C.struct_whisper_context)(ctx), C.int(segment)))
371
+ }
372
+
373
+ // Get number of tokens in the specified segment.
374
+ func (ctx *Context) Whisper_full_n_tokens(segment int) int {
375
+ return int(C.whisper_full_n_tokens((*C.struct_whisper_context)(ctx), C.int(segment)))
376
+ }
377
+
378
+ // Get the token text of the specified token index in the specified segment.
379
+ func (ctx *Context) Whisper_full_get_token_text(segment int, token int) string {
380
+ return C.GoString(C.whisper_full_get_token_text((*C.struct_whisper_context)(ctx), C.int(segment), C.int(token)))
381
+ }
382
+
383
+ // Get the token of the specified token index in the specified segment.
384
+ func (ctx *Context) Whisper_full_get_token_id(segment int, token int) Token {
385
+ return Token(C.whisper_full_get_token_id((*C.struct_whisper_context)(ctx), C.int(segment), C.int(token)))
386
+ }
387
+
388
+ // Get token data for the specified token in the specified segment.
389
+ // This contains probabilities, timestamps, etc.
390
+ func (ctx *Context) Whisper_full_get_token_data(segment int, token int) TokenData {
391
+ return TokenData(C.whisper_full_get_token_data((*C.struct_whisper_context)(ctx), C.int(segment), C.int(token)))
392
+ }
393
+
394
+ // Get the probability of the specified token in the specified segment.
395
+ func (ctx *Context) Whisper_full_get_token_p(segment int, token int) float32 {
396
+ return float32(C.whisper_full_get_token_p((*C.struct_whisper_context)(ctx), C.int(segment), C.int(token)))
397
+ }
398
+
399
+ ///////////////////////////////////////////////////////////////////////////////
400
+ // CALLBACKS
401
+
402
+ var (
403
+ cbNewSegment = make(map[unsafe.Pointer]func(int))
404
+ cbProgress = make(map[unsafe.Pointer]func(int))
405
+ cbEncoderBegin = make(map[unsafe.Pointer]func() bool)
406
+ )
407
+
408
+ func registerNewSegmentCallback(ctx *Context, fn func(int)) {
409
+ if fn == nil {
410
+ delete(cbNewSegment, unsafe.Pointer(ctx))
411
+ } else {
412
+ cbNewSegment[unsafe.Pointer(ctx)] = fn
413
+ }
414
+ }
415
+
416
+ func registerProgressCallback(ctx *Context, fn func(int)) {
417
+ if fn == nil {
418
+ delete(cbProgress, unsafe.Pointer(ctx))
419
+ } else {
420
+ cbProgress[unsafe.Pointer(ctx)] = fn
421
+ }
422
+ }
423
+
424
+ func registerEncoderBeginCallback(ctx *Context, fn func() bool) {
425
+ if fn == nil {
426
+ delete(cbEncoderBegin, unsafe.Pointer(ctx))
427
+ } else {
428
+ cbEncoderBegin[unsafe.Pointer(ctx)] = fn
429
+ }
430
+ }
431
+
432
+ //export callNewSegment
433
+ func callNewSegment(user_data unsafe.Pointer, new C.int) {
434
+ if fn, ok := cbNewSegment[user_data]; ok {
435
+ fn(int(new))
436
+ }
437
+ }
438
+
439
+ //export callProgress
440
+ func callProgress(user_data unsafe.Pointer, progress C.int) {
441
+ if fn, ok := cbProgress[user_data]; ok {
442
+ fn(int(progress))
443
+ }
444
+ }
445
+
446
+ //export callEncoderBegin
447
+ func callEncoderBegin(user_data unsafe.Pointer) C.bool {
448
+ if fn, ok := cbEncoderBegin[user_data]; ok {
449
+ if fn() {
450
+ return C.bool(true)
451
+ } else {
452
+ return C.bool(false)
453
+ }
454
+ }
455
+ return true
456
+ }
457
+
458
+ func (t TokenData) T0() int64 {
459
+ return int64(t.t0)
460
+ }
461
+
462
+ func (t TokenData) T1() int64 {
463
+ return int64(t.t1)
464
+ }
465
+
466
+ func (t TokenData) Id() Token {
467
+ return Token(t.id)
468
+ }
bindings/go/whisper_test.go ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ package whisper_test
2
+
3
+ import (
4
+ "os"
5
+ "runtime"
6
+ "testing"
7
+ "time"
8
+
9
+ // Packages
10
+ whisper "github.com/ggerganov/whisper.cpp/bindings/go"
11
+ wav "github.com/go-audio/wav"
12
+ assert "github.com/stretchr/testify/assert"
13
+ )
14
+
15
+ const (
16
+ ModelPath = "models/ggml-small.en.bin"
17
+ SamplePath = "samples/jfk.wav"
18
+ )
19
+
20
+ func Test_Whisper_000(t *testing.T) {
21
+ assert := assert.New(t)
22
+ if _, err := os.Stat(ModelPath); os.IsNotExist(err) {
23
+ t.Skip("Skipping test, model not found:", ModelPath)
24
+ }
25
+ ctx := whisper.Whisper_init(ModelPath)
26
+ assert.NotNil(ctx)
27
+ ctx.Whisper_free()
28
+ }
29
+
30
+ func Test_Whisper_001(t *testing.T) {
31
+ assert := assert.New(t)
32
+ if _, err := os.Stat(ModelPath); os.IsNotExist(err) {
33
+ t.Skip("Skipping test, model not found:", ModelPath)
34
+ }
35
+ if _, err := os.Stat(SamplePath); os.IsNotExist(err) {
36
+ t.Skip("Skipping test, sample not found:", SamplePath)
37
+ }
38
+
39
+ // Open samples
40
+ fh, err := os.Open(SamplePath)
41
+ assert.NoError(err)
42
+ defer fh.Close()
43
+
44
+ // Read samples
45
+ d := wav.NewDecoder(fh)
46
+ buf, err := d.FullPCMBuffer()
47
+ assert.NoError(err)
48
+
49
+ // Run whisper
50
+ ctx := whisper.Whisper_init(ModelPath)
51
+ assert.NotNil(ctx)
52
+ defer ctx.Whisper_free()
53
+ params := ctx.Whisper_full_default_params(whisper.SAMPLING_GREEDY)
54
+ data := buf.AsFloat32Buffer().Data
55
+ err = ctx.Whisper_full(params, data, nil, nil, nil)
56
+ assert.NoError(err)
57
+
58
+ // Print out tokens
59
+ num_segments := ctx.Whisper_full_n_segments()
60
+ assert.GreaterOrEqual(num_segments, 1)
61
+ for i := 0; i < num_segments; i++ {
62
+ str := ctx.Whisper_full_get_segment_text(i)
63
+ assert.NotEmpty(str)
64
+ t0 := time.Duration(ctx.Whisper_full_get_segment_t0(i)) * time.Millisecond
65
+ t1 := time.Duration(ctx.Whisper_full_get_segment_t1(i)) * time.Millisecond
66
+ t.Logf("[%6s->%-6s] %q", t0, t1, str)
67
+ }
68
+ }
69
+
70
+ func Test_Whisper_002(t *testing.T) {
71
+ assert := assert.New(t)
72
+ for i := 0; i < whisper.Whisper_lang_max_id(); i++ {
73
+ str := whisper.Whisper_lang_str(i)
74
+ assert.NotEmpty(str)
75
+ t.Log(str)
76
+ }
77
+ }
78
+
79
+ func Test_Whisper_003(t *testing.T) {
80
+ threads := runtime.NumCPU()
81
+ assert := assert.New(t)
82
+ if _, err := os.Stat(ModelPath); os.IsNotExist(err) {
83
+ t.Skip("Skipping test, model not found:", ModelPath)
84
+ }
85
+ if _, err := os.Stat(SamplePath); os.IsNotExist(err) {
86
+ t.Skip("Skipping test, sample not found:", SamplePath)
87
+ }
88
+
89
+ // Open samples
90
+ fh, err := os.Open(SamplePath)
91
+ assert.NoError(err)
92
+ defer fh.Close()
93
+
94
+ // Read samples
95
+ d := wav.NewDecoder(fh)
96
+ buf, err := d.FullPCMBuffer()
97
+ assert.NoError(err)
98
+
99
+ // Make the model
100
+ ctx := whisper.Whisper_init(ModelPath)
101
+ assert.NotNil(ctx)
102
+ defer ctx.Whisper_free()
103
+
104
+ // Get MEL
105
+ assert.NoError(ctx.Whisper_pcm_to_mel(buf.AsFloat32Buffer().Data, threads))
106
+
107
+ // Get Languages
108
+ languages, err := ctx.Whisper_lang_auto_detect(0, threads)
109
+ assert.NoError(err)
110
+ for i, p := range languages {
111
+ t.Logf("%s: %f", whisper.Whisper_lang_str(i), p)
112
+ }
113
+ }
bindings/java/.idea/uiDesigner.xml ADDED
@@ -0,0 +1,124 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="Palette2">
4
+ <group name="Swing">
5
+ <item class="com.intellij.uiDesigner.HSpacer" tooltip-text="Horizontal Spacer" icon="/com/intellij/uiDesigner/icons/hspacer.svg" removable="false" auto-create-binding="false" can-attach-label="false">
6
+ <default-constraints vsize-policy="1" hsize-policy="6" anchor="0" fill="1" />
7
+ </item>
8
+ <item class="com.intellij.uiDesigner.VSpacer" tooltip-text="Vertical Spacer" icon="/com/intellij/uiDesigner/icons/vspacer.svg" removable="false" auto-create-binding="false" can-attach-label="false">
9
+ <default-constraints vsize-policy="6" hsize-policy="1" anchor="0" fill="2" />
10
+ </item>
11
+ <item class="javax.swing.JPanel" icon="/com/intellij/uiDesigner/icons/panel.svg" removable="false" auto-create-binding="false" can-attach-label="false">
12
+ <default-constraints vsize-policy="3" hsize-policy="3" anchor="0" fill="3" />
13
+ </item>
14
+ <item class="javax.swing.JScrollPane" icon="/com/intellij/uiDesigner/icons/scrollPane.svg" removable="false" auto-create-binding="false" can-attach-label="true">
15
+ <default-constraints vsize-policy="7" hsize-policy="7" anchor="0" fill="3" />
16
+ </item>
17
+ <item class="javax.swing.JButton" icon="/com/intellij/uiDesigner/icons/button.svg" removable="false" auto-create-binding="true" can-attach-label="false">
18
+ <default-constraints vsize-policy="0" hsize-policy="3" anchor="0" fill="1" />
19
+ <initial-values>
20
+ <property name="text" value="Button" />
21
+ </initial-values>
22
+ </item>
23
+ <item class="javax.swing.JRadioButton" icon="/com/intellij/uiDesigner/icons/radioButton.svg" removable="false" auto-create-binding="true" can-attach-label="false">
24
+ <default-constraints vsize-policy="0" hsize-policy="3" anchor="8" fill="0" />
25
+ <initial-values>
26
+ <property name="text" value="RadioButton" />
27
+ </initial-values>
28
+ </item>
29
+ <item class="javax.swing.JCheckBox" icon="/com/intellij/uiDesigner/icons/checkBox.svg" removable="false" auto-create-binding="true" can-attach-label="false">
30
+ <default-constraints vsize-policy="0" hsize-policy="3" anchor="8" fill="0" />
31
+ <initial-values>
32
+ <property name="text" value="CheckBox" />
33
+ </initial-values>
34
+ </item>
35
+ <item class="javax.swing.JLabel" icon="/com/intellij/uiDesigner/icons/label.svg" removable="false" auto-create-binding="false" can-attach-label="false">
36
+ <default-constraints vsize-policy="0" hsize-policy="0" anchor="8" fill="0" />
37
+ <initial-values>
38
+ <property name="text" value="Label" />
39
+ </initial-values>
40
+ </item>
41
+ <item class="javax.swing.JTextField" icon="/com/intellij/uiDesigner/icons/textField.svg" removable="false" auto-create-binding="true" can-attach-label="true">
42
+ <default-constraints vsize-policy="0" hsize-policy="6" anchor="8" fill="1">
43
+ <preferred-size width="150" height="-1" />
44
+ </default-constraints>
45
+ </item>
46
+ <item class="javax.swing.JPasswordField" icon="/com/intellij/uiDesigner/icons/passwordField.svg" removable="false" auto-create-binding="true" can-attach-label="true">
47
+ <default-constraints vsize-policy="0" hsize-policy="6" anchor="8" fill="1">
48
+ <preferred-size width="150" height="-1" />
49
+ </default-constraints>
50
+ </item>
51
+ <item class="javax.swing.JFormattedTextField" icon="/com/intellij/uiDesigner/icons/formattedTextField.svg" removable="false" auto-create-binding="true" can-attach-label="true">
52
+ <default-constraints vsize-policy="0" hsize-policy="6" anchor="8" fill="1">
53
+ <preferred-size width="150" height="-1" />
54
+ </default-constraints>
55
+ </item>
56
+ <item class="javax.swing.JTextArea" icon="/com/intellij/uiDesigner/icons/textArea.svg" removable="false" auto-create-binding="true" can-attach-label="true">
57
+ <default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3">
58
+ <preferred-size width="150" height="50" />
59
+ </default-constraints>
60
+ </item>
61
+ <item class="javax.swing.JTextPane" icon="/com/intellij/uiDesigner/icons/textPane.svg" removable="false" auto-create-binding="true" can-attach-label="true">
62
+ <default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3">
63
+ <preferred-size width="150" height="50" />
64
+ </default-constraints>
65
+ </item>
66
+ <item class="javax.swing.JEditorPane" icon="/com/intellij/uiDesigner/icons/editorPane.svg" removable="false" auto-create-binding="true" can-attach-label="true">
67
+ <default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3">
68
+ <preferred-size width="150" height="50" />
69
+ </default-constraints>
70
+ </item>
71
+ <item class="javax.swing.JComboBox" icon="/com/intellij/uiDesigner/icons/comboBox.svg" removable="false" auto-create-binding="true" can-attach-label="true">
72
+ <default-constraints vsize-policy="0" hsize-policy="2" anchor="8" fill="1" />
73
+ </item>
74
+ <item class="javax.swing.JTable" icon="/com/intellij/uiDesigner/icons/table.svg" removable="false" auto-create-binding="true" can-attach-label="false">
75
+ <default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3">
76
+ <preferred-size width="150" height="50" />
77
+ </default-constraints>
78
+ </item>
79
+ <item class="javax.swing.JList" icon="/com/intellij/uiDesigner/icons/list.svg" removable="false" auto-create-binding="true" can-attach-label="false">
80
+ <default-constraints vsize-policy="6" hsize-policy="2" anchor="0" fill="3">
81
+ <preferred-size width="150" height="50" />
82
+ </default-constraints>
83
+ </item>
84
+ <item class="javax.swing.JTree" icon="/com/intellij/uiDesigner/icons/tree.svg" removable="false" auto-create-binding="true" can-attach-label="false">
85
+ <default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3">
86
+ <preferred-size width="150" height="50" />
87
+ </default-constraints>
88
+ </item>
89
+ <item class="javax.swing.JTabbedPane" icon="/com/intellij/uiDesigner/icons/tabbedPane.svg" removable="false" auto-create-binding="true" can-attach-label="false">
90
+ <default-constraints vsize-policy="3" hsize-policy="3" anchor="0" fill="3">
91
+ <preferred-size width="200" height="200" />
92
+ </default-constraints>
93
+ </item>
94
+ <item class="javax.swing.JSplitPane" icon="/com/intellij/uiDesigner/icons/splitPane.svg" removable="false" auto-create-binding="false" can-attach-label="false">
95
+ <default-constraints vsize-policy="3" hsize-policy="3" anchor="0" fill="3">
96
+ <preferred-size width="200" height="200" />
97
+ </default-constraints>
98
+ </item>
99
+ <item class="javax.swing.JSpinner" icon="/com/intellij/uiDesigner/icons/spinner.svg" removable="false" auto-create-binding="true" can-attach-label="true">
100
+ <default-constraints vsize-policy="0" hsize-policy="6" anchor="8" fill="1" />
101
+ </item>
102
+ <item class="javax.swing.JSlider" icon="/com/intellij/uiDesigner/icons/slider.svg" removable="false" auto-create-binding="true" can-attach-label="false">
103
+ <default-constraints vsize-policy="0" hsize-policy="6" anchor="8" fill="1" />
104
+ </item>
105
+ <item class="javax.swing.JSeparator" icon="/com/intellij/uiDesigner/icons/separator.svg" removable="false" auto-create-binding="false" can-attach-label="false">
106
+ <default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3" />
107
+ </item>
108
+ <item class="javax.swing.JProgressBar" icon="/com/intellij/uiDesigner/icons/progressbar.svg" removable="false" auto-create-binding="true" can-attach-label="false">
109
+ <default-constraints vsize-policy="0" hsize-policy="6" anchor="0" fill="1" />
110
+ </item>
111
+ <item class="javax.swing.JToolBar" icon="/com/intellij/uiDesigner/icons/toolbar.svg" removable="false" auto-create-binding="false" can-attach-label="false">
112
+ <default-constraints vsize-policy="0" hsize-policy="6" anchor="0" fill="1">
113
+ <preferred-size width="-1" height="20" />
114
+ </default-constraints>
115
+ </item>
116
+ <item class="javax.swing.JToolBar$Separator" icon="/com/intellij/uiDesigner/icons/toolbarSeparator.svg" removable="false" auto-create-binding="false" can-attach-label="false">
117
+ <default-constraints vsize-policy="0" hsize-policy="0" anchor="0" fill="1" />
118
+ </item>
119
+ <item class="javax.swing.JScrollBar" icon="/com/intellij/uiDesigner/icons/scrollbar.svg" removable="false" auto-create-binding="true" can-attach-label="false">
120
+ <default-constraints vsize-policy="6" hsize-policy="0" anchor="0" fill="2" />
121
+ </item>
122
+ </group>
123
+ </component>
124
+ </project>
bindings/java/README.md ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Java JNI bindings for Whisper
2
+
3
+ This package provides Java JNI bindings for whisper.cpp. They have been tested on:
4
+
5
+ * <strike>Darwin (OS X) 12.6 on x86_64</strike>
6
+ * Ubuntu on x86_64
7
+ * Windows on x86_64
8
+
9
+ The "low level" bindings are in `WhisperCppJnaLibrary`. The most simple usage is as follows:
10
+
11
+ JNA will attempt to load the `whispercpp` shared library from:
12
+
13
+ - jna.library.path
14
+ - jna.platform.library
15
+ - ~/Library/Frameworks
16
+ - /Library/Frameworks
17
+ - /System/Library/Frameworks
18
+ - classpath
19
+
20
+ ```java
21
+ import io.github.ggerganov.whispercpp.WhisperCpp;
22
+
23
+ public class Example {
24
+
25
+ public static void main(String[] args) {
26
+ WhisperCpp whisper = new WhisperCpp();
27
+ // By default, models are loaded from ~/.cache/whisper/ and are usually named "ggml-${name}.bin"
28
+ // or you can provide the absolute path to the model file.
29
+ long context = whisper.initContext("base.en");
30
+ try {
31
+ var whisperParams = whisper.getFullDefaultParams(WhisperSamplingStrategy.WHISPER_SAMPLING_GREEDY);
32
+ // custom configuration if required
33
+ whisperParams.temperature_inc = 0f;
34
+
35
+ var samples = readAudio(); // divide each value by 32767.0f
36
+ whisper.fullTranscribe(whisperParams, samples);
37
+
38
+ int segmentCount = whisper.getTextSegmentCount(context);
39
+ for (int i = 0; i < segmentCount; i++) {
40
+ String text = whisper.getTextSegment(context, i);
41
+ System.out.println(text);
42
+ }
43
+ } finally {
44
+ whisper.freeContext(context);
45
+ }
46
+ }
47
+ }
48
+ ```
49
+
50
+ ## Building & Testing
51
+
52
+ In order to build, you need to have the JDK 8 or higher installed. Run the tests with:
53
+
54
+ ```bash
55
+ git clone https://github.com/ggerganov/whisper.cpp.git
56
+ cd whisper.cpp/bindings/java
57
+
58
+ ./gradlew build
59
+ ```
60
+
61
+ You need to have the `whisper` library in your [JNA library path](https://java-native-access.github.io/jna/4.2.1/com/sun/jna/NativeLibrary.html). On Windows the dll is included in the jar and you can update it:
62
+
63
+ ```bash
64
+ copy /y ..\..\build\bin\Release\whisper.dll build\generated\resources\main\win32-x86-64\whisper.dll
65
+ ```
66
+
67
+
68
+ ## License
69
+
70
+ The license for the Java bindings is the same as the license for the rest of the whisper.cpp project, which is the MIT License. See the `LICENSE` file for more details.
71
+
bindings/java/build.gradle ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
plugins {
    id 'java'
    id 'java-library'
    id 'maven-publish'
    id 'signing'
}

archivesBaseName = 'whispercpp'
group = 'io.github.ggerganov'
version = '1.4.0'


sourceCompatibility = 1.8
targetCompatibility = 1.8

sourceSets {
    main {
        resources {
            // Include the staged native libraries alongside the regular resources.
            srcDirs = ['src/main/resources', 'build/generated/resources/main']
        }
    }
    test {
        runtimeClasspath += files('build/generated/resources/main')
    }
}

// Stage the native libraries built by whisper.cpp (../../build) into the
// resource tree that JNA searches at runtime.
// FIX: macOS shared libraries use the ".dylib" suffix; the previous
// "libwhisper.dynlib" pattern matched no file, so nothing was ever copied.
tasks.register('copyLibwhisperDynlib', Copy) {
    from '../../build'
    include 'libwhisper.dylib'
    into 'build/generated/resources/main/darwin'
}

tasks.register('copyLibwhisperSo', Copy) {
    from '../../build'
    include 'libwhisper.so'
    into 'build/generated/resources/main/linux-x86-64'
}

// FIX: JNA resolves Windows natives from the "win32-x86-64" resource prefix
// (the README's copy command also targets win32-x86-64); the previous
// "windows-x86-64" directory is never consulted by JNA.
tasks.register('copyWhisperDll', Copy) {
    from '../../build/Release'
    include 'whisper.dll'
    into 'build/generated/resources/main/win32-x86-64'
}

// Convenience aggregate: stage all platform native libraries at once.
tasks.register('copyLibs') {
    dependsOn copyLibwhisperDynlib, copyLibwhisperSo, copyWhisperDll
}

// Let tests locate the staged native libraries through JNA.
test {
    systemProperty 'jna.library.path', project.file('build/generated/resources/main').absolutePath
}

java {
    withSourcesJar()
    withJavadocJar()
}

jar {
    // Linker by-products from the JNI build have no place in the jar.
    exclude '**/whisper_java.exp', '**/whisper_java.lib'
}

javadoc {
    // Suppress doclint errors so javadoc generation never fails the build.
    options.addStringOption('Xdoclint:none', '-quiet')
}

tasks.withType(Test) {
    useJUnitPlatform()
}

dependencies {
    implementation "net.java.dev.jna:jna:5.13.0"
    testImplementation "org.junit.jupiter:junit-jupiter:5.9.2"
    testImplementation "org.assertj:assertj-core:3.24.2"
}

repositories {
    mavenCentral()
}

publishing {
    publications {
        mavenJava(MavenPublication) {
            artifactId = 'whispercpp'
            from components.java
            pom {
                name = 'whispercpp'
                description = "Java JNA bindings for OpenAI's Whisper model, implemented in C/C++"
                url = 'https://github.com/ggerganov/whisper.cpp'
                licenses {
                    license {
                        name = 'MIT licence'
                        url = 'https://raw.githubusercontent.com/ggerganov/whisper.cpp/master/LICENSE'
                    }
                }
                developers {
                    developer {
                        id = 'ggerganov'
                        name = 'Georgi Gerganov'
                        email = 'ggerganov@gmail.com'
                    }
                    developer {
                        id = 'nalbion'
                        name = 'Nicholas Albion'
                        email = 'nalbion@yahoo.com'
                    }
                }
                scm {
                    connection = 'scm:git:git://github.com/ggerganov/whisper.cpp.git'
                    url = 'https://github.com/ggerganov/whisper.cpp'
                }
            }
        }
    }

    repositories {
        maven {
            def releasesRepoUrl = 'https://s01.oss.sonatype.org/service/local/staging/deploy/maven2/'
            def snapshotsRepoUrl = 'https://s01.oss.sonatype.org/content/repositories/snapshots/'
            url = version.endsWith('-SNAPSHOT') ? snapshotsRepoUrl : releasesRepoUrl
            credentials {
                username = System.getenv("MAVEN_USERNAME")
                password = System.getenv("MAVEN_PASSWORD")
            }
        }
    }
}

signing {
    // Key material comes from the environment so CI never stores secrets on disk.
    def signingKey = System.getenv("PGP_SECRET")
    def signingPassword = System.getenv("PGP_PASSPHRASE")
    useInMemoryPgpKeys(signingKey, signingPassword)
    sign publishing.publications.mavenJava
}
bindings/java/gradle.properties ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
# Build settings for the whisper.cpp Java bindings.

# Heap limits for the JVM that runs the Gradle build itself.
org.gradle.jvmargs=-Xms256m -Xmx1024m

# C header search locations — presumably consumed by the native/JNI build;
# verify against the build scripts that read these properties.
system.include.dir=/usr/include
#system.local.include.dir=../../include
system.local.include.dir=./build/generated/sources/headers/java/main

# JNI header/library paths. NOTE(review): assumes a Debian/Ubuntu
# OpenJDK 8 layout — adjust for the local JDK installation.
jni.include.dir=/usr/lib/jvm/java-8-openjdk-amd64/include/
jni.lib.dir=/usr/lib/jvm/java-8-openjdk-amd64/lib/
bindings/java/gradle/wrapper/gradle-wrapper.jar ADDED
Binary file (61.6 kB). View file
 
bindings/java/gradle/wrapper/gradle-wrapper.properties ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
# Gradle wrapper configuration: which Gradle distribution to download and
# where to cache it (paths below are relative to GRADLE_USER_HOME).
distributionBase=GRADLE_USER_HOME
distributionPath=wrapper/dists
# Gradle 8.1, binary-only ("-bin") distribution.
distributionUrl=https\://services.gradle.org/distributions/gradle-8.1-bin.zip
# Timeout for downloading the distribution, in milliseconds.
networkTimeout=10000
zipStoreBase=GRADLE_USER_HOME
zipStorePath=wrapper/dists
bindings/java/gradlew ADDED
@@ -0,0 +1,244 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/sh

#
# Copyright © 2015-2021 the original authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

##############################################################################
#
#   Gradle start up script for POSIX generated by Gradle.
#
#   Important for running:
#
#   (1) You need a POSIX-compliant shell to run this script. If your /bin/sh is
#       noncompliant, but you have some other compliant shell such as ksh or
#       bash, then to run this script, type that shell name before the whole
#       command line, like:
#
#           ksh Gradle
#
#       Busybox and similar reduced shells will NOT work, because this script
#       requires all of these POSIX shell features:
#         * functions;
#         * expansions «$var», «${var}», «${var:-default}», «${var+SET}»,
#           «${var#prefix}», «${var%suffix}», and «$( cmd )»;
#         * compound commands having a testable exit status, especially «case»;
#         * various built-in commands including «command», «set», and «ulimit».
#
#   Important for patching:
#
#   (2) This script targets any POSIX shell, so it avoids extensions provided
#       by Bash, Ksh, etc; in particular arrays are avoided.
#
#       The "traditional" practice of packing multiple parameters into a
#       space-separated string is a well documented source of bugs and security
#       problems, so this is (mostly) avoided, by progressively accumulating
#       options in "$@", and eventually passing that to Java.
#
#       Where the inherited environment variables (DEFAULT_JVM_OPTS, JAVA_OPTS,
#       and GRADLE_OPTS) rely on word-splitting, this is performed explicitly;
#       see the in-line comments for details.
#
#       There are tweaks for specific operating systems such as AIX, CygWin,
#       Darwin, MinGW, and NonStop.
#
#   (3) This script is generated from the Groovy template
#       https://github.com/gradle/gradle/blob/HEAD/subprojects/plugins/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt
#       within the Gradle project.
#
#       You can find Gradle at https://github.com/gradle/gradle/.
#
##############################################################################

# Attempt to set APP_HOME

# Resolve links: $0 may be a link
app_path=$0

# Need this for daisy-chained symlinks.
while
    APP_HOME=${app_path%"${app_path##*/}"}  # leaves a trailing /; empty if no leading path
    [ -h "$app_path" ]
do
    ls=$( ls -ld "$app_path" )
    link=${ls#*' -> '}
    case $link in             #(
      /*)   app_path=$link ;; #(
      *)    app_path=$APP_HOME$link ;;
    esac
done

# This is normally unused
# shellcheck disable=SC2034
APP_BASE_NAME=${0##*/}
APP_HOME=$( cd "${APP_HOME:-./}" && pwd -P ) || exit

# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'

# Use the maximum available, or set MAX_FD != -1 to use that value.
MAX_FD=maximum

warn () {
    echo "$*"
} >&2

die () {
    echo
    echo "$*"
    echo
    exit 1
} >&2

# OS specific support (must be 'true' or 'false').
cygwin=false
msys=false
darwin=false
nonstop=false
case "$( uname )" in                #(
  CYGWIN* )         cygwin=true  ;; #(
  Darwin* )         darwin=true  ;; #(
  MSYS* | MINGW* )  msys=true    ;; #(
  NONSTOP* )        nonstop=true ;;
esac

CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar


# Determine the Java command to use to start the JVM.
if [ -n "$JAVA_HOME" ] ; then
    if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
        # IBM's JDK on AIX uses strange locations for the executables
        JAVACMD=$JAVA_HOME/jre/sh/java
    else
        JAVACMD=$JAVA_HOME/bin/java
    fi
    if [ ! -x "$JAVACMD" ] ; then
        die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME

Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
    fi
else
    JAVACMD=java
    # Use the POSIX 'command -v' builtin (consistent with the xargs check
    # below) instead of the external, non-portable 'which' utility.
    command -v java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.

Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
fi

# Increase the maximum file descriptors if we can.
if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then
    case $MAX_FD in #(
      max*)
        # In POSIX sh, ulimit -H is undefined. That's why the result is checked to see if it worked.
        # shellcheck disable=SC3045
        MAX_FD=$( ulimit -H -n ) ||
            warn "Could not query maximum file descriptor limit"
    esac
    case $MAX_FD in  #(
      '' | soft) :;; #(
      *)
        # In POSIX sh, ulimit -n is undefined. That's why the result is checked to see if it worked.
        # shellcheck disable=SC3045
        ulimit -n "$MAX_FD" ||
            warn "Could not set maximum file descriptor limit to $MAX_FD"
    esac
fi

# Collect all arguments for the java command, stacking in reverse order:
#   * args from the command line
#   * the main class name
#   * -classpath
#   * -D...appname settings
#   * --module-path (only if needed)
#   * DEFAULT_JVM_OPTS, JAVA_OPTS, and GRADLE_OPTS environment variables.

# For Cygwin or MSYS, switch paths to Windows format before running java
if "$cygwin" || "$msys" ; then
    APP_HOME=$( cygpath --path --mixed "$APP_HOME" )
    CLASSPATH=$( cygpath --path --mixed "$CLASSPATH" )

    JAVACMD=$( cygpath --unix "$JAVACMD" )

    # Now convert the arguments - kludge to limit ourselves to /bin/sh
    for arg do
        if
            case $arg in                                #(
              -*)   false ;; # don't mess with options  #(
              /?*)  t=${arg#/} t=/${t%%/*}              # looks like a POSIX filepath
                    [ -e "$t" ] ;;                      #(
              *)    false ;;
            esac
        then
            arg=$( cygpath --path --ignore --mixed "$arg" )
        fi
        # Roll the args list around exactly as many times as the number of
        # args, so each arg winds up back in the position where it started, but
        # possibly modified.
        #
        # NB: a `for` loop captures its iteration list before it begins, so
        # changing the positional parameters here affects neither the number of
        # iterations, nor the values presented in `arg`.
        shift                   # remove old arg
        set -- "$@" "$arg"      # push replacement arg
    done
fi


# Collect all arguments for the java command;
#   * $DEFAULT_JVM_OPTS, $JAVA_OPTS, and $GRADLE_OPTS can contain fragments of
#     shell script including quotes and variable substitutions, so put them in
#     double quotes to make sure that they get re-expanded; and
#   * put everything else in single quotes, so that it's not re-expanded.

set -- \
        "-Dorg.gradle.appname=$APP_BASE_NAME" \
        -classpath "$CLASSPATH" \
        org.gradle.wrapper.GradleWrapperMain \
        "$@"

# Stop when "xargs" is not available.
if ! command -v xargs >/dev/null 2>&1
then
    die "xargs is not available"
fi

# Use "xargs" to parse quoted args.
#
# With -n1 it outputs one arg per line, with the quotes and backslashes removed.
#
# In Bash we could simply go:
#
#   readarray ARGS < <( xargs -n1 <<<"$var" ) &&
#   set -- "${ARGS[@]}" "$@"
#
# but POSIX shell has neither arrays nor command substitution, so instead we
# post-process each arg (as a line of input to sed) to backslash-escape any
# character that might be a shell metacharacter, then use eval to reverse
# that process (while maintaining the separation between arguments), and wrap
# the whole thing up as a single "set" statement.
#
# This will of course break if any of these variables contains a newline or
# an unmatched quote.
#

eval "set -- $(
        printf '%s\n' "$DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS" |
        xargs -n1 |
        sed ' s~[^-[:alnum:]+,./:=@_]~\\&~g; ' |
        tr '\n' ' '
    )" '"$@"'

exec "$JAVACMD" "$@"
bindings/java/gradlew.bat ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
@rem
@rem Copyright 2015 the original author or authors.
@rem
@rem Licensed under the Apache License, Version 2.0 (the "License");
@rem you may not use this file except in compliance with the License.
@rem You may obtain a copy of the License at
@rem
@rem      https://www.apache.org/licenses/LICENSE-2.0
@rem
@rem Unless required by applicable law or agreed to in writing, software
@rem distributed under the License is distributed on an "AS IS" BASIS,
@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@rem See the License for the specific language governing permissions and
@rem limitations under the License.
@rem

@if "%DEBUG%"=="" @echo off
@rem ##########################################################################
@rem
@rem  Gradle startup script for Windows
@rem
@rem ##########################################################################

@rem Set local scope for the variables with windows NT shell
if "%OS%"=="Windows_NT" setlocal

set DIRNAME=%~dp0
if "%DIRNAME%"=="" set DIRNAME=.
@rem This is normally unused
set APP_BASE_NAME=%~n0
set APP_HOME=%DIRNAME%

@rem Resolve any "." and ".." in APP_HOME to make it shorter.
for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi

@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m"

@rem Find java.exe
if defined JAVA_HOME goto findJavaFromJavaHome

set JAVA_EXE=java.exe
%JAVA_EXE% -version >NUL 2>&1
if %ERRORLEVEL% equ 0 goto execute

echo.
echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.

goto fail

:findJavaFromJavaHome
@rem Strip any surrounding double quotes from JAVA_HOME before building the path.
set JAVA_HOME=%JAVA_HOME:"=%
set JAVA_EXE=%JAVA_HOME%/bin/java.exe

if exist "%JAVA_EXE%" goto execute

echo.
echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.

goto fail

:execute
@rem Setup the command line

set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar


@rem Execute Gradle
"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %*

:end
@rem End local scope for the variables with windows NT shell
if %ERRORLEVEL% equ 0 goto mainEnd

:fail
rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
rem the _cmd.exe /c_ return code!
set EXIT_CODE=%ERRORLEVEL%
if %EXIT_CODE% equ 0 set EXIT_CODE=1
if not ""=="%GRADLE_EXIT_CONSOLE%" exit %EXIT_CODE%
exit /b %EXIT_CODE%

:mainEnd
if "%OS%"=="Windows_NT" endlocal

:omega
bindings/java/settings.gradle ADDED
@@ -0,0 +1 @@
 
 
1
// Single-project build; this name is also the published artifact id.
rootProject.name = "whispercpp"