Spaces:
Sleeping
Sleeping
Skriller0208
committed on
Upload 814 files
Browse files
This view is limited to 50 files because it contains too many changes.
See raw diff
- .devops/cublas.Dockerfile +28 -0
- .devops/main-cuda.Dockerfile +40 -0
- .devops/main.Dockerfile +19 -0
- .github/workflows/bindings-go.yml +22 -0
- .github/workflows/bindings-ruby.yml.disabled +23 -0
- .github/workflows/build.yml +669 -0
- .github/workflows/docker.yml +57 -0
- .github/workflows/examples.yml +48 -0
- .gitignore +54 -0
- .gitmodules +0 -0
- AUTHORS +301 -0
- CMakeLists.txt +185 -0
- LICENSE +21 -0
- Makefile +2 -1
- Package.swift +60 -0
- README.md +829 -10
- README_sycl.md +249 -0
- bindings/CMakeLists.txt +19 -0
- bindings/go/.gitignore +2 -0
- bindings/go/LICENSE +21 -0
- bindings/go/Makefile +64 -0
- bindings/go/README.md +100 -0
- bindings/go/doc.go +5 -0
- bindings/go/examples/go-model-download/context.go +30 -0
- bindings/go/examples/go-model-download/main.go +208 -0
- bindings/go/examples/go-whisper/color.go +22 -0
- bindings/go/examples/go-whisper/flags.go +147 -0
- bindings/go/examples/go-whisper/main.go +43 -0
- bindings/go/examples/go-whisper/process.go +132 -0
- bindings/go/go.mod +16 -0
- bindings/go/go.sum +23 -0
- bindings/go/params.go +192 -0
- bindings/go/pkg/whisper/consts.go +28 -0
- bindings/go/pkg/whisper/context.go +331 -0
- bindings/go/pkg/whisper/context_test.go +55 -0
- bindings/go/pkg/whisper/doc.go +4 -0
- bindings/go/pkg/whisper/interface.go +102 -0
- bindings/go/pkg/whisper/model.go +101 -0
- bindings/go/samples/jfk.wav +0 -0
- bindings/go/whisper.go +468 -0
- bindings/go/whisper_test.go +113 -0
- bindings/java/.idea/uiDesigner.xml +124 -0
- bindings/java/README.md +71 -0
- bindings/java/build.gradle +133 -0
- bindings/java/gradle.properties +6 -0
- bindings/java/gradle/wrapper/gradle-wrapper.jar +0 -0
- bindings/java/gradle/wrapper/gradle-wrapper.properties +6 -0
- bindings/java/gradlew +244 -0
- bindings/java/gradlew.bat +92 -0
- bindings/java/settings.gradle +1 -0
.devops/cublas.Dockerfile
ADDED
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Single-stage CUDA (cuBLAS) image: builds the project with `make` inside an
# nvidia/cuda "devel" base image and uses /app/main as the container entrypoint.
ARG UBUNTU_VERSION=22.04

# This needs to generally match the container host's environment.
ARG CUDA_VERSION=11.7.1

# Target the CUDA build image
ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}

FROM ${BASE_CUDA_DEV_CONTAINER} AS build

# Unless otherwise specified, we make a fat build.
ARG CUDA_DOCKER_ARCH=all

# Install the toolchain and remove the apt package lists/archives in the same
# layer, so the cached index does not persist in the image (same cleanup as
# the other Dockerfiles in .devops).
RUN apt-get update && \
    apt-get install -y build-essential git cmake \
    && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*

WORKDIR /app

# Copy the whole build context into /app.
COPY . .

# Set nvcc architecture
ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
# Enable cuBLAS
ENV GGML_CUDA=1

RUN make

ENTRYPOINT ["/app/main"]
|
.devops/main-cuda.Dockerfile
ADDED
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Two-stage CUDA image: the "build" stage compiles with `make` on the CUDA
# devel image; the "runtime" stage copies /app onto the CUDA runtime image
# and adds curl and ffmpeg.
ARG UBUNTU_VERSION=22.04
# This needs to generally match the container host's environment.
ARG CUDA_VERSION=12.3.1
# Target the CUDA build image
ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
# Target the CUDA runtime image
ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}

FROM ${BASE_CUDA_DEV_CONTAINER} AS build
WORKDIR /app

# Unless otherwise specified, we make a fat build.
ARG CUDA_DOCKER_ARCH=all
# Set nvcc architecture
ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
# Enable cuBLAS
ENV GGML_CUDA=1

RUN apt-get update && \
    apt-get install -y build-essential \
    && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*

# Ref: https://stackoverflow.com/a/53464012
# NOTE(review): CUDA_MAIN_VERSION is hard-coded and is not derived from the
# CUDA_VERSION build argument; update it by hand if CUDA_VERSION is overridden.
ENV CUDA_MAIN_VERSION=12.3
# key=value form; the space-separated `ENV key value` syntax is legacy.
ENV LD_LIBRARY_PATH=/usr/local/cuda-${CUDA_MAIN_VERSION}/compat:$LD_LIBRARY_PATH

# Copy the build context root. (`..` cannot reach outside the context; Docker
# strips the leading parent reference, so `.` states the real source path.)
COPY . .
RUN make

FROM ${BASE_CUDA_RUN_CONTAINER} AS runtime
ENV CUDA_MAIN_VERSION=12.3
ENV LD_LIBRARY_PATH=/usr/local/cuda-${CUDA_MAIN_VERSION}/compat:$LD_LIBRARY_PATH
WORKDIR /app

RUN apt-get update && \
    apt-get install -y curl ffmpeg \
    && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*

# Bring over the full /app tree (sources and build outputs) from the build stage.
COPY --from=build /app /app
# The container runs whatever command string is passed to it via `bash -c`.
ENTRYPOINT [ "bash", "-c" ]
|
.devops/main.Dockerfile
ADDED
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Two-stage CPU image: the "build" stage compiles with `make` on Ubuntu 22.04;
# the "runtime" stage copies /app onto a fresh Ubuntu 22.04 image and adds
# curl and ffmpeg.
FROM ubuntu:22.04 AS build
WORKDIR /app

RUN apt-get update && \
    apt-get install -y build-essential \
    && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*

# Copy the build context root. (`..` cannot reach outside the context; Docker
# strips the leading parent reference, so `.` states the real source path.)
COPY . .
RUN make

FROM ubuntu:22.04 AS runtime
WORKDIR /app

RUN apt-get update && \
    apt-get install -y curl ffmpeg \
    && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*

# Bring over the full /app tree (sources and build outputs) from the build stage.
COPY --from=build /app /app
# The container runs whatever command string is passed to it via `bash -c`.
ENTRYPOINT [ "bash", "-c" ]
|
.github/workflows/bindings-go.yml
ADDED
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Runs the Go bindings test suite whenever the bindings or whisper.h change.
name: Bindings Tests (Go)
on:
  push:
    paths:
      - bindings/go/**
      - whisper.h
  pull_request:
    paths:
      - bindings/go/**
      - whisper.h

jobs:
  ubuntu-latest:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/setup-go@v3
        with:
          go-version: '^1.19'
      # checkout@v4 matches the version used by the main CI workflow.
      - uses: actions/checkout@v4
      - run: |
          cd bindings/go
          make test
|
.github/workflows/bindings-ruby.yml.disabled
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# TODO: fix this workflow file, disabled for now
|
2 |
+
name: Bindings Tests (Ruby)
|
3 |
+
on:
|
4 |
+
push:
|
5 |
+
paths:
|
6 |
+
- bindings/ruby/**
|
7 |
+
- whisper.h
|
8 |
+
pull_request:
|
9 |
+
paths:
|
10 |
+
- bindings/ruby/**
|
11 |
+
- whisper.h
|
12 |
+
|
13 |
+
jobs:
|
14 |
+
ubuntu-latest:
|
15 |
+
runs-on: ubuntu-latest
|
16 |
+
steps:
|
17 |
+
- uses: ruby/setup-ruby@v1
|
18 |
+
with:
|
19 |
+
ruby-version: '3.0'
|
20 |
+
- uses: actions/checkout@v1
|
21 |
+
- run: |
|
22 |
+
cd bindings/ruby/ext
|
23 |
+
ruby extconf.rb && make
|
.github/workflows/build.yml
ADDED
@@ -0,0 +1,669 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
name: CI
|
2 |
+
on: [push, pull_request]
|
3 |
+
|
4 |
+
env:
|
5 |
+
ubuntu_image: "ubuntu:22.04"
|
6 |
+
|
7 |
+
jobs:
|
8 |
+
ubuntu-latest:
|
9 |
+
runs-on: ubuntu-latest
|
10 |
+
|
11 |
+
strategy:
|
12 |
+
fail-fast: false
|
13 |
+
matrix:
|
14 |
+
arch: [linux/amd64, linux/arm64, linux/arm/v7, linux/ppc64le]
|
15 |
+
|
16 |
+
steps:
|
17 |
+
- name: Clone
|
18 |
+
uses: actions/checkout@v4
|
19 |
+
|
20 |
+
- name: Set up QEMU
|
21 |
+
uses: docker/setup-qemu-action@v3
|
22 |
+
|
23 |
+
- name: Build ${{ matrix.arch }}
|
24 |
+
run: |
|
25 |
+
docker run --platform ${{ matrix.arch }} --rm \
|
26 |
+
-v ${{ github.workspace }}:/workspace \
|
27 |
+
-w /workspace ${{ env.ubuntu_image }} /bin/sh -c '
|
28 |
+
set -e
|
29 |
+
apt update
|
30 |
+
apt install -y build-essential libsdl2-dev
|
31 |
+
make
|
32 |
+
make stream'
|
33 |
+
|
34 |
+
macOS-latest:
|
35 |
+
runs-on: macOS-latest
|
36 |
+
|
37 |
+
steps:
|
38 |
+
- name: Clone
|
39 |
+
uses: actions/checkout@v4
|
40 |
+
|
41 |
+
- name: Dependencies
|
42 |
+
run: |
|
43 |
+
brew update
|
44 |
+
brew install sdl2
|
45 |
+
|
46 |
+
- name: Build
|
47 |
+
run: |
|
48 |
+
make
|
49 |
+
make stream
|
50 |
+
|
51 |
+
freeBSD-latest:
|
52 |
+
runs-on: macos-12
|
53 |
+
|
54 |
+
steps:
|
55 |
+
- name: Clone
|
56 |
+
uses: actions/checkout@v4
|
57 |
+
|
58 |
+
- name: Build
|
59 |
+
uses: cross-platform-actions/action@v0.24.0
|
60 |
+
with:
|
61 |
+
operating_system: freebsd
|
62 |
+
version: '13.3'
|
63 |
+
run: |
|
64 |
+
sudo pkg update
|
65 |
+
sudo pkg install -y gmake sdl2
|
66 |
+
gmake
|
67 |
+
gmake stream
|
68 |
+
|
69 |
+
ubuntu-latest-gcc:
|
70 |
+
runs-on: ubuntu-latest
|
71 |
+
|
72 |
+
strategy:
|
73 |
+
fail-fast: false
|
74 |
+
matrix:
|
75 |
+
build: [Debug, Release]
|
76 |
+
arch: [linux/amd64, linux/arm64, linux/arm/v7, linux/ppc64le]
|
77 |
+
|
78 |
+
steps:
|
79 |
+
- name: Clone
|
80 |
+
uses: actions/checkout@v4
|
81 |
+
|
82 |
+
- name: Set up QEMU
|
83 |
+
uses: docker/setup-qemu-action@v3
|
84 |
+
|
85 |
+
- name: Build ${{ matrix.arch }}
|
86 |
+
run: |
|
87 |
+
docker run --platform ${{ matrix.arch }} --rm \
|
88 |
+
-v ${{ github.workspace }}:/workspace \
|
89 |
+
-w /workspace ${{ env.ubuntu_image }} /bin/sh -c '
|
90 |
+
set -e
|
91 |
+
apt update
|
92 |
+
apt install -y build-essential cmake libsdl2-dev
|
93 |
+
cmake . -DWHISPER_SDL2=ON -DCMAKE_BUILD_TYPE=${{ matrix.build }}
|
94 |
+
make
|
95 |
+
ctest -L gh --output-on-failure'
|
96 |
+
|
97 |
+
ubuntu-latest-clang:
|
98 |
+
runs-on: ubuntu-latest
|
99 |
+
|
100 |
+
strategy:
|
101 |
+
fail-fast: false
|
102 |
+
matrix:
|
103 |
+
build: [Debug, Release]
|
104 |
+
#arch: [linux/amd64, linux/arm64, linux/arm/v7, linux/ppc64le]
|
105 |
+
# TODO: arm/v7 disabled due to clang bug
|
106 |
+
# https://github.com/ggerganov/whisper.cpp/actions/runs/9657764109/job/26637633042?pr=2256#step:4:1990
|
107 |
+
arch: [linux/amd64, linux/arm64, linux/ppc64le]
|
108 |
+
|
109 |
+
steps:
|
110 |
+
- name: Clone
|
111 |
+
uses: actions/checkout@v4
|
112 |
+
|
113 |
+
- name: Set up QEMU
|
114 |
+
uses: docker/setup-qemu-action@v3
|
115 |
+
|
116 |
+
- name: Build ${{ matrix.arch }}
|
117 |
+
run: |
|
118 |
+
docker run --platform ${{ matrix.arch }} --rm \
|
119 |
+
-v ${{ github.workspace }}:/workspace \
|
120 |
+
-w /workspace ${{ env.ubuntu_image }} /bin/sh -c '
|
121 |
+
set -e
|
122 |
+
apt update
|
123 |
+
apt install -y clang build-essential cmake libsdl2-dev
|
124 |
+
cmake . -DWHISPER_SDL2=ON -DCMAKE_BUILD_TYPE=${{ matrix.build }} -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COMPILER=clang
|
125 |
+
make
|
126 |
+
ctest -L gh --output-on-failure'
|
127 |
+
|
128 |
+
ubuntu-latest-gcc-sanitized:
|
129 |
+
runs-on: ubuntu-latest
|
130 |
+
|
131 |
+
strategy:
|
132 |
+
fail-fast: false
|
133 |
+
matrix:
|
134 |
+
sanitizer: [ADDRESS, THREAD, UNDEFINED]
|
135 |
+
arch: [linux/amd64]
|
136 |
+
|
137 |
+
steps:
|
138 |
+
- name: Clone
|
139 |
+
uses: actions/checkout@v4
|
140 |
+
|
141 |
+
- name: Set up QEMU
|
142 |
+
uses: docker/setup-qemu-action@v3
|
143 |
+
|
144 |
+
- name: Build ${{ matrix.arch }}
|
145 |
+
run: |
|
146 |
+
docker run --platform ${{ matrix.arch }} --rm \
|
147 |
+
-v ${{ github.workspace }}:/workspace \
|
148 |
+
-w /workspace ${{ env.ubuntu_image }} /bin/sh -c '
|
149 |
+
set -e
|
150 |
+
apt update
|
151 |
+
apt install -y build-essential cmake
|
152 |
+
cmake . -DCMAKE_BUILD_TYPE=Debug -DWHISPER_SANITIZE_${{ matrix.sanitizer }}=ON
|
153 |
+
make
|
154 |
+
ctest -L gh --output-on-failure'
|
155 |
+
|
156 |
+
ubuntu-22-cmake-sycl:
|
157 |
+
runs-on: ubuntu-22.04
|
158 |
+
|
159 |
+
strategy:
|
160 |
+
fail-fast: false
|
161 |
+
matrix:
|
162 |
+
dwhisper_sycl: [ON]
|
163 |
+
dcmake_c_compiler: [icx]
|
164 |
+
dcmake_cxx_compiler: [icpx]
|
165 |
+
arch: [linux/amd64, linux/arm64, linux/arm/v7, linux/ppc64le]
|
166 |
+
|
167 |
+
continue-on-error: true
|
168 |
+
|
169 |
+
steps:
|
170 |
+
- name: Clone
|
171 |
+
uses: actions/checkout@v4
|
172 |
+
|
173 |
+
- name: add oneAPI to apt
|
174 |
+
shell: bash
|
175 |
+
run: |
|
176 |
+
cd /tmp
|
177 |
+
wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
|
178 |
+
sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
|
179 |
+
rm GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
|
180 |
+
sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main"
|
181 |
+
|
182 |
+
- name: install oneAPI dpcpp compiler
|
183 |
+
shell: bash
|
184 |
+
run: |
|
185 |
+
sudo apt update
|
186 |
+
sudo apt install intel-oneapi-compiler-dpcpp-cpp
|
187 |
+
|
188 |
+
- name: install oneAPI MKL library
|
189 |
+
shell: bash
|
190 |
+
run: |
|
191 |
+
sudo apt install intel-oneapi-mkl-devel
|
192 |
+
|
193 |
+
- name: Clone
|
194 |
+
id: checkout
|
195 |
+
uses: actions/checkout@v4
|
196 |
+
|
197 |
+
- name: Build
|
198 |
+
id: cmake_build
|
199 |
+
run: |
|
200 |
+
source /opt/intel/oneapi/setvars.sh
|
201 |
+
mkdir build
|
202 |
+
cd build
|
203 |
+
cmake -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ..
|
204 |
+
cmake --build . --config Release -j $(nproc)
|
205 |
+
|
206 |
+
# Builds with the Intel oneAPI compilers (icx/icpx) and the SYCL FP16 option.
# The job is allowed to fail (continue-on-error) without failing the workflow.
ubuntu-22-cmake-sycl-fp16:
  runs-on: ubuntu-22.04

  strategy:
    fail-fast: false
    matrix:
      # NOTE(review): none of these matrix values are referenced by the steps
      # below; the `arch` axis only multiplies the number of identical runs.
      dwhisper_sycl: [ON]
      dcmake_c_compiler: [icx]
      dcmake_cxx_compiler: [icpx]
      arch: [linux/amd64, linux/arm64, linux/arm/v7, linux/ppc64le]

  continue-on-error: true

  steps:
    - name: Clone
      uses: actions/checkout@v4

    # Register Intel's apt signing key and the oneAPI package repository.
    - name: add oneAPI to apt
      shell: bash
      run: |
        cd /tmp
        wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
        sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
        rm GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
        sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main"

    - name: install oneAPI dpcpp compiler
      shell: bash
      run: |
        sudo apt update
        sudo apt install intel-oneapi-compiler-dpcpp-cpp

    - name: install oneAPI MKL library
      shell: bash
      run: |
        sudo apt install intel-oneapi-mkl-devel

    - name: Clone
      id: checkout
      uses: actions/checkout@v4

    - name: Build
      id: cmake_build
      run: |
        source /opt/intel/oneapi/setvars.sh
        mkdir build
        cd build
        # GGML_SYCL must be ON for the SYCL backend to be built at all;
        # GGML_SYCL_F16 only selects FP16 within that backend.
        cmake -DGGML_SYCL=ON -DGGML_SYCL_F16=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ..
        cmake --build . --config Release -j $(nproc)
|
255 |
+
|
256 |
+
windows-msys2:
|
257 |
+
runs-on: windows-latest
|
258 |
+
|
259 |
+
strategy:
|
260 |
+
fail-fast: false
|
261 |
+
matrix:
|
262 |
+
include:
|
263 |
+
- { sys: UCRT64, env: ucrt-x86_64, build: Release }
|
264 |
+
- { sys: CLANG64, env: clang-x86_64, build: Release }
|
265 |
+
|
266 |
+
steps:
|
267 |
+
- name: Clone
|
268 |
+
uses: actions/checkout@v4
|
269 |
+
|
270 |
+
- name: Setup ${{ matrix.sys }}
|
271 |
+
uses: msys2/setup-msys2@v2
|
272 |
+
with:
|
273 |
+
update: true
|
274 |
+
msystem: ${{matrix.sys}}
|
275 |
+
install: >-
|
276 |
+
base-devel
|
277 |
+
mingw-w64-${{matrix.env}}-toolchain
|
278 |
+
mingw-w64-${{matrix.env}}-cmake
|
279 |
+
mingw-w64-${{matrix.env}}-SDL2
|
280 |
+
mingw-w64-${{matrix.env}}-openblas
|
281 |
+
|
282 |
+
- name: Build using make
|
283 |
+
shell: msys2 {0}
|
284 |
+
run: |
|
285 |
+
make -j $(nproc)
|
286 |
+
|
287 |
+
- name: Clean after building using make
|
288 |
+
shell: msys2 {0}
|
289 |
+
run: |
|
290 |
+
make clean
|
291 |
+
|
292 |
+
- name: Build using make w/ OpenBLAS
|
293 |
+
shell: msys2 {0}
|
294 |
+
run: |
|
295 |
+
make GGML_OPENBLAS=1 -j $(nproc)
|
296 |
+
|
297 |
+
- name: Build using CMake
|
298 |
+
shell: msys2 {0}
|
299 |
+
run: |
|
300 |
+
cmake -B build
|
301 |
+
cmake --build build --config ${{ matrix.build }} -j $(nproc)
|
302 |
+
|
303 |
+
- name: Clean after building using CMake
|
304 |
+
shell: msys2 {0}
|
305 |
+
run: |
|
306 |
+
rm -rf build
|
307 |
+
|
308 |
+
- name: Build using CMake w/ OpenBLAS
|
309 |
+
shell: msys2 {0}
|
310 |
+
run: |
|
311 |
+
cmake -B build -DGGML_OPENBLAS=ON
|
312 |
+
cmake --build build --config ${{ matrix.build }} -j $(nproc)
|
313 |
+
|
314 |
+
windows:
|
315 |
+
runs-on: windows-latest
|
316 |
+
|
317 |
+
strategy:
|
318 |
+
matrix:
|
319 |
+
build: [Release]
|
320 |
+
arch: [Win32, x64]
|
321 |
+
sdl2: [ON]
|
322 |
+
include:
|
323 |
+
- arch: Win32
|
324 |
+
s2arc: x86
|
325 |
+
jnaPath: win32-x86
|
326 |
+
- arch: x64
|
327 |
+
s2arc: x64
|
328 |
+
jnaPath: win32-x86-64
|
329 |
+
- sdl2: ON
|
330 |
+
s2ver: 2.28.5
|
331 |
+
|
332 |
+
steps:
|
333 |
+
- name: Clone
|
334 |
+
uses: actions/checkout@v4
|
335 |
+
|
336 |
+
- name: Add msbuild to PATH
|
337 |
+
uses: microsoft/setup-msbuild@v2
|
338 |
+
|
339 |
+
- name: Fetch SDL2 and set SDL2_DIR
|
340 |
+
if: matrix.sdl2 == 'ON'
|
341 |
+
run: |
|
342 |
+
C:/msys64/usr/bin/wget.exe -qO sdl2.zip https://github.com/libsdl-org/SDL/releases/download/release-${{ matrix.s2ver }}/SDL2-devel-${{ matrix.s2ver }}-VC.zip
|
343 |
+
7z x sdl2.zip
|
344 |
+
echo "SDL2_DIR=$env:GITHUB_WORKSPACE/SDL2-${{ matrix.s2ver }}/cmake" >> $env:GITHUB_ENV
|
345 |
+
|
346 |
+
- name: Configure
|
347 |
+
run: >
|
348 |
+
cmake -S . -B ./build -A ${{ matrix.arch }}
|
349 |
+
-DCMAKE_BUILD_TYPE=${{ matrix.build }}
|
350 |
+
-DWHISPER_SDL2=${{ matrix.sdl2 }}
|
351 |
+
|
352 |
+
- name: Build
|
353 |
+
run: |
|
354 |
+
cd ./build
|
355 |
+
msbuild ALL_BUILD.vcxproj -t:build -p:configuration=${{ matrix.build }} -p:platform=${{ matrix.arch }}
|
356 |
+
|
357 |
+
- name: Copy SDL2.dll
|
358 |
+
if: matrix.sdl2 == 'ON'
|
359 |
+
run: copy "$env:SDL2_DIR/../lib/${{ matrix.s2arc }}/SDL2.dll" build/bin/${{ matrix.build }}
|
360 |
+
|
361 |
+
- name: Upload dll
|
362 |
+
uses: actions/upload-artifact@v4
|
363 |
+
with:
|
364 |
+
name: ${{ matrix.jnaPath }}_whisper.dll
|
365 |
+
path: build/bin/${{ matrix.build }}/whisper.dll
|
366 |
+
|
367 |
+
- name: Upload binaries
|
368 |
+
if: matrix.sdl2 == 'ON'
|
369 |
+
uses: actions/upload-artifact@v4
|
370 |
+
with:
|
371 |
+
name: whisper-bin-${{ matrix.arch }}
|
372 |
+
path: build/bin/${{ matrix.build }}
|
373 |
+
|
374 |
+
windows-blas:
|
375 |
+
runs-on: windows-latest
|
376 |
+
|
377 |
+
strategy:
|
378 |
+
matrix:
|
379 |
+
build: [Release]
|
380 |
+
arch: [Win32, x64]
|
381 |
+
blas: [ON]
|
382 |
+
sdl2: [ON]
|
383 |
+
include:
|
384 |
+
- arch: Win32
|
385 |
+
obzip: https://github.com/OpenMathLib/OpenBLAS/releases/download/v0.3.25/OpenBLAS-0.3.25-x86.zip
|
386 |
+
s2arc: x86
|
387 |
+
- arch: x64
|
388 |
+
obzip: https://github.com/OpenMathLib/OpenBLAS/releases/download/v0.3.25/OpenBLAS-0.3.25-x64.zip
|
389 |
+
s2arc: x64
|
390 |
+
- sdl2: ON
|
391 |
+
s2ver: 2.28.5
|
392 |
+
|
393 |
+
steps:
|
394 |
+
- name: Clone
|
395 |
+
uses: actions/checkout@v4
|
396 |
+
|
397 |
+
- name: Add msbuild to PATH
|
398 |
+
uses: microsoft/setup-msbuild@v2
|
399 |
+
|
400 |
+
- name: Fetch OpenBLAS
|
401 |
+
if: matrix.blas == 'ON'
|
402 |
+
run: |
|
403 |
+
C:/msys64/usr/bin/wget.exe -qO blas.zip ${{ matrix.obzip }}
|
404 |
+
7z x blas.zip -oblas -y
|
405 |
+
copy blas/include/cblas.h .
|
406 |
+
copy blas/include/openblas_config.h .
|
407 |
+
echo "OPENBLAS_PATH=$env:GITHUB_WORKSPACE/blas" >> $env:GITHUB_ENV
|
408 |
+
|
409 |
+
- name: Fetch SDL2 and set SDL2_DIR
|
410 |
+
if: matrix.sdl2 == 'ON'
|
411 |
+
run: |
|
412 |
+
C:/msys64/usr/bin/wget.exe -qO sdl2.zip https://github.com/libsdl-org/SDL/releases/download/release-${{ matrix.s2ver }}/SDL2-devel-${{ matrix.s2ver }}-VC.zip
|
413 |
+
7z x sdl2.zip
|
414 |
+
echo "SDL2_DIR=$env:GITHUB_WORKSPACE/SDL2-${{ matrix.s2ver }}/cmake" >> $env:GITHUB_ENV
|
415 |
+
|
416 |
+
- name: Configure
|
417 |
+
run: >
|
418 |
+
cmake -S . -B ./build -A ${{ matrix.arch }}
|
419 |
+
-DCMAKE_BUILD_TYPE=${{ matrix.build }}
|
420 |
+
-DGGML_OPENBLAS=${{ matrix.blas }}
|
421 |
+
-DCMAKE_LIBRARY_PATH="$env:OPENBLAS_PATH/lib"
|
422 |
+
-DWHISPER_SDL2=${{ matrix.sdl2 }}
|
423 |
+
|
424 |
+
- name: Build
|
425 |
+
run: |
|
426 |
+
cd ./build
|
427 |
+
msbuild ALL_BUILD.vcxproj -t:build -p:configuration=${{ matrix.build }} -p:platform=${{ matrix.arch }}
|
428 |
+
|
429 |
+
- name: Copy libopenblas.dll
|
430 |
+
if: matrix.blas == 'ON'
|
431 |
+
run: copy "$env:OPENBLAS_PATH/bin/libopenblas.dll" build/bin/${{ matrix.build }}
|
432 |
+
|
433 |
+
- name: Copy SDL2.dll
|
434 |
+
if: matrix.sdl2 == 'ON'
|
435 |
+
run: copy "$env:SDL2_DIR/../lib/${{ matrix.s2arc }}/SDL2.dll" build/bin/${{ matrix.build }}
|
436 |
+
|
437 |
+
- name: Upload binaries
|
438 |
+
if: matrix.blas == 'ON' && matrix.sdl2 == 'ON'
|
439 |
+
uses: actions/upload-artifact@v4
|
440 |
+
with:
|
441 |
+
name: whisper-blas-bin-${{ matrix.arch }}
|
442 |
+
path: build/bin/${{ matrix.build }}
|
443 |
+
|
444 |
+
windows-cublas:
|
445 |
+
runs-on: windows-2019
|
446 |
+
|
447 |
+
strategy:
|
448 |
+
matrix:
|
449 |
+
build: [Release]
|
450 |
+
arch: [x64]
|
451 |
+
cublas: [ON]
|
452 |
+
sdl2: [ON]
|
453 |
+
cuda-toolkit: [12.2.0, 11.8.0]
|
454 |
+
include:
|
455 |
+
- arch: x64
|
456 |
+
s2arc: x64
|
457 |
+
- sdl2: ON
|
458 |
+
s2ver: 2.28.5
|
459 |
+
|
460 |
+
steps:
|
461 |
+
- name: Clone
|
462 |
+
uses: actions/checkout@v4
|
463 |
+
|
464 |
+
- name: Add msbuild to PATH
|
465 |
+
uses: microsoft/setup-msbuild@v2
|
466 |
+
|
467 |
+
- name: Install CUDA Toolkit
|
468 |
+
id: cuda-toolkit
|
469 |
+
uses: Jimver/cuda-toolkit@v0.2.15
|
470 |
+
with:
|
471 |
+
cuda: '${{ matrix.cuda-toolkit }}'
|
472 |
+
|
473 |
+
- name: Fetch SDL2 and set SDL2_DIR
|
474 |
+
if: matrix.sdl2 == 'ON'
|
475 |
+
run: |
|
476 |
+
C:/msys64/usr/bin/wget.exe -qO sdl2.zip https://github.com/libsdl-org/SDL/releases/download/release-${{ matrix.s2ver }}/SDL2-devel-${{ matrix.s2ver }}-VC.zip
|
477 |
+
7z x sdl2.zip
|
478 |
+
echo "SDL2_DIR=$env:GITHUB_WORKSPACE/SDL2-${{ matrix.s2ver }}/cmake" >> $env:GITHUB_ENV
|
479 |
+
|
480 |
+
- name: Configure
|
481 |
+
run: >
|
482 |
+
cmake -S . -B ./build -A ${{ matrix.arch }}
|
483 |
+
-DCMAKE_BUILD_TYPE=${{ matrix.build }}
|
484 |
+
-DGGML_CUDA=${{ matrix.cublas }}
|
485 |
+
-DWHISPER_SDL2=${{ matrix.sdl2 }}
|
486 |
+
|
487 |
+
- name: Build ${{ matrix.cuda-toolkit }}
|
488 |
+
run: |
|
489 |
+
cd ./build
|
490 |
+
cmake --build . --config ${{ matrix.build }}
|
491 |
+
|
492 |
+
- name: Copy CUDA DLLs
|
493 |
+
run: >
|
494 |
+
Copy-Item -PassThru
|
495 |
+
-Path "${{ steps.cuda-toolkit.outputs.CUDA_PATH }}/bin/*.dll"
|
496 |
+
-Include cudart64_*,cublas64_*,cublasLt64_*
|
497 |
+
-Destination build/bin/${{ matrix.build }}
|
498 |
+
|
499 |
+
- name: Copy SDL2.dll
|
500 |
+
if: matrix.sdl2 == 'ON'
|
501 |
+
run: copy "$env:SDL2_DIR/../lib/${{ matrix.s2arc }}/SDL2.dll" build/bin/${{ matrix.build }}
|
502 |
+
|
503 |
+
- name: Upload binaries
|
504 |
+
if: matrix.sdl2 == 'ON'
|
505 |
+
uses: actions/upload-artifact@v4
|
506 |
+
with:
|
507 |
+
name: whisper-cublas-${{ matrix.cuda-toolkit }}-bin-${{ matrix.arch }}
|
508 |
+
path: build/bin/${{ matrix.build }}
|
509 |
+
|
510 |
+
emscripten:
|
511 |
+
runs-on: ubuntu-latest
|
512 |
+
|
513 |
+
strategy:
|
514 |
+
matrix:
|
515 |
+
build: [Release]
|
516 |
+
|
517 |
+
steps:
|
518 |
+
- name: Clone
|
519 |
+
uses: actions/checkout@v4
|
520 |
+
|
521 |
+
- name: Setup emsdk
|
522 |
+
uses: mymindstorm/setup-emsdk@v14
|
523 |
+
|
524 |
+
- name: Verify
|
525 |
+
run: emcc -v
|
526 |
+
|
527 |
+
- name: Build
|
528 |
+
run: |
|
529 |
+
emcmake cmake . -DCMAKE_BUILD_TYPE=${{ matrix.build }}
|
530 |
+
make
|
531 |
+
|
532 |
+
ios:
|
533 |
+
runs-on: macos-latest
|
534 |
+
|
535 |
+
strategy:
|
536 |
+
matrix:
|
537 |
+
build: [Release]
|
538 |
+
|
539 |
+
steps:
|
540 |
+
- name: Clone
|
541 |
+
uses: actions/checkout@v4
|
542 |
+
|
543 |
+
- name: Configure
|
544 |
+
run: |
|
545 |
+
cp models/for-tests-ggml-base.en.bin models/ggml-base.en.bin
|
546 |
+
mkdir models/ggml-base.en-encoder.mlmodelc
|
547 |
+
|
548 |
+
- name: Build objc example
|
549 |
+
run: xcodebuild -project examples/whisper.objc/whisper.objc.xcodeproj -scheme whisper.objc -configuration ${{ matrix.build }} -sdk iphonesimulator build
|
550 |
+
|
551 |
+
- name: Build swiftui example
|
552 |
+
run: xcodebuild -project examples/whisper.swiftui/whisper.swiftui.xcodeproj -scheme WhisperCppDemo -configuration ${{ matrix.build }} -sdk iphonesimulator build
|
553 |
+
|
554 |
+
android:
|
555 |
+
runs-on: ubuntu-latest
|
556 |
+
|
557 |
+
steps:
|
558 |
+
- name: Clone
|
559 |
+
uses: actions/checkout@v4
|
560 |
+
with:
|
561 |
+
path: whisper
|
562 |
+
|
563 |
+
- name: Clone
|
564 |
+
uses: actions/checkout@v4
|
565 |
+
with:
|
566 |
+
repository: ggerganov/ggml
|
567 |
+
path: ggml
|
568 |
+
|
569 |
+
- name: Install Java
|
570 |
+
uses: actions/setup-java@v4
|
571 |
+
with:
|
572 |
+
distribution: zulu
|
573 |
+
java-version: 21
|
574 |
+
|
575 |
+
- name: Setup Android SDK
|
576 |
+
uses: android-actions/setup-android@v3
|
577 |
+
|
578 |
+
- name: Build
|
579 |
+
run: |
|
580 |
+
cd whisper/examples/whisper.android
|
581 |
+
./gradlew assembleRelease --no-daemon
|
582 |
+
|
583 |
+
- name: Build with external ggml
|
584 |
+
run: |
|
585 |
+
export PATH_TO_GGML=$PWD/ggml
|
586 |
+
cd whisper/examples/whisper.android
|
587 |
+
./gradlew assembleRelease --no-daemon -PGGML_HOME=$PATH_TO_GGML
|
588 |
+
|
589 |
+
android_java:
|
590 |
+
runs-on: ubuntu-latest
|
591 |
+
|
592 |
+
steps:
|
593 |
+
- name: Clone
|
594 |
+
uses: actions/checkout@v4
|
595 |
+
|
596 |
+
- name: set up JDK 11
|
597 |
+
uses: actions/setup-java@v4
|
598 |
+
with:
|
599 |
+
java-version: '11'
|
600 |
+
distribution: 'temurin'
|
601 |
+
cache: gradle
|
602 |
+
|
603 |
+
- name: Setup Android SDK
|
604 |
+
uses: android-actions/setup-android@v3
|
605 |
+
with:
|
606 |
+
cmdline-tools-version: 9.0
|
607 |
+
|
608 |
+
- name: Build
|
609 |
+
run: |
|
610 |
+
cd examples/whisper.android.java
|
611 |
+
chmod +x ./gradlew
|
612 |
+
./gradlew assembleRelease
|
613 |
+
|
614 |
+
# TODO: disabled because of following fail: https://github.com/ggerganov/whisper.cpp/actions/runs/9686220096/job/26735899598
|
615 |
+
# java:
|
616 |
+
# needs: [ 'windows' ]
|
617 |
+
# runs-on: windows-latest
|
618 |
+
# steps:
|
619 |
+
# - uses: actions/checkout@v4
|
620 |
+
#
|
621 |
+
# - name: Install Java
|
622 |
+
# uses: actions/setup-java@v4
|
623 |
+
# with:
|
624 |
+
# distribution: zulu
|
625 |
+
# java-version: 20
|
626 |
+
#
|
627 |
+
# - name: Download Windows lib
|
628 |
+
# uses: actions/download-artifact@v4
|
629 |
+
# with:
|
630 |
+
# name: win32-x86-64_whisper.dll
|
631 |
+
# path: bindings/java/build/generated/resources/main/win32-x86-64
|
632 |
+
#
|
633 |
+
# - name: Build
|
634 |
+
# run: |
|
635 |
+
# models\download-ggml-model.cmd tiny.en
|
636 |
+
# cd bindings/java
|
637 |
+
# chmod +x ./gradlew
|
638 |
+
# ./gradlew build
|
639 |
+
#
|
640 |
+
# - name: Upload jar
|
641 |
+
# uses: actions/upload-artifact@v4
|
642 |
+
# with:
|
643 |
+
# name: whispercpp.jar
|
644 |
+
# path: bindings/java/build/libs/whispercpp-*.jar
|
645 |
+
#
|
646 |
+
# - name: Publish package
|
647 |
+
# if: ${{ github.ref == 'refs/heads/master' }}
|
648 |
+
# uses: gradle/gradle-build-action@v2.4.2
|
649 |
+
# with:
|
650 |
+
# arguments: publish
|
651 |
+
# build-root-directory: bindings/java
|
652 |
+
# env:
|
653 |
+
# MAVEN_USERNAME: ${{ secrets.JIRA_USER }}
|
654 |
+
# MAVEN_PASSWORD: ${{ secrets.JIRA_PASS }}
|
655 |
+
# PGP_SECRET: ${{ secrets.GPG_PRIVATE_KEY }}
|
656 |
+
# PGP_PASSPHRASE: ${{ secrets.GPG_PASSPHRASE }}
|
657 |
+
|
658 |
+
quantize:
|
659 |
+
runs-on: ubuntu-latest
|
660 |
+
|
661 |
+
steps:
|
662 |
+
- name: Clone
|
663 |
+
uses: actions/checkout@v4
|
664 |
+
|
665 |
+
- name: Test quantize
|
666 |
+
run: |
|
667 |
+
./models/download-ggml-model.sh tiny.en
|
668 |
+
make quantize
|
669 |
+
./quantize models/ggml-tiny.en.bin models/ggml-tiny.en-q4_0.bin q4_0
|
.github/workflows/docker.yml
ADDED
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Builds the images described in .devops/*.Dockerfile and, on pushes to
# master, publishes them to the GitHub Container Registry (ghcr.io).
name: Publish Docker image

on:
  pull_request:
  push:
    branches:
      - master

jobs:
  push_to_registry:
    # NOTE(review): the display name mentions Docker Hub, but every step below
    # targets ghcr.io. Left unchanged in case a status check refers to it.
    name: Push Docker image to Docker Hub
    if: github.event.pull_request.draft == false

    runs-on: ubuntu-latest
    env:
      COMMIT_SHA: ${{ github.sha }}
    strategy:
      matrix:
        config:
          - { tag: "main", dockerfile: ".devops/main.Dockerfile", platform: "linux/amd64,linux/arm64" }
          - { tag: "main-cuda", dockerfile: ".devops/main-cuda.Dockerfile", platform: "linux/amd64" }

    steps:
      - name: Check out the repo
        uses: actions/checkout@v3

      - name: Set up QEMU
        uses: docker/setup-qemu-action@v3

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Log in to GitHub Container Registry
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.repository_owner }}
          password: ${{ secrets.GITHUB_TOKEN }}

      # Push-only: publishes an image tagged with the commit SHA.
      - name: Build and push Docker image (versioned)
        if: github.event_name == 'push'
        uses: docker/build-push-action@v5
        with:
          context: .
          push: true
          # The matrix key is `platform` (singular); reading `platforms` here
          # would evaluate to an empty string and drop the platform list.
          platforms: ${{ matrix.config.platform }}
          tags: "ghcr.io/${{ github.repository }}:${{ matrix.config.tag }}-${{ env.COMMIT_SHA }}"
          file: ${{ matrix.config.dockerfile }}

      # Always builds; only pushes the moving tag on push events.
      - name: Build and push Docker image (tagged)
        uses: docker/build-push-action@v4
        with:
          context: .
          push: ${{ github.event_name == 'push' }}
          platforms: ${{ matrix.config.platform }}
          tags: "ghcr.io/${{ github.repository }}:${{ matrix.config.tag }}"
          file: ${{ matrix.config.dockerfile }}
|
.github/workflows/examples.yml
ADDED
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
name: Examples Tests
|
2 |
+
on:
|
3 |
+
push:
|
4 |
+
paths:
|
5 |
+
- examples/addon.node/**
|
6 |
+
- whisper.h
|
7 |
+
pull_request:
|
8 |
+
paths:
|
9 |
+
- examples/addon.node/**
|
10 |
+
- whisper.h
|
11 |
+
|
12 |
+
jobs:
|
13 |
+
addon_node-ubuntu-latest:
|
14 |
+
runs-on: ubuntu-latest
|
15 |
+
strategy:
|
16 |
+
matrix:
|
17 |
+
node-version: [ 16.x, 18.x ]
|
18 |
+
steps:
|
19 |
+
- name: Clone
|
20 |
+
uses: actions/checkout@v1
|
21 |
+
|
22 |
+
- name: Dependencies
|
23 |
+
run: |
|
24 |
+
sudo apt-get update
|
25 |
+
sudo apt-get install build-essential
|
26 |
+
sudo apt-get install cmake
|
27 |
+
sudo apt-get install libsdl2-dev
|
28 |
+
|
29 |
+
- name: Use Node.js ${{ matrix.node-version }}
|
30 |
+
uses: actions/setup-node@v4 # the `cache` input is not supported by v1 and was silently ignored
|
31 |
+
with:
|
32 |
+
node-version: ${{ matrix.node-version }}
|
33 |
+
cache: 'npm'
# NOTE(review): no lockfile is visible at the repo root, so hash the addon's manifest for the cache key — confirm this path
cache-dependency-path: examples/addon.node/package.json
|
34 |
+
|
35 |
+
- name: Install package.json dependencies
|
36 |
+
working-directory: ./examples/addon.node
|
37 |
+
run: npm install
|
38 |
+
|
39 |
+
- name: Compile addon.node
|
40 |
+
run: npx cmake-js compile -T addon.node -B Release
|
41 |
+
|
42 |
+
- name: Download test model
|
43 |
+
run: |
|
44 |
+
bash ./models/download-ggml-model.sh base.en
|
45 |
+
- name: Test
|
46 |
+
run: |
|
47 |
+
cd examples/addon.node
|
48 |
+
npm run test
|
.gitignore
ADDED
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
*.o
|
2 |
+
*.a
|
3 |
+
.cache/
|
4 |
+
.coreml/
|
5 |
+
.test/
|
6 |
+
.vs/
|
7 |
+
.vscode/
|
8 |
+
.DS_Store
|
9 |
+
.vimspector.json
|
10 |
+
/CMakeSettings.json
|
11 |
+
|
12 |
+
build/
|
13 |
+
build-*/
|
14 |
+
|
15 |
+
# SPM
|
16 |
+
.build/
|
17 |
+
.swiftpm
|
18 |
+
*.metallib
|
19 |
+
|
20 |
+
/main
|
21 |
+
/stream
|
22 |
+
/command
|
23 |
+
/talk
|
24 |
+
/talk-llama
|
25 |
+
/bench
|
26 |
+
/quantize
|
27 |
+
/server
|
28 |
+
/lsp
|
29 |
+
|
30 |
+
arm_neon.h
|
31 |
+
sync.sh
|
32 |
+
libwhisper.a
|
33 |
+
libwhisper.so
|
34 |
+
compile_commands.json
|
35 |
+
|
36 |
+
examples/arm_neon.h
|
37 |
+
examples/whisper.objc/whisper.objc.xcodeproj/xcshareddata
|
38 |
+
examples/whisper.objc/whisper.objc.xcodeproj/xcuserdata/
|
39 |
+
examples/whisper.objc/whisper.objc.xcodeproj/project.xcworkspace/xcuserdata
|
40 |
+
|
41 |
+
extra/bench-gg.txt
|
42 |
+
|
43 |
+
models/*.mlmodel
|
44 |
+
models/*.mlmodelc
|
45 |
+
models/*.mlpackage
|
46 |
+
bindings/java/.gradle/
|
47 |
+
bindings/java/.idea/
|
48 |
+
.idea/
|
49 |
+
|
50 |
+
benchmark_results.csv
|
51 |
+
cmake-build-debug/
|
52 |
+
.cxx/
|
53 |
+
.gradle/
|
54 |
+
local.properties
|
.gitmodules
ADDED
File without changes
|
AUTHORS
ADDED
@@ -0,0 +1,301 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# date: Tue Apr 9 20:27:03 EEST 2024
|
2 |
+
# this file is auto-generated by scripts/gen-authors.sh
|
3 |
+
|
4 |
+
0/0 <zero@imaskeleton.me>
|
5 |
+
0cc4m <picard12@live.de>
|
6 |
+
0xsourcecode <134374803+0xsourcecode@users.noreply.github.com>
|
7 |
+
AT <manyoso@users.noreply.github.com>
|
8 |
+
Aarni Koskela <akx@iki.fi>
|
9 |
+
Aaron Pham <29749331+aarnphm@users.noreply.github.com>
|
10 |
+
Aaron Taylor <aaron@exphat.com>
|
11 |
+
Abhilash Majumder <30946547+abhilash1910@users.noreply.github.com>
|
12 |
+
Abitofevrything <54505189+abitofevrything@users.noreply.github.com>
|
13 |
+
AfryMask <AfryMask@163.com>
|
14 |
+
Ahmad Bilal <ahmad.bilal@empglabs.com>
|
15 |
+
AidanBeltonS <87009434+AidanBeltonS@users.noreply.github.com>
|
16 |
+
Akash Mahajan <akash7190@gmail.com>
|
17 |
+
Akash Mahajan <akashmjn@stanford.edu>
|
18 |
+
Al Hoang <3811822-hoanga@users.noreply.gitlab.com>
|
19 |
+
Alan <unknown>
|
20 |
+
Aleksander Andrzejewski <18704749+aleksanderandrzejewski@users.noreply.github.com>
|
21 |
+
Alex Azarov <alex@azarov.by>
|
22 |
+
Alex Bacart <13940752+alex-bacart@users.noreply.github.com>
|
23 |
+
Alex Evgrashin <aevgrashin@yandex.ru>
|
24 |
+
Alexandr Graschenkov <alexandr.graschenkov91@gmail.com>
|
25 |
+
Alexandru Mariuti <alex@mariuti.com>
|
26 |
+
Alexey Kharlamov <alexey@kharlamov.biz>
|
27 |
+
Alfredo Montesinos <alfredo.montesinos@g.austincc.edu>
|
28 |
+
Ali Alameh <ali.alameh@isae.edu.lb>
|
29 |
+
Ananta Bastola <anantarajbastola@gmail.com>
|
30 |
+
Andreu Huguet <andreuhuguet@gmail.com>
|
31 |
+
Andrew Huynh <a5thuynh@gmail.com>
|
32 |
+
Andrew S <andrews54757@gmail.com>
|
33 |
+
Andy Maloney <asmaloney@gmail.com>
|
34 |
+
Anton Kostin <masguit42@users.noreply.github.com>
|
35 |
+
Artyom Mezin <psycho.fading@gmail.com>
|
36 |
+
Asad Memon <asad.lionpk@gmail.com>
|
37 |
+
Ashraful Islam <ashraful.meche@gmail.com>
|
38 |
+
AsukaMinato <asukaminato@nyan.eu.org>
|
39 |
+
AustinMroz <austinmroz@utexas.edu>
|
40 |
+
Avik Sengupta <avik@sengupta.net>
|
41 |
+
Bader-eddine Ouaich <49657842+baderouaich@users.noreply.github.com>
|
42 |
+
Baffin Lee <baffinlee@gmail.com>
|
43 |
+
Ben Nortier <bjnortier@gmail.com>
|
44 |
+
Benjamin Heiniger <benjamin.heiniger@bluewin.ch>
|
45 |
+
Bo-Yi Wu <appleboy.tw@gmail.com>
|
46 |
+
Boris Bliznioukov <blib@mail.com>
|
47 |
+
Borislav Stanimirov <b.stanimirov@abv.bg>
|
48 |
+
Brad Murray <59848399+bradmurray-dt@users.noreply.github.com>
|
49 |
+
Brian Murray <brian@bmurray.ca>
|
50 |
+
CRD716 <crd716@gmail.com>
|
51 |
+
Canis Lupus <Canis-UK@users.noreply.github.com>
|
52 |
+
Carolinabanana <140120812+Carolinabanana@users.noreply.github.com>
|
53 |
+
ChangSeok Oh <shivamidow@users.noreply.github.com>
|
54 |
+
Chaoqun <27287694+OpenWaygate@users.noreply.github.com>
|
55 |
+
Chia-Hsiang Cheng <88014292+garychia@users.noreply.github.com>
|
56 |
+
Chidi Williams <williamschidi1@gmail.com>
|
57 |
+
Christian <12550267+iceychris@users.noreply.github.com>
|
58 |
+
Clifford Heath <clifford.heath@gmail.com>
|
59 |
+
Colin <github@whoisc.cc>
|
60 |
+
DGdev91 <DGdev91@users.noreply.github.com>
|
61 |
+
Damian Czaja <trojan295@protonmail.com>
|
62 |
+
Daniel Bevenius <daniel.bevenius@gmail.com>
|
63 |
+
David <dnhkng@gmail.com>
|
64 |
+
David Thorpe <djt@mutablelogic.com>
|
65 |
+
Davidson Francis <davidsondfgl@gmail.com>
|
66 |
+
Dener Stassun <denerstassun@gmail.com>
|
67 |
+
Didzis Gosko <didzis@users.noreply.github.com>
|
68 |
+
Digipom <admin@digipom.com>
|
69 |
+
Dimo <dimo@ieee.org>
|
70 |
+
Dody Suria Wijaya <dodysw@gmail.com>
|
71 |
+
Dr. Tom Murphy VII Ph.D <499244+tom7@users.noreply.github.com>
|
72 |
+
Duncan McConnell <ddmcconnell4@gmail.com>
|
73 |
+
Egor Egorov <me@egorfine.com>
|
74 |
+
Elkana Bardugo <ttv200@gmail.com>
|
75 |
+
Emmanuel Schmidbauer <eschmidbauer@gmail.com>
|
76 |
+
Engininja2 <139037756+Engininja2@users.noreply.github.com>
|
77 |
+
Eric Swanson <eswanson@alloscomp.com>
|
78 |
+
Eric Tendian <erictendian@gmail.com>
|
79 |
+
Erik Scholz <Green-Sky@users.noreply.github.com>
|
80 |
+
Evan Jones <evan.q.jones@gmail.com>
|
81 |
+
Evan Martin <evan.martin@gmail.com>
|
82 |
+
Eve <139727413+netrunnereve@users.noreply.github.com>
|
83 |
+
Evgeny Kuznetsov <evgeny@kuznetsov.md>
|
84 |
+
F1L1P <78918286+F1L1Pv2@users.noreply.github.com>
|
85 |
+
Fangjun Kuang <csukuangfj@gmail.com>
|
86 |
+
Felix <stenbackfelix@gmail.com>
|
87 |
+
Finn Voorhees <finnvoorhees@gmail.com>
|
88 |
+
FlippFuzz <41221030+FlippFuzz@users.noreply.github.com>
|
89 |
+
Gang Chen <goncha@gmail.com>
|
90 |
+
Gavin Cai <gavin1818@hotmail.com>
|
91 |
+
George Hindle <george@georgehindle.com>
|
92 |
+
Georgi Gerganov <ggerganov@gmail.com>
|
93 |
+
GitAritron <103900385+GitAritron@users.noreply.github.com>
|
94 |
+
GiviMAD <GiviMAD@users.noreply.github.com>
|
95 |
+
Gleicon Moraes <gleicon@gmail.com>
|
96 |
+
Gregor Jasny <gjasny@googlemail.com>
|
97 |
+
Guillaume Wenzek <gwenzek@users.noreply.github.com>
|
98 |
+
HY. Kelvin Lee <34256578+hykelvinlee42@users.noreply.github.com>
|
99 |
+
Halalaluyafail3 <55773281+Halalaluyafail3@users.noreply.github.com>
|
100 |
+
Hang <bebound@gmail.com>
|
101 |
+
Herman Semenov <GermanAizek@yandex.ru>
|
102 |
+
Hrishikesh Barman <geekodour@users.noreply.github.com>
|
103 |
+
Ian Bicking <ian@ianbicking.org>
|
104 |
+
Ian Bull <irbull@eclipsesource.com>
|
105 |
+
Ikko Ashimine <eltociear@gmail.com>
|
106 |
+
InconsolableCellist <23345188+InconsolableCellist@users.noreply.github.com>
|
107 |
+
Ismatulla Mansurov <47342870+sapoepsilon@users.noreply.github.com>
|
108 |
+
Ivan Gorin <ivangorin21@gmail.com>
|
109 |
+
JJ <103335846+computerscienceiscool@users.noreply.github.com>
|
110 |
+
Jack Mousseau <jmousseau@users.noreply.github.com>
|
111 |
+
JacobLinCool <jacoblincool@gmail.com>
|
112 |
+
Jakub Ráček <blizzcz@gmail.com>
|
113 |
+
Jared Van Bortel <jared@nomic.ai>
|
114 |
+
Jay Binks <jaybinks@gmail.com>
|
115 |
+
Jhen-Jie Hong <developer@jhen.me>
|
116 |
+
Jhen-Jie Hong <iainst0409@gmail.com>
|
117 |
+
JidongZhang-THU <1119708529@qq.com>
|
118 |
+
Jo Liss <joliss42@gmail.com>
|
119 |
+
Johan <jr.raffin@gmail.com>
|
120 |
+
Johannes Gäßler <johannesg@5d6.de>
|
121 |
+
John Balis <phobossystems@gmail.com>
|
122 |
+
Jonathan Soo <jcsoo@agora.com>
|
123 |
+
Jonno <1160532+razodactyl@users.noreply.github.com>
|
124 |
+
Joonas Pihlajamaa <joonas.pihlajamaa@iki.fi>
|
125 |
+
Jose <34888496+Jerry-Master@users.noreply.github.com>
|
126 |
+
Josh Bleecher Snyder <josharian@gmail.com>
|
127 |
+
Judd <foldl@users.noreply.github.com>
|
128 |
+
Jumper775 <78500318+jumpers775@users.noreply.github.com>
|
129 |
+
Justine Tunney <jtunney@gmail.com>
|
130 |
+
KP Kaiser <kirk@zothcorp.com>
|
131 |
+
Kamilake <exjang0@gmail.com>
|
132 |
+
Kartik Saranathan <278928+Kartiku@users.noreply.github.com>
|
133 |
+
Kasumi <90275229+kasumi-1@users.noreply.github.com>
|
134 |
+
Kawrakow <48489457+ikawrakow@users.noreply.github.com>
|
135 |
+
Kevin Brothaler <admin@digipom.com>
|
136 |
+
Konstantin Zhuravlyov <konstantin.zhuravlyov@amd.com>
|
137 |
+
Kreijstal <rainb@tfwno.gf>
|
138 |
+
Kylin <56434533+KyL0N@users.noreply.github.com>
|
139 |
+
LBlue <153975653+lbluep@users.noreply.github.com>
|
140 |
+
Larry Battle <larry.battle.tech@gmail.com>
|
141 |
+
Laytan Laats <laytanlaats@hotmail.com>
|
142 |
+
Leo Moll <leo.moll@yeasoft.com>
|
143 |
+
Lexevolution <31176843+Lexevolution@users.noreply.github.com>
|
144 |
+
LittleLoli <26589867+WhichWho@users.noreply.github.com>
|
145 |
+
Lucas Zanek <57494138+LucasZNK@users.noreply.github.com>
|
146 |
+
Luis Herrera <herrera-luis@users.noreply.github.com>
|
147 |
+
Lukas Rist <glaslos@gmail.com>
|
148 |
+
M. A. Ali <73258591+MightyStud@users.noreply.github.com>
|
149 |
+
M. Eren Akbiyik <erenakbiyik@gmail.com>
|
150 |
+
Maciek <maciek.mab122@gmail.com>
|
151 |
+
Marcin Mielniczuk <marmistrz.dev@zoho.eu>
|
152 |
+
Martin Warnaar <martinwarnaar@gmail.com>
|
153 |
+
Matheus de Sousa <23645013+keyehzy@users.noreply.github.com>
|
154 |
+
Mathijs de Bruin <mathijs@mathijsfietst.nl>
|
155 |
+
Matija Pevec <mightymatth@users.noreply.github.com>
|
156 |
+
Maximiliano Levi <8160966+maxilevi@users.noreply.github.com>
|
157 |
+
Meng, Hengyu <hengyu.meng@intel.com>
|
158 |
+
Michael Podvitskiy <podvitskiymichael@gmail.com>
|
159 |
+
Michael Rienstra <mrienstra@gmail.com>
|
160 |
+
Mikhail Grigorev <sleuthhound@gmail.com>
|
161 |
+
Mohammadreza Hendiani <hendiani.mohammadreza@gmail.com>
|
162 |
+
Mohit Agarwal <mohit@sdf.org>
|
163 |
+
Murilo Santana <mvrilo@gmail.com>
|
164 |
+
Neil Chudleigh <nchudleigh@users.noreply.github.com>
|
165 |
+
Neo Zhang Jianyu <jianyu.zhang@intel.com>
|
166 |
+
Neuman Vong <neuman.vong@gmail.com>
|
167 |
+
Nicholas Albion <nalbion@yahoo.com>
|
168 |
+
Niels Mayer <Niels.Mayer@gmail.com>
|
169 |
+
Okabintaro <103938900+Okabintaro@users.noreply.github.com>
|
170 |
+
Oleg Sidorov <me@whitebox.io>
|
171 |
+
Oleg Sidorov <oleg@sidorov.nl>
|
172 |
+
Ondrej Kokes <ondrej.kokes@gmail.com>
|
173 |
+
Ouadie EL FAROUKI <ouadie.elfarouki@codeplay.com>
|
174 |
+
Paul Tsochantaris <ptsochantaris@icloud.com>
|
175 |
+
Philipp Zabel <philipp.zabel@gmail.com>
|
176 |
+
Philippe Normand <phil@base-art.net>
|
177 |
+
Przemysław Pawełczyk <przemoc@gmail.com>
|
178 |
+
Qianhe Chen <54462604+chenqianhe@users.noreply.github.com>
|
179 |
+
Radosław Gryta <radek.gryta@gmail.com>
|
180 |
+
Reinforce-II <fate@eastal.com>
|
181 |
+
Reinis Muiznieks <muiznieks.reinis@gmail.com>
|
182 |
+
RelatedTitle <r3latedtitle@gmail.com>
|
183 |
+
RhinoDevel <RhinoDevel@users.noreply.github.com>
|
184 |
+
Rich Jones <miserlou@gmail.com>
|
185 |
+
Robin <robin.xw@hotmail.com>
|
186 |
+
Roddur Dasgupta <roddurd@gmail.com>
|
187 |
+
Roland Rabien <figbug@gmail.com>
|
188 |
+
Rotem Dan <rotemdan@gmail.com>
|
189 |
+
Ryan Hitchman <hitchmanr@gmail.com>
|
190 |
+
Ryan Metcalfe <107415876+RyanMetcalfeInt8@users.noreply.github.com>
|
191 |
+
RyanChang <ftes90015@gmail.com>
|
192 |
+
Sam <49637763+Onlyartist9@users.noreply.github.com>
|
193 |
+
Sam Pullara <spullara@gmail.com>
|
194 |
+
Sanchit Gandhi <93869735+sanchit-gandhi@users.noreply.github.com>
|
195 |
+
Sergio López <slp@sinrega.org>
|
196 |
+
Siddharth Ramakrishnan <srr2141@columbia.edu>
|
197 |
+
Simon Moisselin <simon.moisstoll@gmail.com>
|
198 |
+
Sindre Sorhus <sindresorhus@gmail.com>
|
199 |
+
Slava Primenko <primenko.s@gmail.com>
|
200 |
+
Syahmi Azhar <prsyahmi@gmail.com>
|
201 |
+
Syed Jafri <syedjafri97@gmail.com>
|
202 |
+
Sơn Phan Trung <phantrungson17@gmail.com>
|
203 |
+
Taisei Mima <bhbstar.me@gmail.com>
|
204 |
+
Takeshi Inoue <inoue.takeshi@gmail.com>
|
205 |
+
Tamotsu Takahashi <ttakah+github@gmail.com>
|
206 |
+
Taras Glek <taras@thegp.com>
|
207 |
+
Tauseef Mohiuddin <35351464+tauseefmohammed2@users.noreply.github.com>
|
208 |
+
Thijs Raymakers <thijs@raymakers.nl>
|
209 |
+
Thomas Fitzsimmons <fitzsim@fitzsim.org>
|
210 |
+
Tiago Fassoni <tiagofassoni@users.noreply.github.com>
|
211 |
+
Tienshiao Ma <tienshiao@tienshiao.org>
|
212 |
+
Timothy Cronin <40186632+4imothy@users.noreply.github.com>
|
213 |
+
Tobrun <tobrun.van.nuland@gmail.com>
|
214 |
+
Todd <taf2@users.noreply.github.com>
|
215 |
+
Tong Li <31761981+litongjava@users.noreply.github.com>
|
216 |
+
Topping1 <78745143+Topping1@users.noreply.github.com>
|
217 |
+
Travis Cline <travis.cline@gmail.com>
|
218 |
+
UEXTM.com <84163508+uextm@users.noreply.github.com>
|
219 |
+
Vadim Peretokin <vperetokin@hey.com>
|
220 |
+
Valentin Gosu <1454649+valenting@users.noreply.github.com>
|
221 |
+
Vulcan <93451215+trholding@users.noreply.github.com>
|
222 |
+
WhiteOlivierus <36532695+WhiteOlivierus@users.noreply.github.com>
|
223 |
+
Xiang (Kevin) Li <kevinli020508@gmail.com>
|
224 |
+
Xiao-Yong Jin <jinxiaoyong@gmail.com>
|
225 |
+
XiaotaoChen <chenxiaotao1234@gmail.com>
|
226 |
+
Yajing Tang <phillis@google.com>
|
227 |
+
Yang Shen <aplshenyang@gmail.com>
|
228 |
+
Yunès <jean.baptiste.yunes@free.fr>
|
229 |
+
ZaBlazzingZephyrus <119159668+blazingzephyr@users.noreply.github.com>
|
230 |
+
Zigfrid Zvezdin <ziggerZZ@gmail.com>
|
231 |
+
Zollner <24618122+Zolliner@users.noreply.github.com>
|
232 |
+
ai-at-home <149282006+ai-at-home@users.noreply.github.com>
|
233 |
+
alonfaraj <alonfaraj@gmail.com>
|
234 |
+
andypayne <apayne@gmail.com>
|
235 |
+
ardfork <134447697+ardfork@users.noreply.github.com>
|
236 |
+
automaticcat <daogiatuank54@gmail.com>
|
237 |
+
be-next <jerome.ramette@gmail.com>
|
238 |
+
bert hubert <bert@hubertnet.nl>
|
239 |
+
bmwl <brian.marshall@tolko.com>
|
240 |
+
bobqianic <129547291+bobqianic@users.noreply.github.com>
|
241 |
+
bocytko <bocytko+github@gmail.com>
|
242 |
+
boolemancer <48014766+boolemancer@users.noreply.github.com>
|
243 |
+
boolemancer <boolemancer@gmail.com>
|
244 |
+
bradmit <151883577+bradmit@users.noreply.github.com>
|
245 |
+
brunofaustino <b.fa.amorim@gmail.com>
|
246 |
+
bssrdf <merlintiger@hotmail.com>
|
247 |
+
byte-6174 <88070277+byte-6174@users.noreply.github.com>
|
248 |
+
cdosoftei <ciprian.dosoftei@gmail.com>
|
249 |
+
clach04 <Chris.Clark@actian.com>
|
250 |
+
compilade <113953597+compilade@users.noreply.github.com>
|
251 |
+
conradg <conradjgodfrey@gmail.com>
|
252 |
+
ddpasa <112642920+ddpasa@users.noreply.github.com>
|
253 |
+
denersc <denerstassun@gmail.com>
|
254 |
+
dscripka <dscripka@users.noreply.github.com>
|
255 |
+
duthils <duthils@duthils.net>
|
256 |
+
ecneladis <ecneladis@users.noreply.github.com>
|
257 |
+
faker <nspyia2002@gmail.com>
|
258 |
+
fitzsim <fitzsim@fitzsim.org>
|
259 |
+
fraxy-v <65565042+fraxy-v@users.noreply.github.com>
|
260 |
+
genevera (she/her) <genevera@users.noreply.github.com>
|
261 |
+
geniusnut <geniusnut@gmail.com>
|
262 |
+
greeshmay <greeshmay@gmail.com>
|
263 |
+
hydai <z54981220@gmail.com>
|
264 |
+
iamthad <thadeus.j.fleming@gmail.com>
|
265 |
+
james wolf <contractorwolf@hotmail.com>
|
266 |
+
joecryptotoo <80373433+joecryptotoo@users.noreply.github.com>
|
267 |
+
jorismertz <35079666+jorismertz@users.noreply.github.com>
|
268 |
+
junkfood <69683722+JunkFood02@users.noreply.github.com>
|
269 |
+
jwijffels <jwijffels@bnosac.be>
|
270 |
+
kamranjon <kamranjon@gmail.com>
|
271 |
+
katsu560 <katsu560oo-@docomo.ne.jp>
|
272 |
+
kennethge <57784063+kenneth-ge@users.noreply.github.com>
|
273 |
+
keyehzy <msamuel@aluno.puc-rio.br>
|
274 |
+
leejet <leejet714@gmail.com>
|
275 |
+
litong <31761981+litongjava@users.noreply.github.com>
|
276 |
+
lnyan <lkwq007@gmail.com>
|
277 |
+
m.bell <m.bell@techsmith.com>
|
278 |
+
mkiol <mkiol@users.noreply.github.com>
|
279 |
+
novag <7754358+novag@users.noreply.github.com>
|
280 |
+
pajowu <pajowu@pajowu.de>
|
281 |
+
polarmoon <90010972+polarmoon@users.noreply.github.com>
|
282 |
+
rlapray <lapray.romain@gmail.com>
|
283 |
+
sandrohanea <40202887+sandrohanea@users.noreply.github.com>
|
284 |
+
semiformal-net <84111142+semiformal-net@users.noreply.github.com>
|
285 |
+
shibukazu <61775791+shibukazu@users.noreply.github.com>
|
286 |
+
shikokuchuo <53399081+shikokuchuo@users.noreply.github.com>
|
287 |
+
slaren <slarengh@gmail.com>
|
288 |
+
slashlib <slashlib@users.noreply.github.com>
|
289 |
+
snadampal <87143774+snadampal@users.noreply.github.com>
|
290 |
+
st-gr <38470677+st-gr@users.noreply.github.com>
|
291 |
+
texmex76 <40733439+texmex76@users.noreply.github.com>
|
292 |
+
thefinaldegree <thefinaldegree@gmail.com>
|
293 |
+
trixirt <trix@redhat.com>
|
294 |
+
ulatekh <ulatekh@yahoo.com>
|
295 |
+
undef <undefdev@gmail.com>
|
296 |
+
venkr <venkateshrameshkumar+1@gmail.com>
|
297 |
+
vicalloy <zbirder@gmail.com>
|
298 |
+
xdrudis <xavierdrudis@yahoo.es>
|
299 |
+
zhouwg <6889919+zhouwg@users.noreply.github.com>
|
300 |
+
布客飞龙 <562826179@qq.com>
|
301 |
+
Артём Земляк <azemlyak@smart-consulting.ru>
|
CMakeLists.txt
ADDED
@@ -0,0 +1,185 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
cmake_minimum_required(VERSION 3.14) # add_link_options needs 3.13; install(TARGETS ...) without DESTINATION needs 3.14
|
2 |
+
project("whisper.cpp" C CXX)
|
3 |
+
project("whisper.cpp" VERSION 1.6.2)
|
4 |
+
include(CheckIncludeFileCXX)
|
5 |
+
|
6 |
+
set(SOVERSION 1)
|
7 |
+
|
8 |
+
#set(CMAKE_WARN_DEPRECATED YES)
|
9 |
+
set(CMAKE_WARN_UNUSED_CLI YES)
|
10 |
+
|
11 |
+
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
|
12 |
+
|
13 |
+
if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE)
|
14 |
+
set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE)
|
15 |
+
set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo")
|
16 |
+
endif()
|
17 |
+
|
18 |
+
# Add path to modules
|
19 |
+
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/")
|
20 |
+
|
21 |
+
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
|
22 |
+
|
23 |
+
if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
|
24 |
+
set(WHISPER_STANDALONE ON)
|
25 |
+
|
26 |
+
include(git-vars)
|
27 |
+
|
28 |
+
# configure project version
|
29 |
+
configure_file(${CMAKE_SOURCE_DIR}/bindings/javascript/package-tmpl.json ${CMAKE_SOURCE_DIR}/bindings/javascript/package.json @ONLY)
|
30 |
+
else()
|
31 |
+
set(WHISPER_STANDALONE OFF)
|
32 |
+
endif()
|
33 |
+
|
34 |
+
if (EMSCRIPTEN)
|
35 |
+
set(BUILD_SHARED_LIBS_DEFAULT OFF)
|
36 |
+
|
37 |
+
option(WHISPER_WASM_SINGLE_FILE "whisper: embed WASM inside the generated whisper.js" ON)
|
38 |
+
|
39 |
+
# TODO: without these, we get the following error:
|
40 |
+
# wasm-ld: error: --shared-memory is disallowed by whisper.cpp.o because it was not compiled with 'atomics' or 'bulk-memory' features.
|
41 |
+
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -pthread -s TOTAL_STACK=5242880")
|
42 |
+
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread -s TOTAL_STACK=5242880")
|
43 |
+
else()
|
44 |
+
if (MINGW)
|
45 |
+
set(BUILD_SHARED_LIBS_DEFAULT OFF)
|
46 |
+
else()
|
47 |
+
set(BUILD_SHARED_LIBS_DEFAULT ON)
|
48 |
+
endif()
|
49 |
+
endif()
|
50 |
+
|
51 |
+
option(BUILD_SHARED_LIBS "build shared libraries" ${BUILD_SHARED_LIBS_DEFAULT})
|
52 |
+
|
53 |
+
#
|
54 |
+
# option list
|
55 |
+
#
|
56 |
+
|
57 |
+
# general
|
58 |
+
option(WHISPER_CCACHE "whisper: use ccache if available" ON)
|
59 |
+
|
60 |
+
# debug
|
61 |
+
option(WHISPER_ALL_WARNINGS "whisper: enable all compiler warnings" ON)
|
62 |
+
option(WHISPER_ALL_WARNINGS_3RD_PARTY "whisper: enable all compiler warnings in 3rd party libs" OFF)
|
63 |
+
|
64 |
+
# build
|
65 |
+
option(WHISPER_FATAL_WARNINGS "whisper: enable -Werror flag" OFF)
|
66 |
+
|
67 |
+
# sanitizers
|
68 |
+
option(WHISPER_SANITIZE_THREAD "whisper: enable thread sanitizer" OFF)
|
69 |
+
option(WHISPER_SANITIZE_ADDRESS "whisper: enable address sanitizer" OFF)
|
70 |
+
option(WHISPER_SANITIZE_UNDEFINED "whisper: enable undefined sanitizer" OFF)
|
71 |
+
|
72 |
+
# extra artifacts
|
73 |
+
option(WHISPER_BUILD_TESTS "whisper: build tests" ${WHISPER_STANDALONE})
|
74 |
+
option(WHISPER_BUILD_EXAMPLES "whisper: build examples" ${WHISPER_STANDALONE})
|
75 |
+
option(WHISPER_BUILD_SERVER "whisper: build server example" ${WHISPER_STANDALONE})
|
76 |
+
|
77 |
+
# 3rd party libs
|
78 |
+
option(WHISPER_CURL "whisper: use libcurl to download model from an URL" OFF)
|
79 |
+
option(WHISPER_SDL2 "whisper: support for libSDL2" OFF)
|
80 |
+
|
81 |
+
if (CMAKE_SYSTEM_NAME MATCHES "Linux")
|
82 |
+
option(WHISPER_FFMPEG "whisper: support building and linking with ffmpeg libs (avcodec, swresample, ...)" OFF)
|
83 |
+
endif()
|
84 |
+
|
85 |
+
option(WHISPER_COREML "whisper: enable Core ML framework" OFF)
|
86 |
+
option(WHISPER_COREML_ALLOW_FALLBACK "whisper: allow non-CoreML fallback" OFF)
|
87 |
+
option(WHISPER_OPENVINO "whisper: support for OpenVINO" OFF)
|
88 |
+
|
89 |
+
# Required for relocatable CMake package
|
90 |
+
include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/build-info.cmake)
|
91 |
+
|
92 |
+
# override ggml options
|
93 |
+
set(GGML_CCACHE ${WHISPER_CCACHE})
|
94 |
+
set(GGML_SANITIZE_THREAD ${WHISPER_SANITIZE_THREAD})
|
95 |
+
set(GGML_SANITIZE_ADDRESS ${WHISPER_SANITIZE_ADDRESS})
|
96 |
+
set(GGML_SANITIZE_UNDEFINED ${WHISPER_SANITIZE_UNDEFINED})
|
97 |
+
set(GGML_ALL_WARNINGS ${WHISPER_ALL_WARNINGS})
|
98 |
+
set(GGML_FATAL_WARNINGS ${WHISPER_FATAL_WARNINGS})
|
99 |
+
|
100 |
+
# transition helpers
|
101 |
+
function (whisper_option_depr TYPE OLD NEW)
|
102 |
+
if (${OLD})
|
103 |
+
message(${TYPE} "${OLD} is deprecated and will be removed in the future.\nUse ${NEW} instead\n")
|
104 |
+
set(${NEW} ON)
|
105 |
+
endif()
|
106 |
+
endfunction()
|
107 |
+
|
108 |
+
whisper_option_depr(FATAL_ERROR WHISPER_CUBLAS GGML_CUDA)
|
109 |
+
whisper_option_depr(WARNING WHISPER_CUDA GGML_CUDA)
|
110 |
+
whisper_option_depr(WARNING WHISPER_KOMPUTE GGML_KOMPUTE)
|
111 |
+
whisper_option_depr(WARNING WHISPER_METAL GGML_METAL)
|
112 |
+
whisper_option_depr(WARNING WHISPER_METAL_EMBED_LIBRARY GGML_METAL_EMBED_LIBRARY)
|
113 |
+
whisper_option_depr(WARNING WHISPER_NATIVE GGML_NATIVE)
|
114 |
+
whisper_option_depr(WARNING WHISPER_OPENMP GGML_OPENMP)
|
115 |
+
whisper_option_depr(WARNING WHISPER_RPC GGML_RPC)
|
116 |
+
whisper_option_depr(WARNING WHISPER_SYCL GGML_SYCL)
|
117 |
+
whisper_option_depr(WARNING WHISPER_SYCL_F16 GGML_SYCL_F16)
|
118 |
+
|
119 |
+
#
|
120 |
+
# build the library
|
121 |
+
#
|
122 |
+
|
123 |
+
if (NOT TARGET ggml)
|
124 |
+
add_subdirectory(ggml)
|
125 |
+
# ... otherwise assume ggml is added by a parent CMakeLists.txt
|
126 |
+
endif()
|
127 |
+
add_subdirectory(src)
|
128 |
+
|
129 |
+
#
|
130 |
+
# install
|
131 |
+
#
|
132 |
+
|
133 |
+
include(GNUInstallDirs)
|
134 |
+
include(CMakePackageConfigHelpers)
|
135 |
+
|
136 |
+
set(WHISPER_BUILD_NUMBER ${BUILD_NUMBER})
|
137 |
+
set(WHISPER_BUILD_COMMIT ${BUILD_COMMIT})
|
138 |
+
set(WHISPER_INSTALL_VERSION ${CMAKE_PROJECT_VERSION})
|
139 |
+
|
140 |
+
set(WHISPER_INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_INCLUDEDIR} CACHE PATH "Location of header files")
|
141 |
+
set(WHISPER_LIB_INSTALL_DIR ${CMAKE_INSTALL_LIBDIR} CACHE PATH "Location of library files")
|
142 |
+
set(WHISPER_BIN_INSTALL_DIR ${CMAKE_INSTALL_BINDIR} CACHE PATH "Location of binary files")
|
143 |
+
|
144 |
+
get_directory_property(WHISPER_TRANSIENT_DEFINES COMPILE_DEFINITIONS)
|
145 |
+
|
146 |
+
set_target_properties(whisper PROPERTIES PUBLIC_HEADER ${CMAKE_CURRENT_SOURCE_DIR}/include/whisper.h)
|
147 |
+
install(TARGETS whisper LIBRARY PUBLIC_HEADER)
|
148 |
+
|
149 |
+
configure_package_config_file(
|
150 |
+
${CMAKE_CURRENT_SOURCE_DIR}/cmake/whisper-config.cmake.in
|
151 |
+
${CMAKE_CURRENT_BINARY_DIR}/whisper-config.cmake
|
152 |
+
INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/whisper
|
153 |
+
PATH_VARS
|
154 |
+
WHISPER_INCLUDE_INSTALL_DIR
|
155 |
+
WHISPER_LIB_INSTALL_DIR
|
156 |
+
WHISPER_BIN_INSTALL_DIR )
|
157 |
+
|
158 |
+
write_basic_package_version_file(
|
159 |
+
${CMAKE_CURRENT_BINARY_DIR}/whisper-version.cmake
|
160 |
+
VERSION ${WHISPER_INSTALL_VERSION}
|
161 |
+
COMPATIBILITY SameMajorVersion)
|
162 |
+
|
163 |
+
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/whisper-config.cmake
|
164 |
+
${CMAKE_CURRENT_BINARY_DIR}/whisper-version.cmake
|
165 |
+
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/whisper)
|
166 |
+
|
167 |
+
configure_file(cmake/whisper.pc.in
|
168 |
+
"${CMAKE_CURRENT_BINARY_DIR}/whisper.pc"
|
169 |
+
@ONLY)
|
170 |
+
|
171 |
+
install(FILES "${CMAKE_CURRENT_BINARY_DIR}/whisper.pc"
|
172 |
+
DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig) # use the GNUInstallDirs libdir (e.g. lib64), like the CMake package files
|
173 |
+
|
174 |
+
#
|
175 |
+
# programs, examples and tests
|
176 |
+
#
|
177 |
+
|
178 |
+
if (WHISPER_BUILD_TESTS AND NOT CMAKE_JS_VERSION)
|
179 |
+
#include(CTest)
|
180 |
+
#add_subdirectory(tests)
|
181 |
+
endif ()
|
182 |
+
|
183 |
+
if (WHISPER_BUILD_EXAMPLES)
|
184 |
+
add_subdirectory(examples)
|
185 |
+
endif()
|
LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
MIT License
|
2 |
+
|
3 |
+
Copyright (c) 2023-2024 The ggml authors
|
4 |
+
|
5 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6 |
+
of this software and associated documentation files (the "Software"), to deal
|
7 |
+
in the Software without restriction, including without limitation the rights
|
8 |
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9 |
+
copies of the Software, and to permit persons to whom the Software is
|
10 |
+
furnished to do so, subject to the following conditions:
|
11 |
+
|
12 |
+
The above copyright notice and this permission notice shall be included in all
|
13 |
+
copies or substantial portions of the Software.
|
14 |
+
|
15 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21 |
+
SOFTWARE.
|
Makefile
CHANGED
@@ -971,7 +971,8 @@ $(LIB_WHISPER): \
|
|
971 |
$(CXX) $(CXXFLAGS) -shared -fPIC -o $@ $^ $(LDFLAGS)
|
972 |
|
973 |
$(LIB_WHISPER_S): \
|
974 |
-
$(OBJ_WHISPER)
|
|
|
975 |
ar rcs $(LIB_WHISPER_S) $^
|
976 |
|
977 |
# common
|
|
|
971 |
$(CXX) $(CXXFLAGS) -shared -fPIC -o $@ $^ $(LDFLAGS)
|
972 |
|
973 |
$(LIB_WHISPER_S): \
|
974 |
+
$(OBJ_WHISPER) \
|
975 |
+
$(OBJ_GGML)
|
976 |
ar rcs $(LIB_WHISPER_S) $^
|
977 |
|
978 |
# common
|
Package.swift
ADDED
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
// swift-tools-version:5.5
|
2 |
+
|
3 |
+
import PackageDescription
|
4 |
+
|
5 |
+
let package = Package(
|
6 |
+
name: "whisper",
|
7 |
+
platforms: [
|
8 |
+
.macOS(.v12),
|
9 |
+
.iOS(.v14),
|
10 |
+
.watchOS(.v4),
|
11 |
+
.tvOS(.v14)
|
12 |
+
],
|
13 |
+
products: [
|
14 |
+
.library(name: "whisper", targets: ["whisper"]),
|
15 |
+
],
|
16 |
+
targets: [
|
17 |
+
.target(
|
18 |
+
name: "whisper",
|
19 |
+
path: ".",
|
20 |
+
exclude: [
|
21 |
+
"bindings",
|
22 |
+
"cmake",
|
23 |
+
"coreml",
|
24 |
+
"examples",
|
25 |
+
"extra",
|
26 |
+
"models",
|
27 |
+
"samples",
|
28 |
+
"tests",
|
29 |
+
"CMakeLists.txt",
|
30 |
+
"Makefile"
|
31 |
+
],
|
32 |
+
sources: [
|
33 |
+
"ggml/src/ggml.c",
|
34 |
+
"src/whisper.cpp",
|
35 |
+
"ggml/src/ggml-aarch64.c",
|
36 |
+
"ggml/src/ggml-alloc.c",
|
37 |
+
"ggml/src/ggml-backend.c",
|
38 |
+
"ggml/src/ggml-quants.c",
|
39 |
+
"ggml/src/ggml-metal.m"
|
40 |
+
],
|
41 |
+
resources: [.process("ggml-metal.metal")],
|
42 |
+
publicHeadersPath: "spm-headers",
|
43 |
+
cSettings: [
|
44 |
+
.unsafeFlags(["-Wno-shorten-64-to-32", "-O3", "-DNDEBUG"]),
|
45 |
+
.define("GGML_USE_ACCELERATE"),
|
46 |
+
.unsafeFlags(["-fno-objc-arc"]),
|
47 |
+
.define("GGML_USE_METAL")
|
48 |
+
// NOTE: NEW_LAPACK will require iOS version 16.4+
|
49 |
+
// We should consider adding this in the future when we drop support for iOS 14
|
50 |
+
// (ref: https://developer.apple.com/documentation/accelerate/1513264-cblas_sgemm?language=objc)
|
51 |
+
// .define("ACCELERATE_NEW_LAPACK"),
|
52 |
+
// .define("ACCELERATE_LAPACK_ILP64")
|
53 |
+
],
|
54 |
+
linkerSettings: [
|
55 |
+
.linkedFramework("Accelerate")
|
56 |
+
]
|
57 |
+
)
|
58 |
+
],
|
59 |
+
cxxLanguageStandard: .cxx11
|
60 |
+
)
|
README.md
CHANGED
@@ -1,13 +1,832 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
---
|
2 |
-
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
---
|
12 |
|
13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# whisper.cpp
|
2 |
+
|
3 |
+
![whisper.cpp](https://user-images.githubusercontent.com/1991296/235238348-05d0f6a4-da44-4900-a1de-d0707e75b763.jpeg)
|
4 |
+
|
5 |
+
[![Actions Status](https://github.com/ggerganov/whisper.cpp/workflows/CI/badge.svg)](https://github.com/ggerganov/whisper.cpp/actions)
|
6 |
+
[![License: MIT](https://img.shields.io/badge/license-MIT-blue.svg)](https://opensource.org/licenses/MIT)
|
7 |
+
[![Conan Center](https://shields.io/conan/v/whisper-cpp)](https://conan.io/center/whisper-cpp)
|
8 |
+
[![npm](https://img.shields.io/npm/v/whisper.cpp.svg)](https://www.npmjs.com/package/whisper.cpp/)
|
9 |
+
|
10 |
+
Stable: [v1.6.2](https://github.com/ggerganov/whisper.cpp/releases/tag/v1.6.2) / [Roadmap | F.A.Q.](https://github.com/ggerganov/whisper.cpp/discussions/126)
|
11 |
+
|
12 |
+
High-performance inference of [OpenAI's Whisper](https://github.com/openai/whisper) automatic speech recognition (ASR) model:
|
13 |
+
|
14 |
+
- Plain C/C++ implementation without dependencies
|
15 |
+
- Apple Silicon first-class citizen - optimized via ARM NEON, Accelerate framework, Metal and [Core ML](https://github.com/ggerganov/whisper.cpp#core-ml-support)
|
16 |
+
- AVX intrinsics support for x86 architectures
|
17 |
+
- VSX intrinsics support for POWER architectures
|
18 |
+
- Mixed F16 / F32 precision
|
19 |
+
- [4-bit and 5-bit integer quantization support](https://github.com/ggerganov/whisper.cpp#quantization)
|
20 |
+
- Zero memory allocations at runtime
|
21 |
+
- Support for CPU-only inference
|
22 |
+
- [Efficient GPU support for NVIDIA](https://github.com/ggerganov/whisper.cpp#nvidia-gpu-support)
|
23 |
+
- [OpenVINO Support](https://github.com/ggerganov/whisper.cpp#openvino-support)
|
24 |
+
- [C-style API](https://github.com/ggerganov/whisper.cpp/blob/master/include/whisper.h)
|
25 |
+
|
26 |
+
Supported platforms:
|
27 |
+
|
28 |
+
- [x] Mac OS (Intel and Arm)
|
29 |
+
- [x] [iOS](examples/whisper.objc)
|
30 |
+
- [x] [Android](examples/whisper.android)
|
31 |
+
- [x] [Java](bindings/java/README.md)
|
32 |
+
- [x] Linux / [FreeBSD](https://github.com/ggerganov/whisper.cpp/issues/56#issuecomment-1350920264)
|
33 |
+
- [x] [WebAssembly](examples/whisper.wasm)
|
34 |
+
- [x] Windows ([MSVC](https://github.com/ggerganov/whisper.cpp/blob/master/.github/workflows/build.yml#L117-L144) and [MinGW](https://github.com/ggerganov/whisper.cpp/issues/168))
|
35 |
+
- [x] [Raspberry Pi](https://github.com/ggerganov/whisper.cpp/discussions/166)
|
36 |
+
- [x] [Docker](https://github.com/ggerganov/whisper.cpp/pkgs/container/whisper.cpp)
|
37 |
+
|
38 |
+
The entire high-level implementation of the model is contained in [whisper.h](include/whisper.h) and [whisper.cpp](src/whisper.cpp).
|
39 |
+
The rest of the code is part of the [`ggml`](https://github.com/ggerganov/ggml) machine learning library.
|
40 |
+
|
41 |
+
Having such a lightweight implementation of the model makes it easy to integrate into different platforms and applications.
|
42 |
+
As an example, here is a video of running the model on an iPhone 13 device - fully offline, on-device: [whisper.objc](examples/whisper.objc)
|
43 |
+
|
44 |
+
https://user-images.githubusercontent.com/1991296/197385372-962a6dea-bca1-4d50-bf96-1d8c27b98c81.mp4
|
45 |
+
|
46 |
+
You can also easily make your own offline voice assistant application: [command](examples/command)
|
47 |
+
|
48 |
+
https://user-images.githubusercontent.com/1991296/204038393-2f846eae-c255-4099-a76d-5735c25c49da.mp4
|
49 |
+
|
50 |
+
On Apple Silicon, the inference runs fully on the GPU via Metal:
|
51 |
+
|
52 |
+
https://github.com/ggerganov/whisper.cpp/assets/1991296/c82e8f86-60dc-49f2-b048-d2fdbd6b5225
|
53 |
+
|
54 |
+
Or you can even run it straight in the browser: [talk.wasm](examples/talk.wasm)
|
55 |
+
|
56 |
+
## Implementation details
|
57 |
+
|
58 |
+
- The core tensor operations are implemented in C ([ggml.h](ggml/include/ggml.h) / [ggml.c](ggml/src/ggml.c))
|
59 |
+
- The transformer model and the high-level C-style API are implemented in C++ ([whisper.h](include/whisper.h) / [whisper.cpp](src/whisper.cpp))
|
60 |
+
- Sample usage is demonstrated in [main.cpp](examples/main)
|
61 |
+
- Sample real-time audio transcription from the microphone is demonstrated in [stream.cpp](examples/stream)
|
62 |
+
- Various other examples are available in the [examples](examples) folder
|
63 |
+
|
64 |
+
The tensor operators are optimized heavily for Apple silicon CPUs. Depending on the computation size, Arm Neon SIMD intrinsics or CBLAS Accelerate framework routines are used. The latter are especially effective for bigger sizes since the Accelerate framework utilizes the special-purpose AMX coprocessor available in modern Apple products.
|
65 |
+
|
66 |
+
## Quick start
|
67 |
+
|
68 |
+
First clone the repository:
|
69 |
+
|
70 |
+
```bash
|
71 |
+
git clone https://github.com/ggerganov/whisper.cpp.git
|
72 |
+
```
|
73 |
+
|
74 |
+
Then, download one of the Whisper [models](models/README.md) converted in [`ggml` format](#ggml-format). For example:
|
75 |
+
|
76 |
+
```bash
|
77 |
+
bash ./models/download-ggml-model.sh base.en
|
78 |
+
```
|
79 |
+
|
80 |
+
Now build the [main](examples/main) example and transcribe an audio file like this:
|
81 |
+
|
82 |
+
```bash
|
83 |
+
# build the main example
|
84 |
+
make
|
85 |
+
|
86 |
+
# transcribe an audio file
|
87 |
+
./main -f samples/jfk.wav
|
88 |
+
```
|
89 |
+
|
90 |
+
---
|
91 |
+
|
92 |
+
For a quick demo, simply run `make base.en`:
|
93 |
+
|
94 |
+
```text
|
95 |
+
$ make base.en
|
96 |
+
|
97 |
+
cc -I. -O3 -std=c11 -pthread -DGGML_USE_ACCELERATE -c ggml.c -o ggml.o
|
98 |
+
c++ -I. -I./examples -O3 -std=c++11 -pthread -c whisper.cpp -o whisper.o
|
99 |
+
c++ -I. -I./examples -O3 -std=c++11 -pthread examples/main/main.cpp whisper.o ggml.o -o main -framework Accelerate
|
100 |
+
./main -h
|
101 |
+
|
102 |
+
usage: ./main [options] file0.wav file1.wav ...
|
103 |
+
|
104 |
+
options:
|
105 |
+
-h, --help [default] show this help message and exit
|
106 |
+
-t N, --threads N [4 ] number of threads to use during computation
|
107 |
+
-p N, --processors N [1 ] number of processors to use during computation
|
108 |
+
-ot N, --offset-t N [0 ] time offset in milliseconds
|
109 |
+
-on N, --offset-n N [0 ] segment index offset
|
110 |
+
-d N, --duration N [0 ] duration of audio to process in milliseconds
|
111 |
+
-mc N, --max-context N [-1 ] maximum number of text context tokens to store
|
112 |
+
-ml N, --max-len N [0 ] maximum segment length in characters
|
113 |
+
-sow, --split-on-word [false ] split on word rather than on token
|
114 |
+
-bo N, --best-of N [5 ] number of best candidates to keep
|
115 |
+
-bs N, --beam-size N [5 ] beam size for beam search
|
116 |
+
-wt N, --word-thold N [0.01 ] word timestamp probability threshold
|
117 |
+
-et N, --entropy-thold N [2.40 ] entropy threshold for decoder fail
|
118 |
+
-lpt N, --logprob-thold N [-1.00 ] log probability threshold for decoder fail
|
119 |
+
-debug, --debug-mode [false ] enable debug mode (eg. dump log_mel)
|
120 |
+
-tr, --translate [false ] translate from source language to english
|
121 |
+
-di, --diarize [false ] stereo audio diarization
|
122 |
+
-tdrz, --tinydiarize [false ] enable tinydiarize (requires a tdrz model)
|
123 |
+
-nf, --no-fallback [false ] do not use temperature fallback while decoding
|
124 |
+
-otxt, --output-txt [false ] output result in a text file
|
125 |
+
-ovtt, --output-vtt [false ] output result in a vtt file
|
126 |
+
-osrt, --output-srt [false ] output result in a srt file
|
127 |
+
-olrc, --output-lrc [false ] output result in a lrc file
|
128 |
+
-owts, --output-words [false ] output script for generating karaoke video
|
129 |
+
-fp, --font-path [/System/Library/Fonts/Supplemental/Courier New Bold.ttf] path to a monospace font for karaoke video
|
130 |
+
-ocsv, --output-csv [false ] output result in a CSV file
|
131 |
+
-oj, --output-json [false ] output result in a JSON file
|
132 |
+
-ojf, --output-json-full [false ] include more information in the JSON file
|
133 |
+
-of FNAME, --output-file FNAME [ ] output file path (without file extension)
|
134 |
+
-ps, --print-special [false ] print special tokens
|
135 |
+
-pc, --print-colors [false ] print colors
|
136 |
+
-pp, --print-progress [false ] print progress
|
137 |
+
-nt, --no-timestamps [false ] do not print timestamps
|
138 |
+
-l LANG, --language LANG [en ] spoken language ('auto' for auto-detect)
|
139 |
+
-dl, --detect-language [false ] exit after automatically detecting language
|
140 |
+
--prompt PROMPT [ ] initial prompt
|
141 |
+
-m FNAME, --model FNAME [models/ggml-base.en.bin] model path
|
142 |
+
-f FNAME, --file FNAME [ ] input WAV file path
|
143 |
+
-oved D, --ov-e-device DNAME [CPU ] the OpenVINO device used for encode inference
|
144 |
+
-ls, --log-score [false ] log best decoder scores of tokens
|
145 |
+
-ng, --no-gpu [false ] disable GPU
|
146 |
+
|
147 |
+
|
148 |
+
bash ./models/download-ggml-model.sh base.en
|
149 |
+
Downloading ggml model base.en ...
|
150 |
+
ggml-base.en.bin 100%[========================>] 141.11M 6.34MB/s in 24s
|
151 |
+
Done! Model 'base.en' saved in 'models/ggml-base.en.bin'
|
152 |
+
You can now use it like this:
|
153 |
+
|
154 |
+
$ ./main -m models/ggml-base.en.bin -f samples/jfk.wav
|
155 |
+
|
156 |
+
|
157 |
+
===============================================
|
158 |
+
Running base.en on all samples in ./samples ...
|
159 |
+
===============================================
|
160 |
+
|
161 |
+
----------------------------------------------
|
162 |
+
[+] Running base.en on samples/jfk.wav ... (run 'ffplay samples/jfk.wav' to listen)
|
163 |
+
----------------------------------------------
|
164 |
+
|
165 |
+
whisper_init_from_file: loading model from 'models/ggml-base.en.bin'
|
166 |
+
whisper_model_load: loading model
|
167 |
+
whisper_model_load: n_vocab = 51864
|
168 |
+
whisper_model_load: n_audio_ctx = 1500
|
169 |
+
whisper_model_load: n_audio_state = 512
|
170 |
+
whisper_model_load: n_audio_head = 8
|
171 |
+
whisper_model_load: n_audio_layer = 6
|
172 |
+
whisper_model_load: n_text_ctx = 448
|
173 |
+
whisper_model_load: n_text_state = 512
|
174 |
+
whisper_model_load: n_text_head = 8
|
175 |
+
whisper_model_load: n_text_layer = 6
|
176 |
+
whisper_model_load: n_mels = 80
|
177 |
+
whisper_model_load: f16 = 1
|
178 |
+
whisper_model_load: type = 2
|
179 |
+
whisper_model_load: mem required = 215.00 MB (+ 6.00 MB per decoder)
|
180 |
+
whisper_model_load: kv self size = 5.25 MB
|
181 |
+
whisper_model_load: kv cross size = 17.58 MB
|
182 |
+
whisper_model_load: adding 1607 extra tokens
|
183 |
+
whisper_model_load: model ctx = 140.60 MB
|
184 |
+
whisper_model_load: model size = 140.54 MB
|
185 |
+
|
186 |
+
system_info: n_threads = 4 / 10 | AVX = 0 | AVX2 = 0 | AVX512 = 0 | FMA = 0 | NEON = 1 | ARM_FMA = 1 | F16C = 0 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 0 | VSX = 0 |
|
187 |
+
|
188 |
+
main: processing 'samples/jfk.wav' (176000 samples, 11.0 sec), 4 threads, 1 processors, lang = en, task = transcribe, timestamps = 1 ...
|
189 |
+
|
190 |
+
|
191 |
+
[00:00:00.000 --> 00:00:11.000] And so my fellow Americans, ask not what your country can do for you, ask what you can do for your country.
|
192 |
+
|
193 |
+
|
194 |
+
whisper_print_timings: fallbacks = 0 p / 0 h
|
195 |
+
whisper_print_timings: load time = 113.81 ms
|
196 |
+
whisper_print_timings: mel time = 15.40 ms
|
197 |
+
whisper_print_timings: sample time = 11.58 ms / 27 runs ( 0.43 ms per run)
|
198 |
+
whisper_print_timings: encode time = 266.60 ms / 1 runs ( 266.60 ms per run)
|
199 |
+
whisper_print_timings: decode time = 66.11 ms / 27 runs ( 2.45 ms per run)
|
200 |
+
whisper_print_timings: total time = 476.31 ms
|
201 |
+
```
|
202 |
+
|
203 |
+
The command downloads the `base.en` model converted to custom `ggml` format and runs the inference on all `.wav` samples in the folder `samples`.
|
204 |
+
|
205 |
+
For detailed usage instructions, run: `./main -h`
|
206 |
+
|
207 |
+
Note that the [main](examples/main) example currently runs only with 16-bit WAV files, so make sure to convert your input before running the tool.
|
208 |
+
For example, you can use `ffmpeg` like this:
|
209 |
+
|
210 |
+
```bash
|
211 |
+
ffmpeg -i input.mp3 -ar 16000 -ac 1 -c:a pcm_s16le output.wav
|
212 |
+
```
|
213 |
+
|
214 |
+
## More audio samples
|
215 |
+
|
216 |
+
If you want some extra audio samples to play with, simply run:
|
217 |
+
|
218 |
+
```
|
219 |
+
make samples
|
220 |
+
```
|
221 |
+
|
222 |
+
This will download a few more audio files from Wikipedia and convert them to 16-bit WAV format via `ffmpeg`.
|
223 |
+
|
224 |
+
You can download and run the other models as follows:
|
225 |
+
|
226 |
+
```
|
227 |
+
make tiny.en
|
228 |
+
make tiny
|
229 |
+
make base.en
|
230 |
+
make base
|
231 |
+
make small.en
|
232 |
+
make small
|
233 |
+
make medium.en
|
234 |
+
make medium
|
235 |
+
make large-v1
|
236 |
+
make large-v2
|
237 |
+
make large-v3
|
238 |
+
```
|
239 |
+
|
240 |
+
## Memory usage
|
241 |
+
|
242 |
+
| Model | Disk | Mem |
|
243 |
+
| ------ | ------- | ------- |
|
244 |
+
| tiny | 75 MiB | ~273 MB |
|
245 |
+
| base | 142 MiB | ~388 MB |
|
246 |
+
| small | 466 MiB | ~852 MB |
|
247 |
+
| medium | 1.5 GiB | ~2.1 GB |
|
248 |
+
| large | 2.9 GiB | ~3.9 GB |
|
249 |
+
|
250 |
+
## Quantization
|
251 |
+
|
252 |
+
`whisper.cpp` supports integer quantization of the Whisper `ggml` models.
|
253 |
+
Quantized models require less memory and disk space and depending on the hardware can be processed more efficiently.
|
254 |
+
|
255 |
+
Here are the steps for creating and using a quantized model:
|
256 |
+
|
257 |
+
```bash
|
258 |
+
# quantize a model with Q5_0 method
|
259 |
+
make quantize
|
260 |
+
./quantize models/ggml-base.en.bin models/ggml-base.en-q5_0.bin q5_0
|
261 |
+
|
262 |
+
# run the examples as usual, specifying the quantized model file
|
263 |
+
./main -m models/ggml-base.en-q5_0.bin ./samples/gb0.wav
|
264 |
+
```
|
265 |
+
|
266 |
+
## Core ML support
|
267 |
+
|
268 |
+
On Apple Silicon devices, the Encoder inference can be executed on the Apple Neural Engine (ANE) via Core ML. This can result in significant
|
269 |
+
speed-up - more than 3x faster compared with CPU-only execution. Here are the instructions for generating a Core ML model and using it with `whisper.cpp`:
|
270 |
+
|
271 |
+
- Install Python dependencies needed for the creation of the Core ML model:
|
272 |
+
|
273 |
+
```bash
|
274 |
+
pip install ane_transformers
|
275 |
+
pip install openai-whisper
|
276 |
+
pip install coremltools
|
277 |
+
```
|
278 |
+
|
279 |
+
- To ensure `coremltools` operates correctly, please confirm that [Xcode](https://developer.apple.com/xcode/) is installed and execute `xcode-select --install` to install the command-line tools.
|
280 |
+
- Python 3.10 is recommended.
|
281 |
+
- MacOS Sonoma (version 14) or newer is recommended, as older versions of MacOS might experience issues with transcription hallucination.
|
282 |
+
- [OPTIONAL] It is recommended to utilize a Python version management system, such as [Miniconda](https://docs.conda.io/en/latest/miniconda.html) for this step:
|
283 |
+
- To create an environment, use: `conda create -n py310-whisper python=3.10 -y`
|
284 |
+
- To activate the environment, use: `conda activate py310-whisper`
|
285 |
+
|
286 |
+
- Generate a Core ML model. For example, to generate a `base.en` model, use:
|
287 |
+
|
288 |
+
```bash
|
289 |
+
./models/generate-coreml-model.sh base.en
|
290 |
+
```
|
291 |
+
|
292 |
+
This will generate the folder `models/ggml-base.en-encoder.mlmodelc`
|
293 |
+
|
294 |
+
- Build `whisper.cpp` with Core ML support:
|
295 |
+
|
296 |
+
```bash
|
297 |
+
# using Makefile
|
298 |
+
make clean
|
299 |
+
WHISPER_COREML=1 make -j
|
300 |
+
|
301 |
+
# using CMake
|
302 |
+
cmake -B build -DWHISPER_COREML=1
|
303 |
+
cmake --build build -j --config Release
|
304 |
+
```
|
305 |
+
|
306 |
+
- Run the examples as usual. For example:
|
307 |
+
|
308 |
+
```text
|
309 |
+
$ ./main -m models/ggml-base.en.bin -f samples/jfk.wav
|
310 |
+
|
311 |
+
...
|
312 |
+
|
313 |
+
whisper_init_state: loading Core ML model from 'models/ggml-base.en-encoder.mlmodelc'
|
314 |
+
whisper_init_state: first run on a device may take a while ...
|
315 |
+
whisper_init_state: Core ML model loaded
|
316 |
+
|
317 |
+
system_info: n_threads = 4 / 10 | AVX = 0 | AVX2 = 0 | AVX512 = 0 | FMA = 0 | NEON = 1 | ARM_FMA = 1 | F16C = 0 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 0 | VSX = 0 | COREML = 1 |
|
318 |
+
|
319 |
+
...
|
320 |
+
```
|
321 |
+
|
322 |
+
The first run on a device is slow, since the ANE service compiles the Core ML model to some device-specific format.
|
323 |
+
Next runs are faster.
|
324 |
+
|
325 |
+
For more information about the Core ML implementation please refer to PR [#566](https://github.com/ggerganov/whisper.cpp/pull/566).
|
326 |
+
|
327 |
+
## OpenVINO support
|
328 |
+
|
329 |
+
On platforms that support [OpenVINO](https://github.com/openvinotoolkit/openvino), the Encoder inference can be executed
|
330 |
+
on OpenVINO-supported devices including x86 CPUs and Intel GPUs (integrated & discrete).
|
331 |
+
|
332 |
+
This can result in significant speedup in encoder performance. Here are the instructions for generating the OpenVINO model and using it with `whisper.cpp`:
|
333 |
+
|
334 |
+
- First, set up a Python virtual environment and install the Python dependencies. Python 3.10 is recommended.
|
335 |
+
|
336 |
+
Windows:
|
337 |
+
|
338 |
+
```powershell
|
339 |
+
cd models
|
340 |
+
python -m venv openvino_conv_env
|
341 |
+
openvino_conv_env\Scripts\activate
|
342 |
+
python -m pip install --upgrade pip
|
343 |
+
pip install -r requirements-openvino.txt
|
344 |
+
```
|
345 |
+
|
346 |
+
Linux and macOS:
|
347 |
+
|
348 |
+
```bash
|
349 |
+
cd models
|
350 |
+
python3 -m venv openvino_conv_env
|
351 |
+
source openvino_conv_env/bin/activate
|
352 |
+
python -m pip install --upgrade pip
|
353 |
+
pip install -r requirements-openvino.txt
|
354 |
+
```
|
355 |
+
|
356 |
+
- Generate an OpenVINO encoder model. For example, to generate a `base.en` model, use:
|
357 |
+
|
358 |
+
```
|
359 |
+
python convert-whisper-to-openvino.py --model base.en
|
360 |
+
```
|
361 |
+
|
362 |
+
This will produce ggml-base.en-encoder-openvino.xml/.bin IR model files. It's recommended to relocate these to the same folder as `ggml` models, as that
|
363 |
+
is the default location that the OpenVINO extension will search at runtime.
|
364 |
+
|
365 |
+
- Build `whisper.cpp` with OpenVINO support:
|
366 |
+
|
367 |
+
Download OpenVINO package from [release page](https://github.com/openvinotoolkit/openvino/releases). The recommended version to use is [2023.0.0](https://github.com/openvinotoolkit/openvino/releases/tag/2023.0.0).
|
368 |
+
|
369 |
+
After downloading and extracting the package onto your development system, set up the required environment by sourcing the `setupvars` script. For example:
|
370 |
+
|
371 |
+
Linux:
|
372 |
+
|
373 |
+
```bash
|
374 |
+
source /path/to/l_openvino_toolkit_ubuntu22_2023.0.0.10926.b4452d56304_x86_64/setupvars.sh
|
375 |
+
```
|
376 |
+
|
377 |
+
Windows (cmd):
|
378 |
+
|
379 |
+
```powershell
|
380 |
+
C:\Path\To\w_openvino_toolkit_windows_2023.0.0.10926.b4452d56304_x86_64\setupvars.bat
|
381 |
+
```
|
382 |
+
|
383 |
+
And then build the project using cmake:
|
384 |
+
|
385 |
+
```bash
|
386 |
+
cmake -B build -DWHISPER_OPENVINO=1
|
387 |
+
cmake --build build -j --config Release
|
388 |
+
```
|
389 |
+
|
390 |
+
- Run the examples as usual. For example:
|
391 |
+
|
392 |
+
```text
|
393 |
+
$ ./main -m models/ggml-base.en.bin -f samples/jfk.wav
|
394 |
+
|
395 |
+
...
|
396 |
+
|
397 |
+
whisper_ctx_init_openvino_encoder: loading OpenVINO model from 'models/ggml-base.en-encoder-openvino.xml'
|
398 |
+
whisper_ctx_init_openvino_encoder: first run on a device may take a while ...
|
399 |
+
whisper_openvino_init: path_model = models/ggml-base.en-encoder-openvino.xml, device = GPU, cache_dir = models/ggml-base.en-encoder-openvino-cache
|
400 |
+
whisper_ctx_init_openvino_encoder: OpenVINO model loaded
|
401 |
+
|
402 |
+
system_info: n_threads = 4 / 8 | AVX = 1 | AVX2 = 1 | AVX512 = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 1 | VSX = 0 | COREML = 0 | OPENVINO = 1 |
|
403 |
+
|
404 |
+
...
|
405 |
+
```
|
406 |
+
|
407 |
+
The first run on an OpenVINO device is slow, since the OpenVINO framework will compile the IR (Intermediate Representation) model to a device-specific 'blob'. This device-specific blob will get
|
408 |
+
cached for the next run.
|
409 |
+
|
410 |
+
For more information about the OpenVINO implementation please refer to PR [#1037](https://github.com/ggerganov/whisper.cpp/pull/1037).
|
411 |
+
|
412 |
+
## NVIDIA GPU support
|
413 |
+
|
414 |
+
With NVIDIA cards the processing of the models is done efficiently on the GPU via cuBLAS and custom CUDA kernels.
|
415 |
+
First, make sure you have installed `cuda`: https://developer.nvidia.com/cuda-downloads
|
416 |
+
|
417 |
+
Now build `whisper.cpp` with CUDA support:
|
418 |
+
|
419 |
+
```
|
420 |
+
make clean
|
421 |
+
GGML_CUDA=1 make -j
|
422 |
+
```
|
423 |
+
|
424 |
+
## BLAS CPU support via OpenBLAS
|
425 |
+
|
426 |
+
Encoder processing can be accelerated on the CPU via OpenBLAS.
|
427 |
+
First, make sure you have installed `openblas`: https://www.openblas.net/
|
428 |
+
|
429 |
+
Now build `whisper.cpp` with OpenBLAS support:
|
430 |
+
|
431 |
+
```
|
432 |
+
make clean
|
433 |
+
GGML_OPENBLAS=1 make -j
|
434 |
+
```
|
435 |
+
|
436 |
+
## BLAS CPU support via Intel MKL
|
437 |
+
|
438 |
+
Encoder processing can be accelerated on the CPU via the BLAS compatible interface of Intel's Math Kernel Library.
|
439 |
+
First, make sure you have installed Intel's MKL runtime and development packages: https://www.intel.com/content/www/us/en/developer/tools/oneapi/onemkl-download.html
|
440 |
+
|
441 |
+
Now build `whisper.cpp` with Intel MKL BLAS support:
|
442 |
+
|
443 |
+
```
|
444 |
+
source /opt/intel/oneapi/setvars.sh
|
445 |
+
mkdir build
|
446 |
+
cd build
|
447 |
+
cmake -DWHISPER_MKL=ON ..
|
448 |
+
WHISPER_MKL=1 make -j
|
449 |
+
```
|
450 |
+
|
451 |
+
## Docker
|
452 |
+
|
453 |
+
### Prerequisites
|
454 |
+
|
455 |
+
- Docker must be installed and running on your system.
|
456 |
+
- Create a folder to store big models & intermediate files (e.g. /whisper/models)
|
457 |
+
|
458 |
+
### Images
|
459 |
+
|
460 |
+
We have two Docker images available for this project:
|
461 |
+
|
462 |
+
1. `ghcr.io/ggerganov/whisper.cpp:main`: This image includes the main executable file as well as `curl` and `ffmpeg`. (platforms: `linux/amd64`, `linux/arm64`)
|
463 |
+
2. `ghcr.io/ggerganov/whisper.cpp:main-cuda`: Same as `main` but compiled with CUDA support. (platforms: `linux/amd64`)
|
464 |
+
|
465 |
+
### Usage
|
466 |
+
|
467 |
+
```shell
|
468 |
+
# download model and persist it in a local folder
|
469 |
+
docker run -it --rm \
|
470 |
+
-v path/to/models:/models \
|
471 |
+
whisper.cpp:main "./models/download-ggml-model.sh base /models"
|
472 |
+
# transcribe an audio file
|
473 |
+
docker run -it --rm \
|
474 |
+
-v path/to/models:/models \
|
475 |
+
-v path/to/audios:/audios \
|
476 |
+
whisper.cpp:main "./main -m /models/ggml-base.bin -f /audios/jfk.wav"
|
477 |
+
# transcribe an audio file in samples folder
|
478 |
+
docker run -it --rm \
|
479 |
+
-v path/to/models:/models \
|
480 |
+
whisper.cpp:main "./main -m /models/ggml-base.bin -f ./samples/jfk.wav"
|
481 |
+
```
|
482 |
+
|
483 |
+
## Installing with Conan
|
484 |
+
|
485 |
+
You can install pre-built binaries for whisper.cpp or build it from source using [Conan](https://conan.io/). Use the following command:
|
486 |
+
|
487 |
+
```
|
488 |
+
conan install --requires="whisper-cpp/[*]" --build=missing
|
489 |
+
```
|
490 |
+
|
491 |
+
For detailed instructions on how to use Conan, please refer to the [Conan documentation](https://docs.conan.io/2/).
|
492 |
+
|
493 |
+
## Limitations
|
494 |
+
|
495 |
+
- Inference only
|
496 |
+
|
497 |
+
## Another example
|
498 |
+
|
499 |
+
Here is another example of transcribing a [3:24 min speech](https://upload.wikimedia.org/wikipedia/commons/1/1f/George_W_Bush_Columbia_FINAL.ogg)
|
500 |
+
in about half a minute on a MacBook M1 Pro, using `medium.en` model:
|
501 |
+
|
502 |
+
<details>
|
503 |
+
<summary>Expand to see the result</summary>
|
504 |
+
|
505 |
+
```text
|
506 |
+
$ ./main -m models/ggml-medium.en.bin -f samples/gb1.wav -t 8
|
507 |
+
|
508 |
+
whisper_init_from_file: loading model from 'models/ggml-medium.en.bin'
|
509 |
+
whisper_model_load: loading model
|
510 |
+
whisper_model_load: n_vocab = 51864
|
511 |
+
whisper_model_load: n_audio_ctx = 1500
|
512 |
+
whisper_model_load: n_audio_state = 1024
|
513 |
+
whisper_model_load: n_audio_head = 16
|
514 |
+
whisper_model_load: n_audio_layer = 24
|
515 |
+
whisper_model_load: n_text_ctx = 448
|
516 |
+
whisper_model_load: n_text_state = 1024
|
517 |
+
whisper_model_load: n_text_head = 16
|
518 |
+
whisper_model_load: n_text_layer = 24
|
519 |
+
whisper_model_load: n_mels = 80
|
520 |
+
whisper_model_load: f16 = 1
|
521 |
+
whisper_model_load: type = 4
|
522 |
+
whisper_model_load: mem required = 1720.00 MB (+ 43.00 MB per decoder)
|
523 |
+
whisper_model_load: kv self size = 42.00 MB
|
524 |
+
whisper_model_load: kv cross size = 140.62 MB
|
525 |
+
whisper_model_load: adding 1607 extra tokens
|
526 |
+
whisper_model_load: model ctx = 1462.35 MB
|
527 |
+
whisper_model_load: model size = 1462.12 MB
|
528 |
+
|
529 |
+
system_info: n_threads = 8 / 10 | AVX = 0 | AVX2 = 0 | AVX512 = 0 | FMA = 0 | NEON = 1 | ARM_FMA = 1 | F16C = 0 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 0 | VSX = 0 |
|
530 |
+
|
531 |
+
main: processing 'samples/gb1.wav' (3179750 samples, 198.7 sec), 8 threads, 1 processors, lang = en, task = transcribe, timestamps = 1 ...
|
532 |
+
|
533 |
+
|
534 |
+
[00:00:00.000 --> 00:00:08.000] My fellow Americans, this day has brought terrible news and great sadness to our country.
|
535 |
+
[00:00:08.000 --> 00:00:17.000] At nine o'clock this morning, Mission Control in Houston lost contact with our Space Shuttle Columbia.
|
536 |
+
[00:00:17.000 --> 00:00:23.000] A short time later, debris was seen falling from the skies above Texas.
|
537 |
+
[00:00:23.000 --> 00:00:29.000] The Columbia's lost. There are no survivors.
|
538 |
+
[00:00:29.000 --> 00:00:32.000] On board was a crew of seven.
|
539 |
+
[00:00:32.000 --> 00:00:39.000] Colonel Rick Husband, Lieutenant Colonel Michael Anderson, Commander Laurel Clark,
|
540 |
+
[00:00:39.000 --> 00:00:48.000] Captain David Brown, Commander William McCool, Dr. Kultna Shavla, and Ilan Ramon,
|
541 |
+
[00:00:48.000 --> 00:00:52.000] a colonel in the Israeli Air Force.
|
542 |
+
[00:00:52.000 --> 00:00:58.000] These men and women assumed great risk in the service to all humanity.
|
543 |
+
[00:00:58.000 --> 00:01:03.000] In an age when space flight has come to seem almost routine,
|
544 |
+
[00:01:03.000 --> 00:01:07.000] it is easy to overlook the dangers of travel by rocket
|
545 |
+
[00:01:07.000 --> 00:01:12.000] and the difficulties of navigating the fierce outer atmosphere of the Earth.
|
546 |
+
[00:01:12.000 --> 00:01:18.000] These astronauts knew the dangers, and they faced them willingly,
|
547 |
+
[00:01:18.000 --> 00:01:23.000] knowing they had a high and noble purpose in life.
|
548 |
+
[00:01:23.000 --> 00:01:31.000] Because of their courage and daring and idealism, we will miss them all the more.
|
549 |
+
[00:01:31.000 --> 00:01:36.000] All Americans today are thinking as well of the families of these men and women
|
550 |
+
[00:01:36.000 --> 00:01:40.000] who have been given this sudden shock and grief.
|
551 |
+
[00:01:40.000 --> 00:01:45.000] You're not alone. Our entire nation grieves with you,
|
552 |
+
[00:01:45.000 --> 00:01:52.000] and those you love will always have the respect and gratitude of this country.
|
553 |
+
[00:01:52.000 --> 00:01:56.000] The cause in which they died will continue.
|
554 |
+
[00:01:56.000 --> 00:02:04.000] Mankind is led into the darkness beyond our world by the inspiration of discovery
|
555 |
+
[00:02:04.000 --> 00:02:11.000] and the longing to understand. Our journey into space will go on.
|
556 |
+
[00:02:11.000 --> 00:02:16.000] In the skies today, we saw destruction and tragedy.
|
557 |
+
[00:02:16.000 --> 00:02:22.000] Yet farther than we can see, there is comfort and hope.
|
558 |
+
[00:02:22.000 --> 00:02:29.000] In the words of the prophet Isaiah, "Lift your eyes and look to the heavens
|
559 |
+
[00:02:29.000 --> 00:02:35.000] who created all these. He who brings out the starry hosts one by one
|
560 |
+
[00:02:35.000 --> 00:02:39.000] and calls them each by name."
|
561 |
+
[00:02:39.000 --> 00:02:46.000] Because of His great power and mighty strength, not one of them is missing.
|
562 |
+
[00:02:46.000 --> 00:02:55.000] The same Creator who names the stars also knows the names of the seven souls we mourn today.
|
563 |
+
[00:02:55.000 --> 00:03:01.000] The crew of the shuttle Columbia did not return safely to earth,
|
564 |
+
[00:03:01.000 --> 00:03:05.000] yet we can pray that all are safely home.
|
565 |
+
[00:03:05.000 --> 00:03:13.000] May God bless the grieving families, and may God continue to bless America.
|
566 |
+
[00:03:13.000 --> 00:03:19.000] [Silence]
|
567 |
+
|
568 |
+
|
569 |
+
whisper_print_timings: fallbacks = 1 p / 0 h
|
570 |
+
whisper_print_timings: load time = 569.03 ms
|
571 |
+
whisper_print_timings: mel time = 146.85 ms
|
572 |
+
whisper_print_timings: sample time = 238.66 ms / 553 runs ( 0.43 ms per run)
|
573 |
+
whisper_print_timings: encode time = 18665.10 ms / 9 runs ( 2073.90 ms per run)
|
574 |
+
whisper_print_timings: decode time = 13090.93 ms / 549 runs ( 23.85 ms per run)
|
575 |
+
whisper_print_timings: total time = 32733.52 ms
|
576 |
+
```
|
577 |
+
|
578 |
+
</details>
|
579 |
+
|
580 |
+
## Real-time audio input example
|
581 |
+
|
582 |
+
This is a naive example of performing real-time inference on audio from your microphone.
|
583 |
+
The [stream](examples/stream) tool samples the audio every half a second and runs the transcription continuously.
|
584 |
+
More info is available in [issue #10](https://github.com/ggerganov/whisper.cpp/issues/10).
|
585 |
+
|
586 |
+
```bash
|
587 |
+
make stream
|
588 |
+
./stream -m ./models/ggml-base.en.bin -t 8 --step 500 --length 5000
|
589 |
+
```
|
590 |
+
|
591 |
+
https://user-images.githubusercontent.com/1991296/194935793-76afede7-cfa8-48d8-a80f-28ba83be7d09.mp4
|
592 |
+
|
593 |
+
## Confidence color-coding
|
594 |
+
|
595 |
+
Adding the `--print-colors` argument will print the transcribed text using an experimental color coding strategy
|
596 |
+
to highlight words with high or low confidence:
|
597 |
+
|
598 |
+
```bash
|
599 |
+
./main -m models/ggml-base.en.bin -f samples/gb0.wav --print-colors
|
600 |
+
```
|
601 |
+
|
602 |
+
<img width="965" alt="image" src="https://user-images.githubusercontent.com/1991296/197356445-311c8643-9397-4e5e-b46e-0b4b4daa2530.png">
|
603 |
+
|
604 |
+
## Controlling the length of the generated text segments (experimental)
|
605 |
+
|
606 |
+
For example, to limit the line length to a maximum of 16 characters, simply add `-ml 16`:
|
607 |
+
|
608 |
+
```text
|
609 |
+
$ ./main -m ./models/ggml-base.en.bin -f ./samples/jfk.wav -ml 16
|
610 |
+
|
611 |
+
whisper_model_load: loading model from './models/ggml-base.en.bin'
|
612 |
+
...
|
613 |
+
system_info: n_threads = 4 / 10 | AVX2 = 0 | AVX512 = 0 | NEON = 1 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 |
|
614 |
+
|
615 |
+
main: processing './samples/jfk.wav' (176000 samples, 11.0 sec), 4 threads, 1 processors, lang = en, task = transcribe, timestamps = 1 ...
|
616 |
+
|
617 |
+
[00:00:00.000 --> 00:00:00.850] And so my
|
618 |
+
[00:00:00.850 --> 00:00:01.590] fellow
|
619 |
+
[00:00:01.590 --> 00:00:04.140] Americans, ask
|
620 |
+
[00:00:04.140 --> 00:00:05.660] not what your
|
621 |
+
[00:00:05.660 --> 00:00:06.840] country can do
|
622 |
+
[00:00:06.840 --> 00:00:08.430] for you, ask
|
623 |
+
[00:00:08.430 --> 00:00:09.440] what you can do
|
624 |
+
[00:00:09.440 --> 00:00:10.020] for your
|
625 |
+
[00:00:10.020 --> 00:00:11.000] country.
|
626 |
+
```
|
627 |
+
|
628 |
+
## Word-level timestamp (experimental)
|
629 |
+
|
630 |
+
The `--max-len` argument can be used to obtain word-level timestamps. Simply use `-ml 1`:
|
631 |
+
|
632 |
+
```text
|
633 |
+
$ ./main -m ./models/ggml-base.en.bin -f ./samples/jfk.wav -ml 1
|
634 |
+
|
635 |
+
whisper_model_load: loading model from './models/ggml-base.en.bin'
|
636 |
+
...
|
637 |
+
system_info: n_threads = 4 / 10 | AVX2 = 0 | AVX512 = 0 | NEON = 1 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 |
|
638 |
+
|
639 |
+
main: processing './samples/jfk.wav' (176000 samples, 11.0 sec), 4 threads, 1 processors, lang = en, task = transcribe, timestamps = 1 ...
|
640 |
+
|
641 |
+
[00:00:00.000 --> 00:00:00.320]
|
642 |
+
[00:00:00.320 --> 00:00:00.370] And
|
643 |
+
[00:00:00.370 --> 00:00:00.690] so
|
644 |
+
[00:00:00.690 --> 00:00:00.850] my
|
645 |
+
[00:00:00.850 --> 00:00:01.590] fellow
|
646 |
+
[00:00:01.590 --> 00:00:02.850] Americans
|
647 |
+
[00:00:02.850 --> 00:00:03.300] ,
|
648 |
+
[00:00:03.300 --> 00:00:04.140] ask
|
649 |
+
[00:00:04.140 --> 00:00:04.990] not
|
650 |
+
[00:00:04.990 --> 00:00:05.410] what
|
651 |
+
[00:00:05.410 --> 00:00:05.660] your
|
652 |
+
[00:00:05.660 --> 00:00:06.260] country
|
653 |
+
[00:00:06.260 --> 00:00:06.600] can
|
654 |
+
[00:00:06.600 --> 00:00:06.840] do
|
655 |
+
[00:00:06.840 --> 00:00:07.010] for
|
656 |
+
[00:00:07.010 --> 00:00:08.170] you
|
657 |
+
[00:00:08.170 --> 00:00:08.190] ,
|
658 |
+
[00:00:08.190 --> 00:00:08.430] ask
|
659 |
+
[00:00:08.430 --> 00:00:08.910] what
|
660 |
+
[00:00:08.910 --> 00:00:09.040] you
|
661 |
+
[00:00:09.040 --> 00:00:09.320] can
|
662 |
+
[00:00:09.320 --> 00:00:09.440] do
|
663 |
+
[00:00:09.440 --> 00:00:09.760] for
|
664 |
+
[00:00:09.760 --> 00:00:10.020] your
|
665 |
+
[00:00:10.020 --> 00:00:10.510] country
|
666 |
+
[00:00:10.510 --> 00:00:11.000] .
|
667 |
+
```
|
668 |
+
|
669 |
+
## Speaker segmentation via tinydiarize (experimental)
|
670 |
+
|
671 |
+
More information about this approach is available here: https://github.com/ggerganov/whisper.cpp/pull/1058
|
672 |
+
|
673 |
+
Sample usage:
|
674 |
+
|
675 |
+
```py
|
676 |
+
# download a tinydiarize compatible model
|
677 |
+
./models/download-ggml-model.sh small.en-tdrz
|
678 |
+
|
679 |
+
# run as usual, adding the "-tdrz" command-line argument
|
680 |
+
./main -f ./samples/a13.wav -m ./models/ggml-small.en-tdrz.bin -tdrz
|
681 |
+
...
|
682 |
+
main: processing './samples/a13.wav' (480000 samples, 30.0 sec), 4 threads, 1 processors, lang = en, task = transcribe, tdrz = 1, timestamps = 1 ...
|
683 |
+
...
|
684 |
+
[00:00:00.000 --> 00:00:03.800] Okay Houston, we've had a problem here. [SPEAKER_TURN]
|
685 |
+
[00:00:03.800 --> 00:00:06.200] This is Houston. Say again please. [SPEAKER_TURN]
|
686 |
+
[00:00:06.200 --> 00:00:08.260] Uh Houston we've had a problem.
|
687 |
+
[00:00:08.260 --> 00:00:11.320] We've had a main beam up on a volt. [SPEAKER_TURN]
|
688 |
+
[00:00:11.320 --> 00:00:13.820] Roger main beam interval. [SPEAKER_TURN]
|
689 |
+
[00:00:13.820 --> 00:00:15.100] Uh uh [SPEAKER_TURN]
|
690 |
+
[00:00:15.100 --> 00:00:18.020] So okay stand, by thirteen we're looking at it. [SPEAKER_TURN]
|
691 |
+
[00:00:18.020 --> 00:00:25.740] Okay uh right now uh Houston the uh voltage is uh is looking good um.
|
692 |
+
[00:00:27.620 --> 00:00:29.940] And we had a a pretty large bank or so.
|
693 |
+
```
|
694 |
+
|
695 |
+
## Karaoke-style movie generation (experimental)
|
696 |
+
|
697 |
+
The [main](examples/main) example provides support for output of karaoke-style movies, where the
|
698 |
+
currently pronounced word is highlighted. Use the `-owts` argument and run the generated bash script.
|
699 |
+
This requires `ffmpeg` to be installed.
|
700 |
+
|
701 |
+
Here are a few _"typical"_ examples:
|
702 |
+
|
703 |
+
```bash
|
704 |
+
./main -m ./models/ggml-base.en.bin -f ./samples/jfk.wav -owts
|
705 |
+
source ./samples/jfk.wav.wts
|
706 |
+
ffplay ./samples/jfk.wav.mp4
|
707 |
+
```
|
708 |
+
|
709 |
+
https://user-images.githubusercontent.com/1991296/199337465-dbee4b5e-9aeb-48a3-b1c6-323ac4db5b2c.mp4
|
710 |
+
|
711 |
---
|
712 |
+
|
713 |
+
```bash
|
714 |
+
./main -m ./models/ggml-base.en.bin -f ./samples/mm0.wav -owts
|
715 |
+
source ./samples/mm0.wav.wts
|
716 |
+
ffplay ./samples/mm0.wav.mp4
|
717 |
+
```
|
718 |
+
|
719 |
+
https://user-images.githubusercontent.com/1991296/199337504-cc8fd233-0cb7-4920-95f9-4227de3570aa.mp4
|
720 |
+
|
721 |
+
---
|
722 |
+
|
723 |
+
```bash
|
724 |
+
./main -m ./models/ggml-base.en.bin -f ./samples/gb0.wav -owts
|
725 |
+
source ./samples/gb0.wav.wts
|
726 |
+
ffplay ./samples/gb0.wav.mp4
|
727 |
+
```
|
728 |
+
|
729 |
+
https://user-images.githubusercontent.com/1991296/199337538-b7b0c7a3-2753-4a88-a0cd-f28a317987ba.mp4
|
730 |
+
|
731 |
---
|
732 |
|
733 |
+
## Video comparison of different models
|
734 |
+
|
735 |
+
Use the [scripts/bench-wts.sh](https://github.com/ggerganov/whisper.cpp/blob/master/scripts/bench-wts.sh) script to generate a video in the following format:
|
736 |
+
|
737 |
+
```bash
|
738 |
+
./scripts/bench-wts.sh samples/jfk.wav
|
739 |
+
ffplay ./samples/jfk.wav.all.mp4
|
740 |
+
```
|
741 |
+
|
742 |
+
https://user-images.githubusercontent.com/1991296/223206245-2d36d903-cf8e-4f09-8c3b-eb9f9c39d6fc.mp4
|
743 |
+
|
744 |
+
---
|
745 |
+
|
746 |
+
## Benchmarks
|
747 |
+
|
748 |
+
In order to have an objective comparison of the performance of the inference across different system configurations,
|
749 |
+
use the [bench](examples/bench) tool. The tool simply runs the Encoder part of the model and prints how much time it
|
750 |
+
took to execute it. The results are summarized in the following Github issue:
|
751 |
+
|
752 |
+
[Benchmark results](https://github.com/ggerganov/whisper.cpp/issues/89)
|
753 |
+
|
754 |
+
Additionally, a script to run whisper.cpp with different models and audio files is provided: [bench.py](scripts/bench.py).
|
755 |
+
|
756 |
+
You can run it with the following command. By default, it will run against any standard model in the models folder.
|
757 |
+
|
758 |
+
```bash
|
759 |
+
python3 scripts/bench.py -f samples/jfk.wav -t 2,4,8 -p 1,2
|
760 |
+
```
|
761 |
+
|
762 |
+
It is written in python with the intention of being easy to modify and extend for your benchmarking use case.
|
763 |
+
|
764 |
+
It outputs a csv file with the results of the benchmarking.
|
765 |
+
|
766 |
+
## `ggml` format
|
767 |
+
|
768 |
+
The original models are converted to a custom binary format. This allows packing everything needed into a single file:
|
769 |
+
|
770 |
+
- model parameters
|
771 |
+
- mel filters
|
772 |
+
- vocabulary
|
773 |
+
- weights
|
774 |
+
|
775 |
+
You can download the converted models using the [models/download-ggml-model.sh](models/download-ggml-model.sh) script
|
776 |
+
or manually from here:
|
777 |
+
|
778 |
+
- https://huggingface.co/ggerganov/whisper.cpp
|
779 |
+
- https://ggml.ggerganov.com
|
780 |
+
|
781 |
+
For more details, see the conversion script [models/convert-pt-to-ggml.py](models/convert-pt-to-ggml.py) or [models/README.md](models/README.md).
|
782 |
+
|
783 |
+
## [Bindings](https://github.com/ggerganov/whisper.cpp/discussions/categories/bindings)
|
784 |
+
|
785 |
+
- [x] Rust: [tazz4843/whisper-rs](https://github.com/tazz4843/whisper-rs) | [#310](https://github.com/ggerganov/whisper.cpp/discussions/310)
|
786 |
+
- [x] JavaScript: [bindings/javascript](bindings/javascript) | [#309](https://github.com/ggerganov/whisper.cpp/discussions/309)
|
787 |
+
- React Native (iOS / Android): [whisper.rn](https://github.com/mybigday/whisper.rn)
|
788 |
+
- [x] Go: [bindings/go](bindings/go) | [#312](https://github.com/ggerganov/whisper.cpp/discussions/312)
|
789 |
+
- [x] Java:
|
790 |
+
- [GiviMAD/whisper-jni](https://github.com/GiviMAD/whisper-jni)
|
791 |
+
- [x] Ruby: [bindings/ruby](bindings/ruby) | [#507](https://github.com/ggerganov/whisper.cpp/discussions/507)
|
792 |
+
- [x] Objective-C / Swift: [ggerganov/whisper.spm](https://github.com/ggerganov/whisper.spm) | [#313](https://github.com/ggerganov/whisper.cpp/discussions/313)
|
793 |
+
- [exPHAT/SwiftWhisper](https://github.com/exPHAT/SwiftWhisper)
|
794 |
+
- [x] .NET: | [#422](https://github.com/ggerganov/whisper.cpp/discussions/422)
|
795 |
+
- [sandrohanea/whisper.net](https://github.com/sandrohanea/whisper.net)
|
796 |
+
- [NickDarvey/whisper](https://github.com/NickDarvey/whisper)
|
797 |
+
- [x] Python: | [#9](https://github.com/ggerganov/whisper.cpp/issues/9)
|
798 |
+
- [stlukey/whispercpp.py](https://github.com/stlukey/whispercpp.py) (Cython)
|
799 |
+
- [AIWintermuteAI/whispercpp](https://github.com/AIWintermuteAI/whispercpp) (Updated fork of aarnphm/whispercpp)
|
800 |
+
- [aarnphm/whispercpp](https://github.com/aarnphm/whispercpp) (Pybind11)
|
801 |
+
- [x] R: [bnosac/audio.whisper](https://github.com/bnosac/audio.whisper)
|
802 |
+
- [x] Unity: [macoron/whisper.unity](https://github.com/Macoron/whisper.unity)
|
803 |
+
|
804 |
+
## Examples
|
805 |
+
|
806 |
+
There are various examples of using the library for different projects in the [examples](examples) folder.
|
807 |
+
Some of the examples are even ported to run in the browser using WebAssembly. Check them out!
|
808 |
+
|
809 |
+
| Example | Web | Description |
|
810 |
+
| --------------------------------------------------- | ------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------- |
|
811 |
+
| [main](examples/main) | [whisper.wasm](examples/whisper.wasm) | Tool for translating and transcribing audio using Whisper |
|
812 |
+
| [bench](examples/bench) | [bench.wasm](examples/bench.wasm) | Benchmark the performance of Whisper on your machine |
|
813 |
+
| [stream](examples/stream) | [stream.wasm](examples/stream.wasm) | Real-time transcription of raw microphone capture |
|
814 |
+
| [command](examples/command) | [command.wasm](examples/command.wasm) | Basic voice assistant example for receiving voice commands from the mic |
|
815 |
+
| [wchess](examples/wchess) | [wchess.wasm](examples/wchess) | Voice-controlled chess |
|
816 |
+
| [talk](examples/talk) | [talk.wasm](examples/talk.wasm) | Talk with a GPT-2 bot |
|
817 |
+
| [talk-llama](examples/talk-llama) | | Talk with a LLaMA bot |
|
818 |
+
| [whisper.objc](examples/whisper.objc) | | iOS mobile application using whisper.cpp |
|
819 |
+
| [whisper.swiftui](examples/whisper.swiftui) | | SwiftUI iOS / macOS application using whisper.cpp |
|
820 |
+
| [whisper.android](examples/whisper.android) | | Android mobile application using whisper.cpp |
|
821 |
+
| [whisper.nvim](examples/whisper.nvim) | | Speech-to-text plugin for Neovim |
|
822 |
+
| [generate-karaoke.sh](examples/generate-karaoke.sh) | | Helper script to easily [generate a karaoke video](https://youtu.be/uj7hVta4blM) of raw audio capture |
|
823 |
+
| [livestream.sh](examples/livestream.sh) | | [Livestream audio transcription](https://github.com/ggerganov/whisper.cpp/issues/185) |
|
824 |
+
| [yt-wsp.sh](examples/yt-wsp.sh) | | Download + transcribe and/or translate any VOD [(original)](https://gist.github.com/DaniruKun/96f763ec1a037cc92fe1a059b643b818) |
|
825 |
+
| [server](examples/server) | | HTTP transcription server with OAI-like API |
|
826 |
+
|
827 |
+
## [Discussions](https://github.com/ggerganov/whisper.cpp/discussions)
|
828 |
+
|
829 |
+
If you have any kind of feedback about this project feel free to use the Discussions section and open a new topic.
|
830 |
+
You can use the [Show and tell](https://github.com/ggerganov/whisper.cpp/discussions/categories/show-and-tell) category
|
831 |
+
to share your own projects that use `whisper.cpp`. If you have a question, make sure to check the
|
832 |
+
[Frequently asked questions (#126)](https://github.com/ggerganov/whisper.cpp/discussions/126) discussion.
|
README_sycl.md
ADDED
@@ -0,0 +1,249 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# whisper.cpp for SYCL
|
2 |
+
|
3 |
+
[Background](#background)
|
4 |
+
|
5 |
+
[OS](#os)
|
6 |
+
|
7 |
+
[Intel GPU](#intel-gpu)
|
8 |
+
|
9 |
+
[Linux](#linux)
|
10 |
+
|
11 |
+
[Environment Variable](#environment-variable)
|
12 |
+
|
13 |
+
[Known Issue](#known-issue)
|
14 |
+
|
15 |
+
[Todo](#todo)
|
16 |
+
|
17 |
+
## Background
|
18 |
+
|
19 |
+
SYCL is a higher-level programming model to improve programming productivity on various hardware accelerators, such as CPUs, GPUs, and FPGAs. It is a single-source embedded domain-specific language based on pure C++17.
|
20 |
+
|
21 |
+
oneAPI is a specification that is open and standards-based, supporting multiple architecture types including but not limited to GPU, CPU, and FPGA. The spec has both direct programming and API-based programming paradigms.
|
22 |
+
|
23 |
+
Intel uses SYCL as a direct programming language to support CPUs, GPUs, and FPGAs.
|
24 |
+
|
25 |
+
To avoid re-inventing the wheel, this code refers to other code paths in llama.cpp (like OpenBLAS, cuBLAS, CLBlast). We use an open-source tool, [SYCLomatic](https://github.com/oneapi-src/SYCLomatic) (commercial release: [Intel® DPC++ Compatibility Tool](https://www.intel.com/content/www/us/en/developer/tools/oneapi/dpc-compatibility-tool.html)), to migrate to SYCL.
|
26 |
+
|
27 |
+
The whisper.cpp for SYCL is used to support Intel GPUs.
|
28 |
+
|
29 |
+
For Intel CPUs, we recommend using whisper.cpp for x86 (Intel MKL build).
|
30 |
+
|
31 |
+
## OS
|
32 |
+
|
33 |
+
|OS|Status|Verified|
|
34 |
+
|-|-|-|
|
35 |
+
|Linux|Support|Ubuntu 22.04|
|
36 |
+
|Windows|Ongoing| |
|
37 |
+
|
38 |
+
|
39 |
+
## Intel GPU
|
40 |
+
|
41 |
+
|Intel GPU| Status | Verified Model|
|
42 |
+
|-|-|-|
|
43 |
+
|Intel Data Center Max Series| Support| Max 1550|
|
44 |
+
|Intel Data Center Flex Series| Support| Flex 170|
|
45 |
+
|Intel Arc Series| Support| Arc 770|
|
46 |
+
|Intel built-in Arc GPU| Support| built-in Arc GPU in Meteor Lake|
|
47 |
+
|Intel iGPU| Support| iGPU in i5-1250P, i7-1165G7|
|
48 |
+
|
49 |
+
|
50 |
+
## Linux
|
51 |
+
|
52 |
+
### Setup Environment
|
53 |
+
|
54 |
+
1. Install Intel GPU driver.
|
55 |
+
|
56 |
+
a. Please install Intel GPU driver by official guide: [Install GPU Drivers](https://dgpu-docs.intel.com/driver/installation.html).
|
57 |
+
|
58 |
+
Note: for iGPU, please install the client GPU driver.
|
59 |
+
|
60 |
+
b. Add user to group: video, render.
|
61 |
+
|
62 |
+
```
|
63 |
+
sudo usermod -aG render username
|
64 |
+
sudo usermod -aG video username
|
65 |
+
```
|
66 |
+
|
67 |
+
Note: re-login to enable it.
|
68 |
+
|
69 |
+
c. Check
|
70 |
+
|
71 |
+
```
|
72 |
+
sudo apt install clinfo
|
73 |
+
sudo clinfo -l
|
74 |
+
```
|
75 |
+
|
76 |
+
Output (example):
|
77 |
+
|
78 |
+
```
|
79 |
+
Platform #0: Intel(R) OpenCL Graphics
|
80 |
+
`-- Device #0: Intel(R) Arc(TM) A770 Graphics
|
81 |
+
|
82 |
+
|
83 |
+
Platform #0: Intel(R) OpenCL HD Graphics
|
84 |
+
`-- Device #0: Intel(R) Iris(R) Xe Graphics [0x9a49]
|
85 |
+
```
|
86 |
+
|
87 |
+
2. Install the Intel® oneAPI Base Toolkit.
|
88 |
+
|
89 |
+
|
90 |
+
a. Please follow the procedure in [Get the Intel® oneAPI Base Toolkit](https://www.intel.com/content/www/us/en/developer/tools/oneapi/base-toolkit.html).
|
91 |
+
|
92 |
+
We recommend installing to the default folder: **/opt/intel/oneapi**.
|
93 |
+
|
94 |
+
The following guide uses the default folder as an example. If you use a different folder, adjust the paths in this guide accordingly.
|
95 |
+
|
96 |
+
b. Check
|
97 |
+
|
98 |
+
```
|
99 |
+
source /opt/intel/oneapi/setvars.sh
|
100 |
+
|
101 |
+
sycl-ls
|
102 |
+
```
|
103 |
+
|
104 |
+
There should be one or more level-zero devices. Like **[ext_oneapi_level_zero:gpu:0]**.
|
105 |
+
|
106 |
+
Output (example):
|
107 |
+
```
|
108 |
+
[opencl:acc:0] Intel(R) FPGA Emulation Platform for OpenCL(TM), Intel(R) FPGA Emulation Device OpenCL 1.2 [2023.16.10.0.17_160000]
|
109 |
+
[opencl:cpu:1] Intel(R) OpenCL, 13th Gen Intel(R) Core(TM) i7-13700K OpenCL 3.0 (Build 0) [2023.16.10.0.17_160000]
|
110 |
+
[opencl:gpu:2] Intel(R) OpenCL Graphics, Intel(R) Arc(TM) A770 Graphics OpenCL 3.0 NEO [23.30.26918.50]
|
111 |
+
[ext_oneapi_level_zero:gpu:0] Intel(R) Level-Zero, Intel(R) Arc(TM) A770 Graphics 1.3 [1.3.26918]
|
112 |
+
|
113 |
+
```
|
114 |
+
|
115 |
+
3. Build locally:
|
116 |
+
|
117 |
+
```
|
118 |
+
mkdir -p build
|
119 |
+
cd build
|
120 |
+
source /opt/intel/oneapi/setvars.sh
|
121 |
+
|
122 |
+
#for FP16
|
123 |
+
#cmake .. -DWHISPER_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DWHISPER_SYCL_F16=ON
|
124 |
+
|
125 |
+
#for FP32
|
126 |
+
cmake .. -DWHISPER_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
|
127 |
+
|
128 |
+
#build example/main only
|
129 |
+
#cmake --build . --config Release --target main
|
130 |
+
|
131 |
+
#build all binary
|
132 |
+
cmake --build . --config Release -v
|
133 |
+
|
134 |
+
```
|
135 |
+
|
136 |
+
or
|
137 |
+
|
138 |
+
```
|
139 |
+
./examples/sycl/build.sh
|
140 |
+
```
|
141 |
+
|
142 |
+
Note:
|
143 |
+
|
144 |
+
- By default, all binaries are built, which takes more time. To reduce the build time, we recommend building **example/main** only.
|
145 |
+
|
146 |
+
### Run
|
147 |
+
|
148 |
+
1. Put model file to folder **models**
|
149 |
+
|
150 |
+
2. Enable oneAPI running environment
|
151 |
+
|
152 |
+
```
|
153 |
+
source /opt/intel/oneapi/setvars.sh
|
154 |
+
```
|
155 |
+
|
156 |
+
3. List device ID
|
157 |
+
|
158 |
+
Run without parameter:
|
159 |
+
|
160 |
+
```
|
161 |
+
./build/bin/ls-sycl-device
|
162 |
+
|
163 |
+
or
|
164 |
+
|
165 |
+
./build/bin/main
|
166 |
+
```
|
167 |
+
|
168 |
+
Check the ID in startup log, like:
|
169 |
+
|
170 |
+
```
|
171 |
+
found 4 SYCL devices:
|
172 |
+
Device 0: Intel(R) Arc(TM) A770 Graphics, compute capability 1.3,
|
173 |
+
max compute_units 512, max work group size 1024, max sub group size 32, global mem size 16225243136
|
174 |
+
Device 1: Intel(R) FPGA Emulation Device, compute capability 1.2,
|
175 |
+
max compute_units 24, max work group size 67108864, max sub group size 64, global mem size 67065057280
|
176 |
+
Device 2: 13th Gen Intel(R) Core(TM) i7-13700K, compute capability 3.0,
|
177 |
+
max compute_units 24, max work group size 8192, max sub group size 64, global mem size 67065057280
|
178 |
+
Device 3: Intel(R) Arc(TM) A770 Graphics, compute capability 3.0,
|
179 |
+
max compute_units 512, max work group size 1024, max sub group size 32, global mem size 16225243136
|
180 |
+
|
181 |
+
```
|
182 |
+
|
183 |
+
|Attribute|Note|
|
184 |
+
|-|-|
|
185 |
+
|compute capability 1.3|Level-zero running time, recommended |
|
186 |
+
|compute capability 3.0|OpenCL running time, slower than level-zero in most cases|
|
187 |
+
|
188 |
+
4. Set device ID and execute whisper.cpp
|
189 |
+
|
190 |
+
Set device ID = 0 by **GGML_SYCL_DEVICE=0**
|
191 |
+
|
192 |
+
```
|
193 |
+
GGML_SYCL_DEVICE=0 ./build/bin/main -m models/ggml-base.en.bin -f samples/jfk.wav
|
194 |
+
```
|
195 |
+
or run by script:
|
196 |
+
|
197 |
+
```
|
198 |
+
./examples/sycl/run_whisper.sh
|
199 |
+
```
|
200 |
+
|
201 |
+
|
202 |
+
|
203 |
+
5. Check the device ID in output
|
204 |
+
|
205 |
+
Like:
|
206 |
+
```
|
207 |
+
Using device **0** (Intel(R) Arc(TM) A770 Graphics) as main device
|
208 |
+
```
|
209 |
+
|
210 |
+
|
211 |
+
## Environment Variable
|
212 |
+
|
213 |
+
#### Build
|
214 |
+
|
215 |
+
|Name|Value|Function|
|
216 |
+
|-|-|-|
|
217 |
+
|WHISPER_SYCL|ON (mandatory)|Enable build with SYCL code path. <br>For FP32/FP16, WHISPER_SYCL=ON is mandatory.|
|
218 |
+
|WHISPER_SYCL_F16|ON (optional)|Enable FP16 build with SYCL code path. For FP32, do not set it.|
|
219 |
+
|CMAKE_C_COMPILER|icx|Use icx compiler for SYCL code path|
|
220 |
+
|CMAKE_CXX_COMPILER|icpx|use icpx for SYCL code path|
|
221 |
+
|
222 |
+
#### Running
|
223 |
+
|
224 |
+
|
225 |
+
|Name|Value|Function|
|
226 |
+
|-|-|-|
|
227 |
+
|GGML_SYCL_DEVICE|0 (default) or 1|Set the ID of the device to use. Check the device IDs in the startup output of a default run|
|
228 |
+
|GGML_SYCL_DEBUG|0 (default) or 1|Enable log function by macro: GGML_SYCL_DEBUG|
|
229 |
+
|
230 |
+
## Known Issue
|
231 |
+
|
232 |
+
- Error: `error while loading shared libraries: libsycl.so.7: cannot open shared object file: No such file or directory`.
|
233 |
+
|
234 |
+
The oneAPI running environment has not been enabled.
|
235 |
+
|
236 |
+
Install oneAPI base toolkit and enable it by: `source /opt/intel/oneapi/setvars.sh`.
|
237 |
+
|
238 |
+
|
239 |
+
- Hang during startup
|
240 |
+
|
241 |
+
llama.cpp uses mmap as the default way to read the model file and copy it to the GPU. On some systems, memcpy may behave abnormally and block.
|
242 |
+
|
243 |
+
Solution: add **--no-mmap**.
|
244 |
+
|
245 |
+
## Todo
|
246 |
+
|
247 |
+
- Support building on Windows.
|
248 |
+
|
249 |
+
- Support multiple cards.
|
bindings/CMakeLists.txt
ADDED
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
if (EMSCRIPTEN)
|
2 |
+
add_subdirectory(javascript)
|
3 |
+
|
4 |
+
add_custom_command(
|
5 |
+
OUTPUT ${CMAKE_CURRENT_SOURCE_DIR}/javascript/publish.log
|
6 |
+
DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/javascript/whisper.js
|
7 |
+
DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/javascript/libwhisper.worker.js
|
8 |
+
DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/javascript/package.json
|
9 |
+
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/javascript
|
10 |
+
COMMAND npm publish
|
11 |
+
COMMAND touch publish.log
|
12 |
+
COMMENT "Publishing npm module v${PROJECT_VERSION}"
|
13 |
+
VERBATIM
|
14 |
+
)
|
15 |
+
|
16 |
+
add_custom_target(publish-npm
|
17 |
+
DEPENDS javascript/publish.log
|
18 |
+
)
|
19 |
+
endif()
|
bindings/go/.gitignore
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
build
|
2 |
+
models
|
bindings/go/LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
MIT License
|
2 |
+
|
3 |
+
Copyright (c) 2022 David Thorpe
|
4 |
+
|
5 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6 |
+
of this software and associated documentation files (the "Software"), to deal
|
7 |
+
in the Software without restriction, including without limitation the rights
|
8 |
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9 |
+
copies of the Software, and to permit persons to whom the Software is
|
10 |
+
furnished to do so, subject to the following conditions:
|
11 |
+
|
12 |
+
The above copyright notice and this permission notice shall be included in all
|
13 |
+
copies or substantial portions of the Software.
|
14 |
+
|
15 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21 |
+
SOFTWARE.
|
bindings/go/Makefile
ADDED
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
ifndef UNAME_S
|
2 |
+
UNAME_S := $(shell uname -s)
|
3 |
+
endif
|
4 |
+
|
5 |
+
ifndef UNAME_P
|
6 |
+
UNAME_P := $(shell uname -p)
|
7 |
+
endif
|
8 |
+
|
9 |
+
ifndef UNAME_M
|
10 |
+
UNAME_M := $(shell uname -m)
|
11 |
+
endif
|
12 |
+
|
13 |
+
GGML_METAL_PATH_RESOURCES := $(abspath ../..)
|
14 |
+
BUILD_DIR := build
|
15 |
+
MODELS_DIR := models
|
16 |
+
EXAMPLES_DIR := $(wildcard examples/*)
|
17 |
+
INCLUDE_PATH := $(abspath ../../include):$(abspath ../../ggml/include)
|
18 |
+
LIBRARY_PATH := $(abspath ../..)
|
19 |
+
|
20 |
+
ifeq ($(UNAME_S),Darwin)
|
21 |
+
EXT_LDFLAGS := -framework Foundation -framework Metal -framework MetalKit
|
22 |
+
endif
|
23 |
+
|
24 |
+
all: clean whisper examples
|
25 |
+
|
26 |
+
whisper: mkdir
|
27 |
+
@echo Build whisper
|
28 |
+
@${MAKE} -C ../.. libwhisper.a
|
29 |
+
|
30 |
+
test: model-small whisper modtidy
|
31 |
+
ifeq ($(UNAME_S),Darwin)
|
32 |
+
@C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} GGML_METAL_PATH_RESOURCES=${GGML_METAL_PATH_RESOURCES} go test -ldflags "-extldflags '$(EXT_LDFLAGS)'" -v .
|
33 |
+
@C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} GGML_METAL_PATH_RESOURCES=${GGML_METAL_PATH_RESOURCES} go test -ldflags "-extldflags '$(EXT_LDFLAGS)'" -v ./pkg/whisper/...
|
34 |
+
else
|
35 |
+
@C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} go test -v .
|
36 |
+
@C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} go test -v ./pkg/whisper/...
|
37 |
+
endif
|
38 |
+
|
39 |
+
examples: $(EXAMPLES_DIR)
|
40 |
+
|
41 |
+
model-small: mkdir examples/go-model-download
|
42 |
+
@${BUILD_DIR}/go-model-download -out models ggml-small.en.bin
|
43 |
+
|
44 |
+
$(EXAMPLES_DIR): mkdir whisper modtidy
|
45 |
+
@echo Build example $(notdir $@)
|
46 |
+
ifeq ($(UNAME_S),Darwin)
|
47 |
+
@C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} GGML_METAL_PATH_RESOURCES=${GGML_METAL_PATH_RESOURCES} go build ${BUILD_FLAGS} -ldflags "-extldflags '$(EXT_LDFLAGS)'" -o ${BUILD_DIR}/$(notdir $@) ./$@
|
48 |
+
else
|
49 |
+
@C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} go build ${BUILD_FLAGS} -o ${BUILD_DIR}/$(notdir $@) ./$@
|
50 |
+
endif
|
51 |
+
|
52 |
+
mkdir:
|
53 |
+
@echo Mkdir ${BUILD_DIR}
|
54 |
+
@install -d ${BUILD_DIR}
|
55 |
+
@echo Mkdir ${MODELS_DIR}
|
56 |
+
@install -d ${MODELS_DIR}
|
57 |
+
|
58 |
+
modtidy:
|
59 |
+
@go mod tidy
|
60 |
+
|
61 |
+
clean:
|
62 |
+
@echo Clean
|
63 |
+
@rm -fr $(BUILD_DIR)
|
64 |
+
@go clean
|
bindings/go/README.md
ADDED
@@ -0,0 +1,100 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Go bindings for Whisper
|
2 |
+
|
3 |
+
This package provides Go bindings for whisper.cpp. They have been tested on:
|
4 |
+
|
5 |
+
* Darwin (OS X) 12.6 on x86_64
|
6 |
+
* Debian Linux on arm64
|
7 |
+
* Fedora Linux on x86_64
|
8 |
+
|
9 |
+
The "low level" bindings are in the `bindings/go` directory and there is a more
|
10 |
+
Go-style package in the `bindings/go/pkg/whisper` directory. The most simple usage
|
11 |
+
is as follows:
|
12 |
+
|
13 |
+
```go
|
14 |
+
import (
|
15 |
+
"github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
|
16 |
+
)
|
17 |
+
|
18 |
+
func main() {
|
19 |
+
var modelpath string // Path to the model
|
20 |
+
var samples []float32 // Samples to process
|
21 |
+
|
22 |
+
// Load the model
|
23 |
+
model, err := whisper.New(modelpath)
|
24 |
+
if err != nil {
|
25 |
+
panic(err)
|
26 |
+
}
|
27 |
+
defer model.Close()
|
28 |
+
|
29 |
+
// Process samples
|
30 |
+
context, err := model.NewContext()
|
31 |
+
if err != nil {
|
32 |
+
panic(err)
|
33 |
+
}
|
34 |
+
if err := context.Process(samples, nil, nil); err != nil {
|
35 |
+
panic(err)
|
36 |
+
}
|
37 |
+
|
38 |
+
// Print out the results
|
39 |
+
for {
|
40 |
+
segment, err := context.NextSegment()
|
41 |
+
if err != nil {
|
42 |
+
break
|
43 |
+
}
|
44 |
+
fmt.Printf("[%6s->%6s] %s\n", segment.Start, segment.End, segment.Text)
|
45 |
+
}
|
46 |
+
}
|
47 |
+
```
|
48 |
+
|
49 |
+
## Building & Testing
|
50 |
+
|
51 |
+
In order to build, you need to have the Go compiler installed. You can get it from [here](https://golang.org/dl/). Run the tests with:
|
52 |
+
|
53 |
+
```bash
|
54 |
+
git clone https://github.com/ggerganov/whisper.cpp.git
|
55 |
+
cd whisper.cpp/bindings/go
|
56 |
+
make test
|
57 |
+
```
|
58 |
+
|
59 |
+
This will compile a static `libwhisper.a` in a `build` folder, download a model file, then run the tests. To build the examples:
|
60 |
+
|
61 |
+
```bash
|
62 |
+
make examples
|
63 |
+
```
|
64 |
+
|
65 |
+
The examples are placed in the `build` directory. Once built, you can download all the models with the following command:
|
66 |
+
|
67 |
+
```bash
|
68 |
+
./build/go-model-download -out models
|
69 |
+
```
|
70 |
+
|
71 |
+
And you can then test a model against samples with the following command:
|
72 |
+
|
73 |
+
```bash
|
74 |
+
./build/go-whisper -model models/ggml-tiny.en.bin samples/jfk.wav
|
75 |
+
```
|
76 |
+
|
77 |
+
## Using the bindings
|
78 |
+
|
79 |
+
To use the bindings in your own software,
|
80 |
+
|
81 |
+
1. Import `github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper` (or `github.com/ggerganov/whisper.cpp/bindings/go`) into your package;
|
82 |
+
2. Compile `libwhisper.a` (you can use `make whisper` in the `bindings/go` directory);
|
83 |
+
3. Link your go binary against whisper by setting the environment variables `C_INCLUDE_PATH` and `LIBRARY_PATH`
|
84 |
+
to point to the `whisper.h` file directory and `libwhisper.a` file directory respectively.
|
85 |
+
|
86 |
+
Look at the `Makefile` in the `bindings/go` directory for an example.
|
87 |
+
|
88 |
+
The API Documentation:
|
89 |
+
|
90 |
+
* https://pkg.go.dev/github.com/ggerganov/whisper.cpp/bindings/go
|
91 |
+
* https://pkg.go.dev/github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper
|
92 |
+
|
93 |
+
Getting help:
|
94 |
+
|
95 |
+
* Follow the discussion for the go bindings [here](https://github.com/ggerganov/whisper.cpp/discussions/312)
|
96 |
+
|
97 |
+
## License
|
98 |
+
|
99 |
+
The license for the Go bindings is the same as the license for the rest of the whisper.cpp project, which is the MIT License. See the `LICENSE` file for more details.
|
100 |
+
|
bindings/go/doc.go
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
/*
|
2 |
+
github.com/ggerganov/whisper.cpp/bindings/go
|
3 |
+
provides speech-to-text service bindings for the Go programming language.
|
4 |
+
*/
|
5 |
+
package whisper
|
bindings/go/examples/go-model-download/context.go
ADDED
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
package main
|
2 |
+
|
3 |
+
import (
|
4 |
+
"context"
|
5 |
+
"os"
|
6 |
+
"os/signal"
|
7 |
+
)
|
8 |
+
|
9 |
+
// ContextForSignal returns a context object which is cancelled when a signal
|
10 |
+
// is received. It returns nil if no signal parameter is provided
|
11 |
+
func ContextForSignal(signals ...os.Signal) context.Context {
|
12 |
+
if len(signals) == 0 {
|
13 |
+
return nil
|
14 |
+
}
|
15 |
+
|
16 |
+
ch := make(chan os.Signal)
|
17 |
+
ctx, cancel := context.WithCancel(context.Background())
|
18 |
+
|
19 |
+
// Send message on channel when signal received
|
20 |
+
signal.Notify(ch, signals...)
|
21 |
+
|
22 |
+
// When any signal received, call cancel
|
23 |
+
go func() {
|
24 |
+
<-ch
|
25 |
+
cancel()
|
26 |
+
}()
|
27 |
+
|
28 |
+
// Return success
|
29 |
+
return ctx
|
30 |
+
}
|
bindings/go/examples/go-model-download/main.go
ADDED
@@ -0,0 +1,208 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
package main
|
2 |
+
|
3 |
+
import (
|
4 |
+
"context"
|
5 |
+
"flag"
|
6 |
+
"fmt"
|
7 |
+
"io"
|
8 |
+
"net/http"
|
9 |
+
"net/url"
|
10 |
+
"os"
|
11 |
+
"path/filepath"
|
12 |
+
"syscall"
|
13 |
+
"time"
|
14 |
+
)
|
15 |
+
|
16 |
+
///////////////////////////////////////////////////////////////////////////////
|
17 |
+
// CONSTANTS
|
18 |
+
|
19 |
+
const (
|
20 |
+
srcUrl = "https://huggingface.co/ggerganov/whisper.cpp/resolve/main" // The location of the models
|
21 |
+
srcExt = ".bin" // Filename extension
|
22 |
+
bufSize = 1024 * 64 // Size of the buffer used for downloading the model
|
23 |
+
)
|
24 |
+
|
25 |
+
var (
|
26 |
+
// The models which will be downloaded, if no model is specified as an argument
|
27 |
+
modelNames = []string{"ggml-tiny.en", "ggml-tiny", "ggml-base.en", "ggml-base", "ggml-small.en", "ggml-small", "ggml-medium.en", "ggml-medium", "ggml-large-v1", "ggml-large-v2", "ggml-large-v3"}
|
28 |
+
)
|
29 |
+
|
30 |
+
var (
|
31 |
+
// The output folder. When not set, use current working directory.
|
32 |
+
flagOut = flag.String("out", "", "Output folder")
|
33 |
+
|
34 |
+
// HTTP timeout parameter - will timeout if takes longer than this to download a model
|
35 |
+
flagTimeout = flag.Duration("timeout", 30*time.Minute, "HTTP timeout")
|
36 |
+
|
37 |
+
// Quiet parameter - will not print progress if set
|
38 |
+
flagQuiet = flag.Bool("quiet", false, "Quiet mode")
|
39 |
+
)
|
40 |
+
|
41 |
+
///////////////////////////////////////////////////////////////////////////////
|
42 |
+
// MAIN
|
43 |
+
|
44 |
+
func main() {
|
45 |
+
flag.Usage = func() {
|
46 |
+
name := filepath.Base(flag.CommandLine.Name())
|
47 |
+
fmt.Fprintf(flag.CommandLine.Output(), "Usage: %s [options] <model>\n\n", name)
|
48 |
+
flag.PrintDefaults()
|
49 |
+
}
|
50 |
+
flag.Parse()
|
51 |
+
|
52 |
+
// Get output path
|
53 |
+
out, err := GetOut()
|
54 |
+
if err != nil {
|
55 |
+
fmt.Fprintln(os.Stderr, "Error:", err)
|
56 |
+
os.Exit(-1)
|
57 |
+
}
|
58 |
+
|
59 |
+
// Create context which quits on SIGINT or SIGQUIT
|
60 |
+
ctx := ContextForSignal(os.Interrupt, syscall.SIGQUIT)
|
61 |
+
|
62 |
+
// Progress filehandle
|
63 |
+
progress := os.Stdout
|
64 |
+
if *flagQuiet {
|
65 |
+
progress, err = os.Open(os.DevNull)
|
66 |
+
if err != nil {
|
67 |
+
fmt.Fprintln(os.Stderr, "Error:", err)
|
68 |
+
os.Exit(-1)
|
69 |
+
}
|
70 |
+
defer progress.Close()
|
71 |
+
}
|
72 |
+
|
73 |
+
// Download models - exit on error or interrupt
|
74 |
+
for _, model := range GetModels() {
|
75 |
+
url, err := URLForModel(model)
|
76 |
+
if err != nil {
|
77 |
+
fmt.Fprintln(os.Stderr, "Error:", err)
|
78 |
+
continue
|
79 |
+
} else if path, err := Download(ctx, progress, url, out); err == nil || err == io.EOF {
|
80 |
+
continue
|
81 |
+
} else if err == context.Canceled {
|
82 |
+
os.Remove(path)
|
83 |
+
fmt.Fprintln(progress, "\nInterrupted")
|
84 |
+
break
|
85 |
+
} else if err == context.DeadlineExceeded {
|
86 |
+
os.Remove(path)
|
87 |
+
fmt.Fprintln(progress, "Timeout downloading model")
|
88 |
+
continue
|
89 |
+
} else {
|
90 |
+
os.Remove(path)
|
91 |
+
fmt.Fprintln(os.Stderr, "Error:", err)
|
92 |
+
break
|
93 |
+
}
|
94 |
+
}
|
95 |
+
}
|
96 |
+
|
97 |
+
///////////////////////////////////////////////////////////////////////////////
|
98 |
+
// PUBLIC METHODS
|
99 |
+
|
100 |
+
// GetOut returns the path to the output directory
|
101 |
+
func GetOut() (string, error) {
|
102 |
+
if *flagOut == "" {
|
103 |
+
return os.Getwd()
|
104 |
+
}
|
105 |
+
if info, err := os.Stat(*flagOut); err != nil {
|
106 |
+
return "", err
|
107 |
+
} else if !info.IsDir() {
|
108 |
+
return "", fmt.Errorf("not a directory: %s", info.Name())
|
109 |
+
} else {
|
110 |
+
return *flagOut, nil
|
111 |
+
}
|
112 |
+
}
|
113 |
+
|
114 |
+
// GetModels returns the list of models to download
|
115 |
+
func GetModels() []string {
|
116 |
+
if flag.NArg() == 0 {
|
117 |
+
return modelNames
|
118 |
+
} else {
|
119 |
+
return flag.Args()
|
120 |
+
}
|
121 |
+
}
|
122 |
+
|
123 |
+
// URLForModel returns the URL for the given model on huggingface.co
|
124 |
+
func URLForModel(model string) (string, error) {
|
125 |
+
if filepath.Ext(model) != srcExt {
|
126 |
+
model += srcExt
|
127 |
+
}
|
128 |
+
url, err := url.Parse(srcUrl)
|
129 |
+
if err != nil {
|
130 |
+
return "", err
|
131 |
+
} else {
|
132 |
+
url.Path = filepath.Join(url.Path, model)
|
133 |
+
}
|
134 |
+
return url.String(), nil
|
135 |
+
}
|
136 |
+
|
137 |
+
// Download downloads the model from the given URL to the given output directory
|
138 |
+
func Download(ctx context.Context, p io.Writer, model, out string) (string, error) {
|
139 |
+
// Create HTTP client
|
140 |
+
client := http.Client{
|
141 |
+
Timeout: *flagTimeout,
|
142 |
+
}
|
143 |
+
|
144 |
+
// Initiate the download
|
145 |
+
req, err := http.NewRequest("GET", model, nil)
|
146 |
+
if err != nil {
|
147 |
+
return "", err
|
148 |
+
}
|
149 |
+
resp, err := client.Do(req)
|
150 |
+
if err != nil {
|
151 |
+
return "", err
|
152 |
+
}
|
153 |
+
defer resp.Body.Close()
|
154 |
+
if resp.StatusCode != http.StatusOK {
|
155 |
+
return "", fmt.Errorf("%s: %s", model, resp.Status)
|
156 |
+
}
|
157 |
+
|
158 |
+
// If output file exists and is the same size as the model, skip
|
159 |
+
path := filepath.Join(out, filepath.Base(model))
|
160 |
+
if info, err := os.Stat(path); err == nil && info.Size() == resp.ContentLength {
|
161 |
+
fmt.Fprintln(p, "Skipping", model, "as it already exists")
|
162 |
+
return "", nil
|
163 |
+
}
|
164 |
+
|
165 |
+
// Create file
|
166 |
+
w, err := os.Create(path)
|
167 |
+
if err != nil {
|
168 |
+
return "", err
|
169 |
+
}
|
170 |
+
defer w.Close()
|
171 |
+
|
172 |
+
// Report
|
173 |
+
fmt.Fprintln(p, "Downloading", model, "to", out)
|
174 |
+
|
175 |
+
// Progressively download the model
|
176 |
+
data := make([]byte, bufSize)
|
177 |
+
count, pct := int64(0), int64(0)
|
178 |
+
ticker := time.NewTicker(5 * time.Second)
|
179 |
+
for {
|
180 |
+
select {
|
181 |
+
case <-ctx.Done():
|
182 |
+
// Cancelled, return error
|
183 |
+
return path, ctx.Err()
|
184 |
+
case <-ticker.C:
|
185 |
+
pct = DownloadReport(p, pct, count, resp.ContentLength)
|
186 |
+
default:
|
187 |
+
// Read body
|
188 |
+
n, err := resp.Body.Read(data)
|
189 |
+
if err != nil {
|
190 |
+
DownloadReport(p, pct, count, resp.ContentLength)
|
191 |
+
return path, err
|
192 |
+
} else if m, err := w.Write(data[:n]); err != nil {
|
193 |
+
return path, err
|
194 |
+
} else {
|
195 |
+
count += int64(m)
|
196 |
+
}
|
197 |
+
}
|
198 |
+
}
|
199 |
+
}
|
200 |
+
|
201 |
+
// Report periodically reports the download progress when percentage changes
|
202 |
+
func DownloadReport(w io.Writer, pct, count, total int64) int64 {
|
203 |
+
pct_ := count * 100 / total
|
204 |
+
if pct_ > pct {
|
205 |
+
fmt.Fprintf(w, " ...%d MB written (%d%%)\n", count/1e6, pct_)
|
206 |
+
}
|
207 |
+
return pct_
|
208 |
+
}
|
bindings/go/examples/go-whisper/color.go
ADDED
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
package main
|
2 |
+
|
3 |
+
import "fmt"
|
4 |
+
|
5 |
+
///////////////////////////////////////////////////////////////////////////////
|
6 |
+
// CONSTANTS
|
7 |
+
|
8 |
+
const (
	Reset     = "\033[0m"
	RGBPrefix = "\033[38;5;" // 8-bit foreground colour selector, followed by a single palette index in decimal
	RGBSuffix = "m"
)

///////////////////////////////////////////////////////////////////////////////
// PUBLIC METHODS

// Colorize wraps text in the ANSI escape sequence for one of the 24 grayscale
// palette entries and resets the colour afterwards. v selects the entry, from
// 0 to 23; values outside that range are clamped to the nearest end, so a
// caller passing int(p*24) with p == 1.0 gets the last entry instead of
// wrapping round to the first.
func Colorize(text string, v int) string {
	// https://en.wikipedia.org/wiki/ANSI_escape_code#8-bit
	// Grayscale colors are in the range 232-255
	const (
		grayBase   = 232
		grayLevels = 24
	)
	if v < 0 {
		v = 0
	} else if v >= grayLevels {
		v = grayLevels - 1
	}
	return RGBPrefix + fmt.Sprint(grayBase+v) + RGBSuffix + text + Reset
}
|
bindings/go/examples/go-whisper/flags.go
ADDED
@@ -0,0 +1,147 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
package main
|
2 |
+
|
3 |
+
import (
|
4 |
+
"flag"
|
5 |
+
"fmt"
|
6 |
+
"strings"
|
7 |
+
"time"
|
8 |
+
|
9 |
+
// Packages
|
10 |
+
whisper "github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
|
11 |
+
)
|
12 |
+
|
13 |
+
///////////////////////////////////////////////////////////////////////////////
|
14 |
+
// TYPES
|
15 |
+
|
16 |
+
type Flags struct {
|
17 |
+
*flag.FlagSet
|
18 |
+
}
|
19 |
+
|
20 |
+
///////////////////////////////////////////////////////////////////////////////
|
21 |
+
// LIFECYCLE
|
22 |
+
|
23 |
+
func NewFlags(name string, args []string) (*Flags, error) {
|
24 |
+
flags := &Flags{
|
25 |
+
FlagSet: flag.NewFlagSet(name, flag.ContinueOnError),
|
26 |
+
}
|
27 |
+
|
28 |
+
// Register the command line arguments
|
29 |
+
registerFlags(flags)
|
30 |
+
|
31 |
+
// Parse command line
|
32 |
+
if err := flags.Parse(args); err != nil {
|
33 |
+
return nil, err
|
34 |
+
}
|
35 |
+
|
36 |
+
// Return success
|
37 |
+
return flags, nil
|
38 |
+
}
|
39 |
+
|
40 |
+
///////////////////////////////////////////////////////////////////////////////
|
41 |
+
// PUBLIC METHODS
|
42 |
+
|
43 |
+
func (flags *Flags) GetModel() string {
|
44 |
+
return flags.Lookup("model").Value.String()
|
45 |
+
}
|
46 |
+
|
47 |
+
func (flags *Flags) GetLanguage() string {
|
48 |
+
return flags.Lookup("language").Value.String()
|
49 |
+
}
|
50 |
+
|
51 |
+
func (flags *Flags) IsTranslate() bool {
|
52 |
+
return flags.Lookup("translate").Value.(flag.Getter).Get().(bool)
|
53 |
+
}
|
54 |
+
|
55 |
+
func (flags *Flags) GetOffset() time.Duration {
|
56 |
+
return flags.Lookup("offset").Value.(flag.Getter).Get().(time.Duration)
|
57 |
+
}
|
58 |
+
|
59 |
+
func (flags *Flags) GetDuration() time.Duration {
|
60 |
+
return flags.Lookup("duration").Value.(flag.Getter).Get().(time.Duration)
|
61 |
+
}
|
62 |
+
|
63 |
+
func (flags *Flags) GetThreads() uint {
|
64 |
+
return flags.Lookup("threads").Value.(flag.Getter).Get().(uint)
|
65 |
+
}
|
66 |
+
|
67 |
+
func (flags *Flags) GetOut() string {
|
68 |
+
return strings.ToLower(flags.Lookup("out").Value.String())
|
69 |
+
}
|
70 |
+
|
71 |
+
func (flags *Flags) IsTokens() bool {
|
72 |
+
return flags.Lookup("tokens").Value.String() == "true"
|
73 |
+
}
|
74 |
+
|
75 |
+
func (flags *Flags) IsColorize() bool {
|
76 |
+
return flags.Lookup("colorize").Value.String() == "true"
|
77 |
+
}
|
78 |
+
|
79 |
+
func (flags *Flags) GetMaxLen() uint {
|
80 |
+
return flags.Lookup("max-len").Value.(flag.Getter).Get().(uint)
|
81 |
+
}
|
82 |
+
|
83 |
+
func (flags *Flags) GetMaxTokens() uint {
|
84 |
+
return flags.Lookup("max-tokens").Value.(flag.Getter).Get().(uint)
|
85 |
+
}
|
86 |
+
|
87 |
+
func (flags *Flags) GetWordThreshold() float32 {
|
88 |
+
return float32(flags.Lookup("word-thold").Value.(flag.Getter).Get().(float64))
|
89 |
+
}
|
90 |
+
|
91 |
+
func (flags *Flags) SetParams(context whisper.Context) error {
|
92 |
+
if lang := flags.GetLanguage(); lang != "" && lang != "auto" {
|
93 |
+
fmt.Fprintf(flags.Output(), "Setting language to %q\n", lang)
|
94 |
+
if err := context.SetLanguage(lang); err != nil {
|
95 |
+
return err
|
96 |
+
}
|
97 |
+
}
|
98 |
+
if flags.IsTranslate() && context.IsMultilingual() {
|
99 |
+
fmt.Fprintf(flags.Output(), "Setting translate to true\n")
|
100 |
+
context.SetTranslate(true)
|
101 |
+
}
|
102 |
+
if offset := flags.GetOffset(); offset != 0 {
|
103 |
+
fmt.Fprintf(flags.Output(), "Setting offset to %v\n", offset)
|
104 |
+
context.SetOffset(offset)
|
105 |
+
}
|
106 |
+
if duration := flags.GetDuration(); duration != 0 {
|
107 |
+
fmt.Fprintf(flags.Output(), "Setting duration to %v\n", duration)
|
108 |
+
context.SetDuration(duration)
|
109 |
+
}
|
110 |
+
if threads := flags.GetThreads(); threads != 0 {
|
111 |
+
fmt.Fprintf(flags.Output(), "Setting threads to %d\n", threads)
|
112 |
+
context.SetThreads(threads)
|
113 |
+
}
|
114 |
+
if max_len := flags.GetMaxLen(); max_len != 0 {
|
115 |
+
fmt.Fprintf(flags.Output(), "Setting max_segment_length to %d\n", max_len)
|
116 |
+
context.SetMaxSegmentLength(max_len)
|
117 |
+
}
|
118 |
+
if max_tokens := flags.GetMaxTokens(); max_tokens != 0 {
|
119 |
+
fmt.Fprintf(flags.Output(), "Setting max_tokens to %d\n", max_tokens)
|
120 |
+
context.SetMaxTokensPerSegment(max_tokens)
|
121 |
+
}
|
122 |
+
if word_threshold := flags.GetWordThreshold(); word_threshold != 0 {
|
123 |
+
fmt.Fprintf(flags.Output(), "Setting word_threshold to %f\n", word_threshold)
|
124 |
+
context.SetTokenThreshold(word_threshold)
|
125 |
+
}
|
126 |
+
|
127 |
+
// Return success
|
128 |
+
return nil
|
129 |
+
}
|
130 |
+
|
131 |
+
///////////////////////////////////////////////////////////////////////////////
|
132 |
+
// PRIVATE METHODS
|
133 |
+
|
134 |
+
func registerFlags(flag *Flags) {
|
135 |
+
flag.String("model", "", "Path to the model file")
|
136 |
+
flag.String("language", "", "Spoken language")
|
137 |
+
flag.Bool("translate", false, "Translate from source language to english")
|
138 |
+
flag.Duration("offset", 0, "Time offset")
|
139 |
+
flag.Duration("duration", 0, "Duration of audio to process")
|
140 |
+
flag.Uint("threads", 0, "Number of threads to use")
|
141 |
+
flag.Uint("max-len", 0, "Maximum segment length in characters")
|
142 |
+
flag.Uint("max-tokens", 0, "Maximum tokens per segment")
|
143 |
+
flag.Float64("word-thold", 0, "Maximum segment score")
|
144 |
+
flag.Bool("tokens", false, "Display tokens")
|
145 |
+
flag.Bool("colorize", false, "Colorize tokens")
|
146 |
+
flag.String("out", "", "Output format (srt, none or leave as empty string)")
|
147 |
+
}
|
bindings/go/examples/go-whisper/main.go
ADDED
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
package main
|
2 |
+
|
3 |
+
import (
|
4 |
+
"flag"
|
5 |
+
"fmt"
|
6 |
+
"os"
|
7 |
+
"path/filepath"
|
8 |
+
|
9 |
+
// Packages
|
10 |
+
whisper "github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
|
11 |
+
)
|
12 |
+
|
13 |
+
func main() {
|
14 |
+
flags, err := NewFlags(filepath.Base(os.Args[0]), os.Args[1:])
|
15 |
+
if err == flag.ErrHelp {
|
16 |
+
os.Exit(0)
|
17 |
+
} else if err != nil {
|
18 |
+
fmt.Fprintln(os.Stderr, err)
|
19 |
+
os.Exit(1)
|
20 |
+
} else if flags.GetModel() == "" {
|
21 |
+
fmt.Fprintln(os.Stderr, "Use -model flag to specify which model file to use")
|
22 |
+
os.Exit(1)
|
23 |
+
} else if flags.NArg() == 0 {
|
24 |
+
fmt.Fprintln(os.Stderr, "No input files specified")
|
25 |
+
os.Exit(1)
|
26 |
+
}
|
27 |
+
|
28 |
+
// Load model
|
29 |
+
model, err := whisper.New(flags.GetModel())
|
30 |
+
if err != nil {
|
31 |
+
fmt.Fprintln(os.Stderr, err)
|
32 |
+
os.Exit(1)
|
33 |
+
}
|
34 |
+
defer model.Close()
|
35 |
+
|
36 |
+
// Process files
|
37 |
+
for _, filename := range flags.Args() {
|
38 |
+
if err := Process(model, filename, flags); err != nil {
|
39 |
+
fmt.Fprintln(os.Stderr, err)
|
40 |
+
continue
|
41 |
+
}
|
42 |
+
}
|
43 |
+
}
|
bindings/go/examples/go-whisper/process.go
ADDED
@@ -0,0 +1,132 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
package main
|
2 |
+
|
3 |
+
import (
|
4 |
+
"fmt"
|
5 |
+
"io"
|
6 |
+
"os"
|
7 |
+
"time"
|
8 |
+
|
9 |
+
// Package imports
|
10 |
+
whisper "github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
|
11 |
+
wav "github.com/go-audio/wav"
|
12 |
+
)
|
13 |
+
|
14 |
+
func Process(model whisper.Model, path string, flags *Flags) error {
|
15 |
+
var data []float32
|
16 |
+
|
17 |
+
// Create processing context
|
18 |
+
context, err := model.NewContext()
|
19 |
+
if err != nil {
|
20 |
+
return err
|
21 |
+
}
|
22 |
+
|
23 |
+
// Set the parameters
|
24 |
+
if err := flags.SetParams(context); err != nil {
|
25 |
+
return err
|
26 |
+
}
|
27 |
+
|
28 |
+
fmt.Printf("\n%s\n", context.SystemInfo())
|
29 |
+
|
30 |
+
// Open the file
|
31 |
+
fmt.Fprintf(flags.Output(), "Loading %q\n", path)
|
32 |
+
fh, err := os.Open(path)
|
33 |
+
if err != nil {
|
34 |
+
return err
|
35 |
+
}
|
36 |
+
defer fh.Close()
|
37 |
+
|
38 |
+
// Decode the WAV file - load the full buffer
|
39 |
+
dec := wav.NewDecoder(fh)
|
40 |
+
if buf, err := dec.FullPCMBuffer(); err != nil {
|
41 |
+
return err
|
42 |
+
} else if dec.SampleRate != whisper.SampleRate {
|
43 |
+
return fmt.Errorf("unsupported sample rate: %d", dec.SampleRate)
|
44 |
+
} else if dec.NumChans != 1 {
|
45 |
+
return fmt.Errorf("unsupported number of channels: %d", dec.NumChans)
|
46 |
+
} else {
|
47 |
+
data = buf.AsFloat32Buffer().Data
|
48 |
+
}
|
49 |
+
|
50 |
+
// Segment callback when -tokens is specified
|
51 |
+
var cb whisper.SegmentCallback
|
52 |
+
if flags.IsTokens() {
|
53 |
+
cb = func(segment whisper.Segment) {
|
54 |
+
fmt.Fprintf(flags.Output(), "%02d [%6s->%6s] ", segment.Num, segment.Start.Truncate(time.Millisecond), segment.End.Truncate(time.Millisecond))
|
55 |
+
for _, token := range segment.Tokens {
|
56 |
+
if flags.IsColorize() && context.IsText(token) {
|
57 |
+
fmt.Fprint(flags.Output(), Colorize(token.Text, int(token.P*24.0)), " ")
|
58 |
+
} else {
|
59 |
+
fmt.Fprint(flags.Output(), token.Text, " ")
|
60 |
+
}
|
61 |
+
}
|
62 |
+
fmt.Fprintln(flags.Output(), "")
|
63 |
+
fmt.Fprintln(flags.Output(), "")
|
64 |
+
}
|
65 |
+
}
|
66 |
+
|
67 |
+
// Process the data
|
68 |
+
fmt.Fprintf(flags.Output(), " ...processing %q\n", path)
|
69 |
+
context.ResetTimings()
|
70 |
+
if err := context.Process(data, cb, nil); err != nil {
|
71 |
+
return err
|
72 |
+
}
|
73 |
+
|
74 |
+
context.PrintTimings()
|
75 |
+
|
76 |
+
// Print out the results
|
77 |
+
switch {
|
78 |
+
case flags.GetOut() == "srt":
|
79 |
+
return OutputSRT(os.Stdout, context)
|
80 |
+
case flags.GetOut() == "none":
|
81 |
+
return nil
|
82 |
+
default:
|
83 |
+
return Output(os.Stdout, context, flags.IsColorize())
|
84 |
+
}
|
85 |
+
}
|
86 |
+
|
87 |
+
// Output text as SRT file
|
88 |
+
func OutputSRT(w io.Writer, context whisper.Context) error {
|
89 |
+
n := 1
|
90 |
+
for {
|
91 |
+
segment, err := context.NextSegment()
|
92 |
+
if err == io.EOF {
|
93 |
+
return nil
|
94 |
+
} else if err != nil {
|
95 |
+
return err
|
96 |
+
}
|
97 |
+
fmt.Fprintln(w, n)
|
98 |
+
fmt.Fprintln(w, srtTimestamp(segment.Start), " --> ", srtTimestamp(segment.End))
|
99 |
+
fmt.Fprintln(w, segment.Text)
|
100 |
+
fmt.Fprintln(w, "")
|
101 |
+
n++
|
102 |
+
}
|
103 |
+
}
|
104 |
+
|
105 |
+
// Output text to terminal
|
106 |
+
func Output(w io.Writer, context whisper.Context, colorize bool) error {
|
107 |
+
for {
|
108 |
+
segment, err := context.NextSegment()
|
109 |
+
if err == io.EOF {
|
110 |
+
return nil
|
111 |
+
} else if err != nil {
|
112 |
+
return err
|
113 |
+
}
|
114 |
+
fmt.Fprintf(w, "[%6s->%6s]", segment.Start.Truncate(time.Millisecond), segment.End.Truncate(time.Millisecond))
|
115 |
+
if colorize {
|
116 |
+
for _, token := range segment.Tokens {
|
117 |
+
if !context.IsText(token) {
|
118 |
+
continue
|
119 |
+
}
|
120 |
+
fmt.Fprint(w, " ", Colorize(token.Text, int(token.P*24.0)))
|
121 |
+
}
|
122 |
+
fmt.Fprint(w, "\n")
|
123 |
+
} else {
|
124 |
+
fmt.Fprintln(w, " ", segment.Text)
|
125 |
+
}
|
126 |
+
}
|
127 |
+
}
|
128 |
+
|
129 |
+
// Return srtTimestamp
|
130 |
+
func srtTimestamp(t time.Duration) string {
|
131 |
+
return fmt.Sprintf("%02d:%02d:%02d,%03d", t/time.Hour, (t%time.Hour)/time.Minute, (t%time.Minute)/time.Second, (t%time.Second)/time.Millisecond)
|
132 |
+
}
|
bindings/go/go.mod
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
module github.com/ggerganov/whisper.cpp/bindings/go
|
2 |
+
|
3 |
+
go 1.19
|
4 |
+
|
5 |
+
require (
|
6 |
+
github.com/go-audio/wav v1.1.0
|
7 |
+
github.com/stretchr/testify v1.8.1
|
8 |
+
)
|
9 |
+
|
10 |
+
require (
|
11 |
+
github.com/davecgh/go-spew v1.1.1 // indirect
|
12 |
+
github.com/go-audio/audio v1.0.0 // indirect
|
13 |
+
github.com/go-audio/riff v1.0.0 // indirect
|
14 |
+
github.com/pmezard/go-difflib v1.0.0 // indirect
|
15 |
+
gopkg.in/yaml.v3 v3.0.1 // indirect
|
16 |
+
)
|
bindings/go/go.sum
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
2 |
+
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
3 |
+
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
4 |
+
github.com/go-audio/audio v1.0.0 h1:zS9vebldgbQqktK4H0lUqWrG8P0NxCJVqcj7ZpNnwd4=
|
5 |
+
github.com/go-audio/audio v1.0.0/go.mod h1:6uAu0+H2lHkwdGsAY+j2wHPNPpPoeg5AaEFh9FlA+Zs=
|
6 |
+
github.com/go-audio/riff v1.0.0 h1:d8iCGbDvox9BfLagY94fBynxSPHO80LmZCaOsmKxokA=
|
7 |
+
github.com/go-audio/riff v1.0.0/go.mod h1:l3cQwc85y79NQFCRB7TiPoNiaijp6q8Z0Uv38rVG498=
|
8 |
+
github.com/go-audio/wav v1.1.0 h1:jQgLtbqBzY7G+BM8fXF7AHUk1uHUviWS4X39d5rsL2g=
|
9 |
+
github.com/go-audio/wav v1.1.0/go.mod h1:mpe9qfwbScEbkd8uybLuIpTgHyrISw/OTuvjUW2iGtE=
|
10 |
+
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
11 |
+
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
12 |
+
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
13 |
+
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
|
14 |
+
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
|
15 |
+
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
16 |
+
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
|
17 |
+
github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk=
|
18 |
+
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
|
19 |
+
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
|
20 |
+
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
21 |
+
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
22 |
+
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
23 |
+
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
bindings/go/params.go
ADDED
@@ -0,0 +1,192 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
package whisper
|
2 |
+
|
3 |
+
import (
|
4 |
+
"fmt"
|
5 |
+
)
|
6 |
+
|
7 |
+
///////////////////////////////////////////////////////////////////////////////
|
8 |
+
// CGO
|
9 |
+
|
10 |
+
/*
|
11 |
+
#include <whisper.h>
|
12 |
+
*/
|
13 |
+
import "C"
|
14 |
+
|
15 |
+
///////////////////////////////////////////////////////////////////////////////
|
16 |
+
// PUBLIC METHODS
|
17 |
+
|
18 |
+
func (p *Params) SetTranslate(v bool) {
|
19 |
+
p.translate = toBool(v)
|
20 |
+
}
|
21 |
+
|
22 |
+
func (p *Params) SetSplitOnWord(v bool) {
|
23 |
+
p.split_on_word = toBool(v)
|
24 |
+
}
|
25 |
+
|
26 |
+
func (p *Params) SetNoContext(v bool) {
|
27 |
+
p.no_context = toBool(v)
|
28 |
+
}
|
29 |
+
|
30 |
+
func (p *Params) SetSingleSegment(v bool) {
|
31 |
+
p.single_segment = toBool(v)
|
32 |
+
}
|
33 |
+
|
34 |
+
func (p *Params) SetPrintSpecial(v bool) {
|
35 |
+
p.print_special = toBool(v)
|
36 |
+
}
|
37 |
+
|
38 |
+
func (p *Params) SetPrintProgress(v bool) {
|
39 |
+
p.print_progress = toBool(v)
|
40 |
+
}
|
41 |
+
|
42 |
+
func (p *Params) SetPrintRealtime(v bool) {
|
43 |
+
p.print_realtime = toBool(v)
|
44 |
+
}
|
45 |
+
|
46 |
+
func (p *Params) SetPrintTimestamps(v bool) {
|
47 |
+
p.print_timestamps = toBool(v)
|
48 |
+
}
|
49 |
+
|
50 |
+
// Set language id
|
51 |
+
func (p *Params) SetLanguage(lang int) error {
|
52 |
+
if lang == -1 {
|
53 |
+
p.language = nil
|
54 |
+
return nil
|
55 |
+
}
|
56 |
+
str := C.whisper_lang_str(C.int(lang))
|
57 |
+
if str == nil {
|
58 |
+
return ErrInvalidLanguage
|
59 |
+
} else {
|
60 |
+
p.language = str
|
61 |
+
}
|
62 |
+
return nil
|
63 |
+
}
|
64 |
+
|
65 |
+
// Get language id
|
66 |
+
func (p *Params) Language() int {
|
67 |
+
if p.language == nil {
|
68 |
+
return -1
|
69 |
+
}
|
70 |
+
return int(C.whisper_lang_id(p.language))
|
71 |
+
}
|
72 |
+
|
73 |
+
// Threads available
|
74 |
+
func (p *Params) Threads() int {
|
75 |
+
return int(p.n_threads)
|
76 |
+
}
|
77 |
+
|
78 |
+
// Set number of threads to use
|
79 |
+
func (p *Params) SetThreads(threads int) {
|
80 |
+
p.n_threads = C.int(threads)
|
81 |
+
}
|
82 |
+
|
83 |
+
// Set start offset in ms
|
84 |
+
func (p *Params) SetOffset(offset_ms int) {
|
85 |
+
p.offset_ms = C.int(offset_ms)
|
86 |
+
}
|
87 |
+
|
88 |
+
// Set audio duration to process in ms
|
89 |
+
func (p *Params) SetDuration(duration_ms int) {
|
90 |
+
p.duration_ms = C.int(duration_ms)
|
91 |
+
}
|
92 |
+
|
93 |
+
// Set timestamp token probability threshold (~0.01)
|
94 |
+
func (p *Params) SetTokenThreshold(t float32) {
|
95 |
+
p.thold_pt = C.float(t)
|
96 |
+
}
|
97 |
+
|
98 |
+
// Set timestamp token sum probability threshold (~0.01)
|
99 |
+
func (p *Params) SetTokenSumThreshold(t float32) {
|
100 |
+
p.thold_ptsum = C.float(t)
|
101 |
+
}
|
102 |
+
|
103 |
+
// Set max segment length in characters
|
104 |
+
func (p *Params) SetMaxSegmentLength(n int) {
|
105 |
+
p.max_len = C.int(n)
|
106 |
+
}
|
107 |
+
|
108 |
+
func (p *Params) SetTokenTimestamps(b bool) {
|
109 |
+
p.token_timestamps = toBool(b)
|
110 |
+
}
|
111 |
+
|
112 |
+
// Set max tokens per segment (0 = no limit)
|
113 |
+
func (p *Params) SetMaxTokensPerSegment(n int) {
|
114 |
+
p.max_tokens = C.int(n)
|
115 |
+
}
|
116 |
+
|
117 |
+
// Set audio encoder context
|
118 |
+
func (p *Params) SetAudioCtx(n int) {
|
119 |
+
p.audio_ctx = C.int(n)
|
120 |
+
}
|
121 |
+
|
122 |
+
func (p *Params) SetMaxContext(n int) {
|
123 |
+
p.n_max_text_ctx = C.int(n)
|
124 |
+
}
|
125 |
+
|
126 |
+
func (p *Params) SetBeamSize(n int) {
|
127 |
+
p.beam_search.beam_size = C.int(n)
|
128 |
+
}
|
129 |
+
|
130 |
+
func (p *Params) SetEntropyThold(t float32) {
|
131 |
+
p.entropy_thold = C.float(t)
|
132 |
+
}
|
133 |
+
|
134 |
+
// Set initial prompt
|
135 |
+
func (p *Params) SetInitialPrompt(prompt string) {
|
136 |
+
p.initial_prompt = C.CString(prompt)
|
137 |
+
}
|
138 |
+
|
139 |
+
///////////////////////////////////////////////////////////////////////////////
|
140 |
+
// PRIVATE METHODS
|
141 |
+
|
142 |
+
func toBool(v bool) C.bool {
|
143 |
+
if v {
|
144 |
+
return C.bool(true)
|
145 |
+
}
|
146 |
+
return C.bool(false)
|
147 |
+
}
|
148 |
+
|
149 |
+
///////////////////////////////////////////////////////////////////////////////
|
150 |
+
// STRINGIFY
|
151 |
+
|
152 |
+
func (p *Params) String() string {
|
153 |
+
str := "<whisper.params"
|
154 |
+
str += fmt.Sprintf(" strategy=%v", p.strategy)
|
155 |
+
str += fmt.Sprintf(" n_threads=%d", p.n_threads)
|
156 |
+
if p.language != nil {
|
157 |
+
str += fmt.Sprintf(" language=%s", C.GoString(p.language))
|
158 |
+
}
|
159 |
+
str += fmt.Sprintf(" n_max_text_ctx=%d", p.n_max_text_ctx)
|
160 |
+
str += fmt.Sprintf(" offset_ms=%d", p.offset_ms)
|
161 |
+
str += fmt.Sprintf(" duration_ms=%d", p.duration_ms)
|
162 |
+
str += fmt.Sprintf(" audio_ctx=%d", p.audio_ctx)
|
163 |
+
str += fmt.Sprintf(" initial_prompt=%s", C.GoString(p.initial_prompt))
|
164 |
+
str += fmt.Sprintf(" entropy_thold=%f", p.entropy_thold)
|
165 |
+
str += fmt.Sprintf(" beam_size=%d", p.beam_search.beam_size)
|
166 |
+
if p.translate {
|
167 |
+
str += " translate"
|
168 |
+
}
|
169 |
+
if p.no_context {
|
170 |
+
str += " no_context"
|
171 |
+
}
|
172 |
+
if p.single_segment {
|
173 |
+
str += " single_segment"
|
174 |
+
}
|
175 |
+
if p.print_special {
|
176 |
+
str += " print_special"
|
177 |
+
}
|
178 |
+
if p.print_progress {
|
179 |
+
str += " print_progress"
|
180 |
+
}
|
181 |
+
if p.print_realtime {
|
182 |
+
str += " print_realtime"
|
183 |
+
}
|
184 |
+
if p.print_timestamps {
|
185 |
+
str += " print_timestamps"
|
186 |
+
}
|
187 |
+
if p.token_timestamps {
|
188 |
+
str += " token_timestamps"
|
189 |
+
}
|
190 |
+
|
191 |
+
return str + ">"
|
192 |
+
}
|
bindings/go/pkg/whisper/consts.go
ADDED
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
package whisper
|
2 |
+
|
3 |
+
import (
|
4 |
+
"errors"
|
5 |
+
|
6 |
+
// Bindings
|
7 |
+
whisper "github.com/ggerganov/whisper.cpp/bindings/go"
|
8 |
+
)
|
9 |
+
|
10 |
+
///////////////////////////////////////////////////////////////////////////////
|
11 |
+
// ERRORS
|
12 |
+
|
13 |
+
// Errors returned by the high-level whisper package.
var (
	// ErrUnableToLoadModel is returned by New when the model cannot be initialised.
	ErrUnableToLoadModel = errors.New("unable to load model")
	// ErrInternalAppError is returned when the underlying whisper context is nil.
	ErrInternalAppError = errors.New("internal application error")
	// ErrProcessingFailed reports a processing failure.
	ErrProcessingFailed = errors.New("processing failed")
	// ErrUnsupportedLanguage is returned by SetLanguage for an unknown language.
	ErrUnsupportedLanguage = errors.New("unsupported language")
	// ErrModelNotMultilingual is returned by SetLanguage for a non-multilingual model.
	ErrModelNotMultilingual = errors.New("model is not multilingual")
)
|
20 |
+
|
21 |
+
///////////////////////////////////////////////////////////////////////////////
|
22 |
+
// CONSTANTS
|
23 |
+
|
24 |
+
// SampleRate is the sample rate of the audio data.
|
25 |
+
const SampleRate = whisper.SampleRate
|
26 |
+
|
27 |
+
// SampleBits is the number of bytes per sample.
|
28 |
+
const SampleBits = whisper.SampleBits
|
bindings/go/pkg/whisper/context.go
ADDED
@@ -0,0 +1,331 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
package whisper
|
2 |
+
|
3 |
+
import (
|
4 |
+
"fmt"
|
5 |
+
"io"
|
6 |
+
"runtime"
|
7 |
+
"strings"
|
8 |
+
"time"
|
9 |
+
|
10 |
+
// Bindings
|
11 |
+
whisper "github.com/ggerganov/whisper.cpp/bindings/go"
|
12 |
+
)
|
13 |
+
|
14 |
+
///////////////////////////////////////////////////////////////////////////////
|
15 |
+
// TYPES
|
16 |
+
|
17 |
+
type context struct {
|
18 |
+
n int
|
19 |
+
model *model
|
20 |
+
params whisper.Params
|
21 |
+
}
|
22 |
+
|
23 |
+
// Make sure context adheres to the interface
|
24 |
+
var _ Context = (*context)(nil)
|
25 |
+
|
26 |
+
///////////////////////////////////////////////////////////////////////////////
|
27 |
+
// LIFECYCLE
|
28 |
+
|
29 |
+
func newContext(model *model, params whisper.Params) (Context, error) {
|
30 |
+
context := new(context)
|
31 |
+
context.model = model
|
32 |
+
context.params = params
|
33 |
+
|
34 |
+
// Return success
|
35 |
+
return context, nil
|
36 |
+
}
|
37 |
+
|
38 |
+
///////////////////////////////////////////////////////////////////////////////
|
39 |
+
// PUBLIC METHODS
|
40 |
+
|
41 |
+
// Set the language to use for speech recognition.
|
42 |
+
func (context *context) SetLanguage(lang string) error {
|
43 |
+
if context.model.ctx == nil {
|
44 |
+
return ErrInternalAppError
|
45 |
+
}
|
46 |
+
if !context.model.IsMultilingual() {
|
47 |
+
return ErrModelNotMultilingual
|
48 |
+
}
|
49 |
+
|
50 |
+
if lang == "auto" {
|
51 |
+
context.params.SetLanguage(-1)
|
52 |
+
} else if id := context.model.ctx.Whisper_lang_id(lang); id < 0 {
|
53 |
+
return ErrUnsupportedLanguage
|
54 |
+
} else if err := context.params.SetLanguage(id); err != nil {
|
55 |
+
return err
|
56 |
+
}
|
57 |
+
// Return success
|
58 |
+
return nil
|
59 |
+
}
|
60 |
+
|
61 |
+
func (context *context) IsMultilingual() bool {
|
62 |
+
return context.model.IsMultilingual()
|
63 |
+
}
|
64 |
+
|
65 |
+
// Get language
|
66 |
+
func (context *context) Language() string {
|
67 |
+
id := context.params.Language()
|
68 |
+
if id == -1 {
|
69 |
+
return "auto"
|
70 |
+
}
|
71 |
+
return whisper.Whisper_lang_str(context.params.Language())
|
72 |
+
}
|
73 |
+
|
74 |
+
// Set translate flag
|
75 |
+
func (context *context) SetTranslate(v bool) {
|
76 |
+
context.params.SetTranslate(v)
|
77 |
+
}
|
78 |
+
|
79 |
+
func (context *context) SetSplitOnWord(v bool) {
|
80 |
+
context.params.SetSplitOnWord(v)
|
81 |
+
}
|
82 |
+
|
83 |
+
// Set number of threads to use
|
84 |
+
func (context *context) SetThreads(v uint) {
|
85 |
+
context.params.SetThreads(int(v))
|
86 |
+
}
|
87 |
+
|
88 |
+
// Set time offset
|
89 |
+
func (context *context) SetOffset(v time.Duration) {
|
90 |
+
context.params.SetOffset(int(v.Milliseconds()))
|
91 |
+
}
|
92 |
+
|
93 |
+
// Set duration of audio to process
|
94 |
+
func (context *context) SetDuration(v time.Duration) {
|
95 |
+
context.params.SetDuration(int(v.Milliseconds()))
|
96 |
+
}
|
97 |
+
|
98 |
+
// Set timestamp token probability threshold (~0.01)
|
99 |
+
func (context *context) SetTokenThreshold(t float32) {
|
100 |
+
context.params.SetTokenThreshold(t)
|
101 |
+
}
|
102 |
+
|
103 |
+
// Set timestamp token sum probability threshold (~0.01)
|
104 |
+
func (context *context) SetTokenSumThreshold(t float32) {
|
105 |
+
context.params.SetTokenSumThreshold(t)
|
106 |
+
}
|
107 |
+
|
108 |
+
// Set max segment length in characters
|
109 |
+
func (context *context) SetMaxSegmentLength(n uint) {
|
110 |
+
context.params.SetMaxSegmentLength(int(n))
|
111 |
+
}
|
112 |
+
|
113 |
+
// Set token timestamps flag
|
114 |
+
func (context *context) SetTokenTimestamps(b bool) {
|
115 |
+
context.params.SetTokenTimestamps(b)
|
116 |
+
}
|
117 |
+
|
118 |
+
// Set max tokens per segment (0 = no limit)
|
119 |
+
func (context *context) SetMaxTokensPerSegment(n uint) {
|
120 |
+
context.params.SetMaxTokensPerSegment(int(n))
|
121 |
+
}
|
122 |
+
|
123 |
+
// Set audio encoder context
|
124 |
+
func (context *context) SetAudioCtx(n uint) {
|
125 |
+
context.params.SetAudioCtx(int(n))
|
126 |
+
}
|
127 |
+
|
128 |
+
// Set maximum number of text context tokens to store
|
129 |
+
func (context *context) SetMaxContext(n int) {
|
130 |
+
context.params.SetMaxContext(n)
|
131 |
+
}
|
132 |
+
|
133 |
+
// Set Beam Size
|
134 |
+
func (context *context) SetBeamSize(n int) {
|
135 |
+
context.params.SetBeamSize(n)
|
136 |
+
}
|
137 |
+
|
138 |
+
// Set Entropy threshold
|
139 |
+
func (context *context) SetEntropyThold(t float32) {
|
140 |
+
context.params.SetEntropyThold(t)
|
141 |
+
}
|
142 |
+
|
143 |
+
// Set initial prompt
|
144 |
+
func (context *context) SetInitialPrompt(prompt string) {
|
145 |
+
context.params.SetInitialPrompt(prompt)
|
146 |
+
}
|
147 |
+
|
148 |
+
// ResetTimings resets the mode timings. Should be called before processing
|
149 |
+
func (context *context) ResetTimings() {
|
150 |
+
context.model.ctx.Whisper_reset_timings()
|
151 |
+
}
|
152 |
+
|
153 |
+
// PrintTimings prints the model timings to stdout.
|
154 |
+
func (context *context) PrintTimings() {
|
155 |
+
context.model.ctx.Whisper_print_timings()
|
156 |
+
}
|
157 |
+
|
158 |
+
// SystemInfo returns the system information
|
159 |
+
func (context *context) SystemInfo() string {
|
160 |
+
return fmt.Sprintf("system_info: n_threads = %d / %d | %s\n",
|
161 |
+
context.params.Threads(),
|
162 |
+
runtime.NumCPU(),
|
163 |
+
whisper.Whisper_print_system_info(),
|
164 |
+
)
|
165 |
+
}
|
166 |
+
|
167 |
+
// Use mel data at offset_ms to try and auto-detect the spoken language
|
168 |
+
// Make sure to call whisper_pcm_to_mel() or whisper_set_mel() first.
|
169 |
+
// Returns the probabilities of all languages.
|
170 |
+
func (context *context) WhisperLangAutoDetect(offset_ms int, n_threads int) ([]float32, error) {
|
171 |
+
langProbs, err := context.model.ctx.Whisper_lang_auto_detect(offset_ms, n_threads)
|
172 |
+
if err != nil {
|
173 |
+
return nil, err
|
174 |
+
}
|
175 |
+
return langProbs, nil
|
176 |
+
}
|
177 |
+
|
178 |
+
// Process new sample data and return any errors
|
179 |
+
func (context *context) Process(
|
180 |
+
data []float32,
|
181 |
+
callNewSegment SegmentCallback,
|
182 |
+
callProgress ProgressCallback,
|
183 |
+
) error {
|
184 |
+
if context.model.ctx == nil {
|
185 |
+
return ErrInternalAppError
|
186 |
+
}
|
187 |
+
// If the callback is defined then we force on single_segment mode
|
188 |
+
if callNewSegment != nil {
|
189 |
+
context.params.SetSingleSegment(true)
|
190 |
+
}
|
191 |
+
|
192 |
+
// We don't do parallel processing at the moment
|
193 |
+
processors := 0
|
194 |
+
if processors > 1 {
|
195 |
+
if err := context.model.ctx.Whisper_full_parallel(context.params, data, processors, nil, func(new int) {
|
196 |
+
if callNewSegment != nil {
|
197 |
+
num_segments := context.model.ctx.Whisper_full_n_segments()
|
198 |
+
s0 := num_segments - new
|
199 |
+
for i := s0; i < num_segments; i++ {
|
200 |
+
callNewSegment(toSegment(context.model.ctx, i))
|
201 |
+
}
|
202 |
+
}
|
203 |
+
}); err != nil {
|
204 |
+
return err
|
205 |
+
}
|
206 |
+
} else if err := context.model.ctx.Whisper_full(context.params, data, nil, func(new int) {
|
207 |
+
if callNewSegment != nil {
|
208 |
+
num_segments := context.model.ctx.Whisper_full_n_segments()
|
209 |
+
s0 := num_segments - new
|
210 |
+
for i := s0; i < num_segments; i++ {
|
211 |
+
callNewSegment(toSegment(context.model.ctx, i))
|
212 |
+
}
|
213 |
+
}
|
214 |
+
}, func(progress int) {
|
215 |
+
if callProgress != nil {
|
216 |
+
callProgress(progress)
|
217 |
+
}
|
218 |
+
}); err != nil {
|
219 |
+
return err
|
220 |
+
}
|
221 |
+
|
222 |
+
// Return success
|
223 |
+
return nil
|
224 |
+
}
|
225 |
+
|
226 |
+
// Return the next segment of tokens
|
227 |
+
func (context *context) NextSegment() (Segment, error) {
|
228 |
+
if context.model.ctx == nil {
|
229 |
+
return Segment{}, ErrInternalAppError
|
230 |
+
}
|
231 |
+
if context.n >= context.model.ctx.Whisper_full_n_segments() {
|
232 |
+
return Segment{}, io.EOF
|
233 |
+
}
|
234 |
+
|
235 |
+
// Populate result
|
236 |
+
result := toSegment(context.model.ctx, context.n)
|
237 |
+
|
238 |
+
// Increment the cursor
|
239 |
+
context.n++
|
240 |
+
|
241 |
+
// Return success
|
242 |
+
return result, nil
|
243 |
+
}
|
244 |
+
|
245 |
+
// Test for text tokens
|
246 |
+
func (context *context) IsText(t Token) bool {
|
247 |
+
switch {
|
248 |
+
case context.IsBEG(t):
|
249 |
+
return false
|
250 |
+
case context.IsSOT(t):
|
251 |
+
return false
|
252 |
+
case whisper.Token(t.Id) >= context.model.ctx.Whisper_token_eot():
|
253 |
+
return false
|
254 |
+
case context.IsPREV(t):
|
255 |
+
return false
|
256 |
+
case context.IsSOLM(t):
|
257 |
+
return false
|
258 |
+
case context.IsNOT(t):
|
259 |
+
return false
|
260 |
+
default:
|
261 |
+
return true
|
262 |
+
}
|
263 |
+
}
|
264 |
+
|
265 |
+
// Test for "begin" token
|
266 |
+
func (context *context) IsBEG(t Token) bool {
|
267 |
+
return whisper.Token(t.Id) == context.model.ctx.Whisper_token_beg()
|
268 |
+
}
|
269 |
+
|
270 |
+
// Test for "start of transcription" token
|
271 |
+
func (context *context) IsSOT(t Token) bool {
|
272 |
+
return whisper.Token(t.Id) == context.model.ctx.Whisper_token_sot()
|
273 |
+
}
|
274 |
+
|
275 |
+
// Test for "end of transcription" token
|
276 |
+
func (context *context) IsEOT(t Token) bool {
|
277 |
+
return whisper.Token(t.Id) == context.model.ctx.Whisper_token_eot()
|
278 |
+
}
|
279 |
+
|
280 |
+
// Test for "start of prev" token
|
281 |
+
func (context *context) IsPREV(t Token) bool {
|
282 |
+
return whisper.Token(t.Id) == context.model.ctx.Whisper_token_prev()
|
283 |
+
}
|
284 |
+
|
285 |
+
// Test for "start of lm" token
|
286 |
+
func (context *context) IsSOLM(t Token) bool {
|
287 |
+
return whisper.Token(t.Id) == context.model.ctx.Whisper_token_solm()
|
288 |
+
}
|
289 |
+
|
290 |
+
// Test for "No timestamps" token
|
291 |
+
func (context *context) IsNOT(t Token) bool {
|
292 |
+
return whisper.Token(t.Id) == context.model.ctx.Whisper_token_not()
|
293 |
+
}
|
294 |
+
|
295 |
+
// Test for token associated with a specific language
|
296 |
+
func (context *context) IsLANG(t Token, lang string) bool {
|
297 |
+
if id := context.model.ctx.Whisper_lang_id(lang); id >= 0 {
|
298 |
+
return whisper.Token(t.Id) == context.model.ctx.Whisper_token_lang(id)
|
299 |
+
} else {
|
300 |
+
return false
|
301 |
+
}
|
302 |
+
}
|
303 |
+
|
304 |
+
///////////////////////////////////////////////////////////////////////////////
|
305 |
+
// PRIVATE METHODS
|
306 |
+
|
307 |
+
func toSegment(ctx *whisper.Context, n int) Segment {
|
308 |
+
return Segment{
|
309 |
+
Num: n,
|
310 |
+
Text: strings.TrimSpace(ctx.Whisper_full_get_segment_text(n)),
|
311 |
+
Start: time.Duration(ctx.Whisper_full_get_segment_t0(n)) * time.Millisecond * 10,
|
312 |
+
End: time.Duration(ctx.Whisper_full_get_segment_t1(n)) * time.Millisecond * 10,
|
313 |
+
Tokens: toTokens(ctx, n),
|
314 |
+
}
|
315 |
+
}
|
316 |
+
|
317 |
+
func toTokens(ctx *whisper.Context, n int) []Token {
|
318 |
+
result := make([]Token, ctx.Whisper_full_n_tokens(n))
|
319 |
+
for i := 0; i < len(result); i++ {
|
320 |
+
data := ctx.Whisper_full_get_token_data(n, i)
|
321 |
+
|
322 |
+
result[i] = Token{
|
323 |
+
Id: int(ctx.Whisper_full_get_token_id(n, i)),
|
324 |
+
Text: ctx.Whisper_full_get_token_text(n, i),
|
325 |
+
P: ctx.Whisper_full_get_token_p(n, i),
|
326 |
+
Start: time.Duration(data.T0()) * time.Millisecond * 10,
|
327 |
+
End: time.Duration(data.T1()) * time.Millisecond * 10,
|
328 |
+
}
|
329 |
+
}
|
330 |
+
return result
|
331 |
+
}
|
bindings/go/pkg/whisper/context_test.go
ADDED
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
package whisper_test
|
2 |
+
|
3 |
+
import (
|
4 |
+
"os"
|
5 |
+
"testing"
|
6 |
+
|
7 |
+
// Packages
|
8 |
+
whisper "github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
|
9 |
+
assert "github.com/stretchr/testify/assert"
|
10 |
+
)
|
11 |
+
|
12 |
+
// Fixture locations, relative to this package directory. The tests are
// skipped when either file is missing.
const (
	ModelPath  = "../../models/ggml-tiny.bin" // whisper model file
	SamplePath = "../../samples/jfk.wav"      // sample audio file
)
|
16 |
+
|
17 |
+
func Test_Whisper_000(t *testing.T) {
|
18 |
+
assert := assert.New(t)
|
19 |
+
if _, err := os.Stat(ModelPath); os.IsNotExist(err) {
|
20 |
+
t.Skip("Skipping test, model not found:", ModelPath)
|
21 |
+
}
|
22 |
+
if _, err := os.Stat(SamplePath); os.IsNotExist(err) {
|
23 |
+
t.Skip("Skipping test, sample not found:", SamplePath)
|
24 |
+
}
|
25 |
+
|
26 |
+
// Load model
|
27 |
+
model, err := whisper.New(ModelPath)
|
28 |
+
assert.NoError(err)
|
29 |
+
assert.NotNil(model)
|
30 |
+
assert.NoError(model.Close())
|
31 |
+
|
32 |
+
t.Log("languages=", model.Languages())
|
33 |
+
}
|
34 |
+
|
35 |
+
func Test_Whisper_001(t *testing.T) {
|
36 |
+
assert := assert.New(t)
|
37 |
+
if _, err := os.Stat(ModelPath); os.IsNotExist(err) {
|
38 |
+
t.Skip("Skipping test, model not found:", ModelPath)
|
39 |
+
}
|
40 |
+
if _, err := os.Stat(SamplePath); os.IsNotExist(err) {
|
41 |
+
t.Skip("Skipping test, sample not found:", SamplePath)
|
42 |
+
}
|
43 |
+
|
44 |
+
// Load model
|
45 |
+
model, err := whisper.New(ModelPath)
|
46 |
+
assert.NoError(err)
|
47 |
+
assert.NotNil(model)
|
48 |
+
defer model.Close()
|
49 |
+
|
50 |
+
// Get context for decoding
|
51 |
+
ctx, err := model.NewContext()
|
52 |
+
assert.NoError(err)
|
53 |
+
assert.NotNil(ctx)
|
54 |
+
|
55 |
+
}
|
bindings/go/pkg/whisper/doc.go
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
/*
|
2 |
+
This is the higher-level speech-to-text whisper.cpp API for go
|
3 |
+
*/
|
4 |
+
package whisper
|
bindings/go/pkg/whisper/interface.go
ADDED
@@ -0,0 +1,102 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
package whisper
|
2 |
+
|
3 |
+
import (
|
4 |
+
"io"
|
5 |
+
"time"
|
6 |
+
)
|
7 |
+
|
8 |
+
///////////////////////////////////////////////////////////////////////////////
|
9 |
+
// TYPES
|
10 |
+
|
11 |
+
// SegmentCallback is the callback function for processing segments in real
|
12 |
+
// time. It is called during the Process function
|
13 |
+
type SegmentCallback func(Segment)
|
14 |
+
|
15 |
+
// ProgressCallback is the callback function for reporting progress during
|
16 |
+
// processing. It is called during the Process function
|
17 |
+
type ProgressCallback func(int)
|
18 |
+
|
19 |
+
// Model is the interface to a whisper model. Create a new model with the
|
20 |
+
// function whisper.New(string)
|
21 |
+
type Model interface {
|
22 |
+
io.Closer
|
23 |
+
|
24 |
+
// Return a new speech-to-text context.
|
25 |
+
NewContext() (Context, error)
|
26 |
+
|
27 |
+
// Return true if the model is multilingual.
|
28 |
+
IsMultilingual() bool
|
29 |
+
|
30 |
+
// Return all languages supported.
|
31 |
+
Languages() []string
|
32 |
+
}
|
33 |
+
|
34 |
+
// Context is the speech recognition context.
|
35 |
+
type Context interface {
|
36 |
+
SetLanguage(string) error // Set the language to use for speech recognition, use "auto" for auto detect language.
|
37 |
+
SetTranslate(bool) // Set translate flag
|
38 |
+
IsMultilingual() bool // Return true if the model is multilingual.
|
39 |
+
Language() string // Get language
|
40 |
+
|
41 |
+
SetOffset(time.Duration) // Set offset
|
42 |
+
SetDuration(time.Duration) // Set duration
|
43 |
+
SetThreads(uint) // Set number of threads to use
|
44 |
+
SetSplitOnWord(bool) // Set split on word flag
|
45 |
+
SetTokenThreshold(float32) // Set timestamp token probability threshold
|
46 |
+
SetTokenSumThreshold(float32) // Set timestamp token sum probability threshold
|
47 |
+
SetMaxSegmentLength(uint) // Set max segment length in characters
|
48 |
+
SetTokenTimestamps(bool) // Set token timestamps flag
|
49 |
+
SetMaxTokensPerSegment(uint) // Set max tokens per segment (0 = no limit)
|
50 |
+
SetAudioCtx(uint) // Set audio encoder context
|
51 |
+
SetMaxContext(n int) // Set maximum number of text context tokens to store
|
52 |
+
SetBeamSize(n int) // Set Beam Size
|
53 |
+
SetEntropyThold(t float32) // Set Entropy threshold
|
54 |
+
SetInitialPrompt(prompt string) // Set initial prompt
|
55 |
+
|
56 |
+
// Process mono audio data and return any errors.
|
57 |
+
// If defined, newly generated segments are passed to the
|
58 |
+
// callback function during processing.
|
59 |
+
Process([]float32, SegmentCallback, ProgressCallback) error
|
60 |
+
|
61 |
+
// After process is called, return segments until the end of the stream
|
62 |
+
// is reached, when io.EOF is returned.
|
63 |
+
NextSegment() (Segment, error)
|
64 |
+
|
65 |
+
IsBEG(Token) bool // Test for "begin" token
|
66 |
+
IsSOT(Token) bool // Test for "start of transcription" token
|
67 |
+
IsEOT(Token) bool // Test for "end of transcription" token
|
68 |
+
IsPREV(Token) bool // Test for "start of prev" token
|
69 |
+
IsSOLM(Token) bool // Test for "start of lm" token
|
70 |
+
IsNOT(Token) bool // Test for "No timestamps" token
|
71 |
+
IsLANG(Token, string) bool // Test for token associated with a specific language
|
72 |
+
IsText(Token) bool // Test for text token
|
73 |
+
|
74 |
+
// Timings
|
75 |
+
PrintTimings()
|
76 |
+
ResetTimings()
|
77 |
+
|
78 |
+
SystemInfo() string
|
79 |
+
}
|
80 |
+
|
81 |
+
// Segment is the text result of a speech recognition.
|
82 |
+
type Segment struct {
|
83 |
+
// Segment Number
|
84 |
+
Num int
|
85 |
+
|
86 |
+
// Time beginning and end timestamps for the segment.
|
87 |
+
Start, End time.Duration
|
88 |
+
|
89 |
+
// The text of the segment.
|
90 |
+
Text string
|
91 |
+
|
92 |
+
// The tokens of the segment.
|
93 |
+
Tokens []Token
|
94 |
+
}
|
95 |
+
|
96 |
+
// Token is a text or special token
|
97 |
+
type Token struct {
|
98 |
+
Id int
|
99 |
+
Text string
|
100 |
+
P float32
|
101 |
+
Start, End time.Duration
|
102 |
+
}
|
bindings/go/pkg/whisper/model.go
ADDED
@@ -0,0 +1,101 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
package whisper
|
2 |
+
|
3 |
+
import (
|
4 |
+
"fmt"
|
5 |
+
"os"
|
6 |
+
"runtime"
|
7 |
+
|
8 |
+
// Bindings
|
9 |
+
whisper "github.com/ggerganov/whisper.cpp/bindings/go"
|
10 |
+
)
|
11 |
+
|
12 |
+
///////////////////////////////////////////////////////////////////////////////
|
13 |
+
// TYPES
|
14 |
+
|
15 |
+
type model struct {
|
16 |
+
path string
|
17 |
+
ctx *whisper.Context
|
18 |
+
}
|
19 |
+
|
20 |
+
// Make sure model adheres to the interface
|
21 |
+
var _ Model = (*model)(nil)
|
22 |
+
|
23 |
+
///////////////////////////////////////////////////////////////////////////////
|
24 |
+
// LIFECYCLE
|
25 |
+
|
26 |
+
func New(path string) (Model, error) {
|
27 |
+
model := new(model)
|
28 |
+
if _, err := os.Stat(path); err != nil {
|
29 |
+
return nil, err
|
30 |
+
} else if ctx := whisper.Whisper_init(path); ctx == nil {
|
31 |
+
return nil, ErrUnableToLoadModel
|
32 |
+
} else {
|
33 |
+
model.ctx = ctx
|
34 |
+
model.path = path
|
35 |
+
}
|
36 |
+
|
37 |
+
// Return success
|
38 |
+
return model, nil
|
39 |
+
}
|
40 |
+
|
41 |
+
func (model *model) Close() error {
|
42 |
+
if model.ctx != nil {
|
43 |
+
model.ctx.Whisper_free()
|
44 |
+
}
|
45 |
+
|
46 |
+
// Release resources
|
47 |
+
model.ctx = nil
|
48 |
+
|
49 |
+
// Return success
|
50 |
+
return nil
|
51 |
+
}
|
52 |
+
|
53 |
+
///////////////////////////////////////////////////////////////////////////////
|
54 |
+
// STRINGIFY
|
55 |
+
|
56 |
+
func (model *model) String() string {
|
57 |
+
str := "<whisper.model"
|
58 |
+
if model.ctx != nil {
|
59 |
+
str += fmt.Sprintf(" model=%q", model.path)
|
60 |
+
}
|
61 |
+
return str + ">"
|
62 |
+
}
|
63 |
+
|
64 |
+
///////////////////////////////////////////////////////////////////////////////
|
65 |
+
// PUBLIC METHODS
|
66 |
+
|
67 |
+
// Return true if model is multilingual (language and translation options are supported)
|
68 |
+
func (model *model) IsMultilingual() bool {
|
69 |
+
return model.ctx.Whisper_is_multilingual() != 0
|
70 |
+
}
|
71 |
+
|
72 |
+
// Return all recognized languages. Initially it is set to auto-detect
|
73 |
+
func (model *model) Languages() []string {
|
74 |
+
result := make([]string, 0, whisper.Whisper_lang_max_id())
|
75 |
+
for i := 0; i < whisper.Whisper_lang_max_id(); i++ {
|
76 |
+
str := whisper.Whisper_lang_str(i)
|
77 |
+
if model.ctx.Whisper_lang_id(str) >= 0 {
|
78 |
+
result = append(result, str)
|
79 |
+
}
|
80 |
+
}
|
81 |
+
return result
|
82 |
+
}
|
83 |
+
|
84 |
+
func (model *model) NewContext() (Context, error) {
|
85 |
+
if model.ctx == nil {
|
86 |
+
return nil, ErrInternalAppError
|
87 |
+
}
|
88 |
+
|
89 |
+
// Create new context
|
90 |
+
params := model.ctx.Whisper_full_default_params(whisper.SAMPLING_GREEDY)
|
91 |
+
params.SetTranslate(false)
|
92 |
+
params.SetPrintSpecial(false)
|
93 |
+
params.SetPrintProgress(false)
|
94 |
+
params.SetPrintRealtime(false)
|
95 |
+
params.SetPrintTimestamps(false)
|
96 |
+
params.SetThreads(runtime.NumCPU())
|
97 |
+
params.SetNoContext(true)
|
98 |
+
|
99 |
+
// Return new context
|
100 |
+
return newContext(model, params)
|
101 |
+
}
|
bindings/go/samples/jfk.wav
ADDED
Binary file (352 kB). View file
|
|
bindings/go/whisper.go
ADDED
@@ -0,0 +1,468 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
package whisper
|
2 |
+
|
3 |
+
import (
|
4 |
+
"errors"
|
5 |
+
"unsafe"
|
6 |
+
)
|
7 |
+
|
8 |
+
///////////////////////////////////////////////////////////////////////////////
|
9 |
+
// CGO
|
10 |
+
|
11 |
+
/*
#cgo LDFLAGS: -lwhisper -lm -lstdc++ -fopenmp
#cgo darwin LDFLAGS: -framework Accelerate -framework Metal -framework Foundation -framework CoreGraphics
#include <whisper.h>
#include <stdlib.h>

// Callbacks implemented on the Go side.
extern void callNewSegment(void* user_data, int new);
extern void callProgress(void* user_data, int progress);
extern bool callEncoderBegin(void* user_data);

// Text segment callback
// Called on every newly generated text segment
// Use the whisper_full_...() functions to obtain the text segments
// Forwards to callNewSegment only when both ctx and user_data are non-NULL.
static void whisper_new_segment_cb(struct whisper_context* ctx, struct whisper_state* state, int n_new, void* user_data) {
    if(user_data != NULL && ctx != NULL) {
        callNewSegment(user_data, n_new);
    }
}

// Progress callback
// Called with the current progress value
// Forwards to callProgress only when both ctx and user_data are non-NULL.
static void whisper_progress_cb(struct whisper_context* ctx, struct whisper_state* state, int progress, void* user_data) {
    if(user_data != NULL && ctx != NULL) {
        callProgress(user_data, progress);
    }
}

// Encoder begin callback
// If not NULL, called before the encoder starts
// If it returns false, the computation is aborted
// Returns false when ctx or user_data is NULL.
static bool whisper_encoder_begin_cb(struct whisper_context* ctx, struct whisper_state* state, void* user_data) {
    if(user_data != NULL && ctx != NULL) {
        return callEncoderBegin(user_data);
    }
    return false;
}

// Get default parameters and set callbacks
// Every callback receives the context pointer as its user data.
static struct whisper_full_params whisper_full_default_params_cb(struct whisper_context* ctx, enum whisper_sampling_strategy strategy) {
	struct whisper_full_params params = whisper_full_default_params(strategy);
	params.new_segment_callback = whisper_new_segment_cb;
	params.new_segment_callback_user_data = (void*)(ctx);
	params.encoder_begin_callback = whisper_encoder_begin_cb;
	params.encoder_begin_callback_user_data = (void*)(ctx);
	params.progress_callback = whisper_progress_cb;
	params.progress_callback_user_data = (void*)(ctx);
	return params;
}
*/
|
61 |
+
import "C"
|
62 |
+
|
63 |
+
///////////////////////////////////////////////////////////////////////////////
|
64 |
+
// TYPES
|
65 |
+
|
66 |
+
type (
|
67 |
+
Context C.struct_whisper_context
|
68 |
+
Token C.whisper_token
|
69 |
+
TokenData C.struct_whisper_token_data
|
70 |
+
SamplingStrategy C.enum_whisper_sampling_strategy
|
71 |
+
Params C.struct_whisper_full_params
|
72 |
+
)
|
73 |
+
|
74 |
+
///////////////////////////////////////////////////////////////////////////////
|
75 |
+
// GLOBALS
|
76 |
+
|
77 |
+
const (
|
78 |
+
SAMPLING_GREEDY SamplingStrategy = C.WHISPER_SAMPLING_GREEDY
|
79 |
+
SAMPLING_BEAM_SEARCH SamplingStrategy = C.WHISPER_SAMPLING_BEAM_SEARCH
|
80 |
+
)
|
81 |
+
|
82 |
+
const (
|
83 |
+
SampleRate = C.WHISPER_SAMPLE_RATE // Expected sample rate, samples per second
|
84 |
+
SampleBits = uint16(unsafe.Sizeof(C.float(0))) * 8 // Sample size in bits
|
85 |
+
NumFFT = C.WHISPER_N_FFT
|
86 |
+
HopLength = C.WHISPER_HOP_LENGTH
|
87 |
+
ChunkSize = C.WHISPER_CHUNK_SIZE
|
88 |
+
)
|
89 |
+
|
90 |
+
// Sentinel errors returned by the wrappers in this file.
var (
	// ErrTokenizerFailed is returned by Whisper_tokenize.
	ErrTokenizerFailed = errors.New("whisper_tokenize failed")
	// ErrAutoDetectFailed is returned by Whisper_lang_auto_detect.
	ErrAutoDetectFailed = errors.New("whisper_lang_auto_detect failed")
	// ErrConversionFailed is returned by the mel, encode, decode and full wrappers.
	ErrConversionFailed = errors.New("whisper_convert failed")
	// ErrInvalidLanguage is not used by the functions visible in this part of the file.
	ErrInvalidLanguage = errors.New("invalid language")
)
|
96 |
+
|
97 |
+
///////////////////////////////////////////////////////////////////////////////
|
98 |
+
// PUBLIC METHODS
|
99 |
+
|
100 |
+
// Allocates all memory needed for the model and loads the model from the given file.
|
101 |
+
// Returns NULL on failure.
|
102 |
+
func Whisper_init(path string) *Context {
|
103 |
+
cPath := C.CString(path)
|
104 |
+
defer C.free(unsafe.Pointer(cPath))
|
105 |
+
if ctx := C.whisper_init_from_file_with_params(cPath, C.whisper_context_default_params()); ctx != nil {
|
106 |
+
return (*Context)(ctx)
|
107 |
+
} else {
|
108 |
+
return nil
|
109 |
+
}
|
110 |
+
}
|
111 |
+
|
112 |
+
// Frees all memory allocated by the model.
|
113 |
+
func (ctx *Context) Whisper_free() {
|
114 |
+
C.whisper_free((*C.struct_whisper_context)(ctx))
|
115 |
+
}
|
116 |
+
|
117 |
+
// Convert RAW PCM audio to log mel spectrogram.
|
118 |
+
// The resulting spectrogram is stored inside the provided whisper context.
|
119 |
+
func (ctx *Context) Whisper_pcm_to_mel(data []float32, threads int) error {
|
120 |
+
if C.whisper_pcm_to_mel((*C.struct_whisper_context)(ctx), (*C.float)(&data[0]), C.int(len(data)), C.int(threads)) == 0 {
|
121 |
+
return nil
|
122 |
+
} else {
|
123 |
+
return ErrConversionFailed
|
124 |
+
}
|
125 |
+
}
|
126 |
+
|
127 |
+
// This can be used to set a custom log mel spectrogram inside the provided whisper context.
|
128 |
+
// Use this instead of whisper_pcm_to_mel() if you want to provide your own log mel spectrogram.
|
129 |
+
// n_mel must be 80
|
130 |
+
func (ctx *Context) Whisper_set_mel(data []float32, n_mel int) error {
|
131 |
+
if C.whisper_set_mel((*C.struct_whisper_context)(ctx), (*C.float)(&data[0]), C.int(len(data)), C.int(n_mel)) == 0 {
|
132 |
+
return nil
|
133 |
+
} else {
|
134 |
+
return ErrConversionFailed
|
135 |
+
}
|
136 |
+
}
|
137 |
+
|
138 |
+
// Run the Whisper encoder on the log mel spectrogram stored inside the provided whisper context.
|
139 |
+
// Make sure to call whisper_pcm_to_mel() or whisper_set_mel() first.
|
140 |
+
// offset can be used to specify the offset of the first frame in the spectrogram.
|
141 |
+
func (ctx *Context) Whisper_encode(offset, threads int) error {
|
142 |
+
if C.whisper_encode((*C.struct_whisper_context)(ctx), C.int(offset), C.int(threads)) == 0 {
|
143 |
+
return nil
|
144 |
+
} else {
|
145 |
+
return ErrConversionFailed
|
146 |
+
}
|
147 |
+
}
|
148 |
+
|
149 |
+
// Run the Whisper decoder to obtain the logits and probabilities for the next token.
|
150 |
+
// Make sure to call whisper_encode() first.
|
151 |
+
// tokens + n_tokens is the provided context for the decoder.
|
152 |
+
// n_past is the number of tokens to use from previous decoder calls.
|
153 |
+
func (ctx *Context) Whisper_decode(tokens []Token, past, threads int) error {
|
154 |
+
if C.whisper_decode((*C.struct_whisper_context)(ctx), (*C.whisper_token)(&tokens[0]), C.int(len(tokens)), C.int(past), C.int(threads)) == 0 {
|
155 |
+
return nil
|
156 |
+
} else {
|
157 |
+
return ErrConversionFailed
|
158 |
+
}
|
159 |
+
}
|
160 |
+
|
161 |
+
// Convert the provided text into tokens. The tokens pointer must be large enough to hold the resulting tokens.
|
162 |
+
// Returns the number of tokens on success
|
163 |
+
func (ctx *Context) Whisper_tokenize(text string, tokens []Token) (int, error) {
|
164 |
+
cText := C.CString(text)
|
165 |
+
defer C.free(unsafe.Pointer(cText))
|
166 |
+
if n := C.whisper_tokenize((*C.struct_whisper_context)(ctx), cText, (*C.whisper_token)(&tokens[0]), C.int(len(tokens))); n >= 0 {
|
167 |
+
return int(n), nil
|
168 |
+
} else {
|
169 |
+
return 0, ErrTokenizerFailed
|
170 |
+
}
|
171 |
+
}
|
172 |
+
|
173 |
+
// Return the id of the specified language, returns -1 if not found
|
174 |
+
// Examples:
|
175 |
+
//
|
176 |
+
// "de" -> 2
|
177 |
+
// "german" -> 2
|
178 |
+
func (ctx *Context) Whisper_lang_id(lang string) int {
|
179 |
+
return int(C.whisper_lang_id(C.CString(lang)))
|
180 |
+
}
|
181 |
+
|
182 |
+
// Largest language id (i.e. number of available languages - 1)
|
183 |
+
func Whisper_lang_max_id() int {
|
184 |
+
return int(C.whisper_lang_max_id())
|
185 |
+
}
|
186 |
+
|
187 |
+
// Return the short string of the specified language id (e.g. 2 -> "de"),
|
188 |
+
// returns empty string if not found
|
189 |
+
func Whisper_lang_str(id int) string {
|
190 |
+
return C.GoString(C.whisper_lang_str(C.int(id)))
|
191 |
+
}
|
192 |
+
|
193 |
+
// Use mel data at offset_ms to try and auto-detect the spoken language
|
194 |
+
// Make sure to call whisper_pcm_to_mel() or whisper_set_mel() first.
|
195 |
+
// Returns the probabilities of all languages.
|
196 |
+
// ref: https://github.com/openai/whisper/blob/main/whisper/decoding.py#L18-L69
|
197 |
+
func (ctx *Context) Whisper_lang_auto_detect(offset_ms, n_threads int) ([]float32, error) {
|
198 |
+
probs := make([]float32, Whisper_lang_max_id()+1)
|
199 |
+
if n := int(C.whisper_lang_auto_detect((*C.struct_whisper_context)(ctx), C.int(offset_ms), C.int(n_threads), (*C.float)(&probs[0]))); n < 0 {
|
200 |
+
return nil, ErrAutoDetectFailed
|
201 |
+
} else {
|
202 |
+
return probs, nil
|
203 |
+
}
|
204 |
+
}
|
205 |
+
|
206 |
+
func (ctx *Context) Whisper_n_len() int {
|
207 |
+
return int(C.whisper_n_len((*C.struct_whisper_context)(ctx)))
|
208 |
+
}
|
209 |
+
|
210 |
+
func (ctx *Context) Whisper_n_vocab() int {
|
211 |
+
return int(C.whisper_n_vocab((*C.struct_whisper_context)(ctx)))
|
212 |
+
}
|
213 |
+
|
214 |
+
func (ctx *Context) Whisper_n_text_ctx() int {
|
215 |
+
return int(C.whisper_n_text_ctx((*C.struct_whisper_context)(ctx)))
|
216 |
+
}
|
217 |
+
|
218 |
+
func (ctx *Context) Whisper_n_audio_ctx() int {
|
219 |
+
return int(C.whisper_n_audio_ctx((*C.struct_whisper_context)(ctx)))
|
220 |
+
}
|
221 |
+
|
222 |
+
func (ctx *Context) Whisper_is_multilingual() int {
|
223 |
+
return int(C.whisper_is_multilingual((*C.struct_whisper_context)(ctx)))
|
224 |
+
}
|
225 |
+
|
226 |
+
// The probabilities for the next token
|
227 |
+
//func (ctx *Whisper_context) Whisper_get_probs() []float32 {
|
228 |
+
// return (*[1 << 30]float32)(unsafe.Pointer(C.whisper_get_probs((*C.struct_whisper_context)(ctx))))[:ctx.Whisper_n_vocab()]
|
229 |
+
//}
|
230 |
+
|
231 |
+
// Token Id -> String. Uses the vocabulary in the provided context
|
232 |
+
func (ctx *Context) Whisper_token_to_str(token Token) string {
|
233 |
+
return C.GoString(C.whisper_token_to_str((*C.struct_whisper_context)(ctx), C.whisper_token(token)))
|
234 |
+
}
|
235 |
+
|
236 |
+
// Special tokens
|
237 |
+
func (ctx *Context) Whisper_token_eot() Token {
|
238 |
+
return Token(C.whisper_token_eot((*C.struct_whisper_context)(ctx)))
|
239 |
+
}
|
240 |
+
|
241 |
+
// Special tokens
|
242 |
+
func (ctx *Context) Whisper_token_sot() Token {
|
243 |
+
return Token(C.whisper_token_sot((*C.struct_whisper_context)(ctx)))
|
244 |
+
}
|
245 |
+
|
246 |
+
// Special tokens
|
247 |
+
func (ctx *Context) Whisper_token_prev() Token {
|
248 |
+
return Token(C.whisper_token_prev((*C.struct_whisper_context)(ctx)))
|
249 |
+
}
|
250 |
+
|
251 |
+
// Special tokens
|
252 |
+
func (ctx *Context) Whisper_token_solm() Token {
|
253 |
+
return Token(C.whisper_token_solm((*C.struct_whisper_context)(ctx)))
|
254 |
+
}
|
255 |
+
|
256 |
+
// Special tokens
|
257 |
+
func (ctx *Context) Whisper_token_not() Token {
|
258 |
+
return Token(C.whisper_token_not((*C.struct_whisper_context)(ctx)))
|
259 |
+
}
|
260 |
+
|
261 |
+
// Special tokens
|
262 |
+
func (ctx *Context) Whisper_token_beg() Token {
|
263 |
+
return Token(C.whisper_token_beg((*C.struct_whisper_context)(ctx)))
|
264 |
+
}
|
265 |
+
|
266 |
+
// Special tokens
|
267 |
+
func (ctx *Context) Whisper_token_lang(lang_id int) Token {
|
268 |
+
return Token(C.whisper_token_lang((*C.struct_whisper_context)(ctx), C.int(lang_id)))
|
269 |
+
}
|
270 |
+
|
271 |
+
// Task tokens
|
272 |
+
func (ctx *Context) Whisper_token_translate() Token {
|
273 |
+
return Token(C.whisper_token_translate((*C.struct_whisper_context)(ctx)))
|
274 |
+
}
|
275 |
+
|
276 |
+
// Task tokens
|
277 |
+
func (ctx *Context) Whisper_token_transcribe() Token {
|
278 |
+
return Token(C.whisper_token_transcribe((*C.struct_whisper_context)(ctx)))
|
279 |
+
}
|
280 |
+
|
281 |
+
// Performance information
|
282 |
+
func (ctx *Context) Whisper_print_timings() {
|
283 |
+
C.whisper_print_timings((*C.struct_whisper_context)(ctx))
|
284 |
+
}
|
285 |
+
|
286 |
+
// Performance information
|
287 |
+
func (ctx *Context) Whisper_reset_timings() {
|
288 |
+
C.whisper_reset_timings((*C.struct_whisper_context)(ctx))
|
289 |
+
}
|
290 |
+
|
291 |
+
// Print system information
|
292 |
+
func Whisper_print_system_info() string {
|
293 |
+
return C.GoString(C.whisper_print_system_info())
|
294 |
+
}
|
295 |
+
|
296 |
+
// Return default parameters for a strategy
|
297 |
+
func (ctx *Context) Whisper_full_default_params(strategy SamplingStrategy) Params {
|
298 |
+
// Get default parameters
|
299 |
+
return Params(C.whisper_full_default_params_cb((*C.struct_whisper_context)(ctx), C.enum_whisper_sampling_strategy(strategy)))
|
300 |
+
}
|
301 |
+
|
302 |
+
// Run the entire model: PCM -> log mel spectrogram -> encoder -> decoder -> text
|
303 |
+
// Uses the specified decoding strategy to obtain the text.
|
304 |
+
func (ctx *Context) Whisper_full(
|
305 |
+
params Params,
|
306 |
+
samples []float32,
|
307 |
+
encoderBeginCallback func() bool,
|
308 |
+
newSegmentCallback func(int),
|
309 |
+
progressCallback func(int),
|
310 |
+
) error {
|
311 |
+
registerEncoderBeginCallback(ctx, encoderBeginCallback)
|
312 |
+
registerNewSegmentCallback(ctx, newSegmentCallback)
|
313 |
+
registerProgressCallback(ctx, progressCallback)
|
314 |
+
defer registerEncoderBeginCallback(ctx, nil)
|
315 |
+
defer registerNewSegmentCallback(ctx, nil)
|
316 |
+
defer registerProgressCallback(ctx, nil)
|
317 |
+
if C.whisper_full((*C.struct_whisper_context)(ctx), (C.struct_whisper_full_params)(params), (*C.float)(&samples[0]), C.int(len(samples))) == 0 {
|
318 |
+
return nil
|
319 |
+
} else {
|
320 |
+
return ErrConversionFailed
|
321 |
+
}
|
322 |
+
}
|
323 |
+
|
324 |
+
// Split the input audio in chunks and process each chunk separately using whisper_full()
|
325 |
+
// It seems this approach can offer some speedup in some cases.
|
326 |
+
// However, the transcription accuracy can be worse at the beginning and end of each chunk.
|
327 |
+
func (ctx *Context) Whisper_full_parallel(params Params, samples []float32, processors int, encoderBeginCallback func() bool, newSegmentCallback func(int)) error {
|
328 |
+
registerEncoderBeginCallback(ctx, encoderBeginCallback)
|
329 |
+
registerNewSegmentCallback(ctx, newSegmentCallback)
|
330 |
+
defer registerEncoderBeginCallback(ctx, nil)
|
331 |
+
defer registerNewSegmentCallback(ctx, nil)
|
332 |
+
|
333 |
+
if C.whisper_full_parallel((*C.struct_whisper_context)(ctx), (C.struct_whisper_full_params)(params), (*C.float)(&samples[0]), C.int(len(samples)), C.int(processors)) == 0 {
|
334 |
+
return nil
|
335 |
+
} else {
|
336 |
+
return ErrConversionFailed
|
337 |
+
}
|
338 |
+
}
|
339 |
+
|
340 |
+
// Return the id of the autodetected language, returns -1 if not found
|
341 |
+
// Added to whisper.cpp in
|
342 |
+
// https://github.com/ggerganov/whisper.cpp/commit/a1c1583cc7cd8b75222857afc936f0638c5683d6
|
343 |
+
//
|
344 |
+
// Examples:
|
345 |
+
//
|
346 |
+
// "de" -> 2
|
347 |
+
// "german" -> 2
|
348 |
+
func (ctx *Context) Whisper_full_lang_id() int {
|
349 |
+
return int(C.whisper_full_lang_id((*C.struct_whisper_context)(ctx)))
|
350 |
+
}
|
351 |
+
|
352 |
+
// Number of generated text segments.
|
353 |
+
// A segment can be a few words, a sentence, or even a paragraph.
|
354 |
+
func (ctx *Context) Whisper_full_n_segments() int {
|
355 |
+
return int(C.whisper_full_n_segments((*C.struct_whisper_context)(ctx)))
|
356 |
+
}
|
357 |
+
|
358 |
+
// Get the start and end time of the specified segment.
|
359 |
+
func (ctx *Context) Whisper_full_get_segment_t0(segment int) int64 {
|
360 |
+
return int64(C.whisper_full_get_segment_t0((*C.struct_whisper_context)(ctx), C.int(segment)))
|
361 |
+
}
|
362 |
+
|
363 |
+
// Get the start and end time of the specified segment.
|
364 |
+
func (ctx *Context) Whisper_full_get_segment_t1(segment int) int64 {
|
365 |
+
return int64(C.whisper_full_get_segment_t1((*C.struct_whisper_context)(ctx), C.int(segment)))
|
366 |
+
}
|
367 |
+
|
368 |
+
// Get the text of the specified segment.
|
369 |
+
func (ctx *Context) Whisper_full_get_segment_text(segment int) string {
|
370 |
+
return C.GoString(C.whisper_full_get_segment_text((*C.struct_whisper_context)(ctx), C.int(segment)))
|
371 |
+
}
|
372 |
+
|
373 |
+
// Get number of tokens in the specified segment.
|
374 |
+
func (ctx *Context) Whisper_full_n_tokens(segment int) int {
|
375 |
+
return int(C.whisper_full_n_tokens((*C.struct_whisper_context)(ctx), C.int(segment)))
|
376 |
+
}
|
377 |
+
|
378 |
+
// Get the token text of the specified token index in the specified segment.
|
379 |
+
func (ctx *Context) Whisper_full_get_token_text(segment int, token int) string {
|
380 |
+
return C.GoString(C.whisper_full_get_token_text((*C.struct_whisper_context)(ctx), C.int(segment), C.int(token)))
|
381 |
+
}
|
382 |
+
|
383 |
+
// Get the token of the specified token index in the specified segment.
|
384 |
+
func (ctx *Context) Whisper_full_get_token_id(segment int, token int) Token {
|
385 |
+
return Token(C.whisper_full_get_token_id((*C.struct_whisper_context)(ctx), C.int(segment), C.int(token)))
|
386 |
+
}
|
387 |
+
|
388 |
+
// Get token data for the specified token in the specified segment.
|
389 |
+
// This contains probabilities, timestamps, etc.
|
390 |
+
func (ctx *Context) Whisper_full_get_token_data(segment int, token int) TokenData {
|
391 |
+
return TokenData(C.whisper_full_get_token_data((*C.struct_whisper_context)(ctx), C.int(segment), C.int(token)))
|
392 |
+
}
|
393 |
+
|
394 |
+
// Get the probability of the specified token in the specified segment.
|
395 |
+
func (ctx *Context) Whisper_full_get_token_p(segment int, token int) float32 {
|
396 |
+
return float32(C.whisper_full_get_token_p((*C.struct_whisper_context)(ctx), C.int(segment), C.int(token)))
|
397 |
+
}
|
398 |
+
|
399 |
+
///////////////////////////////////////////////////////////////////////////////
|
400 |
+
// CALLBACKS
|
401 |
+
|
402 |
+
// Callback registries, keyed by the context pointer that the C side passes
// back as user_data.
// NOTE(review): these maps are accessed without any locking in this file -
// confirm that callers never run Whisper_full on several goroutines at once.
var (
	cbNewSegment   = map[unsafe.Pointer]func(int){}
	cbProgress     = map[unsafe.Pointer]func(int){}
	cbEncoderBegin = map[unsafe.Pointer]func() bool{}
)
|
407 |
+
|
408 |
+
func registerNewSegmentCallback(ctx *Context, fn func(int)) {
|
409 |
+
if fn == nil {
|
410 |
+
delete(cbNewSegment, unsafe.Pointer(ctx))
|
411 |
+
} else {
|
412 |
+
cbNewSegment[unsafe.Pointer(ctx)] = fn
|
413 |
+
}
|
414 |
+
}
|
415 |
+
|
416 |
+
func registerProgressCallback(ctx *Context, fn func(int)) {
|
417 |
+
if fn == nil {
|
418 |
+
delete(cbProgress, unsafe.Pointer(ctx))
|
419 |
+
} else {
|
420 |
+
cbProgress[unsafe.Pointer(ctx)] = fn
|
421 |
+
}
|
422 |
+
}
|
423 |
+
|
424 |
+
func registerEncoderBeginCallback(ctx *Context, fn func() bool) {
|
425 |
+
if fn == nil {
|
426 |
+
delete(cbEncoderBegin, unsafe.Pointer(ctx))
|
427 |
+
} else {
|
428 |
+
cbEncoderBegin[unsafe.Pointer(ctx)] = fn
|
429 |
+
}
|
430 |
+
}
|
431 |
+
|
432 |
+
//export callNewSegment
|
433 |
+
func callNewSegment(user_data unsafe.Pointer, new C.int) {
|
434 |
+
if fn, ok := cbNewSegment[user_data]; ok {
|
435 |
+
fn(int(new))
|
436 |
+
}
|
437 |
+
}
|
438 |
+
|
439 |
+
//export callProgress
|
440 |
+
func callProgress(user_data unsafe.Pointer, progress C.int) {
|
441 |
+
if fn, ok := cbProgress[user_data]; ok {
|
442 |
+
fn(int(progress))
|
443 |
+
}
|
444 |
+
}
|
445 |
+
|
446 |
+
//export callEncoderBegin
|
447 |
+
func callEncoderBegin(user_data unsafe.Pointer) C.bool {
|
448 |
+
if fn, ok := cbEncoderBegin[user_data]; ok {
|
449 |
+
if fn() {
|
450 |
+
return C.bool(true)
|
451 |
+
} else {
|
452 |
+
return C.bool(false)
|
453 |
+
}
|
454 |
+
}
|
455 |
+
return true
|
456 |
+
}
|
457 |
+
|
458 |
+
func (t TokenData) T0() int64 {
|
459 |
+
return int64(t.t0)
|
460 |
+
}
|
461 |
+
|
462 |
+
func (t TokenData) T1() int64 {
|
463 |
+
return int64(t.t1)
|
464 |
+
}
|
465 |
+
|
466 |
+
func (t TokenData) Id() Token {
|
467 |
+
return Token(t.id)
|
468 |
+
}
|
bindings/go/whisper_test.go
ADDED
@@ -0,0 +1,113 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
package whisper_test
|
2 |
+
|
3 |
+
import (
|
4 |
+
"os"
|
5 |
+
"runtime"
|
6 |
+
"testing"
|
7 |
+
"time"
|
8 |
+
|
9 |
+
// Packages
|
10 |
+
whisper "github.com/ggerganov/whisper.cpp/bindings/go"
|
11 |
+
wav "github.com/go-audio/wav"
|
12 |
+
assert "github.com/stretchr/testify/assert"
|
13 |
+
)
|
14 |
+
|
15 |
+
// ModelPath is the model file the tests load; tests are skipped when it is missing.
const ModelPath = "models/ggml-small.en.bin"

// SamplePath is the audio sample the tests transcribe; tests are skipped when it is missing.
const SamplePath = "samples/jfk.wav"
|
19 |
+
|
20 |
+
func Test_Whisper_000(t *testing.T) {
|
21 |
+
assert := assert.New(t)
|
22 |
+
if _, err := os.Stat(ModelPath); os.IsNotExist(err) {
|
23 |
+
t.Skip("Skipping test, model not found:", ModelPath)
|
24 |
+
}
|
25 |
+
ctx := whisper.Whisper_init(ModelPath)
|
26 |
+
assert.NotNil(ctx)
|
27 |
+
ctx.Whisper_free()
|
28 |
+
}
|
29 |
+
|
30 |
+
func Test_Whisper_001(t *testing.T) {
|
31 |
+
assert := assert.New(t)
|
32 |
+
if _, err := os.Stat(ModelPath); os.IsNotExist(err) {
|
33 |
+
t.Skip("Skipping test, model not found:", ModelPath)
|
34 |
+
}
|
35 |
+
if _, err := os.Stat(SamplePath); os.IsNotExist(err) {
|
36 |
+
t.Skip("Skipping test, sample not found:", SamplePath)
|
37 |
+
}
|
38 |
+
|
39 |
+
// Open samples
|
40 |
+
fh, err := os.Open(SamplePath)
|
41 |
+
assert.NoError(err)
|
42 |
+
defer fh.Close()
|
43 |
+
|
44 |
+
// Read samples
|
45 |
+
d := wav.NewDecoder(fh)
|
46 |
+
buf, err := d.FullPCMBuffer()
|
47 |
+
assert.NoError(err)
|
48 |
+
|
49 |
+
// Run whisper
|
50 |
+
ctx := whisper.Whisper_init(ModelPath)
|
51 |
+
assert.NotNil(ctx)
|
52 |
+
defer ctx.Whisper_free()
|
53 |
+
params := ctx.Whisper_full_default_params(whisper.SAMPLING_GREEDY)
|
54 |
+
data := buf.AsFloat32Buffer().Data
|
55 |
+
err = ctx.Whisper_full(params, data, nil, nil, nil)
|
56 |
+
assert.NoError(err)
|
57 |
+
|
58 |
+
// Print out tokens
|
59 |
+
num_segments := ctx.Whisper_full_n_segments()
|
60 |
+
assert.GreaterOrEqual(num_segments, 1)
|
61 |
+
for i := 0; i < num_segments; i++ {
|
62 |
+
str := ctx.Whisper_full_get_segment_text(i)
|
63 |
+
assert.NotEmpty(str)
|
64 |
+
t0 := time.Duration(ctx.Whisper_full_get_segment_t0(i)) * time.Millisecond
|
65 |
+
t1 := time.Duration(ctx.Whisper_full_get_segment_t1(i)) * time.Millisecond
|
66 |
+
t.Logf("[%6s->%-6s] %q", t0, t1, str)
|
67 |
+
}
|
68 |
+
}
|
69 |
+
|
70 |
+
func Test_Whisper_002(t *testing.T) {
|
71 |
+
assert := assert.New(t)
|
72 |
+
for i := 0; i < whisper.Whisper_lang_max_id(); i++ {
|
73 |
+
str := whisper.Whisper_lang_str(i)
|
74 |
+
assert.NotEmpty(str)
|
75 |
+
t.Log(str)
|
76 |
+
}
|
77 |
+
}
|
78 |
+
|
79 |
+
func Test_Whisper_003(t *testing.T) {
|
80 |
+
threads := runtime.NumCPU()
|
81 |
+
assert := assert.New(t)
|
82 |
+
if _, err := os.Stat(ModelPath); os.IsNotExist(err) {
|
83 |
+
t.Skip("Skipping test, model not found:", ModelPath)
|
84 |
+
}
|
85 |
+
if _, err := os.Stat(SamplePath); os.IsNotExist(err) {
|
86 |
+
t.Skip("Skipping test, sample not found:", SamplePath)
|
87 |
+
}
|
88 |
+
|
89 |
+
// Open samples
|
90 |
+
fh, err := os.Open(SamplePath)
|
91 |
+
assert.NoError(err)
|
92 |
+
defer fh.Close()
|
93 |
+
|
94 |
+
// Read samples
|
95 |
+
d := wav.NewDecoder(fh)
|
96 |
+
buf, err := d.FullPCMBuffer()
|
97 |
+
assert.NoError(err)
|
98 |
+
|
99 |
+
// Make the model
|
100 |
+
ctx := whisper.Whisper_init(ModelPath)
|
101 |
+
assert.NotNil(ctx)
|
102 |
+
defer ctx.Whisper_free()
|
103 |
+
|
104 |
+
// Get MEL
|
105 |
+
assert.NoError(ctx.Whisper_pcm_to_mel(buf.AsFloat32Buffer().Data, threads))
|
106 |
+
|
107 |
+
// Get Languages
|
108 |
+
languages, err := ctx.Whisper_lang_auto_detect(0, threads)
|
109 |
+
assert.NoError(err)
|
110 |
+
for i, p := range languages {
|
111 |
+
t.Logf("%s: %f", whisper.Whisper_lang_str(i), p)
|
112 |
+
}
|
113 |
+
}
|
bindings/java/.idea/uiDesigner.xml
ADDED
@@ -0,0 +1,124 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<?xml version="1.0" encoding="UTF-8"?>
|
2 |
+
<project version="4">
|
3 |
+
<component name="Palette2">
|
4 |
+
<group name="Swing">
|
5 |
+
<item class="com.intellij.uiDesigner.HSpacer" tooltip-text="Horizontal Spacer" icon="/com/intellij/uiDesigner/icons/hspacer.svg" removable="false" auto-create-binding="false" can-attach-label="false">
|
6 |
+
<default-constraints vsize-policy="1" hsize-policy="6" anchor="0" fill="1" />
|
7 |
+
</item>
|
8 |
+
<item class="com.intellij.uiDesigner.VSpacer" tooltip-text="Vertical Spacer" icon="/com/intellij/uiDesigner/icons/vspacer.svg" removable="false" auto-create-binding="false" can-attach-label="false">
|
9 |
+
<default-constraints vsize-policy="6" hsize-policy="1" anchor="0" fill="2" />
|
10 |
+
</item>
|
11 |
+
<item class="javax.swing.JPanel" icon="/com/intellij/uiDesigner/icons/panel.svg" removable="false" auto-create-binding="false" can-attach-label="false">
|
12 |
+
<default-constraints vsize-policy="3" hsize-policy="3" anchor="0" fill="3" />
|
13 |
+
</item>
|
14 |
+
<item class="javax.swing.JScrollPane" icon="/com/intellij/uiDesigner/icons/scrollPane.svg" removable="false" auto-create-binding="false" can-attach-label="true">
|
15 |
+
<default-constraints vsize-policy="7" hsize-policy="7" anchor="0" fill="3" />
|
16 |
+
</item>
|
17 |
+
<item class="javax.swing.JButton" icon="/com/intellij/uiDesigner/icons/button.svg" removable="false" auto-create-binding="true" can-attach-label="false">
|
18 |
+
<default-constraints vsize-policy="0" hsize-policy="3" anchor="0" fill="1" />
|
19 |
+
<initial-values>
|
20 |
+
<property name="text" value="Button" />
|
21 |
+
</initial-values>
|
22 |
+
</item>
|
23 |
+
<item class="javax.swing.JRadioButton" icon="/com/intellij/uiDesigner/icons/radioButton.svg" removable="false" auto-create-binding="true" can-attach-label="false">
|
24 |
+
<default-constraints vsize-policy="0" hsize-policy="3" anchor="8" fill="0" />
|
25 |
+
<initial-values>
|
26 |
+
<property name="text" value="RadioButton" />
|
27 |
+
</initial-values>
|
28 |
+
</item>
|
29 |
+
<item class="javax.swing.JCheckBox" icon="/com/intellij/uiDesigner/icons/checkBox.svg" removable="false" auto-create-binding="true" can-attach-label="false">
|
30 |
+
<default-constraints vsize-policy="0" hsize-policy="3" anchor="8" fill="0" />
|
31 |
+
<initial-values>
|
32 |
+
<property name="text" value="CheckBox" />
|
33 |
+
</initial-values>
|
34 |
+
</item>
|
35 |
+
<item class="javax.swing.JLabel" icon="/com/intellij/uiDesigner/icons/label.svg" removable="false" auto-create-binding="false" can-attach-label="false">
|
36 |
+
<default-constraints vsize-policy="0" hsize-policy="0" anchor="8" fill="0" />
|
37 |
+
<initial-values>
|
38 |
+
<property name="text" value="Label" />
|
39 |
+
</initial-values>
|
40 |
+
</item>
|
41 |
+
<item class="javax.swing.JTextField" icon="/com/intellij/uiDesigner/icons/textField.svg" removable="false" auto-create-binding="true" can-attach-label="true">
|
42 |
+
<default-constraints vsize-policy="0" hsize-policy="6" anchor="8" fill="1">
|
43 |
+
<preferred-size width="150" height="-1" />
|
44 |
+
</default-constraints>
|
45 |
+
</item>
|
46 |
+
<item class="javax.swing.JPasswordField" icon="/com/intellij/uiDesigner/icons/passwordField.svg" removable="false" auto-create-binding="true" can-attach-label="true">
|
47 |
+
<default-constraints vsize-policy="0" hsize-policy="6" anchor="8" fill="1">
|
48 |
+
<preferred-size width="150" height="-1" />
|
49 |
+
</default-constraints>
|
50 |
+
</item>
|
51 |
+
<item class="javax.swing.JFormattedTextField" icon="/com/intellij/uiDesigner/icons/formattedTextField.svg" removable="false" auto-create-binding="true" can-attach-label="true">
|
52 |
+
<default-constraints vsize-policy="0" hsize-policy="6" anchor="8" fill="1">
|
53 |
+
<preferred-size width="150" height="-1" />
|
54 |
+
</default-constraints>
|
55 |
+
</item>
|
56 |
+
<item class="javax.swing.JTextArea" icon="/com/intellij/uiDesigner/icons/textArea.svg" removable="false" auto-create-binding="true" can-attach-label="true">
|
57 |
+
<default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3">
|
58 |
+
<preferred-size width="150" height="50" />
|
59 |
+
</default-constraints>
|
60 |
+
</item>
|
61 |
+
<item class="javax.swing.JTextPane" icon="/com/intellij/uiDesigner/icons/textPane.svg" removable="false" auto-create-binding="true" can-attach-label="true">
|
62 |
+
<default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3">
|
63 |
+
<preferred-size width="150" height="50" />
|
64 |
+
</default-constraints>
|
65 |
+
</item>
|
66 |
+
<item class="javax.swing.JEditorPane" icon="/com/intellij/uiDesigner/icons/editorPane.svg" removable="false" auto-create-binding="true" can-attach-label="true">
|
67 |
+
<default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3">
|
68 |
+
<preferred-size width="150" height="50" />
|
69 |
+
</default-constraints>
|
70 |
+
</item>
|
71 |
+
<item class="javax.swing.JComboBox" icon="/com/intellij/uiDesigner/icons/comboBox.svg" removable="false" auto-create-binding="true" can-attach-label="true">
|
72 |
+
<default-constraints vsize-policy="0" hsize-policy="2" anchor="8" fill="1" />
|
73 |
+
</item>
|
74 |
+
<item class="javax.swing.JTable" icon="/com/intellij/uiDesigner/icons/table.svg" removable="false" auto-create-binding="true" can-attach-label="false">
|
75 |
+
<default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3">
|
76 |
+
<preferred-size width="150" height="50" />
|
77 |
+
</default-constraints>
|
78 |
+
</item>
|
79 |
+
<item class="javax.swing.JList" icon="/com/intellij/uiDesigner/icons/list.svg" removable="false" auto-create-binding="true" can-attach-label="false">
|
80 |
+
<default-constraints vsize-policy="6" hsize-policy="2" anchor="0" fill="3">
|
81 |
+
<preferred-size width="150" height="50" />
|
82 |
+
</default-constraints>
|
83 |
+
</item>
|
84 |
+
<item class="javax.swing.JTree" icon="/com/intellij/uiDesigner/icons/tree.svg" removable="false" auto-create-binding="true" can-attach-label="false">
|
85 |
+
<default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3">
|
86 |
+
<preferred-size width="150" height="50" />
|
87 |
+
</default-constraints>
|
88 |
+
</item>
|
89 |
+
<item class="javax.swing.JTabbedPane" icon="/com/intellij/uiDesigner/icons/tabbedPane.svg" removable="false" auto-create-binding="true" can-attach-label="false">
|
90 |
+
<default-constraints vsize-policy="3" hsize-policy="3" anchor="0" fill="3">
|
91 |
+
<preferred-size width="200" height="200" />
|
92 |
+
</default-constraints>
|
93 |
+
</item>
|
94 |
+
<item class="javax.swing.JSplitPane" icon="/com/intellij/uiDesigner/icons/splitPane.svg" removable="false" auto-create-binding="false" can-attach-label="false">
|
95 |
+
<default-constraints vsize-policy="3" hsize-policy="3" anchor="0" fill="3">
|
96 |
+
<preferred-size width="200" height="200" />
|
97 |
+
</default-constraints>
|
98 |
+
</item>
|
99 |
+
<item class="javax.swing.JSpinner" icon="/com/intellij/uiDesigner/icons/spinner.svg" removable="false" auto-create-binding="true" can-attach-label="true">
|
100 |
+
<default-constraints vsize-policy="0" hsize-policy="6" anchor="8" fill="1" />
|
101 |
+
</item>
|
102 |
+
<item class="javax.swing.JSlider" icon="/com/intellij/uiDesigner/icons/slider.svg" removable="false" auto-create-binding="true" can-attach-label="false">
|
103 |
+
<default-constraints vsize-policy="0" hsize-policy="6" anchor="8" fill="1" />
|
104 |
+
</item>
|
105 |
+
<item class="javax.swing.JSeparator" icon="/com/intellij/uiDesigner/icons/separator.svg" removable="false" auto-create-binding="false" can-attach-label="false">
|
106 |
+
<default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3" />
|
107 |
+
</item>
|
108 |
+
<item class="javax.swing.JProgressBar" icon="/com/intellij/uiDesigner/icons/progressbar.svg" removable="false" auto-create-binding="true" can-attach-label="false">
|
109 |
+
<default-constraints vsize-policy="0" hsize-policy="6" anchor="0" fill="1" />
|
110 |
+
</item>
|
111 |
+
<item class="javax.swing.JToolBar" icon="/com/intellij/uiDesigner/icons/toolbar.svg" removable="false" auto-create-binding="false" can-attach-label="false">
|
112 |
+
<default-constraints vsize-policy="0" hsize-policy="6" anchor="0" fill="1">
|
113 |
+
<preferred-size width="-1" height="20" />
|
114 |
+
</default-constraints>
|
115 |
+
</item>
|
116 |
+
<item class="javax.swing.JToolBar$Separator" icon="/com/intellij/uiDesigner/icons/toolbarSeparator.svg" removable="false" auto-create-binding="false" can-attach-label="false">
|
117 |
+
<default-constraints vsize-policy="0" hsize-policy="0" anchor="0" fill="1" />
|
118 |
+
</item>
|
119 |
+
<item class="javax.swing.JScrollBar" icon="/com/intellij/uiDesigner/icons/scrollbar.svg" removable="false" auto-create-binding="true" can-attach-label="false">
|
120 |
+
<default-constraints vsize-policy="6" hsize-policy="0" anchor="0" fill="2" />
|
121 |
+
</item>
|
122 |
+
</group>
|
123 |
+
</component>
|
124 |
+
</project>
|
bindings/java/README.md
ADDED
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Java JNI bindings for Whisper
|
2 |
+
|
3 |
+
This package provides Java JNI bindings for whisper.cpp. They have been tested on:
|
4 |
+
|
5 |
+
* <strike>Darwin (OS X) 12.6 on x86_64</strike>
|
6 |
+
* Ubuntu on x86_64
|
7 |
+
* Windows on x86_64
|
8 |
+
|
9 |
+
The "low level" bindings are in `WhisperCppJnaLibrary`. The simplest usage is shown in the example below.
|
10 |
+
|
11 |
+
JNA will attempt to load the `whispercpp` shared library from:
|
12 |
+
|
13 |
+
- jna.library.path
|
14 |
+
- jna.platform.library
|
15 |
+
- ~/Library/Frameworks
|
16 |
+
- /Library/Frameworks
|
17 |
+
- /System/Library/Frameworks
|
18 |
+
- classpath
|
19 |
+
|
20 |
+
```java
|
21 |
+
import io.github.ggerganov.whispercpp.WhisperCpp;
|
22 |
+
|
23 |
+
public class Example {
|
24 |
+
|
25 |
+
public static void main(String[] args) {
|
26 |
+
WhisperCpp whisper = new WhisperCpp();
|
27 |
+
// By default, models are loaded from ~/.cache/whisper/ and are usually named "ggml-${name}.bin"
|
28 |
+
// or you can provide the absolute path to the model file.
|
29 |
+
long context = whisper.initContext("base.en");
|
30 |
+
try {
|
31 |
+
var whisperParams = whisper.getFullDefaultParams(WhisperSamplingStrategy.WHISPER_SAMPLING_GREEDY);
|
32 |
+
// custom configuration if required
|
33 |
+
whisperParams.temperature_inc = 0f;
|
34 |
+
|
35 |
+
var samples = readAudio(); // divide each value by 32767.0f
|
36 |
+
whisper.fullTranscribe(whisperParams, samples);
|
37 |
+
|
38 |
+
int segmentCount = whisper.getTextSegmentCount(context);
|
39 |
+
for (int i = 0; i < segmentCount; i++) {
|
40 |
+
String text = whisper.getTextSegment(context, i);
|
41 |
+
System.out.println(text);
|
42 |
+
}
|
43 |
+
} finally {
|
44 |
+
whisper.freeContext(context);
|
45 |
+
}
|
46 |
+
}
|
47 |
+
}
|
48 |
+
```
|
49 |
+
|
50 |
+
## Building & Testing
|
51 |
+
|
52 |
+
In order to build, you need to have JDK 8 or higher installed. Build the project and run the tests with:
|
53 |
+
|
54 |
+
```bash
|
55 |
+
git clone https://github.com/ggerganov/whisper.cpp.git
|
56 |
+
cd whisper.cpp/bindings/java
|
57 |
+
|
58 |
+
./gradlew build
|
59 |
+
```
|
60 |
+
|
61 |
+
You need to have the `whisper` library in your [JNA library path](https://java-native-access.github.io/jna/4.2.1/com/sun/jna/NativeLibrary.html). On Windows the dll is included in the jar and you can update it:
|
62 |
+
|
63 |
+
```bash
|
64 |
+
copy /y ..\..\build\bin\Release\whisper.dll build\generated\resources\main\win32-x86-64\whisper.dll
|
65 |
+
```
|
66 |
+
|
67 |
+
|
68 |
+
## License
|
69 |
+
|
70 |
+
The license for the Java bindings is the same as the license for the rest of the whisper.cpp project, which is the MIT License. See the `LICENSE` file for more details.
|
71 |
+
|
bindings/java/build.gradle
ADDED
@@ -0,0 +1,133 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
plugins {
|
2 |
+
id 'java'
|
3 |
+
id 'java-library'
|
4 |
+
id 'maven-publish'
|
5 |
+
id 'signing'
|
6 |
+
}
|
7 |
+
|
8 |
+
archivesBaseName = 'whispercpp'
|
9 |
+
group = 'io.github.ggerganov'
|
10 |
+
version = '1.4.0'
|
11 |
+
|
12 |
+
|
13 |
+
sourceCompatibility = 1.8
|
14 |
+
targetCompatibility = 1.8
|
15 |
+
|
16 |
+
sourceSets {
|
17 |
+
main {
|
18 |
+
resources {
|
19 |
+
srcDirs = ['src/main/resources', 'build/generated/resources/main']
|
20 |
+
}
|
21 |
+
}
|
22 |
+
test {
|
23 |
+
runtimeClasspath += files('build/generated/resources/main')
|
24 |
+
}
|
25 |
+
}
|
26 |
+
|
27 |
+
// Copies the macOS build of the native library from the CMake build
// directory into the generated resources tree.
// macOS shared libraries use the ".dylib" extension; the previous pattern
// ('libwhisper.dynlib') could never match, so this task copied nothing.
tasks.register('copyLibwhisperDynlib', Copy) {
    from '../../build'
    include 'libwhisper.dylib'
    // NOTE(review): recent JNA releases may expect an architecture-qualified
    // prefix (e.g. "darwin-x86-64" / "darwin-aarch64") — confirm against the
    // JNA version in use before relying on classpath extraction on macOS.
    into 'build/generated/resources/main/darwin'
}
|
32 |
+
|
33 |
+
tasks.register('copyLibwhisperSo', Copy) {
|
34 |
+
from '../../build'
|
35 |
+
include 'libwhisper.so'
|
36 |
+
into 'build/generated/resources/main/linux-x86-64'
|
37 |
+
}
|
38 |
+
|
39 |
+
// Copies the Windows build of the native library into the generated
// resources tree.
// The destination uses JNA's resource prefix for 64-bit Windows
// ("win32-x86-64"), matching the path given in this module's README; the
// previous "windows-x86-64" directory is not a prefix JNA searches.
tasks.register('copyWhisperDll', Copy) {
    // NOTE(review): the README copies from build\bin\Release — confirm which
    // directory the CMake Release configuration actually writes whisper.dll to.
    from '../../build/Release'
    include 'whisper.dll'
    into 'build/generated/resources/main/win32-x86-64'
}
|
44 |
+
|
45 |
+
tasks.register('copyLibs') {
|
46 |
+
dependsOn copyLibwhisperDynlib, copyLibwhisperSo, copyWhisperDll
|
47 |
+
}
|
48 |
+
|
49 |
+
test {
|
50 |
+
systemProperty 'jna.library.path', project.file('build/generated/resources/main').absolutePath
|
51 |
+
}
|
52 |
+
|
53 |
+
java {
|
54 |
+
withSourcesJar()
|
55 |
+
withJavadocJar()
|
56 |
+
}
|
57 |
+
|
58 |
+
jar {
|
59 |
+
exclude '**/whisper_java.exp', '**/whisper_java.lib'
|
60 |
+
}
|
61 |
+
|
62 |
+
javadoc {
|
63 |
+
options.addStringOption('Xdoclint:none', '-quiet')
|
64 |
+
}
|
65 |
+
|
66 |
+
tasks.withType(Test) {
|
67 |
+
useJUnitPlatform()
|
68 |
+
}
|
69 |
+
|
70 |
+
dependencies {
|
71 |
+
implementation "net.java.dev.jna:jna:5.13.0"
|
72 |
+
testImplementation "org.junit.jupiter:junit-jupiter:5.9.2"
|
73 |
+
testImplementation "org.assertj:assertj-core:3.24.2"
|
74 |
+
}
|
75 |
+
|
76 |
+
repositories {
|
77 |
+
mavenCentral()
|
78 |
+
}
|
79 |
+
|
80 |
+
publishing {
|
81 |
+
publications {
|
82 |
+
mavenJava(MavenPublication) {
|
83 |
+
artifactId = 'whispercpp'
|
84 |
+
from components.java
|
85 |
+
pom {
|
86 |
+
name = 'whispercpp'
|
87 |
+
description = "Java JNA bindings for OpenAI's Whisper model, implemented in C/C++"
|
88 |
+
url = 'https://github.com/ggerganov/whisper.cpp'
|
89 |
+
licenses {
|
90 |
+
license {
|
91 |
+
name = 'MIT licence'
|
92 |
+
url = 'https://raw.githubusercontent.com/ggerganov/whisper.cpp/master/LICENSE'
|
93 |
+
}
|
94 |
+
}
|
95 |
+
developers {
|
96 |
+
developer {
|
97 |
+
id = 'ggerganov'
|
98 |
+
name = 'Georgi Gerganov'
|
99 |
+
email = 'ggerganov@gmail.com'
|
100 |
+
}
|
101 |
+
developer {
|
102 |
+
id = 'nalbion'
|
103 |
+
name = 'Nicholas Albion'
|
104 |
+
email = 'nalbion@yahoo.com'
|
105 |
+
}
|
106 |
+
}
|
107 |
+
scm {
|
108 |
+
connection = 'scm:git:git://github.com/ggerganov/whisper.cpp.git'
|
109 |
+
url = 'https://github.com/ggerganov/whisper.cpp'
|
110 |
+
}
|
111 |
+
}
|
112 |
+
}
|
113 |
+
}
|
114 |
+
|
115 |
+
repositories {
|
116 |
+
maven {
|
117 |
+
def releasesRepoUrl = 'https://s01.oss.sonatype.org/service/local/staging/deploy/maven2/'
|
118 |
+
def snapshotsRepoUrl = 'https://s01.oss.sonatype.org/content/repositories/snapshots/'
|
119 |
+
url = version.endsWith('-SNAPSHOT') ? snapshotsRepoUrl : releasesRepoUrl
|
120 |
+
credentials {
|
121 |
+
username = System.getenv("MAVEN_USERNAME")
|
122 |
+
password = System.getenv("MAVEN_PASSWORD")
|
123 |
+
}
|
124 |
+
}
|
125 |
+
}
|
126 |
+
}
|
127 |
+
|
128 |
+
signing {
|
129 |
+
def signingKey = System.getenv("PGP_SECRET")
|
130 |
+
def signingPassword = System.getenv("PGP_PASSPHRASE")
|
131 |
+
useInMemoryPgpKeys(signingKey, signingPassword)
|
132 |
+
sign publishing.publications.mavenJava
|
133 |
+
}
|
bindings/java/gradle.properties
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
org.gradle.jvmargs=-Xms256m -Xmx1024m
|
2 |
+
system.include.dir=/usr/include
|
3 |
+
#system.local.include.dir=../../include
|
4 |
+
system.local.include.dir=./build/generated/sources/headers/java/main
|
5 |
+
jni.include.dir=/usr/lib/jvm/java-8-openjdk-amd64/include/
|
6 |
+
jni.lib.dir=/usr/lib/jvm/java-8-openjdk-amd64/lib/
|
bindings/java/gradle/wrapper/gradle-wrapper.jar
ADDED
Binary file (61.6 kB). View file
|
|
bindings/java/gradle/wrapper/gradle-wrapper.properties
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
distributionBase=GRADLE_USER_HOME
|
2 |
+
distributionPath=wrapper/dists
|
3 |
+
distributionUrl=https\://services.gradle.org/distributions/gradle-8.1-bin.zip
|
4 |
+
networkTimeout=10000
|
5 |
+
zipStoreBase=GRADLE_USER_HOME
|
6 |
+
zipStorePath=wrapper/dists
|
bindings/java/gradlew
ADDED
@@ -0,0 +1,244 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/bin/sh
|
2 |
+
|
3 |
+
#
|
4 |
+
# Copyright © 2015-2021 the original authors.
|
5 |
+
#
|
6 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
7 |
+
# you may not use this file except in compliance with the License.
|
8 |
+
# You may obtain a copy of the License at
|
9 |
+
#
|
10 |
+
# https://www.apache.org/licenses/LICENSE-2.0
|
11 |
+
#
|
12 |
+
# Unless required by applicable law or agreed to in writing, software
|
13 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
14 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
15 |
+
# See the License for the specific language governing permissions and
|
16 |
+
# limitations under the License.
|
17 |
+
#
|
18 |
+
|
19 |
+
##############################################################################
|
20 |
+
#
|
21 |
+
# Gradle start up script for POSIX generated by Gradle.
|
22 |
+
#
|
23 |
+
# Important for running:
|
24 |
+
#
|
25 |
+
# (1) You need a POSIX-compliant shell to run this script. If your /bin/sh is
|
26 |
+
# noncompliant, but you have some other compliant shell such as ksh or
|
27 |
+
# bash, then to run this script, type that shell name before the whole
|
28 |
+
# command line, like:
|
29 |
+
#
|
30 |
+
# ksh Gradle
|
31 |
+
#
|
32 |
+
# Busybox and similar reduced shells will NOT work, because this script
|
33 |
+
# requires all of these POSIX shell features:
|
34 |
+
# * functions;
|
35 |
+
# * expansions «$var», «${var}», «${var:-default}», «${var+SET}»,
|
36 |
+
# «${var#prefix}», «${var%suffix}», and «$( cmd )»;
|
37 |
+
# * compound commands having a testable exit status, especially «case»;
|
38 |
+
# * various built-in commands including «command», «set», and «ulimit».
|
39 |
+
#
|
40 |
+
# Important for patching:
|
41 |
+
#
|
42 |
+
# (2) This script targets any POSIX shell, so it avoids extensions provided
|
43 |
+
# by Bash, Ksh, etc; in particular arrays are avoided.
|
44 |
+
#
|
45 |
+
# The "traditional" practice of packing multiple parameters into a
|
46 |
+
# space-separated string is a well documented source of bugs and security
|
47 |
+
# problems, so this is (mostly) avoided, by progressively accumulating
|
48 |
+
# options in "$@", and eventually passing that to Java.
|
49 |
+
#
|
50 |
+
# Where the inherited environment variables (DEFAULT_JVM_OPTS, JAVA_OPTS,
|
51 |
+
# and GRADLE_OPTS) rely on word-splitting, this is performed explicitly;
|
52 |
+
# see the in-line comments for details.
|
53 |
+
#
|
54 |
+
# There are tweaks for specific operating systems such as AIX, CygWin,
|
55 |
+
# Darwin, MinGW, and NonStop.
|
56 |
+
#
|
57 |
+
# (3) This script is generated from the Groovy template
|
58 |
+
# https://github.com/gradle/gradle/blob/HEAD/subprojects/plugins/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt
|
59 |
+
# within the Gradle project.
|
60 |
+
#
|
61 |
+
# You can find Gradle at https://github.com/gradle/gradle/.
|
62 |
+
#
|
63 |
+
##############################################################################
|
64 |
+
|
65 |
+
# Attempt to set APP_HOME
|
66 |
+
|
67 |
+
# Resolve links: $0 may be a link
|
68 |
+
app_path=$0
|
69 |
+
|
70 |
+
# Need this for daisy-chained symlinks.
|
71 |
+
while
|
72 |
+
APP_HOME=${app_path%"${app_path##*/}"} # leaves a trailing /; empty if no leading path
|
73 |
+
[ -h "$app_path" ]
|
74 |
+
do
|
75 |
+
ls=$( ls -ld "$app_path" )
|
76 |
+
link=${ls#*' -> '}
|
77 |
+
case $link in #(
|
78 |
+
/*) app_path=$link ;; #(
|
79 |
+
*) app_path=$APP_HOME$link ;;
|
80 |
+
esac
|
81 |
+
done
|
82 |
+
|
83 |
+
# This is normally unused
|
84 |
+
# shellcheck disable=SC2034
|
85 |
+
APP_BASE_NAME=${0##*/}
|
86 |
+
APP_HOME=$( cd "${APP_HOME:-./}" && pwd -P ) || exit
|
87 |
+
|
88 |
+
# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
|
89 |
+
DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'
|
90 |
+
|
91 |
+
# Use the maximum available, or set MAX_FD != -1 to use that value.
|
92 |
+
MAX_FD=maximum
|
93 |
+
|
94 |
+
warn () {
|
95 |
+
echo "$*"
|
96 |
+
} >&2
|
97 |
+
|
98 |
+
die () {
|
99 |
+
echo
|
100 |
+
echo "$*"
|
101 |
+
echo
|
102 |
+
exit 1
|
103 |
+
} >&2
|
104 |
+
|
105 |
+
# OS specific support (must be 'true' or 'false').
|
106 |
+
cygwin=false
|
107 |
+
msys=false
|
108 |
+
darwin=false
|
109 |
+
nonstop=false
|
110 |
+
case "$( uname )" in #(
|
111 |
+
CYGWIN* ) cygwin=true ;; #(
|
112 |
+
Darwin* ) darwin=true ;; #(
|
113 |
+
MSYS* | MINGW* ) msys=true ;; #(
|
114 |
+
NONSTOP* ) nonstop=true ;;
|
115 |
+
esac
|
116 |
+
|
117 |
+
CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
|
118 |
+
|
119 |
+
|
120 |
+
# Determine the Java command to use to start the JVM.
|
121 |
+
if [ -n "$JAVA_HOME" ] ; then
|
122 |
+
if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
|
123 |
+
# IBM's JDK on AIX uses strange locations for the executables
|
124 |
+
JAVACMD=$JAVA_HOME/jre/sh/java
|
125 |
+
else
|
126 |
+
JAVACMD=$JAVA_HOME/bin/java
|
127 |
+
fi
|
128 |
+
if [ ! -x "$JAVACMD" ] ; then
|
129 |
+
die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
|
130 |
+
|
131 |
+
Please set the JAVA_HOME variable in your environment to match the
|
132 |
+
location of your Java installation."
|
133 |
+
fi
|
134 |
+
else
|
135 |
+
JAVACMD=java
|
136 |
+
which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
|
137 |
+
|
138 |
+
Please set the JAVA_HOME variable in your environment to match the
|
139 |
+
location of your Java installation."
|
140 |
+
fi
|
141 |
+
|
142 |
+
# Increase the maximum file descriptors if we can.
|
143 |
+
if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then
|
144 |
+
case $MAX_FD in #(
|
145 |
+
max*)
|
146 |
+
# In POSIX sh, ulimit -H is undefined. That's why the result is checked to see if it worked.
|
147 |
+
# shellcheck disable=SC3045
|
148 |
+
MAX_FD=$( ulimit -H -n ) ||
|
149 |
+
warn "Could not query maximum file descriptor limit"
|
150 |
+
esac
|
151 |
+
case $MAX_FD in #(
|
152 |
+
'' | soft) :;; #(
|
153 |
+
*)
|
154 |
+
# In POSIX sh, ulimit -n is undefined. That's why the result is checked to see if it worked.
|
155 |
+
# shellcheck disable=SC3045
|
156 |
+
ulimit -n "$MAX_FD" ||
|
157 |
+
warn "Could not set maximum file descriptor limit to $MAX_FD"
|
158 |
+
esac
|
159 |
+
fi
|
160 |
+
|
161 |
+
# Collect all arguments for the java command, stacking in reverse order:
|
162 |
+
# * args from the command line
|
163 |
+
# * the main class name
|
164 |
+
# * -classpath
|
165 |
+
# * -D...appname settings
|
166 |
+
# * --module-path (only if needed)
|
167 |
+
# * DEFAULT_JVM_OPTS, JAVA_OPTS, and GRADLE_OPTS environment variables.
|
168 |
+
|
169 |
+
# For Cygwin or MSYS, switch paths to Windows format before running java
|
170 |
+
if "$cygwin" || "$msys" ; then
|
171 |
+
APP_HOME=$( cygpath --path --mixed "$APP_HOME" )
|
172 |
+
CLASSPATH=$( cygpath --path --mixed "$CLASSPATH" )
|
173 |
+
|
174 |
+
JAVACMD=$( cygpath --unix "$JAVACMD" )
|
175 |
+
|
176 |
+
# Now convert the arguments - kludge to limit ourselves to /bin/sh
|
177 |
+
for arg do
|
178 |
+
if
|
179 |
+
case $arg in #(
|
180 |
+
-*) false ;; # don't mess with options #(
|
181 |
+
/?*) t=${arg#/} t=/${t%%/*} # looks like a POSIX filepath
|
182 |
+
[ -e "$t" ] ;; #(
|
183 |
+
*) false ;;
|
184 |
+
esac
|
185 |
+
then
|
186 |
+
arg=$( cygpath --path --ignore --mixed "$arg" )
|
187 |
+
fi
|
188 |
+
# Roll the args list around exactly as many times as the number of
|
189 |
+
# args, so each arg winds up back in the position where it started, but
|
190 |
+
# possibly modified.
|
191 |
+
#
|
192 |
+
# NB: a `for` loop captures its iteration list before it begins, so
|
193 |
+
# changing the positional parameters here affects neither the number of
|
194 |
+
# iterations, nor the values presented in `arg`.
|
195 |
+
shift # remove old arg
|
196 |
+
set -- "$@" "$arg" # push replacement arg
|
197 |
+
done
|
198 |
+
fi
|
199 |
+
|
200 |
+
# Collect all arguments for the java command;
|
201 |
+
# * $DEFAULT_JVM_OPTS, $JAVA_OPTS, and $GRADLE_OPTS can contain fragments of
|
202 |
+
# shell script including quotes and variable substitutions, so put them in
|
203 |
+
# double quotes to make sure that they get re-expanded; and
|
204 |
+
# * put everything else in single quotes, so that it's not re-expanded.
|
205 |
+
|
206 |
+
set -- \
|
207 |
+
"-Dorg.gradle.appname=$APP_BASE_NAME" \
|
208 |
+
-classpath "$CLASSPATH" \
|
209 |
+
org.gradle.wrapper.GradleWrapperMain \
|
210 |
+
"$@"
|
211 |
+
|
212 |
+
# Stop when "xargs" is not available.
|
213 |
+
if ! command -v xargs >/dev/null 2>&1
|
214 |
+
then
|
215 |
+
die "xargs is not available"
|
216 |
+
fi
|
217 |
+
|
218 |
+
# Use "xargs" to parse quoted args.
|
219 |
+
#
|
220 |
+
# With -n1 it outputs one arg per line, with the quotes and backslashes removed.
|
221 |
+
#
|
222 |
+
# In Bash we could simply go:
|
223 |
+
#
|
224 |
+
# readarray ARGS < <( xargs -n1 <<<"$var" ) &&
|
225 |
+
# set -- "${ARGS[@]}" "$@"
|
226 |
+
#
|
227 |
+
# but POSIX shell has neither arrays nor command substitution, so instead we
|
228 |
+
# post-process each arg (as a line of input to sed) to backslash-escape any
|
229 |
+
# character that might be a shell metacharacter, then use eval to reverse
|
230 |
+
# that process (while maintaining the separation between arguments), and wrap
|
231 |
+
# the whole thing up as a single "set" statement.
|
232 |
+
#
|
233 |
+
# This will of course break if any of these variables contains a newline or
|
234 |
+
# an unmatched quote.
|
235 |
+
#
|
236 |
+
|
237 |
+
eval "set -- $(
|
238 |
+
printf '%s\n' "$DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS" |
|
239 |
+
xargs -n1 |
|
240 |
+
sed ' s~[^-[:alnum:]+,./:=@_]~\\&~g; ' |
|
241 |
+
tr '\n' ' '
|
242 |
+
)" '"$@"'
|
243 |
+
|
244 |
+
exec "$JAVACMD" "$@"
|
bindings/java/gradlew.bat
ADDED
@@ -0,0 +1,92 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
@rem
|
2 |
+
@rem Copyright 2015 the original author or authors.
|
3 |
+
@rem
|
4 |
+
@rem Licensed under the Apache License, Version 2.0 (the "License");
|
5 |
+
@rem you may not use this file except in compliance with the License.
|
6 |
+
@rem You may obtain a copy of the License at
|
7 |
+
@rem
|
8 |
+
@rem https://www.apache.org/licenses/LICENSE-2.0
|
9 |
+
@rem
|
10 |
+
@rem Unless required by applicable law or agreed to in writing, software
|
11 |
+
@rem distributed under the License is distributed on an "AS IS" BASIS,
|
12 |
+
@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13 |
+
@rem See the License for the specific language governing permissions and
|
14 |
+
@rem limitations under the License.
|
15 |
+
@rem
|
16 |
+
|
17 |
+
@if "%DEBUG%"=="" @echo off
|
18 |
+
@rem ##########################################################################
|
19 |
+
@rem
|
20 |
+
@rem Gradle startup script for Windows
|
21 |
+
@rem
|
22 |
+
@rem ##########################################################################
|
23 |
+
|
24 |
+
@rem Set local scope for the variables with windows NT shell
|
25 |
+
if "%OS%"=="Windows_NT" setlocal
|
26 |
+
|
27 |
+
set DIRNAME=%~dp0
|
28 |
+
if "%DIRNAME%"=="" set DIRNAME=.
|
29 |
+
@rem This is normally unused
|
30 |
+
set APP_BASE_NAME=%~n0
|
31 |
+
set APP_HOME=%DIRNAME%
|
32 |
+
|
33 |
+
@rem Resolve any "." and ".." in APP_HOME to make it shorter.
|
34 |
+
for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi
|
35 |
+
|
36 |
+
@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
|
37 |
+
set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m"
|
38 |
+
|
39 |
+
@rem Find java.exe
|
40 |
+
if defined JAVA_HOME goto findJavaFromJavaHome
|
41 |
+
|
42 |
+
set JAVA_EXE=java.exe
|
43 |
+
%JAVA_EXE% -version >NUL 2>&1
|
44 |
+
if %ERRORLEVEL% equ 0 goto execute
|
45 |
+
|
46 |
+
echo.
|
47 |
+
echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
|
48 |
+
echo.
|
49 |
+
echo Please set the JAVA_HOME variable in your environment to match the
|
50 |
+
echo location of your Java installation.
|
51 |
+
|
52 |
+
goto fail
|
53 |
+
|
54 |
+
:findJavaFromJavaHome
|
55 |
+
set JAVA_HOME=%JAVA_HOME:"=%
|
56 |
+
set JAVA_EXE=%JAVA_HOME%/bin/java.exe
|
57 |
+
|
58 |
+
if exist "%JAVA_EXE%" goto execute
|
59 |
+
|
60 |
+
echo.
|
61 |
+
echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
|
62 |
+
echo.
|
63 |
+
echo Please set the JAVA_HOME variable in your environment to match the
|
64 |
+
echo location of your Java installation.
|
65 |
+
|
66 |
+
goto fail
|
67 |
+
|
68 |
+
:execute
|
69 |
+
@rem Setup the command line
|
70 |
+
|
71 |
+
set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
|
72 |
+
|
73 |
+
|
74 |
+
@rem Execute Gradle
|
75 |
+
"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %*
|
76 |
+
|
77 |
+
:end
|
78 |
+
@rem End local scope for the variables with windows NT shell
|
79 |
+
if %ERRORLEVEL% equ 0 goto mainEnd
|
80 |
+
|
81 |
+
:fail
|
82 |
+
rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
|
83 |
+
rem the _cmd.exe /c_ return code!
|
84 |
+
set EXIT_CODE=%ERRORLEVEL%
|
85 |
+
if %EXIT_CODE% equ 0 set EXIT_CODE=1
|
86 |
+
if not ""=="%GRADLE_EXIT_CONSOLE%" exit %EXIT_CODE%
|
87 |
+
exit /b %EXIT_CODE%
|
88 |
+
|
89 |
+
:mainEnd
|
90 |
+
if "%OS%"=="Windows_NT" endlocal
|
91 |
+
|
92 |
+
:omega
|
bindings/java/settings.gradle
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
rootProject.name = "whispercpp"
|