benjamin-paine committed on
Commit
206075b
1 Parent(s): 91ade26

Upload 2 files

Browse files
Files changed (2) hide show
  1. index.js +263 -0
  2. style.css +152 -0
index.js ADDED
@@ -0,0 +1,263 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
/**
 * Play audio samples using the Web Audio API.
 * @param {Float32Array} audioSamples - The audio samples to play, in [-1, 1].
 * @param {number} sampleRate - The sample rate of the audio samples in Hz. Defaults to 16000.
 */
function playAudioSamples(audioSamples, sampleRate = 16000) {
    // Create an AudioContext (webkit prefix for older Safari)
    const audioContext = new (window.AudioContext || window.webkitAudioContext)();

    // Create a mono AudioBuffer sized to the sample array
    const audioBuffer = audioContext.createBuffer(
        1,                   // number of channels
        audioSamples.length, // length of the buffer in samples
        sampleRate           // sample rate (samples per second)
    );

    // Fill the AudioBuffer with the Float32Array of audio samples
    audioBuffer.getChannelData(0).set(audioSamples);

    // Create a BufferSource node and attach the buffer
    const source = audioContext.createBufferSource();
    source.buffer = audioBuffer;

    // Connect the source to the AudioContext's destination (the speakers)
    source.connect(audioContext.destination);

    // FIX: release the AudioContext once playback finishes. The original
    // created a new context per call and never closed it; browsers limit the
    // number of simultaneously live AudioContexts, so repeated playback would
    // eventually fail.
    source.onended = () => audioContext.close();

    // Start playback
    source.start();
}
30
+
31
/**
 * Turns floating-point audio samples into a WAV (RIFF) blob.
 * @param {Float32Array} audioSamples - The audio samples in [-1, 1]; interleaved if multi-channel.
 * @param {number} sampleRate - The sample rate of the audio samples in Hz. Defaults to 16000.
 * @param {number} numChannels - The number of channels in the audio. Defaults to 1 (mono).
 * @return {Blob} A blob of type `audio/wav`: a 44-byte WAV header followed by 16-bit PCM data.
 */
function samplesToBlob(audioSamples, sampleRate = 16000, numChannels = 1) {
    // Helper to write an ASCII string to the DataView
    const writeString = (view, offset, string) => {
        for (let i = 0; i < string.length; i++) {
            view.setUint8(offset + i, string.charCodeAt(i));
        }
    };

    // Helper to convert Float32Array samples to 16-bit little-endian PCM
    const floatTo16BitPCM = (output, offset, input) => {
        for (let i = 0; i < input.length; i++, offset += 2) {
            let s = Math.max(-1, Math.min(1, input[i])); // Clamping to [-1, 1]
            // Negative values scale by 0x8000 and positive by 0x7FFF so both
            // endpoints map onto the full signed 16-bit range.
            output.setInt16(offset, s < 0 ? s * 0x8000 : s * 0x7FFF, true);
        }
    };

    // Calculate sizes (16-bit PCM = 2 bytes per sample)
    const blockAlign = numChannels * 2;       // bytes per sample frame
    const byteRate = sampleRate * blockAlign; // bytes per second of audio
    const wavHeaderSize = 44;
    // FIX: `audioSamples` already contains every sample (interleaved across
    // channels), so the payload is samples * 2 bytes. The original multiplied
    // by numChannels as well, which over-allocated the buffer and wrote an
    // incorrect Subchunk2Size for anything other than mono. Mono output
    // (the default) is byte-identical to before.
    const dataLength = audioSamples.length * 2;
    const buffer = new ArrayBuffer(wavHeaderSize + dataLength);
    const view = new DataView(buffer);

    // Write WAV file headers
    writeString(view, 0, 'RIFF');             // ChunkID
    view.setUint32(4, 36 + dataLength, true); // ChunkSize
    writeString(view, 8, 'WAVE');             // Format
    writeString(view, 12, 'fmt ');            // Subchunk1ID
    view.setUint32(16, 16, true);             // Subchunk1Size (PCM = 16)
    view.setUint16(20, 1, true);              // AudioFormat (PCM = 1)
    view.setUint16(22, numChannels, true);    // NumChannels
    view.setUint32(24, sampleRate, true);     // SampleRate
    view.setUint32(28, byteRate, true);       // ByteRate
    view.setUint16(32, blockAlign, true);     // BlockAlign
    view.setUint16(34, 16, true);             // BitsPerSample (16-bit PCM)
    writeString(view, 36, 'data');            // Subchunk2ID
    view.setUint32(40, dataLength, true);     // Subchunk2Size

    // Convert the Float32Array audio samples to 16-bit PCM and write them
    // into the payload region after the header
    floatTo16BitPCM(view, wavHeaderSize, audioSamples);

    // Create and return the Blob
    return new Blob([view], { type: 'audio/wav' });
}
84
+
85
/**
 * Renders a blob to an audio element with playback controls.
 * Use `appendChild(result)` to add the element to the document or a node.
 * @param {Blob} audioBlob - A blob with a valid audio type.
 * @returns {HTMLAudioElement} An audio element whose source is an object URL for the blob.
 * @see samplesToBlob
 */
function blobToAudio(audioBlob) {
    const audio = document.createElement("audio");
    audio.src = URL.createObjectURL(audioBlob);
    audio.controls = true;
    return audio;
}
98
+
99
/** Configuration */

// Graph line color for each series, as [r, g, b]
const colors = {
    "buddy": [0,119,187],
    "hey buddy": [0,153,136],
    "hi buddy": [51,227,138],
    "sup buddy": [238,119,51],
    "yo buddy": [204,51,217],
    "okay buddy": [238,51,119],
    "hello buddy": [184,62,104],
    "speech": [22,200,206],
    "frame budget": [25,255,25]
};
// Base URL the ONNX models are downloaded from
const rootUrl = "https://huggingface.co/benjamin-paine/hey-buddy/resolve/main";
// Wake-word display names; model filenames use dashes instead of spaces
const wakeWords = ["buddy", "hey buddy", "hi buddy", "sup buddy", "yo buddy", "okay buddy", "hello buddy"];
// Dimensions of every graph canvas, in pixels
const canvasSize = { width: 640, height: 100 };
const graphLineWidth = 1;
// Options passed to the HeyBuddy engine
const options = {
    debug: true,
    // FIX: replace EVERY space, not just the first. `String.replace` with a
    // string pattern substitutes only the first occurrence, which would build
    // a wrong filename for any future wake word containing two or more
    // spaces. Output is identical for all current (single-space) wake words.
    modelPath: wakeWords.map((word) => `${rootUrl}/models/${word.replace(/ /g, '-')}.onnx`),
    vadModelPath: `${rootUrl}/pretrained/silero-vad.onnx`,
    spectrogramModelPath: `${rootUrl}/pretrained/mel-spectrogram.onnx`,
    embeddingModelPath: `${rootUrl}/pretrained/speech-embedding.onnx`,
};
122
+
123
/** Main */
document.addEventListener("DOMContentLoaded", () => {
    /** DOM elements */
    const graphsContainer = document.getElementById("graphs");
    const audioContainer = document.getElementById("audio");

    /** Memory for drawing */
    const graphs = {};  // graph name -> canvas element
    const history = {}; // series name -> rolling array of values (one per pixel column)
    const current = {}; // series name -> latest value reported by the engine
    const active = {};  // series name -> whether the detector is currently triggered

    /** Instantiate */
    const heyBuddy = new HeyBuddy(options);

    /** Add callbacks */

    // When processed, update state for next draw
    heyBuddy.onProcessed((result) => {
        current["frame budget"] = heyBuddy.frameTimeEma;
        current["speech"] = result.speech.probability || 0.0;
        active["speech"] = result.speech.active;
        for (let wakeWord in result.wakeWords) {
            // Engine keys use dashes; the display/color tables use spaces
            current[wakeWord.replace('-', ' ')] = result.wakeWords[wakeWord].probability || 0.0;
            active[wakeWord.replace('-', ' ')] = result.wakeWords[wakeWord].active;
        }
        if (result.recording) {
            audioContainer.innerHTML = "Recording&hellip;";
        }
    });

    // When recording is complete, replace the audio element
    heyBuddy.onRecording((audioSamples) => {
        const audioBlob = samplesToBlob(audioSamples);
        const audioElement = blobToAudio(audioBlob);
        audioContainer.innerHTML = "";
        audioContainer.appendChild(audioElement);
    });

    /** Add graphs */
    for (let graphName of ["wake words", "speech", "frame budget"]) {
        // Create containers for the graph and its label
        const graphContainer = document.createElement("div");
        const graphLabel = document.createElement("label");
        graphLabel.textContent = graphName;

        // Create a canvas for the graph
        const graphCanvas = document.createElement("canvas");
        graphCanvas.className = "graph";
        graphCanvas.width = canvasSize.width;
        graphCanvas.height = canvasSize.height;
        graphs[graphName] = graphCanvas;

        // Add the canvas to the container and the container to the document
        graphContainer.appendChild(graphCanvas);
        graphContainer.appendChild(graphLabel);
        graphsContainer.appendChild(graphContainer);

        // If this is the wake-word graph, also add a per-word color legend
        if (graphName === "wake words") {
            const graphLegend = document.createElement("div");
            graphLegend.className = "legend";
            for (let wakeWord of wakeWords) {
                const legendItem = document.createElement("div");
                const [r,g,b] = colors[wakeWord];
                legendItem.style.color = `rgb(${r},${g},${b})`;
                legendItem.textContent = wakeWord;
                graphLegend.appendChild(legendItem);
            }
            graphLabel.appendChild(graphLegend);
        }
    }

    /** Define draw loop */
    const draw = () => {
        // Draw speech and model graphs
        for (let graphName in graphs) {
            const isWakeWords = graphName === "wake words";
            const isFrameBudget = graphName === "frame budget";
            // The wake-word canvas overlays one series per wake word
            const subGraphs = isWakeWords ? wakeWords : [graphName];

            let isFirst = true;
            for (let name of subGraphs) {
                // Update history
                history[name] = history[name] || [];
                if (isFrameBudget) {
                    // Normalize frame time against the 120ms budget
                    history[name].push((current[name] || 0.0) / 120.0);
                } else {
                    history[name].push(current[name] || 0.0);
                }

                // Trim history to one value per horizontal pixel
                if (history[name].length > canvasSize.width) {
                    history[name] = history[name].slice(history[name].length - canvasSize.width);
                }

                // Draw graph
                const canvas = graphs[graphName];
                const ctx = canvas.getContext("2d");
                const [r,g,b] = colors[name];
                // Dim inactive series; frame budget is always fully opaque
                const opacity = isFrameBudget || active[name] ? 1.0 : 0.5;

                if (isFirst) {
                    // Clear canvas before the first series only, so the
                    // overlaid wake-word series accumulate on one canvas
                    ctx.clearRect(0, 0, canvas.width, canvas.height);
                    isFirst = false;
                }

                ctx.strokeStyle = `rgba(${r},${g},${b},${opacity})`;
                ctx.fillStyle = `rgba(${r},${g},${b},${opacity/2})`;
                ctx.lineWidth = graphLineWidth;

                // Draw from left to right (the frame shifts right to left)
                ctx.beginPath();
                let lastX;
                for (let i = 0; i < history[name].length; i++) {
                    const x = i;
                    const y = canvas.height - history[name][i] * canvas.height;
                    if (i === 0) {
                        // FIX: start the path at x = 0 (the original used
                        // moveTo(1, y), which left a one-pixel notch at the
                        // left edge when the polygon closes along x = 0 below)
                        ctx.moveTo(x, y);
                    } else {
                        ctx.lineTo(x, y);
                    }
                    lastX = x;
                }
                // extend downwards to make a closed polygon for the fill
                ctx.lineTo(lastX, canvas.height);
                ctx.lineTo(0, canvas.height);
                ctx.closePath();
                ctx.fill();
                ctx.stroke();
            }
        }

        // Request next frame
        requestAnimationFrame(draw);
    };

    /** Start the loop */
    requestAnimationFrame(draw);
});
style.css ADDED
@@ -0,0 +1,152 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
/* Full-viewport flex column that centers the demo card and graphs. */
body {
    display: flex;
    flex-flow: column nowrap;
    justify-content: center;
    align-items: center;
    height: 100vh;
    width: 100vw;
    padding: 0;
    margin: 0;
    font-family: -apple-system, BlinkMacSystemFont, "Arial", sans-serif;
    background-color: rgb(11,15,25);
    color: white
}

/* Typography */
h1 {
    font-size: 16px;
    margin-top: 0;
}

p {
    font-size: 15px;
    margin-bottom: 10px;
    margin-top: 5px;
}

/* Accent color for emphasized text (matches the "speech" graph color). */
strong, em {
    color: #16c8ce;
}

/* Rounded description card at the top of the page. */
.card {
    max-width: 640px;
    margin: 0 auto;
    padding: 16px;
    border: 1px solid rgb(107, 114, 128);
    border-radius: 16px;
    background-color: rgb(16, 22, 35);
}

.card p:last-child {
    margin-bottom: 0;
}

.card img {
    width: 100%;
    max-width: 420px;
    margin: 0 auto;
}

/* Logo and external-link rows, centered and wrapping. */
#logo, #links {
    display: flex;
    flex-flow: row wrap;
    justify-content: center;
}

#links {
    gap: 1em;
    margin: 1em;
}

#links img {
    height: 20px;
}

/* Vertical stack of graph canvases (filled by index.js). */
#graphs {
    display: flex;
    flex-flow: column nowrap;
    justify-content: center;
    align-items: center;
    gap: 1em;
}

label {
    display: block;
}

/* Each graph wrapper is a positioning context for its overlaid label. */
#graphs div {
    position: relative;
}

/* Graph name pinned to the top-right corner of its canvas. */
#graphs label {
    position: absolute;
    right: 0;
    top: 0;
    max-width: 120px;
    text-transform: uppercase;
    font-family: monospace;
    text-align: right;
    padding: 0 4px;
    line-height: 20px;
    background-image: linear-gradient(to top, rgba(255,255,255,0.1), rgba(255,255,255,0.0));
    border: 1px solid rgba(255,255,255,0.1);
    border-top: none;
    border-right: none;
}

/* Wake-word color legend inside the graph label. */
#graphs .legend {
    display: flex;
    flex-flow: row wrap;
    justify-content: flex-end;
    gap: 1px 5px;
    text-transform: uppercase;
    font-family: monospace;
    font-size: 10px;
    line-height: 11px;
}

/* Graph canvas: faint horizontal gridlines every 10px over a bottom glow. */
canvas.graph {
    border: 1px solid rgba(255,255,255,0.1);
    border-bottom: none;
    background-image:
        repeating-linear-gradient(to top, rgba(255,255,255,0.05), rgba(255,255,255,0.05) 1px, transparent 1px, transparent 10px),
        linear-gradient(to top, rgba(255,255,255,0.1), rgba(255,255,255,0.0));
}

/* Recording panel below the graphs; holds status text or an <audio> element. */
#recording {
    margin-top: 1em;
    position: relative;
    display: block;
    height: 100px;
    line-height: 100px;
    text-align: center;
    font-size: 11px;
    background-image: linear-gradient(to top, rgba(255,255,255,0.1), rgba(255,255,255,0.0));
    border: 1px solid rgba(255,255,255,0.1);
    border-bottom-left-radius: 10px;
    border-bottom-right-radius: 10px;
}

/* Centers the injected audio player within the recording panel. */
#recording #audio {
    display: flex;
    flex-flow: row nowrap;
    align-items: center;
    justify-content: center;
    height: 100%;
}

/* Panel label pinned to the top-right corner, mirroring the graph labels. */
#recording label {
    position: absolute;
    right: 0;
    top: 0;
    max-width: 120px;
    text-transform: uppercase;
    font-family: monospace;
    font-size: 12px;
    text-align: right;
    padding: 0 4px;
    line-height: 20px;
    background-image: linear-gradient(to top, rgba(255,255,255,0.1), rgba(255,255,255,0.0));
    border: 1px solid rgba(255,255,255,0.1);
    border-top: none;
    border-right: none;
}