Spaces:
Running
Running
benjamin-paine
committed on
Upload 2 files
Browse files
index.js
ADDED
@@ -0,0 +1,263 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
/**
 * Play mono audio samples through the default output using the Web Audio API.
 *
 * A dedicated AudioContext is created for each call and closed again once
 * playback has ended (or if setup fails), so repeated calls do not accumulate
 * open contexts. Empty input is a no-op.
 *
 * @param {Float32Array} audioSamples - The audio samples to play.
 * @param {number} sampleRate - The sample rate of the audio samples.
 * @returns {void}
 */
function playAudioSamples(audioSamples, sampleRate = 16000) {
    // createBuffer() rejects a zero-length buffer, so there is nothing to play.
    if (!audioSamples || audioSamples.length === 0) {
        return;
    }

    // Fall back to the prefixed constructor where the standard one is missing.
    const AudioContextClass = window.AudioContext || window.webkitAudioContext;
    const audioContext = new AudioContextClass();

    // Browsers limit the number of live AudioContexts; release ours when done.
    const releaseContext = () => {
        if (audioContext.state !== "closed") {
            // close() is asynchronous; nothing needs to happen after it settles.
            void audioContext.close();
        }
    };

    try {
        // Single-channel buffer sized to hold every sample.
        const audioBuffer = audioContext.createBuffer(1, audioSamples.length, sampleRate);
        audioBuffer.getChannelData(0).set(audioSamples);

        // Route a buffer source straight to the context's destination.
        const source = audioContext.createBufferSource();
        source.buffer = audioBuffer;
        source.connect(audioContext.destination);

        source.onended = () => {
            source.disconnect();
            releaseContext();
        };

        source.start();
    } catch (err) {
        // Do not leak the context when buffer creation or start-up fails.
        releaseContext();
        throw err;
    }
}
|
30 |
+
|
31 |
+
/**
 * Encodes floating-point audio samples as a 16-bit PCM WAVE blob.
 *
 * Every input sample is clamped to [-1, 1], scaled to a signed 16-bit integer
 * and written exactly once, in order, after a 44-byte RIFF/WAVE header. When
 * `numChannels` is greater than 1 the samples are therefore taken to be
 * already interleaved; `numChannels` only affects the header fields.
 *
 * @param {Float32Array} audioSamples - The audio samples to encode.
 * @param {number} sampleRate - The sample rate of the audio samples.
 * @param {number} numChannels - The number of channels in the audio. Defaults to 1 (mono).
 * @return {Blob} A blob of type `audio/wav`
 */
function samplesToBlob(audioSamples, sampleRate = 16000, numChannels = 1) {
    const bytesPerSample = 2; // 16-bit PCM
    const wavHeaderSize = 44;

    // Helper to write an ASCII string to the DataView, one byte per character
    const writeString = (view, offset, string) => {
        for (let i = 0; i < string.length; i++) {
            view.setUint8(offset + i, string.charCodeAt(i));
        }
    };

    // Helper to write float samples as little-endian 16-bit PCM
    const floatTo16BitPCM = (output, offset, input) => {
        for (let i = 0; i < input.length; i++) {
            const s = Math.max(-1, Math.min(1, input[i])); // Clamping to [-1, 1]
            // Negative and positive halves scale differently because the
            // int16 range is asymmetric (-32768..32767).
            output.setInt16(offset + i * bytesPerSample, s < 0 ? s * 0x8000 : s * 0x7FFF, true);
        }
    };

    // Calculate sizes
    const blockAlign = numChannels * bytesPerSample; // bytes per frame (all channels)
    const byteRate = sampleRate * blockAlign;
    // Each input sample is written once, so the payload does not scale with
    // numChannels; multiplying here would over-declare the data chunk and pad
    // the file with silence.
    const dataLength = audioSamples.length * bytesPerSample;
    const buffer = new ArrayBuffer(wavHeaderSize + dataLength);
    const view = new DataView(buffer);

    // Write WAV file headers
    writeString(view, 0, 'RIFF');              // ChunkID
    view.setUint32(4, 36 + dataLength, true);  // ChunkSize
    writeString(view, 8, 'WAVE');              // Format
    writeString(view, 12, 'fmt ');             // Subchunk1ID
    view.setUint32(16, 16, true);              // Subchunk1Size (PCM = 16)
    view.setUint16(20, 1, true);               // AudioFormat (PCM = 1)
    view.setUint16(22, numChannels, true);     // NumChannels
    view.setUint32(24, sampleRate, true);      // SampleRate
    view.setUint32(28, byteRate, true);        // ByteRate
    view.setUint16(32, blockAlign, true);      // BlockAlign
    view.setUint16(34, 16, true);              // BitsPerSample (16-bit PCM)
    writeString(view, 36, 'data');             // Subchunk2ID
    view.setUint32(40, dataLength, true);      // Subchunk2Size

    // Convert the Float32Array audio samples to 16-bit PCM and write them after the header
    floatTo16BitPCM(view, wavHeaderSize, audioSamples);

    // Create and return the Blob
    return new Blob([view], { type: 'audio/wav' });
}
|
84 |
+
|
85 |
+
/**
 * Builds an `<audio>` element, with playback controls enabled, whose source
 * is an object URL created for the given blob.
 * The element is not attached anywhere; use `appendChild(result)` to add it
 * to the document or a node.
 * @param {Blob} audioBlob - A blob with a valid audio type.
 * @return {HTMLAudioElement} The detached audio element.
 * @see samplesToBlob
 */
function blobToAudio(audioBlob) {
    const objectUrl = URL.createObjectURL(audioBlob);
    return Object.assign(document.createElement("audio"), {
        controls: true,
        src: objectUrl,
    });
}
|
98 |
+
|
99 |
+
/** Configuration */

// RGB colour triplet for every series drawn on the graphs, keyed by series name.
const colors = {
    "buddy": [0,119,187],
    "hey buddy": [0,153,136],
    "hi buddy": [51,227,138],
    "sup buddy": [238,119,51],
    "yo buddy": [204,51,217],
    "okay buddy": [238,51,119],
    "hello buddy": [184,62,104],
    "speech": [22,200,206],
    "frame budget": [25,255,25]
};
// Base URL that all model files are resolved against.
const rootUrl = "https://huggingface.co/benjamin-paine/hey-buddy/resolve/main";
// Wake words, written with spaces; each one has a colour above and a model below.
const wakeWords = ["buddy", "hey buddy", "hi buddy", "sup buddy", "yo buddy", "okay buddy", "hello buddy"];
// Pixel size of each graph canvas.
const canvasSize = { width: 640, height: 100 };
const graphLineWidth = 1;
// Options handed to the HeyBuddy constructor.
const options = {
    debug: true,
    // One model per wake word; the file name is the wake word with every
    // space turned into a dash (a plain-string replace would only convert
    // the first space).
    modelPath: wakeWords.map((word) => `${rootUrl}/models/${word.replace(/ /g, '-')}.onnx`),
    vadModelPath: `${rootUrl}/pretrained/silero-vad.onnx`,
    spectrogramModelPath: `${rootUrl}/pretrained/mel-spectrogram.onnx`,
    embeddingModelPath: `${rootUrl}/pretrained/speech-embedding.onnx`,
};
|
122 |
+
|
123 |
+
/** Main */
// Once the DOM is ready: create the detector, build one canvas per graph,
// and start an animation loop that plots the most recent detector output.
document.addEventListener("DOMContentLoaded", () => {
    /** DOM elements */
    const graphsContainer = document.getElementById("graphs");
    const audioContainer = document.getElementById("audio");

    /** Memory for drawing */
    const graphs = {};  // graph name -> canvas element
    const history = {}; // series name -> plotted values, one per drawn frame
    const current = {}; // series name -> most recent value from the detector
    const active = {};  // series name -> most recent "active" flag

    // Object URL backing the audio element currently shown, if any.
    let recordingUrl = null;

    // Revoke the previous recording's object URL so its blob can be freed;
    // without this every new recording would leak the one before it.
    const releaseRecording = () => {
        if (recordingUrl !== null) {
            URL.revokeObjectURL(recordingUrl);
            recordingUrl = null;
        }
    };

    /** Instantiate */
    const heyBuddy = new HeyBuddy(options);

    /** Add callbacks */

    // When processed, update state for next draw
    heyBuddy.onProcessed((result) => {
        current["frame budget"] = heyBuddy.frameTimeEma;
        current["speech"] = result.speech.probability || 0.0;
        active["speech"] = result.speech.active;
        for (const wakeWord in result.wakeWords) {
            // Result keys use dashes; the graphs are keyed by the spaced form.
            const seriesName = wakeWord.replace(/-/g, ' ');
            current[seriesName] = result.wakeWords[wakeWord].probability || 0.0;
            active[seriesName] = result.wakeWords[wakeWord].active;
        }
        if (result.recording) {
            // Plain text, so textContent rather than innerHTML. This also
            // removes any previous audio element, so its URL can be released.
            audioContainer.textContent = "Recording…";
            releaseRecording();
        }
    });

    // When recording is complete, replace the audio element
    heyBuddy.onRecording((audioSamples) => {
        const audioBlob = samplesToBlob(audioSamples);
        const audioElement = blobToAudio(audioBlob);
        releaseRecording();
        // blobToAudio sets `src` to the object URL it created for the blob.
        recordingUrl = audioElement.src;
        audioContainer.textContent = "";
        audioContainer.appendChild(audioElement);
    });

    /** Add graphs */
    for (const graphName of ["wake words", "speech", "frame budget"]) {
        // Create containers for the graph and its label
        const graphContainer = document.createElement("div");
        const graphLabel = document.createElement("label");
        graphLabel.textContent = graphName;

        // Create a canvas for the graph
        const graphCanvas = document.createElement("canvas");
        graphCanvas.className = "graph";
        graphCanvas.width = canvasSize.width;
        graphCanvas.height = canvasSize.height;
        graphs[graphName] = graphCanvas;

        // Add the canvas to the container and the container to the document
        graphContainer.appendChild(graphCanvas);
        graphContainer.appendChild(graphLabel);
        graphsContainer.appendChild(graphContainer);

        // If this is the wake-word graph, also add legend
        if (graphName === "wake words") {
            const graphLegend = document.createElement("div");
            graphLegend.className = "legend";
            for (const wakeWord of wakeWords) {
                const legendItem = document.createElement("div");
                const [r, g, b] = colors[wakeWord];
                legendItem.style.color = `rgb(${r},${g},${b})`;
                legendItem.textContent = wakeWord;
                graphLegend.appendChild(legendItem);
            }
            graphLabel.appendChild(graphLegend);
        }
    }

    /** Define draw loop */
    const draw = () => {
        // Draw speech and model graphs
        for (const graphName in graphs) {
            const isFrameBudget = graphName === "frame budget";
            // The wake-word graph overlays one series per wake word; the
            // other graphs plot a single series named after the graph.
            const subGraphs = graphName === "wake words" ? wakeWords : [graphName];

            const canvas = graphs[graphName];
            const ctx = canvas.getContext("2d");

            // Clear once per graph, before any of its series are drawn
            ctx.clearRect(0, 0, canvas.width, canvas.height);
            ctx.lineWidth = graphLineWidth;

            for (const name of subGraphs) {
                // Update history
                const series = history[name] || (history[name] = []);
                const latest = current[name] || 0.0;
                // Frame time is plotted as a fraction of the 120ms budget
                series.push(isFrameBudget ? latest / 120.0 : latest);

                // Trim history to one value per canvas pixel column
                if (series.length > canvasSize.width) {
                    series.splice(0, series.length - canvasSize.width);
                }

                // Draw graph
                const [r, g, b] = colors[name];
                const opacity = (isFrameBudget || active[name]) ? 1.0 : 0.5;
                ctx.strokeStyle = `rgba(${r},${g},${b},${opacity})`;
                ctx.fillStyle = `rgba(${r},${g},${b},${opacity/2})`;

                // Draw from left to right (the frame shifts right to left)
                ctx.beginPath();
                series.forEach((value, x) => {
                    const y = canvas.height - value * canvas.height;
                    if (x === 0) {
                        // Start in the same column the first sample belongs to
                        ctx.moveTo(x, y);
                    } else {
                        ctx.lineTo(x, y);
                    }
                });
                // extend downwards to make a polygon
                ctx.lineTo(series.length - 1, canvas.height);
                ctx.lineTo(0, canvas.height);
                ctx.closePath();
                ctx.fill();
                ctx.stroke();
            }
        }

        // Request next frame
        requestAnimationFrame(draw);
    };

    /** Start the loop */
    requestAnimationFrame(draw);
});
|
style.css
ADDED
@@ -0,0 +1,152 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
/* Page shell: a dark, vertically centred column. */
body {
    display: flex;
    flex-flow: column nowrap;
    justify-content: center;
    align-items: center;
    /* min-height rather than height: with a fixed height, centred content
       taller than the viewport overflows above the scroll origin and its
       top becomes unreachable. */
    min-height: 100vh;
    /* 100% rather than 100vw: 100vw includes the vertical scrollbar's width
       and so forces a horizontal scrollbar whenever the page scrolls. */
    width: 100%;
    padding: 0;
    margin: 0;
    font-family: -apple-system, BlinkMacSystemFont, "Arial", sans-serif;
    background-color: rgb(11,15,25);
    color: white;
}

h1 {
    font-size: 16px;
    margin-top: 0;
}

p {
    font-size: 15px;
    margin-bottom: 10px;
    margin-top: 5px;
}

strong, em {
    color: #16c8ce;
}

/* Bordered content panel. */
.card {
    max-width: 640px;
    margin: 0 auto;
    padding: 16px;
    border: 1px solid rgb(107, 114, 128);
    border-radius: 16px;
    background-color: rgb(16, 22, 35);
}

.card p:last-child {
    margin-bottom: 0;
}

.card img {
    width: 100%;
    max-width: 420px;
    margin: 0 auto;
}

#logo, #links {
    display: flex;
    flex-flow: row wrap;
    justify-content: center;
}

#links {
    gap: 1em;
    margin: 1em;
}

#links img {
    height: 20px;
}

/* Stack of graph canvases. */
#graphs {
    display: flex;
    flex-flow: column nowrap;
    justify-content: center;
    align-items: center;
    gap: 1em;
}

label {
    display: block;
}

/* Positioning context for the label pinned to each graph's corner. */
#graphs div {
    position: relative;
}

/* Corner tags shared by the graphs and the recording panel. */
#graphs label,
#recording label {
    position: absolute;
    right: 0;
    top: 0;
    max-width: 120px;
    text-transform: uppercase;
    font-family: monospace;
    text-align: right;
    padding: 0 4px;
    line-height: 20px;
    background-image: linear-gradient(to top, rgba(255,255,255,0.1), rgba(255,255,255,0.0));
    border: 1px solid rgba(255,255,255,0.1);
    border-top: none;
    border-right: none;
}

#graphs .legend {
    display: flex;
    flex-flow: row wrap;
    justify-content: flex-end;
    gap: 1px 5px;
    text-transform: uppercase;
    font-family: monospace;
    font-size: 10px;
    line-height: 11px;
}

/* Graph canvas: faint horizontal rules every 10px over a soft gradient. */
canvas.graph {
    border: 1px solid rgba(255,255,255,0.1);
    border-bottom: none;
    background-image:
        repeating-linear-gradient(to top, rgba(255,255,255,0.05), rgba(255,255,255,0.05) 1px, transparent 1px, transparent 10px),
        linear-gradient(to top, rgba(255,255,255,0.1), rgba(255,255,255,0.0));
}

/* Recording panel, the same height as a graph canvas. */
#recording {
    margin-top: 1em;
    position: relative;
    display: block;
    height: 100px;
    line-height: 100px;
    text-align: center;
    font-size: 11px;
    background-image: linear-gradient(to top, rgba(255,255,255,0.1), rgba(255,255,255,0.0));
    border: 1px solid rgba(255,255,255,0.1);
    border-bottom-left-radius: 10px;
    border-bottom-right-radius: 10px;
}

#recording #audio {
    display: flex;
    flex-flow: row nowrap;
    align-items: center;
    justify-content: center;
    height: 100%;
}

/* The recording tag keeps its own size instead of inheriting the panel's 11px. */
#recording label {
    font-size: 12px;
}
|