/**
 * Releases the heap memory referenced by a config handle produced by one of
 * the `initSherpaOnnx*Config` helpers in this file.
 *
 * A handle is a plain object with a `ptr` field and, optionally, a `buffer`
 * field plus nested handles stored under `config`, `segmentation`,
 * `embedding` and `clustering`. Nested handles are released before the
 * handle's own `ptr`.
 *
 * @param {Object} config - Handle to release. `null`/`undefined` is ignored.
 * @param {Object} Module - Object exposing `_free(pointer)`.
 */
function freeConfig(config, Module) {
  // A missing (null/undefined) nested handle owns nothing, so there is
  // nothing to release.
  if (!config) {
    return;
  }

  if ('buffer' in config) {
    Module._free(config.buffer);
  }

  // Order matters only in that it mirrors the order the handles are nested.
  const nestedKeys = ['config', 'segmentation', 'embedding', 'clustering'];
  for (const key of nestedKeys) {
    if (key in config) {
      freeConfig(config[key], Module);
    }
  }

  Module._free(config.ptr);
}

/**
 * Writes a pyannote segmentation model config onto the heap.
 *
 * The written struct consists of a single 4-byte field: a pointer to a
 * NUL-terminated UTF-8 copy of `config.model` (empty string when unset).
 *
 * @param {Object} config - May contain `model` (string).
 * @param {Object} Module - Object exposing `lengthBytesUTF8`, `stringToUTF8`,
 *     `_malloc` and `setValue`.
 * @returns {{buffer: number, ptr: number, len: number}} `buffer` is the
 *     string storage, `ptr` the struct, `len` the struct size in bytes.
 *     Both allocations are owned by the caller (see `freeConfig`).
 */
function initSherpaOnnxOfflineSpeakerSegmentationPyannoteModelConfig(
    config, Module) {
  const model = config.model || '';

  // +1 leaves room for the terminating NUL written by stringToUTF8.
  const modelLen = Module.lengthBytesUTF8(model) + 1;
  const buffer = Module._malloc(modelLen);

  const len = 1 * 4;
  const ptr = Module._malloc(len);

  Module.stringToUTF8(model, buffer, modelLen);
  Module.setValue(ptr, buffer, 'i8*');

  return {
    buffer: buffer,
    ptr: ptr,
    len: len,
  };
}

/**
 * Writes a speaker segmentation model config onto the heap.
 *
 * Layout of the written struct, in order:
 *   - the pyannote sub-struct (copied by value, `pyannote.len` bytes)
 *   - numThreads (i32, default 1)
 *   - debug      (i32, default 0)
 *   - provider   (pointer to a NUL-terminated UTF-8 string, default 'cpu')
 *
 * Note: when `config.pyannote` is absent it is added to the caller's object
 * as `{model: ''}`.
 *
 * @param {Object} config - May contain `pyannote`, `numThreads`, `debug`,
 *     `provider`.
 * @param {Object} Module - Object exposing `lengthBytesUTF8`, `stringToUTF8`,
 *     `_malloc`, `setValue` and `_CopyHeap(src, numBytes, dst)`.
 * @returns {{buffer: number, ptr: number, len: number, config: Object}}
 *     `buffer` holds the provider string, `ptr`/`len` describe the struct,
 *     `config` is the nested pyannote handle (released by `freeConfig`).
 */
function initSherpaOnnxOfflineSpeakerSegmentationModelConfig(config, Module) {
  if (!('pyannote' in config)) {
    config.pyannote = {
      model: '',
    };
  }

  const pyannote = initSherpaOnnxOfflineSpeakerSegmentationPyannoteModelConfig(
      config.pyannote, Module);

  // Sub-struct plus three 4-byte fields.
  const len = pyannote.len + 3 * 4;
  const ptr = Module._malloc(len);

  let offset = 0;
  Module._CopyHeap(pyannote.ptr, pyannote.len, ptr + offset);
  offset += pyannote.len;

  Module.setValue(ptr + offset, config.numThreads || 1, 'i32');
  offset += 4;

  Module.setValue(ptr + offset, config.debug || 0, 'i32');
  offset += 4;

  const provider = config.provider || 'cpu';
  // +1 leaves room for the terminating NUL written by stringToUTF8.
  const providerLen = Module.lengthBytesUTF8(provider) + 1;
  const buffer = Module._malloc(providerLen);
  Module.stringToUTF8(provider, buffer, providerLen);
  Module.setValue(ptr + offset, buffer, 'i8*');

  return {
    buffer: buffer,
    ptr: ptr,
    len: len,
    config: pyannote,
  };
}

/**
 * Writes a speaker embedding extractor config onto the heap.
 *
 * Layout of the written struct (four 4-byte fields), in order:
 *   - model      (pointer to a NUL-terminated UTF-8 string, default '')
 *   - numThreads (i32, default 1)
 *   - debug      (i32, default 0)
 *   - provider   (pointer to a NUL-terminated UTF-8 string, default 'cpu')
 *
 * Both strings live back to back in one allocation (`buffer`): the model
 * string first, the provider string right after its terminating NUL.
 *
 * @param {Object} config - May contain `model`, `numThreads`, `debug`,
 *     `provider`.
 * @param {Object} Module - Object exposing `lengthBytesUTF8`, `stringToUTF8`,
 *     `_malloc` and `setValue`.
 * @returns {{buffer: number, ptr: number, len: number}} String storage,
 *     struct pointer and struct size in bytes; owned by the caller.
 */
function initSherpaOnnxSpeakerEmbeddingExtractorConfig(config, Module) {
  const model = config.model || '';
  const provider = config.provider || 'cpu';

  // +1 per string for the terminating NUL written by stringToUTF8.
  const modelLen = Module.lengthBytesUTF8(model) + 1;
  const providerLen = Module.lengthBytesUTF8(provider) + 1;
  const buffer = Module._malloc(modelLen + providerLen);

  const len = 4 * 4;
  const ptr = Module._malloc(len);

  const modelPtr = buffer;
  const providerPtr = buffer + modelLen;
  Module.stringToUTF8(model, modelPtr, modelLen);
  Module.stringToUTF8(provider, providerPtr, providerLen);

  let offset = 0;
  Module.setValue(ptr + offset, modelPtr, 'i8*');
  offset += 4;

  Module.setValue(ptr + offset, config.numThreads || 1, 'i32');
  offset += 4;

  Module.setValue(ptr + offset, config.debug || 0, 'i32');
  offset += 4;

  Module.setValue(ptr + offset, providerPtr, 'i8*');

  return {
    buffer: buffer,
    ptr: ptr,
    len: len,
  };
}

/**
 * Writes a fast-clustering config onto the heap.
 *
 * Layout of the written struct (two 4-byte fields), in order:
 *   - numClusters (i32, default -1)
 *   - threshold   (float, default 0.5)
 *
 * Defaults are applied with `||`, so a falsy value (including an explicit 0)
 * is replaced by the default.
 *
 * @param {Object} config - May contain `numClusters` and `threshold`.
 * @param {Object} Module - Object exposing `_malloc` and `setValue`.
 * @returns {{ptr: number, len: number}} Struct pointer and size in bytes;
 *     owned by the caller.
 */
function initSherpaOnnxFastClusteringConfig(config, Module) {
  const len = 2 * 4;
  const ptr = Module._malloc(len);

  Module.setValue(ptr, config.numClusters || -1, 'i32');
  Module.setValue(ptr + 4, config.threshold || 0.5, 'float');

  return {
    ptr: ptr,
    len: len,
  };
}

/**
 * Writes a complete offline speaker diarization config onto the heap.
 *
 * Layout of the written struct, in order:
 *   - segmentation sub-struct (copied by value)
 *   - embedding sub-struct    (copied by value)
 *   - clustering sub-struct   (copied by value)
 *   - minDurationOn  (float, default 0.2)
 *   - minDurationOff (float, default 0.5)
 *
 * Note: missing `segmentation`, `embedding` or `clustering` entries are
 * added to the caller's `config` object with default values.
 *
 * @param {Object} config - May contain `segmentation`, `embedding`,
 *     `clustering`, `minDurationOn`, `minDurationOff`.
 * @param {Object} Module - Object exposing `_malloc`, `setValue` and
 *     `_CopyHeap(src, numBytes, dst)`, plus whatever the nested init helpers
 *     need.
 * @returns {{ptr: number, len: number, segmentation: Object,
 *     embedding: Object, clustering: Object}} Struct pointer/size plus the
 *     nested handles, all of which are released by `freeConfig`.
 */
function initSherpaOnnxOfflineSpeakerDiarizationConfig(config, Module) {
  if (!('segmentation' in config)) {
    config.segmentation = {
      pyannote: {model: ''},
      numThreads: 1,
      debug: 0,
      provider: 'cpu',
    };
  }

  if (!('embedding' in config)) {
    config.embedding = {
      model: '',
      numThreads: 1,
      debug: 0,
      provider: 'cpu',
    };
  }

  if (!('clustering' in config)) {
    config.clustering = {
      numClusters: -1,
      threshold: 0.5,
    };
  }

  const segmentation = initSherpaOnnxOfflineSpeakerSegmentationModelConfig(
      config.segmentation, Module);
  const embedding =
      initSherpaOnnxSpeakerEmbeddingExtractorConfig(config.embedding, Module);
  const clustering =
      initSherpaOnnxFastClusteringConfig(config.clustering, Module);

  // Three sub-structs followed by two 4-byte floats.
  const len = segmentation.len + embedding.len + clustering.len + 2 * 4;
  const ptr = Module._malloc(len);

  let offset = 0;
  for (const part of [segmentation, embedding, clustering]) {
    Module._CopyHeap(part.ptr, part.len, ptr + offset);
    offset += part.len;
  }

  // NOTE(review): the fallback here is 0.2, while the default config in
  // createOfflineSpeakerDiarization uses 0.3 - confirm which is intended.
  Module.setValue(ptr + offset, config.minDurationOn || 0.2, 'float');
  offset += 4;

  Module.setValue(ptr + offset, config.minDurationOff || 0.5, 'float');

  return {
    ptr: ptr,
    len: len,
    segmentation: segmentation,
    embedding: embedding,
    clustering: clustering,
  };
}

/**
 * Thin wrapper around the native offline speaker diarization object exposed
 * by the WebAssembly module.
 *
 * Fields:
 *   - handle:     native object pointer (set to 0 after `free()`)
 *   - sampleRate: value reported by the native object at construction time
 *   - Module:     the module object passed to the constructor
 *   - config:     the caller's config object (not a copy)
 */
class OfflineSpeakerDiarization {
  /**
   * @param {Object} configObj - Diarization config; missing sections are
   *     filled in on this object by the config init helper.
   * @param {Object} Module - Module object exposing the
   *     `_SherpaOnnx*OfflineSpeakerDiarization*` functions, `_malloc`,
   *     `_free`, `HEAPF32` and `HEAP32`.
   */
  constructor(configObj, Module) {
    const config =
        initSherpaOnnxOfflineSpeakerDiarizationConfig(configObj, Module);

    let handle;
    try {
      handle = Module._SherpaOnnxCreateOfflineSpeakerDiarization(config.ptr);
    } finally {
      // The temporary config struct is only needed for the duration of the
      // create call above; release it even if that call throws.
      freeConfig(config, Module);
    }

    this.handle = handle;
    this.sampleRate =
        Module._SherpaOnnxOfflineSpeakerDiarizationGetSampleRate(this.handle);
    this.Module = Module;

    this.config = configObj;
  }

  /** Destroys the native object and clears `handle`. */
  free() {
    this.Module._SherpaOnnxDestroyOfflineSpeakerDiarization(this.handle);
    this.handle = 0;
  }

  /**
   * Pushes a new config to the native object. Does nothing unless
   * `configObj` has a `clustering` entry; afterwards only
   * `this.config.clustering` is updated on the JS side.
   *
   * @param {Object} configObj - Config containing a `clustering` entry.
   */
  setConfig(configObj) {
    if (!('clustering' in configObj)) {
      return;
    }

    const config =
        initSherpaOnnxOfflineSpeakerDiarizationConfig(configObj, this.Module);

    try {
      this.Module._SherpaOnnxOfflineSpeakerDiarizationSetConfig(
          this.handle, config.ptr);
    } finally {
      // Must use this.Module: there is no `Module` binding in this scope.
      freeConfig(config, this.Module);
    }

    this.config.clustering = configObj.clustering;
  }

  /**
   * Runs diarization on a buffer of audio samples.
   *
   * @param {Float32Array|ArrayLike<number>} samples - Audio samples. They
   *     are copied into the heap as 32-bit floats regardless of the source
   *     array type.
   * @returns {Array<{start: number, end: number, speaker: number}>} One entry
   *     per segment, in the order returned by the native sort-by-start-time
   *     call. `start`/`end` are floats (presumably seconds - confirm against
   *     the native API); `speaker` is an integer.
   */
  process(samples) {
    const wasm = this.Module;

    // The destination is HEAPF32, so sizes and indices must be computed with
    // the float32 element size, not the element size of `samples`.
    const bytesPerSample = Float32Array.BYTES_PER_ELEMENT;
    const pointer = wasm._malloc(samples.length * bytesPerSample);

    let r;
    try {
      wasm.HEAPF32.set(samples, pointer / bytesPerSample);
      r = wasm._SherpaOnnxOfflineSpeakerDiarizationProcess(
          this.handle, pointer, samples.length);
    } finally {
      wasm._free(pointer);
    }

    const numSegments =
        wasm._SherpaOnnxOfflineSpeakerDiarizationResultGetNumSegments(r);
    const segments =
        wasm._SherpaOnnxOfflineSpeakerDiarizationResultSortByStartTime(r);

    const ans = [];

    // Each segment is three 4-byte fields: float start, float end,
    // int32 speaker.
    const sizeOfSegment = 3 * 4;
    for (let i = 0; i < numSegments; ++i) {
      const base = (segments + i * sizeOfSegment) / 4;
      ans.push({
        start: wasm.HEAPF32[base + 0],
        end: wasm.HEAPF32[base + 1],
        speaker: wasm.HEAP32[base + 2],
      });
    }

    wasm._SherpaOnnxOfflineSpeakerDiarizationDestroySegment(segments);
    wasm._SherpaOnnxOfflineSpeakerDiarizationDestroyResult(r);

    return ans;
  }
}

/**
 * Creates an `OfflineSpeakerDiarization` instance.
 *
 * @param {Object} Module - Module object forwarded to the constructor.
 * @param {Object} [myConfig] - Config to use. When falsy, a default config
 *     pointing at `./segmentation.onnx` and `./embedding.onnx` is used.
 *     A supplied config is used as-is; it is not merged with the defaults.
 * @returns {OfflineSpeakerDiarization}
 */
function createOfflineSpeakerDiarization(Module, myConfig) {
  // Build the defaults only when the caller did not supply a config.
  const config = myConfig || {
    segmentation: {
      pyannote: {model: './segmentation.onnx'},
      debug: 1,
    },
    embedding: {
      model: './embedding.onnx',
      debug: 1,
    },
    clustering: {numClusters: -1, threshold: 0.5},
    minDurationOn: 0.3,
    minDurationOff: 0.5,
  };

  return new OfflineSpeakerDiarization(config, Module);
}

if (typeof process == 'object' && typeof process.versions == 'object' && |
|
typeof process.versions.node == 'string') { |
|
module.exports = { |
|
createOfflineSpeakerDiarization, |
|
}; |
|
} |
|
|