File size: 9,536 Bytes
0fcbf28 3eeee98 ccff356 3eeee98 ccff356 ee3b864 3eeee98 76849d7 3eeee98 1d88bdc 3eeee98 5d2461b 3ef7bcd 248976b d06dbc0 8e6e415 d06dbc0 cd81b30 3eeee98 aefaec7 3eeee98 3ef7bcd 3eeee98 b3f6e0e c418a9e 3eeee98 3ef7bcd 0537f85 3ef7bcd 3eeee98 3ef7bcd 747ffc5 3eeee98 aefaec7 3eeee98 9dc01a4 3eeee98 9dc01a4 3eeee98 9dc01a4 3eeee98 6be5772 3eeee98 6be5772 5d2461b 3ef7bcd 3eeee98 5d2461b 3eeee98 5d2461b 3ef7bcd 0537f85 c5889e1 3ef7bcd 0537f85 3ef7bcd 5d2461b 3eeee98 cb8e9a3 3eeee98 cd81b30 8cd4a84 cd81b30 bbebd26 d1ceb3d bbebd26 706ce8f 4f21fc5 bbebd26 758ff20 d1ceb3d 758ff20 d1ceb3d 1112b3e e2f7493 2c00960 1112b3e e2f7493 2c00960 e2f7493 5a70475 1a25f98 bbebd26 c33769f 71f261f bbebd26 3ef7bcd 2c00960 5d2461b 1d88bdc 3eeee98 05c2457 47b61b6 ee3b864 2ea5795 05c2457 3eeee98 0fcbf28 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 |
<!doctype html>
<html lang="en">
<head>
<meta name="viewport" content="width=device-width" />
<link rel="stylesheet" href="style.css" />
<meta charset="UTF-8">
<title>Matcha-TTS ONNX Benchmarks</title>
</head>
<body>
<h1>Matcha-TTS ONNX Benchmarks</h1>
<script src="https://cdn.jsdelivr.net/npm/onnxruntime-web/dist/ort.webgpu.min.js" ></script>
<script type="module">
import { MatchaTTSRaw } from "./js-esm/matcha_tts_raw.js";
import { webWavPlay } from "./js-esm/web_wav_play.js";
import { arpa_to_ipa } from "./js-esm/arpa_to_ipa.js";
import { loadCmudict } from "./js-esm/cmudict_loader.js";
import { env,textToArpa} from "./js-esm/text_to_arpa.js";
// Settings on the `env` object imported from text_to_arpa.js.
// NOTE(review): presumably the Transformers.js environment used by the G2P model — confirm in that module.
env.allowLocalModels = true;
env.localModelPath = "./models/";
env.backends.onnx.logLevel = "error";
// ---- Page-wide state shared by the functions below ----
// MatchaTTSRaw instance created by main(); loadModel() resets it to null to force a reload.
let matcha_tts_raw
// Dictionary object handed to loadCmudict() and textToArpa().
let cmudict ={}
// Flag raised by main() while it is running.
let speaking = false
// Sum of the per-call durations measured in main(), in milliseconds (performance.now() deltas).
let total_infer_time=0
// Number of infer() calls that returned a non-null result.
let count_infer=0
// Model name recorded by main() when it loads a model.
let loaded_model_name
// Duration of the last model load, in seconds.
let load_time
/**
 * Synthesize the text in #textInput and pass the result to webWavPlay(),
 * loading the model and the CMU dictionary first when no model is loaded.
 *
 * This function is also installed directly as the click handler of #myButton;
 * in that case the argument is an Event, not a string, and the default model
 * name is used.
 *
 * Changes from the previous version:
 *  - the "already speaking" guard now actually returns instead of falling
 *    through and running a second, overlapping synthesis;
 *  - `speaking` is always cleared (try/finally), so one failed load or infer
 *    no longer leaves the flag stuck;
 *  - the module-level `matcha_tts_raw` is only assigned after the model and
 *    dictionary loaded successfully, so a failed load is retried next time
 *    instead of reusing an instance without a model;
 *  - console timers are always closed, avoiding duplicate-label warnings.
 *
 * @param {string|Event} model_name - file stem of the model under ./models/matcha-tts/.
 * @returns {Promise<void>} resolves after webWavPlay() was called, or
 *   immediately when another call is still in progress.
 */
async function main(model_name) {
  // Anything that is not a string (e.g. the click Event) selects the default model.
  const modelName = typeof model_name === 'string' ? model_name : "en001_ep6399_univ_simplify"
  console.log(modelName)
  if (speaking) {
    // Another call owns the `speaking` flag; leave it untouched and bail out.
    console.log("speaking return")
    return
  }
  speaking = true
  try {
    console.log("main called")
    if (!matcha_tts_raw) {
      const load_startTime = performance.now();
      const loading = new MatchaTTSRaw()
      const model_path = `./models/matcha-tts/${modelName}.onnx`
      console.log(model_path)
      console.time("load model");
      try {
        await loading.load_model(model_path, { executionProviders: ['webgpu', 'wasm'] });
      } finally {
        console.timeEnd("load model");
      }
      // Measured before the dictionary load so the figure covers the model only.
      const loadSeconds = (performance.now() - load_startTime) / 1000
      await loadCmudict(cmudict, './dictionaries/cmudict-0.7b')
      // Publish the shared state only once everything above succeeded.
      load_time = loadSeconds
      loaded_model_name = modelName
      matcha_tts_raw = loading
      update_infer_bench1()
    } else {
      console.log("session exist skip load model")
    }
    // Keep a local reference: loadModel() may null the shared one while we await.
    const tts = matcha_tts_raw
    const startTime = performance.now();
    const text = document.getElementById('textInput').value
    console.log("### textToArpa call")
    const arpa_text = await textToArpa(cmudict, text)
    console.log("### arpa returned")
    // All whitespace is stripped from the IPA string before it is passed to infer().
    const ipa_text = arpa_to_ipa(arpa_text).replace(/\s/g, "");
    const spks = 0
    const speed = document.getElementById('speed').value
    const temperature = document.getElementById('temperature').value
    let result
    console.time("infer");
    try {
      result = await tts.infer(ipa_text, temperature, speed, spks);
    } finally {
      console.timeEnd("infer");
    }
    if (result != null) {
      // The recorded time spans text-to-phoneme conversion plus infer().
      const infer_time = performance.now() - startTime
      total_infer_time += infer_time
      count_infer += 1
      update_infer_bench2()
      webWavPlay(result)
    }
  } finally {
    speaking = false
  }
}
/**
 * Show the loaded model's name and its load time (seconds, one decimal)
 * in the #result1 element.
 */
function update_infer_bench1() {
  const seconds = load_time.toFixed(1)
  const summary = `${loaded_model_name} load time ${seconds} sec`
  const target = document.getElementById('result1')
  target.innerText = summary
}
/**
 * Show the number of inferences and their average duration
 * (accumulated milliseconds converted to seconds, one decimal) in #result2.
 */
function update_infer_bench2() {
  const avgSeconds = total_infer_time / count_infer / 1000
  const summary = `Infer Count ${count_infer} avg infer-time ${avgSeconds.toFixed(1)} sec`
  const target = document.getElementById('result2')
  target.innerText = summary
}
/**
 * Copy the value of the #spks control into #spks_label,
 * left-padded with zeros to three characters.
 */
function update_range() {
  const { value: rawValue } = document.getElementById('spks')
  const padded = rawValue.toString().padStart(3, '0')
  const label = document.getElementById('spks_label')
  label.textContent = padded
}
/**
 * Mirror the current value of the #temperature slider into its label
 * (element id "tempature_label", spelled as in the markup).
 */
function update_range2() {
  const { value: current } = document.getElementById('temperature')
  const label = document.getElementById('tempature_label')
  label.textContent = current
}
/**
 * Mirror the current value of the #speed slider into #speed_label.
 */
function update_range3() {
  const { value: current } = document.getElementById('speed')
  const label = document.getElementById('speed_label')
  label.textContent = current
}
// Once the page has loaded, attach the handlers for the static controls:
// the speak button runs main(), and each slider refreshes its label on change.
window.onload = async function () {
  const bindings = [
    ['myButton', 'onclick', main],
    ['temperature', 'onchange', update_range2],
    ['speed', 'onchange', update_range3],
  ]
  for (const [elementId, property, handler] of bindings) {
    document.getElementById(elementId)[property] = handler
  }
}
/**
 * Switch to another model: clear the benchmark counters, drop the current
 * engine so main() loads again, then start main() with the given model name.
 *
 * @param {string} model_name - model file stem forwarded to main().
 */
function loadModel(model_name) {
  // Discard the current engine first; main() reloads when it is falsy.
  matcha_tts_raw = null
  count_infer = 0
  total_infer_time = 0
  main(model_name)
}
/**
 * Build a button that loads the given model when clicked.
 *
 * @param {string} label - text shown on the button.
 * @param {string} model_name - model file stem passed to loadModel() on click.
 * @returns {HTMLButtonElement} the new button (not yet attached to the page).
 */
function create_button(label, model_name) {
  // Create the button element.
  const element = document.createElement('button')
  element.style = "margin:4px;"
  element.textContent = label
  // Register the click handler.
  element.onclick = () => {
    loadModel(model_name)
  }
  return element
}
// Populate #buttons with one button per model, row by row.
// Each entry is [button label, model file stem]; `null` inserts a line break.
{
  const modelButtonRows = [
    ["ljspeech", "ljspeech_sim"],
    ["ljspeech-quantized", "ljspeech_sim_q8"],
    ["vctk", "vctk_univ_simplify"],
    ["vctk-quantized", "vctk_univ_simplify_q8"],
    ["en001", "en001_ep6399_univ_simplify"],
    ["en001-quantized", "en001_ep6399_univ_simplify_q8"],
    null,
    ["en001-t2-step01", "en001_6399_T2_step01"],
    ["en001-t2-step05", "en001_6399_T2_step05"],
    ["en001-t2-step10", "en001_6399_T2_step10"],
    // disabled: ["en001-t2-step20", "en001_6399_T2_step20"],
    null,
    ["en001-univ-step01", "en001_6399_univ_step01"],
    ["en001-univ-step05", "en001_6399_univ_step05"],
    ["en001-univ-step10", "en001_6399_univ_step10"],
    // disabled: ["en001-univ-step20", "en001_6399_univ_step20"],
  ]
  for (const row of modelButtonRows) {
    const container = document.getElementById('buttons')
    if (row === null) {
      container.appendChild(document.createElement('br'))
    } else {
      const [label, modelName] = row
      container.appendChild(create_button(label, modelName))
    }
  }
}
</script>
<div id="result1">Click button to load a model</div>
<div id="buttons"></div>
<br>
<div id="result2">en001-T2 and en001-univ are experimental</div>
<br><br>
<input type="text" id="textInput" value ="Hello Huggingface." placeholder="Enter some text here...">
<button id="myButton">Text To Speak</button><br>
<label for ="temperature" style="width: 110px;display: inline-block;">Temperature</label>
<input type="range" id="temperature" min="0" max="1.0" value="0.5" step="0.1"/>
<label for ="temperature" id="tempature_label">0.5</label><br>
<label for ="speed" style="width: 110px;display: inline-block;">Speed</label>
<input type="range" id="speed" min="0.1" max="2.0" value="1.0" step="0.1"/>
<label for ="speed" id="speed_label">1.0</label>
<br>
<br>
<div>Load time is about 15 sec; TTS for a short text takes about 2 sec (on my 2070 Super GPU).</div><br>
<div>The quantized versions are too slow; so far they exist only because of the GitHub Pages 100 MB limitation.</div><br>
<div>Multi-speaker (vctk) is a little slower than single-speaker. The default number of timesteps is 5 (the smallest, 1, is about 300 msec faster, but the audio quality becomes lower).</div>
<br>
<div id="footer">
<b>Credits</b><br>
<a href="https://github.com/akjava/Matcha-TTS-Japanese" style="font-size: 9px" target="link">Matcha-TTS-Japanese</a> |
<a href = "http://www.udialogue.org/download/cstr-vctk-corpus.html" style="font-size: 9px" target="link">CSTR VCTK Corpus</a> |
<a href = "https://github.com/cmusphinx/cmudict" style="font-size: 9px" target="link">CMUDict</a> |
<a href = "https://huggingface.co/docs/transformers.js/index" style="font-size: 9px" target="link">Transformers.js</a> |
<a href = "https://huggingface.co/cisco-ai/mini-bart-g2p" style="font-size: 9px" target="link">mini-bart-g2p</a> |
<a href = "https://onnxruntime.ai/docs/get-started/with-javascript/web.html" style="font-size: 9px" target="link">ONNXRuntime-Web</a> |
<a href = "https://github.com/akjava/English-To-IPA-Collections" style="font-size: 9px" target="link">English-To-IPA-Collections</a> |
<a href ="https://huggingface.co/papers/2309.03199" style="font-size: 9px" target="link">Matcha-TTS Paper</a>
</div>
</body>
</html>
|