File size: 2,583 Bytes
978f8ad 05710e7 978f8ad 1b53073 a68d2c7 db61e17 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 |
import { pipeline, env } from 'https://cdn.jsdelivr.net/npm/@xenova/transformers@2.17.2';
// Set the ONNX backend's internal log level to "error" on the imported env object.
env.backends.onnx.logLevelInternal="error"
// Dump the env object to the console when this module is loaded.
console.log(env)
// Shared promise for the G2P pipeline so the model is loaded once per module
// instead of once per call.
let g2pGeneratorPromise = null;

/**
 * Run the 'mini-bart-g2p' text2text-generation pipeline over the given words.
 *
 * The pipeline is created lazily on the first call and reused afterwards.
 * If loading fails, the cached promise is dropped so a later call can retry.
 *
 * @param {string|string[]} words - Input text(s) passed straight to the generator.
 * @param {boolean} [convert_ipa=false] - When true, each generated_text is passed
 *   through arpa_to_ipa and all whitespace is stripped from the result.
 *   NOTE(review): arpa_to_ipa is not defined in the visible part of this file;
 *   it must be in scope for this option to work - confirm.
 * @returns {Promise<Array>} The raw generator outputs (objects carrying
 *   `generated_text`) or, with convert_ipa, an array of converted strings.
 */
async function text2text_generation(words, convert_ipa = false) {
  if (g2pGeneratorPromise === null) {
    const loading = pipeline('text2text-generation', 'mini-bart-g2p', { quantized: false });
    g2pGeneratorPromise = loading;
    // Forget a failed load (the awaiting caller below still sees the rejection).
    loading.catch(() => {
      if (g2pGeneratorPromise === loading) {
        g2pGeneratorPromise = null;
      }
    });
  }
  const generator = await g2pGeneratorPromise;

  const options = { max_new_tokens: 100 };
  const outputs = await generator(words, options);
  if (!convert_ipa) {
    return outputs; // arpa
  }
  return outputs.map(({ generated_text }) => arpa_to_ipa(generated_text).replace(/\s/g, ""));
}
/**
 * Convert free text to a space-separated ARPAbet string.
 *
 * Words found in `cmudict` are converted by wordsToArpa; words it could not
 * convert come back as "@WORD" placeholders and are resolved by the G2P model
 * via text2text_generation.
 *
 * @param {Object} cmudict - Dictionary keyed by upper-case word.
 * @param {string} text - Input text. Only ASCII letters, digits, ". , ! ?" and
 *   spaces survive cleaning.
 * @param {boolean} [replace_questions=false] - When true, "!" and "?" are
 *   replaced by "." before conversion.
 * @returns {Promise<string>} The converted tokens joined by single spaces.
 */
async function textToArpa(cmudict, text, replace_questions = false) {
  // Turn tabs/newlines into spaces first; the character filter below would
  // otherwise delete them and glue neighbouring words together.
  let normalized = text.replace(/\s+/g, " ");
  if (replace_questions) {
    normalized = normalized.replaceAll("!", ".").replaceAll("?", ".");
  }
  const cleanedString = normalized.replace(/[^a-zA-Z0-9.,!? ]/g, "");

  const { result, non_converted } = wordsToArpa(cmudict, cleanedString);
  if (non_converted.length === 0) {
    return result.join(" ");
  }

  console.log("non_converted length = "+non_converted.length);
  const arpas = await text2text_generation(non_converted);
  console.log(arpas);

  // Placeholders appear in `result` in the same order as `non_converted`, so
  // substitute them token by token. This avoids String.prototype.replace, which
  // would interpret "$&", "$1", ... inside the generated text.
  let next = 0;
  const tokens = result.map((token) => {
    if (next >= non_converted.length || token !== "@" + non_converted[next]) {
      return token;
    }
    const word = non_converted[next];
    const arpa = arpas[next].generated_text;
    next += 1;
    console.log("@"+word, arpa);
    return arpa;
  });
  return tokens.join(" ");
}
/**
 * Look up a word in the dictionary using its upper-cased form as the key.
 *
 * @param {Object} cmudict - Dictionary keyed by upper-case word.
 * @param {string} word - Word to look up (any case).
 * @returns {*} The stored entry, or undefined when the key is absent.
 */
function get_arpa(cmudict, word) {
  const key = word.toUpperCase();
  return cmudict[key];
}
/**
 * Tokenize text and convert each word through the dictionary.
 *
 * The text is upper-cased, the punctuation marks , . ! ? are padded with
 * spaces so they become their own tokens, and the string is split on single
 * spaces. Punctuation tokens are kept as-is, empty tokens are dropped, and
 * every other token is looked up with get_arpa. Tokens with no entry are
 * emitted as "@TOKEN" placeholders and also collected in `non_converted`.
 *
 * @param {Object} cmudict - Dictionary keyed by upper-case word.
 * @param {string} text - Text to convert.
 * @returns {{result: Array, non_converted: string[]}} Converted tokens in
 *   order, plus the list of tokens that had no dictionary entry.
 */
function wordsToArpa(cmudict, text) {
  const punctuation = [",", ".", "!", "?"];

  // Pad every punctuation mark with spaces so splitting isolates it.
  let spaced = text.toUpperCase();
  for (const mark of punctuation) {
    spaced = spaced.split(mark).join(" " + mark + " ");
  }

  const converted = [];
  const missing = [];
  for (const token of spaced.split(" ")) {
    if (token === "") {
      continue;
    }
    if (punctuation.includes(token)) {
      converted.push(token);
      continue;
    }
    const arpa = get_arpa(cmudict, token);
    if (arpa === undefined) {
      converted.push("@" + token);
      missing.push(token);
      continue;
    }
    converted.push(arpa);
  }

  return { "result": converted, "non_converted": missing };
}
export{env,textToArpa} |