File size: 2,583 Bytes
978f8ad
05710e7
978f8ad
1b53073
a68d2c7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
db61e17
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
import { pipeline, env } from 'https://cdn.jsdelivr.net/npm/@xenova/transformers@2.17.2';
// Set the ONNX backend's internal log level to "error".
env.backends.onnx.logLevelInternal="error"
// Print the transformers.js `env` object to the console when this module is loaded.
console.log(env)


// Promise for the shared 'mini-bart-g2p' pipeline; null until the first request.
let g2pGeneratorPromise = null;

/**
 * Return the shared text2text-generation pipeline, creating it on first use.
 *
 * The promise (not the resolved value) is cached so that concurrent callers
 * share a single load. If loading fails the cache is cleared so a later call
 * can try again instead of being stuck with a rejected promise.
 *
 * @returns {Promise<Function>} resolves to the callable pipeline.
 */
function loadG2pGenerator() {
  if (g2pGeneratorPromise === null) {
    g2pGeneratorPromise = pipeline('text2text-generation', 'mini-bart-g2p', { quantized: false })
      .catch((err) => {
        g2pGeneratorPromise = null;
        throw err;
      });
  }
  return g2pGeneratorPromise;
}

/**
 * Run the 'mini-bart-g2p' text2text-generation model on the given words.
 *
 * @param {string[]} words - inputs handed to the pipeline unchanged.
 * @param {boolean} [convert_ipa=false] - when true, each `generated_text` is
 *   passed through `arpa_to_ipa` and all whitespace is removed from the result.
 *   NOTE(review): `arpa_to_ipa` is not defined in this file; it must be
 *   available as a global for this branch to work - confirm.
 * @returns {Promise<Array>} the raw pipeline outputs (objects carrying a
 *   `generated_text` field) or, with `convert_ipa`, an array of strings.
 */
async function text2text_generation(words,convert_ipa=false) {
  const generator = await loadG2pGenerator();
  const outputs = await generator(words, { max_new_tokens: 100 });

  if (!convert_ipa) {
    return outputs; // arpa
  }
  return outputs.map((output) => arpa_to_ipa(output.generated_text).replace(/\s/g, ""));
}


/**
 * Convert text to a space-separated ARPAbet string.
 *
 * Words are looked up through `wordsToArpa`; any word it reports in
 * `non_converted` (and marks in `result` as "@" + word) is sent to
 * `text2text_generation`, and the placeholder is replaced by the model's
 * `generated_text`.
 *
 * @param {Object} cmudict - pronunciation dictionary, passed straight to `wordsToArpa`.
 * @param {string} text - input text.
 * @param {boolean} [replace_questions=false] - when true, "!" and "?" are
 *   replaced by "." before conversion.
 * @returns {Promise<string>} the converted text.
 */
async function textToArpa(cmudict,text,replace_questions=false){
    const source = replace_questions
        ? text.replaceAll("!",".").replaceAll("?",".")
        : text

    // Turn every whitespace character (newline, tab, ...) into a plain space
    // first; the filter below would otherwise delete it and glue the
    // neighbouring words together ("hello\nworld" -> "helloworld").
    const cleanedString = source.replace(/\s/g, " ").replace(/[^a-zA-Z0-9.,!? ]/g, '');

    const { result, non_converted } = wordsToArpa(cmudict,cleanedString)

    let arpa_text = result.join(" ");

    if (non_converted.length > 0){
        console.log("non_converted length = "+non_converted.length)
        const arpas = await text2text_generation(non_converted)
        console.log(arpas)
        for (let i = 0; i < non_converted.length; i++) {
            const word = non_converted[i]
            const arpa = arpas[i].generated_text
            console.log("@"+word,arpa)
            // A function replacer inserts the model output literally; a string
            // replacement would interpret "$&", "$1", ... inside it.
            // Placeholders are handled in order of appearance, so the first
            // remaining match is always the one belonging to this word.
            arpa_text = arpa_text.replace("@"+word, () => arpa)
        }
    }
    return arpa_text
}

/**
 * Look up a word in the dictionary using its upper-cased form as the key.
 *
 * @param {Object} cmudict - dictionary indexed by upper-case word.
 * @param {string} word - word to look up, in any letter case.
 * @returns {*} the stored entry, or undefined when the key is absent.
 */
function get_arpa(cmudict,word){
    const key = word.toUpperCase();
    return cmudict[key];
}

/**
 * Split text into tokens and convert each word through the dictionary.
 *
 * The text is upper-cased, the punctuation marks , . ! ? are padded with
 * spaces so they become tokens of their own, and the text is split on single
 * spaces. Punctuation tokens are kept as-is, empty tokens are skipped, known
 * words are replaced by their dictionary entry, and unknown words are emitted
 * as "@" + word and also collected separately.
 *
 * @param {Object} cmudict - dictionary handed to `get_arpa`.
 * @param {string} text - text to convert.
 * @returns {{result: Array, non_converted: string[]}} converted tokens in
 *   order, plus the unknown (upper-cased) words in order of appearance.
 */
function wordsToArpa(cmudict,text){
    const punctuation = [",", ".", "!", "?"];

    // Surround each punctuation mark with spaces so split(" ") isolates it.
    const spaced = punctuation.reduce(
        (acc, mark) => acc.replaceAll(mark, " " + mark + " "),
        text.toUpperCase()
    );

    const result = [];
    const non_converted = [];

    for (const token of spaced.split(" ")) {
        if (token === "") {
            continue; // produced by consecutive spaces
        }
        if (punctuation.includes(token)) {
            result.push(token);
            continue;
        }

        const arpa = get_arpa(cmudict, token);
        if (typeof arpa === "undefined") {
            // Unknown word: leave a placeholder and remember it.
            result.push("@" + token);
            non_converted.push(token);
        } else {
            result.push(arpa);
        }
    }

    return { "result": result, "non_converted": non_converted };
}

export{env,textToArpa}