Yuyang2022 committed on
Commit
d38f3bc
1 Parent(s): f2faf3d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +77 -10
app.py CHANGED
@@ -2,25 +2,92 @@ from transformers import pipeline
2
  import tempfile
3
  import gradio as gr
4
  from neon_tts_plugin_coqui import CoquiTTS
 
 
 
 
5
 
6
  pipe = pipeline(model="Yuyang2022/yue") # change to "your-username/the-name-you-picked"
7
  LANGUAGES = list(CoquiTTS.langs.keys())
8
  coquiTTS = CoquiTTS()
9
 
10
- def audio_tts(audio, language):
11
  text = pipe(audio)["text"]
 
12
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
13
  coquiTTS.get_tts(text, fp, speaker = {"language" : language})
14
  return fp.name
15
 
16
- inputs = [gr.Audio(source="microphone", type="filepath"),
17
- gr.Radio(
18
- label="Language",
19
- choices=LANGUAGES, value="sv")]
20
- outputs = gr.Audio(label="Output")
21
 
22
- demo = gr.Interface(fn=audio_tts, inputs=inputs, outputs=outputs,
23
- title="translation - speeh to speech",
24
- description="Realtime demo for speech translation.",)
 
 
25
 
26
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  import tempfile
3
  import gradio as gr
4
  from neon_tts_plugin_coqui import CoquiTTS
5
+ import os
6
+ import time
7
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
8
+ from flores200_codes import flores_codes
9
 
10
# ASR pipeline: maps an audio file to text (used as `pipe(audio)["text"]`
# in audio_tts below). Loading downloads/caches the model at import time.
pipe = pipeline(model="Yuyang2022/yue") # change to "your-username/the-name-you-picked"
# Languages supported by the Coqui TTS plugin; offered as the TTS voice choices.
LANGUAGES = list(CoquiTTS.langs.keys())
# Single shared TTS engine instance, reused across requests.
coquiTTS = CoquiTTS()
13
 
14
def audio_tts(audio, language: str, lang):
    """Speech-to-speech pipeline: transcribe, translate, then synthesize.

    Parameters
    ----------
    audio : str
        Path of the recorded input audio (gradio `filepath` type).
    language : str
        Coqui TTS voice language for the synthesized output.
    lang : str
        Target text language name, looked up by `translation`.

    Returns
    -------
    str
        Path of a temporary WAV file containing the synthesized speech.
    """
    transcript = pipe(audio)["text"]
    translated = translation("zho_Hant", lang, transcript)
    # delete=False: gradio reads the WAV after this function returns.
    tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
    with tmp as wav:
        coquiTTS.get_tts(translated, wav, speaker={"language": language})
    return tmp.name
20
 
 
 
 
 
 
21
 
22
def load_models():
    """Load every configured NLLB model/tokenizer pair from the Hub.

    Returns
    -------
    dict
        Maps "<alias>_model" and "<alias>_tokenizer" to the loaded
        objects, for each alias in the hard-coded source table.
    """
    sources = {
        "nllb-distilled-600M": "facebook/nllb-200-distilled-600M",
    }

    loaded = {}
    for alias, repo_id in sources.items():
        print("\tLoading model: %s" % alias)
        loaded[alias + "_model"] = AutoModelForSeq2SeqLM.from_pretrained(repo_id)
        loaded[alias + "_tokenizer"] = AutoTokenizer.from_pretrained(repo_id)
    return loaded
38
+
39
+
40
def translation(source, target, text):
    """Translate *text* into the flores-200 language *target* with NLLB.

    Parameters
    ----------
    source : str
        Currently ignored: the source language is pinned to Traditional
        Chinese ("zho_Hant") regardless of this argument (the ASR stage
        emits Chinese text). Kept in the signature for callers.
    target : str
        Human-readable language name; must be a key of `flores_codes`.
    text : str
        Text to translate.

    Returns
    -------
    str
        The translated text.

    Raises
    ------
    KeyError
        If *target* is not a valid `flores_codes` key, or the required
        model has not been loaded into the global `model_dict`.
    """
    # BUG FIX: `model_name` was previously assigned only inside
    # `if len(model_dict) == 2:`, raising NameError for any other dict
    # size. Bind it unconditionally; the dict lookup below still fails
    # loudly (KeyError) if the model genuinely is not loaded.
    model_name = "nllb-distilled-600M"

    source = "zho_Hant"  # deliberate override; was: flores_codes[source]
    target_code = flores_codes[target]

    model = model_dict[model_name + "_model"]
    tokenizer = model_dict[model_name + "_tokenizer"]

    # NOTE(review): the pipeline object is rebuilt on every call. That is
    # cheap relative to inference, but could be cached per (source, target)
    # pair if request latency matters.
    translator = pipeline(
        "translation",
        model=model,
        tokenizer=tokenizer,
        src_lang=source,
        tgt_lang=target_code,
    )
    output = translator(text, max_length=400)
    # (Removed dead code: an unused timing/`result` dict was built here
    # and never returned or logged.)
    return output[0]["translation_text"]
70
+
71
+
72
if __name__ == "__main__":
    print("\tinit models")

    # Read by translation() as a module-level global. The original
    # `global model_dict` statement was a no-op at module scope and is
    # removed; assignment here is already module-global.
    model_dict = load_models()

    lang_codes = list(flores_codes.keys())

    # --- gradio UI ---
    inputs = [
        gr.Audio(source="microphone", type="filepath"),
        # BUG FIX: this Radio previously carried the same label as the
        # Dropdown below ("Target text Language"); it actually selects
        # the Coqui TTS voice language for the synthesized speech.
        gr.Radio(
            label="Target speech Language",
            choices=LANGUAGES, value="en"),
        # NOTE(review): gr.inputs.Dropdown is the deprecated gradio 2.x
        # API; kept as-is to match the gradio version this Space pins.
        gr.inputs.Dropdown(lang_codes, default="English", label="Target text Language"),
    ]
    outputs = gr.Audio(label="Output")

    demo = gr.Interface(
        fn=audio_tts,
        inputs=inputs,
        outputs=outputs,
        title="translation - speech to speech",
        description="Realtime demo for speech translation.",
    )

    demo.launch()