ccoreilly committed on
Commit
c19313c
·
1 Parent(s): db540c2

Afegeix Festival

Browse files
Files changed (3) hide show
  1. Dockerfile +8 -1
  2. app.py +9 -7
  3. festival.py +65 -0
Dockerfile CHANGED
@@ -1,6 +1,11 @@
1
  FROM python:3.9
2
 
3
- RUN apt update && apt install -y git make autoconf automake libtool pkg-config gcc libsonic-dev ronn kramdown libpcaudio-dev
 
 
 
 
 
4
 
5
  RUN git clone -b ca-to-pr https://github.com/projecte-aina/espeak-ng
6
 
@@ -14,6 +19,7 @@ RUN useradd -m -u 1000 user
14
 
15
  USER user
16
 
 
17
  ENV HOME=/home/user \
18
  PATH=/home/user/.local/bin:$PATH
19
 
@@ -26,6 +32,7 @@ COPY --chown=user models models
26
  RUN pip install -r requirements.txt
27
 
28
  COPY --chown=user engine.py .
 
29
  COPY --chown=user app.py .
30
 
31
  RUN mkdir -p cache && chmod 777 cache
 
1
  FROM python:3.9
2
 
3
+ RUN apt-get update && apt-get install -y gnupg && \
4
+ apt-key adv --recv-keys --keyserver hkp://keyserver.ubuntu.com:80 A3A48C4A && \
5
+ echo "deb http://ppa.launchpad.net/zeehio/festcat/ubuntu bionic main" >> /etc/apt/sources.list && \
6
+ echo "deb-src http://ppa.launchpad.net/zeehio/festcat/ubuntu bionic main" >> /etc/apt/sources.list && \
7
+ apt-get update && \
8
+ apt-get -y install festival festvox-ca-ona-hts festvox-ca-pau-hts lame git make autoconf automake libtool pkg-config gcc libsonic-dev ronn kramdown libpcaudio-dev
9
 
10
  RUN git clone -b ca-to-pr https://github.com/projecte-aina/espeak-ng
11
 
 
19
 
20
  USER user
21
 
22
+
23
  ENV HOME=/home/user \
24
  PATH=/home/user/.local/bin:$PATH
25
 
 
32
  RUN pip install -r requirements.txt
33
 
34
  COPY --chown=user engine.py .
35
+ COPY --chown=user festival.py .
36
  COPY --chown=user app.py .
37
 
38
  RUN mkdir -p cache && chmod 777 cache
app.py CHANGED
@@ -1,19 +1,17 @@
1
- from engine import Piper
2
  import tempfile
3
  from typing import Optional
4
  from TTS.config import load_config
5
  import gradio as gr
6
  import numpy as np
7
  import os
8
- import json
9
  from TTS.utils.manage import ModelManager
10
  from TTS.utils.synthesizer import Synthesizer
11
  from espeak_phonemizer import Phonemizer
 
 
12
 
13
  MAX_TXT_LEN = 325
14
 
15
- SPEAKERS = ['f_cen_05', 'f_cen_81', 'f_occ_31', 'f_occ_de', 'f_sep_31', 'm_cen_08', 'm_occ_44', 'm_val_89']
16
-
17
  fonemitzador = Phonemizer("ca")
18
 
19
  def carrega_bsc():
@@ -51,7 +49,7 @@ model_collectivat = carrega_collectivat()
51
 
52
  model_piper = carrega_piper()
53
 
54
- def tts(text, speaker_idx):
55
  if len(text) > MAX_TXT_LEN:
56
  text = text[:MAX_TXT_LEN]
57
  print(f"Input text was cutoff since it went over the {MAX_TXT_LEN} character limit.")
@@ -82,7 +80,9 @@ def tts(text, speaker_idx):
82
 
83
  fonemes = fonemitzador.phonemize(text, keep_clause_breakers=True)
84
 
85
- return fonemes, fp_bsc, fp_coll, fp_piper
 
 
86
 
87
 
88
  description="""
@@ -113,10 +113,12 @@ iface = gr.Interface(
113
  label="Text",
114
  value="L'Èlia i l'Alí a l'aula. L'oli i l'ou. Lulú olorava la lila.",
115
  ),
116
- gr.Dropdown(label="Selecciona un parlant pel model VITS multi-parlant del BSC", choices=SPEAKERS, value="ona")
 
117
  ],
118
  outputs=[
119
  gr.Markdown(label="Fonemes"),
 
120
  gr.Audio(label="BSC VITS",type="filepath"),
121
  gr.Audio(label="Collectivat Fastspeech",type="filepath"),
122
  gr.Audio(label="Piper VITS",type="filepath")
 
 
1
  import tempfile
2
  from typing import Optional
3
  from TTS.config import load_config
4
  import gradio as gr
5
  import numpy as np
6
  import os
 
7
  from TTS.utils.manage import ModelManager
8
  from TTS.utils.synthesizer import Synthesizer
9
  from espeak_phonemizer import Phonemizer
10
+ from engine import Piper
11
+ from festival import festival_synthesize
12
 
13
  MAX_TXT_LEN = 325
14
 
 
 
15
  fonemitzador = Phonemizer("ca")
16
 
17
  def carrega_bsc():
 
49
 
50
  model_piper = carrega_piper()
51
 
52
+ def tts(text, festival_voice, speaker_idx):
53
  if len(text) > MAX_TXT_LEN:
54
  text = text[:MAX_TXT_LEN]
55
  print(f"Input text was cutoff since it went over the {MAX_TXT_LEN} character limit.")
 
80
 
81
  fonemes = fonemitzador.phonemize(text, keep_clause_breakers=True)
82
 
83
+ fp_festival = festival_synthesize(text, festival_voice)
84
+
85
+ return fonemes, fp_festival, fp_bsc, fp_coll, fp_piper
86
 
87
 
88
  description="""
 
113
  label="Text",
114
  value="L'Èlia i l'Alí a l'aula. L'oli i l'ou. Lulú olorava la lila.",
115
  ),
116
+ gr.Dropdown(label="Parlant del motor Festival", choices=["ona", "pau"], value="ona"),
117
+ gr.Dropdown(label="Parlant del model VITS multi-parlant del BSC", choices=SPEAKERS, value="ona")
118
  ],
119
  outputs=[
120
  gr.Markdown(label="Fonemes"),
121
+ gr.Audio(label="Festival",type="filepath"),
122
  gr.Audio(label="BSC VITS",type="filepath"),
123
  gr.Audio(label="Collectivat Fastspeech",type="filepath"),
124
  gr.Audio(label="Piper VITS",type="filepath")
festival.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+ # -*- encoding: utf-8 -*-
3
+ #
4
+ # Copyright (c) 2016 Jordi Mas i Hernandez <jmas@softcatala.org>
5
+ #
6
+ # This program is free software; you can redistribute it and/or
7
+ # modify it under the terms of the GNU Lesser General Public
8
+ # License as published by the Free Software Foundation; either
9
+ # version 2.1 of the License, or (at your option) any later version.
10
+ #
11
+ # This program is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
+ # Lesser General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU Lesser General Public
17
+ # License along with this program; if not, write to the
18
+ # Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19
+ # Boston, MA 02111-1307, USA.
20
+
21
+ import subprocess
22
+ import tempfile
23
+
24
+ festival_voices = {
25
+ "ona": "voice_upc_ca_ona_hts",
26
+ "pau": "voice_upc_ca_pau_hts"
27
+ }
28
+
29
+ def _normalize(result):
30
+ mapping = {
31
+ '’' : '\'',
32
+ 'à' : 'à',
33
+ 'í' : 'í',
34
+ 'ó' : 'ó',
35
+ 'è' : 'è',
36
+ 'ò' : 'ò',
37
+ 'ú' : 'ú',
38
+ }
39
+
40
+ for char in mapping.keys():
41
+ result = result.replace(char, mapping[char])
42
+
43
+ return result
44
+
45
+
46
def festival_synthesize(text, voice):
    """Synthesise *text* with Festival's ``text2wave`` and return a WAV path.

    The text is normalised with ``_normalize``, encoded as ISO-8859-15
    (characters that cannot be encoded are dropped) and written to a
    temporary input file. ``/usr/bin/text2wave`` is then run with the
    Festival voice selected by *voice*.

    Args:
        text: The text to synthesise.
        voice: Short voice name; must be ``"ona"`` or ``"pau"``.

    Returns:
        Path of the generated ``.wav`` file. The file is created with
        ``delete=False``, so it is not removed automatically.

    Raises:
        ValueError: If *voice* is not one of the supported voice names.
    """
    if voice not in ("ona", "pau"):
        raise ValueError("Unsupported Festival voice: {0!r}".format(voice))

    txt2wave = '/usr/bin/text2wave'

    with tempfile.NamedTemporaryFile() as encoded_file, \
            tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as wave_file:

        # Write through the handle we already hold instead of reopening the
        # file by name; flush so the child process sees the full content.
        encoded_file.write(_normalize(text).encode('ISO-8859-15', 'ignore'))
        encoded_file.flush()

        # Argument list with no shell: nothing is subject to shell parsing.
        cmd = [
            txt2wave,
            '-o', wave_file.name,
            encoded_file.name,
            '-eval', '({0})'.format(festival_voices[voice]),
        ]
        # Output is discarded rather than piped, so the child can never
        # block on a full pipe. A non-zero exit status is deliberately not
        # raised: the output path is returned regardless of the exit status.
        subprocess.run(cmd, stdout=subprocess.DEVNULL, check=False)

    return wave_file.name