Spaces:
Runtime error
Runtime error
NTT123
committed on
Commit
•
5ec3478
1
Parent(s):
7911639
expand numbers, new examples.
Browse files- app.py +7 -1
- inference.py +1 -1
- mynumbers.py +73 -0
- requirements.txt +6 -5
- text.py +6 -1
- wavegru.ckpt +1 -1
app.py
CHANGED
@@ -33,9 +33,15 @@ gr.Interface(
|
|
33 |
fn=speak,
|
34 |
inputs="text",
|
35 |
examples=[
|
36 |
-
"
|
|
|
|
|
|
|
|
|
|
|
37 |
"October arrived, spreading a damp chill over the grounds and into the castle. Madam Pomfrey, the nurse, was kept busy by a sudden spate of colds among the staff and students.",
|
38 |
"Artificial intelligence is intelligence demonstrated by machines, as opposed to natural intelligence displayed by animals including humans.",
|
|
|
39 |
],
|
40 |
outputs="audio",
|
41 |
title=title,
|
|
|
33 |
fn=speak,
|
34 |
inputs="text",
|
35 |
examples=[
|
36 |
+
"This is a test!",
|
37 |
+
"President Trump met with other leaders at the Group of 20 conference.",
|
38 |
+
"The buses aren't the problem, they actually provide a solution.",
|
39 |
+
"Generative adversarial network or variational auto-encoder.",
|
40 |
+
"Basilar membrane and otolaryngology are not auto-correlations.",
|
41 |
+
"There are several variations on the full gated unit, with gating done using the previous hidden state and the bias in various combinations, and a simplified form called minimal gated unit.",
|
42 |
"October arrived, spreading a damp chill over the grounds and into the castle. Madam Pomfrey, the nurse, was kept busy by a sudden spate of colds among the staff and students.",
|
43 |
"Artificial intelligence is intelligence demonstrated by machines, as opposed to natural intelligence displayed by animals including humans.",
|
44 |
+
'Uncle Vernon entered the kitchen as Harry was turning over the bacon. "Comb your hair!" he barked, by way of a morning greeting. About once a week, Uncle Vernon looked over the top of his newspaper and shouted that Harry needed a haircut. Harry must have had more haircuts than the rest of the boys in his class put together, but it made no difference, his hair simply grew that way - all over the place.',
|
45 |
],
|
46 |
outputs="audio",
|
47 |
title=title,
|
inference.py
CHANGED
@@ -28,7 +28,7 @@ def load_tacotron_model(alphabet_file, config_file, model_file):
|
|
28 |
return alphabet, net, config
|
29 |
|
30 |
|
31 |
-
tacotron_inference_fn = pax.pure(lambda net, text: net.inference(text, max_len=
|
32 |
|
33 |
|
34 |
def text_to_mel(net, text, alphabet, config):
|
|
|
28 |
return alphabet, net, config
|
29 |
|
30 |
|
31 |
+
tacotron_inference_fn = pax.pure(lambda net, text: net.inference(text, max_len=1200))
|
32 |
|
33 |
|
34 |
def text_to_mel(net, text, alphabet, config):
|
mynumbers.py
ADDED
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
""" from https://github.com/keithito/tacotron """
|
2 |
+
|
3 |
+
import inflect
|
4 |
+
import re
|
5 |
+
|
6 |
+
|
7 |
+
# Shared inflect engine; the expansion helpers in this module call its
# number_to_words() method.
_inflect = inflect.engine()
# A digit, then one or more digits/commas, then a digit (e.g. "1,234").
# Note this also matches plain runs of three or more digits.
_comma_number_re = re.compile(r"([0-9][0-9\,]+[0-9])")
# Digits, a literal dot, digits (e.g. "3.14").
_decimal_number_re = re.compile(r"([0-9]+\.[0-9]+)")
# "£" followed by digits/commas ending in a digit; group 1 is the amount.
_pounds_re = re.compile(r"£([0-9\,]*[0-9]+)")
# "$" followed by digits/dots/commas ending in a digit; group 1 is the amount.
_dollars_re = re.compile(r"\$([0-9\.\,]*[0-9]+)")
# Digits immediately followed by one of the suffixes st/nd/rd/th.
_ordinal_re = re.compile(r"[0-9]+(st|nd|rd|th)")
# Any run of one or more digits.
_number_re = re.compile(r"[0-9]+")
|
14 |
+
|
15 |
+
|
16 |
+
def _remove_commas(m):
    """Return group 1 of regex match *m* with every comma stripped out."""
    captured = m.group(1)
    return "".join(captured.split(","))
|
18 |
+
|
19 |
+
|
20 |
+
def _expand_decimal_point(m):
    """Return group 1 of regex match *m* with each "." spelled as " point "."""
    pieces = m.group(1).split(".")
    return " point ".join(pieces)
|
22 |
+
|
23 |
+
|
24 |
+
def _expand_dollars(m):
    """Rewrite a captured dollar amount as "<n> dollars, <n> cents" text.

    Args:
        m: Regex match whose group 1 is the amount that followed the "$"
            sign. It may contain digits, "." and ",".

    Returns:
        A phrase such as "5 dollars, 50 cents", "1 dollar", "1 cent" or
        "zero dollars". The numbers are left as digits. An amount
        containing more than one "." is returned unchanged with
        " dollars" appended.
    """
    match = m.group(1)
    parts = match.split(".")
    if len(parts) > 2:
        return match + " dollars"  # Unexpected format
    # The capturing pattern admits commas, and int() rejects them, so
    # strip any that are still present (e.g. "1,.50" or "1,000").
    whole = parts[0].replace(",", "")
    fraction = parts[1].replace(",", "") if len(parts) > 1 else ""
    if len(fraction) == 1:
        # "$1.5" means fifty cents, not five.
        fraction += "0"
    dollars = int(whole) if whole else 0
    cents = int(fraction) if fraction else 0
    dollar_unit = "dollar" if dollars == 1 else "dollars"
    cent_unit = "cent" if cents == 1 else "cents"
    if dollars and cents:
        return "%s %s, %s %s" % (dollars, dollar_unit, cents, cent_unit)
    if dollars:
        return "%s %s" % (dollars, dollar_unit)
    if cents:
        return "%s %s" % (cents, cent_unit)
    return "zero dollars"
|
43 |
+
|
44 |
+
|
45 |
+
def _expand_ordinal(m):
    """Pass the whole text matched by *m* to the inflect engine's number_to_words.

    The matched text is handed over as-is; when matched by the ordinal
    pattern in this module it is digits plus an st/nd/rd/th suffix.
    """
    ordinal_text = m.group(0)
    return _inflect.number_to_words(ordinal_text)
|
47 |
+
|
48 |
+
|
49 |
+
def _expand_number(m):
    """Spell out the integer matched by *m* using the inflect engine.

    Values strictly between 1000 and 3000 are special-cased (presumably so
    they read like years -- confirm against the intended voice output):

    * 2000 becomes "two thousand";
    * 2001-2009 become "two thousand " plus the words for the last two digits;
    * exact multiples of 100 become "<words for num // 100> hundred";
    * anything else is read in two-digit groups with zero spoken as "oh".

    Every other value goes through number_to_words with the "and" word
    suppressed.
    """
    value = int(m.group(0))
    if not (1000 < value < 3000):
        return _inflect.number_to_words(value, andword="")
    if value == 2000:
        return "two thousand"
    if 2000 < value < 2010:
        return "two thousand " + _inflect.number_to_words(value % 100)
    if value % 100 == 0:
        return _inflect.number_to_words(value // 100) + " hundred"
    grouped = _inflect.number_to_words(value, andword="", zero="oh", group=2)
    # Grouped output separates the pairs with ", "; join them with spaces.
    return grouped.replace(", ", " ")
|
64 |
+
|
65 |
+
|
66 |
+
def normalize_numbers(text):
    """Run every number-expansion pass over *text* and return the result.

    The passes are order-sensitive: commas are stripped first, currency
    amounts are rewritten before decimals, and the generic digit-run pass
    runs last so it only sees digits the earlier passes left behind.
    """
    passes = (
        (_comma_number_re, _remove_commas),
        (_pounds_re, r"\1 pounds"),
        (_dollars_re, _expand_dollars),
        (_decimal_number_re, _expand_decimal_point),
        (_ordinal_re, _expand_ordinal),
        (_number_re, _expand_number),
    )
    for pattern, replacement in passes:
        text = pattern.sub(replacement, text)
    return text
|
requirements.txt
CHANGED
@@ -1,10 +1,11 @@
|
|
|
|
|
|
1 |
jax==0.3.1
|
2 |
jaxlib==0.3.0
|
3 |
-
|
4 |
librosa==0.9.1
|
|
|
5 |
pax3==0.5.6
|
6 |
-
|
7 |
-
jinja2
|
8 |
toml==0.10.2
|
9 |
-
unidecode==1.3.4
|
10 |
-
pyyaml==6.0
|
|
|
1 |
+
gradio
|
2 |
+
inflect==5.4.0
|
3 |
jax==0.3.1
|
4 |
jaxlib==0.3.0
|
5 |
+
jinja2
|
6 |
librosa==0.9.1
|
7 |
+
numpy==1.22.3
|
8 |
pax3==0.5.6
|
9 |
+
pyyaml==6.0
|
|
|
10 |
toml==0.10.2
|
11 |
+
unidecode==1.3.4
|
|
text.py
CHANGED
@@ -13,7 +13,7 @@ hyperparameter. Some cleaners are English-specific. You'll typically want to use
|
|
13 |
"""
|
14 |
|
15 |
import re
|
16 |
-
|
17 |
from unidecode import unidecode
|
18 |
|
19 |
# Regular expression matching whitespace:
|
@@ -51,6 +51,10 @@ def expand_abbreviations(text):
|
|
51 |
return text
|
52 |
|
53 |
|
|
|
|
|
|
|
|
|
54 |
def lowercase(text):
|
55 |
return text.lower()
|
56 |
|
@@ -82,6 +86,7 @@ def english_cleaners(text):
|
|
82 |
"""Pipeline for English text, including number and abbreviation expansion."""
|
83 |
text = convert_to_ascii(text)
|
84 |
text = lowercase(text)
|
|
|
85 |
text = expand_abbreviations(text)
|
86 |
text = collapse_whitespace(text)
|
87 |
return text
|
|
|
13 |
"""
|
14 |
|
15 |
import re
|
16 |
+
from mynumbers import normalize_numbers
|
17 |
from unidecode import unidecode
|
18 |
|
19 |
# Regular expression matching whitespace:
|
|
|
51 |
return text
|
52 |
|
53 |
|
54 |
+
def expand_numbers(text):
|
55 |
+
return normalize_numbers(text)
|
56 |
+
|
57 |
+
|
58 |
def lowercase(text):
|
59 |
return text.lower()
|
60 |
|
|
|
86 |
"""Pipeline for English text, including number and abbreviation expansion."""
|
87 |
text = convert_to_ascii(text)
|
88 |
text = lowercase(text)
|
89 |
+
text = expand_numbers(text)
|
90 |
text = expand_abbreviations(text)
|
91 |
text = collapse_whitespace(text)
|
92 |
return text
|
wavegru.ckpt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 58039876
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:377de02cc88587e3562bdc709d48a43d00c901198d6b018bf6e1734211036577
|
3 |
size 58039876
|