Spaces:
Running
on
Zero
Running
on
Zero
Naozumi0512
committed on
Commit
·
06ba675
1
Parent(s):
ca6ebec
Fix cleaner
Browse files
oldVersion/V101/text/chinese.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
import re
|
2 |
import cn2an
|
3 |
-
import ToJyutping
|
4 |
|
|
|
5 |
from .symbols import punctuation
|
6 |
|
7 |
normalizer = lambda x: cn2an.transform(x, "an2cn")
|
@@ -90,7 +90,7 @@ rep_map = {
|
|
90 |
|
91 |
|
92 |
def replace_punctuation(text):
|
93 |
-
text = text.replace("嗯", "恩").replace("呣", "母")
|
94 |
pattern = re.compile("|".join(re.escape(p) for p in rep_map.keys()))
|
95 |
|
96 |
replaced_text = pattern.sub(lambda x: rep_map[x.group()], text)
|
@@ -152,13 +152,9 @@ def jyuping_to_initials_finals_tones(jyuping_syllables):
|
|
152 |
|
153 |
|
154 |
def get_jyutping(text):
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
result.append(jp)
|
159 |
-
else:
|
160 |
-
result.append(char)
|
161 |
-
return result
|
162 |
|
163 |
|
164 |
def get_bert_feature(text, word2ph):
|
|
|
1 |
import re
|
2 |
import cn2an
|
|
|
3 |
|
4 |
+
from pyjyutping import jyutping
|
5 |
from .symbols import punctuation
|
6 |
|
7 |
normalizer = lambda x: cn2an.transform(x, "an2cn")
|
|
|
90 |
|
91 |
|
92 |
def replace_punctuation(text):
|
93 |
+
# text = text.replace("嗯", "恩").replace("呣", "母")
|
94 |
pattern = re.compile("|".join(re.escape(p) for p in rep_map.keys()))
|
95 |
|
96 |
replaced_text = pattern.sub(lambda x: rep_map[x.group()], text)
|
|
|
152 |
|
153 |
|
154 |
def get_jyutping(text):
|
155 |
+
jp = jyutping.convert(text)
|
156 |
+
jp_array = jp.split()
|
157 |
+
return jp_array
|
|
|
|
|
|
|
|
|
158 |
|
159 |
|
160 |
def get_bert_feature(text, word2ph):
|
oldVersion/V110/text/chinese.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
import re
|
2 |
import cn2an
|
3 |
-
import ToJyutping
|
4 |
|
|
|
5 |
from .symbols import punctuation
|
6 |
|
7 |
normalizer = lambda x: cn2an.transform(x, "an2cn")
|
@@ -90,7 +90,7 @@ rep_map = {
|
|
90 |
|
91 |
|
92 |
def replace_punctuation(text):
|
93 |
-
text = text.replace("嗯", "恩").replace("呣", "母")
|
94 |
pattern = re.compile("|".join(re.escape(p) for p in rep_map.keys()))
|
95 |
|
96 |
replaced_text = pattern.sub(lambda x: rep_map[x.group()], text)
|
@@ -152,13 +152,9 @@ def jyuping_to_initials_finals_tones(jyuping_syllables):
|
|
152 |
|
153 |
|
154 |
def get_jyutping(text):
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
result.append(jp)
|
159 |
-
else:
|
160 |
-
result.append(char)
|
161 |
-
return result
|
162 |
|
163 |
|
164 |
def get_bert_feature(text, word2ph):
|
|
|
1 |
import re
|
2 |
import cn2an
|
|
|
3 |
|
4 |
+
from pyjyutping import jyutping
|
5 |
from .symbols import punctuation
|
6 |
|
7 |
normalizer = lambda x: cn2an.transform(x, "an2cn")
|
|
|
90 |
|
91 |
|
92 |
def replace_punctuation(text):
|
93 |
+
# text = text.replace("嗯", "恩").replace("呣", "母")
|
94 |
pattern = re.compile("|".join(re.escape(p) for p in rep_map.keys()))
|
95 |
|
96 |
replaced_text = pattern.sub(lambda x: rep_map[x.group()], text)
|
|
|
152 |
|
153 |
|
154 |
def get_jyutping(text):
|
155 |
+
jp = jyutping.convert(text)
|
156 |
+
jp_array = jp.split()
|
157 |
+
return jp_array
|
|
|
|
|
|
|
|
|
158 |
|
159 |
|
160 |
def get_bert_feature(text, word2ph):
|
oldVersion/V111/text/chinese.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
import re
|
2 |
import cn2an
|
3 |
-
import ToJyutping
|
4 |
|
|
|
5 |
from .symbols import punctuation
|
6 |
|
7 |
normalizer = lambda x: cn2an.transform(x, "an2cn")
|
@@ -90,7 +90,7 @@ rep_map = {
|
|
90 |
|
91 |
|
92 |
def replace_punctuation(text):
|
93 |
-
text = text.replace("嗯", "恩").replace("呣", "母")
|
94 |
pattern = re.compile("|".join(re.escape(p) for p in rep_map.keys()))
|
95 |
|
96 |
replaced_text = pattern.sub(lambda x: rep_map[x.group()], text)
|
@@ -152,13 +152,9 @@ def jyuping_to_initials_finals_tones(jyuping_syllables):
|
|
152 |
|
153 |
|
154 |
def get_jyutping(text):
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
result.append(jp)
|
159 |
-
else:
|
160 |
-
result.append(char)
|
161 |
-
return result
|
162 |
|
163 |
|
164 |
def get_bert_feature(text, word2ph):
|
|
|
1 |
import re
|
2 |
import cn2an
|
|
|
3 |
|
4 |
+
from pyjyutping import jyutping
|
5 |
from .symbols import punctuation
|
6 |
|
7 |
normalizer = lambda x: cn2an.transform(x, "an2cn")
|
|
|
90 |
|
91 |
|
92 |
def replace_punctuation(text):
|
93 |
+
# text = text.replace("嗯", "恩").replace("呣", "母")
|
94 |
pattern = re.compile("|".join(re.escape(p) for p in rep_map.keys()))
|
95 |
|
96 |
replaced_text = pattern.sub(lambda x: rep_map[x.group()], text)
|
|
|
152 |
|
153 |
|
154 |
def get_jyutping(text):
|
155 |
+
jp = jyutping.convert(text)
|
156 |
+
jp_array = jp.split()
|
157 |
+
return jp_array
|
|
|
|
|
|
|
|
|
158 |
|
159 |
|
160 |
def get_bert_feature(text, word2ph):
|
oldVersion/V200/text/chinese.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
import re
|
2 |
import cn2an
|
3 |
-
import ToJyutping
|
4 |
|
|
|
5 |
from .symbols import punctuation
|
6 |
|
7 |
normalizer = lambda x: cn2an.transform(x, "an2cn")
|
@@ -90,7 +90,7 @@ rep_map = {
|
|
90 |
|
91 |
|
92 |
def replace_punctuation(text):
|
93 |
-
text = text.replace("嗯", "恩").replace("呣", "母")
|
94 |
pattern = re.compile("|".join(re.escape(p) for p in rep_map.keys()))
|
95 |
|
96 |
replaced_text = pattern.sub(lambda x: rep_map[x.group()], text)
|
@@ -152,13 +152,9 @@ def jyuping_to_initials_finals_tones(jyuping_syllables):
|
|
152 |
|
153 |
|
154 |
def get_jyutping(text):
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
result.append(jp)
|
159 |
-
else:
|
160 |
-
result.append(char)
|
161 |
-
return result
|
162 |
|
163 |
|
164 |
def get_bert_feature(text, word2ph):
|
|
|
1 |
import re
|
2 |
import cn2an
|
|
|
3 |
|
4 |
+
from pyjyutping import jyutping
|
5 |
from .symbols import punctuation
|
6 |
|
7 |
normalizer = lambda x: cn2an.transform(x, "an2cn")
|
|
|
90 |
|
91 |
|
92 |
def replace_punctuation(text):
|
93 |
+
# text = text.replace("嗯", "恩").replace("呣", "母")
|
94 |
pattern = re.compile("|".join(re.escape(p) for p in rep_map.keys()))
|
95 |
|
96 |
replaced_text = pattern.sub(lambda x: rep_map[x.group()], text)
|
|
|
152 |
|
153 |
|
154 |
def get_jyutping(text):
|
155 |
+
jp = jyutping.convert(text)
|
156 |
+
jp_array = jp.split()
|
157 |
+
return jp_array
|
|
|
|
|
|
|
|
|
158 |
|
159 |
|
160 |
def get_bert_feature(text, word2ph):
|
oldVersion/V210/text/chinese.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
import re
|
2 |
import cn2an
|
3 |
-
import ToJyutping
|
4 |
|
|
|
5 |
from .symbols import punctuation
|
6 |
|
7 |
normalizer = lambda x: cn2an.transform(x, "an2cn")
|
@@ -90,7 +90,7 @@ rep_map = {
|
|
90 |
|
91 |
|
92 |
def replace_punctuation(text):
|
93 |
-
text = text.replace("嗯", "恩").replace("呣", "母")
|
94 |
pattern = re.compile("|".join(re.escape(p) for p in rep_map.keys()))
|
95 |
|
96 |
replaced_text = pattern.sub(lambda x: rep_map[x.group()], text)
|
@@ -152,13 +152,9 @@ def jyuping_to_initials_finals_tones(jyuping_syllables):
|
|
152 |
|
153 |
|
154 |
def get_jyutping(text):
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
result.append(jp)
|
159 |
-
else:
|
160 |
-
result.append(char)
|
161 |
-
return result
|
162 |
|
163 |
|
164 |
def get_bert_feature(text, word2ph):
|
|
|
1 |
import re
|
2 |
import cn2an
|
|
|
3 |
|
4 |
+
from pyjyutping import jyutping
|
5 |
from .symbols import punctuation
|
6 |
|
7 |
normalizer = lambda x: cn2an.transform(x, "an2cn")
|
|
|
90 |
|
91 |
|
92 |
def replace_punctuation(text):
|
93 |
+
# text = text.replace("嗯", "恩").replace("呣", "母")
|
94 |
pattern = re.compile("|".join(re.escape(p) for p in rep_map.keys()))
|
95 |
|
96 |
replaced_text = pattern.sub(lambda x: rep_map[x.group()], text)
|
|
|
152 |
|
153 |
|
154 |
def get_jyutping(text):
|
155 |
+
jp = jyutping.convert(text)
|
156 |
+
jp_array = jp.split()
|
157 |
+
return jp_array
|
|
|
|
|
|
|
|
|
158 |
|
159 |
|
160 |
def get_bert_feature(text, word2ph):
|
text/chinese.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
import re
|
2 |
import cn2an
|
3 |
-
import ToJyutping
|
4 |
|
|
|
5 |
from text.symbols import punctuation
|
6 |
|
7 |
normalizer = lambda x: cn2an.transform(x, "an2cn")
|
@@ -152,13 +152,9 @@ def jyuping_to_initials_finals_tones(jyuping_syllables):
|
|
152 |
|
153 |
|
154 |
def get_jyutping(text):
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
result.append(jp)
|
159 |
-
else:
|
160 |
-
result.append(char)
|
161 |
-
return result
|
162 |
|
163 |
|
164 |
def get_bert_feature(text, word2ph):
|
|
|
1 |
import re
|
2 |
import cn2an
|
|
|
3 |
|
4 |
+
from pyjyutping import jyutping
|
5 |
from text.symbols import punctuation
|
6 |
|
7 |
normalizer = lambda x: cn2an.transform(x, "an2cn")
|
|
|
152 |
|
153 |
|
154 |
def get_jyutping(text):
|
155 |
+
jp = jyutping.convert(text)
|
156 |
+
jp_array = jp.split()
|
157 |
+
return jp_array
|
|
|
|
|
|
|
|
|
158 |
|
159 |
|
160 |
def get_bert_feature(text, word2ph):
|