Naozumi0512 committed on
Commit
06ba675
·
1 Parent(s): ca6ebec

Fix cleaner

Browse files
oldVersion/V101/text/chinese.py CHANGED
@@ -1,7 +1,7 @@
1
  import re
2
  import cn2an
3
- import ToJyutping
4
 
 
5
  from .symbols import punctuation
6
 
7
  normalizer = lambda x: cn2an.transform(x, "an2cn")
@@ -90,7 +90,7 @@ rep_map = {
90
 
91
 
92
  def replace_punctuation(text):
93
- text = text.replace("嗯", "恩").replace("呣", "母")
94
  pattern = re.compile("|".join(re.escape(p) for p in rep_map.keys()))
95
 
96
  replaced_text = pattern.sub(lambda x: rep_map[x.group()], text)
@@ -152,13 +152,9 @@ def jyuping_to_initials_finals_tones(jyuping_syllables):
152
 
153
 
154
  def get_jyutping(text):
155
- result = []
156
- for char, jp in ToJyutping.get_jyutping_list(text):
157
- if jp:
158
- result.append(jp)
159
- else:
160
- result.append(char)
161
- return result
162
 
163
 
164
  def get_bert_feature(text, word2ph):
 
1
  import re
2
  import cn2an
 
3
 
4
+ from pyjyutping import jyutping
5
  from .symbols import punctuation
6
 
7
  normalizer = lambda x: cn2an.transform(x, "an2cn")
 
90
 
91
 
92
  def replace_punctuation(text):
93
+ # text = text.replace("嗯", "恩").replace("呣", "母")
94
  pattern = re.compile("|".join(re.escape(p) for p in rep_map.keys()))
95
 
96
  replaced_text = pattern.sub(lambda x: rep_map[x.group()], text)
 
152
 
153
 
154
  def get_jyutping(text):
155
+ jp = jyutping.convert(text)
156
+ jp_array = jp.split()
157
+ return jp_array
 
 
 
 
158
 
159
 
160
  def get_bert_feature(text, word2ph):
oldVersion/V110/text/chinese.py CHANGED
@@ -1,7 +1,7 @@
1
  import re
2
  import cn2an
3
- import ToJyutping
4
 
 
5
  from .symbols import punctuation
6
 
7
  normalizer = lambda x: cn2an.transform(x, "an2cn")
@@ -90,7 +90,7 @@ rep_map = {
90
 
91
 
92
  def replace_punctuation(text):
93
- text = text.replace("嗯", "恩").replace("呣", "母")
94
  pattern = re.compile("|".join(re.escape(p) for p in rep_map.keys()))
95
 
96
  replaced_text = pattern.sub(lambda x: rep_map[x.group()], text)
@@ -152,13 +152,9 @@ def jyuping_to_initials_finals_tones(jyuping_syllables):
152
 
153
 
154
  def get_jyutping(text):
155
- result = []
156
- for char, jp in ToJyutping.get_jyutping_list(text):
157
- if jp:
158
- result.append(jp)
159
- else:
160
- result.append(char)
161
- return result
162
 
163
 
164
  def get_bert_feature(text, word2ph):
 
1
  import re
2
  import cn2an
 
3
 
4
+ from pyjyutping import jyutping
5
  from .symbols import punctuation
6
 
7
  normalizer = lambda x: cn2an.transform(x, "an2cn")
 
90
 
91
 
92
  def replace_punctuation(text):
93
+ # text = text.replace("嗯", "恩").replace("呣", "母")
94
  pattern = re.compile("|".join(re.escape(p) for p in rep_map.keys()))
95
 
96
  replaced_text = pattern.sub(lambda x: rep_map[x.group()], text)
 
152
 
153
 
154
  def get_jyutping(text):
155
+ jp = jyutping.convert(text)
156
+ jp_array = jp.split()
157
+ return jp_array
 
 
 
 
158
 
159
 
160
  def get_bert_feature(text, word2ph):
oldVersion/V111/text/chinese.py CHANGED
@@ -1,7 +1,7 @@
1
  import re
2
  import cn2an
3
- import ToJyutping
4
 
 
5
  from .symbols import punctuation
6
 
7
  normalizer = lambda x: cn2an.transform(x, "an2cn")
@@ -90,7 +90,7 @@ rep_map = {
90
 
91
 
92
  def replace_punctuation(text):
93
- text = text.replace("嗯", "恩").replace("呣", "母")
94
  pattern = re.compile("|".join(re.escape(p) for p in rep_map.keys()))
95
 
96
  replaced_text = pattern.sub(lambda x: rep_map[x.group()], text)
@@ -152,13 +152,9 @@ def jyuping_to_initials_finals_tones(jyuping_syllables):
152
 
153
 
154
  def get_jyutping(text):
155
- result = []
156
- for char, jp in ToJyutping.get_jyutping_list(text):
157
- if jp:
158
- result.append(jp)
159
- else:
160
- result.append(char)
161
- return result
162
 
163
 
164
  def get_bert_feature(text, word2ph):
 
1
  import re
2
  import cn2an
 
3
 
4
+ from pyjyutping import jyutping
5
  from .symbols import punctuation
6
 
7
  normalizer = lambda x: cn2an.transform(x, "an2cn")
 
90
 
91
 
92
  def replace_punctuation(text):
93
+ # text = text.replace("嗯", "恩").replace("呣", "母")
94
  pattern = re.compile("|".join(re.escape(p) for p in rep_map.keys()))
95
 
96
  replaced_text = pattern.sub(lambda x: rep_map[x.group()], text)
 
152
 
153
 
154
  def get_jyutping(text):
155
+ jp = jyutping.convert(text)
156
+ jp_array = jp.split()
157
+ return jp_array
 
 
 
 
158
 
159
 
160
  def get_bert_feature(text, word2ph):
oldVersion/V200/text/chinese.py CHANGED
@@ -1,7 +1,7 @@
1
  import re
2
  import cn2an
3
- import ToJyutping
4
 
 
5
  from .symbols import punctuation
6
 
7
  normalizer = lambda x: cn2an.transform(x, "an2cn")
@@ -90,7 +90,7 @@ rep_map = {
90
 
91
 
92
  def replace_punctuation(text):
93
- text = text.replace("嗯", "恩").replace("呣", "母")
94
  pattern = re.compile("|".join(re.escape(p) for p in rep_map.keys()))
95
 
96
  replaced_text = pattern.sub(lambda x: rep_map[x.group()], text)
@@ -152,13 +152,9 @@ def jyuping_to_initials_finals_tones(jyuping_syllables):
152
 
153
 
154
  def get_jyutping(text):
155
- result = []
156
- for char, jp in ToJyutping.get_jyutping_list(text):
157
- if jp:
158
- result.append(jp)
159
- else:
160
- result.append(char)
161
- return result
162
 
163
 
164
  def get_bert_feature(text, word2ph):
 
1
  import re
2
  import cn2an
 
3
 
4
+ from pyjyutping import jyutping
5
  from .symbols import punctuation
6
 
7
  normalizer = lambda x: cn2an.transform(x, "an2cn")
 
90
 
91
 
92
  def replace_punctuation(text):
93
+ # text = text.replace("嗯", "恩").replace("呣", "母")
94
  pattern = re.compile("|".join(re.escape(p) for p in rep_map.keys()))
95
 
96
  replaced_text = pattern.sub(lambda x: rep_map[x.group()], text)
 
152
 
153
 
154
  def get_jyutping(text):
155
+ jp = jyutping.convert(text)
156
+ jp_array = jp.split()
157
+ return jp_array
 
 
 
 
158
 
159
 
160
  def get_bert_feature(text, word2ph):
oldVersion/V210/text/chinese.py CHANGED
@@ -1,7 +1,7 @@
1
  import re
2
  import cn2an
3
- import ToJyutping
4
 
 
5
  from .symbols import punctuation
6
 
7
  normalizer = lambda x: cn2an.transform(x, "an2cn")
@@ -90,7 +90,7 @@ rep_map = {
90
 
91
 
92
  def replace_punctuation(text):
93
- text = text.replace("嗯", "恩").replace("呣", "母")
94
  pattern = re.compile("|".join(re.escape(p) for p in rep_map.keys()))
95
 
96
  replaced_text = pattern.sub(lambda x: rep_map[x.group()], text)
@@ -152,13 +152,9 @@ def jyuping_to_initials_finals_tones(jyuping_syllables):
152
 
153
 
154
  def get_jyutping(text):
155
- result = []
156
- for char, jp in ToJyutping.get_jyutping_list(text):
157
- if jp:
158
- result.append(jp)
159
- else:
160
- result.append(char)
161
- return result
162
 
163
 
164
  def get_bert_feature(text, word2ph):
 
1
  import re
2
  import cn2an
 
3
 
4
+ from pyjyutping import jyutping
5
  from .symbols import punctuation
6
 
7
  normalizer = lambda x: cn2an.transform(x, "an2cn")
 
90
 
91
 
92
  def replace_punctuation(text):
93
+ # text = text.replace("嗯", "恩").replace("呣", "母")
94
  pattern = re.compile("|".join(re.escape(p) for p in rep_map.keys()))
95
 
96
  replaced_text = pattern.sub(lambda x: rep_map[x.group()], text)
 
152
 
153
 
154
  def get_jyutping(text):
155
+ jp = jyutping.convert(text)
156
+ jp_array = jp.split()
157
+ return jp_array
 
 
 
 
158
 
159
 
160
  def get_bert_feature(text, word2ph):
text/chinese.py CHANGED
@@ -1,7 +1,7 @@
1
  import re
2
  import cn2an
3
- import ToJyutping
4
 
 
5
  from text.symbols import punctuation
6
 
7
  normalizer = lambda x: cn2an.transform(x, "an2cn")
@@ -152,13 +152,9 @@ def jyuping_to_initials_finals_tones(jyuping_syllables):
152
 
153
 
154
  def get_jyutping(text):
155
- result = []
156
- for char, jp in ToJyutping.get_jyutping_list(text):
157
- if jp:
158
- result.append(jp)
159
- else:
160
- result.append(char)
161
- return result
162
 
163
 
164
  def get_bert_feature(text, word2ph):
 
1
  import re
2
  import cn2an
 
3
 
4
+ from pyjyutping import jyutping
5
  from text.symbols import punctuation
6
 
7
  normalizer = lambda x: cn2an.transform(x, "an2cn")
 
152
 
153
 
154
  def get_jyutping(text):
155
+ jp = jyutping.convert(text)
156
+ jp_array = jp.split()
157
+ return jp_array
 
 
 
 
158
 
159
 
160
  def get_bert_feature(text, word2ph):