Spaces:

Mahiruoshi
/

lovelive-ShojoKageki-vits

Running

App Files Community

Mahiruoshi committed on Apr 18, 2023

Commit

361bf5f

1 Parent(s): d67d0bf

Upload 11 files

Browse files

Files changed (7) hide show

text/__init__.py +3 -27
text/__pycache__/__init__.cpython-39.pyc +0 -0
text/__pycache__/cleaners.cpython-39.pyc +0 -0
text/__pycache__/japanese.cpython-39.pyc +0 -0
text/__pycache__/mandarin.cpython-39.pyc +0 -0
text/cleaners.py +57 -14
text/japanese.py +1 -13

text/__init__.py CHANGED Viewed

@@ -1,14 +1,8 @@
 """ from https://github.com/keithito/tacotron """
 from text import cleaners
-from text.symbols import symbols
-# Mappings from symbol to numeric ID and vice versa:
-_symbol_to_id = {s: i for i, s in enumerate(symbols)}
-_id_to_symbol = {i: s for i, s in enumerate(symbols)}
-def text_to_sequence(text, cleaner_names):
   '''Converts a string of text to a sequence of IDs corresponding to the symbols in the text.
     Args:
       text: string to convert to a sequence
@@ -16,6 +10,8 @@ def text_to_sequence(text, cleaner_names):
     Returns:
       List of integers corresponding to the symbols in the text
   '''
   sequence = []
   clean_text = _clean_text(text, cleaner_names)
@@ -27,26 +23,6 @@ def text_to_sequence(text, cleaner_names):
   return sequence
-def cleaned_text_to_sequence(cleaned_text):
-  '''Converts a string of text to a sequence of IDs corresponding to the symbols in the text.
-    Args:
-      text: string to convert to a sequence
-    Returns:
-      List of integers corresponding to the symbols in the text
-  '''
-  sequence = [_symbol_to_id[symbol] for symbol in cleaned_text if symbol in _symbol_to_id.keys()]
-  return sequence
-def sequence_to_text(sequence):
-  '''Converts a sequence of IDs back to a string'''
-  result = ''
-  for symbol_id in sequence:
-    s = _id_to_symbol[symbol_id]
-    result += s
-  return result
 def _clean_text(text, cleaner_names):
   for name in cleaner_names:
     cleaner = getattr(cleaners, name)

 """ from https://github.com/keithito/tacotron """
 from text import cleaners
+def text_to_sequence(text, symbols, cleaner_names):
   '''Converts a string of text to a sequence of IDs corresponding to the symbols in the text.
     Args:
       text: string to convert to a sequence
     Returns:
       List of integers corresponding to the symbols in the text
   '''
+  _symbol_to_id = {s: i for i, s in enumerate(symbols)}
   sequence = []
   clean_text = _clean_text(text, cleaner_names)
   return sequence
 def _clean_text(text, cleaner_names):
   for name in cleaner_names:
     cleaner = getattr(cleaners, name)

text/__pycache__/__init__.cpython-39.pyc ADDED Viewed

Binary file (1.23 kB). View file

text/__pycache__/cleaners.cpython-39.pyc ADDED Viewed

Binary file (3.74 kB). View file

text/__pycache__/japanese.cpython-39.pyc ADDED Viewed

Binary file (4.43 kB). View file

text/__pycache__/mandarin.cpython-39.pyc ADDED Viewed

Binary file (6.4 kB). View file

text/cleaners.py CHANGED Viewed

@@ -1,21 +1,33 @@
 import re
-from text.english import english_to_lazy_ipa, english_to_ipa2, english_to_lazy_ipa2
-from text.japanese import clean_japanese, japanese_to_romaji_with_accent, japanese_to_ipa, japanese_to_ipa2, japanese_to_ipa3
 from text.mandarin import number_to_chinese, chinese_to_bopomofo, latin_to_bopomofo, chinese_to_romaji, chinese_to_lazy_ipa, chinese_to_ipa, chinese_to_ipa2
-def none_cleaner(text):
-    return text
 def japanese_cleaners(text):
-    text = clean_japanese(text)
-    text = re.sub(r'([A-Za-z])$', r'\1.', text)
     return text
 def japanese_cleaners2(text):
     return japanese_cleaners(text).replace('ts', 'ʦ').replace('...', '…')
 def chinese_cleaners(text):
     '''Pipeline for Chinese text'''
     text = number_to_chinese(text)
     text = chinese_to_bopomofo(text)
     text = latin_to_bopomofo(text)
@@ -23,7 +35,10 @@ def chinese_cleaners(text):
         text += '。'
     return text
 def zh_ja_mixture_cleaners(text):
     chinese_texts = re.findall(r'\[ZH\].*?\[ZH\]', text)
     japanese_texts = re.findall(r'\[JA\].*?\[JA\]', text)
     for chinese_text in chinese_texts:
@@ -38,25 +53,53 @@ def zh_ja_mixture_cleaners(text):
         text += '.'
     return text
 def cjke_cleaners(text):
     chinese_texts = re.findall(r'\[ZH\].*?\[ZH\]', text)
     japanese_texts = re.findall(r'\[JA\].*?\[JA\]', text)
-    english_texts = re.findall(r'\[EN\].*?\[EN\]', text)
     for chinese_text in chinese_texts:
         cleaned_text = chinese_to_lazy_ipa(chinese_text[4:-4])
         cleaned_text = cleaned_text.replace(
             'ʧ', 'tʃ').replace('ʦ', 'ts').replace('ɥan', 'ɥæn')
         text = text.replace(chinese_text, cleaned_text+' ', 1)
     for japanese_text in japanese_texts:
-        cleaned_text = clean_japanese(japanese_text[4:-4])
         cleaned_text = cleaned_text.replace('ʧ', 'tʃ').replace(
             'ʦ', 'ts').replace('ɥan', 'ɥæn').replace('ʥ', 'dz')
         text = text.replace(japanese_text, cleaned_text+' ', 1)
-    for english_text in english_texts:
-        cleaned_text = english_to_ipa2(english_text[4:-4])
-        cleaned_text = cleaned_text.replace('ɑ', 'a').replace(
-            'ɔ', 'o').replace('ɛ', 'e').replace('ɪ', 'i').replace('ʊ', 'u')
-        text = text.replace(english_text, cleaned_text+' ', 1)
     text = text[:-1]
     if re.match(r'[^\.,!\?\-…~]', text[-1]):
         text += '.'

 import re
+from text.japanese import japanese_to_romaji_with_accent, japanese_to_ipa, japanese_to_ipa2, japanese_to_ipa3
 from text.mandarin import number_to_chinese, chinese_to_bopomofo, latin_to_bopomofo, chinese_to_romaji, chinese_to_lazy_ipa, chinese_to_ipa, chinese_to_ipa2
 def japanese_cleaners(text):
+    from text.japanese import japanese_to_romaji_with_accent
+    text = japanese_to_romaji_with_accent(text)
+    if re.match('[A-Za-z]', text[-1]):
+        text += '.'
     return text
 def japanese_cleaners2(text):
     return japanese_cleaners(text).replace('ts', 'ʦ').replace('...', '…')
+def korean_cleaners(text):
+    '''Pipeline for Korean text'''
+    from text.korean import latin_to_hangul, number_to_hangul, divide_hangul
+    text = latin_to_hangul(text)
+    text = number_to_hangul(text)
+    text = divide_hangul(text)
+    if re.match('[\u3131-\u3163]', text[-1]):
+        text += '.'
+    return text
 def chinese_cleaners(text):
     '''Pipeline for Chinese text'''
+    from text.mandarin import number_to_chinese, chinese_to_bopomofo, latin_to_bopomofo
     text = number_to_chinese(text)
     text = chinese_to_bopomofo(text)
     text = latin_to_bopomofo(text)
         text += '。'
     return text
 def zh_ja_mixture_cleaners(text):
+    from text.mandarin import chinese_to_romaji
+    from text.japanese import japanese_to_romaji_with_accent
     chinese_texts = re.findall(r'\[ZH\].*?\[ZH\]', text)
     japanese_texts = re.findall(r'\[JA\].*?\[JA\]', text)
     for chinese_text in chinese_texts:
         text += '.'
     return text
+def sanskrit_cleaners(text):
+    text = text.replace('॥', '।').replace('ॐ', 'ओम्')
+    if text[-1] != '।':
+        text += ' ।'
+    return text
+def cjks_cleaners(text):
+    from text.mandarin import chinese_to_lazy_ipa
+    from text.japanese import japanese_to_ipa
+    from text.korean import korean_to_lazy_ipa
+    from text.sanskrit import devanagari_to_ipa
+    chinese_texts = re.findall(r'\[ZH\].*?\[ZH\]', text)
+    japanese_texts = re.findall(r'\[JA\].*?\[JA\]', text)
+    korean_texts = re.findall(r'\[KO\].*?\[KO\]', text)
+    sanskrit_texts = re.findall(r'\[SA\].*?\[SA\]', text)
+    for chinese_text in chinese_texts:
+        cleaned_text = chinese_to_lazy_ipa(chinese_text[4:-4])
+        text = text.replace(chinese_text, cleaned_text+' ', 1)
+    for japanese_text in japanese_texts:
+        cleaned_text = japanese_to_ipa(japanese_text[4:-4])
+        text = text.replace(japanese_text, cleaned_text+' ', 1)
+    for korean_text in korean_texts:
+        cleaned_text = korean_to_lazy_ipa(korean_text[4:-4])
+        text = text.replace(korean_text, cleaned_text+' ', 1)
+    for sanskrit_text in sanskrit_texts:
+        cleaned_text = devanagari_to_ipa(sanskrit_text[4:-4])
+        text = text.replace(sanskrit_text, cleaned_text+' ', 1)
+    text = text[:-1]
+    if re.match(r'[^\.,!\?\-…~]', text[-1]):
+        text += '.'
+    return text
 def cjke_cleaners(text):
     chinese_texts = re.findall(r'\[ZH\].*?\[ZH\]', text)
     japanese_texts = re.findall(r'\[JA\].*?\[JA\]', text)
     for chinese_text in chinese_texts:
         cleaned_text = chinese_to_lazy_ipa(chinese_text[4:-4])
         cleaned_text = cleaned_text.replace(
             'ʧ', 'tʃ').replace('ʦ', 'ts').replace('ɥan', 'ɥæn')
         text = text.replace(chinese_text, cleaned_text+' ', 1)
     for japanese_text in japanese_texts:
+        cleaned_text = japanese_to_ipa(japanese_text[4:-4])
         cleaned_text = cleaned_text.replace('ʧ', 'tʃ').replace(
             'ʦ', 'ts').replace('ɥan', 'ɥæn').replace('ʥ', 'dz')
         text = text.replace(japanese_text, cleaned_text+' ', 1)
     text = text[:-1]
     if re.match(r'[^\.,!\?\-…~]', text[-1]):
         text += '.'

text/japanese.py CHANGED Viewed

@@ -1,18 +1,6 @@
 import re
 from unidecode import unidecode
-from unidecode import unidecode
-import ctypes
-dll = ctypes.cdll.LoadLibrary('cleaners/JapaneseCleaner.dll')
-dll.CreateOjt.restype = ctypes.c_uint64
-dll.PluginMain.restype = ctypes.c_uint64
-floder = ctypes.create_unicode_buffer("cleaners")
-dll.CreateOjt(floder)
-def clean_japanese(text):
-    input_wchar_pointer = ctypes.create_unicode_buffer(text)
-    result = ctypes.wstring_at(dll.PluginMain(input_wchar_pointer))
-    return result
 # Regular expression matching Japanese without punctuation marks:

 import re
 from unidecode import unidecode
+import pyopenjtalk
 # Regular expression matching Japanese without punctuation marks: