Respair committed
Commit dca5d56
1 Parent(s): 17570d9

Update README.md

Files changed (1): README.md (+2 -105)
README.md CHANGED
@@ -27,112 +27,9 @@ more accurate representation for Japanese.

  Don't use this model without the post-processing functions I wrote below, or you'll get less-than-ideal performance. Check the notebook.

- ## Inference and Post-proc (highly recommended: check the notebook below!)
-
- ```python
- # This function was borrowed and modified from Aaron Yinghao Li, the author of the StyleTTS paper.
-
- from datasets import Dataset, Audio
- from transformers import WhisperProcessor, WhisperForConditionalGeneration
- import re
- import pykakasi
-
- # Maps kana sequences to their romanized (IPA) equivalents.
- kana_mapper = dict([
-     ("ゔぁ","ba"),
-     # ... etc. (take a look at the Notebook for the whole mapping)
-     ("ぉ"," o"),
-     ("ゎ"," ɯa"),
-     ("を","o")
- ])
-
- # Replace any kana left in the transcription with their romanized equivalents.
- def post_fix(text):
-     for k, v in kana_mapper.items():
-         text = text.replace(k, v)
-     return text
-
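- # For example, given the mapping shown above: post_fix("を") -> "o"
-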
- processor = WhisperProcessor.from_pretrained("openai/whisper-large-v3")
- model = WhisperForConditionalGeneration.from_pretrained("Respair/Hibiki_ASR_Phonemizer").to("cuda:0")
-
- # Pin the decoder to Japanese transcription (no language auto-detection or translation).
- forced_decoder_ids = processor.get_decoder_prompt_ids(task="transcribe", language='japanese')
-
- # Convert kanji (and katakana) spans to hiragana, leaving non-Japanese spans untouched.
- def convert_to_kana(text):
-     kks = pykakasi.kakasi()
-
-     def convert_word(word):
-         result = kks.convert(word)
-         return ''.join(item['hira'] for item in result)
-
-     # Split into Japanese and non-Japanese runs, converting only the Japanese ones.
-     parts = re.split(r'([^\u3000-\u30ff\u3400-\u4dbf\u4e00-\u9fff]+)', text)
-     converted_parts = [convert_word(part) if re.match(r'[\u3000-\u30ff\u3400-\u4dbf\u4e00-\u9fff]', part) else part for part in parts]
-
-     return ''.join(converted_parts)
-
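- # For example (assuming pykakasi's default readings):
- # convert_to_kana("漢字とカナ") -> "かんじとかな"
-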
- sample = Dataset.from_dict({"audio": ["/content/kl_chunk1987.wav"]}).cast_column("audio", Audio(16000))
- sample = sample[0]['audio']
-
- # Ensure the input features are on the same device as the model
- input_features = processor(sample["array"], sampling_rate=sample["sampling_rate"], return_tensors="pt").input_features.to("cuda:0")
-
- # generate token ids
- predicted_ids = model.generate(input_features, forced_decoder_ids=forced_decoder_ids, repetition_penalty=1.2)
- # decode token ids to text
- transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
-
- # You can add your final adjustments here. A dict would be cleaner, but this is a quick demonstration (see the dict-based sketch below).
- if ' neɽitai ' in transcription[0]:
-     transcription[0] = transcription[0].replace(' neɽitai ', ' naɽitai ')
-
- if 'harɯdʑisama' in transcription[0]:
-     transcription[0] = transcription[0].replace('harɯdʑisama', 'arɯdʑisama')
-
- if 'tɕabiʔto' in transcription[0]:
-     transcription[0] = transcription[0].replace('tɕabiʔto', 'tɕabiʔto')
-
- if 'ki ni ɕinai' in transcription[0]:
-     transcription[0] = re.sub(r'(?<!\s)ki ni ɕinai', r' ki ni ɕinai', transcription[0])
-
- if 'ʔt' in transcription[0]:
-     transcription[0] = re.sub(r'(?<!\s)ʔt', r'ʔt', transcription[0])
-
- if 'de aɽoɯ' in transcription[0]:
-     transcription[0] = re.sub(r'(?<!\s)de aɽoɯ', r' de aɽoɯ', transcription[0])
-
- if '.ʔ' in transcription[0]:
-     transcription[0] = transcription[0].replace('.ʔ', '..')
-
- if 'ʔ.' in transcription[0]:
-     transcription[0] = transcription[0].replace('ʔ.', '.')
-
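- # A dict-driven version of the ad-hoc fixes above (a minimal sketch; the pairs
- # below are only the ones from this demo, not an exhaustive list):
- #
- # final_fixes = {
- #     ' neɽitai ': ' naɽitai ',
- #     'harɯdʑisama': 'arɯdʑisama',
- #     '.ʔ': '..',
- #     'ʔ.': '.',
- # }
- # for wrong, right in final_fixes.items():
- #     transcription[0] = transcription[0].replace(wrong, right)
-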
- transcription[0] = convert_to_kana(transcription[0])  # Convert any hallucinated kanji back to kana so post_fix can romanize it.
-
- print(post_fix(transcription[0].lstrip()))
- ```
-
- The full code -> [Notebook](https://colab.research.google.com/drive/13tx8WKzkvePFdtKU4WUE_iYyYCqTY8dZ#scrollTo=5XqUs-sPdT79)
+ ## Inference and Post-proc
+
+ Check here -> [Notebook](https://colab.research.google.com/drive/13tx8WKzkvePFdtKU4WUE_iYyYCqTY8dZ#scrollTo=5XqUs-sPdT79)

  ## Intended uses & limitations