patrickvonplaten committed on
Commit
2057efe
1 Parent(s): 3141b41

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +3 -3
README.md CHANGED
@@ -48,7 +48,7 @@ model_name = "voidful/wav2vec2-large-xlsr-53-tw-gpt"
48
  device = "cuda"
49
  processor_name = "voidful/wav2vec2-large-xlsr-53-tw-gpt"
50
 
51
- chars_to_ignore_regex = r"[¥•"#$%&'()*+,-/:;<=>@[\]^_`{|}~⦅⦆「」、 、〃〈〉《》「」『』【】〔〕〖〗〘〙〚〛〜〝〞〟〰〾〿–—‘’‛“”„‟…‧﹏﹑﹔·'℃°•·.﹑︰〈〉─《﹖﹣﹂﹁﹔!?。。"#$%&'()*+,﹐-/:;<=>@[\]^_`{|}~⦅⦆「」、、〃》「」『』【】〔〕〖〗〘〙〚〛〜〝〞〟〰〾〿–—‘’‛“”„‟…‧﹏..!\\"#$%&()*+,\\-.\\:;<=>?@\\[\\]\\\\\\/^_`{|}~]"
52
 
53
  model = Wav2Vec2ForCTC.from_pretrained(model_name).to(device)
54
  processor = Wav2Vec2Processor.from_pretrained(processor_name)
@@ -113,7 +113,7 @@ model_name = "voidful/wav2vec2-large-xlsr-53-tw-gpt"
113
  device = "cuda"
114
  processor_name = "voidful/wav2vec2-large-xlsr-53-tw-gpt"
115
 
116
- chars_to_ignore_regex = r"[¥•"#$%&'()*+,-/:;<=>@[\]^_`{|}~⦅⦆「」、 、〃〈〉《》「」『』【】〔〕〖〗〘〙〚〛〜〝〞〟〰〾〿–—‘’‛“”„‟…‧﹏﹑﹔·'℃°•·.﹑︰〈〉─《﹖﹣﹂﹁﹔!?。。"#$%&'()*+,﹐-/:;<=>@[\]^_`{|}~⦅⦆「」、、〃》「」『』【】〔〕〖〗〘〙〚〛〜〝〞〟〰〾〿–—‘’‛“”„‟…‧﹏..!\\"#$%&()*+,\\-.\\:;<=>?@\\[\\]\\\\\\/^_`{|}~]"
117
 
118
  model = Wav2Vec2ForCTC.from_pretrained(model_name).to(device)
119
  processor = Wav2Vec2Processor.from_pretrained(processor_name)
@@ -170,7 +170,7 @@ from transformers import AutoTokenizer, AutoModelWithLMHead
170
  model_name = "voidful/wav2vec2-large-xlsr-53-tw-gpt"
171
  device = "cuda"
172
  processor_name = "voidful/wav2vec2-large-xlsr-53-tw-gpt"
173
- chars_to_ignore_regex = r"[¥•"#$%&'()*+,-/:;<=>@[\]^_`{|}~⦅⦆「」、 、〃〈〉《》「」『』【】〔〕〖〗〘〙〚〛〜〝〞〟〰〾〿–—‘’‛“”„‟…‧﹏﹑﹔·'℃°•·.﹑︰〈〉─《﹖﹣﹂﹁﹔!?。。"#$%&'()*+,﹐-/:;<=>@[\]^_`{|}~⦅⦆「」、、〃》「」『』【】〔〕〖〗〘〙〚〛〜〝〞〟〰〾〿–—‘’‛“”„‟…‧﹏..!\\"#$%&()*+,\\-.\\:;<=>?@\\[\\]\\\\\\/^_`{|}~]"
174
 
175
  tokenizer = AutoTokenizer.from_pretrained("ckiplab/gpt2-base-chinese")
176
  gpt_model = AutoModelWithLMHead.from_pretrained("ckiplab/gpt2-base-chinese").to(device)
 
48
  device = "cuda"
49
  processor_name = "voidful/wav2vec2-large-xlsr-53-tw-gpt"
50
 
51
+ chars_to_ignore_regex = r"[¥•"#$%&'()*+,-/:;<=>@[\]^_`{|}~⦅⦆「」、 、〃〈〉《》「」『』【】〔〕〖〗〘〙〚〛〜〝〞〟〰〾〿–—‘’‛“”„‟…‧﹏﹑﹔·'℃°•·.﹑︰〈〉─《﹖﹣﹂﹁﹔!?。。"#$%&'()*+,﹐-/:;<=>@[\]^_`{|}~⦅⦆「」、、〃》「」『』【】〔〕〖〗〘〙〚〛〜〝〞〟〰〾〿–—‘’‛“”„‟…‧﹏..!\\\\"#$%&()*+,\\\\-.\\\\:;<=>?@\\\\[\\\\]\\\\\\\\\\\\/^_`{|}~]"
52
 
53
  model = Wav2Vec2ForCTC.from_pretrained(model_name).to(device)
54
  processor = Wav2Vec2Processor.from_pretrained(processor_name)
 
113
  device = "cuda"
114
  processor_name = "voidful/wav2vec2-large-xlsr-53-tw-gpt"
115
 
116
+ chars_to_ignore_regex = r"[¥•"#$%&'()*+,-/:;<=>@[\]^_`{|}~⦅⦆「」、 、〃〈〉《》「」『』【】〔〕〖〗〘〙〚〛〜〝〞〟〰〾〿–—‘’‛“”„‟…‧﹏﹑﹔·'℃°•·.﹑︰〈〉─《﹖﹣﹂﹁﹔!?。。"#$%&'()*+,﹐-/:;<=>@[\]^_`{|}~⦅⦆「」、、〃》「」『』【】〔〕〖〗〘〙〚〛〜〝〞〟〰〾〿–—‘’‛“”„‟…‧﹏..!\\\\"#$%&()*+,\\\\-.\\\\:;<=>?@\\\\[\\\\]\\\\\\\\\\\\/^_`{|}~]"
117
 
118
  model = Wav2Vec2ForCTC.from_pretrained(model_name).to(device)
119
  processor = Wav2Vec2Processor.from_pretrained(processor_name)
 
170
  model_name = "voidful/wav2vec2-large-xlsr-53-tw-gpt"
171
  device = "cuda"
172
  processor_name = "voidful/wav2vec2-large-xlsr-53-tw-gpt"
173
+ chars_to_ignore_regex = r"""[¥•"#$%&'()*+,-/:;<=>@[\]^_`{|}~⦅⦆「」、 、〃〈〉《》「」『』【】〔〕〖〗〘〙〚〛〜〝〞〟〰〾〿–—‘’‛“”„‟…‧﹏﹑﹔·'℃°•·.﹑︰〈〉─《﹖﹣﹂﹁﹔!?。。"#$%&'()*+,﹐-/:;<=>@[\]^_`{|}~⦅⦆「」、、〃》「」『』【】〔〕〖〗〘〙〚〛〜〝〞〟〰〾〿–—‘’‛“”„‟…‧﹏..!\\\\"#$%&()*+,\\\\-.\\\\:;<=>?@\\\\[\\\\]\\\\\\\\\\\\/^_`{|}~]"""
174
 
175
  tokenizer = AutoTokenizer.from_pretrained("ckiplab/gpt2-base-chinese")
176
  gpt_model = AutoModelWithLMHead.from_pretrained("ckiplab/gpt2-base-chinese").to(device)