Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
@@ -13,10 +13,6 @@ import gradio as gr
|
|
13 |
model = "facebook/wav2vec2-large-960h-lv60-self"
|
14 |
tokenizer = Wav2Vec2Tokenizer.from_pretrained(model)
|
15 |
asr_model = Wav2Vec2ForCTC.from_pretrained(model)#.to('cuda')
|
16 |
-
vocab_dict = tokenizer.get_vocab()
|
17 |
-
sort_vocab = sorted((value, key) for (key,value) in vocab_dict.items())
|
18 |
-
vocab = ([x[1].replace("|", " ") if x[1] not in tokenizer.all_special_tokens else "_" for x in sort_vocab])
|
19 |
-
|
20 |
|
21 |
# Line count for SRT file
|
22 |
line_count = 0
|
@@ -27,7 +23,6 @@ def sort_alphanumeric(data):
|
|
27 |
|
28 |
return sorted(data, key = alphanum_key)
|
29 |
|
30 |
-
|
31 |
def transcribe_audio(tokenizer, asr_model, audio_file, file_handle):
|
32 |
# Run Wav2Vec2.0 inference on each audio file generated after VAD segmentation.
|
33 |
global line_count
|
@@ -49,8 +44,7 @@ def transcribe_audio(tokenizer, asr_model, audio_file, file_handle):
|
|
49 |
if len(infered_text) > 1:
|
50 |
line_count += 1
|
51 |
write_to_file(file_handle, infered_text, line_count, limits)
|
52 |
-
|
53 |
-
|
54 |
def get_subs(input_file):
|
55 |
# Get directory for audio
|
56 |
base_directory = os.getcwd()
|
|
|
13 |
model = "facebook/wav2vec2-large-960h-lv60-self"
|
14 |
tokenizer = Wav2Vec2Tokenizer.from_pretrained(model)
|
15 |
asr_model = Wav2Vec2ForCTC.from_pretrained(model)#.to('cuda')
|
|
|
|
|
|
|
|
|
16 |
|
17 |
# Line count for SRT file
|
18 |
line_count = 0
|
|
|
23 |
|
24 |
return sorted(data, key = alphanum_key)
|
25 |
|
|
|
26 |
def transcribe_audio(tokenizer, asr_model, audio_file, file_handle):
|
27 |
# Run Wav2Vec2.0 inference on each audio file generated after VAD segmentation.
|
28 |
global line_count
|
|
|
44 |
if len(infered_text) > 1:
|
45 |
line_count += 1
|
46 |
write_to_file(file_handle, infered_text, line_count, limits)
|
47 |
+
|
|
|
48 |
def get_subs(input_file):
|
49 |
# Get directory for audio
|
50 |
base_directory = os.getcwd()
|