Update app.py
app.py CHANGED
```diff
@@ -58,32 +58,49 @@ def convert_audio(audio_filepath, tmpdir, utt_id):
 asr_pipeline = pipeline("automatic-speech-recognition", model="nvidia/canary-1b")
 
 def transcribe(audio_filepath, src_lang, tgt_lang, pnc):
-    if audio_filepath is None:
-        raise gr.Error("Please provide some input audio: either upload an audio file or use the microphone")
 
-
-
-
-
-
-
-        "source_lang": src_lang,
-        "target_lang": tgt_lang,
-        "taskname": "asr", # Setting taskname to "asr"
-        "pnc": pnc,
-        "answer": "predict"
-    }
-
-    manifest_filepath = os.path.join(tmpdir, 'config.json')
+    if audio_filepath is None:
+        raise gr.Error("Please provide some input audio: either upload an audio file or use the microphone")
+
+    utt_id = uuid.uuid4()
+    with tempfile.TemporaryDirectory() as tmpdir:
+        converted_audio_filepath, duration = convert_audio(audio_filepath, tmpdir, str(utt_id))
 
-
-
+        # make manifest file and save
+        manifest_data = {
+            "audio_filepath": converted_audio_filepath,
+            "source_lang": src_lang,
+            "target_lang": tgt_lang,
+            "taskname": taskname,
+            "pnc": pnc,
+            "answer": "predict",
+            "duration": str(duration),
+        }
 
-
-    transcribed_text = asr_pipeline(audio_filepath)
-    output_text = transcribed_text[0]['transcription']
+        manifest_filepath = os.path.join(tmpdir, f'{utt_id}.json')
 
-
+        with open(manifest_filepath, 'w') as fout:
+            line = json.dumps(manifest_data)
+            fout.write(line + '\n')
+
+        # call transcribe, passing in manifest filepath
+        if duration < 40:
+            output_text = model.transcribe(manifest_filepath)[0]
+        else: # do buffered inference
+            with torch.cuda.amp.autocast(dtype=amp_dtype): # TODO: make it work if no cuda
+                with torch.no_grad():
+                    hyps = get_buffered_pred_feat_multitaskAED(
+                        frame_asr,
+                        model.cfg.preprocessor,
+                        model_stride_in_secs,
+                        model.device,
+                        manifest=manifest_filepath,
+                        filepaths=None,
+                    )
+
+                    output_text = hyps[0].text
+
+    return output_text
 
 
 
```
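The hunk header shows this change sits just below `convert_audio(audio_filepath, tmpdir, utt_id)`, whose body is not part of the diff. Below is a minimal sketch of what such a helper plausibly does for Canary, which expects 16 kHz mono WAV input; the function body, the librosa/soundfile choice, and the return convention are assumptions inferred from the call site `converted_audio_filepath, duration = convert_audio(...)`, not code from this commit.

```python
import os

import librosa
import soundfile as sf


def convert_audio(audio_filepath, tmpdir, utt_id):
    """Hypothetical sketch: resample the upload to 16 kHz mono WAV and
    return (converted_path, duration_in_seconds), matching the call site
    in transcribe(). The real helper in app.py may differ."""
    data, sr = librosa.load(audio_filepath, sr=16000, mono=True)
    converted_path = os.path.join(tmpdir, f"{utt_id}.wav")
    sf.write(converted_path, data, sr)
    return converted_path, len(data) / sr
```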
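For reference, each line the new code writes to `{utt_id}.json` is a NeMo-style JSON Lines manifest entry. The sketch below shows one such entry with illustrative values: the path and duration are made up, and `taskname` is assumed to resolve to `"asr"` (the value the old code hard-coded).

```python
import json

# Illustrative manifest entry for a plain English transcription request.
manifest_data = {
    "audio_filepath": "/tmp/tmpabc123/9f0c6d2e.wav",  # output of convert_audio
    "source_lang": "en",
    "target_lang": "en",   # same as source_lang -> transcription, not translation
    "taskname": "asr",     # assumed value of the module-level `taskname`
    "pnc": "yes",          # punctuation and capitalization enabled
    "answer": "predict",
    "duration": "12.34",   # serialized as a string, matching str(duration)
}

# NeMo manifests are JSON Lines: exactly one JSON object per line.
print(json.dumps(manifest_data))
```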
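The long-audio branch depends on module-level names defined outside this hunk: `model`, `taskname`, `amp_dtype`, `model_stride_in_secs`, `frame_asr`, and `get_buffered_pred_feat_multitaskAED`. A plausible setup is sketched below, modeled on NeMo's chunked AED inference utilities; the import paths, the 8x-subsampling stride, and the buffer sizes are assumptions rather than code from this commit.

```python
import torch
from nemo.collections.asr.models import ASRModel
from nemo.collections.asr.parts.utils.streaming_utils import FrameBatchMultiTaskAED
from nemo.collections.asr.parts.utils.transcribe_utils import (
    get_buffered_pred_feat_multitaskAED,
)

model = ASRModel.from_pretrained("nvidia/canary-1b")
model.eval()

taskname = "asr"
amp_dtype = torch.float16  # autocast precision; assumes a CUDA device (see the TODO)

# Assumed: Canary's encoder subsamples the 10 ms feature frames by 8x,
# so one decoder frame covers roughly 80 ms of audio.
model_stride_in_secs = model.cfg.preprocessor.window_stride * 8

# Buffered decoder for clips past the 40 s cut-off used in transcribe().
frame_asr = FrameBatchMultiTaskAED(
    asr_model=model,
    frame_len=40.0,
    total_buffer=40.0,
    batch_size=16,
)
```

With a setup along these lines, `get_buffered_pred_feat_multitaskAED` chunks the audio listed in the manifest, runs each buffer through `frame_asr`, and stitches the hypotheses that the new code reads back as `hyps[0].text`.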