Spaces:

jiuuee
/

my-alexa

Runtime error

jiuuee committed on May 2

Commit

b3d14b2

•

1 Parent(s): 6b9a6d0

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -58,32 +58,49 @@ def convert_audio(audio_filepath, tmpdir, utt_id):
 asr_pipeline = pipeline("automatic-speech-recognition", model="nvidia/canary-1b")
 def transcribe(audio_filepath, src_lang, tgt_lang, pnc):
-    if audio_filepath is None:
-        raise gr.Error("Please provide some input audio: either upload an audio file or use the microphone")
-    utt_id = uuid.uuid4()
-    with tempfile.TemporaryDirectory() as tmpdir:
-        # Make manifest file and save
-        manifest_data = {
-            "audio_filepath": audio_filepath,
-            "source_lang": src_lang,
-            "target_lang": tgt_lang,
-            "taskname": "asr",  # Setting taskname to "asr"
-            "pnc": pnc,
-            "answer": "predict"
-        }
-        manifest_filepath = os.path.join(tmpdir, 'config.json')
-        with open(manifest_filepath, 'w') as fout:
-            json.dump(manifest_data, fout)
-        # Transcribe audio using ASR pipeline
-        transcribed_text = asr_pipeline(audio_filepath)
-        output_text = transcribed_text[0]['transcription']
-    return output_text

 asr_pipeline = pipeline("automatic-speech-recognition", model="nvidia/canary-1b")
 def transcribe(audio_filepath, src_lang, tgt_lang, pnc):
+	if audio_filepath is None:
+		raise gr.Error("Please provide some input audio: either upload an audio file or use the microphone")
+	utt_id = uuid.uuid4()
+	with tempfile.TemporaryDirectory() as tmpdir:
+		converted_audio_filepath, duration = convert_audio(audio_filepath, tmpdir, str(utt_id))
+		# make manifest file and save
+		manifest_data = {
+			"audio_filepath": converted_audio_filepath,
+			"source_lang": src_lang,
+			"target_lang": tgt_lang,
+			"taskname": taskname,
+			"pnc": pnc,
+			"answer": "predict",
+			"duration": str(duration),
+		}
+		manifest_filepath = os.path.join(tmpdir, f'{utt_id}.json')
+		with open(manifest_filepath, 'w') as fout:
+			line = json.dumps(manifest_data)
+			fout.write(line + '\n')
+		# call transcribe, passing in manifest filepath
+		if duration < 40:
+			output_text = model.transcribe(manifest_filepath)[0]
+		else: # do buffered inference
+			with torch.cuda.amp.autocast(dtype=amp_dtype): # TODO: make it work if no cuda
+				with torch.no_grad():
+					hyps = get_buffered_pred_feat_multitaskAED(
+						frame_asr,
+						model.cfg.preprocessor,
+						model_stride_in_secs,
+						model.device,
+						manifest=manifest_filepath,
+						filepaths=None,
+					)
+					output_text = hyps[0].text
+	return output_text