jiuuee committed on
Commit
b3d14b2
1 Parent(s): 6b9a6d0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -22
app.py CHANGED
@@ -58,32 +58,49 @@ def convert_audio(audio_filepath, tmpdir, utt_id):
58
  asr_pipeline = pipeline("automatic-speech-recognition", model="nvidia/canary-1b")
59
 
60
  def transcribe(audio_filepath, src_lang, tgt_lang, pnc):
61
- if audio_filepath is None:
62
- raise gr.Error("Please provide some input audio: either upload an audio file or use the microphone")
63
 
64
- utt_id = uuid.uuid4()
65
-
66
- with tempfile.TemporaryDirectory() as tmpdir:
67
- # Make manifest file and save
68
- manifest_data = {
69
- "audio_filepath": audio_filepath,
70
- "source_lang": src_lang,
71
- "target_lang": tgt_lang,
72
- "taskname": "asr", # Setting taskname to "asr"
73
- "pnc": pnc,
74
- "answer": "predict"
75
- }
76
-
77
- manifest_filepath = os.path.join(tmpdir, 'config.json')
78
 
79
- with open(manifest_filepath, 'w') as fout:
80
- json.dump(manifest_data, fout)
 
 
 
 
 
 
 
 
81
 
82
- # Transcribe audio using ASR pipeline
83
- transcribed_text = asr_pipeline(audio_filepath)
84
- output_text = transcribed_text[0]['transcription']
85
 
86
- return output_text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
 
88
 
89
 
 
58
  asr_pipeline = pipeline("automatic-speech-recognition", model="nvidia/canary-1b")
59
 
60
  def transcribe(audio_filepath, src_lang, tgt_lang, pnc):
 
 
61
 
62
+ if audio_filepath is None:
63
+ raise gr.Error("Please provide some input audio: either upload an audio file or use the microphone")
64
+
65
+ utt_id = uuid.uuid4()
66
+ with tempfile.TemporaryDirectory() as tmpdir:
67
+ converted_audio_filepath, duration = convert_audio(audio_filepath, tmpdir, str(utt_id))
 
 
 
 
 
 
 
 
68
 
69
+ # make manifest file and save
70
+ manifest_data = {
71
+ "audio_filepath": converted_audio_filepath,
72
+ "source_lang": src_lang,
73
+ "target_lang": tgt_lang,
74
+ "taskname": taskname,
75
+ "pnc": pnc,
76
+ "answer": "predict",
77
+ "duration": str(duration),
78
+ }
79
 
80
+ manifest_filepath = os.path.join(tmpdir, f'{utt_id}.json')
 
 
81
 
82
+ with open(manifest_filepath, 'w') as fout:
83
+ line = json.dumps(manifest_data)
84
+ fout.write(line + '\n')
85
+
86
+ # call transcribe, passing in manifest filepath
87
+ if duration < 40:
88
+ output_text = model.transcribe(manifest_filepath)[0]
89
+ else: # do buffered inference
90
+ with torch.cuda.amp.autocast(dtype=amp_dtype): # TODO: make it work if no cuda
91
+ with torch.no_grad():
92
+ hyps = get_buffered_pred_feat_multitaskAED(
93
+ frame_asr,
94
+ model.cfg.preprocessor,
95
+ model_stride_in_secs,
96
+ model.device,
97
+ manifest=manifest_filepath,
98
+ filepaths=None,
99
+ )
100
+
101
+ output_text = hyps[0].text
102
+
103
+ return output_text
104
 
105
 
106