jiuuee committed
Commit 2ca3e3f · verified · 1 Parent(s): 8b2f563

Update app.py

Files changed (1)
  1. app.py +5 -9
app.py CHANGED
@@ -1,7 +1,3 @@
-
-
-
-
'''
import gradio as gr
from transformers import pipeline
@@ -11,6 +7,7 @@ asr_pipeline = pipeline("automatic-speech-recognition", model="nvidia/canary-1b"
qa_pipeline = pipeline("question-answering", model="LLAMA/llama3-base-qa", tokenizer="LLAMA/llama3-base-qa")
tts_pipeline = pipeline("text-to-speech", model="patrickvonplaten/vits-large", device=0)
'''
+
import gradio as gr
import json
import librosa
@@ -26,7 +23,7 @@ from nemo.collections.asr.parts.utils.streaming_utils import FrameBatchMultiTask
from nemo.collections.asr.parts.utils.transcribe_utils import get_buffered_pred_feat_multitaskAED

SAMPLE_RATE = 16000 # Hz
- MAX_AUDIO_MINUTES = 10 # wont try to transcribe if longer than this
+ MAX_AUDIO_SECS = 30 # wont try to transcribe if longer than this

model = ASRModel.from_pretrained("nvidia/canary-1b")
model.eval()
@@ -59,12 +56,11 @@ def convert_audio(audio_filepath, tmpdir, utt_id):
Do not convert and raise error if audio too long.
Returns output filename and duration.
"""
-
data, sr = librosa.load(audio_filepath, sr=None, mono=True)

duration = librosa.get_duration(y=data, sr=sr)

- if duration / 60.0 > MAX_AUDIO_MINUTES:
+ if duration > MAX_AUDIO_SECS:
raise gr.Error(
f"This demo can transcribe up to {MAX_AUDIO_MINUTES} minutes of audio. "
"If you wish, you may trim the audio using the Audio viewer in Step 1 "
@@ -92,9 +88,9 @@ def transcribe(audio_filepath, src_lang, tgt_lang, pnc):

utt_id = uuid.uuid4()
with tempfile.TemporaryDirectory() as tmpdir:
- converted_audio_filepath, duration = convert_audio(audio_filepath, tmpdir, str(utt_id))
+ converted_audio_filepath, duration = convert_audio(audio_filepath, tmpdir, str(utt_id))

- # make manifest file and save
+ # make manifest file and save
manifest_data = {
"audio_filepath": converted_audio_filepath,
"source_lang": src_lang,
 
 
 
 
 