demomodels committed
Commit acbc440 · 1 Parent(s): 3a41822

Initial commit

Files changed (1): app.py (+19 -14)
app.py CHANGED

@@ -1,8 +1,7 @@
  import gradio as gr
- import json
  import torch
+ import json
  import numpy as np
-
  from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline

  device = "cuda:0" if torch.cuda.is_available() else "cpu"

@@ -23,25 +22,31 @@ pipe = pipeline(
      tokenizer=processor.tokenizer,
      feature_extractor=processor.feature_extractor,
      max_new_tokens=128,
-     chunk_length_s=30,
-     batch_size=16,
+     chunk_length_s=15,
+     batch_size=1,
      return_timestamps=True,
      torch_dtype=torch_dtype,
      device=device,
  )

- def process(audio):
-     # return audio
-     sr, y = audio
-     y = y.astype(np.float32)
-     y /= np.max(np.abs(y))
-
-     # return transcriber({"sampling_rate": sr, "raw": y})["text"]
-     result = pipe({"sampling_rate": sr, "raw": y})['chunks']
+ def transcribe_speech(filepath):
+     result = pipe(filepath)['chunks']
      for item in result:
          item['timestamp'] = list(item['timestamp'])
      return json.dumps(result)

-
- iface = gr.Interface(fn=process, inputs="audio", outputs="text")
- iface.launch()
+ demo = gr.Blocks()
+
+ file_transcribe = gr.Interface(
+     fn=transcribe_speech,
+     inputs=gr.Audio(sources="upload", type="filepath"),
+     outputs="text",
+ )
+
+ with demo:
+     gr.TabbedInterface(
+         [file_transcribe],
+         ["Song Lyrics"],
+     )
+
+ demo.launch(debug=True)
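
Note: neither hunk touches the model-setup block between them (old lines 9-22 / new lines 8-21), so torch_dtype, model, processor, and the opening of the pipe = pipeline( call are defined off-screen. A minimal sketch of what that block typically looks like for a Whisper-style checkpoint follows; the model_id is only a placeholder, since the checkpoint this Space actually loads is not visible in the diff.

import torch
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline

device = "cuda:0" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32

# Placeholder: the checkpoint actually used by this Space is not shown in the diff.
model_id = "openai/whisper-small"

model = AutoModelForSpeechSeq2Seq.from_pretrained(
    model_id,
    torch_dtype=torch_dtype,
    low_cpu_mem_usage=True,
    use_safetensors=True,
)
model.to(device)

processor = AutoProcessor.from_pretrained(model_id)

pipe = pipeline(
    "automatic-speech-recognition",
    model=model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    max_new_tokens=128,
    chunk_length_s=15,   # value after this commit (was 30)
    batch_size=1,        # value after this commit (was 16)
    return_timestamps=True,
    torch_dtype=torch_dtype,
    device=device,
)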
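
For reference, with return_timestamps=True the pipeline's 'chunks' output is a list of segment dicts, and transcribe_speech turns each timestamp tuple into a list before serializing everything with json.dumps. A small self-contained illustration (the text and timings below are invented, not taken from a real transcription):

import json

# Illustrative shape of pipe(filepath)["chunks"]; values are invented.
chunks = [
    {"timestamp": (0.0, 5.2), "text": " First line of the lyrics"},
    {"timestamp": (5.2, 11.0), "text": " Second line of the lyrics"},
]

# Same post-processing as transcribe_speech: timestamp tuples become lists in the JSON.
for item in chunks:
    item["timestamp"] = list(item["timestamp"])

print(json.dumps(chunks))
# [{"timestamp": [0.0, 5.2], "text": " First line of the lyrics"}, ...]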