demomodels committed
Commit acbc440 · 1 Parent(s): 3a41822

Initial commit

Files changed (1): app.py (+19 -14)
app.py CHANGED

@@ -1,8 +1,7 @@
  import gradio as gr
- import json
  import torch
+ import json
  import numpy as np
-
  from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline

  device = "cuda:0" if torch.cuda.is_available() else "cpu"

@@ -23,25 +22,31 @@ pipe = pipeline(
      tokenizer=processor.tokenizer,
      feature_extractor=processor.feature_extractor,
      max_new_tokens=128,
-     chunk_length_s=30,
-     batch_size=16,
+     chunk_length_s=15,
+     batch_size=1,
      return_timestamps=True,
      torch_dtype=torch_dtype,
      device=device,
  )

- def process(audio):
-     # return audio
-     sr, y = audio
-     y = y.astype(np.float32)
-     y /= np.max(np.abs(y))
-
-     # return transcriber({"sampling_rate": sr, "raw": y})["text"]
-     result = pipe({"sampling_rate": sr, "raw": y})['chunks']
+ def transcribe_speech(filepath):
+     result = pipe(filepath)['chunks']
      for item in result:
          item['timestamp'] = list(item['timestamp'])
      return json.dumps(result)

-
- iface = gr.Interface(fn=process, inputs="audio", outputs="text")
- iface.launch()
+ demo = gr.Blocks()
+
+ file_transcribe = gr.Interface(
+     fn=transcribe_speech,
+     inputs=gr.Audio(sources="upload", type="filepath"),
+     outputs="text",
+ )
+
+ with demo:
+     gr.TabbedInterface(
+         [file_transcribe],
+         ["Song Lyrics"],
+     )
+
+ demo.launch(debug=True)
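
Note: neither hunk touches the model-setup block between them (old lines 9-22 / new lines 8-21), so torch_dtype, model, processor, and the opening of the pipe = pipeline( call are defined off-screen. A minimal sketch of what that block typically looks like for a Whisper-style checkpoint follows; the model_id is only a placeholder, since the checkpoint this Space actually loads is not visible in the diff.

import torch
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline

device = "cuda:0" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32

# Placeholder: the checkpoint actually used by this Space is not shown in the diff.
model_id = "openai/whisper-small"

model = AutoModelForSpeechSeq2Seq.from_pretrained(
    model_id,
    torch_dtype=torch_dtype,
    low_cpu_mem_usage=True,
    use_safetensors=True,
)
model.to(device)

processor = AutoProcessor.from_pretrained(model_id)

pipe = pipeline(
    "automatic-speech-recognition",
    model=model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    max_new_tokens=128,
    chunk_length_s=15,   # value after this commit (was 30)
    batch_size=1,        # value after this commit (was 16)
    return_timestamps=True,
    torch_dtype=torch_dtype,
    device=device,
)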
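
For reference, with return_timestamps=True the pipeline's 'chunks' output is a list of segment dicts, and transcribe_speech turns each timestamp tuple into a list before serializing everything with json.dumps. A small self-contained illustration (the text and timings below are invented, not taken from a real transcription):

import json

# Illustrative shape of pipe(filepath)["chunks"]; values are invented.
chunks = [
    {"timestamp": (0.0, 5.2), "text": " First line of the lyrics"},
    {"timestamp": (5.2, 11.0), "text": " Second line of the lyrics"},
]

# Same post-processing as transcribe_speech: timestamp tuples become lists in the JSON.
for item in chunks:
    item["timestamp"] = list(item["timestamp"])

print(json.dumps(chunks))
# [{"timestamp": [0.0, 5.2], "text": " First line of the lyrics"}, ...]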