AkitoP committed on
Commit
08ab644
1 Parent(s): 478bebc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +47 -46
app.py CHANGED
@@ -1,47 +1,48 @@
1
- import os
2
- import time
3
- import warnings
4
- from pathlib import Path
5
-
6
- import gradio as gr
7
- import librosa
8
- import spaces
9
- import torch
10
- from transformers import pipeline
11
-
12
- warnings.filterwarnings("ignore")
13
-
14
- is_hf = os.getenv("SYSTEM") == "spaces"
15
- # reference from litagin / galgame-whisper-wip
16
-
17
- generate_kwargs = {
18
- "max_new_tokens": 256,
19
- }
20
-
21
- pipe = pipeline(
22
- "automatic-speech-recognition",
23
- model="AkitoP/whisper-large-v3-japense-phone_accent",
24
- device="cuda" if torch.cuda.is_available() else "cpu",
25
- )
26
-
27
-
28
- @spaces.GPU
29
- def transcribe(audio: str) -> str:
30
- result = pipe(audio, generate_kwargs=generate_kwargs)["text"]
31
- return result
32
-
33
-
34
- initial_md = """
35
- # Whisper Large V3 Japanese Phone Accent
36
-
37
- A Whisper model fine-tuned to transcribe Japanese speech into Katakana with pitch accent annotations. Built on whisper-large-v3-turbo, it uses a subset (1/20) of the Galgame-Speech dataset and the jsut-5000 dataset.
38
- """
39
-
40
- with gr.Blocks() as app:
41
- gr.Markdown(initial_md)
42
- audio = gr.Audio(type="filepath")
43
- transcribe_btn = gr.Button("Transcribe")
44
- output = gr.Textbox(label="Result")
45
- transcribe_btn.click(fn=transcribe,inputs=[audio], outputs=[output])
46
-
 
47
  app.launch(inbrowser=True)
 
1
+ import os
2
+ import time
3
+ import warnings
4
+ from pathlib import Path
5
+
6
+ import gradio as gr
7
+ import librosa
8
+ import spaces
9
+ import torch
10
+ from transformers import pipeline
11
+
12
+ warnings.filterwarnings("ignore")
13
+
14
+ is_hf = os.getenv("SYSTEM") == "spaces"
15
+ # reference from litagin / galgame-whisper-wip
16
+
17
+ generate_kwargs = {
18
+ "max_new_tokens": 256,
19
+ }
20
+
21
+ pipe = pipeline(
22
+ "automatic-speech-recognition",
23
+ model="AkitoP/whisper-large-v3-japense-phone_accent",
24
+ device="cuda" if torch.cuda.is_available() else "cpu",
25
+ )
26
+
27
+
28
+ @spaces.GPU
29
+ def transcribe(audio: str) -> str:
30
+ result = pipe(audio, generate_kwargs=generate_kwargs)["text"]
31
+ print(result)
32
+ return result
33
+
34
+
35
+ initial_md = """
36
+ # Whisper Large V3 Japanese Phone Accent
37
+
38
+ A Whisper model fine-tuned to transcribe Japanese speech into Katakana with pitch accent annotations. Built on whisper-large-v3-turbo, it uses a subset (1/20) of the Galgame-Speech dataset and the jsut-5000 dataset.
39
+ """
40
+
41
+ with gr.Blocks() as app:
42
+ gr.Markdown(initial_md)
43
+ audio = gr.Audio(type="filepath")
44
+ transcribe_btn = gr.Button("Transcribe")
45
+ output = gr.Textbox(label="Result")
46
+ transcribe_btn.click(fn=transcribe,inputs=[audio], outputs=[output])
47
+
48
  app.launch(inbrowser=True)