final
Browse files
app.py
CHANGED
@@ -15,7 +15,6 @@ is_hf = os.getenv("SYSTEM") == "spaces"
|
|
15 |
# reference from litagin / galgame-whisper-wip
|
16 |
|
17 |
generate_kwargs = {
|
18 |
-
"language": "Japanese",
|
19 |
"max_new_tokens": 256,
|
20 |
}
|
21 |
|
@@ -27,13 +26,8 @@ pipe = pipeline(
|
|
27 |
|
28 |
|
29 |
@spaces.GPU
|
30 |
-
def transcribe(audio: str) ->
|
31 |
-
|
32 |
-
# Read and resample audio to 16kHz
|
33 |
-
y, sr = librosa.load(audio, mono=True, sr=16000)
|
34 |
-
# Get duration of audio
|
35 |
-
result = pipe(y, generate_kwargs=generate_kwargs)["text"]
|
36 |
-
print(result)
|
37 |
return result
|
38 |
|
39 |
|
@@ -46,10 +40,8 @@ A Whisper model fine-tuned to transcribe Japanese speech into Katakana with pitc
|
|
46 |
with gr.Blocks() as app:
|
47 |
gr.Markdown(initial_md)
|
48 |
audio = gr.Audio(type="filepath")
|
49 |
-
transcribe_btn = gr.Button(
|
50 |
output = gr.Textbox(label="Result")
|
51 |
-
transcribe_btn.click(transcribe
|
52 |
|
53 |
-
|
54 |
-
# app.load(warmup, inputs=[], outputs=[warmup_result], queue=True)
|
55 |
app.launch(inbrowser=True)
|
|
|
15 |
# reference from litagin / galgame-whisper-wip
|
16 |
|
17 |
generate_kwargs = {
|
|
|
18 |
"max_new_tokens": 256,
|
19 |
}
|
20 |
|
|
|
26 |
|
27 |
|
28 |
@spaces.GPU
|
29 |
+
def transcribe(audio: str) -> str:
|
30 |
+
result = pipe(audio, generate_kwargs=generate_kwargs)["text"]
|
|
|
|
|
|
|
|
|
|
|
31 |
return result
|
32 |
|
33 |
|
|
|
40 |
with gr.Blocks() as app:
|
41 |
gr.Markdown(initial_md)
|
42 |
audio = gr.Audio(type="filepath")
|
43 |
+
transcribe_btn = gr.Button("Transcribe")
|
44 |
output = gr.Textbox(label="Result")
|
45 |
+
transcribe_btn.click(fn=transcribe,inputs=[audio], outputs=[output])
|
46 |
|
|
|
|
|
47 |
app.launch(inbrowser=True)
|