AkitoP committed on
Commit
478bebc
·
1 Parent(s): 955c5ad
Files changed (1) hide show
  1. app.py +4 -12
app.py CHANGED
@@ -15,7 +15,6 @@ is_hf = os.getenv("SYSTEM") == "spaces"
15
  # reference from litagin / galgame-whisper-wip
16
 
17
  generate_kwargs = {
18
- "language": "Japanese",
19
  "max_new_tokens": 256,
20
  }
21
 
@@ -27,13 +26,8 @@ pipe = pipeline(
27
 
28
 
29
  @spaces.GPU
30
- def transcribe(audio: str) -> tuple[str, float]:
31
- filename = Path(audio).name
32
- # Read and resample audio to 16kHz
33
- y, sr = librosa.load(audio, mono=True, sr=16000)
34
- # Get duration of audio
35
- result = pipe(y, generate_kwargs=generate_kwargs)["text"]
36
- print(result)
37
  return result
38
 
39
 
@@ -46,10 +40,8 @@ A Whisper model fine-tuned to transcribe Japanese speech into Katakana with pitc
46
  with gr.Blocks() as app:
47
  gr.Markdown(initial_md)
48
  audio = gr.Audio(type="filepath")
49
- transcribe_btn = gr.Button(label="Transcribe")
50
  output = gr.Textbox(label="Result")
51
- transcribe_btn.click(transcribe(audio=audio),inputs=[audio], outputs=[output])
52
 
53
-
54
- # app.load(warmup, inputs=[], outputs=[warmup_result], queue=True)
55
  app.launch(inbrowser=True)
 
15
  # reference from litagin / galgame-whisper-wip
16
 
17
  generate_kwargs = {
 
18
  "max_new_tokens": 256,
19
  }
20
 
 
26
 
27
 
28
  @spaces.GPU
29
+ def transcribe(audio: str) -> str:
30
+ result = pipe(audio, generate_kwargs=generate_kwargs)["text"]
 
 
 
 
 
31
  return result
32
 
33
 
 
40
  with gr.Blocks() as app:
41
  gr.Markdown(initial_md)
42
  audio = gr.Audio(type="filepath")
43
+ transcribe_btn = gr.Button("Transcribe")
44
  output = gr.Textbox(label="Result")
45
+ transcribe_btn.click(fn=transcribe,inputs=[audio], outputs=[output])
46
 
 
 
47
  app.launch(inbrowser=True)