aka7774 committed on
Commit
b22bcbc
1 Parent(s): 00f6f1d

Add language

Browse files
Files changed (3) hide show
  1. app.py +2 -1
  2. fn.py +6 -3
  3. main.py +2 -2
app.py CHANGED
@@ -8,6 +8,7 @@ with gr.Blocks() as demo:
8
  model = gr.Dropdown(value='large-v3', choices=["tiny", "base", "small", "medium", "large", "large-v2", "large-v3"])
9
  run_button = gr.Button(value='Run')
10
  prompt = gr.Textbox(label='prompt')
 
11
  set_button = gr.Button(value='Set Prompt')
12
  text_only = gr.Textbox(label='output')
13
  text_with_timestamps = gr.Textbox(label='timestamps')
@@ -20,7 +21,7 @@ with gr.Blocks() as demo:
20
 
21
  set_button.click(
22
  fn=fn.set_prompt,
23
- inputs=[prompt],
24
  outputs=[],
25
  )
26
 
 
8
  model = gr.Dropdown(value='large-v3', choices=["tiny", "base", "small", "medium", "large", "large-v2", "large-v3"])
9
  run_button = gr.Button(value='Run')
10
  prompt = gr.Textbox(label='prompt')
11
+ language = gr.Textbox(label='language')
12
  set_button = gr.Button(value='Set Prompt')
13
  text_only = gr.Textbox(label='output')
14
  text_with_timestamps = gr.Textbox(label='timestamps')
 
21
 
22
  set_button.click(
23
  fn=fn.set_prompt,
24
+ inputs=[prompt, language],
25
  outputs=[],
26
  )
27
 
fn.py CHANGED
@@ -3,6 +3,7 @@ from faster_whisper import WhisperModel
3
  model = None
4
  model_size = None
5
  initial_prompt = None
 
6
 
7
  def load_model(_model_size):
8
  global model_size, model
@@ -15,9 +16,11 @@ def load_model(_model_size):
15
  except:
16
  model = WhisperModel(model_size, device="cpu", compute_type="int8")
17
 
18
- def set_prompt(prompt):
19
- global initial_prompt
20
  initial_prompt = prompt
 
 
21
 
22
  def speech_to_text(audio_file, _model_size = None):
23
  global model_size, model
@@ -27,7 +30,7 @@ def speech_to_text(audio_file, _model_size = None):
27
  segments, info = model.transcribe(
28
  audio_file,
29
  initial_prompt=initial_prompt,
30
- language='ja',
31
  beam_size=5,
32
  vad_filter=True,
33
  without_timestamps=False,
 
3
  model = None
4
  model_size = None
5
  initial_prompt = None
6
+ language = 'ja'
7
 
8
  def load_model(_model_size):
9
  global model_size, model
 
16
  except:
17
  model = WhisperModel(model_size, device="cpu", compute_type="int8")
18
 
19
def set_prompt(prompt, _language = None):
    """Remember *prompt* (and optionally *_language*) for later transcription.

    Mutates the module-level ``initial_prompt`` that ``speech_to_text`` passes
    to the transcriber; when *_language* is truthy (a non-empty string), the
    module-level ``language`` is overridden as well. A falsy *_language*
    (``None`` or ``""``) leaves the current language setting untouched.
    """
    global initial_prompt, language
    # Independent updates: language first (only when explicitly provided),
    # then the prompt, which is always replaced.
    if _language:
        language = _language
    initial_prompt = prompt
24
 
25
  def speech_to_text(audio_file, _model_size = None):
26
  global model_size, model
 
30
  segments, info = model.transcribe(
31
  audio_file,
32
  initial_prompt=initial_prompt,
33
+ language=language,
34
  beam_size=5,
35
  vad_filter=True,
36
  without_timestamps=False,
main.py CHANGED
@@ -42,9 +42,9 @@ async def transcribe_audio(file: UploadFile = Form(...)):
42
  return {"error": str(e)}
43
 
44
  @app.post("/set_prompt")
45
- async def set_prompt(prompt: str):
46
  try:
47
- fn.set_prompt(prompt)
48
 
49
  return {"status": 0}
50
  except Exception as e:
 
42
  return {"error": str(e)}
43
 
44
  @app.post("/set_prompt")
45
+ async def set_prompt(prompt: str, language: str = None):
46
  try:
47
+ fn.set_prompt(prompt, language)
48
 
49
  return {"status": 0}
50
  except Exception as e: