aka7774 committed on
Commit
00f6f1d
1 Parent(s): 3ec3dc7

Upload 3 files

Browse files
Files changed (3) hide show
  1. app.py +20 -7
  2. fn.py +6 -0
  3. main.py +9 -0
app.py CHANGED
@@ -3,13 +3,26 @@ import gradio as gr
3
 
4
  fn.load_model('large-v3')
5
 
6
- demo = gr.Interface(
7
- fn=fn.speech_to_text,
8
- inputs=[
9
- gr.Audio(sources="upload", type="filepath"),
10
- gr.Dropdown(value='large-v3', choices=["tiny", "base", "small", "medium", "large", "large-v2", "large-v3"]),
11
- ],
12
- outputs=["text", "text"])
 
 
 
 
 
 
 
 
 
 
 
 
 
13
 
14
  if __name__ == '__main__':
15
  demo.launch()
 
3
 
4
  fn.load_model('large-v3')
5
 
6
+ with gr.Blocks() as demo:
7
+ audio = gr.Audio(sources="upload", type="filepath")
8
+ model = gr.Dropdown(value='large-v3', choices=["tiny", "base", "small", "medium", "large", "large-v2", "large-v3"])
9
+ run_button = gr.Button(value='Run')
10
+ prompt = gr.Textbox(label='prompt')
11
+ set_button = gr.Button(value='Set Prompt')
12
+ text_only = gr.Textbox(label='output')
13
+ text_with_timestamps = gr.Textbox(label='timestamps')
14
+
15
+ run_button.click(
16
+ fn=fn.speech_to_text,
17
+ inputs=[audio, model],
18
+ outputs=[text_only, text_with_timestamps],
19
+ )
20
+
21
+ set_button.click(
22
+ fn=fn.set_prompt,
23
+ inputs=[prompt],
24
+ outputs=[],
25
+ )
26
 
27
  if __name__ == '__main__':
28
  demo.launch()
fn.py CHANGED
@@ -2,6 +2,7 @@ from faster_whisper import WhisperModel
2
 
3
  model = None
4
  model_size = None
 
5
 
6
  def load_model(_model_size):
7
  global model_size, model
@@ -14,6 +15,10 @@ def load_model(_model_size):
14
  except:
15
  model = WhisperModel(model_size, device="cpu", compute_type="int8")
16
 
 
 
 
 
17
  def speech_to_text(audio_file, _model_size = None):
18
  global model_size, model
19
 
@@ -21,6 +26,7 @@ def speech_to_text(audio_file, _model_size = None):
21
 
22
  segments, info = model.transcribe(
23
  audio_file,
 
24
  language='ja',
25
  beam_size=5,
26
  vad_filter=True,
 
2
 
3
  model = None
4
  model_size = None
5
+ initial_prompt = None
6
 
7
  def load_model(_model_size):
8
  global model_size, model
 
15
  except:
16
  model = WhisperModel(model_size, device="cpu", compute_type="int8")
17
 
18
+ def set_prompt(prompt):
19
+ global initial_prompt
20
+ initial_prompt = prompt
21
+
22
  def speech_to_text(audio_file, _model_size = None):
23
  global model_size, model
24
 
 
26
 
27
  segments, info = model.transcribe(
28
  audio_file,
29
+ initial_prompt=initial_prompt,
30
  language='ja',
31
  beam_size=5,
32
  vad_filter=True,
main.py CHANGED
@@ -40,3 +40,12 @@ async def transcribe_audio(file: UploadFile = Form(...)):
40
  return {"transcription": text_only, "text_with_timestamps": text_with_timestamps}
41
  except Exception as e:
42
  return {"error": str(e)}
 
 
 
 
 
 
 
 
 
 
40
  return {"transcription": text_only, "text_with_timestamps": text_with_timestamps}
41
  except Exception as e:
42
  return {"error": str(e)}
43
+
44
+ @app.post("/set_prompt")
45
+ async def set_prompt(prompt: str):
46
+ try:
47
+ fn.set_prompt(prompt)
48
+
49
+ return {"status": 0}
50
+ except Exception as e:
51
+ return {"error": str(e)}