not-lain committed on
Commit
11abe35
1 Parent(s): 03ff7a4

fixed audio

Browse files
Files changed (1) hide show
  1. app.py +3 -12
app.py CHANGED
@@ -30,7 +30,7 @@ def check_hallucination(assertion,citation):
30
 
31
 
32
 
33
- def process_speech(audio_input,input_language):
34
  """
35
  processing sound using seamless_m4t
36
  """
@@ -38,14 +38,6 @@ def process_speech(audio_input,input_language):
38
  return "no audio or audio did not save yet \nplease try again ! "
39
  print(f"audio : {audio_input}")
40
  print(f"audio type : {type(audio_input)}")
41
-
42
- try :
43
- audio_name = f"{np.random.randint(0, 100)}.wav"
44
- sr, data = audio_input
45
- write(audio_name, sr, data.astype(np.int16))
46
- audio_input = audio_name
47
- except :
48
- pass
49
  out = seamless_client.predict(
50
  "S2TT",
51
  "file",
@@ -432,11 +424,11 @@ with gr.Blocks(theme='ParityError/Anime') as iface :
432
  image_output = gr.Markdown(label="output text")
433
  image_button = gr.Button("process image")
434
  with gr.Tab("speech to text"):
435
- with gr.Row():
436
- input_language = gr.Dropdown(languages, label="select the language",value="English",interactive=True)
437
  audio_input = gr.Audio(label="speak",type="filepath",sources="microphone")
438
  audio_output = gr.Markdown(label="output text")
439
  audio_button = gr.Button("process audio")
 
440
  with gr.Tab("hallucination check"):
441
  assertion = gr.Textbox(label="assertion")
442
  citation = gr.Textbox(label="citation text")
@@ -445,7 +437,6 @@ with gr.Blocks(theme='ParityError/Anime') as iface :
445
  gr.Examples([["i am drunk","sarah is pregnant"]],inputs=[assertion,citation])
446
  text_button.click(process_and_query, inputs=text_input, outputs=text_output)
447
  image_button.click(process_image, inputs=image_input, outputs=image_output)
448
- audio_button.click(process_speech, inputs=[audio_input,input_language], outputs=audio_output)
449
  audio_button.click(check_hallucination,inputs=[assertion,citation],outputs=hullucination_output)
450
 
451
 
 
30
 
31
 
32
 
33
+ def process_speech(input_language, audio_input):
34
  """
35
  processing sound using seamless_m4t
36
  """
 
38
  return "no audio or audio did not save yet \nplease try again ! "
39
  print(f"audio : {audio_input}")
40
  print(f"audio type : {type(audio_input)}")
 
 
 
 
 
 
 
 
41
  out = seamless_client.predict(
42
  "S2TT",
43
  "file",
 
424
  image_output = gr.Markdown(label="output text")
425
  image_button = gr.Button("process image")
426
  with gr.Tab("speech to text"):
427
+ input_language = gr.Dropdown(languages, label="select the language",value="English",interactive=True)
 
428
  audio_input = gr.Audio(label="speak",type="filepath",sources="microphone")
429
  audio_output = gr.Markdown(label="output text")
430
  audio_button = gr.Button("process audio")
431
+ audio_button.click(process_speech, inputs=[input_language,audio_input], outputs=audio_output)
432
  with gr.Tab("hallucination check"):
433
  assertion = gr.Textbox(label="assertion")
434
  citation = gr.Textbox(label="citation text")
 
437
  gr.Examples([["i am drunk","sarah is pregnant"]],inputs=[assertion,citation])
438
  text_button.click(process_and_query, inputs=text_input, outputs=text_output)
439
  image_button.click(process_image, inputs=image_input, outputs=image_output)
 
440
  audio_button.click(check_hallucination,inputs=[assertion,citation],outputs=hullucination_output)
441
 
442