HakimHa committed on
Commit
1b1c058
·
1 Parent(s): 9c0c186

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -3
app.py CHANGED
@@ -34,13 +34,15 @@ def handle_image(img):
34
 
35
  # Function to handle audio input
36
  def handle_audio(audio):
37
- speech, _ = sf.read(audio)
38
- input_values = processor(speech, return_tensors="pt").input_values
 
39
  logits = wav2vec2_model(input_values).logits
40
  predicted_ids = torch.argmax(logits, dim=-1)
41
- transcriptions = processor.decode(predicted_ids[0])
42
  return handle_text(transcriptions)
43
 
 
44
  def chatbot(text, img, audio):
45
  text_output = handle_text(text) if text is not None else ''
46
  img_output = handle_image(img) if img is not None else ''
 
34
 
35
  # Function to handle audio input
36
  def handle_audio(audio):
37
+ # Gradio's Audio component returns a tuple of (sample_rate, audio_data)
38
+ sample_rate, audio_data = audio
39
+ input_values = wav2vec2_processor(audio_data, sampling_rate=sample_rate, return_tensors="pt").input_values
40
  logits = wav2vec2_model(input_values).logits
41
  predicted_ids = torch.argmax(logits, dim=-1)
42
+ transcriptions = wav2vec2_processor.decode(predicted_ids[0])
43
  return handle_text(transcriptions)
44
 
45
+
46
  def chatbot(text, img, audio):
47
  text_output = handle_text(text) if text is not None else ''
48
  img_output = handle_image(img) if img is not None else ''