Update app.py
Browse files
app.py
CHANGED
@@ -34,13 +34,15 @@ def handle_image(img):
|
|
34 |
|
35 |
# Function to handle audio input
|
36 |
def handle_audio(audio):
|
37 |
-
|
38 |
-
|
|
|
39 |
logits = wav2vec2_model(input_values).logits
|
40 |
predicted_ids = torch.argmax(logits, dim=-1)
|
41 |
-
transcriptions =
|
42 |
return handle_text(transcriptions)
|
43 |
|
|
|
44 |
def chatbot(text, img, audio):
|
45 |
text_output = handle_text(text) if text is not None else ''
|
46 |
img_output = handle_image(img) if img is not None else ''
|
|
|
34 |
|
35 |
# Function to handle audio input
|
36 |
def handle_audio(audio):
    """Transcribe a Gradio audio input with Wav2Vec2 and feed the text to ``handle_text``.

    Args:
        audio: The ``(sample_rate, audio_data)`` tuple returned by Gradio's
            Audio component, or ``None`` when no audio was provided.

    Returns:
        Whatever ``handle_text`` returns for the transcription, or ``''`` when
        there is no audio to transcribe (the same "missing input" value that
        ``chatbot`` uses for absent text/image inputs).
    """
    if audio is None:
        return ''

    # Local import: the file's top-level import block is not visible from this
    # block, and Gradio already hands the samples over as a NumPy array.
    import numpy as np

    # Gradio's Audio component returns a tuple of (sample_rate, audio_data)
    sample_rate, audio_data = audio
    waveform = np.asarray(audio_data)
    if waveform.size == 0:
        return ''

    # Integer PCM (Gradio's default is int16) -> float32 in roughly [-1, 1].
    if np.issubdtype(waveform.dtype, np.integer):
        pcm_scale = float(np.iinfo(waveform.dtype).max)
        waveform = waveform.astype(np.float32) / pcm_scale
    else:
        waveform = waveform.astype(np.float32)

    # Multi-channel audio arrives as (samples, channels) per the Gradio docs;
    # the feature extractor would misread a 2-D array as a batch, so downmix.
    if waveform.ndim > 1:
        waveform = waveform.mean(axis=1)

    # The feature extractor raises ValueError when the supplied sampling rate
    # differs from the one the model was trained on, so resample first.
    # NOTE(review): assumes wav2vec2_processor is a transformers
    # Wav2Vec2Processor; falls back to 16 kHz if the attribute is missing.
    target_rate = getattr(
        getattr(wav2vec2_processor, "feature_extractor", None),
        "sampling_rate",
        16000,
    )
    if sample_rate != target_rate:
        # Plain linear interpolation keeps this dependency-free; swap in a
        # band-limited resampler if transcription quality matters.
        source_len = waveform.shape[0]
        target_len = max(1, int(round(source_len * target_rate / sample_rate)))
        source_times = np.arange(source_len) / sample_rate
        target_times = np.arange(target_len) / target_rate
        waveform = np.interp(target_times, source_times, waveform).astype(np.float32)

    input_values = wav2vec2_processor(
        waveform, sampling_rate=target_rate, return_tensors="pt"
    ).input_values

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        logits = wav2vec2_model(input_values).logits

    predicted_ids = torch.argmax(logits, dim=-1)
    transcriptions = wav2vec2_processor.decode(predicted_ids[0])
    return handle_text(transcriptions)
|
44 |
|
45 |
+
|
46 |
def chatbot(text, img, audio):
|
47 |
text_output = handle_text(text) if text is not None else ''
|
48 |
img_output = handle_image(img) if img is not None else ''
|