File size: 2,529 Bytes
d8755a6
 
 
 
 
78fbf94
 
 
 
 
 
 
 
 
 
d8755a6
 
 
78fbf94
d8755a6
 
 
 
 
78fbf94
d8755a6
 
9a533bd
 
d8755a6
 
 
78fbf94
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d8755a6
636f5bc
78fbf94
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
from speechbrain.pretrained.interfaces import foreign_class

import warnings
warnings.filterwarnings("ignore")

import os
import gradio as gr

# Directory that holds the bundled sample recordings offered in the dropdown
prerecorded_audio_path = 'prerecorded'
# Names of the sample files, sorted so the dropdown order is stable across
# runs (os.listdir returns entries in arbitrary order). Sub-directories are
# skipped because only regular files can be handed to the classifier.
try:
    prerecorded_audio_files = sorted(
        entry
        for entry in os.listdir(prerecorded_audio_path)
        if os.path.isfile(os.path.join(prerecorded_audio_path, entry))
    )
except (FileNotFoundError, NotADirectoryError):
    # No samples directory: keep the app usable with an empty dropdown, since
    # audio can still be supplied through the upload and microphone inputs.
    prerecorded_audio_files = []
# Full paths of the sample files, used as the dropdown choices
prerecorded_audio_files_full_path = [os.path.join(prerecorded_audio_path, file) for file in prerecorded_audio_files]

# Emotion classifier used by the prediction function. It is built through
# speechbrain's foreign_class from the source named below, using the class
# CustomEncoderWav2vec2Classifier declared in that source's custom_interface.py.
_learner_spec = {
    "classname": "CustomEncoderWav2vec2Classifier",
    "pymodule_file": "custom_interface.py",
    "source": "speechbrain/emotion-recognition-wav2vec2-IEMOCAP",
}
learner = foreign_class(**_learner_spec)

# Display names for the short label codes that the classifier reports;
# the prediction function looks up the predicted code here.
emotion_dict = dict(
    sad='Sad',
    hap='Happy',
    ang='Anger',
    fea='Fear',
    sur='Surprised',
    neu='Neutral',
)

def _audio_path(candidate):
    """Return the filesystem path carried by one input value, or None if empty.

    A value may be a path (``str`` / ``os.PathLike``) or an object that
    exposes its path as ``.name``, as uploaded file objects do.
    """
    if candidate is None:
        return None
    if isinstance(candidate, (str, os.PathLike)):
        return os.fspath(candidate) or None
    return getattr(candidate, "name", None) or None


def predict_emotion(uploaded_audio=None, prerecorded_audio=None, recorded_audio=None):
    """Classify the emotion expressed in one audio file.

    The Gradio interface in this file passes three values positionally
    (dropdown choice, uploaded file, microphone recording), in an order that
    does not match the parameter names. Every argument is therefore accepted
    either as a path string or as a file object with a ``.name`` attribute,
    so the function works regardless of which slot a value arrives in.

    Args:
        uploaded_audio: Audio supplied by the user, or None.
        prerecorded_audio: A bundled sample, or None. Checked first.
        recorded_audio: A microphone recording, or None. Checked last.

    Returns:
        ``{emotion_name: probability}`` for the predicted class, the mapping
        shape a Gradio ``Label`` output displays as a class with confidence,
        or the string ``"No audio file provided"`` when every argument is
        empty.
    """
    # First non-empty input wins, keeping the original precedence
    # (prerecorded sample before upload) and adding the recording last.
    for candidate in (prerecorded_audio, uploaded_audio, recorded_audio):
        audio_file_path = _audio_path(candidate)
        if audio_file_path:
            break
    else:
        # A plain string is a valid value for a single Label output; the
        # former (message, 0) tuple was not.
        return "No audio file provided"

    out_prob, _score, index, text_lab = learner.classify_file(audio_file_path)
    emotion_probability = out_prob[0][index[0]].item()

    # Fall back to the raw label code if it has no display name configured.
    label_code = text_lab[0]
    return {emotion_dict.get(label_code, label_code): emotion_probability}

# Gradio interface: three ways to supply audio (sample dropdown, file upload,
# microphone recording) and a single label output.
inputs = [
    gr.inputs.Dropdown(
        [*prerecorded_audio_files_full_path],
        default=None,
        label="Select Prerecorded Audio",
    ),
    gr.inputs.Audio(source="upload", type="file", label="Or Upload Audio", optional=True),
    gr.inputs.Audio(source="microphone", type="file", label="Or Record Audio", optional=True),
]

outputs = gr.outputs.Label(num_top_classes=2)

title = "ML Speech Emotion Detection"
description = "Detect emotions from speech using a Speechbrain powered model."

# Build the app, then start serving it.
demo = gr.Interface(
    fn=predict_emotion,
    inputs=inputs,
    outputs=outputs,
    title=title,
    description=description,
)
demo.launch()