"""Gradio demo that plots how a speaker's emotion changes over a recording.

The input audio is cut into fixed-length segments, each segment is classified
by a pre-trained Keras model, and the predicted labels are drawn as a line
chart.
"""
import os

import gradio as gr
import librosa
import numpy as np
import pandas as pd
import plotly.express as px
import seaborn as sns
import soundfile as sf
from tensorflow import keras

model = keras.models.load_model('emotion.h5')

# labels[i] is the name shown for model output class i.
labels = ['Angry', 'Disgusted', 'Fearful', 'Happy', 'Neutral', 'Sad', 'Suprised']

# Length of each slice of the input recording, in seconds.
SEGMENT_DUR_SECS = 3
# Directory the per-segment WAV files are written to.
SPLIT_DIR = 'audio_data/splits/'
# Window of each segment that is fed to the feature extractor.
FEATURE_OFFSET_SECS = 0.5
FEATURE_DUR_SECS = 2.5
FEATURE_SAMPLE_RATE = 44100
N_MFCC = 13


def predict(audio):
    """Classify every segment of *audio* and plot the predicted emotions.

    Args:
        audio: Path to the audio file to analyse.

    Returns:
        A Plotly line figure with the segment index on the x-axis and the
        predicted emotion label on the y-axis.

    Raises:
        ValueError: If no audio path is given, or if no segment yields a
            complete feature vector.
    """
    if audio is None:
        raise ValueError("No audio file was provided.")

    # sr=None keeps the file's native sampling rate.
    wave, sr = librosa.load(audio, sr=None)
    segment_length = sr * SEGMENT_DUR_SECS
    num_sections = int(np.ceil(len(wave) / segment_length))

    os.makedirs(SPLIT_DIR, exist_ok=True)
    # splitext handles extensions of any length (or none), unlike a fixed
    # four-character slice.
    recording_name = os.path.splitext(os.path.basename(audio))[0]
    # Number of leading samples skipped when a segment is reloaded below.
    min_samples = int(FEATURE_OFFSET_SECS * sr)

    paths = []
    for i in range(num_sections):
        segment = wave[i * segment_length:(i + 1) * segment_length]
        path = os.path.join(SPLIT_DIR, f"{recording_name}_{i}.wav")
        sf.write(path, segment, sr)
        # A segment no longer than the load offset leaves nothing to extract
        # features from, so it is written but not analysed.
        if len(segment) > min_samples:
            paths.append(path)

    features = []
    for path in paths:
        samples, sample_rate = librosa.load(
            path,
            duration=FEATURE_DUR_SECS,
            sr=FEATURE_SAMPLE_RATE,
            offset=FEATURE_OFFSET_SECS,
        )
        mfcc = librosa.feature.mfcc(y=samples, sr=sample_rate, n_mfcc=N_MFCC)
        # Averaging over axis 0 collapses the coefficients, leaving one value
        # per frame.
        features.append(np.mean(mfcc, axis=0))

    # Rows from shorter segments are padded with NaN by pandas and removed by
    # dropna(), so only full-length feature vectors reach the model.
    # NOTE(review): a recording with a single short segment has no NaN padding
    # and is passed through as-is; confirm the model accepts that width.
    predicted_features = pd.DataFrame(features)
    predicted_features.dropna(inplace=True)
    if predicted_features.empty:
        raise ValueError("The audio is too short to extract any features.")

    preds = model.predict(predicted_features).argmax(axis=1)

    df_preds = pd.DataFrame(preds, columns=['prediction'])
    df_preds['emotion'] = [labels[int(p)] for p in df_preds['prediction']]
    # reset_index() exposes the row number as the "index" column for plotting.
    df_preds = df_preds.reset_index()

    fig = px.line(df_preds, x="index", y="emotion", title='How emotion change over speech')
    fig.update_xaxes(title='The 3s intervals of speech')
    return fig


outputs = gr.Plot()
title = "Emotion recognition"
description = "This model can shows how speaker emotion changes over the speech"

# NOTE(review): the `interpretation` argument is not accepted by every Gradio
# release; confirm it against the pinned Gradio version.
infr = gr.Interface(
    fn=predict,
    inputs=gr.Audio(type="filepath"),
    examples=[
        'audio_samples/1.mp3',
        'audio_samples/2.mp3',
        'audio_samples/3.mp3',
        'audio_samples/4.mp3',
    ],
    cache_examples=True,
    outputs=outputs,
    title=title,
    description=description,
    interpretation='default',
)
infr.launch()