import os

import gradio as gr
import librosa
import numpy as np
import pandas as pd
import plotly.express as px
import seaborn as sns
import soundfile as sf
from tensorflow import keras
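
# Load the pre-trained emotion classifier and the label set it predicts.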
model = keras.models.load_model('emotion.h5')
labels = ['Angry', 'Disgusted', 'Fearful', 'Happy', 'Neutral', 'Sad', 'Surprised']

def predict(audio):
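    """Split the recording into 3-second segments, classify the emotion of
    each segment, and return a Plotly figure of how it changes over time."""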
    wave, sr = librosa.load(audio, sr=None)
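
    # Split the waveform into consecutive 3-second segments.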
    segment_dur_secs = 3
    segment_length = sr * segment_dur_secs
    num_sections = int(np.ceil(len(wave) / segment_length))
    split = []
    paths = []
    for i in range(num_sections):
        t = wave[i * segment_length: (i + 1) * segment_length]
        split.append(t)
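
    # Write each segment to its own WAV file so it can be reloaded below
    # with a fixed sample rate and offset.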
    out_dir = 'audio_data/splits/'
    os.makedirs(out_dir, exist_ok=True)
    for i in range(num_sections):
        recording_name = os.path.basename(audio[:-4])
        out_file = f"{recording_name}_{i}.wav"
        sf.write(os.path.join(out_dir, out_file), split[i], sr)
        paths.append(os.path.join(out_dir, out_file))
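
    # Extract MFCC features for each segment: reload 2.5 s of audio from the
    # 0.5 s mark at 44.1 kHz and average the MFCCs over the coefficient axis.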
    predicted_features = pd.DataFrame(columns=['features'])
    counter = 0
    for path in paths:
        X, sample_rate = librosa.load(path, duration=2.5, sr=44100, offset=0.5)
        mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=13), axis=0)
        predicted_features.loc[counter] = [mfccs]
        counter = counter + 1
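
    # Expand the features into one column per frame and drop rows with NaNs
    # (segments shorter than the full window).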
    predicted_features = pd.DataFrame(predicted_features['features'].values.tolist())
    predicted_features.dropna(inplace=True)
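
    # Classify each segment and map the predicted class index to its label.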
    preds = model.predict(predicted_features)
    preds = preds.argmax(axis=1)
    df_preds = pd.DataFrame(preds, columns=['prediction'])
    emotions = []
    for i in df_preds['prediction']:
        emotion = labels[int(i)]
        emotions.append(emotion)
    df_preds['emotion'] = emotions
    df_preds = df_preds.reset_index()
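
    # Plot the predicted emotion for each 3-second interval.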
    fig = px.line(df_preds, x="index", y="emotion", title='How the emotion changes over the speech')
    fig.update_xaxes(title='3-second intervals of speech')
    return fig
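

# Build the Gradio demo: audio file in, Plotly figure out.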
outputs = gr.Plot()
title = "Emotion recognition"
description = "This demo shows how the speaker's emotion changes over the course of the speech."
infr = gr.Interface(fn=predict,
                    inputs=gr.Audio(type="filepath"),
                    outputs=outputs,
                    examples=['audio_samples/1.mp3', 'audio_samples/2.mp3', 'audio_samples/3.mp3', 'audio_samples/4.mp3'],
                    cache_examples=True,
                    title=title,
                    description=description,
                    interpretation='default')
infr.launch()