import os

# Disable oneDNN custom ops; this must be set before TensorFlow/Keras is imported
os.environ["TF_ENABLE_ONEDNN_OPTS"] = "0"

import librosa
import joblib
import numpy as np
import pandas as pd
import gradio as gr
import h5py
from keras.models import load_model
root_path = "./model/"

num2label = {0: "Neutral", 1: "Calm", 2: "Happy", 3: "Sad", 4: "Angry", 5: "Fearful", 6: "Disgust", 7: "Surprised"}
# Classical ML models (scikit-learn, saved with joblib), one per feature set
SVM_spectral = joblib.load(root_path + "SVM_spectral.joblib")
SVM_prosodic = joblib.load(root_path + "SVM_prosodic.joblib")
SVM_full = joblib.load(root_path + "SVM_full.joblib")
SVM_mfcc = joblib.load(root_path + "SVM_mfcc.joblib")
NB_spectral = joblib.load(root_path + "NB_spectral.joblib")
NB_prosodic = joblib.load(root_path + "NB_prosodic.joblib")
NB_full = joblib.load(root_path + "NB_full.joblib")
NB_mfcc = joblib.load(root_path + "NB_mfcc.joblib")
DT_spectral = joblib.load(root_path + "DT_spectral.joblib")
DT_prosodic = joblib.load(root_path + "DT_prosodic.joblib")
DT_full = joblib.load(root_path + "DT_full.joblib")
DT_mfcc = joblib.load(root_path + "DT_mfcc.joblib")
MLP_spectral = joblib.load(root_path + "MLP_spectral.joblib")
MLP_prosodic = joblib.load(root_path + "MLP_prosodic.joblib")
MLP_full = joblib.load(root_path + "MLP_full.joblib")
MLP_mfcc = joblib.load(root_path + "MLP_mfcc.joblib")
RF_spectral = joblib.load(root_path + "RF_spectral.joblib")
RF_prosodic = joblib.load(root_path + "RF_prosodic.joblib")
RF_full = joblib.load(root_path + "RF_full.joblib")
RF_mfcc = joblib.load(root_path + "RF_mfcc.joblib")
def load_model_from_h5(file_path):
    # Load a Keras model from an HDF5 file; compile=False because the models are used for inference only
    with h5py.File(file_path, 'r') as file:
        model = load_model(file, compile=False)
    return model
# Deep learning models saved as Keras HDF5 files, one per feature set
LSTM_spectral = load_model_from_h5(root_path + "LSTM_spectral.h5")
LSTM_prosodic = load_model_from_h5(root_path + "LSTM_prosodic.h5")
LSTM_full = load_model_from_h5(root_path + "LSTM_full.h5")
LSTM_mfcc = load_model_from_h5(root_path + "LSTM_mfcc.h5")
LSTM_CNN_spectral = load_model_from_h5(root_path + "LSTM_CNN_spectral.h5")
LSTM_CNN_prosodic = load_model_from_h5(root_path + "LSTM_CNN_prosodic.h5")
LSTM_CNN_full = load_model_from_h5(root_path + "LSTM_CNN_full.h5")
LSTM_CNN_mfcc = load_model_from_h5(root_path + "LSTM_CNN_mfcc.h5")
CNN_spectral = load_model_from_h5(root_path + "CNN_spectral.h5")
CNN_prosodic = load_model_from_h5(root_path + "CNN_prosodic.h5")
CNN_full = load_model_from_h5(root_path + "CNN_full.h5")
CNN_mfcc = load_model_from_h5(root_path + "CNN_mfcc.h5")
total_model = {"SVM": {'mfcc': SVM_mfcc, 'spectral': SVM_spectral, 'prosodic': SVM_prosodic, 'full': SVM_full},
               "NB": {'mfcc': NB_mfcc, 'spectral': NB_spectral, 'prosodic': NB_prosodic, 'full': NB_full},
               "DT": {'mfcc': DT_mfcc, 'spectral': DT_spectral, 'prosodic': DT_prosodic, 'full': DT_full},
               "MLP": {'mfcc': MLP_mfcc, 'spectral': MLP_spectral, 'prosodic': MLP_prosodic, 'full': MLP_full},
               "RF": {'mfcc': RF_mfcc, 'spectral': RF_spectral, 'prosodic': RF_prosodic, 'full': RF_full},
               "LSTM": {'mfcc': LSTM_mfcc, 'spectral': LSTM_spectral, 'prosodic': LSTM_prosodic, 'full': LSTM_full},
               "LSTM_CNN": {'mfcc': LSTM_CNN_mfcc, 'spectral': LSTM_CNN_spectral, 'prosodic': LSTM_CNN_prosodic, 'full': LSTM_CNN_full},
               "CNN": {'mfcc': CNN_mfcc, 'spectral': CNN_spectral, 'prosodic': CNN_prosodic, 'full': CNN_full}
               }
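# total_model is indexed by model name first, then by feature set, e.g.
# total_model["SVM"]["mfcc"] is the SVM fitted on MFCC features;
# total_predict() below iterates over exactly these keys.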
spectral_scaler = joblib.load(root_path + 'spectral_features_standard_scaler.joblib')
prosodic_scaler = joblib.load(root_path + 'prosodic_features_standard_scaler.joblib')
full_scaler = joblib.load(root_path + 'full_features_standard_scaler.joblib')
mfcc_scaler = joblib.load(root_path + 'mfcc_features_standard_scaler.joblib')

scaler = {'mfcc': mfcc_scaler, 'spectral': spectral_scaler, 'prosodic': prosodic_scaler, 'full': full_scaler}
def Load_audio(audio_path):
    # Read the audio file at a fixed 48 kHz sampling rate
    y, sr = librosa.load(audio_path, sr=48000)
    return y
def Spectral_extract_features(audio):  # audio is a single loaded waveform
    mfccs = librosa.feature.mfcc(y=audio, n_mfcc=40)
    chroma = librosa.feature.chroma_stft(y=audio)
    spectral_contrast = librosa.feature.spectral_contrast(y=audio)
    tonal_centroid = librosa.feature.tonnetz(y=audio)
    mel_spectrogram = librosa.feature.melspectrogram(y=audio)
    # Average each feature over time and concatenate into one vector
    feature_vector = np.concatenate((mfccs.mean(axis=1), chroma.mean(axis=1), spectral_contrast.mean(axis=1), tonal_centroid.mean(axis=1), mel_spectrogram.mean(axis=1)))
    return np.array(feature_vector)
def mfcc_extract_features(audio):
    mfccs = librosa.feature.mfcc(y=audio, n_mfcc=40)
    mfcc_features = mfccs.mean(axis=1)
    return mfcc_features
def Prosodic_extract_features(audio):
    pitch, _ = librosa.piptrack(y=audio, n_fft=128, hop_length=512)
    duration = librosa.get_duration(y=audio)
    energy = librosa.feature.rms(y=audio)
    duration = np.array([duration]).reshape(1, 1)
    # Average over time and concatenate pitch, duration and RMS energy
    feature_vector = np.concatenate((pitch.mean(axis=1), duration.mean(axis=1), energy.mean(axis=1)))
    return np.array(feature_vector)
def Spectral_Prosodic(audio):
    Spectral_features = Spectral_extract_features(audio)
    Prosodic_features = Prosodic_extract_features(audio)
    full_features = np.concatenate((Spectral_features, Prosodic_features))
    return full_features
def Total_features(audio, scaler):
    # Extract all four feature sets and apply the matching fitted StandardScaler to each
    features = {}
    features['spectral'] = scaler['spectral'].transform(Spectral_extract_features(audio).reshape(1, -1))
    features['prosodic'] = scaler['prosodic'].transform(Prosodic_extract_features(audio).reshape(1, -1))
    features['full'] = scaler['full'].transform(Spectral_Prosodic(audio).reshape(1, -1))
    features['mfcc'] = scaler['mfcc'].transform(mfcc_extract_features(audio).reshape(1, -1))
    return features
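# Optional sanity-check helper (added for illustration, not called anywhere):
# prints the shape of each scaled feature vector. With librosa's default settings
# the widths are assumed to be 40 (mfcc), 193 (spectral: 40 MFCC + 12 chroma +
# 7 spectral-contrast + 6 tonnetz + 128 mel bands), 67 (prosodic: 65 pitch bins
# from n_fft=128 + duration + RMS energy) and 260 (full); they must match what
# the scalers and models were fitted on.
def _debug_feature_shapes(audio_path):
    audio = Load_audio(audio_path)
    feats = Total_features(audio, scaler)
    for name, vec in feats.items():
        print(name, vec.shape)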
def total_predict(feature, total_model):  # feature is a dict holding the four scaled feature sets
    result = {'mfcc': {}, 'spectral': {}, 'prosodic': {}, 'full': {}}
    f_keys = ['mfcc', 'spectral', 'prosodic', 'full']
    ML = ['SVM', 'NB', 'DT', 'MLP', 'RF']
    m_keys = ['SVM', 'NB', 'DT', 'MLP', 'RF', 'LSTM', 'LSTM_CNN', 'CNN']
    for f in f_keys:
        for m in m_keys:
            try:
                model = total_model[m][f]
                if m in ML:
                    # scikit-learn models return the class index directly
                    result[f][m] = num2label[model.predict(feature[f])[0]]
                else:
                    # Keras models return class probabilities; take the argmax
                    temp = [np.array(feature[f]).reshape((1, -1))]
                    y_pred = model.predict(temp)
                    y_pred_labels = np.argmax(y_pred, axis=1)[0]
                    result[f][m] = num2label[y_pred_labels]
            except Exception as e:
                print(f"Prediction failed for {m} on {f}: {e}")
    return result
def main_function(audio_path, scaler, total_model):
    audio = Load_audio(audio_path)
    feature = Total_features(audio, scaler)
    labels = total_predict(feature, total_model)
    table = pd.DataFrame.from_dict(labels).T
    table.insert(0, 'Feature', ['mfcc', 'spectral', 'prosodic', 'full'])
    return table
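# The returned DataFrame has one row per feature set and one column per model,
# roughly (labels illustrative only):
#   Feature    SVM    NB    ...  CNN
#   mfcc       Happy  Sad   ...  Happy
#   spectral   ...
# which is what the gr.Dataframe output component below displays.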
def main_interface(audio_file):
    # gr.Audio is configured with type='filepath', so audio_file is already a path on disk
    result_table = main_function(audio_file, scaler, total_model)
    return result_table
# Create the Gradio interface
iface = gr.Interface(
    fn=main_interface,
    inputs=gr.Audio(type='filepath'),
    outputs=gr.Dataframe(headers=['Feature', 'SVM', 'NB', 'DT', 'MLP', 'RF', 'LSTM', 'LSTM_CNN', 'CNN']),
)

# Launch the Gradio interface
iface.launch()