Spaces:

ThuyNT03
/

CS337_demo

Build error

App Files Files Community

CS337_demo / app.py

ThuyNT03

Upload 39 files

7aac284 11 months ago

raw

history blame

8.14 kB

	import os

	# Disable oneDNN custom ops. The switch is an *environment variable*, and it
	# has to be exported before keras (assumed to be TensorFlow-backed here) is
	# imported, which is why this statement sits above the imports. The previous
	# bare assignment `TF_ENABLE_ONEDNN_OPTS=0` only created an unused Python
	# variable and had no effect on the runtime.
	os.environ["TF_ENABLE_ONEDNN_OPTS"] = "0"

	import librosa
	import joblib
	from keras.models import load_model
	import numpy as np
	import pandas as pd
	import gradio as gr
	import h5py

	# Folder that holds every serialized model and feature scaler loaded below.
	root_path = "./model/"

	# Class index returned by the models -> human-readable emotion name.
	num2label = dict(enumerate(
	    ["Neutral", "Calm", "Happy", "Sad", "Angry", "Fearful", "Disgust", "Surprised"]
	))

	# joblib-serialized models: one file per (algorithm, feature set) pair, named
	# "<ALGO>_<feature>.joblib" inside root_path.
	def _load_joblib_family(algo):
	    """Load the four models of *algo* in (spectral, prosodic, full, mfcc) order."""
	    return tuple(
	        joblib.load(root_path + algo + "_" + kind + ".joblib")
	        for kind in ("spectral", "prosodic", "full", "mfcc")
	    )

	SVM_spectral, SVM_prosodic, SVM_full, SVM_mfcc = _load_joblib_family("SVM")

	NB_spectral, NB_prosodic, NB_full, NB_mfcc = _load_joblib_family("NB")

	DT_spectral, DT_prosodic, DT_full, DT_mfcc = _load_joblib_family("DT")

	MLP_spectral, MLP_prosodic, MLP_full, MLP_mfcc = _load_joblib_family("MLP")

	RF_spectral, RF_prosodic, RF_full, RF_mfcc = _load_joblib_family("RF")

	def load_model_from_h5(file_path):
	    """Return the Keras model stored in the HDF5 file at *file_path*.

	    The file is opened read-only through h5py and closed again before the
	    function returns. The model is loaded with ``compile=False``.
	    """
	    with h5py.File(file_path, mode='r') as h5_handle:
	        return load_model(h5_handle, compile=False)

	# Keras networks stored as HDF5: "<ARCH>_<feature>.h5" inside root_path.
	# Each line loads the four feature variants of one architecture, in the
	# order spectral, prosodic, full, mfcc.
	_H5_FEATURE_ORDER = ("spectral", "prosodic", "full", "mfcc")

	LSTM_spectral, LSTM_prosodic, LSTM_full, LSTM_mfcc = [
	    load_model_from_h5(root_path + "LSTM_" + kind + ".h5") for kind in _H5_FEATURE_ORDER
	]

	LSTM_CNN_spectral, LSTM_CNN_prosodic, LSTM_CNN_full, LSTM_CNN_mfcc = [
	    load_model_from_h5(root_path + "LSTM_CNN_" + kind + ".h5") for kind in _H5_FEATURE_ORDER
	]

	CNN_spectral, CNN_prosodic, CNN_full, CNN_mfcc = [
	    load_model_from_h5(root_path + "CNN_" + kind + ".h5") for kind in _H5_FEATURE_ORDER
	]

	def _per_feature(mfcc, spectral, prosodic, full):
	    """Bundle the four feature-specific variants of one model into a dict."""
	    return {'mfcc': mfcc, 'spectral': spectral, 'prosodic': prosodic, 'full': full}

	# Lookup table used at prediction time: total_model[model_name][feature_name].
	total_model = {
	    "SVM": _per_feature(SVM_mfcc, SVM_spectral, SVM_prosodic, SVM_full),
	    "NB": _per_feature(NB_mfcc, NB_spectral, NB_prosodic, NB_full),
	    "DT": _per_feature(DT_mfcc, DT_spectral, DT_prosodic, DT_full),
	    "MLP": _per_feature(MLP_mfcc, MLP_spectral, MLP_prosodic, MLP_full),
	    "RF": _per_feature(RF_mfcc, RF_spectral, RF_prosodic, RF_full),
	    "LSTM": _per_feature(LSTM_mfcc, LSTM_spectral, LSTM_prosodic, LSTM_full),
	    "LSTM_CNN": _per_feature(LSTM_CNN_mfcc, LSTM_CNN_spectral, LSTM_CNN_prosodic, LSTM_CNN_full),
	    "CNN": _per_feature(CNN_mfcc, CNN_spectral, CNN_prosodic, CNN_full),
	}

	# Scalers applied to each feature vector before prediction, stored as
	# "<feature>_features_standard_scaler.joblib" inside root_path.
	spectral_scaler, prosodic_scaler, full_scaler, mfcc_scaler = [
	    joblib.load(root_path + kind + '_features_standard_scaler.joblib')
	    for kind in ('spectral', 'prosodic', 'full', 'mfcc')
	]
	scaler = dict(
	    mfcc=mfcc_scaler,
	    spectral=spectral_scaler,
	    prosodic=prosodic_scaler,
	    full=full_scaler,
	)

	def Load_audio(audio_path):
	    """Load the audio file at *audio_path* with librosa, requesting sr=48000.

	    Only the waveform is returned; the sample rate that librosa.load also
	    returns is discarded.
	    """
	    waveform, _sample_rate = librosa.load(audio_path, sr=48000)
	    return waveform

	# y and sr can be reused for further audio-processing steps.

	def Spectral_extract_features(audio):
	    """Build the spectral feature vector for a single audio signal.

	    Five librosa feature matrices are computed (MFCC with 40 coefficients,
	    chroma STFT, spectral contrast, tonnetz, mel spectrogram); each one is
	    averaged over axis 1 and the averages are concatenated in that order.

	    NOTE(review): no ``sr`` is passed to librosa here, so its default sample
	    rate applies even though Load_audio requests 48000. Presumably this
	    matches how the stored models and scalers were produced; confirm before
	    changing it.
	    """
	    feature_matrices = (
	        librosa.feature.mfcc(y=audio, n_mfcc=40),
	        librosa.feature.chroma_stft(y=audio),
	        librosa.feature.spectral_contrast(y=audio),
	        librosa.feature.tonnetz(y=audio),
	        librosa.feature.melspectrogram(y=audio),
	    )
	    return np.concatenate([matrix.mean(axis=1) for matrix in feature_matrices])

	def mfcc_extract_features(audio):
	    """Return the 40 MFCC coefficients of *audio*, each averaged over axis 1.

	    No ``sr`` is passed, so librosa's default sample rate is used.
	    """
	    return librosa.feature.mfcc(y=audio, n_mfcc=40).mean(axis=1)

	def Prosodic_extract_features(audio):
	    """Build the prosodic feature vector for a single audio signal.

	    The vector is the concatenation of: the first output of librosa.piptrack
	    averaged over axis 1, the clip duration (one value), and the RMS energy
	    averaged over axis 1.

	    NOTE(review): piptrack is called with n_fft=128 and hop_length=512 (hop
	    larger than the FFT size). Presumably the stored models and scalers were
	    produced with these settings; confirm before changing them.
	    """
	    pitch_track = librosa.piptrack(y=audio, n_fft=128, hop_length=512)[0]
	    clip_duration = librosa.get_duration(y=audio)
	    rms_energy = librosa.feature.rms(y=audio)
	    # Shape the scalar duration as (1, 1) so it can be reduced over axis 1
	    # exactly like the two matrices.
	    duration_matrix = np.array([clip_duration]).reshape(1, 1)
	    return np.concatenate(
	        [pitch_track.mean(axis=1), duration_matrix.mean(axis=1), rms_energy.mean(axis=1)]
	    )

	def Spectral_Prosodic(audio):
	    """Return the spectral features followed by the prosodic features of *audio*."""
	    return np.concatenate(
	        [Spectral_extract_features(audio), Prosodic_extract_features(audio)]
	    )

	def Total_features(audio, scaler):
	    """Extract and scale all four feature sets of *audio*.

	    Args:
	        audio: the audio signal passed on to the extractor functions.
	        scaler: dict with keys 'spectral', 'prosodic', 'full' and 'mfcc' whose
	            values provide ``transform``.

	    Returns:
	        dict with the same four keys; each value is the output of the
	        matching scaler applied to the feature vector reshaped to one row.
	    """
	    extractors = (
	        ('spectral', Spectral_extract_features),
	        ('prosodic', Prosodic_extract_features),
	        ('full', Spectral_Prosodic),
	        ('mfcc', mfcc_extract_features),
	    )
	    return {
	        kind: scaler[kind].transform(extract(audio).reshape(1, -1))
	        for kind, extract in extractors
	    }



	def total_predict(feature, total_model, label_map=None):
	    """Predict an emotion label with every model for every feature set.

	    Args:
	        feature: dict keyed by 'mfcc', 'spectral', 'prosodic' and 'full'
	            holding the scaled one-row feature array for each set.
	        total_model: nested dict, ``total_model[model_name][feature_name]``.
	        label_map: optional mapping from class index to label text; when
	            omitted, the module-level ``num2label`` is used.

	    Returns:
	        Nested dict ``result[feature_name][model_name]`` holding the label.
	        If a model fails, the failure is printed and the cell is set to
	        "N/A", so every feature set always carries all eight model keys.
	    """
	    if label_map is None:
	        label_map = num2label

	    feature_names = ('mfcc', 'spectral', 'prosodic', 'full')
	    model_names = ('SVM', 'NB', 'DT', 'MLP', 'RF', 'LSTM', 'LSTM_CNN', 'CNN')
	    # predict() of these models yields the class index directly; for the
	    # remaining ones the index is taken as the argmax of the predict() output.
	    index_predictors = frozenset(('SVM', 'NB', 'DT', 'MLP', 'RF'))

	    result = {name: {} for name in feature_names}
	    for f in feature_names:
	        for m in model_names:
	            try:
	                model = total_model[m][f]
	                if m in index_predictors:
	                    class_index = model.predict(feature[f])[0]
	                else:
	                    batch = [np.array(feature[f]).reshape((1, -1))]
	                    class_index = np.argmax(model.predict(batch), axis=1)[0]
	                result[f][m] = label_map[class_index]
	            except Exception as exc:
	                # Best effort: one broken model must not take down the whole
	                # table, but the cause is reported instead of being hidden,
	                # and KeyboardInterrupt/SystemExit are no longer swallowed.
	                print(f, m, repr(exc))
	                result[f][m] = "N/A"
	    return result

	# def main_function(audio_path, scaler, total_model):
	# audio = Load_audio(audio_path)
	# feature = Total_features(audio, scaler)
	# labels = total_predict(feature, total_model)
	# table = pd.DataFrame.from_dict(labels).T
	# return table
	def main_function(audio_path, scaler, total_model):
	    """Run the full pipeline on one audio file and return the result table.

	    The file is loaded, its four feature sets are extracted and scaled, and
	    every model predicts a label. The returned DataFrame has one row per
	    feature set and one column per model, preceded by a 'Đặc trưng' column
	    naming the feature set.
	    """
	    scaled_features = Total_features(Load_audio(audio_path), scaler)
	    predictions = total_predict(scaled_features, total_model)
	    # from_dict puts feature sets in columns; transpose so they become rows.
	    table = pd.DataFrame.from_dict(predictions).transpose()
	    table.insert(0, 'Đặc trưng', ['mfcc', 'spectral', 'prosodic', 'full'])
	    return table

	def main_interface(audio_file):
	    """Gradio callback: return the prediction table for *audio_file*.

	    *audio_file* is forwarded unchanged to main_function together with the
	    module-level ``scaler`` and ``total_model``. An earlier, disabled variant
	    took a ``(sr, audio_data)`` pair and resampled it to 48000 itself.
	    """
	    return main_function(audio_file, scaler, total_model)


	# Gradio UI: one audio input handed to main_interface as a file path, and
	# one table output with a feature-name column followed by one column per model.
	_result_headers = ['Đặc trưng', 'SVM', 'NB', 'DT', 'MLP', 'RF', 'LSTM', 'LSTM_CNN', 'CNN']
	iface = gr.Interface(
	    fn=main_interface,
	    inputs=gr.Audio(type='filepath'),
	    outputs=gr.Dataframe(headers=_result_headers),
	)

	# Start serving the interface.
	iface.launch()