Spaces:

ThuyNT03
/

CS337_demo

Build error

App Files Files Community

ThuyNT03 committed on Dec 18, 2023

Commit

7aac284

•

1 Parent(s): c0d3f36

Upload 39 files

Browse files

Files changed (39) hide show

app.py +192 -0
model/CNN_full.h5 +3 -0
model/CNN_mfcc.h5 +3 -0
model/CNN_prosodic.h5 +3 -0
model/CNN_spectral.h5 +3 -0
model/DT_full.joblib +3 -0
model/DT_mfcc.joblib +3 -0
model/DT_prosodic.joblib +3 -0
model/DT_spectral.joblib +3 -0
model/LSTM_CNN_full.h5 +3 -0
model/LSTM_CNN_mfcc.h5 +3 -0
model/LSTM_CNN_prosodic.h5 +3 -0
model/LSTM_CNN_spectral.h5 +3 -0
model/LSTM_full.h5 +3 -0
model/LSTM_mfcc.h5 +3 -0
model/LSTM_prosodic.h5 +3 -0
model/LSTM_spectral.h5 +3 -0
model/MLP_full.joblib +3 -0
model/MLP_mfcc.joblib +3 -0
model/MLP_prosodic.joblib +3 -0
model/MLP_spectral.joblib +3 -0
model/NB_full.joblib +3 -0
model/NB_mfcc.joblib +3 -0
model/NB_prosodic.joblib +3 -0
model/NB_spectral.joblib +3 -0
model/RF_full.joblib +3 -0
model/RF_mfcc.joblib +3 -0
model/RF_prosodic.joblib +3 -0
model/RF_spectral.joblib +3 -0
model/SVM_full.joblib +3 -0
model/SVM_mfcc.joblib +3 -0
model/SVM_prosodic.joblib +3 -0
model/SVM_spectral.joblib +3 -0
model/finetune_wav2vec_base/runs/Nov13_15-31-38_8a5ba29056be/events.out.tfevents.1699889504.8a5ba29056be.5820.0 +3 -0
model/full_features_standard_scaler.joblib +3 -0
model/mfcc_features_standard_scaler.joblib +3 -0
model/prosodic_features_standard_scaler.joblib +3 -0
model/spectral_features_standard_scaler.joblib +3 -0
requirements.txt +0 -0

app.py ADDED Viewed

	@@ -0,0 +1,192 @@

+import os
+
+# TF_ENABLE_ONEDNN_OPTS is an environment variable, not a Python variable, so
+# it has to be exported through os.environ. This is done before keras is
+# imported below so the setting is already in place when the deep-learning
+# backend is loaded.
+os.environ["TF_ENABLE_ONEDNN_OPTS"] = "0"
+
+import librosa
+import joblib
+from keras.models import load_model
+import numpy as np
+import pandas as pd
+import gradio as gr
+import h5py
+
+# Kept only so the module-level name from the original script still exists;
+# the effective setting is the os.environ entry above.
+TF_ENABLE_ONEDNN_OPTS = 0
+
+# Directory holding every serialized model and scaler used by this app.
+root_path = "./model/"
+
+# Maps the integer class index predicted by a model to its emotion label.
+num2label = {0:"Neutral", 1: "Calm", 2:"Happy", 3:"Sad", 4:"Angry", 5:"Fearful", 6:"Disgust", 7:"Surprised"}
+
+# joblib-serialized models: one per (model family, feature set) pair.
+SVM_spectral = joblib.load(root_path + "SVM_spectral.joblib")
+SVM_prosodic = joblib.load(root_path + "SVM_prosodic.joblib")
+SVM_full = joblib.load(root_path + "SVM_full.joblib")
+SVM_mfcc = joblib.load(root_path + "SVM_mfcc.joblib")
+NB_spectral = joblib.load(root_path + "NB_spectral.joblib")
+NB_prosodic = joblib.load(root_path + "NB_prosodic.joblib")
+NB_full = joblib.load(root_path + "NB_full.joblib")
+NB_mfcc = joblib.load(root_path + "NB_mfcc.joblib")
+DT_spectral = joblib.load(root_path + "DT_spectral.joblib")
+DT_prosodic = joblib.load(root_path + "DT_prosodic.joblib")
+DT_full = joblib.load(root_path + "DT_full.joblib")
+DT_mfcc = joblib.load(root_path + "DT_mfcc.joblib")
+MLP_spectral = joblib.load(root_path + "MLP_spectral.joblib")
+MLP_prosodic = joblib.load(root_path + "MLP_prosodic.joblib")
+MLP_full = joblib.load(root_path + "MLP_full.joblib")
+MLP_mfcc = joblib.load(root_path + "MLP_mfcc.joblib")
+RF_spectral = joblib.load(root_path + "RF_spectral.joblib")
+RF_prosodic = joblib.load(root_path + "RF_prosodic.joblib")
+RF_full = joblib.load(root_path + "RF_full.joblib")
+RF_mfcc = joblib.load(root_path + "RF_mfcc.joblib")
+def load_model_from_h5(file_path):
+    """Load a Keras model from an HDF5 file without compiling it.
+
+    The file is opened read-only through h5py and the open handle (rather
+    than the path) is passed to ``load_model``; the handle is closed as soon
+    as the model has been built.
+
+    NOTE(review): the reason for going through h5py instead of passing the
+    path directly is not visible here -- confirm before simplifying.
+
+    Args:
+        file_path: Path of the ``.h5`` model file.
+
+    Returns:
+        The loaded, uncompiled model.
+    """
+    with h5py.File(file_path, 'r') as h5_handle:
+        return load_model(h5_handle, compile=False)
+# Keras models stored as HDF5: one per (architecture, feature set) pair.
+LSTM_spectral = load_model_from_h5(root_path + "LSTM_spectral.h5")
+LSTM_prosodic = load_model_from_h5(root_path + "LSTM_prosodic.h5")
+LSTM_full = load_model_from_h5(root_path + "LSTM_full.h5")
+LSTM_mfcc = load_model_from_h5(root_path + "LSTM_mfcc.h5")
+LSTM_CNN_spectral = load_model_from_h5(root_path + "LSTM_CNN_spectral.h5")
+LSTM_CNN_prosodic = load_model_from_h5(root_path + "LSTM_CNN_prosodic.h5")
+LSTM_CNN_full = load_model_from_h5(root_path + "LSTM_CNN_full.h5")
+LSTM_CNN_mfcc = load_model_from_h5(root_path + "LSTM_CNN_mfcc.h5")
+CNN_spectral = load_model_from_h5(root_path + "CNN_spectral.h5")
+CNN_prosodic = load_model_from_h5(root_path + "CNN_prosodic.h5")
+CNN_full = load_model_from_h5(root_path + "CNN_full.h5")
+CNN_mfcc = load_model_from_h5(root_path + "CNN_mfcc.h5")
+# Two-level registry used by total_predict: total_model[model_name][feature_name]
+# gives the model trained on that feature set. The first five entries are the
+# joblib-loaded models, the last three are the Keras models loaded above.
+total_model = {"SVM": {'mfcc': SVM_mfcc, 'spectral': SVM_spectral, 'prosodic':SVM_prosodic, 'full':SVM_full},
+               "NB": {'mfcc': NB_mfcc, 'spectral': NB_spectral, 'prosodic': NB_prosodic, 'full': NB_full},
+               "DT": {'mfcc': DT_mfcc, 'spectral': DT_spectral, 'prosodic': DT_prosodic, 'full': DT_full},
+               "MLP": {'mfcc': MLP_mfcc, 'spectral': MLP_spectral, 'prosodic':MLP_prosodic, 'full':MLP_full},
+               "RF": {'mfcc': RF_mfcc, 'spectral': RF_spectral, 'prosodic': RF_prosodic, 'full': RF_full},
+               "LSTM": {'mfcc': LSTM_mfcc, 'spectral': LSTM_spectral, 'prosodic': LSTM_prosodic, 'full': LSTM_full},
+               "LSTM_CNN": {'mfcc': LSTM_CNN_mfcc, 'spectral': LSTM_CNN_spectral, 'prosodic': LSTM_CNN_prosodic, 'full': LSTM_CNN_full},
+               "CNN": {'mfcc': CNN_mfcc, 'spectral': CNN_spectral, 'prosodic': CNN_prosodic, 'full': CNN_full}
+               }
+# One saved scaler per feature set (file names indicate standard scalers);
+# Total_features applies them via .transform() before prediction.
+spectral_scaler = joblib.load(root_path + 'spectral_features_standard_scaler.joblib')
+prosodic_scaler = joblib.load(root_path + 'prosodic_features_standard_scaler.joblib')
+full_scaler = joblib.load(root_path + 'full_features_standard_scaler.joblib')
+mfcc_scaler = joblib.load(root_path + 'mfcc_features_standard_scaler.joblib')
+# Scaler registry keyed by the same feature-set names used in total_model.
+scaler = {'mfcc': mfcc_scaler, 'spectral': spectral_scaler, 'prosodic': prosodic_scaler, 'full': full_scaler}
+def Load_audio(audio_path):
+    """Read an audio file and return its waveform.
+
+    The file is loaded through librosa with a requested sample rate of
+    48000 Hz. librosa also returns the sample rate, but only the samples are
+    handed back to the caller.
+
+    Args:
+        audio_path: Path of the audio file to read.
+
+    Returns:
+        The waveform returned by ``librosa.load``.
+    """
+    waveform, _sample_rate = librosa.load(audio_path, sr=48000)
+    return waveform
+def Spectral_extract_features(audio):  # audio is the waveform of one single clip
+    """Build the spectral feature vector for one waveform.
+
+    Five librosa feature matrices are computed (40 MFCCs, chroma, spectral
+    contrast, tonnetz, mel spectrogram). Each one is averaged along axis 1
+    and the averages are concatenated in that order.
+
+    NOTE(review): no sample rate is passed to librosa here, so its default is
+    used even though Load_audio requests 48000 Hz. Presumably the models were
+    trained with the same setting -- confirm before changing it.
+
+    Args:
+        audio: Waveform of a single clip.
+
+    Returns:
+        A 1-D numpy array containing the concatenated averages.
+    """
+    feature_matrices = (
+        librosa.feature.mfcc(y=audio, n_mfcc=40),
+        librosa.feature.chroma_stft(y=audio),
+        librosa.feature.spectral_contrast(y=audio),
+        librosa.feature.tonnetz(y=audio),
+        librosa.feature.melspectrogram(y=audio),
+    )
+    averaged = [matrix.mean(axis=1) for matrix in feature_matrices]
+    return np.array(np.concatenate(averaged))
+def mfcc_extract_features(audio):
+    """Return the 40 MFCC coefficients of a waveform, averaged along axis 1.
+
+    Args:
+        audio: Waveform of a single clip.
+
+    Returns:
+        A 1-D array with one averaged value per MFCC coefficient.
+    """
+    return librosa.feature.mfcc(y=audio, n_mfcc=40).mean(axis=1)
+def Prosodic_extract_features(audio):
+    """Build the prosodic feature vector for one waveform.
+
+    The vector is the concatenation of, in order: the pitch-tracking output
+    of ``librosa.piptrack`` (n_fft=128, hop_length=512) averaged along
+    axis 1, the clip duration, and the RMS energy averaged along axis 1.
+
+    Args:
+        audio: Waveform of a single clip.
+
+    Returns:
+        A 1-D numpy array containing the concatenated values.
+    """
+    pitch_matrix, _ = librosa.piptrack(y=audio, n_fft=128, hop_length=512)
+    rms_energy = librosa.feature.rms(y=audio)
+    # The scalar duration is wrapped into a (1, 1) array so it can be reduced
+    # along axis 1 exactly like the two matrices.
+    clip_duration = np.array([librosa.get_duration(y=audio)]).reshape(1, 1)
+    pieces = [
+        pitch_matrix.mean(axis=1),
+        clip_duration.mean(axis=1),
+        rms_energy.mean(axis=1),
+    ]
+    return np.array(np.concatenate(pieces))
+def Spectral_Prosodic(audio):
+    """Return the "full" feature vector: spectral features followed by prosodic ones.
+
+    Args:
+        audio: Waveform of a single clip.
+
+    Returns:
+        The concatenation of Spectral_extract_features(audio) and
+        Prosodic_extract_features(audio), in that order.
+    """
+    return np.concatenate(
+        (Spectral_extract_features(audio), Prosodic_extract_features(audio))
+    )
+def Total_features(audio, scaler):
+    """Extract and scale all four feature sets for one waveform.
+
+    The spectral and prosodic vectors are each extracted once and reused to
+    build the "full" vector (spectral followed by prosodic), instead of
+    running both extractors a second time for it.
+
+    Args:
+        audio: Waveform of a single clip.
+        scaler: Dict keyed by 'spectral', 'prosodic', 'full' and 'mfcc' whose
+            values expose ``transform``.
+
+    Returns:
+        Dict with the same four keys; each value is the result of the
+        matching scaler's ``transform`` on the feature vector reshaped to a
+        single row.
+    """
+    spectral = Spectral_extract_features(audio)
+    prosodic = Prosodic_extract_features(audio)
+    raw_vectors = {
+        'spectral': spectral,
+        'prosodic': prosodic,
+        # Same layout as Spectral_Prosodic(audio), without recomputing.
+        'full': np.concatenate((spectral, prosodic)),
+        'mfcc': mfcc_extract_features(audio),
+    }
+    return {
+        name: scaler[name].transform(vector.reshape(1, -1))
+        for name, vector in raw_vectors.items()
+    }
+def total_predict(feature, total_model):  # feature is a dict gathering the 4 feature sets
+    """Run every model on its matching feature set and collect the labels.
+
+    Args:
+        feature: Dict keyed by 'mfcc', 'spectral', 'prosodic' and 'full'
+            holding the scaled single-row feature arrays.
+        total_model: Nested dict ``total_model[model_name][feature_name]``.
+
+    Returns:
+        Nested dict ``result[feature_name][model_name]`` with the predicted
+        label from ``num2label``. When a model fails, the failure is printed
+        and the entry is set to ``None``, so every feature set always has an
+        entry for every model and the resulting table keeps a fixed shape.
+    """
+    f_keys = ['mfcc', 'spectral', 'prosodic', 'full']
+    m_keys = ['SVM', 'NB', 'DT', 'MLP', 'RF', 'LSTM', 'LSTM_CNN', 'CNN']
+    # Models whose predict() returns the class index directly; the remaining
+    # ones return per-class scores that still need an argmax.
+    direct_label_models = {'SVM', 'NB', 'DT', 'MLP', 'RF'}
+    result = {f: {} for f in f_keys}
+    for f in f_keys:
+        for m in m_keys:
+            try:
+                model = total_model[m][f]
+                if m in direct_label_models:
+                    class_index = model.predict(feature[f])[0]
+                else:
+                    batch = [np.array(feature[f]).reshape((1, -1))]
+                    class_index = np.argmax(model.predict(batch), axis=1)[0]
+                result[f][m] = num2label[class_index]
+            except Exception as exc:
+                # Best effort: one failing model must not abort the others,
+                # but KeyboardInterrupt/SystemExit are no longer swallowed.
+                print(f, m, repr(exc))
+                result[f][m] = None
+    return result
+# def main_function(audio_path, scaler, total_model):
+#   audio = Load_audio(audio_path)
+#   feature = Total_features(audio, scaler)
+#   labels = total_predict(feature, total_model)
+#   table = pd.DataFrame.from_dict(labels).T
+#   return table
+def main_function(audio_path, scaler, total_model):
+    """Run the whole pipeline on one audio file and tabulate the predictions.
+
+    Args:
+        audio_path: Path of the audio file to analyse.
+        scaler: Dict of scalers, forwarded to Total_features.
+        total_model: Nested dict of models, forwarded to total_predict.
+
+    Returns:
+        A pandas DataFrame with one row per feature set and one column per
+        model, preceded by a 'Đặc trưng' column naming the feature set.
+    """
+    waveform = Load_audio(audio_path)
+    predictions = total_predict(Total_features(waveform, scaler), total_model)
+    # from_dict puts feature sets in columns; transpose so they become rows.
+    table = pd.DataFrame.from_dict(predictions).transpose()
+    table.insert(loc=0, column='Đặc trưng', value=['mfcc', 'spectral', 'prosodic', 'full'])
+    return table
+def main_interface(audio_file):
+    """Gradio callback: predict emotions for the submitted audio file.
+
+    An earlier, commented-out variant of this callback received a
+    (sample rate, samples) pair and resampled it to 48000 Hz itself; the
+    current version receives a file path and lets main_function load it.
+
+    Args:
+        audio_file: Path of the submitted audio, or None when the user
+            submitted without providing any audio.
+
+    Returns:
+        The prediction table built by main_function.
+
+    Raises:
+        gr.Error: If no audio was provided, so the UI shows a clear message
+            instead of a failure from deep inside the audio loader.
+    """
+    if audio_file is None:
+        raise gr.Error("Please upload or record an audio clip first.")
+    return main_function(audio_file, scaler, total_model)
+# Build the Gradio interface: a single audio input wired to main_interface,
+# with the prediction table as output. type='filepath' is requested for the
+# audio input, and main_interface passes what it receives on to the loader.
+# The headers list the feature-name column ('Đặc trưng', Vietnamese for
+# "Feature") first, followed by one column per model, matching the table
+# built by main_function.
+iface = gr.Interface(
+    fn=main_interface,
+    inputs=gr.Audio(type= 'filepath'),
+    outputs=gr.Dataframe(headers=['Đặc trưng', 'SVM', 'NB', 'DT', 'MLP', 'RF', 'LSTM', 'LSTM_CNN', 'CNN']),
+)
+# Start serving the interface; this runs at import time of the script.
+iface.launch()

model/CNN_full.h5 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5c3545a62d07920223acaa1de7f22bf6a5b4409471eaa1c7311ab090336b550d
+size 2503312

model/CNN_mfcc.h5 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fda65ce37cc725a0d7d32d212b3e0c2518d7a4e1e8f81eb1c4a4543e376d5dd3
+size 2392736

model/CNN_prosodic.h5 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:eb6555c9b3cfffee7e60b6a401eb88db5d034526cba71c5cc401672a61222d8a
+size 2405024

model/CNN_spectral.h5 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1bf7565b61d30b2daf8a41acac95fa8f00c62de40b677f77eeba0cd6712e3387
+size 2470560

model/DT_full.joblib ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3adc861f7b0c5f0914584daba15b14e96d81468c1f04ae8fa174c56809b2a167
+size 32169

model/DT_mfcc.joblib ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3a4bc697799ebf39051d12b9bf1aa00883d13e4709b11435d0eb40a011aa6dc7
+size 32889

model/DT_prosodic.joblib ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:feb930d9f8325f801231dcb9c7615e54097ead3c869bf7a88d36cef784907e0f
+size 59049

model/DT_spectral.joblib ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e702b141c2a643b76890765f83d08b73366e4f21b3288156bfca342733d61f29
+size 31689

model/LSTM_CNN_full.h5 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cd6e1f630d9539ab35943ef3a13385f30f82fdc193c9cc6ac91c94c750a52c47
+size 4267408

model/LSTM_CNN_mfcc.h5 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6009644eccef5f7f849b02fcdf8adbf8d1426284b430f070f18c0a03ea590e6b
+size 4267408

model/LSTM_CNN_prosodic.h5 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9bde39f6906bd2eef95b5e12fa3eebca7ef6751e10f7601596127d9edfb0d618
+size 4267432

model/LSTM_CNN_spectral.h5 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b70e987559b0e04e7e014d6f57bfcd76cf6c45cc9a1e7689c03b7002a987fe46
+size 4267432

model/LSTM_full.h5 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:21d48ca35abab2b0ac1973be60b43efc44cc442502e55844d315a8519e2c6d22
+size 6219560

model/LSTM_mfcc.h5 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8bee7322b6a4a316265581662abd4fba375293269d856b7062bc6a0fddca73f4
+size 6219560

model/LSTM_prosodic.h5 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8f0cec70f95186490a70581a981ee7bf5449311b361765caa70ffcbcce9ac2f6
+size 6219560

model/LSTM_spectral.h5 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bde863d3b7f8c3d59c0a7bd4661d0cf8f38d0e9e03954f410316dbcdfbb9ad11
+size 6219560

model/MLP_full.joblib ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:14e811efeed31cdb1599f6870aedec21f29dbd9189e55c918a6f90d2b5420165
+size 1622848

model/MLP_mfcc.joblib ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b4f5396dfaebac0d1376fe11d7ea2b03feeb79cfdff3b332be65e001c65a9aef
+size 157936

model/MLP_prosodic.joblib ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0972d7d39e864fa62fb1c9eb1a2244d129a9868f71dc4d48060e7bfdf7782d05
+size 466880

model/MLP_spectral.joblib ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c2a353c3202d7f5ad4c086ce6ffd7b20a4a7d4b16b04c29586ec72f71c58fb0e
+size 1222304

model/NB_full.joblib ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8896d2487e8e445b17090ec1d314b50b0e417644303aee85551bb968d1149e37
+size 34199

model/NB_mfcc.joblib ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:709e9f1419c08733593daba2e27f7c5739a5a9a10807e27748659845d67e03d9
+size 6039

model/NB_prosodic.joblib ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6b5c07f1fc3eb9c79930d459afb12e283604e082e23aaafa946e2ac4b1636add
+size 9495

model/NB_spectral.joblib ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c7a9d04273a40db6e422c61b737da5ace389ac23bc274eb2b1f1e9326dc425ca
+size 25623

model/RF_full.joblib ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0f32918de6d3308a719f4e783e544fda054c2fd851e507bd810312b69c01d045
+size 526754681

model/RF_mfcc.joblib ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:19c71556418f3647a53c7235286626a5bd0c9c996156b3bdf79f528b1a525186
+size 469029625

model/RF_prosodic.joblib ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:29b20e92c7804989605bbfdc7d0306f672d6712bd6808fd020e2257dc25a2891
+size 531140425

model/RF_spectral.joblib ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3e58c8b5bd507085a3e2f2171cef2e83c67bf3941c804026e3706c8e1ce56db7
+size 526621225

model/SVM_full.joblib ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6843682d8c1e8d750abbcf2340e642dff24d298702d0852f1f5c44ebf3773f6d
+size 5031223

model/SVM_mfcc.joblib ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:987a47f81a0bb1d180768695a6c577c894aeb18ce2c3f74c73f5d36e2b1bfaa8
+size 872183

model/SVM_prosodic.joblib ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8b7390b0915fdfca49bef690ba62c56b8a04eaaa6e99c71d0830eb326be62c35
+size 1726263

model/SVM_spectral.joblib ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a239cc65ffaae4b1c3a5f5cc654ecae697493210043acc46878aee13800d31e9
+size 3669319

model/finetune_wav2vec_base/runs/Nov13_15-31-38_8a5ba29056be/events.out.tfevents.1699889504.8a5ba29056be.5820.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ba4e8edb9165ae3c73be83027482b673c468b37ebb7d81d595737fc148a7239b
+size 6969

model/full_features_standard_scaler.joblib ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:aafe7f03fb68ee68fb17067f8923b5e2bfc2b0671d1bde078b2abb746f7aa511
+size 6855

model/mfcc_features_standard_scaler.joblib ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b4c08c9200c4f4bcb4ee5d2ea0876495aadd0354a82d6f9ad01fcc6a070ee3de
+size 1575

model/prosodic_features_standard_scaler.joblib ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ad281b77b47be60a2e98021b82fcc676493334d168f387c70bd56d1d416babe4
+size 2207

model/spectral_features_standard_scaler.joblib ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:527c6ca4cffbc02a3c06f54cefb98a3938b863731931416b31f10e8eecdf819e
+size 5231

requirements.txt ADDED Viewed

Binary file (4.34 kB). View file