ThuyNT03 committed on
Commit
7aac284
1 Parent(s): c0d3f36

Upload 39 files

Browse files
app.py ADDED
@@ -0,0 +1,192 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os

# The oneDNN switch is read by TensorFlow from the process environment, so a
# plain Python assignment has no effect on it. Export it before Keras (and with
# it TensorFlow) is imported so the setting is visible when the library
# initializes. setdefault keeps any value the operator exported explicitly.
os.environ.setdefault("TF_ENABLE_ONEDNN_OPTS", "0")

import librosa
import joblib
from keras.models import load_model
import numpy as np
import pandas as pd
import gradio as gr
import h5py

# Kept so the original module-level name still exists for any importer.
TF_ENABLE_ONEDNN_OPTS = 0

# Directory prefix (with trailing slash) prepended to every model/scaler file name.
root_path = "./model/"

# Maps the integer class index produced by a model to its emotion label.
# NOTE(review): presumably this order matches the label encoding used at
# training time -- confirm against the training code.
num2label = {0: "Neutral", 1: "Calm", 2: "Happy", 3: "Sad", 4: "Angry", 5: "Fearful", 6: "Disgust", 7: "Surprised"}
12
+
13
# Models serialized with joblib. Each family below has one model per feature
# set: spectral, prosodic, full and mfcc.
# NOTE(review): joblib.load unpickles arbitrary objects -- only load files from
# a trusted source.

# SVM models.
SVM_spectral = joblib.load(root_path + "SVM_spectral.joblib")
SVM_prosodic = joblib.load(root_path + "SVM_prosodic.joblib")
SVM_full = joblib.load(root_path + "SVM_full.joblib")
SVM_mfcc = joblib.load(root_path + "SVM_mfcc.joblib")

# NB models.
NB_spectral = joblib.load(root_path + "NB_spectral.joblib")
NB_prosodic = joblib.load(root_path + "NB_prosodic.joblib")
NB_full = joblib.load(root_path + "NB_full.joblib")
NB_mfcc = joblib.load(root_path + "NB_mfcc.joblib")

# DT models.
DT_spectral = joblib.load(root_path + "DT_spectral.joblib")
DT_prosodic = joblib.load(root_path + "DT_prosodic.joblib")
DT_full = joblib.load(root_path + "DT_full.joblib")
DT_mfcc = joblib.load(root_path + "DT_mfcc.joblib")


# MLP models.
MLP_spectral = joblib.load(root_path + "MLP_spectral.joblib")
MLP_prosodic = joblib.load(root_path + "MLP_prosodic.joblib")
MLP_full = joblib.load(root_path + "MLP_full.joblib")
MLP_mfcc = joblib.load(root_path + "MLP_mfcc.joblib")

# RF models.
RF_spectral = joblib.load(root_path + "RF_spectral.joblib")
RF_prosodic = joblib.load(root_path + "RF_prosodic.joblib")
RF_full = joblib.load(root_path + "RF_full.joblib")
RF_mfcc = joblib.load(root_path + "RF_mfcc.joblib")
38
+
39
def load_model_from_h5(file_path):
    """Load a Keras model from the HDF5 file at ``file_path``.

    The file is opened read-only through h5py and the open handle (not the
    path) is passed to ``load_model`` with ``compile=False``. The handle is
    closed when the ``with`` block exits.

    Returns whatever ``load_model`` returns.
    """
    with h5py.File(file_path, mode='r') as h5_handle:
        return load_model(h5_handle, compile=False)
43
+
44
# Models stored as .h5 files and loaded through load_model_from_h5. Each family
# below has one model per feature set: spectral, prosodic, full and mfcc.

# LSTM models.
LSTM_spectral = load_model_from_h5(root_path + "LSTM_spectral.h5")
LSTM_prosodic = load_model_from_h5(root_path + "LSTM_prosodic.h5")
LSTM_full = load_model_from_h5(root_path + "LSTM_full.h5")
LSTM_mfcc = load_model_from_h5(root_path + "LSTM_mfcc.h5")

# LSTM_CNN models.
LSTM_CNN_spectral = load_model_from_h5(root_path + "LSTM_CNN_spectral.h5")
LSTM_CNN_prosodic = load_model_from_h5(root_path + "LSTM_CNN_prosodic.h5")
LSTM_CNN_full = load_model_from_h5(root_path + "LSTM_CNN_full.h5")
LSTM_CNN_mfcc = load_model_from_h5(root_path + "LSTM_CNN_mfcc.h5")

# CNN models.
CNN_spectral = load_model_from_h5(root_path + "CNN_spectral.h5")
CNN_prosodic = load_model_from_h5(root_path + "CNN_prosodic.h5")
CNN_full = load_model_from_h5(root_path + "CNN_full.h5")
CNN_mfcc = load_model_from_h5(root_path + "CNN_mfcc.h5")
58
+
59
# Registry of every loaded model, indexed first by model family and then by
# feature set: total_model[family][feature_set]. total_predict looks models up
# through exactly these keys.
total_model = {"SVM": {'mfcc': SVM_mfcc, 'spectral': SVM_spectral, 'prosodic':SVM_prosodic, 'full':SVM_full},
               "NB": {'mfcc': NB_mfcc, 'spectral': NB_spectral, 'prosodic': NB_prosodic, 'full': NB_full},
               "DT": {'mfcc': DT_mfcc, 'spectral': DT_spectral, 'prosodic': DT_prosodic, 'full': DT_full},
               "MLP": {'mfcc': MLP_mfcc, 'spectral': MLP_spectral, 'prosodic':MLP_prosodic, 'full':MLP_full},
               "RF": {'mfcc': RF_mfcc, 'spectral': RF_spectral, 'prosodic': RF_prosodic, 'full': RF_full},
               "LSTM": {'mfcc': LSTM_mfcc, 'spectral': LSTM_spectral, 'prosodic': LSTM_prosodic, 'full': LSTM_full},
               "LSTM_CNN": {'mfcc': LSTM_CNN_mfcc, 'spectral': LSTM_CNN_spectral, 'prosodic': LSTM_CNN_prosodic, 'full': LSTM_CNN_full},
               "CNN": {'mfcc': CNN_mfcc, 'spectral': CNN_spectral, 'prosodic': CNN_prosodic, 'full': CNN_full}
               }
68
+
69
# One scaler per feature set, loaded with joblib. Total_features calls
# .transform() on these before the features are handed to the models.
# NOTE(review): the file names suggest fitted StandardScaler objects -- confirm.
spectral_scaler = joblib.load(root_path + 'spectral_features_standard_scaler.joblib')
prosodic_scaler = joblib.load(root_path + 'prosodic_features_standard_scaler.joblib')
full_scaler = joblib.load(root_path + 'full_features_standard_scaler.joblib')
mfcc_scaler = joblib.load(root_path + 'mfcc_features_standard_scaler.joblib')
# Scalers indexed by the same feature-set keys used in total_model.
scaler = {'mfcc': mfcc_scaler, 'spectral': spectral_scaler, 'prosodic': prosodic_scaler, 'full': full_scaler}
74
+
75
def Load_audio(audio_path):
    """Read the audio file at ``audio_path`` and return its samples.

    The file is decoded with ``librosa.load`` using ``sr=48000``. The sampling
    rate that librosa returns alongside the samples is discarded.
    """
    # Read the audio file together with its sampling rate.
    samples, _sampling_rate = librosa.load(audio_path, sr=48000)
    return samples


# The returned samples are what the feature-extraction functions below consume.
81
+
82
def Spectral_extract_features(audio):
    """Build the spectral feature vector for a single audio signal.

    ``audio`` is one audio signal, not a batch. Five librosa feature matrices
    are computed -- MFCC (``n_mfcc=40``), chroma STFT, spectral contrast,
    tonnetz and mel spectrogram. Each is averaged along axis 1 and the
    averages are concatenated in that order.

    No sampling rate is passed, so each librosa call falls back to its own
    default ``sr``.

    Returns a one-dimensional NumPy array.
    """
    feature_matrices = [
        librosa.feature.mfcc(y=audio, n_mfcc=40),
        librosa.feature.chroma_stft(y=audio),
        librosa.feature.spectral_contrast(y=audio),
        librosa.feature.tonnetz(y=audio),
        librosa.feature.melspectrogram(y=audio),
    ]
    # Collapse every matrix to one value per row before joining them.
    row_means = [matrix.mean(axis=1) for matrix in feature_matrices]
    return np.array(np.concatenate(row_means))
96
+
97
def mfcc_extract_features(audio):
    """Return the 40 MFCC coefficients of ``audio`` averaged along axis 1.

    No sampling rate is passed, so ``librosa.feature.mfcc`` uses its default.
    """
    return librosa.feature.mfcc(y=audio, n_mfcc=40).mean(axis=1)
101
+
102
def Prosodic_extract_features(audio):
    """Build the prosodic feature vector for a single audio signal.

    The vector is the concatenation, in this order, of:
      * the pitch matrix from ``librosa.piptrack`` (``n_fft=128``,
        ``hop_length=512``) averaged along axis 1,
      * the duration reported by ``librosa.get_duration``,
      * the RMS energy from ``librosa.feature.rms`` averaged along axis 1.

    Returns a one-dimensional NumPy array.
    """
    pitch_matrix, _magnitudes = librosa.piptrack(y=audio, n_fft=128, hop_length=512)
    # Wrap the scalar duration in a 1x1 array so it can be reduced with
    # mean(axis=1) exactly like the two matrices.
    duration_matrix = np.array([librosa.get_duration(y=audio)]).reshape(1, 1)
    rms_matrix = librosa.feature.rms(y=audio)

    pieces = (
        pitch_matrix.mean(axis=1),
        duration_matrix.mean(axis=1),
        rms_matrix.mean(axis=1),
    )
    return np.array(np.concatenate(pieces))
114
+
115
def Spectral_Prosodic(audio):
    """Return the spectral features of ``audio`` followed by its prosodic features.

    The two vectors come from ``Spectral_extract_features`` and
    ``Prosodic_extract_features`` and are joined into one array.
    """
    return np.concatenate(
        (Spectral_extract_features(audio), Prosodic_extract_features(audio))
    )
120
+
121
def Total_features(audio, scaler):
    """Extract and scale all four feature sets for one audio signal.

    Args:
        audio: a single audio signal, as accepted by the extractor functions.
        scaler: mapping with the keys 'spectral', 'prosodic', 'full' and
            'mfcc'; each value must provide ``transform``.

    Returns:
        A dict with the keys 'spectral', 'prosodic', 'full' and 'mfcc' (in
        that insertion order). Each value is the result of the matching
        scaler's ``transform`` applied to the feature vector reshaped to a
        single row.
    """
    # The spectral and prosodic vectors are computed once and reused for the
    # 'full' set instead of being extracted a second time.
    spectral = Spectral_extract_features(audio)
    prosodic = Prosodic_extract_features(audio)

    raw_features = {
        'spectral': spectral,
        'prosodic': prosodic,
        # Same layout as Spectral_Prosodic(audio): spectral first, then prosodic.
        'full': np.concatenate((spectral, prosodic)),
        'mfcc': mfcc_extract_features(audio),
    }

    # All raw vectors are built before any scaling, so 'full' is always
    # assembled from unscaled values.
    return {
        name: scaler[name].transform(vector.reshape(1, -1))
        for name, vector in raw_features.items()
    }
128
+
129
+
130
+
131
def total_predict(feature, total_model):
    """Run every model on its matching feature set and collect emotion labels.

    Args:
        feature: dict holding the four feature sets under the keys 'mfcc',
            'spectral', 'prosodic' and 'full'.
        total_model: nested dict indexed as ``total_model[model][feature_set]``.

    Returns:
        A dict ``result[feature_set][model]`` holding the label looked up in
        the module-level ``num2label``. If a model fails, the failure is
        printed and ``None`` is stored for that entry. Every model key is
        therefore always present, which keeps the table built from this
        result aligned with its column headers.
    """
    feature_keys = ('mfcc', 'spectral', 'prosodic', 'full')
    # Models whose predict() output is used directly as the class index.
    direct_label_models = ('SVM', 'NB', 'DT', 'MLP', 'RF')
    model_keys = direct_label_models + ('LSTM', 'LSTM_CNN', 'CNN')

    result = {key: {} for key in feature_keys}
    for f in feature_keys:
        for m in model_keys:
            try:
                model = total_model[m][f]
                if m in direct_label_models:
                    class_index = model.predict(feature[f])[0]
                else:
                    # These models get the features as a one-row array wrapped
                    # in a list, and the class index is the argmax of the
                    # predicted scores.
                    batch = [np.array(feature[f]).reshape((1, -1))]
                    scores = model.predict(batch)
                    class_index = np.argmax(scores, axis=1)[0]
                result[f][m] = num2label[class_index]
            except Exception as exc:
                # Best effort: one failing model must not abort the others.
                # Exception (not a bare except) lets KeyboardInterrupt and
                # SystemExit propagate, and the cause is reported rather than
                # silently dropped.
                print(f, m, repr(exc))
                result[f][m] = None
    return result
151
+
152
+ # def main_function(audio_path, scaler, total_model):
153
+ # audio = Load_audio(audio_path)
154
+ # feature = Total_features(audio, scaler)
155
+ # labels = total_predict(feature, total_model)
156
+ # table = pd.DataFrame.from_dict(labels).T
157
+ # return table
158
def main_function(audio_path, scaler, total_model):
    """Predict emotions for the audio file at ``audio_path`` and tabulate them.

    The audio is loaded, all feature sets are extracted and scaled with
    ``scaler``, and every model in ``total_model`` is run on them.

    Returns a DataFrame with one row per feature set and one column per
    model, preceded by a 'Đặc trưng' column naming the feature set.
    """
    predictions = total_predict(
        Total_features(Load_audio(audio_path), scaler),
        total_model,
    )
    # from_dict puts feature sets in columns; transpose so they become rows.
    table = pd.DataFrame.from_dict(predictions).transpose()
    feature_names = ['mfcc', 'spectral', 'prosodic', 'full']
    table.insert(0, 'Đặc trưng', feature_names)
    return table
165
+
166
def main_interface(audio_file):
    """Gradio callback: return the prediction table for ``audio_file``.

    ``audio_file`` is forwarded unchanged to ``main_function`` together with
    the module-level ``scaler`` and ``total_model``.
    """
    # An earlier draft took a (sample_rate, samples) pair, resampled it to
    # 48000 Hz and wrote a temporary WAV file; the input is now passed straight
    # through to main_function instead.
    return main_function(audio_file, scaler, total_model)
181
+
182
+
183
# Create Gradio Interface
# The audio input is handed to main_interface as a file path (type='filepath').
# The returned table is shown in a Dataframe whose headers are the feature-set
# column followed by one column per model.
iface = gr.Interface(
    fn=main_interface,
    inputs=gr.Audio(type= 'filepath'),
    outputs=gr.Dataframe(headers=['Đặc trưng', 'SVM', 'NB', 'DT', 'MLP', 'RF', 'LSTM', 'LSTM_CNN', 'CNN']),
)

# Launch the Gradio Interface
# NOTE(review): this runs at import time because there is no __main__ guard --
# confirm that is intended for the deployment target.
iface.launch()
192
+
model/CNN_full.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c3545a62d07920223acaa1de7f22bf6a5b4409471eaa1c7311ab090336b550d
3
+ size 2503312
model/CNN_mfcc.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fda65ce37cc725a0d7d32d212b3e0c2518d7a4e1e8f81eb1c4a4543e376d5dd3
3
+ size 2392736
model/CNN_prosodic.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb6555c9b3cfffee7e60b6a401eb88db5d034526cba71c5cc401672a61222d8a
3
+ size 2405024
model/CNN_spectral.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1bf7565b61d30b2daf8a41acac95fa8f00c62de40b677f77eeba0cd6712e3387
3
+ size 2470560
model/DT_full.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3adc861f7b0c5f0914584daba15b14e96d81468c1f04ae8fa174c56809b2a167
3
+ size 32169
model/DT_mfcc.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a4bc697799ebf39051d12b9bf1aa00883d13e4709b11435d0eb40a011aa6dc7
3
+ size 32889
model/DT_prosodic.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:feb930d9f8325f801231dcb9c7615e54097ead3c869bf7a88d36cef784907e0f
3
+ size 59049
model/DT_spectral.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e702b141c2a643b76890765f83d08b73366e4f21b3288156bfca342733d61f29
3
+ size 31689
model/LSTM_CNN_full.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd6e1f630d9539ab35943ef3a13385f30f82fdc193c9cc6ac91c94c750a52c47
3
+ size 4267408
model/LSTM_CNN_mfcc.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6009644eccef5f7f849b02fcdf8adbf8d1426284b430f070f18c0a03ea590e6b
3
+ size 4267408
model/LSTM_CNN_prosodic.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9bde39f6906bd2eef95b5e12fa3eebca7ef6751e10f7601596127d9edfb0d618
3
+ size 4267432
model/LSTM_CNN_spectral.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b70e987559b0e04e7e014d6f57bfcd76cf6c45cc9a1e7689c03b7002a987fe46
3
+ size 4267432
model/LSTM_full.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:21d48ca35abab2b0ac1973be60b43efc44cc442502e55844d315a8519e2c6d22
3
+ size 6219560
model/LSTM_mfcc.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8bee7322b6a4a316265581662abd4fba375293269d856b7062bc6a0fddca73f4
3
+ size 6219560
model/LSTM_prosodic.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f0cec70f95186490a70581a981ee7bf5449311b361765caa70ffcbcce9ac2f6
3
+ size 6219560
model/LSTM_spectral.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bde863d3b7f8c3d59c0a7bd4661d0cf8f38d0e9e03954f410316dbcdfbb9ad11
3
+ size 6219560
model/MLP_full.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:14e811efeed31cdb1599f6870aedec21f29dbd9189e55c918a6f90d2b5420165
3
+ size 1622848
model/MLP_mfcc.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b4f5396dfaebac0d1376fe11d7ea2b03feeb79cfdff3b332be65e001c65a9aef
3
+ size 157936
model/MLP_prosodic.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0972d7d39e864fa62fb1c9eb1a2244d129a9868f71dc4d48060e7bfdf7782d05
3
+ size 466880
model/MLP_spectral.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2a353c3202d7f5ad4c086ce6ffd7b20a4a7d4b16b04c29586ec72f71c58fb0e
3
+ size 1222304
model/NB_full.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8896d2487e8e445b17090ec1d314b50b0e417644303aee85551bb968d1149e37
3
+ size 34199
model/NB_mfcc.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:709e9f1419c08733593daba2e27f7c5739a5a9a10807e27748659845d67e03d9
3
+ size 6039
model/NB_prosodic.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b5c07f1fc3eb9c79930d459afb12e283604e082e23aaafa946e2ac4b1636add
3
+ size 9495
model/NB_spectral.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c7a9d04273a40db6e422c61b737da5ace389ac23bc274eb2b1f1e9326dc425ca
3
+ size 25623
model/RF_full.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f32918de6d3308a719f4e783e544fda054c2fd851e507bd810312b69c01d045
3
+ size 526754681
model/RF_mfcc.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:19c71556418f3647a53c7235286626a5bd0c9c996156b3bdf79f528b1a525186
3
+ size 469029625
model/RF_prosodic.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:29b20e92c7804989605bbfdc7d0306f672d6712bd6808fd020e2257dc25a2891
3
+ size 531140425
model/RF_spectral.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e58c8b5bd507085a3e2f2171cef2e83c67bf3941c804026e3706c8e1ce56db7
3
+ size 526621225
model/SVM_full.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6843682d8c1e8d750abbcf2340e642dff24d298702d0852f1f5c44ebf3773f6d
3
+ size 5031223
model/SVM_mfcc.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:987a47f81a0bb1d180768695a6c577c894aeb18ce2c3f74c73f5d36e2b1bfaa8
3
+ size 872183
model/SVM_prosodic.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8b7390b0915fdfca49bef690ba62c56b8a04eaaa6e99c71d0830eb326be62c35
3
+ size 1726263
model/SVM_spectral.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a239cc65ffaae4b1c3a5f5cc654ecae697493210043acc46878aee13800d31e9
3
+ size 3669319
model/finetune_wav2vec_base/runs/Nov13_15-31-38_8a5ba29056be/events.out.tfevents.1699889504.8a5ba29056be.5820.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba4e8edb9165ae3c73be83027482b673c468b37ebb7d81d595737fc148a7239b
3
+ size 6969
model/full_features_standard_scaler.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aafe7f03fb68ee68fb17067f8923b5e2bfc2b0671d1bde078b2abb746f7aa511
3
+ size 6855
model/mfcc_features_standard_scaler.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b4c08c9200c4f4bcb4ee5d2ea0876495aadd0354a82d6f9ad01fcc6a070ee3de
3
+ size 1575
model/prosodic_features_standard_scaler.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad281b77b47be60a2e98021b82fcc676493334d168f387c70bd56d1d416babe4
3
+ size 2207
model/spectral_features_standard_scaler.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:527c6ca4cffbc02a3c06f54cefb98a3938b863731931416b31f10e8eecdf819e
3
+ size 5231
requirements.txt ADDED
Binary file (4.34 kB). View file