Commit 6a12f17 · Update app.py
Parent: 64bc3ed
app.py CHANGED

@@ -1,56 +1,74 @@
-import tensorflow as tf
-import gradio as gr
 import librosa
 import numpy as np
-import json
+import tensorflow as tf
+import gradio as gr
 
-# File Paths
-model_path = "audio_clf_model"
-encoding_path = "label_encodings.json"
-examples_path = "examples"
-
-#
+# File Paths
+model_path = "sound_emotion_rec_model"
+categories = ['angry', 'disgust', 'fear', 'happy', 'neutral', 'ps', 'sad']
 model = tf.keras.models.load_model(model_path)
-classes = json.load(open(encoding_path, "r"))
-labels = [classes[str(i)] for i in range(len(classes))]
-labels = ["negative", "positive"]#classes[target].values()
 
-def pre_processor(audio_path):
-    ...
+# loading the files
+def extract_mfcc(audio_path, duration=3, offset=0.5, n_mfcc=40):
+    # loading the data
+    y, sr = librosa.load(audio_path, duration=duration, offset=offset)
+
+    # extracting the voice feature
+    mfcc = np.mean(librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc).T, axis=0)
+
+    return mfcc
+
+def prepare_data(audio_path):
+    # extracting the features
+    features = extract_mfcc(audio_path)
+
+    # adjusting the shape
+    features = [x for x in features]
+    features = np.array(features)
+    features = np.expand_dims(features, -1)
+
+    return features
 
 def clsf(audio_path):
 
     # extracting the features
-    features =
-    print(len(features))
+    features = prepare_data(audio_path)
 
     # batching the data
     sample = np.expand_dims(features, axis=0)
 
     # predicting
-    preds = model.predict(sample)
+    preds = model.predict(sample)[0]
 
     # results
-    confidences = {
+    confidences = {categories[i]:np.round(float(preds[i]), 3) for i in range(len(categories))}
 
     return confidences
 
+def pre_processor(audio_path):
+
+    # load the audio file
+    x, sample_rate = librosa.load(audio_path)
+
+    # feature extraction (MFCCs are an audio feature)
+    mfccs = np.mean(librosa.feature.mfcc(y=x, sr=sample_rate, n_mfcc=40).T, axis=0)
+    feature = mfccs
+
+    return feature
+
 # GUI Component
 gui_params = {
     "fn":clsf,
     "inputs":gr.Audio(source="upload", type="filepath"),
     "outputs" : "label",
     #live=True,
-    "examples" :
+    "examples" : "examples"
 }
 demo = gr.Interface(**gui_params)
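Taken together, the new preprocessing fixes the input shape the model receives: extract_mfcc averages the MFCC matrix into a 40-value vector, prepare_data appends a channel axis, and clsf adds a batch axis. A small illustrative walk-through of those shapes (not part of the commit):

import numpy as np

# Shape walk-through of the new pipeline (illustrative, not part of the commit).
mfcc = np.zeros(40)                    # extract_mfcc output: (40,)
features = np.expand_dims(mfcc, -1)   # prepare_data output: (40, 1)
sample = np.expand_dims(features, 0)  # batched input built in clsf: (1, 40, 1)
print(sample.shape)                   # -> (1, 40, 1)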
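A minimal local smoke test could look like the sketch below. It assumes app.py's names (clsf, demo) are importable and that a clip exists under the examples/ folder referenced by gui_params; the file name sample.wav is hypothetical, and the demo.launch() call is added here for local runs since the diff above does not show one.

# Hypothetical local check of the updated app; the example file name is an assumption.
from app import clsf, demo

if __name__ == "__main__":
    confidences = clsf("examples/sample.wav")  # hypothetical clip
    print(confidences)  # dict mapping each category to a rounded confidence

    demo.launch()  # serve the Gradio interface locally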