pragnakalp committed on
Commit
1c256c5
·
1 Parent(s): be5077b

Upload ser_detection.py

Browse files
Files changed (1) hide show
  1. ser_detection.py +149 -0
ser_detection.py ADDED
@@ -0,0 +1,149 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import absolute_import, division, print_function, unicode_literals
2
+
3
+ from flask import Flask, make_response, render_template, request, jsonify, redirect, url_for, send_from_directory
4
+ from flask_cors import CORS
5
+
6
+ import sys
7
+ import os
8
+
9
+ import librosa
10
+ import librosa.display
11
+ import numpy as np
12
+
13
+ import warnings
14
+ import tensorflow as tf
15
+ from keras.models import Sequential
16
+ from keras.layers import Dense
17
+ from keras.utils import to_categorical
18
+ from keras.layers import Flatten, Dropout, Activation
19
+ from keras.layers import Conv2D, MaxPooling2D
20
+ from keras.layers.normalization import BatchNormalization
21
+ from sklearn.model_selection import train_test_split
22
+ from tqdm import tqdm
23
+ # import scipy.io.wavfile as wav
24
+ # from speechpy.feature import mfcc
25
+
26
+ import pyaudio
27
+ import wave
28
+
29
# Suppress every warning raised through the ``warnings`` module for the
# lifetime of the process.
warnings.filterwarnings("ignore")

# Flask application object; CORS is enabled on it with flask_cors' default
# settings.
app = Flask(__name__)
CORS(app)
33
+
34
# Emotion names indexed by the network's output unit.
# NOTE(review): the order presumably has to match the label encoding used
# when the weights were trained -- confirm before reordering.
classLabels = ('Angry', 'Fear', 'Disgust', 'Happy', 'Sad', 'Surprised', 'Neutral')
numLabels = len(classLabels)

# Network input is (in_shape[0], in_shape[1], 1); detect_emotion() feeds it
# a 39 x 216 MFCC matrix with a trailing channel axis.
in_shape = (39, 216)

model = Sequential()

# Two 13x13 convolutions, each followed by batch-norm + ReLU, then a
# pooling step that only downsamples the first (coefficient) axis.
model.add(Conv2D(8, (13, 13), input_shape=(in_shape[0], in_shape[1], 1)))
model.add(BatchNormalization(axis=-1))
model.add(Activation('relu'))
model.add(Conv2D(8, (13, 13)))
model.add(BatchNormalization(axis=-1))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 1)))

# A 3x3 and a 1x1 convolution, again with batch-norm + ReLU, and a second
# pooling step over the first axis.
model.add(Conv2D(8, (3, 3)))
model.add(BatchNormalization(axis=-1))
model.add(Activation('relu'))
model.add(Conv2D(8, (1, 1)))
model.add(BatchNormalization(axis=-1))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 1)))

# Dense classification head.
model.add(Flatten())
model.add(Dense(64))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Dropout(0.2))
model.add(Dense(numLabels, activation='softmax'))

# The head is a single-label softmax over numLabels classes, so the matching
# loss is categorical cross-entropy. With 'binary_crossentropy' Keras scores
# every output unit as an independent binary problem and resolves 'accuracy'
# to binary accuracy, which misreports multi-class performance. The loaded
# weights and predict() results are unaffected by this setting.
model.compile(loss='categorical_crossentropy', optimizer='adam',
              metrics=['accuracy'])
# Debug aid: print(model.summary(), file=sys.stderr)

# Load the pre-trained weights; the path is relative to the working directory.
model.load_weights('speech_emotion_detection_ravdess_savee.h5')
65
+
66
def detect_emotion(file_name):
    """Classify the emotion expressed in an audio file.

    The clip is loaded at 44.1 kHz, skipping the first 0.5 s and keeping at
    most 2.5 s. The signal is then zero-padded (or trimmed) to exactly 2.5 s
    so that the MFCC matrix always has the 39 x 216 shape the network was
    built for; previously any shorter recording made the reshape fail.

    Args:
        file_name: Path of the audio file to analyse.

    Returns:
        The entry of ``classLabels`` with the highest predicted probability.

    Raises:
        ValueError: If no audio remains after the 0.5 s offset.
    """
    target_sr = 22050 * 2
    clip_seconds = 2.5
    n_mfcc, n_frames = 39, 216
    n_samples = int(clip_seconds * target_sr)

    X, sample_rate = librosa.load(file_name, res_type='kaiser_best',
                                  duration=clip_seconds, sr=target_sr,
                                  offset=0.5)
    if X.size == 0:
        raise ValueError(
            "No audio left in %r after the 0.5 s offset" % (file_name,))

    # Force a fixed-length signal: trailing silence for short clips, a hard
    # cut for anything longer, so the frame count below is deterministic.
    X = X[:n_samples]
    if X.shape[0] < n_samples:
        X = np.pad(X, (0, n_samples - X.shape[0]), mode='constant')

    feature = librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=n_mfcc)
    print("Feature_shape =>",feature.shape)

    # Add the channel axis, then a leading batch axis of size one.
    feature = feature.reshape(n_mfcc, n_frames, 1)
    probabilities = model.predict(feature[np.newaxis, ...])
    result = classLabels[np.argmax(probabilities)]
    print("Result ==> ",result)
    return result
76
+
77
@app.route("/speech-emotion-recognition/")
def emotion_detection():
    """Route handler: classify the bundled sample clip and return its label.

    The analysed file is fixed to ``audio_files/Happy.wav``; the response
    body is whatever ``detect_emotion`` returns for it.
    """
    return detect_emotion('audio_files/Happy.wav')
83
+
84
def _next_recording_name(existing_names):
    """Return the file name for the next recording, e.g. ``"11.wav"``.

    Recordings are numbered ``1.wav``, ``2.wav``, ... The part of each
    existing name before the first dot is compared as an integer (a string
    comparison would rank "9" above "10" and cause overwrites); names whose
    stem is not an integer are ignored.
    """
    indices = []
    for name in existing_names:
        try:
            indices.append(int(name.split('.')[0]))
        except ValueError:
            # Not one of our numbered recordings; skip it.
            continue
    next_index = max(indices) + 1 if indices else 1
    return str(next_index) + ".wav"


@app.route("/record_audio/")
def record_audio():
    """Route handler: record 4 s from the default input device to a WAV file.

    Audio is captured as 16-bit stereo at 44.1 kHz and written to
    ``recorded_audio/<n>.wav``, where ``n`` is one more than the highest
    numbered recording already present. The folder is created if missing.

    Returns:
        The string ``"Audio Recorded"`` once the file has been written.
    """
    CHUNK = 1024
    FORMAT = pyaudio.paInt16
    CHANNELS = 2
    RATE = 44100  # sample rate in Hz
    RECORD_SECONDS = 4
    OUTPUT_DIR = 'recorded_audio'

    if not os.path.isdir(OUTPUT_DIR):
        os.makedirs(OUTPUT_DIR)

    fileList = os.listdir(OUTPUT_DIR)
    print("Audio File List ==> ",fileList)

    WAVE_OUTPUT_FILENAME = os.path.join(OUTPUT_DIR,
                                        _next_recording_name(fileList))
    print(WAVE_OUTPUT_FILENAME)

    p = pyaudio.PyAudio()
    try:
        # Read the sample width while the PyAudio instance is still alive.
        sample_width = p.get_sample_size(FORMAT)
        stream = p.open(format=FORMAT,
                        channels=CHANNELS,
                        rate=RATE,
                        input=True,
                        frames_per_buffer=CHUNK)
        try:
            print("* recording")
            # Each read returns CHUNK frames (2 bytes per sample per channel).
            frames = [stream.read(CHUNK)
                      for _ in range(int(RATE / CHUNK * RECORD_SECONDS))]
            print("* done recording")
        finally:
            stream.stop_stream()
            stream.close()
    finally:
        # Release the audio backend even if opening or reading failed.
        p.terminate()

    wf = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
    try:
        wf.setnchannels(CHANNELS)
        wf.setsampwidth(sample_width)
        wf.setframerate(RATE)
        wf.writeframes(b''.join(frames))
    finally:
        wf.close()
    return "Audio Recorded"
146
+
147
# Start Flask's built-in server only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    app.run()
149
+