Prathamesh1420 committed
Commit 8ab4f78 · verified · 1 Parent(s): e08db09

Upload app.py

Files changed (1): app.py (+252, -0)
app.py ADDED
@@ -0,0 +1,252 @@
+ import pyttsx3
+ import speech_recognition as sr
+ from playsound import playsound
+ import random
+ import datetime
+ hour = datetime.datetime.now().strftime('%H:%M')
+ #print(hour)
+ date = datetime.date.today().strftime('%d/%B/%Y')
+ #print(date)
+ date = date.split('/')
+ #print(date)
+ import webbrowser as wb
+ import tensorflow as tf
+ import numpy as np
+ import librosa
+ import matplotlib.pyplot as plt
+ import seaborn as sns
+ sns.set()
+ from modules import commands_answers, load_agenda
+ commands = commands_answers.commands
+ answers = commands_answers.answers
+ #print(commands)
+ #print(answers)
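+ # commands and answers come from modules/commands_answers.py; they are expected to be
+ # parallel lists where commands[i] holds trigger phrases and answers[i] the spoken replies.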
+
+ my_name = 'Bob'
+
+ # MacOS
+ chrome_path = 'open -a /Applications/Google\ Chrome.app %s'
+ # Windows
+ #chrome_path = 'C:/Program Files/Google/Chrome/Application/chrome.exe %s'
+ # Linux
+ #chrome_path = '/usr/bin/google-chrome %s'
+
+ def search(sentence):
+     wb.get(chrome_path).open('https://www.google.com/search?q=' + sentence)
+
+ #search('python programming language')
+
+ MODEL_TYPES = ['EMOTION']
+ def load_model_by_name(model_type):
+     if model_type == MODEL_TYPES[0]:
+         model = tf.keras.models.load_model('models/speech_emotion_recognition.hdf5')
+         model_dict = ['calm', 'happy', 'fear', 'nervous', 'neutral', 'disgust', 'surprise', 'sad']
+         SAMPLE_RATE = 48000
+     return model, model_dict, SAMPLE_RATE
+
+ #print(load_model_by_name('EMOTION'))
+ #print(load_model_by_name('EMOTION')[0].summary())
+
+ model_type = 'EMOTION'
+ loaded_model = load_model_by_name(model_type)
+
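+ # predict_sound pipeline: trim silence, split the clip into 1-second frames,
+ # extract 40 MFCCs per frame, average them over time, classify each frame with
+ # the loaded model, and return the majority emotion as [count, label].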
+ def predict_sound(AUDIO, SAMPLE_RATE, plot = True):
+     results = []
+     wav_data, sample_rate = librosa.load(AUDIO, sr = SAMPLE_RATE)
+     #print(wav_data.shape)
+     #print(sample_rate)
+     #print(wav_data)
+     # Trim leading and trailing silence, like stripping spaces: ' librosa ' -> 'librosa'
+     # https://librosa.org/doc/main/generated/librosa.effects.trim.html
+     clip, index = librosa.effects.trim(wav_data, top_db=60, frame_length=512, hop_length=64)
+     splitted_audio_data = tf.signal.frame(clip, sample_rate, sample_rate, pad_end = True, pad_value = 0)
+     for i, data in enumerate(splitted_audio_data.numpy()):
+         #print('Audio split: ', i)
+         #print(data.shape)
+         #print(data)
+         # Mel frequency: https://en.wikipedia.org/wiki/Mel-frequency_cepstrum
+         mfccs_features = librosa.feature.mfcc(y = data, sr = sample_rate, n_mfcc=40)
+         #print(mfccs_features.shape)
+         #print(mfccs_features)
+         mfccs_scaled_features = np.mean(mfccs_features.T, axis = 0)
+         mfccs_scaled_features = mfccs_scaled_features.reshape(1, -1)
+         #print(mfccs_scaled_features.shape)
+         mfccs_scaled_features = mfccs_scaled_features[:, :, np.newaxis]
+         # batch of one sample with a trailing channel axis
+         #print(mfccs_scaled_features.shape)
+         predictions = loaded_model[0].predict(mfccs_scaled_features)
+         #print(predictions)
+         #print(predictions.sum())
+         if plot:
+             plt.figure(figsize=(len(splitted_audio_data), 5))
+             plt.barh(loaded_model[1], predictions[0])
+             plt.tight_layout()
+             plt.show()
+
+         predictions = predictions.argmax(axis = 1)
+         #print(predictions)
+         predictions = predictions.astype(int).flatten()
+         predictions = loaded_model[1][predictions[0]]
+         results.append(predictions)
+         #print(results)
+
+         result_str = 'PART ' + str(i) + ': ' + str(predictions).upper()
+         #print(result_str)
+
+     count_results = [[results.count(x), x] for x in set(results)]
+     #print(count_results)
+
+     #print(max(count_results))
+     return max(count_results)
+
+ #playsound('sad.wav')
+ #predict_sound('sad.wav', loaded_model[2], plot=False)
+
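+ # play_music_youtube opens a mood-matched YouTube video in Chrome for
+ # 'sad'/'fear' or 'nervous'/'surprise' and reports whether anything was played.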
+ def play_music_youtube(emotion):
+     play = False
+     if emotion == 'sad' or emotion == 'fear':
+         wb.get(chrome_path).open('https://www.youtube.com/watch?v=k32IPg4dbz0&ab_channel=Amelhorm%C3%BAsicainstrumental')
+         play = True
+     if emotion == 'nervous' or emotion == 'surprise':
+         wb.get(chrome_path).open('https://www.youtube.com/watch?v=pWjmpSD-ph0&ab_channel=CassioToledo')
+         play = True
+     return play
+
+ #play_music_youtube('sad')
+ #play_music_youtube('surprise')
+ #emotion = predict_sound('sad.wav', loaded_model[2], plot=False)
+ #print(emotion)
+ #play_music_youtube(emotion[1])
+
+ def speak(text):
+     engine = pyttsx3.init()
+     engine.setProperty('rate', 90)   # speech rate in words per minute
+     engine.setProperty('volume', 1)  # min: 0, max: 1
+     engine.say(text)
+     engine.runAndWait()
+
+ #speak("Testing the Assistant's Speech Synthesizer")
+
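+ # listen_microphone records from the default microphone, saves the audio to
+ # recordings/speech.wav and transcribes it with the Google Web Speech API
+ # (recognize_google); it returns an empty string when nothing is understood.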
+ def listen_microphone():
+     microphone = sr.Recognizer()
+     with sr.Microphone() as source:
+         microphone.adjust_for_ambient_noise(source, duration=0.8)
+         print('Listening: ')
+         audio = microphone.listen(source)
+         with open('recordings/speech.wav', 'wb') as f:
+             f.write(audio.get_wav_data())
+     try:
+         # https://pypi.org/project/SpeechRecognition/
+         sentence = microphone.recognize_google(audio, language='en-US')
+         print('You said: ' + sentence)
+     except sr.UnknownValueError:
+         sentence = ''
+         print('Not understood')
+     return sentence
+
+ #playsound('recordings/speech.wav')
+ #listen_microphone()
+
+ def test_models():
+     audio_source = 'recordings/speech.wav'  # most recent recording saved by listen_microphone()
+     prediction = predict_sound(audio_source, loaded_model[2], plot = False)
+     return prediction
+
+ #print(test_models())
+
+ playing = False
+ mode_control = False
+ print('[INFO] Ready to start!')
+ playsound('n1.mp3')
+
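+ # Main loop: listen continuously, react only when the wake word ('Bob') is heard,
+ # then match the spoken text against the command lists; 'turn off' exits the loop.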
+ while True:
+     result = listen_microphone()
+
+     if my_name in result:
+         result = str(result.split(my_name + ' ')[1])
+         result = result.lower()
+         #print('The assistant has been activated!')
+         #print('After processing: ', result)
+
+         if result in commands[0]:
+             playsound('n2.mp3')
+             speak('I will read my list of functionalities: ' + answers[0])
+
+         if result in commands[3]:
+             playsound('n2.mp3')
+             speak('It is now ' + datetime.datetime.now().strftime('%H:%M'))
+
+         if result in commands[4]:
+             playsound('n2.mp3')
+             speak('Today is ' + date[0] + ' of ' + date[1])
+
+         if result in commands[1]:
+             playsound('n2.mp3')
+             speak('Please, tell me the activity!')
+             result = listen_microphone()
+             annotation = open('annotation.txt', mode='a+', encoding='utf-8')
+             annotation.write(result + '\n')
+             annotation.close()
+             speak(''.join(random.sample(answers[1], k = 1)))
+             speak('Do you want me to read the notes?')
+             result = listen_microphone()
+             if result == 'yes' or result == 'sure':
+                 with open('annotation.txt') as file_source:
+                     lines = file_source.readlines()
+                     for line in lines:
+                         speak(line)
+             else:
+                 speak('Ok!')
+
+         if result in commands[2]:
+             playsound('n2.mp3')
+             speak(''.join(random.sample(answers[2], k = 1)))
+             result = listen_microphone()
+             search(result)
+
+         if result in commands[6]:
+             playsound('n2.mp3')
+             if load_agenda.load_agenda():
+                 speak('These are the events for today:')
+                 for i in range(len(load_agenda.load_agenda()[1])):
+                     speak(load_agenda.load_agenda()[1][i] + ' ' + load_agenda.load_agenda()[0][i] + ' scheduled for ' + str(load_agenda.load_agenda()[2][i]))
+             else:
+                 speak('There are no events for today considering the current time!')
+
+         if result in commands[5]:
+             mode_control = True
+             playsound('n1.mp3')
+             speak('Emotion analysis mode has been activated!')
+
+         if mode_control:
+             analyse = test_models()
+             print(f'I heard {analyse} in your voice!')
+             if not playing:
+                 playing = play_music_youtube(analyse[1])
+
+         if result == 'turn off':
+             playsound('n2.mp3')
+             speak(''.join(random.sample(answers[4], k = 1)))
+             break
+     else:
+         playsound('n3.mp3')