Adal Abilbekov committed on
Commit
c9f5661
·
1 Parent(s): 580eae2

Adding CNN model

Browse files
Files changed (5) hide show
  1. __pycache__/model.cpython-39.pyc +0 -0
  2. app.py +44 -3
  3. cnn_class_17.pt +3 -0
  4. model.py +67 -0
  5. try.ipynb +0 -0
__pycache__/model.cpython-39.pyc ADDED
Binary file (1.65 kB). View file
 
app.py CHANGED
@@ -1,7 +1,48 @@
1
  import gradio as gr
 
 
 
 
 
 
2
 
3
- def greet(name):
4
- return "Hello " + name + "!!"
 
5
 
6
- iface = gr.Interface(fn=greet, inputs="text", outputs="text")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  iface.launch()
 
1
  import gradio as gr
2
+ import torch
3
+ # from lr_ed.model import CNNEmotinoalClassifier
4
+ import torchaudio
5
+ import IPython.display as ipd
6
+ from torch import nn
7
+ from model import CNNEmotinoalClassifier
8
 
9
# Build the classifier and restore its trained weights.
# map_location='cpu' lets a checkpoint that was saved from a GPU load on a
# CPU-only host; load_state_dict then copies the values into the (CPU) model.
model = CNNEmotinoalClassifier()
model.load_state_dict(torch.load('./cnn_class_17.pt', map_location='cpu'))
model.eval()  # inference mode

# Waveform -> mel spectrogram transform applied to every recording before it
# is fed to the model (64 mel bands, 22050 Hz audio).
to_melspec = torchaudio.transforms.MelSpectrogram(
    sample_rate=22050,
    n_fft=1024,
    hop_length=512,
    n_mels=64,
)
19
+
20
+ def _get_right_pad(target_waveform, waveform):
21
+ target_waveform = target_waveform
22
+ waveform_samples_number = waveform.shape[1]
23
+ if waveform_samples_number < target_waveform:
24
+ right_pad = target_waveform - waveform_samples_number
25
+ padding_touple = (0, right_pad)
26
+ waveform_padded = nn.functional.pad(waveform, padding_touple)
27
+ else:
28
+ waveform_padded = waveform
29
+ return waveform_padded
30
+
31
def get_probs(audio):
    """Return a probability per emotion for one Gradio audio recording.

    Args:
        audio: ``(sample_rate, samples)`` pair as produced by the
            ``gr.Audio(type="numpy")`` input component.

    Returns:
        Dict mapping each emotion name to the model's probability for it.

    Raises:
        ValueError: If no audio was supplied (``audio`` is ``None``).
    """
    if audio is None:
        raise ValueError("No audio received; please record something first.")

    emotions = sorted(['happy', 'angry', 'sad', 'neutral', 'surprised', 'fear'])
    target_sample_rate = 22050  # must match the rate `to_melspec` was built with
    # 400384 samples -> 783 mel frames, the only length whose conv output
    # matches the model's fixed 128 * 5 * 50 Linear input.
    target_samples = 400384

    sr, waveform = audio
    waveform = torch.as_tensor(waveform)
    if waveform.is_floating_point():
        waveform = waveform.to(torch.float32)
    else:
        # Integer PCM -> float in [-1, 1).
        # NOTE(review): presumably the model was trained on normalised float
        # audio (e.g. torchaudio.load output) - confirm against training code.
        full_scale = float(1 << (torch.iinfo(waveform.dtype).bits - 1))
        waveform = waveform.to(torch.float32) / full_scale

    if waveform.dim() == 1:
        waveform = waveform.unsqueeze(0)
    else:
        # NOTE(review): assumes multi-channel input is (samples, channels);
        # it is mixed down to mono - verify against the Gradio version in use.
        waveform = waveform.mean(dim=1).unsqueeze(0)

    if sr != target_sample_rate:
        waveform = torchaudio.functional.resample(waveform, sr, target_sample_rate)

    # Force the exact length the network expects: cut long clips, pad short ones.
    waveform = waveform[:, :target_samples]
    waveform = _get_right_pad(target_samples, waveform)

    input_x = to_melspec(waveform)
    input_x = torch.unsqueeze(input_x, dim=1)  # add the channel axis

    with torch.no_grad():  # inference only, no autograd bookkeeping
        probs = model(input_x)
    return dict(zip(emotions, probs[0].tolist()))
43
+
44
# Gradio front end: a microphone recording goes in, per-emotion scores come out.
mic = gr.Audio(
    source="microphone",
    type="numpy",
    label="Speak here...",
)
label = gr.Label()

iface = gr.Interface(
    fn=get_probs,
    inputs=mic,
    outputs=label,
)
iface.launch()
cnn_class_17.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:776cb5ff06d6d2ddacef3ee8fdb61c67f241f82777c3f8a830d7c338256f174e
3
+ size 16823379
model.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torchaudio
3
+ import numpy as np
4
+ from torch import nn
5
+ from torchsummary import summary
6
+
7
class CNNEmotinoalClassifier(nn.Module):
    """CNN that maps a single-channel spectrogram to 6 class probabilities.

    Four Conv2d -> ReLU -> MaxPool2d stages are followed by a flatten, a
    five-layer fully connected head and a softmax over the class dimension.
    The first Linear layer is fixed at 128 * 5 * 50 inputs, which is what a
    (batch, 1, 64, 783) input produces after the four conv stages.
    """

    def __init__(self):
        super().__init__()

        def conv_stage(in_channels, out_channels, padding):
            # 3x3 convolution, ReLU, then 2x2 max-pooling.
            return nn.Sequential(
                nn.Conv2d(
                    in_channels=in_channels,
                    out_channels=out_channels,
                    kernel_size=3,
                    stride=1,
                    padding=padding,
                ),
                nn.ReLU(),
                nn.MaxPool2d(kernel_size=2),
            )

        # Attribute names and creation order are kept so state_dict keys and
        # seeded initialisation stay the same.
        self.conv1 = conv_stage(1, 16, 1)
        self.conv2 = conv_stage(16, 32, 1)
        self.conv3 = conv_stage(32, 64, 2)
        self.conv4 = conv_stage(64, 128, 2)

        self.flatten = nn.Flatten()

        # Linear layers sit at indices 0, 2, 4, 6, 8 with a ReLU between each.
        widths = [128 * 5 * 50, 128, 64, 32, 16]
        head = []
        for fan_in, fan_out in zip(widths, widths[1:]):
            head.append(nn.Linear(fan_in, fan_out))
            head.append(nn.ReLU())
        head.append(nn.Linear(16, 6))
        self.fully_connected = nn.Sequential(*head)

        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Return softmax probabilities of shape (batch, 6) for input ``x``."""
        for stage in (self.conv1, self.conv2, self.conv3, self.conv4):
            x = stage(x)
        logits = self.fully_connected(self.flatten(x))
        return self.softmax(logits)
62
+
63
+
64
if __name__ == '__main__':
    # Print a layer-by-layer summary for a (1, 64, 783) input,
    # on the GPU when one is available.
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model = CNNEmotinoalClassifier().to(device)
    summary(model, (1, 64, 783))
try.ipynb ADDED
The diff for this file is too large to render. See raw diff