Kikirilkov committed on
Commit
b32773b
·
1 Parent(s): 64f5a64

Delete TTS/vocoder/models/deepmind_version.py

Browse files
TTS/vocoder/models/deepmind_version.py DELETED
@@ -1,170 +0,0 @@
1
- import torch
2
- import torch.nn as nn
3
- import torch.nn.functional as F
4
- from utils.display import *
5
- from utils.dsp import *
6
-
7
-
8
class WaveRNN(nn.Module):
    """Single-layer gated recurrent model with separate coarse and fine outputs.

    The hidden state is split into two equal halves.  The "coarse" half is
    driven by the previous (coarse, fine) pair; the "fine" half additionally
    sees the current coarse value.  Each half feeds its own two-layer output
    head that produces ``quantisation`` unnormalised class scores.

    NOTE(review): presumably coarse/fine are the two 8-bit halves of a 16-bit
    audio sample as in the WaveRNN paper -- confirm against ``combine_signal``.

    Args:
        hidden_size: Width of the full hidden state.  Must be even because the
            state is split into two halves of ``hidden_size // 2``.
        quantisation: Number of classes produced by each output head.

    Raises:
        ValueError: If ``hidden_size`` is not a positive even number.
    """

    def __init__(self, hidden_size=896, quantisation=256):
        super().__init__()

        # An odd size would make every later split / concat shape-mismatch
        # with an opaque error deep inside forward(); fail early instead.
        if hidden_size <= 0 or hidden_size % 2 != 0:
            raise ValueError(
                'hidden_size must be a positive even number, got %r' % (hidden_size,)
            )

        self.hidden_size = hidden_size
        self.split_size = hidden_size // 2

        # The main matmul: one projection of the hidden state that is later
        # sliced into the update (u), reset (r) and candidate (e) parts.
        self.R = nn.Linear(self.hidden_size, 3 * self.hidden_size, bias=False)

        # Output fc layers: O1 -> O2 is the coarse head, O3 -> O4 the fine head.
        self.O1 = nn.Linear(self.split_size, self.split_size)
        self.O2 = nn.Linear(self.split_size, quantisation)
        self.O3 = nn.Linear(self.split_size, self.split_size)
        self.O4 = nn.Linear(self.split_size, quantisation)

        # Input fc layers.  The coarse half sees 2 features (previous coarse
        # and fine); the fine half sees 3 (those two plus the current coarse).
        self.I_coarse = nn.Linear(2, 3 * self.split_size, bias=False)
        self.I_fine = nn.Linear(3, 3 * self.split_size, bias=False)

        # Biases for the gates, kept separate from the bias-free projections.
        self.bias_u = nn.Parameter(torch.zeros(self.hidden_size))
        self.bias_r = nn.Parameter(torch.zeros(self.hidden_size))
        self.bias_e = nn.Parameter(torch.zeros(self.hidden_size))

        # Display the number of trainable parameters.
        self.num_params()

    def _device(self):
        """Return the device the model's parameters currently live on."""
        return self.bias_u.device

    def forward(self, prev_y, prev_hidden, current_coarse):
        """Run one recurrent step for both halves of the hidden state.

        Args:
            prev_y: Tensor of shape ``(batch, 2)`` holding the previous
                coarse and fine values (already scaled by the caller).
            prev_hidden: Tensor of shape ``(batch, hidden_size)``.
            current_coarse: Tensor of shape ``(batch, 1)`` with the current
                coarse value, used only by the fine half.

        Returns:
            Tuple ``(out_coarse, out_fine, hidden)`` where the first two are
            unnormalised scores of shape ``(batch, quantisation)`` and
            ``hidden`` is the new state of shape ``(batch, hidden_size)``.
        """
        # Main matmul - the projection is split 3 ways
        R_u, R_r, R_e = torch.split(self.R(prev_hidden), self.hidden_size, dim=1)

        # Project the prev input
        I_coarse_u, I_coarse_r, I_coarse_e = torch.split(
            self.I_coarse(prev_y), self.split_size, dim=1
        )

        # Project the prev input and current coarse sample
        fine_input = torch.cat([prev_y, current_coarse], dim=1)
        I_fine_u, I_fine_r, I_fine_e = torch.split(
            self.I_fine(fine_input), self.split_size, dim=1
        )

        # Concatenate so each gate covers the [coarse | fine] hidden layout
        I_u = torch.cat([I_coarse_u, I_fine_u], dim=1)
        I_r = torch.cat([I_coarse_r, I_fine_r], dim=1)
        I_e = torch.cat([I_coarse_e, I_fine_e], dim=1)

        # Compute all gates for coarse and fine.  torch.sigmoid / torch.tanh
        # replace the deprecated torch.nn.functional variants.
        u = torch.sigmoid(R_u + I_u + self.bias_u)
        r = torch.sigmoid(R_r + I_r + self.bias_r)
        e = torch.tanh(r * R_e + I_e + self.bias_e)
        hidden = u * prev_hidden + (1. - u) * e

        # Split the hidden state
        hidden_coarse, hidden_fine = torch.split(hidden, self.split_size, dim=1)

        # Compute outputs
        out_coarse = self.O2(F.relu(self.O1(hidden_coarse)))
        out_fine = self.O4(F.relu(self.O3(hidden_fine)))

        return out_coarse, out_fine, hidden

    def generate(self, seq_len):
        """Sample ``seq_len`` steps autoregressively with batch size 1.

        Coarse and fine classes are drawn from the softmax of their heads,
        one step at a time, without tracking gradients.  Progress is reported
        through the project helper ``stream`` and the two sequences are merged
        with the project helper ``combine_signal``.

        Args:
            seq_len: Number of steps to generate.

        Returns:
            Tuple ``(output, coarse, fine)``: the result of
            ``combine_signal(coarse, fine)`` and the two sampled class
            sequences as NumPy arrays of length ``seq_len``.
        """
        # Local import: the file never imports `time` explicitly and only got
        # it indirectly through a star import.
        import time

        device = self._device()

        with torch.no_grad():
            # First split up the biases for the gates
            b_coarse_u, b_fine_u = torch.split(self.bias_u, self.split_size)
            b_coarse_r, b_fine_r = torch.split(self.bias_r, self.split_size)
            b_coarse_e, b_fine_e = torch.split(self.bias_e, self.split_size)

            # Lists for the two output seqs
            c_outputs, f_outputs = [], []

            # Some initial inputs (class 0), created on the model's device
            # rather than unconditionally on CUDA.
            out_coarse = torch.zeros(1, dtype=torch.long, device=device)
            out_fine = torch.zeros(1, dtype=torch.long, device=device)

            # We'll need a hidden state
            hidden = self.init_hidden()

            # Need a clock for display
            start = time.time()

            # Loop for generation
            for i in range(seq_len):

                # Split into two hidden states
                hidden_coarse, hidden_fine = torch.split(
                    hidden, self.split_size, dim=1
                )

                # Scale and concat previous predictions.  127.5 maps class
                # indices 0..255 onto [-1, 1].
                # NOTE(review): assumes quantisation == 256 -- confirm.
                out_coarse = out_coarse.unsqueeze(0).float() / 127.5 - 1.
                out_fine = out_fine.unsqueeze(0).float() / 127.5 - 1.
                prev_outputs = torch.cat([out_coarse, out_fine], dim=1)

                # Project input
                I_coarse_u, I_coarse_r, I_coarse_e = torch.split(
                    self.I_coarse(prev_outputs), self.split_size, dim=1
                )

                # Project hidden state and split 6 ways
                (R_coarse_u, R_fine_u,
                 R_coarse_r, R_fine_r,
                 R_coarse_e, R_fine_e) = torch.split(
                    self.R(hidden), self.split_size, dim=1
                )

                # Compute the coarse gates
                u = torch.sigmoid(R_coarse_u + I_coarse_u + b_coarse_u)
                r = torch.sigmoid(R_coarse_r + I_coarse_r + b_coarse_r)
                e = torch.tanh(r * R_coarse_e + I_coarse_e + b_coarse_e)
                hidden_coarse = u * hidden_coarse + (1. - u) * e

                # Compute the coarse output
                out_coarse = self.O2(F.relu(self.O1(hidden_coarse)))
                posterior = F.softmax(out_coarse, dim=1)
                distrib = torch.distributions.Categorical(posterior)
                out_coarse = distrib.sample()
                c_outputs.append(out_coarse)

                # Project the [prev outputs and predicted coarse sample]
                coarse_pred = out_coarse.float() / 127.5 - 1.
                fine_input = torch.cat(
                    [prev_outputs, coarse_pred.unsqueeze(0)], dim=1
                )
                I_fine_u, I_fine_r, I_fine_e = torch.split(
                    self.I_fine(fine_input), self.split_size, dim=1
                )

                # Compute the fine gates
                u = torch.sigmoid(R_fine_u + I_fine_u + b_fine_u)
                r = torch.sigmoid(R_fine_r + I_fine_r + b_fine_r)
                e = torch.tanh(r * R_fine_e + I_fine_e + b_fine_e)
                hidden_fine = u * hidden_fine + (1. - u) * e

                # Compute the fine output
                out_fine = self.O4(F.relu(self.O3(hidden_fine)))
                posterior = F.softmax(out_fine, dim=1)
                distrib = torch.distributions.Categorical(posterior)
                out_fine = distrib.sample()
                f_outputs.append(out_fine)

                # Put the hidden state back together
                hidden = torch.cat([hidden_coarse, hidden_fine], dim=1)

                # Display progress.  Guard against a zero elapsed time on
                # coarse-resolution clocks to avoid ZeroDivisionError.
                elapsed = time.time() - start
                speed = (i + 1) / elapsed if elapsed > 0 else 0
                stream('Gen: %i/%i -- Speed: %i', (i + 1, seq_len, speed))

            coarse = torch.stack(c_outputs).squeeze(1).cpu().numpy()
            fine = torch.stack(f_outputs).squeeze(1).cpu().numpy()
            output = combine_signal(coarse, fine)

        return output, coarse, fine

    def init_hidden(self, batch_size=1):
        """Return an all-zero hidden state of shape ``(batch_size, hidden_size)``.

        The tensor is allocated on the same device as the model's parameters.
        """
        return torch.zeros(batch_size, self.hidden_size, device=self._device())

    def num_params(self):
        """Print the number of trainable parameters, in millions."""
        # numel() avoids depending on a NumPy name that this file never
        # imports explicitly.
        total = sum(p.numel() for p in self.parameters() if p.requires_grad)
        parameters = total / 1_000_000
        print('Trainable Parameters: %.3f million' % parameters)