Spaces:
Sleeping
Sleeping
Nithya
committed on
Commit
·
a16f46b
1
Parent(s):
85b0298
added more logging
Browse files
app.py
CHANGED
@@ -108,10 +108,11 @@ def generate_audio(audio_model, f0s, invert_audio_fn, singers=[3], num_steps=100
|
|
108 |
|
109 |
return audio
|
110 |
|
111 |
-
@spaces.GPU(duration=
|
112 |
def generate(pitch, num_samples=1, num_steps=100, singers=[3], outfolder='temp', audio_seq_len=750, pitch_qt=None ):
|
113 |
|
114 |
logging.log(logging.INFO, 'Generate function')
|
|
|
115 |
pitch, inverted_pitch = generate_pitch_reinterp(pitch, pitch_model, invert_pitch_fn, num_samples=num_samples, num_steps=100)
|
116 |
if pitch_qt is not None:
|
117 |
# if there is not pitch quantile transformer, undo the default quantile transformation that occurs
|
@@ -124,6 +125,7 @@ def generate(pitch, num_samples=1, num_steps=100, singers=[3], outfolder='temp',
|
|
124 |
interpolated_pitch = p2a.interpolate_pitch(pitch=pitch, audio_seq_len=audio_seq_len) # interpolate pitch values to match the audio model's input size
|
125 |
interpolated_pitch = torch.nan_to_num(interpolated_pitch, nan=196) # replace nan values with silent token
|
126 |
interpolated_pitch = interpolated_pitch.squeeze(1) # to match input size by removing the extra dimension
|
|
|
127 |
audio = generate_audio(audio_model, interpolated_pitch, invert_audio_fn, singers=singers, num_steps=100)
|
128 |
audio = audio.detach().cpu().numpy()
|
129 |
pitch = pitch.detach().cpu().numpy()
|
@@ -151,7 +153,7 @@ audio_model, audio_qt, audio_seq_len, invert_audio_fn = load_audio_fns(
|
|
151 |
)
|
152 |
partial_generate = partial(generate, num_samples=1, num_steps=100, singers=[3], outfolder=None, pitch_qt=pitch_qt) # generate function with default arguments
|
153 |
|
154 |
-
@spaces.GPU(duration=
|
155 |
def set_guide_and_generate(audio):
|
156 |
global selected_prime, pitch_task_fn
|
157 |
|
@@ -168,6 +170,7 @@ def set_guide_and_generate(audio):
|
|
168 |
audio = audio[-12*16000:] # consider only last 12 s
|
169 |
_, f0, _ = extract_pitch(audio)
|
170 |
mic_f0 = f0.copy() # save the user input pitch values
|
|
|
171 |
f0 = pitch_task_fn(**{
|
172 |
'inputs': {
|
173 |
'pitch': {
|
@@ -182,6 +185,7 @@ def set_guide_and_generate(audio):
|
|
182 |
# pdb.set_trace()
|
183 |
f0 = f0.reshape(1, 1, -1)
|
184 |
f0 = torch.tensor(f0).to(pitch_model.device).float()
|
|
|
185 |
audio, pitch, _ = partial_generate(f0)
|
186 |
mic_f0 = np.where(mic_f0 == 0, np.nan, mic_f0)
|
187 |
# plot user input
|
@@ -203,7 +207,7 @@ with gr.Blocks() as demo:
|
|
203 |
|
204 |
def main(argv):
|
205 |
|
206 |
-
demo.launch(
|
207 |
|
208 |
if __name__ == '__main__':
|
209 |
main(sys.argv)
|
|
|
108 |
|
109 |
return audio
|
110 |
|
111 |
+
@spaces.GPU(duration=180)
|
112 |
def generate(pitch, num_samples=1, num_steps=100, singers=[3], outfolder='temp', audio_seq_len=750, pitch_qt=None ):
|
113 |
|
114 |
logging.log(logging.INFO, 'Generate function')
|
115 |
+
logging.log(logging.INFO, 'Generating pitch')
|
116 |
pitch, inverted_pitch = generate_pitch_reinterp(pitch, pitch_model, invert_pitch_fn, num_samples=num_samples, num_steps=100)
|
117 |
if pitch_qt is not None:
|
118 |
# if there is not pitch quantile transformer, undo the default quantile transformation that occurs
|
|
|
125 |
interpolated_pitch = p2a.interpolate_pitch(pitch=pitch, audio_seq_len=audio_seq_len) # interpolate pitch values to match the audio model's input size
|
126 |
interpolated_pitch = torch.nan_to_num(interpolated_pitch, nan=196) # replace nan values with silent token
|
127 |
interpolated_pitch = interpolated_pitch.squeeze(1) # to match input size by removing the extra dimension
|
128 |
+
logging.log(logging.INFO, 'Generating audio')
|
129 |
audio = generate_audio(audio_model, interpolated_pitch, invert_audio_fn, singers=singers, num_steps=100)
|
130 |
audio = audio.detach().cpu().numpy()
|
131 |
pitch = pitch.detach().cpu().numpy()
|
|
|
153 |
)
|
154 |
partial_generate = partial(generate, num_samples=1, num_steps=100, singers=[3], outfolder=None, pitch_qt=pitch_qt) # generate function with default arguments
|
155 |
|
156 |
+
@spaces.GPU(duration=180)
|
157 |
def set_guide_and_generate(audio):
|
158 |
global selected_prime, pitch_task_fn
|
159 |
|
|
|
170 |
audio = audio[-12*16000:] # consider only last 12 s
|
171 |
_, f0, _ = extract_pitch(audio)
|
172 |
mic_f0 = f0.copy() # save the user input pitch values
|
173 |
+
logging.log(logging.INFO, 'Pitch extracted')
|
174 |
f0 = pitch_task_fn(**{
|
175 |
'inputs': {
|
176 |
'pitch': {
|
|
|
185 |
# pdb.set_trace()
|
186 |
f0 = f0.reshape(1, 1, -1)
|
187 |
f0 = torch.tensor(f0).to(pitch_model.device).float()
|
188 |
+
logging.log(logging.INFO, 'Calling generate function')
|
189 |
audio, pitch, _ = partial_generate(f0)
|
190 |
mic_f0 = np.where(mic_f0 == 0, np.nan, mic_f0)
|
191 |
# plot user input
|
|
|
207 |
|
208 |
def main(argv):
|
209 |
|
210 |
+
demo.launch()
|
211 |
|
212 |
if __name__ == '__main__':
|
213 |
main(sys.argv)
|