Nithya committed on
Commit
a16f46b
·
1 Parent(s): 85b0298

added more logging

Browse files
Files changed (1) hide show
  1. app.py +7 -3
app.py CHANGED
@@ -108,10 +108,11 @@ def generate_audio(audio_model, f0s, invert_audio_fn, singers=[3], num_steps=100
108
 
109
  return audio
110
 
111
- @spaces.GPU(duration=120)
112
  def generate(pitch, num_samples=1, num_steps=100, singers=[3], outfolder='temp', audio_seq_len=750, pitch_qt=None ):
113
 
114
  logging.log(logging.INFO, 'Generate function')
 
115
  pitch, inverted_pitch = generate_pitch_reinterp(pitch, pitch_model, invert_pitch_fn, num_samples=num_samples, num_steps=100)
116
  if pitch_qt is not None:
117
  # if there is not pitch quantile transformer, undo the default quantile transformation that occurs
@@ -124,6 +125,7 @@ def generate(pitch, num_samples=1, num_steps=100, singers=[3], outfolder='temp',
124
  interpolated_pitch = p2a.interpolate_pitch(pitch=pitch, audio_seq_len=audio_seq_len) # interpolate pitch values to match the audio model's input size
125
  interpolated_pitch = torch.nan_to_num(interpolated_pitch, nan=196) # replace nan values with silent token
126
  interpolated_pitch = interpolated_pitch.squeeze(1) # to match input size by removing the extra dimension
 
127
  audio = generate_audio(audio_model, interpolated_pitch, invert_audio_fn, singers=singers, num_steps=100)
128
  audio = audio.detach().cpu().numpy()
129
  pitch = pitch.detach().cpu().numpy()
@@ -151,7 +153,7 @@ audio_model, audio_qt, audio_seq_len, invert_audio_fn = load_audio_fns(
151
  )
152
  partial_generate = partial(generate, num_samples=1, num_steps=100, singers=[3], outfolder=None, pitch_qt=pitch_qt) # generate function with default arguments
153
 
154
- @spaces.GPU(duration=120)
155
  def set_guide_and_generate(audio):
156
  global selected_prime, pitch_task_fn
157
 
@@ -168,6 +170,7 @@ def set_guide_and_generate(audio):
168
  audio = audio[-12*16000:] # consider only last 12 s
169
  _, f0, _ = extract_pitch(audio)
170
  mic_f0 = f0.copy() # save the user input pitch values
 
171
  f0 = pitch_task_fn(**{
172
  'inputs': {
173
  'pitch': {
@@ -182,6 +185,7 @@ def set_guide_and_generate(audio):
182
  # pdb.set_trace()
183
  f0 = f0.reshape(1, 1, -1)
184
  f0 = torch.tensor(f0).to(pitch_model.device).float()
 
185
  audio, pitch, _ = partial_generate(f0)
186
  mic_f0 = np.where(mic_f0 == 0, np.nan, mic_f0)
187
  # plot user input
@@ -203,7 +207,7 @@ with gr.Blocks() as demo:
203
 
204
  def main(argv):
205
 
206
- demo.launch(share=True)
207
 
208
  if __name__ == '__main__':
209
  main(sys.argv)
 
108
 
109
  return audio
110
 
111
+ @spaces.GPU(duration=180)
112
  def generate(pitch, num_samples=1, num_steps=100, singers=[3], outfolder='temp', audio_seq_len=750, pitch_qt=None ):
113
 
114
  logging.log(logging.INFO, 'Generate function')
115
+ logging.log(logging.INFO, 'Generating pitch')
116
  pitch, inverted_pitch = generate_pitch_reinterp(pitch, pitch_model, invert_pitch_fn, num_samples=num_samples, num_steps=100)
117
  if pitch_qt is not None:
118
  # if there is not pitch quantile transformer, undo the default quantile transformation that occurs
 
125
  interpolated_pitch = p2a.interpolate_pitch(pitch=pitch, audio_seq_len=audio_seq_len) # interpolate pitch values to match the audio model's input size
126
  interpolated_pitch = torch.nan_to_num(interpolated_pitch, nan=196) # replace nan values with silent token
127
  interpolated_pitch = interpolated_pitch.squeeze(1) # to match input size by removing the extra dimension
128
+ logging.log(logging.INFO, 'Generating audio')
129
  audio = generate_audio(audio_model, interpolated_pitch, invert_audio_fn, singers=singers, num_steps=100)
130
  audio = audio.detach().cpu().numpy()
131
  pitch = pitch.detach().cpu().numpy()
 
153
  )
154
  partial_generate = partial(generate, num_samples=1, num_steps=100, singers=[3], outfolder=None, pitch_qt=pitch_qt) # generate function with default arguments
155
 
156
+ @spaces.GPU(duration=180)
157
  def set_guide_and_generate(audio):
158
  global selected_prime, pitch_task_fn
159
 
 
170
  audio = audio[-12*16000:] # consider only last 12 s
171
  _, f0, _ = extract_pitch(audio)
172
  mic_f0 = f0.copy() # save the user input pitch values
173
+ logging.log(logging.INFO, 'Pitch extracted')
174
  f0 = pitch_task_fn(**{
175
  'inputs': {
176
  'pitch': {
 
185
  # pdb.set_trace()
186
  f0 = f0.reshape(1, 1, -1)
187
  f0 = torch.tensor(f0).to(pitch_model.device).float()
188
+ logging.log(logging.INFO, 'Calling generate function')
189
  audio, pitch, _ = partial_generate(f0)
190
  mic_f0 = np.where(mic_f0 == 0, np.nan, mic_f0)
191
  # plot user input
 
207
 
208
  def main(argv):
209
 
210
+ demo.launch()
211
 
212
  if __name__ == '__main__':
213
  main(sys.argv)