Nithya committed on
Commit
0821a2f
·
1 Parent(s): ad4d894

added reinterp slider

Browse files
Files changed (2) hide show
  1. app.py +19 -11
  2. requirements.txt +1 -1
app.py CHANGED
@@ -100,12 +100,12 @@ def extract_pitch(audio, unvoice=True, sr=16000, frame_shift_ms=10, log=True):
100
 
101
  return time, f0, confidence
102
 
103
- def generate_pitch_reinterp(pitch, pitch_model, invert_pitch_fn, num_samples, num_steps, noise_std=0.4):
104
  '''Generate pitch values for the melodic reinterpretation task'''
105
  # hardcoding the amount of noise to be added
106
- noisy_pitch = torch.Tensor(pitch[:, :, -1200:]).to(pitch_model.device) + (torch.normal(mean=0.0, std=noise_std*torch.ones((1200)))).to(pitch_model.device)
107
- noisy_pitch = torch.clamp(noisy_pitch, -5.19, 5.19) # clipping the pitch values to be within the range of the model
108
- samples = pitch_model.sample_sdedit(noisy_pitch, num_samples, num_steps)
109
  inverted_pitches = invert_pitch_fn(f0=samples.detach().cpu().numpy()[0]).flatten() # pitch values in Hz
110
 
111
  return samples, inverted_pitches
@@ -127,7 +127,7 @@ def generate_audio(audio_model, f0s, invert_audio_fn, singers=[3], num_steps=100
127
  return audio
128
 
129
  @spaces.GPU(duration=30)
130
- def generate(pitch, num_samples=1, num_steps=100, singers=[3], outfolder='temp', audio_seq_len=750, pitch_qt=None, type='response', invert_pitch_fn=None):
131
  global pitch_model, audio_model
132
  # move the models to device
133
  pitch_model = pitch_model.to(device)
@@ -140,7 +140,7 @@ def generate(pitch, num_samples=1, num_steps=100, singers=[3], outfolder='temp',
140
  if type == 'response':
141
  pitch, inverted_pitch = generate_pitch_response(pitch, pitch_model, invert_pitch_fn, num_samples=num_samples, num_steps=100)
142
  elif type == 'reinterp':
143
- pitch, inverted_pitch = generate_pitch_reinterp(pitch, pitch_model, invert_pitch_fn, num_samples=num_samples, num_steps=100)
144
 
145
  else:
146
  raise ValueError(f'Invalid type: {type}')
@@ -189,7 +189,7 @@ def load_pitch_model(model_selection):
189
  return pitch_model, pitch_qt, pitch_task_fn, invert_pitch_fn
190
 
191
  @debug_profile
192
- def container_generate(model_selection, task_selection, audio, singer_id):
193
  global pitch_model, pitch_qt, pitch_task_fn, invert_pitch_fn, model_loaded
194
  # load pitch model
195
  if model_loaded is None or model_loaded != model_selection:
@@ -241,7 +241,7 @@ def container_generate(model_selection, task_selection, audio, singer_id):
241
  if task_selection == 'Call and Response':
242
  partial_generate = partial(generate, num_samples=1, num_steps=100, singers=singer, outfolder=None, pitch_qt=pitch_qt, type='response', invert_pitch_fn=invert_pitch_fn)
243
  else:
244
- partial_generate = partial(generate, num_samples=1, num_steps=100, singers=singer, outfolder=None, pitch_qt=pitch_qt, type='reinterp', invert_pitch_fn=invert_pitch_fn)
245
  audio, output_plot = partial_generate(f0)
246
  return audio, user_input_plot, output_plot
247
 
@@ -254,6 +254,13 @@ css = """
254
  }
255
  """
256
 
 
 
 
 
 
 
 
257
  with gr.Blocks(css=css) as demo:
258
  gr.Markdown("# GaMaDHaNi: Hierarchical Generative Modeling of Melodic Vocal Contours in Hindustani Classical Music", elem_classes="center-text")
259
  gr.Markdown("### Abstract", elem_classes="center-text")
@@ -276,7 +283,7 @@ with gr.Blocks(css=css) as demo:
276
  This is still a work in progress, so please feel free to share any weird or interesting examples, we would love to hear them! Contact us at snnithya[at]mit[dot]edu.
277
  """)
278
  gr.Markdown("""
279
- *Note: If you see an error message on the screen after clicking 'Submit', please wait for five seconds and click 'Submit' again.*
280
  """)
281
  gr.Markdown("""
282
  *Another note: The model may take around 20-30s to generate an output. Hang tight! But if you're left hanging for too long, let me know!*
@@ -286,8 +293,9 @@ with gr.Blocks(css=css) as demo:
286
  """)
287
  model_dropdown = gr.Dropdown(["Diffusion Pitch Generator"], label="Select a model type")
288
  task_dropdown = gr.Dropdown(label="Select a task", choices=["Call and Response", "Melodic Reinterpretation"])
 
 
289
  singer_dropdown = gr.Dropdown(label="Select a singer", choices=["Singer 1", "Singer 2"])
290
-
291
  with gr.Row(equal_height=True):
292
  with gr.Column():
293
  audio = gr.Audio(label="Input", show_download_button=True)
@@ -311,7 +319,7 @@ with gr.Blocks(css=css) as demo:
311
  with gr.Accordion("View Pitch Plot"):
312
  generated_pitch = gr.Plot(label="Generated Pitch")
313
  sbmt = gr.Button()
314
- sbmt.click(container_generate, inputs=[model_dropdown, task_dropdown, audio, singer_dropdown], outputs=[generated_audio, user_input, generated_pitch])
315
 
316
  def main(argv):
317
 
 
100
 
101
  return time, f0, confidence
102
 
103
+ def generate_pitch_reinterp(pitch, pitch_model, invert_pitch_fn, num_samples, num_steps, noise_std=0.4, t0=0.5):
104
  '''Generate pitch values for the melodic reinterpretation task'''
105
  # hardcoding the amount of noise to be added
106
+ # noisy_pitch = torch.Tensor(pitch[:, :, -1200:]).to(pitch_model.device) + (torch.normal(mean=0.0, std=noise_std*torch.ones((1200)))).to(pitch_model.device)
107
+ # noisy_pitch = torch.clamp(noisy_pitch, -5.19, 5.19) # clipping the pitch values to be within the range of the model
108
+ samples = pitch_model.sample_sdedit(pitch[:, :, -1200:].to(pitch_model.device), num_samples, num_steps, t0=t0)
109
  inverted_pitches = invert_pitch_fn(f0=samples.detach().cpu().numpy()[0]).flatten() # pitch values in Hz
110
 
111
  return samples, inverted_pitches
 
127
  return audio
128
 
129
  @spaces.GPU(duration=30)
130
+ def generate(pitch, num_samples=1, num_steps=100, singers=[3], outfolder='temp', audio_seq_len=750, pitch_qt=None, type='response', invert_pitch_fn=None, t0=0.5):
131
  global pitch_model, audio_model
132
  # move the models to device
133
  pitch_model = pitch_model.to(device)
 
140
  if type == 'response':
141
  pitch, inverted_pitch = generate_pitch_response(pitch, pitch_model, invert_pitch_fn, num_samples=num_samples, num_steps=100)
142
  elif type == 'reinterp':
143
+ pitch, inverted_pitch = generate_pitch_reinterp(pitch, pitch_model, invert_pitch_fn, num_samples=num_samples, num_steps=100, t0=t0)
144
 
145
  else:
146
  raise ValueError(f'Invalid type: {type}')
 
189
  return pitch_model, pitch_qt, pitch_task_fn, invert_pitch_fn
190
 
191
  @debug_profile
192
+ def container_generate(model_selection, task_selection, audio, singer_id, t0):
193
  global pitch_model, pitch_qt, pitch_task_fn, invert_pitch_fn, model_loaded
194
  # load pitch model
195
  if model_loaded is None or model_loaded != model_selection:
 
241
  if task_selection == 'Call and Response':
242
  partial_generate = partial(generate, num_samples=1, num_steps=100, singers=singer, outfolder=None, pitch_qt=pitch_qt, type='response', invert_pitch_fn=invert_pitch_fn)
243
  else:
244
+ partial_generate = partial(generate, num_samples=1, num_steps=100, singers=singer, outfolder=None, pitch_qt=pitch_qt, type='reinterp', invert_pitch_fn=invert_pitch_fn, t0=t0)
245
  audio, output_plot = partial_generate(f0)
246
  return audio, user_input_plot, output_plot
247
 
 
254
  }
255
  """
256
 
257
+ def toggle_visibility(selection):
258
+ # Show the slider only when the "Melodic Reinterpretation" task is selected, otherwise hide it
259
+ if selection == "Melodic Reinterpretation":
260
+ return gr.update(visible=True)
261
+ else:
262
+ return gr.update(visible=False)
263
+
264
  with gr.Blocks(css=css) as demo:
265
  gr.Markdown("# GaMaDHaNi: Hierarchical Generative Modeling of Melodic Vocal Contours in Hindustani Classical Music", elem_classes="center-text")
266
  gr.Markdown("### Abstract", elem_classes="center-text")
 
283
  This is still a work in progress, so please feel free to share any weird or interesting examples, we would love to hear them! Contact us at snnithya[at]mit[dot]edu.
284
  """)
285
  gr.Markdown("""
286
+ *Note: If you see an error message on the screen after clicking 'Run', please wait for five seconds and click 'Run' again.*
287
  """)
288
  gr.Markdown("""
289
  *Another note: The model may take around 20-30s to generate an output. Hang tight! But if you're left hanging for too long, let me know!*
 
293
  """)
294
  model_dropdown = gr.Dropdown(["Diffusion Pitch Generator"], label="Select a model type")
295
  task_dropdown = gr.Dropdown(label="Select a task", choices=["Call and Response", "Melodic Reinterpretation"])
296
+ t0 = gr.Slider(label="Faithfulness to the input (For melodic reinterpretation task only)", minimum=0.0, maximum=1.0, step=0.01, value=0.3, visible=False)
297
+ task_dropdown.change(toggle_visibility, inputs=task_dropdown, outputs=t0)
298
  singer_dropdown = gr.Dropdown(label="Select a singer", choices=["Singer 1", "Singer 2"])
 
299
  with gr.Row(equal_height=True):
300
  with gr.Column():
301
  audio = gr.Audio(label="Input", show_download_button=True)
 
319
  with gr.Accordion("View Pitch Plot"):
320
  generated_pitch = gr.Plot(label="Generated Pitch")
321
  sbmt = gr.Button()
322
+ sbmt.click(container_generate, inputs=[model_dropdown, task_dropdown, audio, singer_dropdown, t0], outputs=[generated_audio, user_input, generated_pitch])
323
 
324
  def main(argv):
325
 
requirements.txt CHANGED
@@ -1,4 +1,4 @@
1
  crepe==0.0.15
2
  hmmlearn==0.3.2
3
  tensorflow==2.17.0
4
- GaMaDHaNi @ git+https://github.com/snnithya/GaMaDHaNi.git@37788f4c900fc425cd193052784e88afbfdd19e2
 
1
  crepe==0.0.15
2
  hmmlearn==0.3.2
3
  tensorflow==2.17.0
4
+ GaMaDHaNi @ git+https://github.com/snnithya/GaMaDHaNi.git@c59e24e28ef7b80d43f56b39d3a9dd0563e01df6