Commit 0821a2f: "added reinterp slider"
Author: Nithya
Parent(s): ad4d894
Files changed: app.py (+19 -11), requirements.txt (+1 -1)
app.py
CHANGED
@@ -100,12 +100,12 @@ def extract_pitch(audio, unvoice=True, sr=16000, frame_shift_ms=10, log=True):
 
     return time, f0, confidence
 
-def generate_pitch_reinterp(pitch, pitch_model, invert_pitch_fn, num_samples, num_steps, noise_std=0.4):
+def generate_pitch_reinterp(pitch, pitch_model, invert_pitch_fn, num_samples, num_steps, noise_std=0.4, t0=0.5):
     '''Generate pitch values for the melodic reinterpretation task'''
     # hardcoding the amount of noise to be added
-    noisy_pitch = torch.Tensor(pitch[:, :, -1200:]).to(pitch_model.device) + (torch.normal(mean=0.0, std=noise_std*torch.ones((1200)))).to(pitch_model.device)
-    noisy_pitch = torch.clamp(noisy_pitch, -5.19, 5.19) # clipping the pitch values to be within the range of the model
-    samples = pitch_model.sample_sdedit(
+    # noisy_pitch = torch.Tensor(pitch[:, :, -1200:]).to(pitch_model.device) + (torch.normal(mean=0.0, std=noise_std*torch.ones((1200)))).to(pitch_model.device)
+    # noisy_pitch = torch.clamp(noisy_pitch, -5.19, 5.19) # clipping the pitch values to be within the range of the model
+    samples = pitch_model.sample_sdedit(pitch[:, :, -1200:].to(pitch_model.device), num_samples, num_steps, t0=t0)
     inverted_pitches = invert_pitch_fn(f0=samples.detach().cpu().numpy()[0]).flatten() # pitch values in Hz
 
     return samples, inverted_pitches
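The explicit noise-then-clamp preprocessing is replaced by a single call to pitch_model.sample_sdedit with a t0 argument. The diff does not show sample_sdedit's internals; as intuition, SDEdit-style sampling runs the forward diffusion only up to an intermediate time t0 and denoises from there, so a smaller t0 keeps the output closer to the input contour while a larger t0 allows more reinterpretation. A minimal sketch under those assumptions (sdedit_sample and denoise_step are illustrative placeholders, not GaMaDHaNi's API):

import torch

def sdedit_sample(x0, denoise_step, num_steps=100, t0=0.5):
    # Jump into the middle of the noise schedule instead of starting from pure noise:
    # t0=0.0 would return the input unchanged, t0=1.0 is ordinary generation from scratch.
    start = int(t0 * num_steps)
    noise = torch.randn_like(x0)
    x = (1.0 - t0) * x0 + t0 * noise  # crude stand-in for the forward process at time t0
    for step in reversed(range(start)):
        x = denoise_step(x, step)  # one reverse-diffusion update (placeholder)
    return x

# Identity "denoiser" just to show the call shape on a (batch, channel, time) pitch tensor.
samples = sdedit_sample(torch.zeros(1, 1, 1200), lambda x, step: x, num_steps=100, t0=0.3)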
@@ -127,7 +127,7 @@ def generate_audio(audio_model, f0s, invert_audio_fn, singers=[3], num_steps=100
     return audio
 
 @spaces.GPU(duration=30)
-def generate(pitch, num_samples=1, num_steps=100, singers=[3], outfolder='temp', audio_seq_len=750, pitch_qt=None, type='response', invert_pitch_fn=None):
+def generate(pitch, num_samples=1, num_steps=100, singers=[3], outfolder='temp', audio_seq_len=750, pitch_qt=None, type='response', invert_pitch_fn=None, t0=0.5):
     global pitch_model, audio_model
     # move the models to device
     pitch_model = pitch_model.to(device)
@@ -140,7 +140,7 @@ def generate(pitch, num_samples=1, num_steps=100, singers=[3], outfolder='temp',
     if type == 'response':
         pitch, inverted_pitch = generate_pitch_response(pitch, pitch_model, invert_pitch_fn, num_samples=num_samples, num_steps=100)
     elif type == 'reinterp':
-        pitch, inverted_pitch = generate_pitch_reinterp(pitch, pitch_model, invert_pitch_fn, num_samples=num_samples, num_steps=100)
+        pitch, inverted_pitch = generate_pitch_reinterp(pitch, pitch_model, invert_pitch_fn, num_samples=num_samples, num_steps=100, t0=t0)
 
     else:
         raise ValueError(f'Invalid type: {type}')
@@ -189,7 +189,7 @@ def load_pitch_model(model_selection):
     return pitch_model, pitch_qt, pitch_task_fn, invert_pitch_fn
 
 @debug_profile
-def container_generate(model_selection, task_selection, audio, singer_id):
+def container_generate(model_selection, task_selection, audio, singer_id, t0):
     global pitch_model, pitch_qt, pitch_task_fn, invert_pitch_fn, model_loaded
     # load pitch model
     if model_loaded is None or model_loaded != model_selection:
@@ -241,7 +241,7 @@ def container_generate(model_selection, task_selection, audio, singer_id):
     if task_selection == 'Call and Response':
         partial_generate = partial(generate, num_samples=1, num_steps=100, singers=singer, outfolder=None, pitch_qt=pitch_qt, type='response', invert_pitch_fn=invert_pitch_fn)
     else:
-        partial_generate = partial(generate, num_samples=1, num_steps=100, singers=singer, outfolder=None, pitch_qt=pitch_qt, type='reinterp', invert_pitch_fn=invert_pitch_fn)
+        partial_generate = partial(generate, num_samples=1, num_steps=100, singers=singer, outfolder=None, pitch_qt=pitch_qt, type='reinterp', invert_pitch_fn=invert_pitch_fn, t0=t0)
     audio, output_plot = partial_generate(f0)
     return audio, user_input_plot, output_plot
 
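Note how t0 reaches generate: container_generate pre-binds it with functools.partial, exactly as the existing code already does for singers, pitch_qt, and the task type. A toy illustration of the binding (names here are illustrative, not the app's):

from functools import partial

def generate(pitch, *, type='response', t0=0.5):
    return f"task={type}, t0={t0}, input={pitch}"

# Bind the task-specific keyword arguments once...
partial_generate = partial(generate, type='reinterp', t0=0.8)
# ...then call with only the per-request data, as container_generate does with f0.
print(partial_generate('f0'))  # task=reinterp, t0=0.8, input=f0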
@@ -254,6 +254,13 @@ css = """
 }
 """
 
+def toggle_visibility(selection):
+    # Show the slider only when "Melodic Reinterpretation" is selected, hide it otherwise
+    if selection == "Melodic Reinterpretation":
+        return gr.update(visible=True)
+    else:
+        return gr.update(visible=False)
+
 with gr.Blocks(css=css) as demo:
     gr.Markdown("# GaMaDHaNi: Hierarchical Generative Modeling of Melodic Vocal Contours in Hindustani Classical Music", elem_classes="center-text")
     gr.Markdown("### Abstract", elem_classes="center-text")
@@ -276,7 +283,7 @@ with gr.Blocks(css=css) as demo:
     This is still a work in progress, so please feel free to share any weird or interesting examples, we would love to hear them! Contact us at snnithya[at]mit[dot]edu.
     """)
     gr.Markdown("""
-    *Note: If you see an error message on the screen after clicking '
+    *Note: If you see an error message on the screen after clicking 'Run', please wait for five seconds and click 'Run' again.*
     """)
     gr.Markdown("""
     *Another note: The model may take around 20-30s to generate an output. Hang tight! But if you're left hanging for too long, let me know!*
@@ -286,8 +293,9 @@ with gr.Blocks(css=css) as demo:
     """)
     model_dropdown = gr.Dropdown(["Diffusion Pitch Generator"], label="Select a model type")
     task_dropdown = gr.Dropdown(label="Select a task", choices=["Call and Response", "Melodic Reinterpretation"])
+    t0 = gr.Slider(label="Faithfulness to the input (For melodic reinterpretation task only)", minimum=0.0, maximum=1.0, step=0.01, value=0.3, visible=False)
+    task_dropdown.change(toggle_visibility, inputs=task_dropdown, outputs=t0)
     singer_dropdown = gr.Dropdown(label="Select a singer", choices=["Singer 1", "Singer 2"])
-
     with gr.Row(equal_height=True):
         with gr.Column():
             audio = gr.Audio(label="Input", show_download_button=True)
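The slider is created hidden and only revealed by the task_dropdown.change handler wired to toggle_visibility above. The same show/hide pattern in self-contained form (a minimal sketch assuming a Gradio version where gr.update is available, as this app already uses it):

import gradio as gr

with gr.Blocks() as demo:
    task = gr.Dropdown(["Call and Response", "Melodic Reinterpretation"], label="Select a task")
    t0 = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, value=0.3,
                   label="Faithfulness to the input", visible=False)
    # Re-render the slider as visible only when the reinterpretation task is chosen.
    task.change(lambda s: gr.update(visible=(s == "Melodic Reinterpretation")),
                inputs=task, outputs=t0)

demo.launch()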
@@ -311,7 +319,7 @@ with gr.Blocks(css=css) as demo:
         with gr.Accordion("View Pitch Plot"):
             generated_pitch = gr.Plot(label="Generated Pitch")
     sbmt = gr.Button()
-    sbmt.click(container_generate, inputs=[model_dropdown, task_dropdown, audio, singer_dropdown], outputs=[generated_audio, user_input, generated_pitch])
+    sbmt.click(container_generate, inputs=[model_dropdown, task_dropdown, audio, singer_dropdown, t0], outputs=[generated_audio, user_input, generated_pitch])
 
 def main(argv):
 
requirements.txt
CHANGED
@@ -1,4 +1,4 @@
 crepe==0.0.15
 hmmlearn==0.3.2
 tensorflow==2.17.0
-GaMaDHaNi @ git+https://github.com/snnithya/GaMaDHaNi.git@
+GaMaDHaNi @ git+https://github.com/snnithya/GaMaDHaNi.git@c59e24e28ef7b80d43f56b39d3a9dd0563e01df6
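Pinning the GaMaDHaNi dependency to a full commit hash makes the Space's build reproducible, and presumably locks in the library revision whose sample_sdedit accepts the t0 argument that app.py now passes.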