Commit 0821a2f: "added reinterp slider"
Author: Nithya
Parent(s): ad4d894
Files changed: app.py (+19 -11), requirements.txt (+1 -1)
app.py
CHANGED
@@ -100,12 +100,12 @@ def extract_pitch(audio, unvoice=True, sr=16000, frame_shift_ms=10, log=True):
 
     return time, f0, confidence
 
-def generate_pitch_reinterp(pitch, pitch_model, invert_pitch_fn, num_samples, num_steps, noise_std=0.4):
+def generate_pitch_reinterp(pitch, pitch_model, invert_pitch_fn, num_samples, num_steps, noise_std=0.4, t0=0.5):
     '''Generate pitch values for the melodic reinterpretation task'''
     # hardcoding the amount of noise to be added
-    noisy_pitch = torch.Tensor(pitch[:, :, -1200:]).to(pitch_model.device) + (torch.normal(mean=0.0, std=noise_std*torch.ones((1200)))).to(pitch_model.device)
-    noisy_pitch = torch.clamp(noisy_pitch, -5.19, 5.19) # clipping the pitch values to be within the range of the model
-    samples = pitch_model.sample_sdedit(
+    # noisy_pitch = torch.Tensor(pitch[:, :, -1200:]).to(pitch_model.device) + (torch.normal(mean=0.0, std=noise_std*torch.ones((1200)))).to(pitch_model.device)
+    # noisy_pitch = torch.clamp(noisy_pitch, -5.19, 5.19) # clipping the pitch values to be within the range of the model
+    samples = pitch_model.sample_sdedit(pitch[:, :, -1200:].to(pitch_model.device), num_samples, num_steps, t0=t0)
     inverted_pitches = invert_pitch_fn(f0=samples.detach().cpu().numpy()[0]).flatten() # pitch values in Hz
 
     return samples, inverted_pitches
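The explicit noise-then-clamp preprocessing is replaced by a single call to pitch_model.sample_sdedit with a t0 argument. The diff does not show sample_sdedit's internals; as intuition, SDEdit-style sampling runs the forward diffusion only up to an intermediate time t0 and denoises from there, so a smaller t0 keeps the output closer to the input contour while a larger t0 allows more reinterpretation. A minimal sketch under those assumptions (sdedit_sample and denoise_step are illustrative placeholders, not GaMaDHaNi's API):

import torch

def sdedit_sample(x0, denoise_step, num_steps=100, t0=0.5):
    # Jump into the middle of the noise schedule instead of starting from pure noise:
    # t0=0.0 would return the input unchanged, t0=1.0 is ordinary generation from scratch.
    start = int(t0 * num_steps)
    noise = torch.randn_like(x0)
    x = (1.0 - t0) * x0 + t0 * noise  # crude stand-in for the forward process at time t0
    for step in reversed(range(start)):
        x = denoise_step(x, step)  # one reverse-diffusion update (placeholder)
    return x

# Identity "denoiser" just to show the call shape on a (batch, channel, time) pitch tensor.
samples = sdedit_sample(torch.zeros(1, 1, 1200), lambda x, step: x, num_steps=100, t0=0.3)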
@@ -127,7 +127,7 @@ def generate_audio(audio_model, f0s, invert_audio_fn, singers=[3], num_steps=100
     return audio
 
 @spaces.GPU(duration=30)
-def generate(pitch, num_samples=1, num_steps=100, singers=[3], outfolder='temp', audio_seq_len=750, pitch_qt=None, type='response', invert_pitch_fn=None):
+def generate(pitch, num_samples=1, num_steps=100, singers=[3], outfolder='temp', audio_seq_len=750, pitch_qt=None, type='response', invert_pitch_fn=None, t0=0.5):
     global pitch_model, audio_model
     # move the models to device
     pitch_model = pitch_model.to(device)
@@ -140,7 +140,7 @@ def generate(pitch, num_samples=1, num_steps=100, singers=[3], outfolder='temp',
     if type == 'response':
         pitch, inverted_pitch = generate_pitch_response(pitch, pitch_model, invert_pitch_fn, num_samples=num_samples, num_steps=100)
     elif type == 'reinterp':
-        pitch, inverted_pitch = generate_pitch_reinterp(pitch, pitch_model, invert_pitch_fn, num_samples=num_samples, num_steps=100)
+        pitch, inverted_pitch = generate_pitch_reinterp(pitch, pitch_model, invert_pitch_fn, num_samples=num_samples, num_steps=100, t0=t0)
 
     else:
         raise ValueError(f'Invalid type: {type}')
@@ -189,7 +189,7 @@ def load_pitch_model(model_selection):
     return pitch_model, pitch_qt, pitch_task_fn, invert_pitch_fn
 
 @debug_profile
-def container_generate(model_selection, task_selection, audio, singer_id):
+def container_generate(model_selection, task_selection, audio, singer_id, t0):
     global pitch_model, pitch_qt, pitch_task_fn, invert_pitch_fn, model_loaded
     # load pitch model
     if model_loaded is None or model_loaded != model_selection:
@@ -241,7 +241,7 @@ def container_generate(model_selection, task_selection, audio, singer_id):
     if task_selection == 'Call and Response':
         partial_generate = partial(generate, num_samples=1, num_steps=100, singers=singer, outfolder=None, pitch_qt=pitch_qt, type='response', invert_pitch_fn=invert_pitch_fn)
     else:
-        partial_generate = partial(generate, num_samples=1, num_steps=100, singers=singer, outfolder=None, pitch_qt=pitch_qt, type='reinterp', invert_pitch_fn=invert_pitch_fn)
+        partial_generate = partial(generate, num_samples=1, num_steps=100, singers=singer, outfolder=None, pitch_qt=pitch_qt, type='reinterp', invert_pitch_fn=invert_pitch_fn, t0=t0)
     audio, output_plot = partial_generate(f0)
     return audio, user_input_plot, output_plot
 
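Note how t0 reaches generate: container_generate pre-binds it with functools.partial, exactly as the existing code already does for singers, pitch_qt, and the task type. A toy illustration of the binding (names here are illustrative, not the app's):

from functools import partial

def generate(pitch, *, type='response', t0=0.5):
    return f"task={type}, t0={t0}, input={pitch}"

# Bind the task-specific keyword arguments once...
partial_generate = partial(generate, type='reinterp', t0=0.8)
# ...then call with only the per-request data, as container_generate does with f0.
print(partial_generate('f0'))  # task=reinterp, t0=0.8, input=f0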
@@ -254,6 +254,13 @@ css = """
 }
 """
 
+def toggle_visibility(selection):
+    # Show the slider only when "Melodic Reinterpretation" is selected, hide it otherwise
+    if selection == "Melodic Reinterpretation":
+        return gr.update(visible=True)
+    else:
+        return gr.update(visible=False)
+
 with gr.Blocks(css=css) as demo:
     gr.Markdown("# GaMaDHaNi: Hierarchical Generative Modeling of Melodic Vocal Contours in Hindustani Classical Music", elem_classes="center-text")
     gr.Markdown("### Abstract", elem_classes="center-text")
@@ -276,7 +283,7 @@ with gr.Blocks(css=css) as demo:
     This is still a work in progress, so please feel free to share any weird or interesting examples, we would love to hear them! Contact us at snnithya[at]mit[dot]edu.
     """)
     gr.Markdown("""
-    *Note: If you see an error message on the screen after clicking '
+    *Note: If you see an error message on the screen after clicking 'Run', please wait for five seconds and click 'Run' again.*
     """)
     gr.Markdown("""
     *Another note: The model may take around 20-30s to generate an output. Hang tight! But if you're left hanging for too long, let me know!*
@@ -286,8 +293,9 @@ with gr.Blocks(css=css) as demo:
     """)
     model_dropdown = gr.Dropdown(["Diffusion Pitch Generator"], label="Select a model type")
     task_dropdown = gr.Dropdown(label="Select a task", choices=["Call and Response", "Melodic Reinterpretation"])
+    t0 = gr.Slider(label="Faithfulness to the input (For melodic reinterpretation task only)", minimum=0.0, maximum=1.0, step=0.01, value=0.3, visible=False)
+    task_dropdown.change(toggle_visibility, inputs=task_dropdown, outputs=t0)
     singer_dropdown = gr.Dropdown(label="Select a singer", choices=["Singer 1", "Singer 2"])
-
     with gr.Row(equal_height=True):
         with gr.Column():
             audio = gr.Audio(label="Input", show_download_button=True)
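The slider is created hidden and only revealed by the task_dropdown.change handler wired to toggle_visibility above. The same show/hide pattern in self-contained form (a minimal sketch assuming a Gradio version where gr.update is available, as this app already uses it):

import gradio as gr

with gr.Blocks() as demo:
    task = gr.Dropdown(["Call and Response", "Melodic Reinterpretation"], label="Select a task")
    t0 = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, value=0.3,
                   label="Faithfulness to the input", visible=False)
    # Re-render the slider as visible only when the reinterpretation task is chosen.
    task.change(lambda s: gr.update(visible=(s == "Melodic Reinterpretation")),
                inputs=task, outputs=t0)

demo.launch()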
@@ -311,7 +319,7 @@ with gr.Blocks(css=css) as demo:
         with gr.Accordion("View Pitch Plot"):
             generated_pitch = gr.Plot(label="Generated Pitch")
     sbmt = gr.Button()
-    sbmt.click(container_generate, inputs=[model_dropdown, task_dropdown, audio, singer_dropdown], outputs=[generated_audio, user_input, generated_pitch])
+    sbmt.click(container_generate, inputs=[model_dropdown, task_dropdown, audio, singer_dropdown, t0], outputs=[generated_audio, user_input, generated_pitch])
 
 def main(argv):
 
requirements.txt
CHANGED
@@ -1,4 +1,4 @@
 crepe==0.0.15
 hmmlearn==0.3.2
 tensorflow==2.17.0
-GaMaDHaNi @ git+https://github.com/snnithya/GaMaDHaNi.git@
+GaMaDHaNi @ git+https://github.com/snnithya/GaMaDHaNi.git@c59e24e28ef7b80d43f56b39d3a9dd0563e01df6
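Pinning the GaMaDHaNi dependency to a full commit hash makes the Space's build reproducible, and presumably locks in the library revision whose sample_sdedit accepts the t0 argument that app.py now passes.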