ai-tube-model-musicgen-1

Running on A10G

App Files Files Community

adefossez committed on Jun 9, 2023

Commit

1897b6f

•

1 Parent(s): 0f1b90d

adapting full app

Browse files

Files changed (2) hide show

app.py +30 -21
app_batched.py +1 -2

app.py CHANGED Viewed

@@ -6,10 +6,13 @@ This source code is licensed under the license found in the
 LICENSE file in the root directory of this source tree.
 """
 import torch
 import gradio as gr
 from hf_loading import get_pretrained
 MODEL = None
@@ -51,8 +54,11 @@ def predict(model, text, melody, duration, topk, topp, temperature, cfg_coef):
     else:
         output = MODEL.generate(descriptions=[text], progress=False)
-    output = output.detach().cpu().numpy()
-    return MODEL.sample_rate, output
 with gr.Blocks() as demo:
@@ -60,25 +66,12 @@ with gr.Blocks() as demo:
         """
         # MusicGen
-        This is the demo for MusicGen, a simple and controllable model for music generation presented at: "Simple and Controllable Music Generation".
-        Below we present 3 model variations:
-        1. Melody -- a music generation model capable of generating music condition on text and melody inputs. **Note**, you can also use text only.
-        2. Small -- a 300M transformer decoder conditioned on text only.
-        3. Medium -- a 1.5B transformer decoder conditioned on text only.
-        4. Large -- a 3.3B transformer decoder conditioned on text only (might OOM for the longest sequences.)
-        When the optional melody conditioning wav is provided, the model will extract
-        a broad melody and try to follow it in the generated samples.
-        For skipping queue, you can duplicate this space, and upgrade to GPU in the settings.
         <br/>
-        <a href="https://huggingface.co/spaces/musicgen/MusicGen?duplicate=true">
-        <img style="margin-top: 0em; margin-bottom: 0em" src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>
-        </p>
-        See [github.com/facebookresearch/audiocraft](https://github.com/facebookresearch/audiocraft)
-        for more details.
         """
     )
     with gr.Row():
@@ -98,7 +91,7 @@ with gr.Blocks() as demo:
                 temperature = gr.Number(label="Temperature", value=1.0, interactive=True)
                 cfg_coef = gr.Number(label="Classifier Free Guidance", value=3.0, interactive=True)
         with gr.Column():
-            output = gr.Audio(label="Generated Music", type="numpy")
     submit.click(predict, inputs=[model, text, melody, duration, topk, topp, temperature, cfg_coef], outputs=[output])
     gr.Examples(
         fn=predict,
@@ -132,5 +125,21 @@ with gr.Blocks() as demo:
         inputs=[text, melody, model],
         outputs=[output]
     )
 demo.launch()

 LICENSE file in the root directory of this source tree.
 """
+from tempfile import NamedTemporaryFile
 import torch
 import gradio as gr
 from hf_loading import get_pretrained
+from audiocraft.data.audio import audio_write
 MODEL = None
     else:
         output = MODEL.generate(descriptions=[text], progress=False)
+    output = output.detach().cpu().float()[0]
+    with NamedTemporaryFile("wb", suffix=".wav", delete=False) as file:
+        audio_write(file.name, output, MODEL.sample_rate, strategy="loudness", add_suffix=False)
+        waveform_video = gr.make_waveform(file.name)
+    return waveform_video
 with gr.Blocks() as demo:
         """
         # MusicGen
+        This is the demo for [MusicGen](https://github.com/facebookresearch/audiocraft), a simple and controllable model for music generation
+        presented at: ["Simple and Controllable Music Generation"](https://huggingface.co/papers/2306.05284).
         <br/>
+        <a href="https://huggingface.co/spaces/musicgen/MusicGen?duplicate=true" style="display: inline-block;margin-top: .5em;margin-right: .25em;" target="_blank">
+        <img style="margin-bottom: 0em;display: inline;margin-top: -.25em;" src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>
+        for longer sequences, more control and no queue.</p>
         """
     )
     with gr.Row():
                 temperature = gr.Number(label="Temperature", value=1.0, interactive=True)
                 cfg_coef = gr.Number(label="Classifier Free Guidance", value=3.0, interactive=True)
         with gr.Column():
+            output = gr.Video(label="Generated Music")
     submit.click(predict, inputs=[model, text, melody, duration, topk, topp, temperature, cfg_coef], outputs=[output])
     gr.Examples(
         fn=predict,
         inputs=[text, melody, model],
         outputs=[output]
     )
+    gr.Markdown(
+        """
+        ### More details
+        By typing a description of the music you want and an optional audio used for melody conditioning,
+        We present 4 model variations:
+        1. Melody -- a music generation model capable of generating music condition on text and melody inputs. **Note**, you can also use text only.
+        2. Small -- a 300M transformer decoder conditioned on text only.
+        3. Medium -- a 1.5B transformer decoder conditioned on text only.
+        4. Large -- a 3.3B transformer decoder conditioned on text only (might OOM for the longest sequences.)
+        When the optional melody conditioning wav is provided, the model will extract
+        a broad melody and try to follow it in the generated samples.
+        """
+    )
 demo.launch()

app_batched.py CHANGED Viewed

@@ -60,7 +60,6 @@ def predict(texts, melodies):
             audio_write(file.name, output, MODEL.sample_rate, strategy="loudness", add_suffix=False)
             waveform_video = gr.make_waveform(file.name)
             out_files.append(waveform_video)
-    print(out_files)
     return [out_files]
@@ -72,7 +71,7 @@ with gr.Blocks() as demo:
         This is the demo for [MusicGen](https://github.com/facebookresearch/audiocraft), a simple and controllable model for music generation
         presented at: ["Simple and Controllable Music Generation"](https://huggingface.co/papers/2306.05284).
         <br/>
-        <a href="https://huggingface.co/spaces/facebook/MusicGen?duplicate=true" style="display: inline-block;margin-top: .5em;margin-right: .25em;" target="_blank">
         <img style="margin-bottom: 0em;display: inline;margin-top: -.25em;" src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>
         for longer sequences, more control and no queue</p>
         """

             audio_write(file.name, output, MODEL.sample_rate, strategy="loudness", add_suffix=False)
             waveform_video = gr.make_waveform(file.name)
             out_files.append(waveform_video)
     return [out_files]
         This is the demo for [MusicGen](https://github.com/facebookresearch/audiocraft), a simple and controllable model for music generation
         presented at: ["Simple and Controllable Music Generation"](https://huggingface.co/papers/2306.05284).
         <br/>
+        <a href="https://huggingface.co/spaces/musicgen/MusicGen?duplicate=true" style="display: inline-block;margin-top: .5em;margin-right: .25em;" target="_blank">
         <img style="margin-bottom: 0em;display: inline;margin-top: -.25em;" src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>
         for longer sequences, more control and no queue</p>
         """