Spaces:

unpairedelectron07
/

Text-to-Music-Generator

Running

App Files Files Community

unpairedelectron07 committed on Jan 21, 2024

Commit

8860e6a

verified ·

1 Parent(s): 50fa952

Upload 7 files

Browse files

Files changed (7) hide show

audiocraft/grids/musicgen/_explorers.py +93 -0
audiocraft/grids/musicgen/musicgen_base_32khz.py +43 -0
audiocraft/grids/musicgen/musicgen_base_cached_32khz.py +67 -0
audiocraft/grids/musicgen/musicgen_clapemb_32khz.py +32 -0
audiocraft/grids/musicgen/musicgen_melody_32khz.py +65 -0
audiocraft/grids/musicgen/musicgen_pretrained_32khz_eval.py +99 -0
audiocraft/grids/musicgen/musicgen_stereo_finetune_32khz.py +57 -0

audiocraft/grids/musicgen/_explorers.py ADDED Viewed

	@@ -0,0 +1,93 @@

+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
+import typing as tp
+import treetable as tt
+from .._base_explorers import BaseExplorer
+class LMExplorer(BaseExplorer):
+    eval_metrics: tp.List[str] = []
+    def stages(self) -> tp.List[str]:
+        return ['train', 'valid']
+    def get_grid_metrics(self):
+        """Return the metrics that should be displayed in the tracking table."""
+        return [
+            tt.group(
+                'train',
+                [
+                    tt.leaf('epoch'),
+                    tt.leaf('duration', '.1f'),  # duration in minutes
+                    tt.leaf('ping'),
+                    tt.leaf('ce', '.4f'),  # cross entropy
+                    tt.leaf("ppl", '.3f'),  # perplexity
+                ],
+                align='>',
+            ),
+            tt.group(
+                'valid',
+                [
+                    tt.leaf('ce', '.4f'),
+                    tt.leaf('ppl', '.3f'),
+                    tt.leaf('best_ppl', '.3f'),
+                ],
+                align='>',
+            ),
+        ]
+    def process_sheep(self, sheep, history):
+        parts = super().process_sheep(sheep, history)
+        track_by = {'ppl': 'lower'}  # values should be in ['lower', 'higher']
+        best_metrics = {k: (1 if v == 'lower' else -1) * float('inf') for k, v in track_by.items()}
+        def comparator(mode, a, b):
+            return a < b if mode == 'lower' else a > b
+        for metrics in history:
+            for key, sub in metrics.items():
+                for metric in track_by:
+                    # for the validation set, keep track of best metrics (ppl in this example)
+                    # this is so we can conveniently compare metrics between runs in the grid
+                    if key == 'valid' and metric in sub and comparator(
+                        track_by[metric], sub[metric], best_metrics[metric]
+                    ):
+                        best_metrics[metric] = sub[metric]
+        if 'valid' in parts:
+            parts['valid'].update({f'best_{k}': v for k, v in best_metrics.items()})
+        return parts
+class GenerationEvalExplorer(BaseExplorer):
+    eval_metrics: tp.List[str] = []
+    def stages(self) -> tp.List[str]:
+        return ['evaluate']
+    def get_grid_metrics(self):
+        """Return the metrics that should be displayed in the tracking table."""
+        return [
+            tt.group(
+                'evaluate',
+                [
+                    tt.leaf('epoch', '.3f'),
+                    tt.leaf('duration', '.1f'),
+                    tt.leaf('ping'),
+                    tt.leaf('ce', '.4f'),
+                    tt.leaf('ppl', '.3f'),
+                    tt.leaf('fad', '.3f'),
+                    tt.leaf('kld', '.3f'),
+                    tt.leaf('text_consistency', '.3f'),
+                    tt.leaf('chroma_cosine', '.3f'),
+                ],
+                align='>',
+            ),
+        ]

audiocraft/grids/musicgen/musicgen_base_32khz.py ADDED Viewed

	@@ -0,0 +1,43 @@

+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
+from ._explorers import LMExplorer
+from ...environment import AudioCraftEnvironment
+@LMExplorer
+def explorer(launcher):
+    partitions = AudioCraftEnvironment.get_slurm_partitions(['team', 'global'])
+    launcher.slurm_(gpus=32, partition=partitions)
+    launcher.bind_(solver='musicgen/musicgen_base_32khz')
+    # replace this by the desired music dataset
+    launcher.bind_(dset='internal/music_400k_32khz')
+    fsdp = {'autocast': False, 'fsdp.use': True}
+    medium = {'model/lm/model_scale': 'medium'}
+    large = {'model/lm/model_scale': 'large'}
+    cfg_low = {'classifier_free_guidance.training_dropout': 0.2}
+    wd_low = {'conditioners.description.t5.word_dropout': 0.2}
+    adam = {'optim.optimizer': 'adamw', 'optim.lr': 1e-4}
+    launcher.bind_(fsdp)
+    launcher.slurm_(gpus=32).bind_(label='32gpus')
+    with launcher.job_array():
+        sub = launcher.bind()
+        sub()
+    launcher.slurm_(gpus=64).bind_(label='64gpus')
+    with launcher.job_array():
+        sub = launcher.bind()
+        sub(medium, adam)
+    launcher.slurm_(gpus=96).bind_(label='96gpus')
+    with launcher.job_array():
+        sub = launcher.bind()
+        sub(large, cfg_low, wd_low, adam, {'optim.max_norm': 3})

audiocraft/grids/musicgen/musicgen_base_cached_32khz.py ADDED Viewed

	@@ -0,0 +1,67 @@

+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
+from ._explorers import LMExplorer
+from ...environment import AudioCraftEnvironment
+@LMExplorer
+def explorer(launcher):
+    partitions = AudioCraftEnvironment.get_slurm_partitions(['team', 'global'])
+    launcher.slurm_(gpus=32, partition=partitions)
+    launcher.bind_(solver='musicgen/musicgen_base_32khz')
+    # replace this by the desired music dataset
+    launcher.bind_(dset='internal/music_400k_32khz')
+    fsdp = {'autocast': False, 'fsdp.use': True}
+    medium = {'model/lm/model_scale': 'medium'}
+    large = {'model/lm/model_scale': 'large'}
+    cfg_low = {'classifier_free_guidance.training_dropout': 0.2}
+    wd_low = {'conditioners.description.t5.word_dropout': 0.2}
+    adam = {'optim.optimizer': 'adamw', 'optim.lr': 1e-4}
+    # BEGINNING OF CACHE WRITING JOBS.
+    cache_write = {
+        'cache.path': '/fsx-codegen/defossez/cache/interleave_stereo_nv_32k',
+        'cache.write': True,
+        'generate.every': 500,
+        'evaluate.every': 500,
+        'logging.log_updates': 50,
+    }
+    cache_sub = launcher.bind({'model/lm/model_scale': 'xsmall', 'conditioner': 'none'})
+    cache_sub.bind_({'deadlock.use': True})
+    cache_sub.slurm_(gpus=8)
+    with launcher.job_array():
+        num_shards = 10  # total number of jobs running in parallel.
+        for shard in range(0, num_shards):
+            launcher(cache_write, {'cache.write_num_shards': num_shards, 'cache.write_shard': shard})
+    # REMOVE THE FOLLOWING RETURN STATEMENT ONCE THE ABOVE JOBS ARE DONE,
+    # OR SUFFICIENTLY AHEAD.
+    return
+    cache = {
+        'cache.path': '/fsx-codegen/defossez/cache/interleave_stereo_nv_32k',
+    }
+    launcher.bind_(fsdp, cache)
+    launcher.slurm_(gpus=32).bind_(label='32gpus')
+    with launcher.job_array():
+        sub = launcher.bind()
+        sub()
+    launcher.slurm_(gpus=64).bind_(label='64gpus')
+    with launcher.job_array():
+        sub = launcher.bind()
+        sub(medium, adam)
+    launcher.slurm_(gpus=96).bind_(label='96gpus')
+    with launcher.job_array():
+        sub = launcher.bind()
+        sub(large, cfg_low, wd_low, adam, {'optim.max_norm': 3})

audiocraft/grids/musicgen/musicgen_clapemb_32khz.py ADDED Viewed

	@@ -0,0 +1,32 @@

+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
+from ._explorers import LMExplorer
+from ...environment import AudioCraftEnvironment
+@LMExplorer
+def explorer(launcher):
+    partitions = AudioCraftEnvironment.get_slurm_partitions(['team', 'global'])
+    launcher.slurm_(gpus=32, partition=partitions)
+    launcher.bind_(solver='musicgen/musicgen_base_32khz')
+    # replace this by the desired music dataset
+    launcher.bind_(dset='internal/music_400k_32khz')
+    launcher.bind_(conditioner='clapemb2music')
+    fsdp = {'autocast': False, 'fsdp.use': True}
+    cache_path = {'conditioners.description.clap.cache_path':
+                  '/fsx-audio-craft-llm/jadecopet/experiments/audiocraft/caches/clap_embed_music'}
+    text_wav_training_opt = {'conditioners.description.clap.text_p': 0.5}
+    launcher.bind_(fsdp)
+    launcher.slurm_(gpus=32).bind_(label='32gpus')
+    with launcher.job_array():
+        launcher()
+        launcher(text_wav_training_opt)
+        launcher(cache_path)
+        launcher(cache_path, text_wav_training_opt)

audiocraft/grids/musicgen/musicgen_melody_32khz.py ADDED Viewed

	@@ -0,0 +1,65 @@

+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
+from ._explorers import LMExplorer
+from ...environment import AudioCraftEnvironment
+@LMExplorer
+def explorer(launcher):
+    partitions = AudioCraftEnvironment.get_slurm_partitions(['team', 'global'])
+    launcher.slurm_(gpus=32, partition=partitions)
+    launcher.bind_(solver='musicgen/musicgen_melody_32khz')
+    # replace this by the desired music dataset
+    launcher.bind_(dset='internal/music_400k_32khz')
+    fsdp = {'autocast': False, 'fsdp.use': True}
+    medium = {'model/lm/model_scale': 'medium'}
+    large = {'model/lm/model_scale': 'large'}
+    cfg_low = {'classifier_free_guidance.training_dropout': 0.2}
+    wd_low = {'conditioners.description.t5.word_dropout': 0.2}
+    adam = {'optim.optimizer': 'adamw', 'optim.lr': 1e-4}
+    cache_path = {'conditioners.self_wav.chroma_stem.cache_path':
+                  '/fsx-audio-craft-llm/jadecopet/experiments/audiocraft/caches/chroma_stem'}
+    # CACHE GENERATION JOBS
+    n_cache_gen_jobs = 4
+    gen_sub = launcher.slurm(gpus=1)
+    gen_sub.bind_(
+        cache_path, {
+            # the cache is always computed over the whole file, so duration doesn't matter here.
+            'dataset.segment_duration': 2.,
+            'dataset.batch_size': 8,
+            'dataset.train.permutation_on_files': True,  # try to not repeat files.
+            'optim.epochs': 10,
+            'model/lm/model_scale': 'xsmall',
+        })
+    with gen_sub.job_array():
+        for gen_job in range(n_cache_gen_jobs):
+            gen_sub({'dataset.train.shuffle_seed': gen_job})
+    # ACTUAL TRAINING JOBS.
+    launcher.bind_(fsdp)
+    launcher.slurm_(gpus=32).bind_(label='32gpus')
+    with launcher.job_array():
+        sub = launcher.bind()
+        sub()
+        sub(cache_path)
+    launcher.slurm_(gpus=64).bind_(label='64gpus')
+    with launcher.job_array():
+        sub = launcher.bind()
+        sub(medium, adam)
+    launcher.slurm_(gpus=96).bind_(label='96gpus')
+    with launcher.job_array():
+        sub = launcher.bind()
+        sub(large, cfg_low, wd_low, adam, {'optim.max_norm': 3})

audiocraft/grids/musicgen/musicgen_pretrained_32khz_eval.py ADDED Viewed

	@@ -0,0 +1,99 @@

+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
+"""
+Evaluation with objective metrics for the pretrained MusicGen models.
+This grid takes signatures from the training grid and runs the evaluation-only stage.
+When running the grid for the first time, please use:
+REGEN=1 dora grid musicgen.musicgen_pretrained_32khz_eval
+and re-use the REGEN=1 option when the grid is changed to force regenerating it.
+Note that you need the proper metrics external libraries setup to use all
+the objective metrics activated in this grid. Refer to the README for more information.
+"""
+import os
+from ._explorers import GenerationEvalExplorer
+from ...environment import AudioCraftEnvironment
+from ... import train
+def eval(launcher, batch_size: int = 32, eval_melody: bool = False):
+    opts = {
+        'dset': 'audio/musiccaps_32khz',
+        'solver/musicgen/evaluation': 'objective_eval',
+        'execute_only': 'evaluate',
+        '+dataset.evaluate.batch_size': batch_size,
+        '+metrics.fad.tf.batch_size': 16,
+    }
+    # chroma-specific evaluation
+    chroma_opts = {
+        'dset': 'internal/music_400k_32khz',
+        'dataset.evaluate.segment_duration': 30,
+        'dataset.evaluate.num_samples': 1000,
+        'evaluate.metrics.chroma_cosine': True,
+        'evaluate.metrics.fad': False,
+        'evaluate.metrics.kld': False,
+        'evaluate.metrics.text_consistency': False,
+    }
+    # binary for FAD computation: replace this path with your own path
+    metrics_opts = {
+        'metrics.fad.tf.bin': '/data/home/jadecopet/local/usr/opt/google-research'
+    }
+    opt1 = {'generate.lm.use_sampling': True, 'generate.lm.top_k': 250, 'generate.lm.top_p': 0.}
+    opt2 = {'transformer_lm.two_step_cfg': True}
+    sub = launcher.bind(opts)
+    sub.bind_(metrics_opts)
+    # base objective metrics
+    sub(opt1, opt2)
+    if eval_melody:
+        # chroma-specific metrics
+        sub(opt1, opt2, chroma_opts)
+@GenerationEvalExplorer
+def explorer(launcher):
+    partitions = AudioCraftEnvironment.get_slurm_partitions(['team', 'global'])
+    launcher.slurm_(gpus=4, partition=partitions)
+    if 'REGEN' not in os.environ:
+        folder = train.main.dora.dir / 'grids' / __name__.split('.', 2)[-1]
+        with launcher.job_array():
+            for sig in folder.iterdir():
+                if not sig.is_symlink():
+                    continue
+                xp = train.main.get_xp_from_sig(sig.name)
+                launcher(xp.argv)
+        return
+    with launcher.job_array():
+        musicgen_base = launcher.bind(solver="musicgen/musicgen_base_32khz")
+        musicgen_base.bind_({'autocast': False, 'fsdp.use': True})
+        # base musicgen models
+        musicgen_base_small = musicgen_base.bind({'continue_from': '//pretrained/facebook/musicgen-small'})
+        eval(musicgen_base_small, batch_size=128)
+        musicgen_base_medium = musicgen_base.bind({'continue_from': '//pretrained/facebook/musicgen-medium'})
+        musicgen_base_medium.bind_({'model/lm/model_scale': 'medium'})
+        eval(musicgen_base_medium, batch_size=128)
+        musicgen_base_large = musicgen_base.bind({'continue_from': '//pretrained/facebook/musicgen-large'})
+        musicgen_base_large.bind_({'model/lm/model_scale': 'large'})
+        eval(musicgen_base_large, batch_size=128)
+        # melody musicgen model
+        musicgen_melody = launcher.bind(solver="musicgen/musicgen_melody_32khz")
+        musicgen_melody.bind_({'autocast': False, 'fsdp.use': True})
+        musicgen_melody_medium = musicgen_melody.bind({'continue_from': '//pretrained/facebook/musicgen-melody'})
+        musicgen_melody_medium.bind_({'model/lm/model_scale': 'medium'})
+        eval(musicgen_melody_medium, batch_size=128, eval_melody=True)

audiocraft/grids/musicgen/musicgen_stereo_finetune_32khz.py ADDED Viewed

	@@ -0,0 +1,57 @@

+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
+from pathlib import Path
+from ._explorers import LMExplorer
+from ...environment import AudioCraftEnvironment
+@LMExplorer
+def explorer(launcher):
+    partitions = AudioCraftEnvironment.get_slurm_partitions(['team', 'global'])
+    launcher.slurm_(gpus=32, partition=partitions)
+    launcher.bind_(solver='musicgen/musicgen_base_32khz')
+    # replace this by the desired music dataset, which needs to be stereo
+    launcher.bind_(dset='audio/example')
+    fsdp = {'autocast': False, 'fsdp.use': True}
+    medium = {'model/lm/model_scale': 'medium'}
+    large = {'model/lm/model_scale': 'large'}
+    cfg_low = {'classifier_free_guidance.training_dropout': 0.2}
+    wd_low = {'conditioners.description.t5.word_dropout': 0.2}
+    adam = {'optim.optimizer': 'adamw', 'optim.lr': 1e-4}
+    stereo = {
+        'codebooks_pattern.delay.delays': [0, 0, 1, 1, 2, 2, 3, 3],
+        'transformer_lm.n_q': 8,
+        'interleave_stereo_codebooks.use': True,
+        'channels': 2,
+    }
+    # You must follow the instructions in docs/MUSICGEN.md about the creation
+    # of the proper fine tuning checkpoints. We will assume they are stored under
+    # ~/checkpoints/{mode_name}.
+    checkpoints = Path.home() / 'checkpoints'
+    launcher.bind_(fsdp, stereo, {'optim.epochs': 100})
+    launcher.slurm_(gpus=32).bind_(label='32gpus')
+    with launcher.job_array():
+        sub = launcher.bind({'continue_from': str(checkpoints / 'stereo_finetune_musicgen-small.th')})
+        sub()
+    launcher.slurm_(gpus=64).bind_(label='64gpus')
+    with launcher.job_array():
+        sub = launcher.bind({'continue_from': str(checkpoints / 'stereo_finetune_musicgen-medium.th')})
+        sub(medium, adam)
+    launcher.slurm_(gpus=96).bind_(label='96gpus')
+    with launcher.job_array():
+        sub = launcher.bind({'continue_from': str(checkpoints / 'stereo_finetune_musicgen-large.th')})
+        sub(large, cfg_low, wd_low, adam, {'optim.max_norm': 3})