nsd

Configuration error

App Files Files Community

Alexxggs committed on Dec 26, 2023

Commit

ac08452

1 Parent(s): 60f415d

Delete tests

Browse files

Files changed (19) hide show

tests/__init__.py +0 -5
tests/common_utils/__init__.py +0 -9
tests/common_utils/temp_utils.py +0 -56
tests/common_utils/wav_utils.py +0 -32
tests/data/__init__.py +0 -5
tests/data/test_audio.py +0 -239
tests/data/test_audio_dataset.py +0 -352
tests/data/test_audio_utils.py +0 -110
tests/models/test_encodec_model.py +0 -60
tests/models/test_musicgen.py +0 -58
tests/modules/__init__.py +0 -5
tests/modules/test_codebooks_patterns.py +0 -246
tests/modules/test_conv.py +0 -203
tests/modules/test_lstm.py +0 -32
tests/modules/test_rope.py +0 -168
tests/modules/test_seanet.py +0 -115
tests/modules/test_transformer.py +0 -253
tests/quantization/test_vq.py +0 -18
tests/utils/__init__.py +0 -5

tests/__init__.py DELETED Viewed

@@ -1,5 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the license found in the
-# LICENSE file in the root directory of this source tree.

tests/common_utils/__init__.py DELETED Viewed

@@ -1,9 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the license found in the
-# LICENSE file in the root directory of this source tree.
-# flake8: noqa
-from .temp_utils import TempDirMixin
-from .wav_utils import get_batch_white_noise, get_white_noise, save_wav

tests/common_utils/temp_utils.py DELETED Viewed

@@ -1,56 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the license found in the
-# LICENSE file in the root directory of this source tree.
-import os
-import tempfile
-class TempDirMixin:
-    """Mixin to provide easy access to temp dir.
-    """
-    temp_dir_ = None
-    @classmethod
-    def get_base_temp_dir(cls):
-        # If AUDIOCRAFT_TEST_DIR is set, use it instead of temporary directory.
-        # this is handy for debugging.
-        key = "AUDIOCRAFT_TEST_DIR"
-        if key in os.environ:
-            return os.environ[key]
-        if cls.temp_dir_ is None:
-            cls.temp_dir_ = tempfile.TemporaryDirectory()
-        return cls.temp_dir_.name
-    @classmethod
-    def tearDownClass(cls):
-        if cls.temp_dir_ is not None:
-            try:
-                cls.temp_dir_.cleanup()
-                cls.temp_dir_ = None
-            except PermissionError:
-                # On Windows there is a known issue with `shutil.rmtree`,
-                # which fails intermittently.
-                # https://github.com/python/cpython/issues/74168
-                # Following the above thread, we ignore it.
-                pass
-        super().tearDownClass()
-    @property
-    def id(self):
-        return self.__class__.__name__
-    def get_temp_path(self, *paths):
-        temp_dir = os.path.join(self.get_base_temp_dir(), self.id)
-        path = os.path.join(temp_dir, *paths)
-        os.makedirs(os.path.dirname(path), exist_ok=True)
-        return path
-    def get_temp_dir(self, *paths):
-        temp_dir = os.path.join(self.get_base_temp_dir(), self.id)
-        path = os.path.join(temp_dir, *paths)
-        os.makedirs(path, exist_ok=True)
-        return path

tests/common_utils/wav_utils.py DELETED Viewed

@@ -1,32 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the license found in the
-# LICENSE file in the root directory of this source tree.
-from pathlib import Path
-import typing as tp
-import torch
-import torchaudio
-def get_white_noise(chs: int = 1, num_frames: int = 1):
-    wav = torch.randn(chs, num_frames)
-    return wav
-def get_batch_white_noise(bs: int = 1, chs: int = 1, num_frames: int = 1):
-    wav = torch.randn(bs, chs, num_frames)
-    return wav
-def save_wav(path: str, wav: torch.Tensor, sample_rate: int):
-    fp = Path(path)
-    kwargs: tp.Dict[str, tp.Any] = {}
-    if fp.suffix == '.wav':
-        kwargs['encoding'] = 'PCM_S'
-        kwargs['bits_per_sample'] = 16
-    elif fp.suffix == '.mp3':
-        kwargs['compression'] = 320
-    torchaudio.save(str(fp), wav, sample_rate, **kwargs)

tests/data/__init__.py DELETED Viewed

@@ -1,5 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the license found in the
-# LICENSE file in the root directory of this source tree.

tests/data/test_audio.py DELETED Viewed

@@ -1,239 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the license found in the
-# LICENSE file in the root directory of this source tree.
-from itertools import product
-import random
-import numpy as np
-import torch
-import torchaudio
-from audiocraft.data.audio import audio_info, audio_read, audio_write, _av_read
-from ..common_utils import TempDirMixin, get_white_noise, save_wav
-class TestInfo(TempDirMixin):
-    def test_info_mp3(self):
-        sample_rates = [8000, 16_000]
-        channels = [1, 2]
-        duration = 1.
-        for sample_rate, ch in product(sample_rates, channels):
-            wav = get_white_noise(ch, int(sample_rate * duration))
-            path = self.get_temp_path('sample_wav.mp3')
-            save_wav(path, wav, sample_rate)
-            info = audio_info(path)
-            assert info.sample_rate == sample_rate
-            assert info.channels == ch
-            # we cannot trust torchaudio for num_frames, so we don't check
-    def _test_info_format(self, ext: str):
-        sample_rates = [8000, 16_000]
-        channels = [1, 2]
-        duration = 1.
-        for sample_rate, ch in product(sample_rates, channels):
-            n_frames = int(sample_rate * duration)
-            wav = get_white_noise(ch, n_frames)
-            path = self.get_temp_path(f'sample_wav{ext}')
-            save_wav(path, wav, sample_rate)
-            info = audio_info(path)
-            assert info.sample_rate == sample_rate
-            assert info.channels == ch
-            assert np.isclose(info.duration, duration, atol=1e-5)
-    def test_info_wav(self):
-        self._test_info_format('.wav')
-    def test_info_flac(self):
-        self._test_info_format('.flac')
-    def test_info_ogg(self):
-        self._test_info_format('.ogg')
-    def test_info_m4a(self):
-        # TODO: generate m4a file programmatically
-        # self._test_info_format('.m4a')
-        pass
-class TestRead(TempDirMixin):
-    def test_read_full_wav(self):
-        sample_rates = [8000, 16_000]
-        channels = [1, 2]
-        duration = 1.
-        for sample_rate, ch in product(sample_rates, channels):
-            n_frames = int(sample_rate * duration)
-            wav = get_white_noise(ch, n_frames).clamp(-0.99, 0.99)
-            path = self.get_temp_path('sample_wav.wav')
-            save_wav(path, wav, sample_rate)
-            read_wav, read_sr = audio_read(path)
-            assert read_sr == sample_rate
-            assert read_wav.shape[0] == wav.shape[0]
-            assert read_wav.shape[1] == wav.shape[1]
-            assert torch.allclose(read_wav, wav, rtol=1e-03, atol=1e-04)
-    def test_read_partial_wav(self):
-        sample_rates = [8000, 16_000]
-        channels = [1, 2]
-        duration = 1.
-        read_duration = torch.rand(1).item()
-        for sample_rate, ch in product(sample_rates, channels):
-            n_frames = int(sample_rate * duration)
-            read_frames = int(sample_rate * read_duration)
-            wav = get_white_noise(ch, n_frames).clamp(-0.99, 0.99)
-            path = self.get_temp_path('sample_wav.wav')
-            save_wav(path, wav, sample_rate)
-            read_wav, read_sr = audio_read(path, 0, read_duration)
-            assert read_sr == sample_rate
-            assert read_wav.shape[0] == wav.shape[0]
-            assert read_wav.shape[1] == read_frames
-            assert torch.allclose(read_wav[..., 0:read_frames], wav[..., 0:read_frames], rtol=1e-03, atol=1e-04)
-    def test_read_seek_time_wav(self):
-        sample_rates = [8000, 16_000]
-        channels = [1, 2]
-        duration = 1.
-        read_duration = 1.
-        for sample_rate, ch in product(sample_rates, channels):
-            n_frames = int(sample_rate * duration)
-            wav = get_white_noise(ch, n_frames).clamp(-0.99, 0.99)
-            path = self.get_temp_path('sample_wav.wav')
-            save_wav(path, wav, sample_rate)
-            seek_time = torch.rand(1).item()
-            read_wav, read_sr = audio_read(path, seek_time, read_duration)
-            seek_frames = int(sample_rate * seek_time)
-            expected_frames = n_frames - seek_frames
-            assert read_sr == sample_rate
-            assert read_wav.shape[0] == wav.shape[0]
-            assert read_wav.shape[1] == expected_frames
-            assert torch.allclose(read_wav, wav[..., seek_frames:], rtol=1e-03, atol=1e-04)
-    def test_read_seek_time_wav_padded(self):
-        sample_rates = [8000, 16_000]
-        channels = [1, 2]
-        duration = 1.
-        read_duration = 1.
-        for sample_rate, ch in product(sample_rates, channels):
-            n_frames = int(sample_rate * duration)
-            read_frames = int(sample_rate * read_duration)
-            wav = get_white_noise(ch, n_frames).clamp(-0.99, 0.99)
-            path = self.get_temp_path('sample_wav.wav')
-            save_wav(path, wav, sample_rate)
-            seek_time = torch.rand(1).item()
-            seek_frames = int(sample_rate * seek_time)
-            expected_frames = n_frames - seek_frames
-            read_wav, read_sr = audio_read(path, seek_time, read_duration, pad=True)
-            expected_pad_wav = torch.zeros(wav.shape[0], read_frames - expected_frames)
-            assert read_sr == sample_rate
-            assert read_wav.shape[0] == wav.shape[0]
-            assert read_wav.shape[1] == read_frames
-            assert torch.allclose(read_wav[..., :expected_frames], wav[..., seek_frames:], rtol=1e-03, atol=1e-04)
-            assert torch.allclose(read_wav[..., expected_frames:], expected_pad_wav)
-class TestAvRead(TempDirMixin):
-    def test_avread_seek_base(self):
-        sample_rates = [8000, 16_000]
-        channels = [1, 2]
-        duration = 2.
-        for sample_rate, ch in product(sample_rates, channels):
-            n_frames = int(sample_rate * duration)
-            wav = get_white_noise(ch, n_frames)
-            path = self.get_temp_path(f'reference_a_{sample_rate}_{ch}.wav')
-            save_wav(path, wav, sample_rate)
-            for _ in range(100):
-                # seek will always load a full duration segment in the file
-                seek_time = random.uniform(0.0, 1.0)
-                seek_duration = random.uniform(0.001, 1.0)
-                read_wav, read_sr = _av_read(path, seek_time, seek_duration)
-                assert read_sr == sample_rate
-                assert read_wav.shape[0] == wav.shape[0]
-                assert read_wav.shape[-1] == int(seek_duration * sample_rate)
-    def test_avread_seek_partial(self):
-        sample_rates = [8000, 16_000]
-        channels = [1, 2]
-        duration = 1.
-        for sample_rate, ch in product(sample_rates, channels):
-            n_frames = int(sample_rate * duration)
-            wav = get_white_noise(ch, n_frames)
-            path = self.get_temp_path(f'reference_b_{sample_rate}_{ch}.wav')
-            save_wav(path, wav, sample_rate)
-            for _ in range(100):
-                # seek will always load a partial segment
-                seek_time = random.uniform(0.5, 1.)
-                seek_duration = 1.
-                expected_num_frames = n_frames - int(seek_time * sample_rate)
-                read_wav, read_sr = _av_read(path, seek_time, seek_duration)
-                assert read_sr == sample_rate
-                assert read_wav.shape[0] == wav.shape[0]
-                assert read_wav.shape[-1] == expected_num_frames
-    def test_avread_seek_outofbound(self):
-        sample_rates = [8000, 16_000]
-        channels = [1, 2]
-        duration = 1.
-        for sample_rate, ch in product(sample_rates, channels):
-            n_frames = int(sample_rate * duration)
-            wav = get_white_noise(ch, n_frames)
-            path = self.get_temp_path(f'reference_c_{sample_rate}_{ch}.wav')
-            save_wav(path, wav, sample_rate)
-            seek_time = 1.5
-            read_wav, read_sr = _av_read(path, seek_time, 1.)
-            assert read_sr == sample_rate
-            assert read_wav.shape[0] == wav.shape[0]
-            assert read_wav.shape[-1] == 0
-    def test_avread_seek_edge(self):
-        sample_rates = [8000, 16_000]
-        # some of these values will have
-        # int(((frames - 1) / sample_rate) * sample_rate) != (frames - 1)
-        n_frames = [1000, 1001, 1002]
-        channels = [1, 2]
-        for sample_rate, ch, frames in product(sample_rates, channels, n_frames):
-            duration = frames / sample_rate
-            wav = get_white_noise(ch, frames)
-            path = self.get_temp_path(f'reference_d_{sample_rate}_{ch}.wav')
-            save_wav(path, wav, sample_rate)
-            seek_time = (frames - 1) / sample_rate
-            seek_frames = int(seek_time * sample_rate)
-            read_wav, read_sr = _av_read(path, seek_time, duration)
-            assert read_sr == sample_rate
-            assert read_wav.shape[0] == wav.shape[0]
-            assert read_wav.shape[-1] == (frames - seek_frames)
-class TestAudioWrite(TempDirMixin):
-    def test_audio_write_wav(self):
-        torch.manual_seed(1234)
-        sample_rates = [8000, 16_000]
-        n_frames = [1000, 1001, 1002]
-        channels = [1, 2]
-        strategies = ["peak", "clip", "rms"]
-        formats = ["wav", "mp3"]
-        for sample_rate, ch, frames in product(sample_rates, channels, n_frames):
-            for format_, strategy in product(formats, strategies):
-                wav = get_white_noise(ch, frames)
-                path = self.get_temp_path(f'pred_{sample_rate}_{ch}')
-                audio_write(path, wav, sample_rate, format_, strategy=strategy)
-                read_wav, read_sr = torchaudio.load(f'{path}.{format_}')
-                if format_ == "wav":
-                    assert read_wav.shape == wav.shape
-                if format_ == "wav" and strategy in ["peak", "rms"]:
-                    rescaled_read_wav = read_wav / read_wav.abs().max() * wav.abs().max()
-                    # for a Gaussian, the typical max scale will be less than ~5x the std.
-                    # The error when writing to disk will be ~ 1/2**15, and when rescaling, 5x that.
-                    # For RMS target, rescaling leaves more headroom by default, leading
-                    # to a 20x rescaling typically
-                    atol = (5 if strategy == "peak" else 20) / 2**15
-                    delta = (rescaled_read_wav - wav).abs().max()
-                    assert torch.allclose(wav, rescaled_read_wav, rtol=0, atol=atol), (delta, atol)
-            formats = ["wav"]  # faster unit tests

tests/data/test_audio_dataset.py DELETED Viewed

@@ -1,352 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the license found in the
-# LICENSE file in the root directory of this source tree.
-from functools import partial
-from itertools import product
-import json
-import math
-import os
-import random
-import typing as tp
-import pytest
-import torch
-from torch.utils.data import DataLoader
-from audiocraft.data.audio_dataset import (
-    AudioDataset,
-    AudioMeta,
-    _get_audio_meta,
-    load_audio_meta,
-    save_audio_meta
-)
-from audiocraft.data.zip import PathInZip
-from ..common_utils import TempDirMixin, get_white_noise, save_wav
-class TestAudioMeta(TempDirMixin):
-    def test_get_audio_meta(self):
-        sample_rates = [8000, 16_000]
-        channels = [1, 2]
-        duration = 1.
-        for sample_rate, ch in product(sample_rates, channels):
-            n_frames = int(duration * sample_rate)
-            wav = get_white_noise(ch, n_frames)
-            path = self.get_temp_path('sample.wav')
-            save_wav(path, wav, sample_rate)
-            m = _get_audio_meta(path, minimal=True)
-            assert m.path == path, 'path does not match'
-            assert m.sample_rate == sample_rate, 'sample rate does not match'
-            assert m.duration == duration, 'duration does not match'
-            assert m.amplitude is None
-            assert m.info_path is None
-    def test_save_audio_meta(self):
-        audio_meta = [
-            AudioMeta("mypath1", 1., 16_000, None, None, PathInZip('/foo/bar.zip:/relative/file1.json')),
-            AudioMeta("mypath2", 2., 16_000, None, None, PathInZip('/foo/bar.zip:/relative/file2.json'))
-            ]
-        empty_audio_meta = []
-        for idx, meta in enumerate([audio_meta, empty_audio_meta]):
-            path = self.get_temp_path(f'data_{idx}_save.jsonl')
-            save_audio_meta(path, meta)
-            with open(path, 'r') as f:
-                lines = f.readlines()
-                read_meta = [AudioMeta.from_dict(json.loads(line)) for line in lines]
-                assert len(read_meta) == len(meta)
-                for m, read_m in zip(meta, read_meta):
-                    assert m == read_m
-    def test_load_audio_meta(self):
-        try:
-            import dora
-        except ImportError:
-            dora = None  # type: ignore
-        audio_meta = [
-            AudioMeta("mypath1", 1., 16_000, None, None, PathInZip('/foo/bar.zip:/relative/file1.json')),
-            AudioMeta("mypath2", 2., 16_000, None, None, PathInZip('/foo/bar.zip:/relative/file2.json'))
-            ]
-        empty_meta = []
-        for idx, meta in enumerate([audio_meta, empty_meta]):
-            path = self.get_temp_path(f'data_{idx}_load.jsonl')
-            with open(path, 'w') as f:
-                for m in meta:
-                    json_str = json.dumps(m.to_dict()) + '\n'
-                    f.write(json_str)
-            read_meta = load_audio_meta(path)
-            assert len(read_meta) == len(meta)
-            for m, read_m in zip(meta, read_meta):
-                if dora:
-                    m.path = dora.git_save.to_absolute_path(m.path)
-                assert m == read_m, f'original={m}, read={read_m}'
-class TestAudioDataset(TempDirMixin):
-    def _create_audio_files(self,
-                            root_name: str,
-                            num_examples: int,
-                            durations: tp.Union[float, tp.Tuple[float, float]] = (0.1, 1.),
-                            sample_rate: int = 16_000,
-                            channels: int = 1):
-        root_dir = self.get_temp_dir(root_name)
-        for i in range(num_examples):
-            if isinstance(durations, float):
-                duration = durations
-            elif isinstance(durations, tuple) and len(durations) == 1:
-                duration = durations[0]
-            elif isinstance(durations, tuple) and len(durations) == 2:
-                duration = random.uniform(durations[0], durations[1])
-            else:
-                assert False
-            n_frames = int(duration * sample_rate)
-            wav = get_white_noise(channels, n_frames)
-            path = os.path.join(root_dir, f'example_{i}.wav')
-            save_wav(path, wav, sample_rate)
-        return root_dir
-    def _create_audio_dataset(self,
-                              root_name: str,
-                              total_num_examples: int,
-                              durations: tp.Union[float, tp.Tuple[float, float]] = (0.1, 1.),
-                              sample_rate: int = 16_000,
-                              channels: int = 1,
-                              segment_duration: tp.Optional[float] = None,
-                              num_examples: int = 10,
-                              shuffle: bool = True,
-                              return_info: bool = False):
-        root_dir = self._create_audio_files(root_name, total_num_examples, durations, sample_rate, channels)
-        dataset = AudioDataset.from_path(root_dir,
-                                         minimal_meta=True,
-                                         segment_duration=segment_duration,
-                                         num_samples=num_examples,
-                                         sample_rate=sample_rate,
-                                         channels=channels,
-                                         shuffle=shuffle,
-                                         return_info=return_info)
-        return dataset
-    def test_dataset_full(self):
-        total_examples = 10
-        min_duration, max_duration = 1., 4.
-        sample_rate = 16_000
-        channels = 1
-        dataset = self._create_audio_dataset(
-            'dset', total_examples, durations=(min_duration, max_duration),
-            sample_rate=sample_rate, channels=channels, segment_duration=None)
-        assert len(dataset) == total_examples
-        assert dataset.sample_rate == sample_rate
-        assert dataset.channels == channels
-        for idx in range(len(dataset)):
-            sample = dataset[idx]
-            assert sample.shape[0] == channels
-            assert sample.shape[1] <= int(max_duration * sample_rate)
-            assert sample.shape[1] >= int(min_duration * sample_rate)
-    def test_dataset_segment(self):
-        total_examples = 10
-        num_samples = 20
-        min_duration, max_duration = 1., 4.
-        segment_duration = 1.
-        sample_rate = 16_000
-        channels = 1
-        dataset = self._create_audio_dataset(
-            'dset', total_examples, durations=(min_duration, max_duration), sample_rate=sample_rate,
-            channels=channels, segment_duration=segment_duration, num_examples=num_samples)
-        assert len(dataset) == num_samples
-        assert dataset.sample_rate == sample_rate
-        assert dataset.channels == channels
-        for idx in range(len(dataset)):
-            sample = dataset[idx]
-            assert sample.shape[0] == channels
-            assert sample.shape[1] == int(segment_duration * sample_rate)
-    def test_dataset_equal_audio_and_segment_durations(self):
-        total_examples = 1
-        num_samples = 2
-        audio_duration = 1.
-        segment_duration = 1.
-        sample_rate = 16_000
-        channels = 1
-        dataset = self._create_audio_dataset(
-            'dset', total_examples, durations=audio_duration, sample_rate=sample_rate,
-            channels=channels, segment_duration=segment_duration, num_examples=num_samples)
-        assert len(dataset) == num_samples
-        assert dataset.sample_rate == sample_rate
-        assert dataset.channels == channels
-        for idx in range(len(dataset)):
-            sample = dataset[idx]
-            assert sample.shape[0] == channels
-            assert sample.shape[1] == int(segment_duration * sample_rate)
-        # the random seek_time adds variability on audio read
-        sample_1 = dataset[0]
-        sample_2 = dataset[1]
-        assert not torch.allclose(sample_1, sample_2)
-    def test_dataset_samples(self):
-        total_examples = 1
-        num_samples = 2
-        audio_duration = 1.
-        segment_duration = 1.
-        sample_rate = 16_000
-        channels = 1
-        create_dataset = partial(
-            self._create_audio_dataset,
-            'dset', total_examples, durations=audio_duration, sample_rate=sample_rate,
-            channels=channels, segment_duration=segment_duration, num_examples=num_samples,
-        )
-        dataset = create_dataset(shuffle=True)
-        # when shuffle = True, we have different inputs for the same index across epoch
-        sample_1 = dataset[0]
-        sample_2 = dataset[0]
-        assert not torch.allclose(sample_1, sample_2)
-        dataset_noshuffle = create_dataset(shuffle=False)
-        # when shuffle = False, we have same inputs for the same index across epoch
-        sample_1 = dataset_noshuffle[0]
-        sample_2 = dataset_noshuffle[0]
-        assert torch.allclose(sample_1, sample_2)
-    def test_dataset_return_info(self):
-        total_examples = 10
-        num_samples = 20
-        min_duration, max_duration = 1., 4.
-        segment_duration = 1.
-        sample_rate = 16_000
-        channels = 1
-        dataset = self._create_audio_dataset(
-            'dset', total_examples, durations=(min_duration, max_duration), sample_rate=sample_rate,
-            channels=channels, segment_duration=segment_duration, num_examples=num_samples, return_info=True)
-        assert len(dataset) == num_samples
-        assert dataset.sample_rate == sample_rate
-        assert dataset.channels == channels
-        for idx in range(len(dataset)):
-            sample, segment_info = dataset[idx]
-            assert sample.shape[0] == channels
-            assert sample.shape[1] == int(segment_duration * sample_rate)
-            assert segment_info.sample_rate == sample_rate
-            assert segment_info.total_frames == int(segment_duration * sample_rate)
-            assert segment_info.n_frames <= int(segment_duration * sample_rate)
-            assert segment_info.seek_time >= 0
-    def test_dataset_return_info_no_segment_duration(self):
-        total_examples = 10
-        num_samples = 20
-        min_duration, max_duration = 1., 4.
-        segment_duration = None
-        sample_rate = 16_000
-        channels = 1
-        dataset = self._create_audio_dataset(
-            'dset', total_examples, durations=(min_duration, max_duration), sample_rate=sample_rate,
-            channels=channels, segment_duration=segment_duration, num_examples=num_samples, return_info=True)
-        assert len(dataset) == total_examples
-        assert dataset.sample_rate == sample_rate
-        assert dataset.channels == channels
-        for idx in range(len(dataset)):
-            sample, segment_info = dataset[idx]
-            assert sample.shape[0] == channels
-            assert sample.shape[1] == segment_info.total_frames
-            assert segment_info.sample_rate == sample_rate
-            assert segment_info.n_frames <= segment_info.total_frames
-    def test_dataset_collate_fn(self):
-        total_examples = 10
-        num_samples = 20
-        min_duration, max_duration = 1., 4.
-        segment_duration = 1.
-        sample_rate = 16_000
-        channels = 1
-        dataset = self._create_audio_dataset(
-            'dset', total_examples, durations=(min_duration, max_duration), sample_rate=sample_rate,
-            channels=channels, segment_duration=segment_duration, num_examples=num_samples, return_info=False)
-        batch_size = 4
-        dataloader = DataLoader(
-            dataset,
-            batch_size=batch_size,
-            num_workers=0
-        )
-        for idx, batch in enumerate(dataloader):
-            assert batch.shape[0] == batch_size
-    @pytest.mark.parametrize("segment_duration", [1.0, None])
-    def test_dataset_with_meta_collate_fn(self, segment_duration):
-        total_examples = 10
-        num_samples = 20
-        min_duration, max_duration = 1., 4.
-        segment_duration = 1.
-        sample_rate = 16_000
-        channels = 1
-        dataset = self._create_audio_dataset(
-            'dset', total_examples, durations=(min_duration, max_duration), sample_rate=sample_rate,
-            channels=channels, segment_duration=segment_duration, num_examples=num_samples, return_info=True)
-        batch_size = 4
-        dataloader = DataLoader(
-            dataset,
-            batch_size=batch_size,
-            collate_fn=dataset.collater,
-            num_workers=0
-        )
-        for idx, batch in enumerate(dataloader):
-            wav, infos = batch
-            assert wav.shape[0] == batch_size
-            assert len(infos) == batch_size
-    @pytest.mark.parametrize("segment_duration,sample_on_weight,sample_on_duration,a_hist,b_hist,c_hist", [
-        [1, True, True, 0.5, 0.5, 0.0],
-        [1, False, True, 0.25, 0.5, 0.25],
-        [1, True, False, 0.666, 0.333, 0.0],
-        [1, False, False, 0.333, 0.333, 0.333],
-        [None, False, False, 0.333, 0.333, 0.333]])
-    def test_sample_with_weight(self, segment_duration, sample_on_weight, sample_on_duration, a_hist, b_hist, c_hist):
-        random.seed(1234)
-        rng = torch.Generator()
-        rng.manual_seed(1234)
-        def _get_histogram(dataset, repetitions=20_000):
-            counts = {file_meta.path: 0. for file_meta in meta}
-            for _ in range(repetitions):
-                file_meta = dataset.sample_file(rng)
-                counts[file_meta.path] += 1
-            return {name: count / repetitions for name, count in counts.items()}
-        meta = [
-           AudioMeta(path='a', duration=5, sample_rate=1, weight=2),
-           AudioMeta(path='b', duration=10, sample_rate=1, weight=None),
-           AudioMeta(path='c', duration=5, sample_rate=1, weight=0),
-        ]
-        dataset = AudioDataset(
-            meta, segment_duration=segment_duration, sample_on_weight=sample_on_weight,
-            sample_on_duration=sample_on_duration)
-        hist = _get_histogram(dataset)
-        assert math.isclose(hist['a'], a_hist, abs_tol=0.01)
-        assert math.isclose(hist['b'], b_hist, abs_tol=0.01)
-        assert math.isclose(hist['c'], c_hist, abs_tol=0.01)
-    def test_meta_duration_filter_all(self):
-        meta = [
-           AudioMeta(path='a', duration=5, sample_rate=1, weight=2),
-           AudioMeta(path='b', duration=10, sample_rate=1, weight=None),
-           AudioMeta(path='c', duration=5, sample_rate=1, weight=0),
-        ]
-        try:
-            AudioDataset(meta, segment_duration=11, min_segment_ratio=1)
-            assert False
-        except AssertionError:
-            assert True
-    def test_meta_duration_filter_long(self):
-        meta = [
-           AudioMeta(path='a', duration=5, sample_rate=1, weight=2),
-           AudioMeta(path='b', duration=10, sample_rate=1, weight=None),
-           AudioMeta(path='c', duration=5, sample_rate=1, weight=0),
-        ]
-        dataset = AudioDataset(meta, segment_duration=None, min_segment_ratio=1, max_audio_duration=7)
-        assert len(dataset) == 2

tests/data/test_audio_utils.py DELETED Viewed

@@ -1,110 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the license found in the
-# LICENSE file in the root directory of this source tree.
-import julius
-import torch
-import pytest
-from audiocraft.data.audio_utils import (
-    _clip_wav,
-    convert_audio_channels,
-    convert_audio,
-    normalize_audio
-)
-from ..common_utils import get_batch_white_noise
-class TestConvertAudioChannels:
-    def test_convert_audio_channels_downmix(self):
-        b, c, t = 2, 3, 100
-        audio = get_batch_white_noise(b, c, t)
-        mixed = convert_audio_channels(audio, channels=2)
-        assert list(mixed.shape) == [b, 2, t]
-    def test_convert_audio_channels_nochange(self):
-        b, c, t = 2, 3, 100
-        audio = get_batch_white_noise(b, c, t)
-        mixed = convert_audio_channels(audio, channels=c)
-        assert list(mixed.shape) == list(audio.shape)
-    def test_convert_audio_channels_upmix(self):
-        b, c, t = 2, 1, 100
-        audio = get_batch_white_noise(b, c, t)
-        mixed = convert_audio_channels(audio, channels=3)
-        assert list(mixed.shape) == [b, 3, t]
-    def test_convert_audio_channels_upmix_error(self):
-        b, c, t = 2, 2, 100
-        audio = get_batch_white_noise(b, c, t)
-        with pytest.raises(ValueError):
-            convert_audio_channels(audio, channels=3)
-class TestConvertAudio:
-    def test_convert_audio_channels_downmix(self):
-        b, c, dur = 2, 3, 4.
-        sr = 128
-        audio = get_batch_white_noise(b, c, int(sr * dur))
-        out = convert_audio(audio, from_rate=sr, to_rate=sr, to_channels=2)
-        assert list(out.shape) == [audio.shape[0], 2, audio.shape[-1]]
-    def test_convert_audio_channels_upmix(self):
-        b, c, dur = 2, 1, 4.
-        sr = 128
-        audio = get_batch_white_noise(b, c, int(sr * dur))
-        out = convert_audio(audio, from_rate=sr, to_rate=sr, to_channels=3)
-        assert list(out.shape) == [audio.shape[0], 3, audio.shape[-1]]
-    def test_convert_audio_upsample(self):
-        b, c, dur = 2, 1, 4.
-        sr = 2
-        new_sr = 3
-        audio = get_batch_white_noise(b, c, int(sr * dur))
-        out = convert_audio(audio, from_rate=sr, to_rate=new_sr, to_channels=c)
-        out_j = julius.resample.resample_frac(audio, old_sr=sr, new_sr=new_sr)
-        assert torch.allclose(out, out_j)
-    def test_convert_audio_resample(self):
-        b, c, dur = 2, 1, 4.
-        sr = 3
-        new_sr = 2
-        audio = get_batch_white_noise(b, c, int(sr * dur))
-        out = convert_audio(audio, from_rate=sr, to_rate=new_sr, to_channels=c)
-        out_j = julius.resample.resample_frac(audio, old_sr=sr, new_sr=new_sr)
-        assert torch.allclose(out, out_j)
-class TestNormalizeAudio:
-    def test_clip_wav(self):
-        b, c, dur = 2, 1, 4.
-        sr = 3
-        audio = 10.0 * get_batch_white_noise(b, c, int(sr * dur))
-        _clip_wav(audio)
-        assert audio.abs().max() <= 1
-    def test_normalize_audio_clip(self):
-        b, c, dur = 2, 1, 4.
-        sr = 3
-        audio = 10.0 * get_batch_white_noise(b, c, int(sr * dur))
-        norm_audio = normalize_audio(audio, strategy='clip')
-        assert norm_audio.abs().max() <= 1
-    def test_normalize_audio_rms(self):
-        b, c, dur = 2, 1, 4.
-        sr = 3
-        audio = 10.0 * get_batch_white_noise(b, c, int(sr * dur))
-        norm_audio = normalize_audio(audio, strategy='rms')
-        assert norm_audio.abs().max() <= 1
-    def test_normalize_audio_peak(self):
-        b, c, dur = 2, 1, 4.
-        sr = 3
-        audio = 10.0 * get_batch_white_noise(b, c, int(sr * dur))
-        norm_audio = normalize_audio(audio, strategy='peak')
-        assert norm_audio.abs().max() <= 1

tests/models/test_encodec_model.py DELETED Viewed

@@ -1,60 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the license found in the
-# LICENSE file in the root directory of this source tree.
-import random
-import numpy as np
-import torch
-from audiocraft.models import EncodecModel
-from audiocraft.modules import SEANetEncoder, SEANetDecoder
-from audiocraft.quantization import DummyQuantizer
-class TestEncodecModel:
-    def _create_encodec_model(self,
-                              sample_rate: int,
-                              channels: int,
-                              dim: int = 5,
-                              n_filters: int = 3,
-                              n_residual_layers: int = 1,
-                              ratios: list = [5, 4, 3, 2],
-                              **kwargs):
-        frame_rate = np.prod(ratios)
-        encoder = SEANetEncoder(channels=channels, dimension=dim, n_filters=n_filters,
-                                n_residual_layers=n_residual_layers, ratios=ratios)
-        decoder = SEANetDecoder(channels=channels, dimension=dim, n_filters=n_filters,
-                                n_residual_layers=n_residual_layers, ratios=ratios)
-        quantizer = DummyQuantizer()
-        model = EncodecModel(encoder, decoder, quantizer, frame_rate=frame_rate,
-                             sample_rate=sample_rate, channels=channels, **kwargs)
-        return model
-    def test_model(self):
-        random.seed(1234)
-        sample_rate = 24_000
-        channels = 1
-        model = self._create_encodec_model(sample_rate, channels)
-        for _ in range(10):
-            length = random.randrange(1, 10_000)
-            x = torch.randn(2, channels, length)
-            res = model(x)
-            assert res.x.shape == x.shape
-    def test_model_renorm(self):
-        random.seed(1234)
-        sample_rate = 24_000
-        channels = 1
-        model_nonorm = self._create_encodec_model(sample_rate, channels, renormalize=False)
-        model_renorm = self._create_encodec_model(sample_rate, channels, renormalize=True)
-        for _ in range(10):
-            length = random.randrange(1, 10_000)
-            x = torch.randn(2, channels, length)
-            codes, scales = model_nonorm.encode(x)
-            codes, scales = model_renorm.encode(x)
-            assert scales is not None

tests/models/test_musicgen.py DELETED Viewed

@@ -1,58 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the license found in the
-# LICENSE file in the root directory of this source tree.
-import pytest
-import torch
-from audiocraft.models import MusicGen
-class TestSEANetModel:
-    def get_musicgen(self):
-        mg = MusicGen.get_pretrained(name='debug', device='cpu')
-        mg.set_generation_params(duration=2.0, extend_stride=2.)
-        return mg
-    def test_base(self):
-        mg = self.get_musicgen()
-        assert mg.frame_rate == 25
-        assert mg.sample_rate == 32000
-        assert mg.audio_channels == 1
-    def test_generate_unconditional(self):
-        mg = self.get_musicgen()
-        wav = mg.generate_unconditional(3)
-        assert list(wav.shape) == [3, 1, 64000]
-    def test_generate_continuation(self):
-        mg = self.get_musicgen()
-        prompt = torch.randn(3, 1, 32000)
-        wav = mg.generate_continuation(prompt, 32000)
-        assert list(wav.shape) == [3, 1, 64000]
-        prompt = torch.randn(2, 1, 32000)
-        wav = mg.generate_continuation(
-            prompt, 32000, ['youpi', 'lapin dort'])
-        assert list(wav.shape) == [2, 1, 64000]
-        prompt = torch.randn(2, 1, 32000)
-        with pytest.raises(AssertionError):
-            wav = mg.generate_continuation(
-                prompt, 32000, ['youpi', 'lapin dort', 'one too many'])
-    def test_generate(self):
-        mg = self.get_musicgen()
-        wav = mg.generate(
-            ['youpi', 'lapin dort'])
-        assert list(wav.shape) == [2, 1, 64000]
-    def test_generate_long(self):
-        mg = self.get_musicgen()
-        mg.max_duration = 3.
-        mg.set_generation_params(duration=4., extend_stride=2.)
-        wav = mg.generate(
-            ['youpi', 'lapin dort'])
-        assert list(wav.shape) == [2, 1, 32000 * 4]

tests/modules/__init__.py DELETED Viewed

@@ -1,5 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the license found in the
-# LICENSE file in the root directory of this source tree.

tests/modules/test_codebooks_patterns.py DELETED Viewed

@@ -1,246 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the license found in the
-# LICENSE file in the root directory of this source tree.
-import pytest
-import torch
-from audiocraft.modules.codebooks_patterns import (
-    DelayedPatternProvider,
-    ParallelPatternProvider,
-    Pattern,
-    UnrolledPatternProvider,
-)
-class TestParallelPatternProvider:
-    @pytest.mark.parametrize("n_q", [1, 4, 32])
-    @pytest.mark.parametrize("timesteps", [0, 1, 16, 100])
-    def test_get_pattern(self, n_q: int, timesteps: int):
-        provider = ParallelPatternProvider(n_q)
-        pattern = provider.get_pattern(timesteps)
-        # + 1 to account for 1st step
-        assert len(pattern.layout) == timesteps + 1
-    @pytest.mark.parametrize("n_q", [1, 4, 32])
-    @pytest.mark.parametrize("timesteps", [8, 16, 100])
-    def test_pattern_content(self, n_q: int, timesteps: int):
-        provider = ParallelPatternProvider(n_q)
-        pattern = provider.get_pattern(timesteps)
-        for s, v in enumerate(pattern.layout):
-            for i, code in enumerate(v):
-                assert i == code.q
-                assert code.t == s - 1  # account for the 1st empty step
-    @pytest.mark.parametrize("n_q", [1, 4, 32])
-    @pytest.mark.parametrize("timesteps", [8, 16, 100])
-    def test_pattern_max_delay(self, n_q: int, timesteps: int):
-        provider = ParallelPatternProvider(n_q)
-        pattern = provider.get_pattern(timesteps)
-        assert pattern.max_delay == 0
-        assert len(pattern.valid_layout) == len(pattern.layout) - pattern.max_delay
-class TestDelayedPatternProvider:
-    @pytest.mark.parametrize("n_q", [1, 4, 32])
-    @pytest.mark.parametrize("timesteps", [0, 1, 16, 100])
-    def test_get_pattern(self, n_q: int, timesteps: int):
-        delays = [
-            list(range(n_q)),
-            [0] + [1] * (n_q - 1),
-            [0] + [4] * (n_q - 1),
-        ]
-        for delay in delays:
-            provider = DelayedPatternProvider(n_q, delay)
-            pattern = provider.get_pattern(timesteps)
-            # + 1 to account for 1st step
-            assert len(pattern.layout) == timesteps + max(delay) + 1
-    @pytest.mark.parametrize("n_q", [1, 4, 32])
-    @pytest.mark.parametrize("timesteps", [8, 16, 100])
-    def test_pattern_content(self, n_q: int, timesteps: int):
-        provider = DelayedPatternProvider(n_q)
-        pattern = provider.get_pattern(timesteps)
-        for s, v in enumerate(pattern.layout):
-            for i, code in enumerate(v):
-                assert i == code.q
-                assert code.t == max(0, s - code.q - 1)
-    @pytest.mark.parametrize("timesteps", [8, 16, 100])
-    @pytest.mark.parametrize("delay", [[0, 1, 2, 3], [0, 1, 1, 1], [0, 3, 3, 3], [0, 3]])
-    def test_pattern_max_delay(self, timesteps: int, delay: list):
-        provider = DelayedPatternProvider(len(delay), delay)
-        pattern = provider.get_pattern(timesteps)
-        assert pattern.max_delay == max(delay)
-        assert len(pattern.valid_layout) == len(pattern.layout) - pattern.max_delay
-class TestUnrolledPatternProvider:
-    @pytest.mark.parametrize("timesteps", [0, 1, 16])
-    @pytest.mark.parametrize("flattening", [[0, 1, 2], [0, 1, 1]])
-    @pytest.mark.parametrize("delays", [[0, 0, 0], [0, 5, 5]])
-    def test_get_pattern(self, timesteps: int, flattening: list, delays: list):
-        n_q = len(flattening)
-        max_delay = max(delays)
-        provider = UnrolledPatternProvider(n_q, flattening, delays)
-        pattern = provider.get_pattern(timesteps)
-        assert len(pattern.layout) == provider.num_virtual_steps(timesteps) + max_delay
-    @pytest.mark.parametrize("timesteps", [0, 1, 16])
-    @pytest.mark.parametrize("flattening", [[0, 1, 2], [0, 1, 1]])
-    @pytest.mark.parametrize("delays", [[0, 0, 0], [0, 5, 5]])
-    def test_pattern_max_delay(self, timesteps: int, flattening: list, delays: list):
-        n_q = len(flattening)
-        max_delay = max(delays)
-        provider = UnrolledPatternProvider(n_q, flattening, delays)
-        pattern = provider.get_pattern(timesteps)
-        assert pattern.max_delay == max_delay
-class TestPattern:
-    def ref_build_pattern_sequence(self, z: torch.Tensor, pattern: Pattern, special_token: int):
-        """Reference method to build the sequence from the pattern without using fancy scatter."""
-        bs, n_q, T = z.shape
-        z = z.cpu().numpy()
-        assert n_q == pattern.n_q
-        assert T <= pattern.timesteps
-        inp = torch.full((bs, n_q, len(pattern.layout)), special_token, dtype=torch.long).numpy()
-        inp[:] = special_token
-        for s, v in enumerate(pattern.layout):
-            for (t, q) in v:
-                if t < T:
-                    inp[:, q, s] = z[:, q, t]
-        return torch.from_numpy(inp)
-    def ref_revert_pattern_sequence(self, z: torch.Tensor, pattern: Pattern, special_token: int):
-        """Reference method to revert the sequence from the pattern without using fancy scatter."""
-        z = z.cpu().numpy()
-        bs, n_q, S = z.shape
-        assert pattern.n_q == n_q
-        inp = torch.full((bs, pattern.n_q, pattern.timesteps), special_token, dtype=torch.long).numpy()
-        inp[:] = special_token
-        for s, v in enumerate(pattern.layout):
-            for (t, q) in v:
-                if t < pattern.timesteps:
-                    inp[:, q, t] = z[:, q, s]
-        return torch.from_numpy(inp)
-    def ref_revert_pattern_logits(self, z: torch.Tensor, pattern: Pattern, special_token: float):
-        """Reference method to revert the logits from the pattern without using fancy scatter."""
-        z = z.cpu().numpy()
-        bs, card, n_q, S = z.shape
-        assert pattern.n_q == n_q
-        ref_layout = pattern.layout
-        inp = torch.full((bs, card, pattern.n_q, pattern.timesteps), special_token, dtype=torch.float).numpy()
-        inp[:] = special_token
-        for s, v in enumerate(ref_layout[1:]):
-            if s < S:
-                for (t, q) in v:
-                    if t < pattern.timesteps:
-                        inp[:, :, q, t] = z[:, :, q, s]
-        return torch.from_numpy(inp)
-    def _get_pattern_providers(self, n_q: int):
-        pattern_provider_1 = ParallelPatternProvider(n_q)
-        pattern_provider_2 = DelayedPatternProvider(n_q, list(range(n_q)))
-        pattern_provider_3 = DelayedPatternProvider(n_q, [0] + [1] * (n_q - 1))
-        pattern_provider_4 = UnrolledPatternProvider(
-            n_q, flattening=list(range(n_q)), delays=[0] * n_q
-        )
-        pattern_provider_5 = UnrolledPatternProvider(
-            n_q, flattening=[0] + [1] * (n_q - 1), delays=[0] * n_q
-        )
-        pattern_provider_6 = UnrolledPatternProvider(
-            n_q, flattening=[0] + [1] * (n_q - 1), delays=[0] + [5] * (n_q - 1)
-        )
-        return [
-            pattern_provider_1,
-            pattern_provider_2,
-            pattern_provider_3,
-            pattern_provider_4,
-            pattern_provider_5,
-            pattern_provider_6,
-        ]
-    @pytest.mark.parametrize("n_q", [1, 4, 32])
-    @pytest.mark.parametrize("timesteps", [16, 72])
-    def test_build_pattern_sequence(self, n_q: int, timesteps: int):
-        bs = 2
-        card = 256
-        special_token = card
-        pattern_providers = self._get_pattern_providers(n_q)
-        for pattern_provider in pattern_providers:
-            pattern = pattern_provider.get_pattern(timesteps)
-            # we can correctly build the sequence from the pattern
-            z = torch.randint(0, card, (bs, n_q, timesteps))
-            ref_res = self.ref_build_pattern_sequence(z, pattern, special_token)
-            res, indexes, mask = pattern.build_pattern_sequence(z, special_token)
-            assert (res == ref_res).float().mean() == 1.0
-            # expected assertion fails on the number of timesteps
-            invalid_timesteps = [timesteps + 1]
-            if pattern.num_sequence_steps != pattern.timesteps:
-                invalid_timesteps.append(pattern.num_sequence_steps)
-            for i_timesteps in invalid_timesteps:
-                z2 = torch.randint(0, card, (bs, n_q, i_timesteps))
-                with pytest.raises(AssertionError):
-                    pattern.build_pattern_sequence(z2, special_token)
-            # expected assertion fails on the number of codebooks
-            invalid_qs = [0, n_q - 1, n_q + 1]
-            for i_q in invalid_qs:
-                z3 = torch.randint(0, card, (bs, i_q, timesteps))
-                with pytest.raises(AssertionError):
-                    pattern.build_pattern_sequence(z3, special_token)
-    @pytest.mark.parametrize("n_q", [1, 4, 32])
-    @pytest.mark.parametrize("timesteps", [16, 72])
-    def test_revert_pattern_sequence(self, n_q: int, timesteps: int):
-        bs = 2
-        card = 256
-        special_token = card
-        pattern_providers = self._get_pattern_providers(n_q)
-        for pattern_provider in pattern_providers:
-            pattern = pattern_provider.get_pattern(timesteps)
-            # this works assuming previous tests are successful
-            z = torch.randint(0, card, (bs, n_q, timesteps))
-            s = self.ref_build_pattern_sequence(z, pattern, special_token)
-            ref_out = self.ref_revert_pattern_sequence(s, pattern, special_token)
-            # ensure our reference script retrieve the original sequence
-            assert z.shape == ref_out.shape
-            assert (z == ref_out).float().mean() == 1.0
-            # now we can test the scatter version
-            out, indexes, mask = pattern.revert_pattern_sequence(s, special_token)
-            assert out.shape == ref_out.shape
-            assert (out == ref_out).float().mean() == 1.0
-    @pytest.mark.parametrize("n_q", [1, 4, 32])
-    @pytest.mark.parametrize("timesteps", [16, 72])
-    @pytest.mark.parametrize("card", [1, 2, 256, 1024])
-    def test_revert_pattern_logits(self, n_q: int, timesteps: int, card: int):
-        bs = 2
-        special_token = card
-        logits_special_token = float('nan')
-        pattern_providers = self._get_pattern_providers(n_q)
-        for pattern_provider in pattern_providers:
-            pattern = pattern_provider.get_pattern(timesteps)
-            # this works assuming previous tests are successful
-            z = torch.randint(0, card, (bs, n_q, timesteps))
-            s = self.ref_build_pattern_sequence(z, pattern, special_token)
-            logits = torch.randn((bs, card, n_q, s.shape[-1]))
-            ref_out = self.ref_revert_pattern_logits(logits, pattern, logits_special_token)
-            # ensure our reference script retrieve the original sequence
-            assert ref_out.shape == torch.Size([bs, card, n_q, timesteps])
-            # now we can test the scatter version
-            out, indexes, mask = pattern.revert_pattern_logits(logits, logits_special_token)
-            assert out.shape == ref_out.shape
-            assert (out == ref_out).float().mean() == 1.0

tests/modules/test_conv.py DELETED Viewed

@@ -1,203 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the license found in the
-# LICENSE file in the root directory of this source tree.
-from itertools import product
-import math
-import random
-import pytest
-import torch
-from torch import nn
-from audiocraft.modules import (
-    NormConv1d,
-    NormConvTranspose1d,
-    StreamableConv1d,
-    StreamableConvTranspose1d,
-    pad1d,
-    unpad1d,
-)
-def test_get_extra_padding_for_conv1d():
-    # TODO: Implement me!
-    pass
-def test_pad1d_zeros():
-    x = torch.randn(1, 1, 20)
-    xp1 = pad1d(x, (0, 5), mode='constant', value=0.)
-    assert xp1.shape[-1] == 25
-    xp2 = pad1d(x, (5, 5), mode='constant', value=0.)
-    assert xp2.shape[-1] == 30
-    xp3 = pad1d(x, (0, 0), mode='constant', value=0.)
-    assert xp3.shape[-1] == 20
-    xp4 = pad1d(x, (10, 30), mode='constant', value=0.)
-    assert xp4.shape[-1] == 60
-    with pytest.raises(AssertionError):
-        pad1d(x, (-1, 0), mode='constant', value=0.)
-    with pytest.raises(AssertionError):
-        pad1d(x, (0, -1), mode='constant', value=0.)
-    with pytest.raises(AssertionError):
-        pad1d(x, (-1, -1), mode='constant', value=0.)
-def test_pad1d_reflect():
-    x = torch.randn(1, 1, 20)
-    xp1 = pad1d(x, (0, 5), mode='reflect', value=0.)
-    assert xp1.shape[-1] == 25
-    xp2 = pad1d(x, (5, 5), mode='reflect', value=0.)
-    assert xp2.shape[-1] == 30
-    xp3 = pad1d(x, (0, 0), mode='reflect', value=0.)
-    assert xp3.shape[-1] == 20
-    xp4 = pad1d(x, (10, 30), mode='reflect', value=0.)
-    assert xp4.shape[-1] == 60
-    with pytest.raises(AssertionError):
-        pad1d(x, (-1, 0), mode='reflect', value=0.)
-    with pytest.raises(AssertionError):
-        pad1d(x, (0, -1), mode='reflect', value=0.)
-    with pytest.raises(AssertionError):
-        pad1d(x, (-1, -1), mode='reflect', value=0.)
-def test_unpad1d():
-    x = torch.randn(1, 1, 20)
-    u1 = unpad1d(x, (5, 5))
-    assert u1.shape[-1] == 10
-    u2 = unpad1d(x, (0, 5))
-    assert u2.shape[-1] == 15
-    u3 = unpad1d(x, (5, 0))
-    assert u3.shape[-1] == 15
-    u4 = unpad1d(x, (0, 0))
-    assert u4.shape[-1] == x.shape[-1]
-    with pytest.raises(AssertionError):
-        unpad1d(x, (-1, 0))
-    with pytest.raises(AssertionError):
-        unpad1d(x, (0, -1))
-    with pytest.raises(AssertionError):
-        unpad1d(x, (-1, -1))
-class TestNormConv1d:
-    def test_norm_conv1d_modules(self):
-        N, C, T = 2, 2, random.randrange(1, 100_000)
-        t0 = torch.randn(N, C, T)
-        C_out, kernel_size, stride = 1, 4, 1
-        expected_out_length = int((T - kernel_size) / stride + 1)
-        wn_conv = NormConv1d(C, 1, kernel_size=4, norm='weight_norm')
-        gn_conv = NormConv1d(C, 1, kernel_size=4, norm='time_group_norm')
-        nn_conv = NormConv1d(C, 1, kernel_size=4, norm='none')
-        assert isinstance(wn_conv.norm, nn.Identity)
-        assert isinstance(wn_conv.conv, nn.Conv1d)
-        assert isinstance(gn_conv.norm, nn.GroupNorm)
-        assert isinstance(gn_conv.conv, nn.Conv1d)
-        assert isinstance(nn_conv.norm, nn.Identity)
-        assert isinstance(nn_conv.conv, nn.Conv1d)
-        for conv_layer in [wn_conv, gn_conv, nn_conv]:
-            out = conv_layer(t0)
-            assert isinstance(out, torch.Tensor)
-            assert list(out.shape) == [N, C_out, expected_out_length]
-class TestNormConvTranspose1d:
-    def test_normalizations(self):
-        N, C, T = 2, 2, random.randrange(1, 100_000)
-        t0 = torch.randn(N, C, T)
-        C_out, kernel_size, stride = 1, 4, 1
-        expected_out_length = (T - 1) * stride + (kernel_size - 1) + 1
-        wn_convtr = NormConvTranspose1d(C, C_out, kernel_size=kernel_size, stride=stride, norm='weight_norm')
-        gn_convtr = NormConvTranspose1d(C, C_out, kernel_size=kernel_size, stride=stride, norm='time_group_norm')
-        nn_convtr = NormConvTranspose1d(C, C_out, kernel_size=kernel_size, stride=stride, norm='none')
-        assert isinstance(wn_convtr.norm, nn.Identity)
-        assert isinstance(wn_convtr.convtr, nn.ConvTranspose1d)
-        assert isinstance(gn_convtr.norm, nn.GroupNorm)
-        assert isinstance(gn_convtr.convtr, nn.ConvTranspose1d)
-        assert isinstance(nn_convtr.norm, nn.Identity)
-        assert isinstance(nn_convtr.convtr, nn.ConvTranspose1d)
-        for convtr_layer in [wn_convtr, gn_convtr, nn_convtr]:
-            out = convtr_layer(t0)
-            assert isinstance(out, torch.Tensor)
-            assert list(out.shape) == [N, C_out, expected_out_length]
-class TestStreamableConv1d:
-    def get_streamable_conv1d_output_length(self, length, kernel_size, stride, dilation):
-        # StreamableConv1d internally pads to make sure that the last window is full
-        padding_total = (kernel_size - 1) * dilation - (stride - 1)
-        n_frames = (length - kernel_size + padding_total) / stride + 1
-        ideal_length = (math.ceil(n_frames) - 1) * stride + (kernel_size - padding_total)
-        return ideal_length // stride
-    def test_streamable_conv1d(self):
-        N, C, T = 2, 2, random.randrange(1, 100_000)
-        t0 = torch.randn(N, C, T)
-        C_out = 1
-        # conv params are [(kernel_size, stride, dilation)]
-        conv_params = [(4, 1, 1), (4, 2, 1), (3, 1, 3), (10, 5, 1), (3, 2, 3)]
-        for causal, (kernel_size, stride, dilation) in product([False, True], conv_params):
-            expected_out_length = self.get_streamable_conv1d_output_length(T, kernel_size, stride, dilation)
-            sconv = StreamableConv1d(C, C_out, kernel_size=kernel_size, stride=stride, dilation=dilation, causal=causal)
-            out = sconv(t0)
-            assert isinstance(out, torch.Tensor)
-            print(list(out.shape), [N, C_out, expected_out_length])
-            assert list(out.shape) == [N, C_out, expected_out_length]
-class TestStreamableConvTranspose1d:
-    def get_streamable_convtr1d_output_length(self, length, kernel_size, stride):
-        padding_total = (kernel_size - stride)
-        return (length - 1) * stride - padding_total + (kernel_size - 1) + 1
-    def test_streamable_convtr1d(self):
-        N, C, T = 2, 2, random.randrange(1, 100_000)
-        t0 = torch.randn(N, C, T)
-        C_out = 1
-        with pytest.raises(AssertionError):
-            StreamableConvTranspose1d(C, C_out, kernel_size=4, causal=False, trim_right_ratio=0.5)
-            StreamableConvTranspose1d(C, C_out, kernel_size=4, causal=True, trim_right_ratio=-1.)
-            StreamableConvTranspose1d(C, C_out, kernel_size=4, causal=True, trim_right_ratio=2)
-        # causal params are [(causal, trim_right)]
-        causal_params = [(False, 1.0), (True, 1.0), (True, 0.5), (True, 0.0)]
-        # conv params are [(kernel_size, stride)]
-        conv_params = [(4, 1), (4, 2), (3, 1), (10, 5)]
-        for ((causal, trim_right_ratio), (kernel_size, stride)) in product(causal_params, conv_params):
-            expected_out_length = self.get_streamable_convtr1d_output_length(T, kernel_size, stride)
-            sconvtr = StreamableConvTranspose1d(C, C_out, kernel_size=kernel_size, stride=stride,
-                                                causal=causal, trim_right_ratio=trim_right_ratio)
-            out = sconvtr(t0)
-            assert isinstance(out, torch.Tensor)
-            assert list(out.shape) == [N, C_out, expected_out_length]

tests/modules/test_lstm.py DELETED Viewed

@@ -1,32 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the license found in the
-# LICENSE file in the root directory of this source tree.
-import random
-import torch
-from audiocraft.modules.lstm import StreamableLSTM
-class TestStreamableLSTM:
-    def test_lstm(self):
-        B, C, T = 4, 2, random.randint(1, 100)
-        lstm = StreamableLSTM(C, 3, skip=False)
-        x = torch.randn(B, C, T)
-        y = lstm(x)
-        print(y.shape)
-        assert y.shape == torch.Size([B, C, T])
-    def test_lstm_skip(self):
-        B, C, T = 4, 2, random.randint(1, 100)
-        lstm = StreamableLSTM(C, 3, skip=True)
-        x = torch.randn(B, C, T)
-        y = lstm(x)
-        assert y.shape == torch.Size([B, C, T])

tests/modules/test_rope.py DELETED Viewed

@@ -1,168 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the license found in the
-# LICENSE file in the root directory of this source tree.
-import torch
-from audiocraft.modules.rope import RotaryEmbedding
-from audiocraft.modules.transformer import StreamingTransformer, set_efficient_attention_backend
-def test_rope():
-    set_efficient_attention_backend('xformers')
-    B, T, H, C = 8, 75, 16, 128
-    rope = RotaryEmbedding(dim=C)
-    xq = torch.rand((B, T, H, C))
-    xk = torch.rand((B, T, H, C))
-    xq_out, xk_out = rope.rotate_qk(xq, xk, start=7)
-    assert list(xq_out.shape) == [B, T, H, C]
-    assert list(xk_out.shape) == [B, T, H, C]
-def test_rope_io_dtypes():
-    set_efficient_attention_backend('xformers')
-    B, T, H, C = 8, 75, 16, 128
-    rope_32 = RotaryEmbedding(dim=C, dtype=torch.float32)
-    rope_64 = RotaryEmbedding(dim=C, dtype=torch.float64)
-    # Test bfloat16 inputs w/ both 32 and 64 precision rope.
-    xq_16 = torch.rand((B, T, H, C)).to(torch.bfloat16)
-    xk_16 = torch.rand((B, T, H, C)).to(torch.bfloat16)
-    xq_out, xk_out = rope_32.rotate_qk(xq_16, xk_16)
-    assert xq_out.dtype == torch.bfloat16
-    xq_out, xk_out = rope_64.rotate_qk(xq_16, xk_16)
-    assert xq_out.dtype == torch.bfloat16
-    # Test float32 inputs w/ both 32 and 64 precision rope.
-    xq_32 = torch.rand((B, T, H, C)).to(torch.float32)
-    xk_32 = torch.rand((B, T, H, C)).to(torch.float32)
-    xq_out, xk_out = rope_32.rotate_qk(xq_32, xk_32)
-    assert xq_out.dtype == torch.float32
-    xq_out, xk_out = rope_64.rotate_qk(xq_32, xk_32)
-    assert xq_out.dtype == torch.float32
-def test_transformer_with_rope():
-    set_efficient_attention_backend('xformers')
-    torch.manual_seed(1234)
-    for pos in ['rope', 'sin_rope']:
-        tr = StreamingTransformer(
-            16, 4, 2, custom=True, dropout=0., layer_scale=0.1,
-            positional_embedding=pos)
-        tr.eval()
-        steps = 12
-        x = torch.randn(3, steps, 16)
-        out = tr(x)
-        assert list(out.shape) == list(x.shape)
-@torch.no_grad()
-def test_rope_streaming():
-    set_efficient_attention_backend('xformers')
-    torch.manual_seed(1234)
-    tr = StreamingTransformer(
-        16, 4, 2, causal=True, dropout=0.,
-        custom=True, positional_embedding='rope')
-    tr.eval()
-    steps = 12
-    x = torch.randn(3, steps, 16)
-    ref = tr(x)
-    with tr.streaming():
-        outs = []
-        frame_sizes = [1] * steps
-        for frame_size in frame_sizes:
-            frame = x[:, :frame_size]
-            x = x[:, frame_size:]
-            outs.append(tr(frame))
-    out = torch.cat(outs, dim=1)
-    assert list(out.shape) == [3, steps, 16]
-    delta = torch.norm(out - ref) / torch.norm(out)
-    assert delta < 1e-6, delta
-@torch.no_grad()
-def test_rope_streaming_past_context():
-    set_efficient_attention_backend('xformers')
-    torch.manual_seed(1234)
-    for context in [None, 10]:
-        tr = StreamingTransformer(
-            16, 4, 1 if context else 2,
-            causal=True, past_context=context, custom=True,
-            dropout=0., positional_embedding='rope')
-        tr.eval()
-        steps = 20
-        x = torch.randn(3, steps, 16)
-        ref = tr(x)
-        with tr.streaming():
-            outs = []
-            frame_sizes = [1] * steps
-            for frame_size in frame_sizes:
-                frame = x[:, :frame_size]
-                x = x[:, frame_size:]
-                outs.append(tr(frame))
-        out = torch.cat(outs, dim=1)
-        assert list(out.shape) == [3, steps, 16]
-        delta = torch.norm(out - ref) / torch.norm(out)
-        assert delta < 1e-6, delta
-def test_rope_memory_efficient():
-    set_efficient_attention_backend('xformers')
-    torch.manual_seed(1234)
-    tr = StreamingTransformer(
-        16, 4, 2, custom=True, dropout=0., layer_scale=0.1,
-        positional_embedding='rope')
-    tr_mem_efficient = StreamingTransformer(
-        16, 4, 2, dropout=0., memory_efficient=True, layer_scale=0.1,
-        positional_embedding='rope')
-    tr_mem_efficient.load_state_dict(tr.state_dict())
-    tr.eval()
-    steps = 12
-    x = torch.randn(3, steps, 16)
-    with torch.no_grad():
-        y = tr(x)
-        y2 = tr_mem_efficient(x)
-        # Check at float precision b/c this is the rope default.
-        assert torch.allclose(y, y2, atol=1e-7), (y - y2).norm()
-def test_rope_with_xpos():
-    set_efficient_attention_backend('xformers')
-    B, T, H, C = 8, 75, 16, 128
-    rope = RotaryEmbedding(dim=C, xpos=True)
-    xq = torch.rand((B, T, H, C))
-    xk = torch.rand((B, T, H, C))
-    xq_out, xk_out = rope.rotate_qk(xq, xk, start=7)
-    assert list(xq_out.shape) == [B, T, H, C]
-    assert list(xk_out.shape) == [B, T, H, C]
-def test_positional_scale():
-    set_efficient_attention_backend('xformers')
-    B, T, H, C = 8, 75, 16, 128
-    rope = RotaryEmbedding(dim=C, xpos=True, scale=0.0)
-    xq = torch.rand((B, T, H, C))
-    xk = torch.rand((B, T, H, C))
-    xq_out, xk_out = rope.rotate_qk(xq, xk, start=7)
-    assert torch.allclose(xq, xq_out)
-    assert torch.allclose(xk, xk_out)

tests/modules/test_seanet.py DELETED Viewed

@@ -1,115 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the license found in the
-# LICENSE file in the root directory of this source tree.
-from itertools import product
-import pytest
-import torch
-from audiocraft.modules.seanet import SEANetEncoder, SEANetDecoder, SEANetResnetBlock
-from audiocraft.modules import StreamableConv1d, StreamableConvTranspose1d
-class TestSEANetModel:
-    """Shape and normalization checks for SEANetEncoder and SEANetDecoder."""
-    def _assert_roundtrip_shapes(self, encoder: SEANetEncoder, decoder: SEANetDecoder):
-        """Run a random (1, 1, 24000) input through encoder then decoder and check both shapes."""
-        x = torch.randn(1, 1, 24000)
-        z = encoder(x)
-        assert list(z.shape) == [1, 128, 75], z.shape
-        y = decoder(z)
-        assert y.shape == x.shape, (x.shape, y.shape)
-    def test_base(self):
-        """Default encoder and decoder."""
-        self._assert_roundtrip_shapes(SEANetEncoder(), SEANetDecoder())
-    def test_causal(self):
-        """Encoder and decoder built with causal=True."""
-        self._assert_roundtrip_shapes(SEANetEncoder(causal=True), SEANetDecoder(causal=True))
-    def test_conv_skip_connection(self):
-        """Encoder and decoder built with true_skip=False."""
-        self._assert_roundtrip_shapes(SEANetEncoder(true_skip=False), SEANetDecoder(true_skip=False))
-    def test_seanet_encoder_decoder_final_act(self):
-        """Decoder built with a final 'Tanh' activation."""
-        self._assert_roundtrip_shapes(
-            SEANetEncoder(true_skip=False),
-            SEANetDecoder(true_skip=False, final_activation='Tanh'))
-    def _check_encoder_blocks_norm(self, encoder: SEANetEncoder, n_disable_blocks: int, norm: str):
-        """Assert that the first `n_disable_blocks` encoder blocks use 'none' and the others `norm`.
-        The expected value is computed before comparing: `assert a == 'none' if cond else norm`
-        parses as `assert ((a == 'none') if cond else norm)`, which can never fail when
-        `cond` is false because `norm` is a non-empty string.
-        """
-        n_blocks = 0
-        for layer in encoder.model:
-            if isinstance(layer, StreamableConv1d):
-                n_blocks += 1
-                expected = 'none' if n_blocks <= n_disable_blocks else norm
-                assert layer.conv.norm_type == expected, (n_blocks, layer.conv.norm_type, expected)
-            elif isinstance(layer, SEANetResnetBlock):
-                # here we add + 1 to n_blocks as we increment n_blocks just after the block
-                expected = 'none' if (n_blocks + 1) <= n_disable_blocks else norm
-                for resnet_layer in layer.block:
-                    if isinstance(resnet_layer, StreamableConv1d):
-                        assert resnet_layer.conv.norm_type == expected, (
-                            n_blocks, resnet_layer.conv.norm_type, expected)
-    def test_encoder_disable_norm(self):
-        """Sweep residual layer counts, disabled block counts and norms on the encoder."""
-        n_residuals = [0, 1, 3]
-        disable_blocks = [0, 1, 2, 3, 4, 5, 6]
-        norms = ['weight_norm', 'none']
-        # The loop variable gets its own name so that it does not shadow the list above.
-        for n_res, n_disable, norm in product(n_residuals, disable_blocks, norms):
-            encoder = SEANetEncoder(n_residual_layers=n_res, norm=norm,
-                                    disable_norm_outer_blocks=n_disable)
-            self._check_encoder_blocks_norm(encoder, n_disable, norm)
-    def _check_decoder_blocks_norm(self, decoder: SEANetDecoder, n_disable_blocks: int, norm: str):
-        """Assert that the last `n_disable_blocks` decoder blocks use 'none' and the others `norm`.
-        The expected value is computed before comparing, for the same operator
-        precedence reason as in the encoder check.
-        """
-        n_blocks = 0
-        for layer in decoder.model:
-            if isinstance(layer, StreamableConv1d):
-                n_blocks += 1
-                expected = 'none' if (decoder.n_blocks - n_blocks) < n_disable_blocks else norm
-                assert layer.conv.norm_type == expected, (n_blocks, layer.conv.norm_type, expected)
-            elif isinstance(layer, StreamableConvTranspose1d):
-                n_blocks += 1
-                expected = 'none' if (decoder.n_blocks - n_blocks) < n_disable_blocks else norm
-                assert layer.convtr.norm_type == expected, (n_blocks, layer.convtr.norm_type, expected)
-            elif isinstance(layer, SEANetResnetBlock):
-                expected = 'none' if (decoder.n_blocks - n_blocks) < n_disable_blocks else norm
-                for resnet_layer in layer.block:
-                    if isinstance(resnet_layer, StreamableConv1d):
-                        assert resnet_layer.conv.norm_type == expected, (
-                            n_blocks, resnet_layer.conv.norm_type, expected)
-    def test_decoder_disable_norm(self):
-        """Sweep residual layer counts, disabled block counts and norms on the decoder."""
-        n_residuals = [0, 1, 3]
-        disable_blocks = [0, 1, 2, 3, 4, 5, 6]
-        norms = ['weight_norm', 'none']
-        # The loop variable gets its own name so that it does not shadow the list above.
-        for n_res, n_disable, norm in product(n_residuals, disable_blocks, norms):
-            decoder = SEANetDecoder(n_residual_layers=n_res, norm=norm,
-                                    disable_norm_outer_blocks=n_disable)
-            self._check_decoder_blocks_norm(decoder, n_disable, norm)
-    def test_disable_norm_raises_exception(self):
-        """Invalid disable_norm_outer_blocks values raise AssertionError."""
-        with pytest.raises(AssertionError):
-            SEANetEncoder(disable_norm_outer_blocks=-1)
-        with pytest.raises(AssertionError):
-            SEANetEncoder(ratios=[1, 1, 2, 2], disable_norm_outer_blocks=7)
-        with pytest.raises(AssertionError):
-            SEANetDecoder(disable_norm_outer_blocks=-1)
-        with pytest.raises(AssertionError):
-            SEANetDecoder(ratios=[1, 1, 2, 2], disable_norm_outer_blocks=7)

tests/modules/test_transformer.py DELETED Viewed

@@ -1,253 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the license found in the
-# LICENSE file in the root directory of this source tree.
-from itertools import product
-import pytest
-import torch
-from audiocraft.modules.transformer import (
-    StreamingMultiheadAttention, StreamingTransformer, set_efficient_attention_backend)
-def test_transformer_causal_streaming():
-    """Check causality and past context through gradients, then streaming vs. batch outputs."""
-    torch.manual_seed(1234)
-    zero = torch.tensor(0.)
-    for context, custom in product([None, 10], [False, True]):
-        # Causality and receptive field are verified by inspecting the
-        # gradient of one output step with respect to the input.
-        num_layers = 1 if context else 2
-        tr = StreamingTransformer(
-            16, 4, num_layers,
-            causal=True, past_context=context, custom=custom,
-            dropout=0.)
-        steps = 20
-        for k in [0, 10, 15, 19]:
-            x = torch.randn(4, steps, 16, requires_grad=True)
-            tr(x)[:, k].abs().sum().backward()
-            future_grad = x.grad[:, k + 1:]
-            seen_grad = x.grad[:, :k + 1]
-            if k + 1 < steps:
-                assert torch.allclose(future_grad, zero), future_grad.norm()
-            assert not torch.allclose(seen_grad, zero), seen_grad.norm()
-            if context is not None and k > context:
-                limit = k - context - 1
-                stale_grad = x.grad[:, :limit]
-                assert torch.allclose(stale_grad, zero), stale_grad.norm()
-        # Streaming one step at a time must match evaluating the whole batch.
-        x = torch.randn(4, steps, 16)
-        y = tr(x)
-        with tr.streaming():
-            ys = [tr(x[:, k:k + 1, :]) for k in range(steps)]
-        y_stream = torch.cat(ys, dim=1)
-        delta = torch.norm(y_stream - y) / torch.norm(y)
-        assert delta < 1e-6, delta
-def test_transformer_vs_pytorch():
-    """In the non causal setting, the output must match PyTorch's TransformerEncoder."""
-    torch.manual_seed(1234)
-    for custom in (False, True):
-        tr = StreamingTransformer(
-            16, 4, 2,
-            causal=False, custom=custom, dropout=0., positional_scale=0.)
-        ref_layer = torch.nn.TransformerEncoderLayer(16, 4, dropout=0., batch_first=True)
-        reference = torch.nn.TransformerEncoder(ref_layer, 2)
-        # Both models run with the reference weights.
-        tr.load_state_dict(reference.state_dict())
-        inputs = torch.randn(4, 20, 16)
-        ours = tr(inputs)
-        theirs = reference(inputs)
-        rel_error = torch.norm(theirs - ours) / torch.norm(ours)
-        assert rel_error < 1e-6, rel_error
-def test_streaming_api():
-    """Restoring a saved streaming state must reproduce the next streaming output."""
-    tr = StreamingTransformer(16, 4, 2, causal=True, dropout=0.)
-    tr.eval()
-    steps = 12
-    x = torch.randn(1, steps, 16)
-    with torch.no_grad(), tr.streaming():
-        tr(x[:, :1])
-        # Clone the tensors so that the snapshot is not affected by later steps.
-        snapshot = {}
-        for key, value in tr.get_streaming_state().items():
-            snapshot[key] = value.clone()
-        second_step = x[:, 1:2]
-        first_pass = tr(second_step)
-        tr.set_streaming_state(snapshot)
-        replayed = tr(second_step)
-        assert torch.allclose(first_pass, replayed), (first_pass - replayed).norm()
-        assert tr.flush() is None
-def test_memory_efficient():
-    """Custom and memory-efficient transformers must agree for each attention backend."""
-    for backend in ('torch', 'xformers'):
-        torch.manual_seed(1234)
-        set_efficient_attention_backend(backend)
-        shared = dict(dropout=0., layer_scale=0.1)
-        reference = StreamingTransformer(16, 4, 2, custom=True, **shared)
-        efficient = StreamingTransformer(16, 4, 2, memory_efficient=True, **shared)
-        # Both models run with the same weights.
-        efficient.load_state_dict(reference.state_dict())
-        reference.eval()
-        n_steps = 12
-        inputs = torch.randn(3, n_steps, 16)
-        with torch.no_grad():
-            expected = reference(inputs)
-            actual = efficient(inputs)
-        assert torch.allclose(expected, actual), ((expected - actual).norm(), backend)
-def test_attention_as_float32():
-    """Enabling attention_as_float32 on a bfloat16 transformer must change its output."""
-    torch.manual_seed(1234)
-    for custom in (True, False):
-        base = StreamingTransformer(16, 4, 2, dropout=0., dtype=torch.bfloat16, custom=custom)
-        upcast = StreamingTransformer(
-            16, 4, 2, dropout=0., attention_as_float32=True, dtype=torch.bfloat16, custom=custom)
-        if not custom:
-            # we are not using autocast here because it doesn't really
-            # work as expected on CPU, so we have to manually cast the weights of the MHA.
-            for layer in upcast.layers:
-                layer.self_attn.mha.to(torch.float32)
-        upcast.load_state_dict(base.state_dict())
-        n_steps = 12
-        inputs = torch.randn(3, n_steps, 16, dtype=torch.bfloat16)
-        with torch.no_grad():
-            plain = base(inputs)
-            precise = upcast(inputs)
-        assert not torch.allclose(plain, precise), (plain - precise).norm()
-@torch.no_grad()
-def test_streaming_memory_efficient():
-    """Streaming through the memory-efficient model must match the custom model on the full input."""
-    for backend in ('torch', 'xformers'):
-        torch.manual_seed(1234)
-        set_efficient_attention_backend(backend)
-        reference = StreamingTransformer(16, 4, 2, causal=True, dropout=0., custom=True)
-        efficient = StreamingTransformer(
-            16, 4, 2, dropout=0., memory_efficient=True, causal=True)
-        # Here the reference model takes the weights of the memory-efficient one.
-        reference.load_state_dict(efficient.state_dict())
-        reference.eval()
-        efficient.eval()
-        steps = 12
-        x = torch.randn(3, steps, 16)
-        ref = reference(x)
-        with efficient.streaming():
-            # Feed the sequence one time step per call.
-            outs = [efficient(x[:, k:k + 1]) for k in range(steps)]
-        out = torch.cat(outs, dim=1)
-        delta = torch.norm(out - ref) / torch.norm(out)
-        assert delta < 1e-6, delta
-def test_cross_attention():
-    """Cross attention on an all-zero source must match no cross attention; a real source must not.
-    Also checks that a missing source on a cross-attention model, and a provided
-    source on a model without cross attention, each raise AssertionError.
-    """
-    torch.manual_seed(1234)
-    for norm_first in [True, False]:
-        m = StreamingTransformer(
-            16, 4, 2, cross_attention=False, norm_first=norm_first, dropout=0., custom=True)
-        m_cross = StreamingTransformer(
-            16, 4, 2, cross_attention=True, norm_first=norm_first, dropout=0., custom=True)
-        # strict=False because `m` has no cross-attention entries in its state dict.
-        m_cross.load_state_dict(m.state_dict(), strict=False)
-        x = torch.randn(2, 5, 16)
-        cross_x = torch.randn(2, 3, 16)
-        y_ref = m(x)
-        y_cross_zero = m_cross(x, cross_attention_src=0 * cross_x)
-        # With norm_first, the two should be exactly the same,
-        # but with norm_first=False, we get 2 normalization in a row
-        # and the epsilon value leads to a tiny change.
-        atol = 0. if norm_first else 1e-6
-        print((y_ref - y_cross_zero).norm() / y_ref.norm())
-        assert torch.allclose(y_ref, y_cross_zero, atol=atol)
-        # We now expect a difference even with a generous atol of 1e-2.
-        y_cross = m_cross(x, cross_attention_src=cross_x)
-        assert not torch.allclose(y_cross, y_cross_zero, atol=1e-2)
-        # Each invalid call gets its own context: inside a shared `pytest.raises`
-        # block the first raise would prevent the second call from ever running.
-        with pytest.raises(AssertionError):
-            _ = m_cross(x)
-        with pytest.raises(AssertionError):
-            _ = m(x, cross_attention_src=cross_x)
-def test_cross_attention_compat():
-    """Cross attention must load and match torch.nn.MultiheadAttention, including when streaming."""
-    torch.manual_seed(1234)
-    num_heads = 2
-    dim = 64 * num_heads
-    # Requesting both causal and cross attention is expected to fail.
-    with pytest.raises(AssertionError):
-        StreamingMultiheadAttention(dim, num_heads, causal=True, cross_attention=True)
-    cross_attn = StreamingMultiheadAttention(
-        dim, num_heads, dropout=0, cross_attention=True, custom=True)
-    ref_attn = torch.nn.MultiheadAttention(dim, num_heads, dropout=0, batch_first=True)
-    # We can load the regular attention state dict
-    # so we have compat when loading old checkpoints.
-    cross_attn.load_state_dict(ref_attn.state_dict())
-    queries = torch.randn(3, 7, dim)
-    keys = torch.randn(3, 9, dim)
-    values = torch.randn(3, 9, dim)
-    actual = cross_attn(queries, keys, values)[0]
-    expected = ref_attn(queries, keys, values)[0]
-    assert torch.allclose(actual, expected, atol=1e-7), (actual - expected).norm() / expected.norm()
-    # Now let's check that streaming is working properly.
-    n_queries = queries.shape[1]
-    with cross_attn.streaming():
-        chunks = [cross_attn(queries[:, step: step + 1], keys, values)[0] for step in range(n_queries)]
-    streamed = torch.cat(chunks, dim=1)
-    assert torch.allclose(streamed, actual, atol=1e-7)
-def test_repeat_kv():
-    """kv_repeat must be rejected for two invalid configurations and keep shapes with custom=True."""
-    torch.manual_seed(1234)
-    num_heads = 8
-    kv_repeat = 4
-    dim = num_heads * 64
-    # Each invalid configuration gets its own context: inside a shared `pytest.raises`
-    # block the first raise would prevent the second constructor from ever running.
-    with pytest.raises(AssertionError):
-        StreamingMultiheadAttention(
-            dim, num_heads, causal=True, kv_repeat=kv_repeat, cross_attention=True)
-    with pytest.raises(AssertionError):
-        StreamingMultiheadAttention(
-            dim, num_heads, causal=True, kv_repeat=kv_repeat)
-    mha = StreamingMultiheadAttention(
-        dim, num_heads, causal=True, kv_repeat=kv_repeat, custom=True)
-    x = torch.randn(4, 18, dim)
-    y = mha(x, x, x)[0]
-    assert x.shape == y.shape
-def test_qk_layer_norm():
-    """qk_layer_norm must keep the output shape, with and without cross attention."""
-    torch.manual_seed(1234)
-    tr = StreamingTransformer(
-        16, 4, 2, custom=True, dropout=0., qk_layer_norm=True, bias_attn=False)
-    steps = 12
-    x = torch.randn(3, steps, 16)
-    y = tr(x)
-    # Check this output too: it is overwritten below by the cross-attention run.
-    assert y.shape == x.shape
-    tr = StreamingTransformer(
-        16, 4, 2, custom=True, dropout=0., qk_layer_norm=True, cross_attention=True)
-    z = torch.randn(3, 21, 16)
-    y = tr(x, cross_attention_src=z)
-    assert y.shape == x.shape

tests/quantization/test_vq.py DELETED Viewed

@@ -1,18 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the license found in the
-# LICENSE file in the root directory of this source tree.
-import torch
-from audiocraft.quantization.vq import ResidualVectorQuantizer
-class TestResidualVectorQuantizer:
-    """Smoke test for ResidualVectorQuantizer."""
-    def test_rvq(self):
-        """The `x` field of the quantizer result must keep the input shape."""
-        inputs = torch.randn(1, 16, 2048)
-        quantizer = ResidualVectorQuantizer(n_q=8, dimension=16, bins=8)
-        result = quantizer(inputs, 1.)
-        assert result.x.shape == inputs.shape

tests/utils/__init__.py DELETED Viewed

@@ -1,5 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the license found in the
-# LICENSE file in the root directory of this source tree.