Gertie01
/

MusicLM

Gertie01 committed on Feb 3, 2023

Commit

e77217f

•

1 Parent(s): 13ddd49

Update musiclm_pytorch.py

Files changed (1) hide show

musiclm_pytorch.py CHANGED Viewed

@@ -5,6 +5,7 @@ from torch import nn, einsum
 from torchaudio.transforms import Spectrogram, TimeStretch, FrequencyMasking, TimeMasking
 from audiolm_pytorch import AudioLM
 from x_clip.tokenizer import tokenizer
 from vector_quantize_pytorch import ResidualVQ
@@ -448,7 +449,7 @@ class MuLaN(nn.Module):
 # music lm
 @beartype
-class MuLaNEmbedQuantizer(nn.Module):
     def __init__(
         self,
         mulan: MuLaN,
@@ -494,6 +495,9 @@ class MuLaNEmbedQuantizer(nn.Module):
         self.set_default_namespace(namespaces[0])
     def set_default_namespace(self, namespace):
         self._default_namespace = namespace
@@ -537,6 +541,8 @@ class MusicLM(nn.Module):
         mulan_embed_quantizer: MuLaNEmbedQuantizer
     ):
         super().__init__()
         self.mulan_embed_quantizer = mulan_embed_quantizer
         self.audio_lm = audio_lm
@@ -549,7 +555,7 @@ class MusicLM(nn.Module):
         self.eval()
         texts = tokenizer.tokenize(raw_texts)
-        cond_tokens = self.mulan_embed_quantizer(texts = texts)
-        wavs = self.audio_lm.generate(cond_tokens = cond_tokens, **audio_lm_kwargs)
-        return wavs

 from torchaudio.transforms import Spectrogram, TimeStretch, FrequencyMasking, TimeMasking
 from audiolm_pytorch import AudioLM
+from audiolm_pytorch.utils import AudioConditionerBase
 from x_clip.tokenizer import tokenizer
 from vector_quantize_pytorch import ResidualVQ
 # music lm
 @beartype
+class MuLaNEmbedQuantizer(AudioConditionerBase):
     def __init__(
         self,
         mulan: MuLaN,
         self.set_default_namespace(namespaces[0])
+    def parameters(self):
+        return self.cond_embeddings.parameters()
     def set_default_namespace(self, namespace):
         self._default_namespace = namespace
         mulan_embed_quantizer: MuLaNEmbedQuantizer
     ):
         super().__init__()
+        assert not exists(audio_lm.audio_conditioner), 'mulan must not have been passed into AudioLM. it will be managed externally now, embedding the text into the joint embedding space for text-to-audio synthesis'
         self.mulan_embed_quantizer = mulan_embed_quantizer
         self.audio_lm = audio_lm
         self.eval()
         texts = tokenizer.tokenize(raw_texts)
+        text_embeds = self.mulan_embed_quantizer(texts = texts)
+        return self.audio_lm(text_embeds = text_embeds, **audio_lm_kwargs)