Spaces:

vovahimself
/

jukwi-vqvae

Runtime error

File size: 3,274 Bytes

# A simple gradio app that converts music tokens to and from audio using JukeboxVQVAE as the model and Gradio as the UI

import os
import sys
import tempfile

import gradio as gr
import torch as t
from transformers import JukeboxVQVAE

# Checkpoint the VQ-VAE is loaded from (the trailing `#@param` is a Colab form annotation)
model_id = 'openai/jukebox-5b-lyrics' #@param ['openai/jukebox-1b-lyrics', 'openai/jukebox-5b-lyrics']

# Pick the cache location depending on whether the script runs inside Google Colab
if 'google.colab' in sys.modules:

  cache_path = '/content/drive/My Drive/jukebox-webui/_data/' #@param {type:"string"}
  # Connect to your Google Drive
  from google.colab import drive
  drive.mount('/content/drive')

else:

  # Expand `~` explicitly: Python path APIs do not do it on their own, so a raw
  # '~/.cache/' would be treated as a directory literally named `~`.
  cache_path = os.path.expanduser('~/.cache/')

class Convert:
  """Namespace of converters between the representations the app handles.

  Each nested class is named after the *source* representation and each of its
  methods after the *target* one. The methods are static: they are called on
  the classes themselves (e.g. `Convert.TokensFile.to_audio`), never on instances.
  They rely on the module-level `model` and `validate_tokens_list`.
  """

  class TokenList:
    """Conversions starting from an in-memory tokens list."""

    @staticmethod
    def to_tokens_file(tokens_list):
      """Validate `tokens_list`, save it with `torch.save` and return the file path.

      The caller owns the returned file; it is not deleted automatically.
      """
      # Validate first so an invalid list does not leave an empty file behind
      checked = validate_tokens_list(tokens_list)

      # Reserve a unique file through `tempfile`. The previous hand-built
      # `tmp/<random>.jt` name called `torch.randint` without its required
      # `size` argument and pointed into a `tmp/` directory nothing creates.
      with tempfile.NamedTemporaryFile(suffix='.jt', delete=False) as handle:
        filename = handle.name

      # Save by path after the handle is closed, so the file is not open twice
      t.save(checked, filename)
      return filename

    @staticmethod
    def to_audio(tokens_list):
      """Decode the last item of the validated list into audio.

      Only `tokens_list[2:]` is passed to `model.decode(..., start_level=2)`;
      the result then has its last dimension squeezed.
      """
      # NOTE(review): this returns whatever `model.decode` yields (presumably a
      # torch tensor); confirm the consumer of the value can handle that type.
      return model.decode(validate_tokens_list(tokens_list)[2:], start_level=2).squeeze(-1)
      # TODO: Implement converting other levels besides 2

  class TokensFile:
    """Conversions starting from a tokens file loadable with `torch.load`."""

    @staticmethod
    def to_tokens_list(file):
      """Load `file` with `torch.load` and return the validated tokens list."""
      # SECURITY: `torch.load` unpickles its input, which can execute arbitrary
      # code. If `file` can come from an untrusted source (e.g. a user upload),
      # restrict loading (such as `weights_only=True`) or use a safer format.
      return validate_tokens_list(t.load(file))

    @staticmethod
    def to_audio(file):
      """Load a tokens file and decode it to audio (see `TokenList.to_audio`)."""
      return Convert.TokenList.to_audio(Convert.TokensFile.to_tokens_list(file))

  class Audio:
    """Conversions starting from audio."""

    @staticmethod
    def to_tokens_list(audio):
      """Encode `audio` with `model.encode(audio.unsqueeze(0), start_level=2)`.

      `audio` must provide `.unsqueeze`, i.e. be tensor-like.
      """
      # TODO: Generated by copilot, check if it works
      # NOTE(review): `to_tokens_file` validates this result as a list of
      # exactly 3 tensors; confirm `encode` with `start_level=2` returns all
      # three levels, otherwise that validation will reject it.
      return model.encode(audio.unsqueeze(0), start_level=2)

    @staticmethod
    def to_tokens_file(audio):
      """Encode `audio` and save the resulting tokens list to a file; return its path."""
      return Convert.TokenList.to_tokens_file(Convert.Audio.to_tokens_list(audio))

def init():
  """Load the JukeboxVQVAE checkpoint into the module-level `model`, once.

  If a global named `model` already exists, only a message is printed.
  Otherwise the checkpoint `model_id` is loaded in float16, using
  `<cache_path>/jukebox/models` as the download cache directory.
  """
  global model

  # Guard clause: nothing to do when the global has already been assigned
  if 'model' in globals():
    print("Model already initialized")
    return

  models_cache = f"{cache_path}/jukebox/models"
  model = JukeboxVQVAE.from_pretrained(
    model_id,
    torch_dtype=t.float16,
    cache_dir=models_cache,
  )

def validate_tokens_list(tokens_list):
  """Check that `tokens_list` has the expected layout and return it unchanged.

  The checks, in order:
  - `tokens_list` holds exactly 3 items;
  - all items have the same number of dimensions, and at least 2;
  - all items have the same size along dimension 0;
  - the size along dimension 1 decreases (or stays the same) from item 0 to item 2.

  Only `len(tokens_list)` and each item's `.shape` are inspected.

  Returns:
    The very same `tokens_list` object.

  Raises:
    AssertionError: if any of the checks fails.
  """

  def require(condition, message):
    # Raise explicitly instead of using `assert`, so the validation is not
    # stripped when Python runs with `-O`. AssertionError is kept so callers
    # see the same exception type as before.
    if not condition:
      raise AssertionError(message)

  require(len(tokens_list) == 3, "Invalid file format: expecting a list of 3 tensors")

  shapes = [tokens.shape for tokens in tokens_list]
  ranks = [len(shape) for shape in shapes]

  require(ranks[0] == ranks[1] == ranks[2], "Invalid file format: each tensor in the list should have the same number of dimensions")

  # Dimensions 0 and 1 are indexed below; fail with a clear message rather than an IndexError
  require(ranks[0] >= 2, "Invalid file format: each tensor in the list should have at least 2 dimensions")

  require(shapes[0][0] == shapes[1][0] == shapes[2][0], "Invalid file format: the shape along dimension 0 should be the same for all tensors in the list")

  require(shapes[0][1] >= shapes[1][1] >= shapes[2][1], "Invalid file format: the shape along dimension 1 should decrease (or stay the same) as we go from 0 to 2")

  return tokens_list


# Page layout: one file slot and one audio slot; each is the input of one
# conversion and the output of the other.
with gr.Blocks() as ui:

  # Music tokens file (input of "tokens to audio", output of "audio to tokens")
  tokens = gr.File(label='music_tokens_file')

  # Audio (output of "tokens to audio", input of "audio to tokens")
  audio = gr.Audio(label='audio')

  # NOTE(review): the handlers below pass the component values straight to the
  # converters and back; confirm the value types gradio supplies and expects
  # for `gr.File` / `gr.Audio` match what the converters take and return.

  # Primary action: tokens file -> audio
  gr.Button("Convert tokens to audio", variant='primary').click(
    fn=Convert.TokensFile.to_audio,
    inputs=tokens,
    outputs=audio,
  )

  # Secondary action: audio -> tokens file
  gr.Button("Convert audio to tokens", variant='secondary').click(
    fn=Convert.Audio.to_tokens_file,
    inputs=audio,
    outputs=tokens,
  )

# When run as a script: load the model first, then serve the UI
if __name__ == '__main__':
  init()
  ui.launch()