import argparse
import os
import json
import time
import math

import torch
from matplotlib import pyplot as plt

import generation_config
import constants
from model import VAE
from utils import set_seed, mtp_from_logits, muspy_from_mtp
from utils import print_divider
from utils import loop_muspy_music, save_midi, save_audio
from plots import plot_pianoroll, plot_structure


def generate_music(vae, z, s_cond=None, s_tensor_cond=None):
    # Decoder pass to get structure and content logits
    s_logits, c_logits = vae.decoder(z, s_cond)

    if s_tensor_cond is not None:
        s_tensor = s_tensor_cond
    else:
        # Compute the binary structure tensor from the structure logits
        s_tensor = vae.decoder._binary_from_logits(s_logits)

    # Build the (n_batches x n_bars x n_tracks x n_timesteps x Sigma x d_token)
    # multitrack pianoroll tensor containing logits for each activation and
    # hard silences elsewhere
    mtp = mtp_from_logits(c_logits, s_tensor)

    return mtp, s_tensor


def save(mtp, output_dir, s_tensor=None, n_loops=1, audio=True, z=None,
         looped_only=False, plot_proll=False, plot_struct=False):

    n_bars = mtp.size(1)
    resolution = mtp.size(3) // 4

    # Clear matplotlib cache (this solves formatting problems with first plot)
    plt.clf()

    # Iterate over batches
    for i in range(mtp.size(0)):

        # Create the output directory if it does not exist
        save_dir = os.path.join(output_dir, str(i))
        os.makedirs(save_dir, exist_ok=True)

        # Generate the MIDI song from the multitrack pianoroll
        muspy_song = muspy_from_mtp(mtp[i])

        if not looped_only:
            print("Saving MIDI sequence {} in {}...".format(i + 1, save_dir))
            save_midi(muspy_song, save_dir, name='generated')

            if audio:
                print("Saving audio sequence "
                      "{} in {}...".format(i + 1, save_dir))
                save_audio(muspy_song, save_dir, name='generated')

            if plot_proll:
                plot_pianoroll(muspy_song, save_dir)
            if plot_struct and s_tensor is not None:
                plot_structure(s_tensor[i].cpu(), save_dir)

        if n_loops > 1:
            # Copy the generated sequence n_loops times and save the looped
            # MIDI and audio files
            print("Saving MIDI sequence "
                  "{} looped {} times in {}...".format(i + 1, n_loops,
                                                       save_dir))
            extended = loop_muspy_music(muspy_song, n_loops, n_bars,
                                        resolution)
            save_midi(extended, save_dir, name='extended')

            if audio:
                print("Saving audio sequence "
                      "{} looped {} times in {}...".format(i + 1, n_loops,
                                                           save_dir))
                save_audio(extended, save_dir, name='extended')

        # Save the binary structure tensor
        if s_tensor is not None:
            with open(os.path.join(save_dir, 'structure.json'), 'w') as f:
                json.dump(s_tensor[i].tolist(), f)

        # Save the latent vector
        if z is not None:
            torch.save(z[i], os.path.join(save_dir, 'z'))

        print()


def generate_z(bs, d_model, device):
    # Sample z from a standard normal distribution
    shape = (bs, d_model)
    z_norm = torch.normal(
        torch.zeros(shape, device=device),
        torch.ones(shape, device=device)
    )
    return z_norm


def load_model(model_dir, device):
    checkpoint = torch.load(os.path.join(model_dir, 'checkpoint'),
                            map_location='cpu')
    configuration = torch.load(os.path.join(model_dir, 'configuration'),
                               map_location='cpu')
    state_dict = checkpoint['model_state_dict']

    model = VAE(**configuration['model'], device=device).to(device)
    model.load_state_dict(state_dict)
    model.eval()

    return model, configuration


def main():
    parser = argparse.ArgumentParser(
        description='Generates MIDI music with a trained model.'
    )
    parser.add_argument(
        'model_dir',
        type=str,
        help='Directory of the model.'
    )
    parser.add_argument(
        'output_dir',
        type=str,
        help='Directory to save the generated MIDI files.'
    )
    parser.add_argument(
        '--n',
        type=int,
        default=5,
        help='Number of sequences to be generated. Default is 5.'
    )
    parser.add_argument(
        '--n_loops',
        type=int,
        default=1,
        help="If greater than 1, outputs an additional MIDI file containing "
             "the sequence looped n_loops times."
    )
    parser.add_argument(
        '--no_audio',
        action='store_true',
        default=False,
        help="Flag to disable audio files generation."
    )
    parser.add_argument(
        '--s_file',
        type=str,
        help='Path to the JSON file containing the binary structure tensor.'
    )
    parser.add_argument(
        '--z_file',
        type=str,
        help='Path to a file containing a latent tensor z saved with '
             'torch.save. If provided, z is loaded from the file instead of '
             'being sampled.'
    )
    parser.add_argument(
        '--z_change',
        action='store_true',
        default=False,
        help='Flag to perturb the loaded z with small uniform noise. Only '
             'effective together with --z_file.'
    )
    parser.add_argument(
        '--use_gpu',
        action='store_true',
        default=False,
        help='Flag to enable GPU usage.'
    )
    parser.add_argument(
        '--gpu_id',
        type=int,
        default=0,
        help='Index of the GPU to be used. Default is 0.'
    )
    parser.add_argument(
        '--seed',
        type=int,
        help='Seed for reproducible generation.'
    )
    args = parser.parse_args()

    if args.seed is not None:
        set_seed(args.seed)

    audio = not args.no_audio
    device = torch.device("cuda") if args.use_gpu else torch.device("cpu")
    if args.use_gpu:
        torch.cuda.set_device(args.gpu_id)

    print_divider()
    print("Loading the model on {} device...".format(device))
    model, configuration = load_model(args.model_dir, device)

    d_model = configuration['model']['d']
    n_bars = configuration['model']['n_bars']
    n_tracks = constants.N_TRACKS
    n_timesteps = 4 * configuration['model']['resolution']
    output_dir = args.output_dir

    s, s_tensor = None, None
    if args.s_file is not None:
        print("Loading the structure tensor "
              "from {}...".format(args.s_file))

        # Load the binary structure tensor from the JSON file
        with open(args.s_file, 'r') as f:
            s_tensor = json.load(f)
        s_tensor = torch.tensor(s_tensor, dtype=torch.bool)

        # Check the structure tensor dimensions
        dims = list(s_tensor.size())
        expected = [n_bars, n_tracks, n_timesteps]
        if dims != expected:
            if len(dims) != len(expected) or dims[1:] != expected[1:]:
                raise ValueError(f"Loaded structure tensor dimensions {dims} "
                                 f"do not match expected dimensions "
                                 f"{expected}")
            elif dims[0] > n_bars:
                raise ValueError(f"First structure tensor dimension "
                                 f"{dims[0]} is higher than {n_bars}")
            else:
                # Repeat the partial structure tensor along the bars
                # dimension until it covers n_bars bars
                r = math.ceil(n_bars / dims[0])
                s_tensor = s_tensor.repeat(r, 1, 1)
                s_tensor = s_tensor[:n_bars, ...]

        # Avoid empty bars by creating a fake activation for each empty
        # (n_tracks x n_timesteps) bar matrix in position [0, 0]
        empty_mask = ~s_tensor.any(dim=-1).any(dim=-1)
        if empty_mask.any():
            print("The provided structure tensor contains empty bars. Fake "
                  "track activations will be created to avoid processing "
                  "empty bars.")
            idxs = torch.nonzero(empty_mask, as_tuple=True)
            s_tensor[idxs + (0, 0)] = True

        # Repeat the structure along a new batch dimension
        s_tensor = s_tensor.unsqueeze(0).repeat(args.n, 1, 1, 1)
        s = model.decoder._structure_from_binary(s_tensor)

    print()
    if args.z_file is not None:
        print("Loading z...")
        z = torch.load(args.z_file, map_location=device)
        z = z.unsqueeze(0)
        if args.z_change:
            # Perturb z with uniform noise in [-e/2, e/2)
            e = 0.5
            z = z + e * (torch.rand_like(z) - 0.5)
    else:
        print("Generating z...")
        z = generate_z(args.n, d_model, device)

    print("Generating music with the model...")
    s_t = time.time()
    mtp, s_tensor = generate_music(model, z, s, s_tensor)
    print("Inference time: {:.3f} s".format(time.time() - s_t))
    print()

    print("Saving MIDI files in {}...\n".format(output_dir))
    save(mtp, output_dir, s_tensor, args.n_loops, audio, z)
    print("Finished saving MIDI files.")
    print_divider()


if __name__ == '__main__':
    main()
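

# Example invocation, as a sketch only: the script filename and the paths
# below are assumptions and not part of the repository, while the flags
# mirror the argparse definitions in main().
#
#   python generate.py trained_models/my_vae out/samples \
#       --n 3 --n_loops 4 --seed 42 --use_gpu --gpu_id 0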
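
# A minimal sketch of how a --s_file structure tensor could be produced,
# assuming hypothetical dimensions (2 bars, 4 tracks, 32 timesteps); the
# script tiles a partial tensor along the bars dimension and expects the
# trailing dimensions to match the model configuration.
#
#   import json, torch
#   s = torch.zeros(2, 4, 32, dtype=torch.bool)
#   s[:, 0, 0] = True  # activate track 0 at the first timestep of each bar
#   with open('structure.json', 'w') as f:
#       json.dump(s.int().tolist(), f)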