Imagen-POP-Music-Medley-Diffusion-Transformer

Sleeping

App Files Files Community

asigalov61 commited on Sep 2

Commit

ef7a679

•

1 Parent(s): 6f6e0e1

Delete Melody2Song_Seq2Seq_Music_Transformer.ipynb

Browse files

Files changed (1) hide show

Melody2Song_Seq2Seq_Music_Transformer.ipynb +0 -523

Melody2Song_Seq2Seq_Music_Transformer.ipynb DELETED Viewed

@@ -1,523 +0,0 @@
-{
-  "cells": [
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "VGrGd6__l5ch"
-      },
-      "source": [
-        "# Melody2Song Seq2Seq Music Transformer (ver. 1.0)\n",
-        "\n",
-        "***\n",
-        "\n",
-        "Powered by tegridy-tools: https://github.com/asigalov61/tegridy-tools\n",
-        "\n",
-        "***\n",
-        "\n",
-        "WARNING: This complete implementation is a functioning model of the Artificial Intelligence. Please excercise great humility, care, and respect. https://www.nscai.gov/\n",
-        "\n",
-        "***\n",
-        "\n",
-        "#### Project Los Angeles\n",
-        "\n",
-        "#### Tegridy Code 2024\n",
-        "\n",
-        "***"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "shLrgoXdl5cj"
-      },
-      "source": [
-        "# (GPU CHECK)"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "id": "X3rABEpKCO02",
-        "cellView": "form"
-      },
-      "outputs": [],
-      "source": [
-        "# @title NVIDIA GPU Check\n",
-        "!nvidia-smi"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "0RcVC4btl5ck"
-      },
-      "source": [
-        "# (SETUP ENVIRONMENT)"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "id": "viHgEaNACPTs",
-        "cellView": "form"
-      },
-      "outputs": [],
-      "source": [
-        "# @title Install requirements\n",
-        "!git clone --depth 1 https://github.com/asigalov61/tegridy-tools\n",
-        "!pip install einops\n",
-        "!pip install torch-summary\n",
-        "!apt install fluidsynth"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "id": "DzCOZU_gBiQV",
-        "cellView": "form"
-      },
-      "outputs": [],
-      "source": [
-        "# @title Load all needed modules\n",
-        "\n",
-        "print('=' * 70)\n",
-        "print('Loading needed modules...')\n",
-        "print('=' * 70)\n",
-        "\n",
-        "import os\n",
-        "import pickle\n",
-        "import random\n",
-        "import secrets\n",
-        "import tqdm\n",
-        "import math\n",
-        "import torch\n",
-        "\n",
-        "import matplotlib.pyplot as plt\n",
-        "\n",
-        "from torchsummary import summary\n",
-        "\n",
-        "%cd /content/tegridy-tools/tegridy-tools/\n",
-        "\n",
-        "import TMIDIX\n",
-        "from midi_to_colab_audio import midi_to_colab_audio\n",
-        "\n",
-        "%cd /content/tegridy-tools/tegridy-tools/X-Transformer\n",
-        "\n",
-        "from x_transformer_1_23_2 import *\n",
-        "\n",
-        "%cd /content/\n",
-        "\n",
-        "import random\n",
-        "\n",
-        "from sklearn import metrics\n",
-        "\n",
-        "from IPython.display import Audio, display\n",
-        "\n",
-        "from huggingface_hub import hf_hub_download\n",
-        "\n",
-        "from google.colab import files\n",
-        "\n",
-        "print('=' * 70)\n",
-        "print('Done')\n",
-        "print('=' * 70)\n",
-        "print('Torch version:', torch.__version__)\n",
-        "print('=' * 70)\n",
-        "print('Enjoy! :)')\n",
-        "print('=' * 70)"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "source": [
-        "# (SETUP DATA AND MODEL)"
-      ],
-      "metadata": {
-        "id": "SQ1_7P4bLdtB"
-      }
-    },
-    {
-      "cell_type": "code",
-      "source": [
-        "#@title Load Melody2Song Seq2Seq Music Trnasofmer Data and Pre-Trained Model\n",
-        "\n",
-        "#@markdown Model precision option\n",
-        "\n",
-        "model_precision = \"bfloat16\" # @param [\"bfloat16\", \"float16\"]\n",
-        "\n",
-        "plot_tokens_embeddings = True # @param {type:\"boolean\"}\n",
-        "\n",
-        "print('=' * 70)\n",
-        "print('Donwloading Melody2Song Seq2Seq Music Transformer Data File...')\n",
-        "print('=' * 70)\n",
-        "\n",
-        "data_path = '/content'\n",
-        "\n",
-        "if os.path.isfile(data_path+'/Melody2Song_Seq2Seq_Music_Transformer_Seed_Melodies_Data.pickle'):\n",
-        "  print('Data file already exists...')\n",
-        "\n",
-        "else:\n",
-        "  hf_hub_download(repo_id='asigalov61/Melody2Song-Seq2Seq-Music-Transformer',\n",
-        "                  repo_type='space',\n",
-        "                  filename='Melody2Song_Seq2Seq_Music_Transformer_Seed_Melodies_Data.pickle',\n",
-        "                  local_dir=data_path,\n",
-        "                  )\n",
-        "\n",
-        "print('=' * 70)\n",
-        "seed_melodies_data = TMIDIX.Tegridy_Any_Pickle_File_Reader('Melody2Song_Seq2Seq_Music_Transformer_Seed_Melodies_Data')\n",
-        "\n",
-        "print('=' * 70)\n",
-        "print('Loading Melody2Song Seq2Seq Music Transformer Pre-Trained Model...')\n",
-        "print('Please wait...')\n",
-        "print('=' * 70)\n",
-        "\n",
-        "full_path_to_models_dir = \"/content\"\n",
-        "\n",
-        "model_checkpoint_file_name = 'Melody2Song_Seq2Seq_Music_Transformer_Trained_Model_28482_steps_0.719_loss_0.7865_acc.pth'\n",
-        "model_path = full_path_to_models_dir+'/'+model_checkpoint_file_name\n",
-        "num_layers = 24\n",
-        "if os.path.isfile(model_path):\n",
-        "  print('Model already exists...')\n",
-        "\n",
-        "else:\n",
-        "  hf_hub_download(repo_id='asigalov61/Melody2Song-Seq2Seq-Music-Transformer',\n",
-        "                  repo_type='space',\n",
-        "                  filename=model_checkpoint_file_name,\n",
-        "                  local_dir=full_path_to_models_dir,\n",
-        "                  )\n",
-        "\n",
-        "\n",
-        "print('=' * 70)\n",
-        "print('Instantiating model...')\n",
-        "\n",
-        "torch.backends.cuda.matmul.allow_tf32 = True # allow tf32 on matmul\n",
-        "torch.backends.cudnn.allow_tf32 = True # allow tf32 on cudnn\n",
-        "device_type = 'cuda'\n",
-        "\n",
-        "if model_precision == 'bfloat16' and torch.cuda.is_bf16_supported():\n",
-        "  dtype = 'bfloat16'\n",
-        "else:\n",
-        "  dtype = 'float16'\n",
-        "\n",
-        "if model_precision == 'float16':\n",
-        "  dtype = 'float16'\n",
-        "\n",
-        "ptdtype = {'float32': torch.float32, 'bfloat16': torch.bfloat16, 'float16': torch.float16}[dtype]\n",
-        "ctx = torch.amp.autocast(device_type=device_type, dtype=ptdtype)\n",
-        "\n",
-        "SEQ_LEN = 2560\n",
-        "PAD_IDX = 514\n",
-        "\n",
-        "# instantiate the model\n",
-        "\n",
-        "model = TransformerWrapper(\n",
-        "    num_tokens = PAD_IDX+1,\n",
-        "    max_seq_len = SEQ_LEN,\n",
-        "    attn_layers = Decoder(dim = 1024, depth = num_layers, heads = 16, attn_flash = True)\n",
-        ")\n",
-        "\n",
-        "model = AutoregressiveWrapper(model, ignore_index=PAD_IDX, pad_value=PAD_IDX)\n",
-        "\n",
-        "model.cuda()\n",
-        "print('=' * 70)\n",
-        "\n",
-        "print('Loading model checkpoint...')\n",
-        "\n",
-        "model.load_state_dict(torch.load(model_path))\n",
-        "print('=' * 70)\n",
-        "\n",
-        "model.eval()\n",
-        "\n",
-        "print('Done!')\n",
-        "print('=' * 70)\n",
-        "\n",
-        "print('Model will use', dtype, 'precision...')\n",
-        "print('=' * 70)\n",
-        "\n",
-        "# Model stats\n",
-        "print('Model summary...')\n",
-        "summary(model)\n",
-        "\n",
-        "if plot_tokens_embeddings:\n",
-        "\n",
-        "  tok_emb = model.net.token_emb.emb.weight.detach().cpu().tolist()\n",
-        "\n",
-        "  cos_sim = metrics.pairwise_distances(\n",
-        "    tok_emb, metric='cosine'\n",
-        "  )\n",
-        "  plt.figure(figsize=(7, 7))\n",
-        "  plt.imshow(cos_sim, cmap=\"inferno\", interpolation=\"nearest\")\n",
-        "  im_ratio = cos_sim.shape[0] / cos_sim.shape[1]\n",
-        "  plt.colorbar(fraction=0.046 * im_ratio, pad=0.04)\n",
-        "  plt.xlabel(\"Position\")\n",
-        "  plt.ylabel(\"Position\")\n",
-        "  plt.tight_layout()\n",
-        "  plt.plot()\n",
-        "  plt.savefig(\"/content/Melody2Song-Seq2Seq-Music-Transformer-Tokens-Embeddings-Plot.png\", bbox_inches=\"tight\")"
-      ],
-      "metadata": {
-        "cellView": "form",
-        "id": "z7QLJ6FajxPA"
-      },
-      "execution_count": null,
-      "outputs": []
-    },
-    {
-      "cell_type": "markdown",
-      "source": [
-        "# (LOAD SEED MELODY)"
-      ],
-      "metadata": {
-        "id": "NdJ1_A8gNoV3"
-      }
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "id": "AIvb6MmSO9R3",
-        "cellView": "form"
-      },
-      "outputs": [],
-      "source": [
-        "# @title Load desired seed melody\n",
-        "\n",
-        "#@markdown NOTE: If custom MIDI file is not provided, sample seed melody will be used instead\n",
-        "\n",
-        "full_path_to_custom_seed_melody_MIDI_file = \"/content/tegridy-tools/tegridy-tools/seed-melody.mid\" # @param {type:\"string\"}\n",
-        "sample_seed_melody_number = 0 # @param {type:\"slider\", min:0, max:203664, step:1}\n",
-        "\n",
-        "print('=' * 70)\n",
-        "print('Loading seed melody...')\n",
-        "print('=' * 70)\n",
-        "\n",
-        "if full_path_to_custom_seed_melody_MIDI_file != '':\n",
-        "\n",
-        "  #===============================================================================\n",
-        "  # Raw single-track ms score\n",
-        "\n",
-        "  raw_score = TMIDIX.midi2single_track_ms_score(full_path_to_custom_seed_melody_MIDI_file)\n",
-        "\n",
-        "  #===============================================================================\n",
-        "  # Enhanced score notes\n",
-        "\n",
-        "  escore_notes = TMIDIX.advanced_score_processor(raw_score, return_enhanced_score_notes=True)[0]\n",
-        "\n",
-        "  #===============================================================================\n",
-        "  # Augmented enhanced score notes\n",
-        "\n",
-        "  escore_notes = TMIDIX.recalculate_score_timings(TMIDIX.augment_enhanced_score_notes(escore_notes, timings_divider=32))\n",
-        "\n",
-        "  cscore = TMIDIX.chordify_score([1000, escore_notes])\n",
-        "\n",
-        "  fixed_mel_score = TMIDIX.fix_monophonic_score_durations([c[0] for c in cscore])\n",
-        "\n",
-        "  melody = []\n",
-        "\n",
-        "  pe = fixed_mel_score[0]\n",
-        "\n",
-        "  for s in fixed_mel_score:\n",
-        "\n",
-        "      dtime = max(0, min(127, s[1]-pe[1]))\n",
-        "      dur = max(1, min(127, s[2]))\n",
-        "      ptc = max(1, min(127, s[4]))\n",
-        "\n",
-        "      chan = 1\n",
-        "\n",
-        "      melody.extend([dtime, dur+128, (128 * chan)+ptc+256])\n",
-        "\n",
-        "      pe = s\n",
-        "\n",
-        "  if len(melody) >= 192:\n",
-        "    melody = [512] + melody[:192] + [513]\n",
-        "\n",
-        "  else:\n",
-        "    mult = math.ceil(192 / len(melody))\n",
-        "    melody = melody * mult\n",
-        "    melody = [512] + melody[:192] + [513]\n",
-        "\n",
-        "  print('Loaded custom MIDI melody:', full_path_to_custom_seed_melody_MIDI_file)\n",
-        "  print('=' * 70)\n",
-        "\n",
-        "else:\n",
-        "  melody = seed_melodies_data[sample_seed_melody_number]\n",
-        "  print('Loaded sample seed melody #', sample_seed_melody_number)\n",
-        "  print('=' * 70)\n",
-        "\n",
-        "print('Sample melody INTs:', melody[:10])\n",
-        "print('=' * 70)\n",
-        "print('Done!')\n",
-        "print('=' * 70)"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "feXay_Ed7mG5"
-      },
-      "source": [
-        "# (GENERATE)"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "id": "naf65RxUXwDg",
-        "cellView": "form"
-      },
-      "outputs": [],
-      "source": [
-        "# @title Generate song from melody\n",
-        "\n",
-        "melody_MIDI_patch_number = 40 # @param {type:\"slider\", min:0, max:127, step:1}\n",
-        "accompaniment_MIDI_patch_number = 0 # @param {type:\"slider\", min:0, max:127, step:1}\n",
-        "number_of_tokens_to_generate = 900 # @param {type:\"slider\", min:15, max:2354, step:3}\n",
-        "number_of_batches_to_generate = 4 # @param {type:\"slider\", min:1, max:16, step:1}\n",
-        "top_k_value = 25 # @param {type:\"slider\", min:1, max:50, step:1}\n",
-        "temperature = 0.9 # @param {type:\"slider\", min:0.1, max:1, step:0.05}\n",
-        "render_MIDI_to_audio = True # @param {type:\"boolean\"}\n",
-        "\n",
-        "print('=' * 70)\n",
-        "print('Melody2Song Seq1Seq Music Transformer Model Generator')\n",
-        "print('=' * 70)\n",
-        "\n",
-        "print('Generating...')\n",
-        "print('=' * 70)\n",
-        "\n",
-        "model.eval()\n",
-        "\n",
-        "torch.cuda.empty_cache()\n",
-        "\n",
-        "x = (torch.tensor([melody] * number_of_batches_to_generate, dtype=torch.long, device='cuda'))\n",
-        "\n",
-        "with ctx:\n",
-        "  out = model.generate(x,\n",
-        "                      number_of_tokens_to_generate,\n",
-        "                      filter_logits_fn=top_k,\n",
-        "                      filter_kwargs={'k': top_k_value},\n",
-        "                      temperature=0.9,\n",
-        "                      return_prime=False,\n",
-        "                      verbose=True)\n",
-        "\n",
-        "output = out.tolist()\n",
-        "\n",
-        "print('=' * 70)\n",
-        "print('Done!')\n",
-        "print('=' * 70)\n",
-        "\n",
-        "#======================================================================\n",
-        "print('Rendering results...')\n",
-        "\n",
-        "for i in range(number_of_batches_to_generate):\n",
-        "\n",
-        "  print('=' * 70)\n",
-        "  print('Batch #', i)\n",
-        "  print('=' * 70)\n",
-        "\n",
-        "  out1 = output[i]\n",
-        "\n",
-        "  print('Sample INTs', out1[:12])\n",
-        "  print('=' * 70)\n",
-        "\n",
-        "  if len(out1) != 0:\n",
-        "\n",
-        "      song = out1\n",
-        "      song_f = []\n",
-        "\n",
-        "      time = 0\n",
-        "      dur = 0\n",
-        "      vel = 90\n",
-        "      pitch = 0\n",
-        "      channel = 0\n",
-        "\n",
-        "      patches = [0] * 16\n",
-        "      patches[0] = accompaniment_MIDI_patch_number\n",
-        "      patches[3] = melody_MIDI_patch_number\n",
-        "\n",
-        "      for ss in song:\n",
-        "\n",
-        "          if 0 < ss < 128:\n",
-        "\n",
-        "              time += (ss * 32)\n",
-        "\n",
-        "          if 128 < ss < 256:\n",
-        "\n",
-        "              dur = (ss-128) * 32\n",
-        "\n",
-        "          if 256 < ss < 512:\n",
-        "\n",
-        "              pitch = (ss-256) % 128\n",
-        "\n",
-        "              channel = (ss-256) // 128\n",
-        "\n",
-        "              if channel == 1:\n",
-        "                  channel = 3\n",
-        "                  vel = 110 + (pitch % 12)\n",
-        "                  song_f.append(['note', time, dur, channel, pitch, vel, melody_MIDI_patch_number])\n",
-        "\n",
-        "              else:\n",
-        "                  vel = 80 + (pitch % 12)\n",
-        "                  channel = 0\n",
-        "                  song_f.append(['note', time, dur, channel, pitch, vel, accompaniment_MIDI_patch_number])\n",
-        "\n",
-        "      detailed_stats = TMIDIX.Tegridy_ms_SONG_to_MIDI_Converter(song_f,\n",
-        "                                                                output_signature = 'Melody2Song Seq2Seq Music Transformer',\n",
-        "                                                                output_file_name = '/content/Melody2Song-Seq2Seq-Music-Transformer-Composition_'+str(i),\n",
-        "                                                                track_name='Project Los Angeles',\n",
-        "                                                                list_of_MIDI_patches=patches\n",
-        "                                                                )\n",
-        "      print('=' * 70)\n",
-        "      print('Displaying resulting composition...')\n",
-        "      print('=' * 70)\n",
-        "\n",
-        "      fname = '/content/Melody2Song-Seq2Seq-Music-Transformer-Composition_'+str(i)\n",
-        "\n",
-        "      if render_MIDI_to_audio:\n",
-        "        midi_audio = midi_to_colab_audio(fname + '.mid')\n",
-        "        display(Audio(midi_audio, rate=16000, normalize=False))\n",
-        "\n",
-        "      TMIDIX.plot_ms_SONG(song_f, plot_title=fname)"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "z87TlDTVl5cp"
-      },
-      "source": [
-        "# Congrats! You did it! :)"
-      ]
-    }
-  ],
-  "metadata": {
-    "accelerator": "GPU",
-    "colab": {
-      "gpuClass": "premium",
-      "gpuType": "L4",
-      "private_outputs": true,
-      "provenance": [],
-      "machine_shape": "hm"
-    },
-    "kernelspec": {
-      "display_name": "Python 3",
-      "name": "python3"
-    },
-    "language_info": {
-      "codemirror_mode": {
-        "name": "ipython",
-        "version": 3
-      },
-      "file_extension": ".py",
-      "mimetype": "text/x-python",
-      "name": "python",
-      "nbconvert_exporter": "python",
-      "pygments_lexer": "ipython3",
-      "version": "3.10.12"
-    }
-  },
-  "nbformat": 4,
-  "nbformat_minor": 0
-}