<a href="https://colab.research.google.com/github/teticio/audio-diffusion/blob/master/notebooks/conditional_generation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
try:
    # are we running on Google Colab?
    import google.colab
    !git clone -q https://github.com/teticio/audio-diffusion.git
    %cd audio-diffusion
    %pip install -q -r requirements.txt
except:
    pass

In [None]:
import os
import sys
# Put the parent of the current working directory at the front of the module
# search path so that packages located there take precedence on import.
sys.path.insert(0, os.path.abspath(os.pardir))

In [None]:
import torch
import urllib
import requests
from IPython.display import Audio
from audiodiffusion import AudioDiffusion
from audiodiffusion.audio_encoder import AudioEncoder

In [None]:
# Default to the CPU and switch to the GPU only when CUDA is available.
device = "cpu"
if torch.cuda.is_available():
    device = "cuda"

# Random number generator created on the same device as the computation.
generator = torch.Generator(device=device)

In [None]:
# Instantiate the AudioDiffusion wrapper for the conditional latent audio
# diffusion model with the given model id.
# NOTE(review): presumably downloads the pretrained weights from the Hugging
# Face Hub the first time it is run - confirm.
audio_diffusion = AudioDiffusion(model_id="teticio/conditional-latent-audio-diffusion-512")

In [None]:
# Load the pretrained audio encoder; its encode() method is used further down
# to turn an audio file into the encoding passed to the generation call.
audio_encoder = AudioEncoder.from_pretrained("teticio/audio-encoder")

In [None]:
# Uncomment for faster (but slightly lower quality) generation
#from diffusers import DDIMScheduler
#audio_diffusion.pipe.scheduler = DDIMScheduler()

## Download and encode preview track from Spotify

In [None]:
# Get temporary API credentials
credentials_response = requests.get(
    "https://open.spotify.com/get_access_token?reason=transport&productType=embed"
)
# Fail early with a clear HTTP error rather than a KeyError on "accessToken"
credentials_response.raise_for_status()
credentials = credentials_response.json()
headers = {
    "Accept": "application/json",
    "Content-Type": "application/json",
    "Authorization": "Bearer " + credentials["accessToken"]
}

# Search for tracks; passing `params` lets requests URL-encode the query
search_string = input("Search: ")
search_response = requests.get("https://api.spotify.com/v1/search",
                               params={
                                   "q": search_string,
                                   "type": "track"
                               },
                               headers=headers)
search_response.raise_for_status()
response = search_response.json()
tracks = response["tracks"]["items"]
if not tracks:
    raise ValueError(f"No tracks found for {search_string!r}")

# List results (numbered from 1 to match what the user is asked to type)
for number, track in enumerate(tracks, start=1):
    print(f"{number}. {track['artists'][0]['name']} - {track['name']}")
selection = input("Select a track: ")

# Validate the selection: without the range check, "0" or a negative number
# would silently pick a track counted from the end of the list
track_index = int(selection) - 1
if not 0 <= track_index < len(tracks):
    raise ValueError(
        f"Selection must be between 1 and {len(tracks)}, got {selection!r}")

# Not every track has a preview clip
preview_url = tracks[track_index].get("preview_url")
if not preview_url:
    raise ValueError(
        "No preview is available for the selected track; please choose another"
    )

# Download and encode selection
try:
    # The context manager releases the streamed connection when done
    with requests.get(preview_url, stream=True) as r:
        r.raise_for_status()
        with open("temp.mp3", "wb") as f:
            for chunk in r:
                f.write(chunk)
    # Insert a new dimension at position 1 of the encoder output and move the
    # result to the compute device (`dim` is the documented keyword)
    encoding = torch.unsqueeze(audio_encoder.encode(["temp.mp3"]),
                               dim=1).to(device)
finally:
    # Always clean up the temporary file, even if download or encoding failed
    if os.path.exists("temp.mp3"):
        os.remove("temp.mp3")

## Conditional Generation
Bear in mind that the generative model can only generate music similar to that on which it was trained. The audio encoding will influence the generation within those limitations.

In [None]:
for _ in range(10):
    # seed() picks a new non-deterministic seed for the generator and returns
    # it; printing the value makes it possible to reproduce a sample later.
    seed = generator.seed()
    print(f'Seed = {seed}')
    generator.manual_seed(seed)

    # Generate a spectrogram image and the corresponding audio, conditioned
    # on the encoding computed earlier.
    image, (sample_rate, audio) = audio_diffusion.generate_spectrogram_and_audio(
        generator=generator, encoding=encoding)
    display(image)
    display(Audio(audio, rate=sample_rate))

    # loop_it returns None when no loop points could be determined.
    loop = AudioDiffusion.loop_it(audio, sample_rate)
    if loop is None:
        print("Unable to determine loop points")
    else:
        display(Audio(loop, rate=sample_rate))