|
|
|
from transformers import AutoTokenizer, AutoModelForTextToWaveform |
|
import torch |
|
import scipy.io.wavfile as wavfile |
|
import numpy as np |
|
|
|
|
|
# Load the MusicGen text tokenizer (a T5 tokenizer under the hood) and the
# text-to-waveform model from the Hugging Face Hub.
# NOTE(review): downloads the checkpoint on first run — requires network access
# and runs at import time as a module-level side effect.
tokenizer = AutoTokenizer.from_pretrained("facebook/musicgen-small")

model = AutoModelForTextToWaveform.from_pretrained("facebook/musicgen-small")
|
|
|
def generate_music(prompt, duration_s=10):
    """Generate a mono waveform from a text prompt using MusicGen.

    Args:
        prompt: Text description of the music to generate.
        duration_s: Approximate length of the generated clip in seconds.

    Returns:
        A 1-D ``np.int16`` array of PCM samples at the model's native
        sampling rate (``model.config.audio_encoder.sampling_rate``).
    """
    inputs = tokenizer(prompt, return_tensors="pt")

    # MusicGen keeps audio parameters on the audio_encoder (Encodec)
    # sub-config; the top-level config has no `sample_rate`/`hop_length`
    # attributes, so the original access raised AttributeError.
    # `frame_rate` is the number of audio tokens produced per second.
    frame_rate = model.config.audio_encoder.frame_rate
    max_new_tokens = int(duration_s * frame_rate)

    # Inference only — no gradients needed.
    with torch.no_grad():
        audio_values = model.generate(**inputs, max_new_tokens=max_new_tokens)

    # generate() returns (batch, channels, samples). Take the first batch
    # item's first channel so the result is 1-D; a (1, samples) array would
    # be misread by scipy.io.wavfile.write as one frame with many channels.
    audio_np = audio_values[0, 0].cpu().numpy()

    # Clip before scaling: model output can slightly exceed [-1, 1] and
    # would wrap around (loud clicks) when cast to int16.
    audio_np = np.clip(audio_np, -1.0, 1.0)
    return (audio_np * 32767.0).astype(np.int16)
|
|
|
|
|
prompt = "A catchy electronic beat with a groovy bassline"

generated_audio = generate_music(prompt)

# The output sampling rate lives on the audio_encoder (Encodec) sub-config;
# MusicgenConfig itself has no `sample_rate` attribute.
sampling_rate = model.config.audio_encoder.sampling_rate

wavfile.write("generated_music.wav", sampling_rate, generated_audio)

print("Music generated and saved as 'generated_music.wav'")