Spaces:

asigalov61
/

Melody2Song-Seq2Seq-Music-Transformer

Running on Zero

App Files Files Community

Melody2Song-Seq2Seq-Music-Transformer / app.py

asigalov61

Update app.py

0e99ce0 verified 27 days ago

raw

history blame

No virus

12 kB

	import os.path

	import time as reqtime
	import datetime
	from pytz import timezone

	import torch

	import spaces
	import gradio as gr

	from x_transformer_1_23_2 import *
	import random
	import tqdm

	from midi_to_colab_audio import midi_to_colab_audio
	import TMIDIX

	import matplotlib.pyplot as plt

	# True only when the SYSTEM environment variable is exactly "spaces"
	# (presumably set by the Hugging Face Spaces runtime — TODO confirm).
	# Not referenced anywhere else in the visible part of this file.
	in_space = os.getenv("SYSTEM") == "spaces"

	# =================================================================================================

	# Model / runtime configuration shared by the helpers below.
	_SEQ_LEN = 8192  # Model's max sequence length
	_PAD_IDX = 707  # Model's pad index (vocabulary size is _PAD_IDX + 1)
	_DEVICE = 'cuda'
	_CHECKPOINT_PATH = 'Chords_Progressions_Transformer_Small_2048_Trained_Model_12947_steps_0.9316_loss_0.7386_acc.pth'

	# Token layout, as established by the encode/decode code in this block:
	#   0..127   delta start time
	#   128..255 duration (+128)
	#   256..383 pitch    (+256)
	#   384..    chord id (+384)
	_DUR_OFFSET = 128
	_PITCH_OFFSET = 256
	_CHORD_OFFSET = 384


	def _load_model():
	    """Build the transformer, load the checkpoint, and return ``(model, ctx)``.

	    ``ctx`` is the autocast context manager to wrap generation calls in.
	    """
	    print('Loading model...')

	    model = TransformerWrapper(
	        num_tokens=_PAD_IDX + 1,
	        max_seq_len=_SEQ_LEN,
	        attn_layers=Decoder(dim=2048, depth=4, heads=16, attn_flash=True),
	    )
	    model = AutoregressiveWrapper(model, ignore_index=_PAD_IDX)
	    model.to(_DEVICE)
	    print('=' * 70)

	    print('Loading model checkpoint...')
	    model.load_state_dict(torch.load(_CHECKPOINT_PATH, map_location=_DEVICE))
	    print('=' * 70)

	    model.eval()

	    dtype = torch.bfloat16 if _DEVICE == 'cpu' else torch.float16
	    ctx = torch.amp.autocast(device_type=_DEVICE, dtype=dtype)

	    print('Done!')
	    print('=' * 70)

	    return model, ctx


	def _encode_midi(midi_path, strip_notes):
	    """Tokenize a MIDI file into ``(melody_chords, chords, times, durs)``.

	    ``melody_chords`` is the full token stream (only used for logging by the
	    caller); ``chords``, ``times`` and ``durs`` are the conditioning streams.
	    When ``strip_notes`` is true, chords with a single pitch class are left
	    out of ``chords`` and single-note chords are left out of ``times``/``durs``.

	    Raises:
	        gr.Error: if the file has no notes that pass the ``e[6] < 80`` filter.
	    """
	    raw_score = TMIDIX.midi2single_track_ms_score(midi_path)

	    escore_notes = TMIDIX.advanced_score_processor(raw_score, return_enhanced_score_notes=True)[0]

	    # NOTE(review): field 6 is presumably the MIDI patch and this filter is
	    # meant to drop drums (the original variable name says so) — confirm.
	    no_drums_escore_notes = [e for e in escore_notes if e[6] < 80]

	    if not no_drums_escore_notes:
	        # Previously this fell through and crashed later on undefined names.
	        raise gr.Error('The input MIDI does not contain any usable notes.')

	    no_drums_escore_notes = TMIDIX.augment_enhanced_score_notes(no_drums_escore_notes)

	    cscore = TMIDIX.chordify_score([1000, no_drums_escore_notes])

	    # Keep only the first note seen for each pitch within a chord.
	    clean_cscore = []
	    for chord in cscore:
	        seen_pitches = []
	        unique_notes = []
	        for note in chord:
	            if note[4] not in seen_pitches:
	                unique_notes.append(note)
	                seen_pitches.append(note[4])
	        clean_cscore.append(unique_notes)

	    melody_chords = []
	    chords = []
	    times = [0]
	    durs = []

	    pe = clean_cscore[0][0]  # previous event, used for delta times
	    first_chord = True

	    for c in clean_cscore:

	        # Highest pitch first.
	        c.sort(key=lambda x: x[4], reverse=True)

	        tones_chord = sorted({cc[4] % 12 for cc in c})

	        try:
	            chord_token = TMIDIX.ALL_CHORDS_SORTED.index(tones_chord)
	        except ValueError:
	            # Unknown pitch-class set: let TMIDIX map it to a known chord.
	            checked_tones_chord = TMIDIX.check_and_fix_tones_chord(tones_chord)
	            chord_token = TMIDIX.ALL_CHORDS_SORTED.index(checked_tones_chord)

	        melody_chords.append(chord_token + _CHORD_OFFSET)

	        if not strip_notes or len(tones_chord) > 1:
	            chords.append(chord_token + _CHORD_OFFSET)

	        if first_chord:
	            melody_chords.append(0)
	            first_chord = False

	        for e in c:

	            time = e[1] - pe[1]
	            dur = e[2]

	            # Round odd values up to even before halving.
	            if time != 0 and time % 2 != 0:
	                time += 1
	            if dur % 2 != 0:
	                dur += 1

	            delta_time = int(max(0, min(255, time)) / 2)
	            dur = int(max(0, min(255, dur)) / 2)
	            ptc = max(1, min(127, e[4]))

	            if delta_time != 0:
	                melody_chords.extend([delta_time, dur + _DUR_OFFSET, ptc + _PITCH_OFFSET])
	                if not strip_notes or len(c) > 1:
	                    times.append(delta_time)
	                    durs.append(dur + _DUR_OFFSET)
	            else:
	                melody_chords.extend([dur + _DUR_OFFSET, ptc + _PITCH_OFFSET])

	            pe = e

	    return melody_chords, chords, times, durs


	def _generate_tokens(model, ctx, chords, times, durs, num_chords, conditioning_type):
	    """Generate notes for each conditioning chord and return the token list.

	    For every chord the chord token (plus, depending on ``conditioning_type``,
	    its time and duration tokens) is appended to the running sequence, then
	    tokens are sampled one at a time until the model emits a chord token
	    (>= 384) or ``max_chords_limit`` pitch tokens have been produced.
	    """
	    max_chords_limit = 8
	    temperature = 0.9
	    num_memory_tokens = 4096

	    use_times = conditioning_type in ('Chords-Times', 'Chords-Times-Durations')
	    use_durs = conditioning_type == 'Chords-Times-Durations'

	    prompt_chords = chords[:num_chords]
	    output = []

	    for idx, chord in enumerate(prompt_chords):

	        output.append(chord)

	        if use_times:
	            output.append(times[idx])

	        if use_durs:
	            output.append(durs[idx])

	        x = torch.tensor([output], dtype=torch.long, device=_DEVICE)

	        o = 0
	        ncount = 0

	        while o < _CHORD_OFFSET and ncount < max_chords_limit:
	            with ctx:
	                # x is (1, seq): limit the context along the sequence axis.
	                # The original sliced axis 0, which never truncated anything.
	                out = model.generate(x[:, -num_memory_tokens:],
	                                     1,
	                                     temperature=temperature,
	                                     return_prime=False,
	                                     verbose=False)

	            o = out.tolist()[0][0]

	            if _PITCH_OFFSET <= o < _CHORD_OFFSET:
	                ncount += 1

	            # Generated chord tokens are discarded; chords come from the input.
	            if o < _CHORD_OFFSET:
	                x = torch.cat((x, out), 1)

	        output.extend(x.tolist()[0][len(output):])

	        # NOTE(review): preserved from the original — generation stops before
	        # the last selected chord whenever there are two or more chords.
	        # Possibly deliberate (durs is one item shorter than times) — confirm.
	        if idx + 1 == len(prompt_chords) - 1:
	            break

	    return output


	def _tokens_to_notes(tokens):
	    """Decode a token list into ``['note', time, dur, channel, pitch, vel, 0]`` rows.

	    Time tokens advance the clock, duration tokens set the current duration,
	    and each pitch token emits one note. Chord tokens (>= 384) are ignored.
	    """
	    song_f = []

	    time = 0
	    dur = 0
	    channel = 0

	    for ss in tokens:

	        if 0 <= ss < _DUR_OFFSET:
	            time += ss * 32

	        elif _DUR_OFFSET <= ss < _PITCH_OFFSET:
	            dur = (ss - _DUR_OFFSET) * 32

	        elif _PITCH_OFFSET <= ss < _CHORD_OFFSET:
	            pitch = ss - _PITCH_OFFSET
	            vel = max(40, pitch)
	            song_f.append(['note', time, dur, channel, pitch, vel, 0])

	    return song_f


	@spaces.GPU
	def GenerateAccompaniment(input_midi, input_num_tokens, input_conditioning_type, input_strip_notes):
	    """Gradio handler: generate notes for the chord progression of a MIDI file.

	    Args:
	        input_midi: uploaded file object; only its ``.name`` path is read.
	        input_num_tokens: number of input chords to use, clamped to 4..128.
	        input_conditioning_type: 'Chords', 'Chords-Times' or
	            'Chords-Times-Durations'; any other value behaves like 'Chords'.
	        input_strip_notes: when true, single-note chords are not used for
	            conditioning.

	    Returns:
	        ``(title, summary, midi_path, (sample_rate, audio), plot)``.

	    Raises:
	        gr.Error: if no file was uploaded, or the file yields no usable
	            notes or chords.

	    Relies on the module-level globals ``PDT`` and ``soundfont``.
	    """
	    print('=' * 70)
	    print('Req start time: {:%Y-%m-%d %H:%M:%S}'.format(datetime.datetime.now(PDT)))
	    start_time = reqtime.time()

	    if input_midi is None:
	        # Fail before the expensive model load instead of on `.name` below.
	        raise gr.Error('Please upload a MIDI file first.')

	    # The model is rebuilt on every request, as in the original code.
	    model, ctx = _load_model()

	    fn = os.path.basename(input_midi.name)

	    # int() guards against a float slider value being used as a slice bound.
	    input_num_tokens = int(max(4, min(128, input_num_tokens)))

	    print('-' * 70)
	    print('Input file name:', fn)
	    print('Req num toks:', input_num_tokens)
	    print('Conditioning type:', input_conditioning_type)
	    print('Strip notes:', input_strip_notes)
	    print('-' * 70)

	    #===============================================================================

	    melody_chords, chords, times, durs = _encode_midi(input_midi.name, input_strip_notes)

	    if not chords:
	        # E.g. "strip notes" on a melody whose chords all have one pitch class.
	        raise gr.Error('No chords were found in the input MIDI to generate from.')

	    print('=' * 70)

	    print('Sample output events', melody_chords[:5])
	    print('=' * 70)
	    print('Generating...')

	    output = _generate_tokens(model, ctx, chords, times, durs,
	                              input_num_tokens, input_conditioning_type)

	    print('=' * 70)
	    print('Done!')
	    print('=' * 70)

	    #===============================================================================
	    print('Rendering results...')

	    print('=' * 70)
	    print('Sample INTs', output[:12])
	    print('=' * 70)

	    song_f = _tokens_to_notes(output)

	    patches = [0] * 16

	    # NOTE(review): fixed output name, so concurrent requests write the same
	    # file — kept because the name is also the returned title.
	    fn1 = "Chords-Progressions-Transformer-Composition"

	    TMIDIX.Tegridy_ms_SONG_to_MIDI_Converter(song_f,
	                                             output_signature='Chords Progressions Transformer',
	                                             output_file_name=fn1,
	                                             track_name='Project Los Angeles',
	                                             list_of_MIDI_patches=patches
	                                             )

	    new_fn = fn1 + '.mid'

	    audio = midi_to_colab_audio(new_fn,
	                                soundfont_path=soundfont,
	                                sample_rate=16000,
	                                volume_scale=10,
	                                output_for_gradio=True
	                                )

	    print('Done!')
	    print('=' * 70)

	    #========================================================

	    output_midi_title = str(fn1)
	    output_midi_summary = str(song_f[:3])
	    output_midi = str(new_fn)
	    output_audio = (16000, audio)

	    output_plot = TMIDIX.plot_ms_SONG(song_f, plot_title=output_midi, return_plt=True)

	    print('Output MIDI file name:', output_midi)
	    print('Output MIDI title:', output_midi_title)
	    print('Output MIDI summary:', '')
	    print('=' * 70)

	    #========================================================

	    print('-' * 70)
	    print('Req end time: {:%Y-%m-%d %H:%M:%S}'.format(datetime.datetime.now(PDT)))
	    print('-' * 70)
	    print('Req execution time:', (reqtime.time() - start_time), 'sec')

	    return output_midi_title, output_midi_summary, output_midi, output_audio, output_plot

	# =================================================================================================

	# Globals read by GenerateAccompaniment at call time. Defined at module level
	# (not under the __main__ guard) so the handler also works when this module
	# is imported rather than run as a script.
	PDT = timezone('US/Pacific')
	soundfont = "SGM-v2.01-YamahaGrand-Guit-Bass-v2.7.sf2"

	if __name__ == "__main__":

	    print('=' * 70)
	    print('App start time: {:%Y-%m-%d %H:%M:%S}'.format(datetime.datetime.now(PDT)))
	    print('=' * 70)

	    app = gr.Blocks()
	    with app:
	        # Page header.
	        gr.Markdown("<h1 style='text-align: center; margin-bottom: 1rem'>Melody2Song Seq2Seq Music Transformer</h1>")
	        gr.Markdown("<h1 style='text-align: center; margin-bottom: 1rem'>Generate unique songs from melodies with se2seq music transformer</h1>")
	        gr.Markdown(
	            "![Visitors](https://api.visitorbadge.io/api/visitors?path=asigalov61.Melody2Song-Seq2Seq-Music-Transformer&style=flat)\n\n")

	        # Inputs, in the order GenerateAccompaniment expects them.
	        input_midi = gr.File(label="Input MIDI", file_types=[".midi", ".mid", ".kar"])
	        input_num_tokens = gr.Slider(4, 128, value=32, step=1, label="Number of composition chords to generate progression for")
	        # Explicit default: without it the handler receives None, which it
	        # treats the same as "Chords".
	        input_conditioning_type = gr.Radio(["Chords", "Chords-Times", "Chords-Times-Durations"], value="Chords", label="Conditioning type")
	        input_strip_notes = gr.Checkbox(label="Strip notes from the composition")

	        run_btn = gr.Button("generate", variant="primary")

	        gr.Markdown("## Generation results")

	        # Outputs; the click() list below fixes their mapping to return values.
	        output_midi_title = gr.Textbox(label="Output MIDI title")
	        output_midi_summary = gr.Textbox(label="Output MIDI summary")
	        output_audio = gr.Audio(label="Output MIDI audio", format="wav", elem_id="midi_audio")
	        output_plot = gr.Plot(label="Output MIDI score plot")
	        output_midi = gr.File(label="Output MIDI file", file_types=[".mid"])

	        run_event = run_btn.click(GenerateAccompaniment,
	                                  [input_midi, input_num_tokens, input_conditioning_type, input_strip_notes],
	                                  [output_midi_title, output_midi_summary, output_midi, output_audio, output_plot])

	    # Launch once the Blocks layout is fully defined.
	    app.queue().launch()