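# NOTE: the text "Spaces: / Running / Running" that preceded the imports was
# hosting-page status output captured together with this file; it is not part
# of the program.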
import os | |
# from mido import MidiFile | |
import mido | |
import music21 | |
import numpy as np | |
import pandas as pd | |
from music21 import * | |
from mido import Message, MidiFile, MidiTrack | |
# Number of consecutive notes stored per dataset row (note0..note{window-1}).
# During generation the first window-1 notes of a row are the context and the
# last one is the value being predicted.
window = 3
# Number of notes to generate.
# TODO: accept this value from the user instead of hard-coding it.
num_notes = 100
# MIDI ticks per quarter note; a duration expressed in quarter lengths is
# multiplied by this to obtain the delta time written to the MIDI track.
quarter_note_ticks = 480
# Accepted note durations in quarter lengths, from a 16th note (0.25) up to a
# whole note (4.0), including dotted values. Notes with any other duration are
# dropped while parsing. (The misspelled name is kept because other code in
# this file refers to it.)
accepeted_lengths = [0.25,0.375,0.5,0.75,1,1.5,2.0,3.0,4.0]
#Finds all absolute paths in directory | |
#https://stackoverflow.com/questions/9816816/get-absolute-paths-of-all-files-in-a-directory | |
def abs_paths(dir):
    """Yield the absolute path of every file below ``dir``, recursively.

    Directories themselves are not yielded, only the files they contain.
    Files are produced in the order ``os.walk`` reports them.
    """
    for folder, _subdirs, names in os.walk(dir):
        yield from (
            os.path.abspath(os.path.join(folder, name)) for name in names
        )
def pitch_to_int(nameWithOctave):
    """Convert a pitch name with octave (e.g. ``'C#4'``) to a MIDI note number.

    The name is read as ``<letter><accidentals><octave>``:

    * ``letter`` is one of ``C D E F G A B`` (upper case);
    * ``accidentals`` is any run of ``#`` (sharp, +1 each) and ``-`` or ``b``
      (flat, -1 each), so double sharps/flats such as ``'C##4'`` or
      ``'B--4'`` are handled;
    * ``octave`` is the trailing run of digits; middle C (``'C4'``) is 60.

    A ``-`` directly before the octave is always read as a flat, never as a
    minus sign, so ``'C-1'`` means C-flat in octave 1.

    Raises:
        ValueError: if the letter, an accidental symbol or the octave cannot
            be recognised.
    """
    # semitone offset of each natural letter above C
    letter_dict = {'C': 0, 'D': 2, 'E': 4, 'F': 5, 'G': 7, 'A': 9, 'B': 11}

    name = str(nameWithOctave)

    # Walk back over the trailing digits to find where the octave starts.
    # Stop at index 1 so the first character is always treated as the letter.
    split = len(name)
    while split > 1 and name[split - 1].isdigit():
        split -= 1
    letter, accidentals, octave_digits = name[:1], name[1:split], name[split:]

    if letter not in letter_dict or not octave_digits:
        raise ValueError('unrecognised pitch name: {!r}'.format(nameWithOctave))

    # Each accidental symbol shifts the pitch by one semitone.
    accidental = 0
    for symbol in accidentals:
        if symbol == '#':
            accidental += 1
        elif symbol in ('-', 'b'):
            accidental -= 1
        else:
            raise ValueError(
                'unrecognised accidental {!r} in pitch name {!r}'.format(
                    symbol, nameWithOctave))

    # MIDI octaves start at -1, hence the +1 before scaling by 12 semitones.
    octave = 12 * (int(octave_digits) + 1)

    return octave + letter_dict[letter] + accidental
def generate_notes(csv_path='prepared.csv', count=None):
    """Generate a note/duration sequence by sampling rows of the dataset.

    The CSV is expected to hold alternating ``note<i>`` / ``duration<i>``
    columns. Its last two columns are the prediction target (note, duration);
    every column before them forms the context the prediction is based on.

    Starting from the context of one randomly sampled row, each step keeps
    only the rows whose context columns equal the current context and samples
    one of them. Continuations that occur more often in the data are therefore
    picked proportionally more often. The context then slides forward by one
    note: the oldest entry is dropped and the sampled note is appended.

    Args:
        csv_path: path of the dataset CSV (defaults to ``'prepared.csv'``).
        count: number of notes to generate after the initial context;
            defaults to the module-level ``num_notes``.

    Returns:
        ``(notes, durations)``: two lists holding the initial context followed
        by ``count`` generated entries, or ``([], [])`` if a context with no
        matching row was reached (the caller is expected to retry).

    Raises:
        ValueError: if the dataset contains no rows.
    """
    if count is None:
        count = num_notes

    df_notes = pd.read_csv(csv_path)
    print(df_notes.shape)
    if df_notes.empty:
        raise ValueError('{} contains no rows to sample from'.format(csv_path))

    # define note and duration feature columns based on names
    features = df_notes.columns[:-2]
    note_features = [s for s in features if "note" in s]
    duration_features = [s for s in features if "duration" in s]
    # define target columns
    note_target = df_notes.columns[-2]
    duration_target = df_notes.columns[-1]

    # sample a random row and use its context as the starting point
    initial_sample = df_notes.sample()
    start_notes = list(initial_sample[note_features].values[0])
    start_durations = list(initial_sample[duration_features].values[0])

    # the generated sequence begins with the starting context itself
    gen_notes = [int(note) for note in start_notes]
    gen_durations = list(start_durations)

    for _ in range(count):
        # keep only the rows whose context matches the current one
        rows = df_notes
        for column, value in zip(note_features, start_notes):
            rows = rows.loc[rows[column] == value]
        for column, value in zip(duration_features, start_durations):
            rows = rows.loc[rows[column] == value]

        # The current context may never be followed by anything in the data;
        # an empty frame cannot be sampled, so signal the caller to restart.
        if rows.empty:
            return [], []

        next_sample = rows.sample()
        next_note = next_sample[note_target].values[0]
        next_duration = next_sample[duration_target].values[0]
        gen_notes.append(int(next_note))
        gen_durations.append(next_duration)

        # Slide the context window: append the newest entry, then drop the
        # OLDEST one (index 0). Dropping the newest instead would pin the
        # first context note forever and produce contexts that never occur
        # in the data.
        start_notes.append(next_note)
        del start_notes[0]
        start_durations.append(next_duration)
        del start_durations[0]

    return gen_notes, gen_durations
#MAIN FUNCTION
def _parse_midi_to_tracks(midi_dir, tracks_dir):
    """Parse every MIDI file under ``midi_dir`` into ``.npy`` note arrays.

    Each part of each piece that yields at least one accepted note is saved
    as ``<tracks_dir>/<n>.npy``, an array of ``[midi_pitch, quarter_length]``
    rows. Notes whose duration is not in ``accepeted_lengths`` are skipped.
    """
    # https://stackoverflow.com/questions/49462107/how-can-i-get-all-piano-parts-from-a-music21-score
    track_count = 0
    for path in abs_paths(midi_dir):
        print(path)
        piece = converter.parse(path)
        print(list(piece.parts))
        for part in piece.parts:
            part_notes = []
            # NOTE(review): this walks only the direct children of the part;
            # if the parser nests notes inside measures they will not be seen
            # here - confirm against the music21 version in use.
            for event in part:
                if getattr(event, 'isNote', None) and event.isNote:
                    print('note in {}'.format(part))
                    # keep only notes with an accepted duration
                    if event.quarterLength in accepeted_lengths:
                        # float() keeps the saved array purely numeric
                        part_notes.append([pitch_to_int(event.nameWithOctave),
                                           float(event.quarterLength)])
            if part_notes:
                np.save('{}/{}.npy'.format(tracks_dir, track_count),
                        np.array(part_notes))
                track_count += 1
    print('Number of tracks parsed: {}'.format(track_count))


def _build_dataset(tracks_dir, csv_path):
    """Write every ``window``-note segment of every saved track to a CSV.

    Columns are ``note0, duration0, ..., note{window-1}, duration{window-1}``;
    each row is one run of ``window`` consecutive notes from a track.
    """
    columns = []
    for index in range(window):
        columns.append('note' + str(index))
        columns.append('duration' + str(index))

    # Collect all rows first and build the frame once: DataFrame.append was
    # removed in pandas 2.0 and copied the whole frame on every call.
    segments = []
    for path in abs_paths(tracks_dir):
        notes = np.load(path)
        # +1 so the final full segment of the track is included as well
        for start in range(len(notes) - window + 1):
            segments.append(notes[start:start + window].flatten())

    pd.DataFrame(segments, columns=columns).to_csv(csv_path, index=False)


def _write_midi(notes, durations, out_path):
    """Write the notes as a single-track MIDI file at ``out_path``.

    Each note is emitted as a ``note_on`` followed, after its duration in
    ticks, by a ``note_on`` with velocity 0, which ends the note.
    """
    # https://mido.readthedocs.io/en/latest/midi_files.html (creating a new file)
    # Pass the resolution explicitly so the file header agrees with the tick
    # values computed below.
    mid = MidiFile(ticks_per_beat=quarter_note_ticks)
    track = MidiTrack()
    mid.tracks.append(track)
    for note, duration in zip(notes, durations):
        track.append(Message('note_on', channel=0, note=note, velocity=60, time=0))
        track.append(Message('note_on', channel=0, note=note, velocity=0,
                             time=int(duration * quarter_note_ticks)))
    mid.save(out_path)


if __name__=="__main__":
    # Step 1: parse the source MIDI files once; the tracks folder is the cache.
    if not os.path.exists('tracks'):
        os.mkdir('tracks')
        _parse_midi_to_tracks('datamidi', 'tracks')

    # Step 2: build the segment dataset once; prepared.csv is the cache.
    if not os.path.exists('prepared.csv'):
        _build_dataset('tracks', 'prepared.csv')

    # Step 3: generate. generate_notes() returns empty lists when it reaches
    # a context with no continuation in the data, so retry with a fresh start.
    # The retry count is bounded so unusable data cannot hang the script.
    max_attempts = 1000
    gen_notes = []
    gen_durations = []
    attempts = 0
    while len(gen_notes) < num_notes:
        if attempts >= max_attempts:
            raise RuntimeError(
                'could not generate {} notes after {} attempts'.format(
                    num_notes, max_attempts))
        gen_notes, gen_durations = generate_notes()
        attempts += 1

    print('Generated {} notes/durations'.format(num_notes))
    print(gen_notes)
    print(gen_durations)

    # Step 4: write the first num_notes generated notes to a MIDI file.
    _write_midi(gen_notes[:num_notes], gen_durations[:num_notes], 'new_song.mid')

# LILYPOND COMMANDS: to be used for generating music scores
# Installation: sudo apt-get install -y lilypond
#   midi2ly "new_song.ly"
#   lilypond -fpng "new_song-midi.ly"
# Converting abc to ly:
#   abc2ly "new_song.abc"
# then the same lilypond command converts to png (and generates midi as well)
# Converting midi to abc:
#   midi2abc -f file.mid > file.abc