Katpeeler committed
Commit fcd504e · Parent: 2d56d1d

Update app.py

Files changed (1):
  1. app.py +37 -8
app.py CHANGED
@@ -2,15 +2,24 @@ import gradio as gr
 import note_seq
 import numpy as np
 from transformers import AutoTokenizer, AutoModelForCausalLM
+# The instrument list is imported but not currently used.
 from constants import GM_INSTRUMENTS
 
+# Load the current midi_model.
 tokenizer = AutoTokenizer.from_pretrained("Katpeeler/midi_model_3")
 model = AutoModelForCausalLM.from_pretrained("Katpeeler/midi_model_3")
 
+# Define the note and bar lengths, relative to 120 bpm.
+# These are overridden if the user adjusts the bpm.
 NOTE_LENGTH_16TH_120BPM = 0.25 * 60 / 120
 BAR_LENGTH_120BPM = 4.0 * 60 / 120
+# The sample rate should never change, and should be imported from constants.
+# I will do that once I confirm I can't use a higher sample rate for audio playback here.
 SAMPLE_RATE = 44100
 
+# Main method for converting tokens back to MIDI notes.
+# An instrument_mapper can be specified when more sounds are ready to be added.
+# THIS METHOD IS FROM DR. TRISTAN BEHRENS (https://huggingface.co/TristanBehrens)
 def token_sequence_to_note_sequence(token_sequence, use_program=True, use_drums=True, instrument_mapper=None, only_piano=False):
     if isinstance(token_sequence, str):
         token_sequence = token_sequence.split()
@@ -109,54 +118,74 @@ def empty_note_sequence(qpm=120.0, total_time=0.0):
     note_sequence.total_time = total_time
     return note_sequence
 
+# The process that runs when the user clicks the "generate audio" button.
+# It currently takes three number arguments, corresponding to two parts of
+# the input prompt, and the bpm.
 def process(num1, num2, num3):
+    # The prompt used to generate, hard-coded for now to make generation smoother.
+    # It includes the start of the MIDI file, plus style and genre (since they are unused),
+    # starts a track, and lets the user set the instrument number and first note from the UI.
     created_text = f"""PIECE_START STYLE=JSFAKES GENRE=JSFAKES TRACK_START INST={num1} BAR_START NOTE_ON={num2}"""
+
+    # Adjustments for bpm.
     global NOTE_LENGTH_16TH_120BPM
     NOTE_LENGTH_16TH_120BPM = 0.25 * 60 / num3
     global BAR_LENGTH_120BPM
     BAR_LENGTH_120BPM = 4.0 * 60 / num3
+
+    # Send the input prompt to the tokenizer, and generate.
     input_ids = tokenizer.encode(created_text, return_tensors="pt")
     generated_ids = model.generate(input_ids, max_length=500)
     global generated_sequence
     generated_sequence = tokenizer.decode(generated_ids[0])
 
-    # Convert text of notes to audio
+    # Convert the text of notes to audio.
     note_sequence = token_sequence_to_note_sequence(generated_sequence)
+    # The synth engine for playing sound.
     synth = note_seq.midi_synth.synthesize
     array_of_floats = synth(note_sequence, sample_rate=SAMPLE_RATE)
     note_plot = note_seq.plot_sequence(note_sequence, False)
     array_of_floats /= 1.414
     array_of_floats *= 32767
     int16_data = array_of_floats.astype(np.int16)
+    # Return the sample rate and array, which the Gradio audio widget needs.
     return SAMPLE_RATE, int16_data
 
-
+# Simple call to show the generated tokens.
 def generation():
     return generated_sequence
 
-
+# Unused call that stored instant feedback from the Gradio sliders.
+# A simpler method replaced it, but it is kept in case it becomes useful later.
 def identity(x, state):
     state += 1
     return x, state, state
 
+# Gradio app structure.
 with gr.Blocks() as demo:
+    # Title of the page.
     gr.Markdown("Midi Generation")
-    #with gr.Tab("Token generation"):
-    #    text_output = gr.Textbox()
-    #    text_button = gr.Button("show generated tokens")
+    # The audio generation tab.
     with gr.Tab("Audio generation"):
+        # An audio widget.
         audio_output = gr.Audio()
+        # The slider widgets for the user to adjust the generation values.
         number1 = gr.Slider(1, 100, value=25, label="Inst number", step=1, info="Choose between 1 and 100")
         number2 = gr.Slider(1, 100, value=40, label="Note number", step=1, info="Choose between 1 and 100")
         number3 = gr.Slider(60, 140, value=120, label="BPM", step=5, info="Choose between 60 and 140")
+        # The button that sends the prompt.
         audio_button = gr.Button("generate audio")
+    # The token generation tab.
     with gr.Tab("Token generation"):
+        # A text widget to display the generated tokens.
         text_output = gr.Textbox()
+        # The button that displays the generated tokens.
         text_button = gr.Button("show generated tokens")
-
+
+    # The handlers for the button clicks.
     text_button.click(generation, inputs=None, outputs=text_output)
     audio_button.click(process, inputs=[number1, number2, number3], outputs=audio_output)
 
-
+# Run the application.
 if __name__ == "__main__":
     demo.launch()
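A note on the timing constants this commit documents: 60 / bpm is the length of one beat in seconds, a 16th note is a quarter of a beat, and a bar in 4/4 time is four beats. A minimal sketch of that arithmetic, assuming 4/4 time as the formulas imply (the function names are illustrative, not part of app.py):

def sixteenth_note_seconds(bpm: float) -> float:
    # A 16th note is a quarter of one beat; one beat lasts 60 / bpm seconds.
    return 0.25 * 60 / bpm

def bar_seconds(bpm: float) -> float:
    # A 4/4 bar is four beats.
    return 4.0 * 60 / bpm

assert sixteenth_note_seconds(120) == 0.125  # NOTE_LENGTH_16TH_120BPM
assert bar_seconds(120) == 2.0               # BAR_LENGTH_120BPM

Computing the lengths through functions like these would also avoid the global-variable rebinding that process() does whenever the bpm slider changes.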
 
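The prompt tokens (PIECE_START, TRACK_START, INST=, BAR_START, NOTE_ON=) follow the event-style vocabulary used by this family of MIDI language models, where, typically, TIME_DELTA= advances time in 16th-note steps and NOTE_OFF= closes a note. The real conversion is token_sequence_to_note_sequence() above; the sketch below is only a hypothetical mini-decoder in that spirit, and the token names beyond those in the prompt are assumptions, not confirmed by this diff:

NOTE_LENGTH_16TH = 0.125  # seconds per 16th-note step at 120 bpm

def decode_notes(tokens: str):
    # Walk the token stream, tracking the current time and open notes.
    time, open_notes, notes = 0.0, {}, []
    for token in tokens.split():
        if token.startswith("NOTE_ON="):
            open_notes[int(token.split("=")[1])] = time
        elif token.startswith("TIME_DELTA="):  # assumed token name
            time += int(token.split("=")[1]) * NOTE_LENGTH_16TH
        elif token.startswith("NOTE_OFF="):    # assumed token name
            pitch = int(token.split("=")[1])
            if pitch in open_notes:
                notes.append((pitch, open_notes.pop(pitch), time))
    return notes  # (pitch, start_seconds, end_seconds)

print(decode_notes("NOTE_ON=60 TIME_DELTA=4 NOTE_OFF=60"))
# [(60, 0.0, 0.5)] -- four 16th steps, i.e. a quarter note at middle C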
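The scaling at the end of process() is the usual route into Gradio's audio widget: note_seq's synthesizer returns floats roughly in [-1, 1], dividing by 1.414 (about the square root of 2) leaves headroom, and multiplying by 32767 fills the int16 range. A self-contained sketch of the same conversion, using a sine wave in place of the synthesized sequence:

import numpy as np

SAMPLE_RATE = 44100  # the same rate app.py uses

# Stand-in for the note_seq synth output: one second of a 440 Hz sine wave.
t = np.linspace(0, 1, SAMPLE_RATE, endpoint=False)
array_of_floats = np.sin(2 * np.pi * 440 * t)

# The same scaling as process(): ~1/sqrt(2) headroom, then the int16 range.
int16_data = (array_of_floats / 1.414 * 32767).astype(np.int16)

assert int16_data.dtype == np.int16
assert abs(int16_data).max() <= 32767

A (SAMPLE_RATE, int16_data) tuple is the value shape that gr.Audio accepts, which is why process() returns exactly that pair.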