thecollabagepatch committed on
Commit
0e5a9e8
·
1 Parent(s): e823dee

first try maybe

Browse files
Files changed (3) hide show
  1. .gitmodules +3 -0
  2. app.py +247 -0
  3. requirements.txt +0 -0
.gitmodules ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ [submodule "audiocraft"]
2
+ path = audiocraft
3
+ url = https://github.com/aaronabebe/audiocraft
app.py ADDED
@@ -0,0 +1,247 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torchaudio
3
+ from audiocraft.models import MusicGen
4
+ from audiocraft.data.audio import audio_write
5
+ import tempfile
6
+ import os
7
+ import logging
8
+ import torch
9
+ from pydub import AudioSegment
10
+ import io
11
+ import random
12
+ import spaces
13
+
14
+ #logging.basicConfig(level=logging.DEBUG)
15
+
16
# Pick the compute device once at import time: CUDA when torch reports it
# as available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
18
+
19
+ # Placeholder Utility Functions
20
+ #def peak_normalize(y, target_peak=0.97):
21
+ # return target_peak * (y / np.max(np.abs(y)))
22
+ #
23
+ #def rms_normalize(y, target_rms=0.05):
24
+ # return y * (target_rms / np.sqrt(np.mean(y**2)))
25
+
26
def preprocess_audio(waveform):
    """Round-trip a waveform through NumPy and hand it back on ``device``.

    The tensor is copied to the CPU, every size-1 dimension is removed, and
    the data is converted to a NumPy array. It is then rebuilt as a tensor
    with a new leading dimension and moved to the module-level ``device``.
    No normalization is applied.
    """
    # .numpy() only works on CPU tensors, hence the explicit .cpu() first.
    samples = waveform.cpu().squeeze().numpy()
    rebuilt = torch.from_numpy(samples)
    return rebuilt.unsqueeze(0).to(device)
30
+
31
@spaces.GPU(duration=10)
def generate_drum_sample():
    """Generate a ten-second unconditional clip with the jungle drum model.

    Returns:
        Path to the WAV file that was written.
    """
    model = MusicGen.get_pretrained('pharoAIsanders420/micro-musicgen-jungle')
    model.set_generation_params(duration=10)
    # One sample is requested; squeeze(0) drops that leading size-1 axis
    # before the tensor is handed to audio_write.
    wav = model.generate_unconditional(1).squeeze(0)

    # Write into a fresh directory per call so that concurrent requests
    # cannot overwrite each other's output file.
    output_dir = tempfile.mkdtemp(prefix='jungle_')
    filename_without_extension = os.path.join(output_dir, 'jungle')

    # audio_write is given the stem and appends the ".wav" suffix itself.
    audio_write(
        filename_without_extension,
        wav.cpu(),
        model.sample_rate,
        strategy="loudness",
        loudness_compressor=True,
    )

    return f'{filename_without_extension}.wav'
43
+
44
@spaces.GPU(duration=10)
def continue_drum_sample(existing_audio_path):
    """Extend a drum clip, using its final two seconds as the prompt.

    Args:
        existing_audio_path: Path to the audio file to extend.

    Returns:
        Path to a new WAV file holding the original clip (minus the prompt
        tail) followed by the generated audio.

    Raises:
        ValueError: If no audio was supplied, or the audio is shorter than
            the prompt duration.
    """
    if not existing_audio_path:
        raise ValueError("No audio to continue; generate or upload a drum sample first.")

    # Fixed durations, in seconds.
    prompt_duration = 2
    output_duration = 10

    existing_audio, sr = torchaudio.load(existing_audio_path)

    model = MusicGen.get_pretrained('pharoAIsanders420/micro-musicgen-jungle')
    model.set_generation_params(duration=output_duration)

    # The combined file is saved at a single sample rate, so bring the
    # existing audio to the model's rate before anything is concatenated.
    if sr != model.sample_rate:
        existing_audio = torchaudio.functional.resample(existing_audio, sr, model.sample_rate)
        sr = model.sample_rate
    existing_audio = existing_audio.to(device)

    # Slice the prompt from the end of the existing audio.
    num_samples = int(prompt_duration * sr)
    if existing_audio.shape[1] < num_samples:
        raise ValueError("The existing audio is too short for the specified prompt duration.")
    start_sample = existing_audio.shape[1] - num_samples
    prompt_waveform = existing_audio[..., start_sample:]

    output = model.generate_continuation(prompt_waveform, prompt_sample_rate=sr, progress=True)
    output = output.to(device)  # Same device as existing_audio for torch.cat

    if output.dim() == 3:  # [batch_size, channels, samples]
        output = output.squeeze(0)
    if output.dim() == 1:
        output = output.unsqueeze(0)  # Mono to [1, samples]

    # torch.cat along the sample axis needs matching channel counts; downmix
    # the existing audio and repeat it across the output's channels if needed.
    if existing_audio.shape[0] != output.shape[0]:
        existing_audio = existing_audio.mean(dim=0, keepdim=True).expand(output.shape[0], -1)

    # The generated clip is treated as already containing the prompt (as
    # continue_music does when it replaces the prompt portion), so the prompt
    # tail is dropped from the existing audio to avoid playing it twice.
    combined_audio = torch.cat((existing_audio[..., :start_sample], output), dim=1).cpu()

    # mkstemp guarantees a unique path, unlike a random four-digit suffix.
    fd, combined_file_path = tempfile.mkstemp(prefix='continued_jungle_', suffix='.wav')
    os.close(fd)
    torchaudio.save(combined_file_path, combined_audio, sr)

    return combined_file_path
87
+
88
@spaces.GPU(duration=90)
def generate_music(wav_filename, prompt_duration, musicgen_model, output_duration):
    """Continue the opening of an audio file with the selected MusicGen model.

    Args:
        wav_filename: Path to the audio file whose start is used as the prompt.
        prompt_duration: Length of the prompt, in seconds, taken from the
            beginning of the file.
        musicgen_model: Dropdown label; the text before the first space is
            used as the pretrained model name.
        output_duration: Value passed as the ``duration`` generation parameter.

    Returns:
        Path to the WAV file that was written.

    Raises:
        ValueError: If no input audio was supplied.
    """
    if not wav_filename:
        raise ValueError("No input audio; generate or upload a drum sample first.")

    song, sr = torchaudio.load(wav_filename)
    song = song.to(device)

    # Labels look like "repo/name (size)"; only the first token is the model id.
    model_name = musicgen_model.split(" ")[0]
    model_continue = MusicGen.get_pretrained(model_name)
    model_continue.set_generation_params(
        use_sampling=True,
        top_k=250,
        top_p=0.0,
        temperature=1.0,
        duration=output_duration,
        cfg_coef=3,
    )

    prompt_waveform = song[..., :int(prompt_duration * sr)]
    prompt_waveform = preprocess_audio(prompt_waveform)

    output = model_continue.generate_continuation(prompt_waveform, prompt_sample_rate=sr, progress=True)
    output = output.cpu()

    # Drop only the leading axis; a bare squeeze() would also collapse any
    # other size-1 dimension.
    if output.dim() > 2:
        output = output.squeeze(0)

    # Write into a fresh directory per call so that concurrent requests
    # cannot overwrite each other's output file.
    output_dir = tempfile.mkdtemp(prefix='continued_music_')
    filename_without_extension = os.path.join(output_dir, 'continued_music')

    # audio_write is given the stem and appends the ".wav" suffix itself.
    audio_write(
        filename_without_extension,
        output,
        model_continue.sample_rate,
        strategy="loudness",
        loudness_compressor=True,
    )

    return f'{filename_without_extension}.wav'
123
+
124
@spaces.GPU(duration=90)
def continue_music(input_audio_path, prompt_duration, musicgen_model, output_duration):
    """Continue an audio file from its end and return the combined result.

    The last ``prompt_duration`` seconds of the input are used as the prompt.
    The generated audio then replaces that prompt portion in the final mix.

    Args:
        input_audio_path: Path to the audio file to continue.
        prompt_duration: Length of the prompt, in seconds, taken from the end
            of the file.
        musicgen_model: Dropdown label; the text before the first space is
            used as the pretrained model name.
        output_duration: Value passed as the ``duration`` generation parameter.

    Returns:
        Path to the MP3 file holding the combined audio.

    Raises:
        ValueError: If no input audio was supplied, or the prompt is longer
            than the input audio.
    """
    if not input_audio_path:
        raise ValueError("No input audio; generate or upload music first.")

    model_continue = MusicGen.get_pretrained(musicgen_model.split(" ")[0])
    model_continue.set_generation_params(
        use_sampling=True,
        top_k=250,
        top_p=0.0,
        temperature=1.0,
        duration=output_duration,
        cfg_coef=3,
    )

    # from_file lets the decoder detect the container. generate_music writes
    # WAV, so forcing the MP3 format on its output was wrong.
    original_audio = AudioSegment.from_file(input_audio_path)

    # pydub measures and slices in milliseconds; len() is the duration in ms.
    prompt_ms = int(prompt_duration * 1000)
    if len(original_audio) < prompt_ms:
        raise ValueError("The prompt_duration is longer than the current audio length.")
    start_time = len(original_audio) - prompt_ms
    prompt_audio = original_audio[start_time:]

    # Export the prompt into an in-memory buffer so no file handle is left open.
    prompt_buffer = io.BytesIO()
    prompt_audio.export(prompt_buffer, format="wav")
    prompt_buffer.seek(0)
    # Use the rate reported for the prompt itself rather than loading the
    # whole input a second time just to read its sample rate.
    prompt_waveform, prompt_sr = torchaudio.load(prompt_buffer)
    prompt_waveform = preprocess_audio(prompt_waveform.to(device))

    output = model_continue.generate_continuation(
        prompt_waveform, prompt_sample_rate=prompt_sr, progress=True
    )
    output = output.cpu()
    if output.dim() > 2:
        output = output.squeeze(0)

    # The intermediate WAV lives in a scratch directory that is removed even
    # if decoding fails. from_wav reads the audio before the directory goes.
    with tempfile.TemporaryDirectory() as scratch_dir:
        stem = os.path.join(scratch_dir, 'continuation')
        # audio_write is given the stem and appends the ".wav" suffix itself.
        audio_write(
            stem,
            output,
            model_continue.sample_rate,
            strategy="loudness",
            loudness_compressor=True,
        )
        generated_audio_segment = AudioSegment.from_wav(f'{stem}.wav')

    # Replace the prompt portion with the generated audio.
    combined_audio = original_audio[:start_time] + generated_audio_segment

    # mkstemp guarantees a unique path, unlike a random numeric suffix.
    fd, combined_audio_filename = tempfile.mkstemp(prefix="combined_audio_", suffix=".mp3")
    os.close(fd)
    # export() returns the open output file; close it so the handle is not leaked.
    combined_audio.export(combined_audio_filename, format="mp3").close()

    return combined_audio_filename
189
+
190
# Markdown blurbs describing the models and their sources.
# NOTE(review): nothing in the visible interface code references these three
# constants yet; presumably they are meant for expandable sections. Confirm.
musicgen_micro_blurb = """
## musicgen_micro
musicgen micro is an experimental series of models by aaron abebe. they are incredibly fast, and extra insane. this one does goated jungle drums. we're very excited about these.
[<img src="https://github.githubassets.com/images/modules/logos_page/GitHub-Mark.png" alt="GitHub" width="20" style="vertical-align:middle"> aaron's github](https://github.com/aaronabebe/)
[<img src="https://huggingface.co/front/assets/huggingface_logo-noborder.svg" alt="Hugging Face" width="20" style="vertical-align:middle"> musicgen-micro on huggingface](https://huggingface.co/pharoAIsanders420/micro-musicgen-jungle)
"""

# Background on MusicGen itself and on continuations.
musicgen_blurb = """
## musicgen
musicgen is a transformer-based music model that generates audio. It can also do something called a continuation, which was initially meant to extend musicgen outputs beyond 30 seconds. it can be used with any input audio to produce surprising results.
[<img src="https://github.githubassets.com/images/modules/logos_page/GitHub-Mark.png" alt="GitHub" width="20" style="vertical-align:middle"> audiocraft github](https://github.com/facebookresearch/audiocraft)
visit https://thecollabagepatch.com/infinitepolo.mp3 or https://thecollabagepatch.com/audiocraft.mp3 to hear continuations in action.
see also https://youtube.com/@thecollabagepatch
"""

# Credits for the community fine-tunes.
finetunes_blurb = """
## fine-tuned models
the fine-tunes hosted on the huggingface hub are provided collectively by the musicgen discord community. thanks to vanya, mj, hoenn, septicDNB and of course, lyra.
[<img src="https://cdn.iconscout.com/icon/free/png-256/discord-3691244-3073764.png" alt="Discord" width="20" style="vertical-align:middle"> musicgen discord](https://discord.gg/93kX8rGZ)
[<img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab" style="vertical-align:middle"> fine-tuning colab notebook by lyra](https://colab.research.google.com/drive/13tbcC3A42KlaUZ21qvUXd25SFLu8WIvb)
"""
212
+
213
# Labels offered in the model dropdown. The text before the first space is the
# model name the generation functions load; the first entry is the default.
_MODEL_CHOICES = [
    "thepatch/vanya_ai_dnb_0.1 (small)",
    "thepatch/budots_remix (small)",
    "thepatch/PhonkV2 (small)",
    "thepatch/bleeps-medium (medium)",
    "thepatch/hoenn_lofi (large)",
]

# Build the UI: drum generation on the left, MusicGen continuation on the right.
with gr.Blocks() as iface:
    gr.Markdown("# the-slot-machine")
    gr.Markdown("two ai's jamming. warning: outputs will be very strange, likely stupid, and possibly rad.")
    gr.Markdown("this is an even weirder slot machine than the other one. on the left, you get to generate some state of the art lo-fi jungle drums at incredible speed thanks to aaron's new class of model, and if you want you can have it continue its own output. Then, you can either press the generate_music button to use the first 5 seconds as a prompt, or you can re-upload the audio into the continue_music section to have a fine-tune continue from the end of the jungle drum output, however long and insane it is. think of this as a very weird relay race and you're winning.")

    with gr.Row():
        # Left column: drum sample generation and self-continuation.
        with gr.Column():
            generate_button = gr.Button("Generate Drum Sample")
            drum_audio = gr.Audio(
                label="Generated Drum Sample",
                type="filepath",
                interactive=True,
                show_download_button=True,
            )
            continue_drum_sample_button = gr.Button("Continue Drum Sample")

        # Right column: settings and outputs for the fine-tuned models.
        with gr.Column():
            prompt_duration = gr.Dropdown(
                label="Prompt Duration (seconds)",
                choices=[*range(1, 11)],
                value=5,
            )
            output_duration = gr.Slider(
                label="Output Duration (seconds)",
                minimum=10,
                maximum=30,
                step=1,
                value=20,
            )
            musicgen_model = gr.Dropdown(
                label="MusicGen Model",
                choices=_MODEL_CHOICES,
                value=_MODEL_CHOICES[0],
            )
            generate_music_button = gr.Button("Generate Music")
            output_audio = gr.Audio(label="Generated Music", type="filepath")
            continue_button = gr.Button("Continue Generating Music")
            continue_output_audio = gr.Audio(label="Continued Music Output", type="filepath")

    # Wire each button to its handler; the drum player feeds both its own
    # continuation and the music generator.
    generate_button.click(generate_drum_sample, outputs=[drum_audio])
    continue_drum_sample_button.click(
        continue_drum_sample,
        inputs=[drum_audio],
        outputs=[drum_audio],
    )
    generate_music_button.click(
        generate_music,
        inputs=[drum_audio, prompt_duration, musicgen_model, output_duration],
        outputs=[output_audio],
    )
    continue_button.click(
        continue_music,
        inputs=[output_audio, prompt_duration, musicgen_model, output_duration],
        outputs=continue_output_audio,
    )

iface.launch()
requirements.txt ADDED
File without changes