Spaces:
Runtime error
Runtime error
Hugo Flores Garcia
committed on
Commit
·
128981d
1
Parent(s):
03f09ee
demo
Browse files- demo.py +31 -17
- vampnet/interface.py +15 -0
demo.py
CHANGED
@@ -65,13 +65,21 @@ def vamp(
|
|
65 |
mask_periodic_amt, beat_unmask_dur,
|
66 |
mask_dwn_chk, dwn_factor,
|
67 |
mask_up_chk, up_factor,
|
68 |
-
num_vamps, mode, use_beats, num_steps
|
69 |
):
|
70 |
# try:
|
71 |
print(input_audio)
|
72 |
|
73 |
-
sig = at.AudioSignal(input_audio
|
74 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
75 |
if beat_unmask_dur > 0.0 and use_beats:
|
76 |
beat_mask = interface.make_beat_mask(
|
77 |
sig,
|
@@ -142,13 +150,13 @@ def save_vamp(
|
|
142 |
mask_periodic_amt, beat_unmask_dur,
|
143 |
mask_dwn_chk, dwn_factor,
|
144 |
mask_up_chk, up_factor,
|
145 |
-
num_vamps, mode, output_audio, notes, use_beats, num_steps
|
146 |
):
|
147 |
out_dir = OUT_DIR / "saved" / str(uuid.uuid4())
|
148 |
out_dir.mkdir(parents=True, exist_ok=True)
|
149 |
|
150 |
-
sig_in = at.AudioSignal(input_audio
|
151 |
-
sig_out = at.AudioSignal(output_audio
|
152 |
|
153 |
sig_in.write(out_dir / "input.wav")
|
154 |
sig_out.write(out_dir / "output.wav")
|
@@ -168,6 +176,7 @@ def save_vamp(
|
|
168 |
"up_factor": up_factor,
|
169 |
"num_vamps": num_vamps,
|
170 |
"num_steps": num_steps,
|
|
|
171 |
"mode": mode,
|
172 |
"notes": notes,
|
173 |
}
|
@@ -212,12 +221,12 @@ with gr.Blocks() as demo:
|
|
212 |
with gr.Column():
|
213 |
gr.Markdown("""
|
214 |
### Tips
|
215 |
-
- use the beat
|
216 |
- if you want the generated audio to sound like the original, but with a different beat structure:
|
217 |
-
- uncheck the beat
|
218 |
- decrease the periodic unmasking to anywhere from 2 to 8
|
219 |
- if you want a more "random" generation:
|
220 |
-
- uncheck the beat
|
221 |
- increase the periodic unmasking to 16 or more
|
222 |
- increase the temperatures!
|
223 |
|
@@ -228,11 +237,11 @@ with gr.Blocks() as demo:
|
|
228 |
with gr.Column():
|
229 |
mode = gr.Radio(
|
230 |
label="**mode**. note that loop mode requires a prefix and suffix longer than 0",
|
231 |
-
choices=["standard",
|
232 |
value="standard"
|
233 |
)
|
234 |
num_vamps = gr.Number(
|
235 |
-
label="number of vamps
|
236 |
value=1,
|
237 |
precision=0
|
238 |
)
|
@@ -246,13 +255,13 @@ with gr.Blocks() as demo:
|
|
246 |
input_audio = gr.Audio(
|
247 |
label="input audio",
|
248 |
interactive=False,
|
249 |
-
type="
|
250 |
)
|
251 |
|
252 |
audio_mask = gr.Audio(
|
253 |
label="audio mask (listen to this to hear the mask hints)",
|
254 |
interactive=False,
|
255 |
-
type="
|
256 |
)
|
257 |
|
258 |
# connect widgets
|
@@ -273,7 +282,7 @@ with gr.Blocks() as demo:
|
|
273 |
with gr.Column():
|
274 |
|
275 |
mask_periodic_amt = gr.Slider(
|
276 |
-
label="periodic hint (0.0 means no hint, 2
|
277 |
minimum=0,
|
278 |
maximum=64,
|
279 |
step=1,
|
@@ -321,6 +330,11 @@ with gr.Blocks() as demo:
|
|
321 |
value=True
|
322 |
)
|
323 |
|
|
|
|
|
|
|
|
|
|
|
324 |
num_steps = gr.Slider(
|
325 |
label="number of steps (should normally be between 12 and 36)",
|
326 |
minimum=4,
|
@@ -334,7 +348,7 @@ with gr.Blocks() as demo:
|
|
334 |
output_audio = gr.Audio(
|
335 |
label="output audio",
|
336 |
interactive=False,
|
337 |
-
type="
|
338 |
)
|
339 |
|
340 |
|
@@ -407,7 +421,7 @@ with gr.Blocks() as demo:
|
|
407 |
mask_periodic_amt, beat_unmask_dur,
|
408 |
mask_dwn_chk, dwn_factor,
|
409 |
mask_up_chk, up_factor,
|
410 |
-
num_vamps, mode, use_beats, num_steps
|
411 |
],
|
412 |
outputs=[output_audio, audio_mask]
|
413 |
)
|
@@ -422,7 +436,7 @@ with gr.Blocks() as demo:
|
|
422 |
mask_up_chk, up_factor,
|
423 |
num_vamps, mode,
|
424 |
output_audio,
|
425 |
-
notes_text, use_beats, num_steps
|
426 |
],
|
427 |
outputs=[thank_you, download_file]
|
428 |
)
|
|
|
65 |
mask_periodic_amt, beat_unmask_dur,
|
66 |
mask_dwn_chk, dwn_factor,
|
67 |
mask_up_chk, up_factor,
|
68 |
+
num_vamps, mode, use_beats, num_steps, snap_to_beats
|
69 |
):
|
70 |
# try:
|
71 |
print(input_audio)
|
72 |
|
73 |
+
sig = at.AudioSignal(input_audio)
|
74 |
+
|
75 |
+
if snap_to_beats:
|
76 |
+
old_sig = sig.clone()
|
77 |
+
sig = interface.snap_to_beats(sig)
|
78 |
+
if sig.duration < (sig.duration / 4): # we cut off too much
|
79 |
+
sig = old_sig
|
80 |
+
print(f"new sig duration is {sig.duration} which is too short, reverting to old sig")
|
81 |
+
print(f"new sig duration is {sig.duration}")
|
82 |
+
|
83 |
if beat_unmask_dur > 0.0 and use_beats:
|
84 |
beat_mask = interface.make_beat_mask(
|
85 |
sig,
|
|
|
150 |
mask_periodic_amt, beat_unmask_dur,
|
151 |
mask_dwn_chk, dwn_factor,
|
152 |
mask_up_chk, up_factor,
|
153 |
+
num_vamps, mode, output_audio, notes, use_beats, num_steps, snap_to_beats
|
154 |
):
|
155 |
out_dir = OUT_DIR / "saved" / str(uuid.uuid4())
|
156 |
out_dir.mkdir(parents=True, exist_ok=True)
|
157 |
|
158 |
+
sig_in = at.AudioSignal(input_audio)
|
159 |
+
sig_out = at.AudioSignal(output_audio)
|
160 |
|
161 |
sig_in.write(out_dir / "input.wav")
|
162 |
sig_out.write(out_dir / "output.wav")
|
|
|
176 |
"up_factor": up_factor,
|
177 |
"num_vamps": num_vamps,
|
178 |
"num_steps": num_steps,
|
179 |
+
"snap_to_beats": snap_to_beats,
|
180 |
"mode": mode,
|
181 |
"notes": notes,
|
182 |
}
|
|
|
221 |
with gr.Column():
|
222 |
gr.Markdown("""
|
223 |
### Tips
|
224 |
+
- use the beat hint button so the output audio has the same beat structure as the input audio
|
225 |
- if you want the generated audio to sound like the original, but with a different beat structure:
|
226 |
+
- uncheck the beat hint button
|
227 |
- decrease the periodic unmasking to anywhere from 2 to 8
|
228 |
- if you want a more "random" generation:
|
229 |
+
- uncheck the beat hint button (or reduce the beat unmask duration)
|
230 |
- increase the periodic unmasking to 16 or more
|
231 |
- increase the temperatures!
|
232 |
|
|
|
237 |
with gr.Column():
|
238 |
mode = gr.Radio(
|
239 |
label="**mode**. note that loop mode requires a prefix and suffix longer than 0",
|
240 |
+
choices=["standard",],
|
241 |
value="standard"
|
242 |
)
|
243 |
num_vamps = gr.Number(
|
244 |
+
label="number of vamps. more vamps = longer generated audio",
|
245 |
value=1,
|
246 |
precision=0
|
247 |
)
|
|
|
255 |
input_audio = gr.Audio(
|
256 |
label="input audio",
|
257 |
interactive=False,
|
258 |
+
type="filepath",
|
259 |
)
|
260 |
|
261 |
audio_mask = gr.Audio(
|
262 |
label="audio mask (listen to this to hear the mask hints)",
|
263 |
interactive=False,
|
264 |
+
type="filepath",
|
265 |
)
|
266 |
|
267 |
# connect widgets
|
|
|
282 |
with gr.Column():
|
283 |
|
284 |
mask_periodic_amt = gr.Slider(
|
285 |
+
label="periodic hint (0.0 means no hint, 2 - lots of hints, 8 - a couple of hints, 16 - occasional hint, 32 - very occasional hint, etc)",
|
286 |
minimum=0,
|
287 |
maximum=64,
|
288 |
step=1,
|
|
|
330 |
value=True
|
331 |
)
|
332 |
|
333 |
+
snap_to_beats = gr.Checkbox(
|
334 |
+
label="trim to beat markers (uncheck if the output audio is too short.)",
|
335 |
+
value=True
|
336 |
+
)
|
337 |
+
|
338 |
num_steps = gr.Slider(
|
339 |
label="number of steps (should normally be between 12 and 36)",
|
340 |
minimum=4,
|
|
|
348 |
output_audio = gr.Audio(
|
349 |
label="output audio",
|
350 |
interactive=False,
|
351 |
+
type="filepath"
|
352 |
)
|
353 |
|
354 |
|
|
|
421 |
mask_periodic_amt, beat_unmask_dur,
|
422 |
mask_dwn_chk, dwn_factor,
|
423 |
mask_up_chk, up_factor,
|
424 |
+
num_vamps, mode, use_beats, num_steps, snap_to_beats
|
425 |
],
|
426 |
outputs=[output_audio, audio_mask]
|
427 |
)
|
|
|
436 |
mask_up_chk, up_factor,
|
437 |
num_vamps, mode,
|
438 |
output_audio,
|
439 |
+
notes_text, use_beats, num_steps, snap_to_beats
|
440 |
],
|
441 |
outputs=[thank_you, download_file]
|
442 |
)
|
vampnet/interface.py
CHANGED
@@ -111,6 +111,21 @@ class Interface:
|
|
111 |
z = self.codec.encode(signal.samples, signal.sample_rate)["codes"]
|
112 |
return z
|
113 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
114 |
def make_beat_mask(self,
|
115 |
signal: AudioSignal,
|
116 |
before_beat_s: float = 0.1,
|
|
|
111 |
z = self.codec.encode(signal.samples, signal.sample_rate)["codes"]
|
112 |
return z
|
113 |
|
114 |
+
def snap_to_beats(
|
115 |
+
self,
|
116 |
+
signal: AudioSignal
|
117 |
+
):
|
118 |
+
assert hasattr(self, "beat_tracker"), "No beat tracker loaded"
|
119 |
+
beats, downbeats = self.beat_tracker.extract_beats(signal)
|
120 |
+
|
121 |
+
# trim the signal so it starts at the first beat and ends at the last beat
|
122 |
+
samples_begin = int(beats[0] * signal.sample_rate )
|
123 |
+
samples_end = int(beats[-1] * signal.sample_rate)
|
124 |
+
print(beats[0])
|
125 |
+
signal = signal.clone().trim(samples_begin, signal.length - samples_end)
|
126 |
+
|
127 |
+
return signal
|
128 |
+
|
129 |
def make_beat_mask(self,
|
130 |
signal: AudioSignal,
|
131 |
before_beat_s: float = 0.1,
|