teticio committed on
Commit f67abdb
1 Parent(s): 5be865c
Files changed (1)
  1. notebooks/test_model.ipynb +15 -36
notebooks/test_model.ipynb CHANGED
@@ -90,7 +90,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "9b52c13c",
+   "id": "4dc17ac0",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -181,7 +181,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "daa4a41e",
+   "id": "58a876c1",
    "metadata": {},
    "source": [
     "### Generate continuations (\"out-painting\")"
@@ -190,7 +190,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "5e73e629",
+   "id": "b95d5780",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -245,26 +245,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": null,
    "id": "5a257e69",
    "metadata": {
-    "scrolled": true
+    "scrolled": false
    },
-   "outputs": [
-    {
-     "ename": "NameError",
-     "evalue": "name 'sample_rate' is not defined",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
-      "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
-      "Input \u001b[0;32mIn [9]\u001b[0m, in \u001b[0;36m<cell line: 10>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 11\u001b[0m generator\u001b[38;5;241m.\u001b[39mmanual_seed(seed)\n\u001b[1;32m 12\u001b[0m audio \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39marray(mel\u001b[38;5;241m.\u001b[39maudio[sample \u001b[38;5;241m*\u001b[39m stride:sample \u001b[38;5;241m*\u001b[39m stride \u001b[38;5;241m+\u001b[39m slice_size])\n\u001b[0;32m---> 13\u001b[0m display(Audio(audio, rate\u001b[38;5;241m=\u001b[39m\u001b[43msample_rate\u001b[49m))\n\u001b[1;32m 14\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(track) \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m0\u001b[39m:\n\u001b[1;32m 15\u001b[0m \u001b[38;5;66;03m# Normalize and re-insert generated audio\u001b[39;00m\n\u001b[1;32m 16\u001b[0m audio[:overlap_samples] \u001b[38;5;241m=\u001b[39m audio2[\u001b[38;5;241m-\u001b[39moverlap_samples:] \u001b[38;5;241m*\u001b[39m np\u001b[38;5;241m.\u001b[39mmax(\n\u001b[1;32m 17\u001b[0m audio[:overlap_samples]) \u001b[38;5;241m/\u001b[39m np\u001b[38;5;241m.\u001b[39mmax(audio2[\u001b[38;5;241m-\u001b[39moverlap_samples:])\n",
-      "\u001b[0;31mNameError\u001b[0m: name 'sample_rate' is not defined"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
-    "start_step = 500 #@param {type:\"slider\", min:0, max:1000, step:10}\n",
+    "start_step = 600 #@param {type:\"slider\", min:0, max:1000, step:10}\n",
     "overlap_secs = 2 #@param {type:\"integer\"}\n",
     "mel.load_audio(audio_file)\n",
     "overlap_samples = overlap_secs * mel.get_sample_rate()\n",
@@ -273,11 +261,11 @@
     "generator = torch.Generator()\n",
     "seed = generator.seed()\n",
     "track = np.array([])\n",
+    "not_first = 0\n",
     "for sample in range(len(mel.audio) // stride):\n",
     "    generator.manual_seed(seed)\n",
     "    audio = np.array(mel.audio[sample * stride:sample * stride + slice_size])\n",
-    "    display(Audio(audio, rate=mel.get_sample_rate()))\n",
-    "    if len(track) > 0:\n",
+    "    if not_first:\n",
     "        # Normalize and re-insert generated audio\n",
     "        audio[:overlap_samples] = audio2[-overlap_samples:] * np.max(\n",
     "            audio[:overlap_samples]) / np.max(audio2[-overlap_samples:])\n",
@@ -286,24 +274,15 @@
     "        raw_audio=audio,\n",
     "        start_step=start_step,\n",
     "        generator=generator,\n",
-    "        mask_start_secs=overlap_secs if len(track) > 0 else 0)\n",
-    "    display(Audio(audio2, rate=sample_rate))\n",
-    "    track = np.concatenate([track, audio2[overlap_samples:]])"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "8f5ed5d8",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "display(Audio(track, rate=sample_rate))"
+    "        mask_start_secs=overlap_secs * not_first)\n",
+    "    track = np.concatenate([track, audio2[overlap_samples * not_first:]])\n",
+    "    not_first = 1\n",
+    "    display(Audio(track, rate=sample_rate))"
    ]
   },
   {
    "cell_type": "markdown",
-   "id": "61f2ed2a",
+   "id": "924ff9d5",
    "metadata": {},
    "source": [
     "### Fill the gap (\"in-painting\")"
@@ -312,7 +291,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "8cad545c",
+   "id": "0200264c",
    "metadata": {},
    "outputs": [],
    "source": [
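For reference, here is a sketch of how the revised out-painting cell reads once this diff is applied. It is not the notebook verbatim: generate_slice is a hypothetical stand-in for the notebook's actual pipeline call (only its keyword arguments raw_audio, start_step, generator and mask_start_secs appear in the changed lines), and mel, audio_file, stride, slice_size and sample_rate are assumed to be defined in earlier, unchanged cells.

import numpy as np
import torch
from IPython.display import Audio, display

start_step = 600   #@param {type:"slider", min:0, max:1000, step:10}
overlap_secs = 2   #@param {type:"integer"}

mel.load_audio(audio_file)                              # mel, audio_file: earlier cells
overlap_samples = overlap_secs * mel.get_sample_rate()

generator = torch.Generator()
seed = generator.seed()
track = np.array([])
not_first = 0  # 0 for the first slice, 1 for every later one

for sample in range(len(mel.audio) // stride):
    generator.manual_seed(seed)
    audio = np.array(mel.audio[sample * stride:sample * stride + slice_size])
    if not_first:
        # Normalize and re-insert the previously generated overlap
        audio[:overlap_samples] = audio2[-overlap_samples:] * np.max(
            audio[:overlap_samples]) / np.max(audio2[-overlap_samples:])
    audio2 = generate_slice(                  # hypothetical name for the pipeline call
        raw_audio=audio,
        start_step=start_step,
        generator=generator,
        mask_start_secs=overlap_secs * not_first)  # no mask on the first slice
    track = np.concatenate([track, audio2[overlap_samples * not_first:]])
    not_first = 1
    display(Audio(track, rate=sample_rate))

Reusing the integer flag arithmetically (mask_start_secs=overlap_secs * not_first, audio2[overlap_samples * not_first:]) replaces the two `if len(track) > 0` branches of the old cell: the first iteration runs with no mask and keeps the whole generated slice, while every later iteration masks the overlap and drops it when extending the track.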