Spaces:
Runtime error
Runtime error
make colab friendly
Browse files- notebooks/test_mel.ipynb +83 -5
notebooks/test_mel.ipynb
CHANGED
@@ -1,14 +1,28 @@
|
|
1 |
{
|
2 |
"cells": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
{
|
4 |
"cell_type": "code",
|
5 |
"execution_count": null,
|
6 |
-
"id": "
|
7 |
"metadata": {},
|
8 |
"outputs": [],
|
9 |
"source": [
|
10 |
-
"
|
11 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
]
|
13 |
},
|
14 |
{
|
@@ -30,6 +44,7 @@
|
|
30 |
"metadata": {},
|
31 |
"outputs": [],
|
32 |
"source": [
|
|
|
33 |
"from IPython.display import Audio\n",
|
34 |
"from audiodiffusion.mel import Mel"
|
35 |
]
|
@@ -58,6 +73,21 @@
|
|
58 |
"### Transform slice of audio to mel spectrogram"
|
59 |
]
|
60 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
61 |
{
|
62 |
"cell_type": "code",
|
63 |
"execution_count": null,
|
@@ -65,7 +95,7 @@
|
|
65 |
"metadata": {},
|
66 |
"outputs": [],
|
67 |
"source": [
|
68 |
-
"mel.load_audio(
|
69 |
]
|
70 |
},
|
71 |
{
|
@@ -108,10 +138,58 @@
|
|
108 |
"Audio(data=audio, rate=mel.get_sample_rate())"
|
109 |
]
|
110 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
111 |
{
|
112 |
"cell_type": "code",
|
113 |
"execution_count": null,
|
114 |
-
"id": "
|
115 |
"metadata": {},
|
116 |
"outputs": [],
|
117 |
"source": []
|
|
|
1 |
{
|
2 |
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "markdown",
|
5 |
+
"id": "3de63898",
|
6 |
+
"metadata": {},
|
7 |
+
"source": [
|
8 |
+
"<a href=\"https://colab.research.google.com/github/teticio/audio-diffusion/blob/master/notebooks/test_mel.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
|
9 |
+
]
|
10 |
+
},
|
11 |
{
|
12 |
"cell_type": "code",
|
13 |
"execution_count": null,
|
14 |
+
"id": "81fbd495",
|
15 |
"metadata": {},
|
16 |
"outputs": [],
|
17 |
"source": [
|
18 |
+
"try:\n",
|
19 |
+
" # are we running on Google Colab?\n",
|
20 |
+
" import google.colab\n",
|
21 |
+
" !git clone -q https://github.com/teticio/audio-diffusion.git\n",
|
22 |
+
" %cd audio-diffusion\n",
|
23 |
+
" !pip install -q -r requirements.txt\n",
|
24 |
+
"except:\n",
|
25 |
+
" pass"
|
26 |
]
|
27 |
},
|
28 |
{
|
|
|
44 |
"metadata": {},
|
45 |
"outputs": [],
|
46 |
"source": [
|
47 |
+
"from datasets import load_dataset\n",
|
48 |
"from IPython.display import Audio\n",
|
49 |
"from audiodiffusion.mel import Mel"
|
50 |
]
|
|
|
73 |
"### Transform slice of audio to mel spectrogram"
|
74 |
]
|
75 |
},
|
76 |
+
{
|
77 |
+
"cell_type": "code",
|
78 |
+
"execution_count": null,
|
79 |
+
"id": "f32bb35e",
|
80 |
+
"metadata": {},
|
81 |
+
"outputs": [],
|
82 |
+
"source": [
|
83 |
+
"try:\n",
|
84 |
+
" # are we running on Google Colab?\n",
|
85 |
+
" from google.colab import files\n",
|
86 |
+
" audio_file = list(files.upload().keys())[0]\n",
|
87 |
+
"except:\n",
|
88 |
+
" audio_file = \"/home/teticio/Music/Music/A Tribe Called Quest/The Anthology/08 Can I Kick It_.mp3\""
|
89 |
+
]
|
90 |
+
},
|
91 |
{
|
92 |
"cell_type": "code",
|
93 |
"execution_count": null,
|
|
|
95 |
"metadata": {},
|
96 |
"outputs": [],
|
97 |
"source": [
|
98 |
+
"mel.load_audio(audio_file)"
|
99 |
]
|
100 |
},
|
101 |
{
|
|
|
138 |
"Audio(data=audio, rate=mel.get_sample_rate())"
|
139 |
]
|
140 |
},
|
141 |
+
{
|
142 |
+
"cell_type": "markdown",
|
143 |
+
"id": "0f1f2006",
|
144 |
+
"metadata": {},
|
145 |
+
"source": [
|
146 |
+
"### Select a random image from the training set"
|
147 |
+
]
|
148 |
+
},
|
149 |
+
{
|
150 |
+
"cell_type": "code",
|
151 |
+
"execution_count": null,
|
152 |
+
"id": "1f29f025",
|
153 |
+
"metadata": {},
|
154 |
+
"outputs": [],
|
155 |
+
"source": [
|
156 |
+
"ds = load_dataset('teticio/audio-diffusion-256')"
|
157 |
+
]
|
158 |
+
},
|
159 |
+
{
|
160 |
+
"cell_type": "code",
|
161 |
+
"execution_count": null,
|
162 |
+
"id": "e002482d",
|
163 |
+
"metadata": {},
|
164 |
+
"outputs": [],
|
165 |
+
"source": [
|
166 |
+
"image = ds['train'].shuffle().select(range(1))['image'][0]\n",
|
167 |
+
"image"
|
168 |
+
]
|
169 |
+
},
|
170 |
+
{
|
171 |
+
"cell_type": "markdown",
|
172 |
+
"id": "6a801fc5",
|
173 |
+
"metadata": {},
|
174 |
+
"source": [
|
175 |
+
"### Convert to audio"
|
176 |
+
]
|
177 |
+
},
|
178 |
+
{
|
179 |
+
"cell_type": "code",
|
180 |
+
"execution_count": null,
|
181 |
+
"id": "a2421827",
|
182 |
+
"metadata": {},
|
183 |
+
"outputs": [],
|
184 |
+
"source": [
|
185 |
+
"audio = mel.image_to_audio(image)\n",
|
186 |
+
"Audio(data=audio, rate=mel.get_sample_rate())"
|
187 |
+
]
|
188 |
+
},
|
189 |
{
|
190 |
"cell_type": "code",
|
191 |
"execution_count": null,
|
192 |
+
"id": "2281fb55",
|
193 |
"metadata": {},
|
194 |
"outputs": [],
|
195 |
"source": []
|