teticio committed on
Commit
0f3ac5f
1 Parent(s): af13674

Make notebook Colab-friendly

Browse files
Files changed (1) hide show
  1. notebooks/test_mel.ipynb +83 -5
notebooks/test_mel.ipynb CHANGED
@@ -1,14 +1,28 @@
1
  {
2
  "cells": [
 
 
 
 
 
 
 
 
3
  {
4
  "cell_type": "code",
5
  "execution_count": null,
6
- "id": "2a61d194",
7
  "metadata": {},
8
  "outputs": [],
9
  "source": [
10
- "%load_ext autoreload\n",
11
- "%autoreload 2"
 
 
 
 
 
 
12
  ]
13
  },
14
  {
@@ -30,6 +44,7 @@
30
  "metadata": {},
31
  "outputs": [],
32
  "source": [
 
33
  "from IPython.display import Audio\n",
34
  "from audiodiffusion.mel import Mel"
35
  ]
@@ -58,6 +73,21 @@
58
  "### Transform slice of audio to mel spectrogram"
59
  ]
60
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
  {
62
  "cell_type": "code",
63
  "execution_count": null,
@@ -65,7 +95,7 @@
65
  "metadata": {},
66
  "outputs": [],
67
  "source": [
68
- "mel.load_audio('/home/teticio/Music/Music/A Tribe Called Quest/The Anthology/08 Can I Kick It_.mp3')"
69
  ]
70
  },
71
  {
@@ -108,10 +138,58 @@
108
  "Audio(data=audio, rate=mel.get_sample_rate())"
109
  ]
110
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
111
  {
112
  "cell_type": "code",
113
  "execution_count": null,
114
- "id": "a0dffbc4",
115
  "metadata": {},
116
  "outputs": [],
117
  "source": []
 
1
  {
2
  "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "id": "3de63898",
6
+ "metadata": {},
7
+ "source": [
8
+ "<a href=\"https://colab.research.google.com/github/teticio/audio-diffusion/blob/master/notebooks/test_mel.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
9
+ ]
10
+ },
11
  {
12
  "cell_type": "code",
13
  "execution_count": null,
14
+ "id": "81fbd495",
15
  "metadata": {},
16
  "outputs": [],
17
  "source": [
18
+ "try:\n",
19
+ " # are we running on Google Colab?\n",
20
+ " import google.colab\n",
21
+ " !git clone -q https://github.com/teticio/audio-diffusion.git\n",
22
+ " %cd audio-diffusion\n",
23
+ " !pip install -q -r requirements.txt\n",
24
+ "except:\n",
25
+ " pass"
26
  ]
27
  },
28
  {
 
44
  "metadata": {},
45
  "outputs": [],
46
  "source": [
47
+ "from datasets import load_dataset\n",
48
  "from IPython.display import Audio\n",
49
  "from audiodiffusion.mel import Mel"
50
  ]
 
73
  "### Transform slice of audio to mel spectrogram"
74
  ]
75
  },
76
+ {
77
+ "cell_type": "code",
78
+ "execution_count": null,
79
+ "id": "f32bb35e",
80
+ "metadata": {},
81
+ "outputs": [],
82
+ "source": [
83
+ "try:\n",
84
+ " # are we running on Google Colab?\n",
85
+ " from google.colab import files\n",
86
+ " audio_file = list(files.upload().keys())[0]\n",
87
+ "except:\n",
88
+ " audio_file = \"/home/teticio/Music/Music/A Tribe Called Quest/The Anthology/08 Can I Kick It_.mp3\""
89
+ ]
90
+ },
91
  {
92
  "cell_type": "code",
93
  "execution_count": null,
 
95
  "metadata": {},
96
  "outputs": [],
97
  "source": [
98
+ "mel.load_audio(audio_file)"
99
  ]
100
  },
101
  {
 
138
  "Audio(data=audio, rate=mel.get_sample_rate())"
139
  ]
140
  },
141
+ {
142
+ "cell_type": "markdown",
143
+ "id": "0f1f2006",
144
+ "metadata": {},
145
+ "source": [
146
+ "### Select a random image from the training set"
147
+ ]
148
+ },
149
+ {
150
+ "cell_type": "code",
151
+ "execution_count": null,
152
+ "id": "1f29f025",
153
+ "metadata": {},
154
+ "outputs": [],
155
+ "source": [
156
+ "ds = load_dataset('teticio/audio-diffusion-256')"
157
+ ]
158
+ },
159
+ {
160
+ "cell_type": "code",
161
+ "execution_count": null,
162
+ "id": "e002482d",
163
+ "metadata": {},
164
+ "outputs": [],
165
+ "source": [
166
+ "image = ds['train'].shuffle().select(range(1))['image'][0]\n",
167
+ "image"
168
+ ]
169
+ },
170
+ {
171
+ "cell_type": "markdown",
172
+ "id": "6a801fc5",
173
+ "metadata": {},
174
+ "source": [
175
+ "### Convert to audio"
176
+ ]
177
+ },
178
+ {
179
+ "cell_type": "code",
180
+ "execution_count": null,
181
+ "id": "a2421827",
182
+ "metadata": {},
183
+ "outputs": [],
184
+ "source": [
185
+ "audio = mel.image_to_audio(image)\n",
186
+ "Audio(data=audio, rate=mel.get_sample_rate())"
187
+ ]
188
+ },
189
  {
190
  "cell_type": "code",
191
  "execution_count": null,
192
+ "id": "2281fb55",
193
  "metadata": {},
194
  "outputs": [],
195
  "source": []