Spaces:
Runtime error
Runtime error
mrfakename
commited on
Commit
•
df8f6a6
1
Parent(s):
f1cfc49
add app
Browse files- app.py +314 -0
- packages.txt +1 -0
- requirements.txt +6 -0
app.py
ADDED
@@ -0,0 +1,314 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Thank you to the authors of seewav for dedicating it into the public domain.
|
2 |
+
# This program is also dedicated into the public domain.
|
3 |
+
# You may use it, at your choice, under the Unlicense, CC0, or WTFPL license.
|
4 |
+
# Enjoy!
|
5 |
+
|
6 |
+
# Mostly from: https://github.com/adefossez/seewav
|
7 |
+
# Original author: adefossez
|
8 |
+
|
9 |
+
|
10 |
+
import math
|
11 |
+
import tempfile
|
12 |
+
from pathlib import Path
|
13 |
+
import subprocess
|
14 |
+
import cairo
|
15 |
+
import numpy as np
|
16 |
+
import gradio as gr
|
17 |
+
from pydub import AudioSegment
|
18 |
+
|
19 |
+
|
20 |
+
def read_audio(audio, seek=None, duration=None):
    """
    Read the `audio` file, starting at `seek` (or 0) seconds for `duration` (or all) seconds.

    Returns a `(wav, samplerate)` pair where `wav` is `float32[channels, samples]`
    and `samplerate` is the frame rate reported by pydub. The sample values are
    only cast to float32; no rescaling is applied here.
    """
    audio_segment = AudioSegment.from_file(audio)
    channels = audio_segment.channels
    samplerate = audio_segment.frame_rate

    # pydub slices are expressed in milliseconds.
    if seek is not None:
        audio_segment = audio_segment[int(seek * 1000):]

    if duration is not None:
        audio_segment = audio_segment[:int(duration * 1000)]

    samples = audio_segment.get_array_of_samples()
    wav = np.array(samples, dtype=np.float32)
    # pydub serialises multi-channel audio frame by frame (L0, R0, L1, R1, ...),
    # so the flat array must be viewed as [frames, channels] and then transposed.
    # Reshaping straight to (channels, -1) would mix both channels into each row.
    return wav.reshape(-1, channels).T, samplerate
|
41 |
+
|
42 |
+
|
43 |
+
def sigmoid(x):
    """Logistic function `1 / (1 + exp(-x))`, evaluated with NumPy (scalars or arrays)."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator
|
45 |
+
|
46 |
+
|
47 |
+
def envelope(wav, window, stride):
    """
    Compute the envelope of the waveform `wav` (float[samples]).

    The signal is zero-padded by `window // 2` on both sides, then for every
    offset `0, stride, 2 * stride, ...` the positive part of the `window`-sample
    frame is averaged. The pooled values are finally squashed with a sigmoid.
    """
    padded = np.pad(wav, window // 2)
    starts = range(0, len(padded) - window, stride)
    pooled = np.array(
        [np.maximum(padded[start : start + window], 0).mean() for start in starts]
    )
    # Some form of audio compressor based on the sigmoid.
    return 1.9 * (sigmoid(2.5 * pooled) - 0.5)
|
62 |
+
|
63 |
+
|
64 |
+
def draw_env(envs, out, fg_colors, bg_color, size):
    """
    Internal helper: render one frame with cairo and write it to `out` as a PNG.

    `envs` is a list with one envelope per channel; each envelope is a
    float[bars] giving the height of every bar. Waves are stacked vertically,
    wave `i` being drawn with `fg_colors[i]` on a `bg_color` background.
    `size` is the `(width, height)` of the image in pixels.
    """
    surface = cairo.ImageSurface(cairo.FORMAT_ARGB32, *size)
    ctx = cairo.Context(surface)
    # Scale so that all drawing below happens in the unit square.
    ctx.scale(*size)

    # Paint the background.
    ctx.set_source_rgb(*bg_color)
    ctx.rectangle(0, 0, 1, 1)
    ctx.fill()

    n_waves = len(envs)  # number of waves to draw (stacked vertically)
    n_steps = len(envs[0])  # number of time steps, i.e. bars per wave
    pad_ratio = 0.1  # spacing ratio between 2 bars
    width = 1.0 / (n_steps * (1 + 2 * pad_ratio))
    pad = pad_ratio * width
    delta = 2 * pad + width

    ctx.set_line_width(width)
    for step in range(n_steps):
        x = pad + step * delta
        for i, env in enumerate(envs):
            # (semi-)height of the bar, shrunk because n_waves waves share the frame
            half = 0.5 * env[step] / n_waves
            midrule = (1 + 2 * i) / (2 * n_waves)  # vertical centre of the i-th wave

            # Upper half of the bar, fully opaque.
            ctx.set_source_rgb(*fg_colors[i])
            ctx.move_to(x, midrule - half)
            ctx.line_to(x, midrule)
            ctx.stroke()

            # Lower half: slightly shorter and slightly transparent.
            ctx.set_source_rgba(*fg_colors[i], 0.8)
            ctx.move_to(x, midrule)
            ctx.line_to(x, midrule + 0.9 * half)
            ctx.stroke()

    surface.write_to_png(out)
|
102 |
+
|
103 |
+
|
104 |
+
def interpole(x1, y1, x2, y2, x):
    """Evaluate at `x` the straight line going through `(x1, y1)` and `(x2, y2)`."""
    rise = y2 - y1
    offset = x - x1
    run = x2 - x1
    return y1 + rise * offset / run
|
106 |
+
|
107 |
+
|
108 |
+
def visualize(
    progress,
    audio,
    tmp,
    out,
    seek=None,
    duration=None,
    rate=60,
    bars=50,
    speed=4,
    time=0.4,
    oversample=3,
    fg_color=(0.2, 0.2, 0.2),
    fg_color2=(0.5, 0.3, 0.6),
    bg_color=(1, 1, 1),
    size=(400, 400),
    stereo=False,
):
    """
    Generate the visualisation for the `audio` file, using a `tmp` folder and saving the final
    video in `out`. Returns `out`.

    `progress` is a callable wrapping the frame iterable (tqdm-like), used to report progress.
    `seek` and `duration` give the extract location if any; they are applied both to the
        analysed waveform and to the audio track muxed by ffmpeg.
    `rate` is the framerate of the output video.
    `bars` is the number of bars in the animation.
    `speed` is the base speed of transition. Depending on volume, actual speed will vary
        between 0.5 and 2 times it.
    `time` amount of audio shown at once on a frame.
    `oversample` higher values will lead to more frequent changes.
    `fg_color` is the rgb color to use for the foreground.
    `fg_color2` is the rgb color to use for the second wav if stereo is set.
    `bg_color` is the rgb color to use for the background.
    `size` is the `(width, height)` in pixels to generate.
    `stereo` is whether to create 2 waves.

    Raises `gr.Error` when the audio cannot be read or when `stereo` is requested for a
    file that does not have exactly two channels. ffmpeg failures propagate as
    `subprocess.CalledProcessError`.
    """
    try:
        wav, sr = read_audio(audio, seek=seek, duration=duration)
    except (IOError, ValueError) as err:
        raise gr.Error(str(err)) from err

    # wavs is a list of wav over channels
    if stereo:
        # Explicit check instead of `assert`, which is stripped under `python -O`.
        if wav.shape[0] != 2:
            raise gr.Error("stereo requires stereo audio file")
        wavs = [wav[0], wav[1]]
    else:
        wavs = [wav.mean(0)]

    for i, channel in enumerate(wavs):
        scale = channel.std()
        # A perfectly silent channel has a zero standard deviation; leave it
        # untouched rather than dividing by zero and filling it with NaN.
        if scale > 0:
            wavs[i] = channel / scale

    # Both values must be at least 1: a zero stride makes `range()` raise and a
    # zero window would average empty frames.
    window = max(1, int(sr * time / bars))
    stride = max(1, int(window / oversample))

    # envs is a list of env over channels
    envs = []
    for channel in wavs:
        env = envelope(channel, window, stride)
        env = np.pad(env, (bars // 2, 2 * bars))
        envs.append(env)

    # Kept separate from `duration`, which is still needed for the ffmpeg call.
    clip_seconds = len(wavs[0]) / sr
    frames = int(rate * clip_seconds)
    smooth = np.hanning(bars)

    gr.Info("Generating the frames...")
    for idx in progress(range(frames)):
        pos = (((idx / rate)) * sr) / stride / bars
        off = int(pos)
        loc = pos - off
        denvs = []
        for env in envs:
            env1 = env[off * bars : (off + 1) * bars]
            env2 = env[(off + 1) * bars : (off + 2) * bars]

            # we want loud parts to be updated faster
            maxvol = math.log10(1e-4 + env2.max()) * 10
            speedup = np.clip(interpole(-6, 0.5, 0, 2, maxvol), 0.5, 2)
            # Blend weight between the current and the next set of bars.
            w = sigmoid(speed * speedup * (loc - 0.5))
            denv = (1 - w) * env1 + w * env2
            denv *= smooth
            denvs.append(denv)
        draw_env(denvs, tmp / f"{idx:06d}.png", (fg_color, fg_color2), bg_color, size)

    # Cut the audio track to the same extract as the frames. ffmpeg runs with
    # `cwd=tmp`, so the audio path is made absolute first.
    audio_input = []
    if seek is not None:
        audio_input += ["-ss", str(seek)]
    audio_input += ["-i", str(Path(audio).resolve())]
    if duration is not None:
        audio_input += ["-t", str(duration)]

    gr.Info("Encoding the animation video...")
    subprocess.run(
        [
            "ffmpeg", "-y", "-loglevel", "panic", "-r",
            str(rate), "-f", "image2", "-s", f"{size[0]}x{size[1]}", "-i", "%06d.png",
        ]
        + audio_input
        + [
            "-c:a", "aac", "-vcodec", "libx264", "-crf", "10", "-pix_fmt", "yuv420p",
            str(out.resolve()),
        ],
        check=True,
        cwd=tmp,
    )
    return out
|
198 |
+
|
199 |
+
|
200 |
+
|
201 |
+
def parse_color(colorstr):
    """
    Parse a comma separated "r,g,b" string and return a 3-tuple of floats.

    Raises `gr.Error` when the string does not contain exactly three
    comma separated values that `float()` accepts.
    """
    try:
        r, g, b = [float(part) for part in colorstr.split(",")]
    except ValueError as err:
        # Covers both unparsable numbers and a wrong number of components.
        raise gr.Error(
            "Format for color is 3 floats separated by commas 0.xx,0.xx,0.xx, rgb order"
        ) from err
    return r, g, b
|
212 |
+
|
213 |
+
|
214 |
+
def hex_to_rgb(hex_color):
    """
    Convert a hex colour such as "#00237E" or "#0a0" to an `(r, g, b)` tuple
    of integers in the 0-255 range. Leading `#` characters are optional.
    """
    digits = hex_color.lstrip('#')
    if len(digits) == 3:
        # Expand the shorthand form: "abc" -> "aabbcc".
        digits = ''.join(ch + ch for ch in digits)
    red, green, blue = (int(digits[start:start + 2], 16) for start in range(0, 6, 2))
    return red, green, blue
|
219 |
+
|
220 |
+
|
221 |
+
def do_viz(
    inp_aud,
    inp_bgcolor,
    inp_color1,
    inp_nbars,
    inp_vidw,
    inp_vidh,
    progress=gr.Progress(),
):
    """
    Gradio click handler: render the audio file `inp_aud` as a waveform video.

    `inp_bgcolor` and `inp_color1` are hex colour strings for the background and
    the waveform. `inp_nbars`, `inp_vidw` and `inp_vidh` are the number of bars
    and the video size in pixels. Returns the path of the generated `.mp4`.

    Raises `gr.Error` when no audio file was provided.
    """
    if not inp_aud:
        raise gr.Error("Please provide an audio file to visualize.")

    def cairo_color(hex_color):
        # hex_to_rgb yields 0-255 integers while cairo expects components in
        # 0..1 and clamps anything above, which would saturate every channel.
        return tuple(channel / 255 for channel in hex_to_rgb(hex_color))

    # Only reserve a unique output path here. The handle is closed right away
    # so that ffmpeg is the only writer of the file.
    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as out_file:
        out_path = Path(out_file.name)

    try:
        with tempfile.TemporaryDirectory() as tmp:
            return visualize(
                progress.tqdm,
                inp_aud,
                Path(tmp),
                out_path,
                # NOTE(review): sliders may deliver floats depending on the Gradio
                # version; the drawing and slicing code needs integers.
                bars=int(inp_nbars),
                fg_color=cairo_color(inp_color1),
                bg_color=cairo_color(inp_bgcolor),
                size=(int(inp_vidw), int(inp_vidh)),
            )
    except Exception:
        # The output file is created with delete=False; do not leave it behind
        # when rendering fails.
        out_path.unlink(missing_ok=True)
        raise
|
244 |
+
|
245 |
+
|
246 |
+
import gradio as gr
|
247 |
+
|
248 |
+
# Markdown rendered at the top of the page through gr.Markdown below.
ABOUT = """
# seewav GUI

> Have an audio clip but need a video (e.g. for X/Twitter)?

**Convert audio into a nice video!**

An online graphical user interface for [seewav](https://github.com/adefossez/seewav).

Enjoy!
"""
# NOTE(review): the nesting of the layout below was reconstructed from a
# flattened copy of the file - confirm it against the original app.py.
with gr.Blocks() as demo:
    gr.Markdown(ABOUT)
    with gr.Row():
        # Input widgets.
        with gr.Column():
            # type='filepath' hands the handler a path string, not raw samples.
            inp_aud = gr.Audio(type='filepath')
            with gr.Group():
                inp_color1 = gr.ColorPicker(
                    label="Color",
                    info="Color of the top waveform",
                    value="#00237E",
                    interactive=True,
                )
                inp_bgcolor = gr.ColorPicker(
                    label="Background Color",
                    info="Color of the background",
                    value="#000000",
                    interactive=True,
                )
            with gr.Accordion("Advanced Configuration", open=False):
                inp_nbars = gr.Slider(
                    label="Num. Bars",
                    value=50,
                    interactive=True,
                    minimum=2,
                    maximum=500,
                )
                inp_vidw = gr.Slider(
                    label="Video Width",
                    value=400,
                    interactive=True,
                    minimum=100,
                    maximum=3000,
                )
                inp_vidh = gr.Slider(
                    label="Video Height",
                    value=400,
                    interactive=True,
                    minimum=100,
                    maximum=3000,
                )
            inp_go = gr.Button("Visualize", variant="primary")
        # Output widget showing the value returned by do_viz.
        with gr.Column():
            out_vid = gr.Video(interactive=False)
    # The order of `inputs` must match the positional parameters of do_viz:
    # audio, background colour, waveform colour, bars, width, height.
    inp_go.click(
        do_viz,
        inputs=[
            inp_aud,
            inp_bgcolor,
            inp_color1,
            inp_nbars,
            inp_vidw,
            inp_vidh,
        ],
        outputs=[out_vid],
    )
# Enable the request queue and start the server when the file is executed.
demo.queue(api_open=False).launch(show_api=False)
|
packages.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
ffmpeg
|
requirements.txt
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
numpy
|
2 |
+
pycairo
|
3 |
+
tqdm
|
4 |
+
pydub
|
5 |
+
ffmpeg-python
|
6 |
+
opencv-python
|