Spaces:
Running
Running
File size: 11,644 Bytes
d7ff226 5e5dc03 2fbfeb6 78dd807 d7ff226 6979b00 d7ff226 db19dc1 2c9b6e2 8091650 db19dc1 ece5f12 b8bb35c ece5f12 f5213d4 d7ff226 4305074 d7ff226 db725c9 4305074 db725c9 d7ff226 71ce4d9 088fe02 d7ff226 b536a33 d7ff226 e835e58 d7ff226 5e5dc03 d7ff226 5e5dc03 d7ff226 35abc84 d7ff226 1d44699 d7ff226 e835e58 004ce63 d7ff226 004ce63 d7ff226 7f8a1b2 dc8a999 7f8a1b2 dc8a999 7f8a1b2 dc8a999 7f8a1b2 78dd807 e32c131 78dd807 e7c405a 78dd807 d782c33 7f8a1b2 dc8a999 d782c33 e7c405a d782c33 e7c405a 78dd807 dc8a999 78dd807 1d44699 d7ff226 78dd807 d7ff226 5e5dc03 d7ff226 78dd807 d7ff226 1d44699 d7ff226 6a8f4ca 1d44699 088fe02 06f1ea0 d7ff226 b536a33 d7ff226 5e5dc03 d7ff226 5e5dc03 78dd807 d7ff226 d6584d2 5e5dc03 e835e58 d6584d2 e835e58 d7ff226 2c9b6e2 ca1a3de d6584d2 cc2fd5e d7ff226 e7c405a d782c33 9b7f939 d782c33 d7ff226 011064a e835e58 004ce63 011064a e6606d5 2fbfeb6 c859ab7 d7ff226 e835e58 d7ff226 5ecacd8 011064a e6606d5 974e529 450a3f3 d7ff226 2166fcd d7ff226 2976e81 d7ff226 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 |
import gradio as gr
import librosa
from PIL import Image, ImageDraw, ImageFont
from mutagen.mp3 import MP3
from mutagen.id3 import ID3, APIC, TIT2, TPE1
import io
from colorthief import ColorThief
import colorsys
import math
import os
from multiprocessing import Pool, cpu_count
import tempfile
import ffmpeg
import subprocess
import traceback
import time
import shutil
import LRC
import LRC2SRT
# Prefix prepended (by plain string concatenation) to the font files, the
# rendered frame directory ``out/<name>/`` and the final ``<name>.mp4``.
# Because it is concatenated rather than joined, a non-empty value needs to
# end with a path separator.
path = "" # Update with your path
def safe_read(i: int, a: list):
    """Return ``a[i]``, or 128 when ``i`` lies outside the list.

    128 is the midpoint of the unsigned 8-bit sample range this file uses
    (samples are encoded as ``y * 128 + 128``), so an out-of-range read
    behaves like silence.

    Negative indices are treated as out of range as well. Without that
    check, Python's negative indexing would silently return samples from
    the END of the list when a caller asks for a position before the start.

    Args:
        i: Position to read.
        a: Sequence of samples.

    Returns:
        The sample at ``i``, or 128 if ``i`` is negative or ``>= len(a)``.
    """
    if 0 <= i < len(a):
        return a[i]
    return 128
def getTrigger(ad: int, a: list, max: int = 1024) -> int:
    """Find the first upward crossing at or after ``ad``.

    A position ``i`` qualifies when the sample at ``i`` is below 126 and the
    sample four positions later is at least 130. The scan gives up once more
    than ``max`` positions past ``ad`` have been examined.

    Reads outside the list are treated as 128 (the midpoint of the unsigned
    8-bit range) instead of raising ``IndexError``, so scanning near the end
    of the data is safe.

    NOTE: a second ``getTrigger`` is defined further down in this module;
    that later definition replaces this one at import time.

    Args:
        ad: Index at which scanning starts.
        a: Sequence of samples.
        max: Maximum number of positions to scan past ``ad``. The name
            shadows the builtin but is kept because callers pass it by
            keyword.

    Returns:
        The index of the first qualifying position, or ``ad + max + 1`` when
        none was found within the scan window.
    """
    def sample(pos):
        # Bounds-checked read; anything outside the data counts as silence.
        return a[pos] if 0 <= pos < len(a) else 128

    i = ad
    while i - ad <= max:
        if sample(i) < 126 and sample(i + 4) >= 130:
            break
        i += 1
    return i
def getRenderCords(ta: list, idx: int, res: int = 1024, size: tuple = (1280, 720)) -> list:
    """Build the polyline for the waveform window centred on ``idx``.

    The window covers sample positions ``idx - res // 2`` up to (but not
    including) ``idx + res // 2``. Coordinates are centre-origin: x spans
    90% of the frame width starting at the left edge of that span, and y is
    the sample offset from 128 scaled by ``height / 2000`` and shifted by
    ``height * .7 / -2``.

    Every sample read is bounds-checked. Positions before the start or past
    the end of ``ta`` are drawn as 128 (zero offset) rather than raising
    ``IndexError`` or wrapping around to the other end of the list.

    Args:
        ta: Sequence of samples.
        idx: Sample index the window is centred on.
        res: Number of sample positions in the window.
        size: ``(width, height)`` of the frame in pixels.

    Returns:
        A list of ``(x, y)`` tuples, one per sample position in the window.
    """
    def sample(pos):
        return ta[pos] if 0 <= pos < len(ta) else 128

    frame_width, frame_height = size[0], size[1]
    # Loop-invariant geometry, computed once.
    y_scale = frame_height / 2000
    y_shift = frame_height * .7 / -2
    x_step = (frame_width * .9) / res

    i = idx - res // 2
    end = idx + (res // 2)
    x = frame_width * .9 / -2
    c = []
    while i < end:
        c.append((x, (sample(i) - 128) * y_scale + y_shift))
        i += 1
        x += x_step
    return c
def center_to_top_left(coords, width=1280, height=720):
    """Convert a sequence of centre-origin points to top-left-origin points.

    Each ``(x, y)`` pair in ``coords`` is passed through ``totopleft`` with
    the given frame dimensions.

    Args:
        coords: Iterable of ``(x, y)`` pairs.
        width: Frame width used for the conversion.
        height: Frame height used for the conversion.

    Returns:
        A list with one converted point per input pair, in the same order.
    """
    return [
        totopleft((cx, cy), width=width, height=height)
        for cx, cy in coords
    ]
def totopleft(coord, width=1280, height=720):
    """Convert one centre-origin point to top-left-origin coordinates.

    The x value is shifted right by half the width; the y value is flipped
    and shifted down by half the height.

    Args:
        coord: Sequence whose first two items are the x and y values.
        width: Frame width.
        height: Frame height.

    Returns:
        A ``(left, top)`` tuple.
    """
    cx = coord[0]
    cy = coord[1]
    left = cx + width / 2
    top = height / 2 - cy
    return left, top
def getTrigger(ad: int, a: list, max: int = 1024) -> int:
    """Scan forward from ``ad`` for a trigger position.

    A position qualifies when the sample there is below 124 and the sample
    two positions later is below 128. Samples are read through
    ``safe_read``. The scan also stops once it has moved more than ``max``
    positions past ``ad``.

    Args:
        ad: Index at which scanning starts.
        a: Sequence of samples.
        max: Maximum number of positions to scan past ``ad``.

    Returns:
        The first qualifying index, or ``ad + max + 1`` if the scan window
        was exhausted first.
    """
    pos = ad
    while True:
        if pos - ad > max:
            return pos
        if safe_read(pos, a) < 124 and safe_read(pos + 2, a) < 128:
            return pos
        pos += 1
def extract_cover_image(mp3_file):
    """Return the first embedded cover picture of an MP3 file.

    Args:
        mp3_file: Path (or file object) accepted by ``mutagen.mp3.MP3``.

    Returns:
        * a PIL image opened from the first APIC frame's data, or
        * ``-1`` when the file has no tag block at all, or
        * ``None`` when tags exist but contain no APIC frame (a message is
          printed in that case).

        The two sentinel values are kept as they are because the caller in
        this file distinguishes between them.
    """
    audio = MP3(mp3_file, ID3=ID3)
    # Identity comparison: ``== None`` would invoke the tag object's __eq__.
    if audio.tags is None:
        return -1
    for tag in audio.tags.values():
        if isinstance(tag, APIC):
            return Image.open(io.BytesIO(tag.data))
    print("No cover image found in the MP3 file.")
    return None
def getTitleAndArtist(mp3_file):
    """Read the title and artist text from an MP3's ID3 frames.

    Looks up the ``TIT2`` and ``TPE1`` frames; when one is absent, a
    placeholder frame carrying ``'Unknown Title'`` / ``'Unknown Artist'`` is
    used instead.

    Args:
        mp3_file: Path (or file object) accepted by ``mutagen.mp3.MP3``.

    Returns:
        A ``(title, artist)`` tuple holding the first text entry of each
        frame.
    """
    audio = MP3(mp3_file, ID3=ID3)
    fallback_title = TIT2(encoding=3, text='Unknown Title')
    fallback_artist = TPE1(encoding=3, text='Unknown Artist')
    title_frame = audio.get('TIT2', fallback_title)
    artist_frame = audio.get('TPE1', fallback_artist)
    return title_frame.text[0], artist_frame.text[0]
def getColour(img):
    """Return the dominant colour of a PIL image via ColorThief.

    The image is written to a temporary PNG because ColorThief is handed a
    file name here. The temporary file is always removed, including when
    saving or colour extraction raises.

    Args:
        img: PIL image to analyse.

    Returns:
        The value of ``ColorThief.get_color(quality=1)`` for the image.
    """
    tmpfile = tempfile.NamedTemporaryFile(suffix=".png", delete=False)
    # Close our own handle first so the file can be reopened by name for
    # writing; only the reserved file name is needed from here on.
    tmpfile.close()
    try:
        img.save(tmpfile.name, format="PNG")
        return ColorThief(tmpfile.name).get_color(quality=1)
    finally:
        os.remove(tmpfile.name)
def clamp(number):
    """Limit ``number`` to the closed interval [0, 1].

    Args:
        number: Value to limit.

    Returns:
        0 for values below 0, 1 for values above 1, otherwise the value.
    """
    capped = min(number, 1)
    return max(0, capped)
def normalizeColour(C) -> tuple[int, int, int]:
    """Derive the frame background colour from an RGB colour.

    Keeps the hue of ``C``, multiplies its saturation by 1.3 (limited to 1
    by ``clamp``) and forces the HSV value component to 0.8.

    Args:
        C: Sequence whose first three items are 0-255 red, green and blue.

    Returns:
        The adjusted colour as a tuple of three integers, each channel
        floored after scaling back to 0-255.
    """
    red, green, blue = C[0] / 255, C[1] / 255, C[2] / 255
    hsv = colorsys.rgb_to_hsv(red, green, blue)
    hue, saturation = hsv[0], hsv[1]
    boosted = clamp(1.3 * saturation)
    rgb = colorsys.hsv_to_rgb(hue, boosted, .8)
    return math.floor(rgb[0] * 255), math.floor(rgb[1] * 255), math.floor(rgb[2] * 255)
def normalizeColourBar(C) -> tuple[int, int, int]:
    """Derive the progress-bar track colour from an RGB colour.

    Keeps the hue of ``C``, multiplies its saturation by 1.4 (limited to 1
    by ``clamp``) and forces the HSV value component to 0.6.

    Args:
        C: Sequence whose first three items are 0-255 red, green and blue.

    Returns:
        The adjusted colour as a tuple of three integers, each channel
        floored after scaling back to 0-255.
    """
    unit_rgb = (C[0] / 255, C[1] / 255, C[2] / 255)
    hsv = colorsys.rgb_to_hsv(*unit_rgb)
    saturation = clamp(1.4 * hsv[1])
    out = colorsys.hsv_to_rgb(hsv[0], saturation, .6)
    red = math.floor(out[0] * 255)
    green = math.floor(out[1] * 255)
    blue = math.floor(out[2] * 255)
    return red, green, blue
def stamp_text(draw, text, font, position, align='left'):
    """Draw white text vertically centred on ``position``.

    The text size is measured with ``draw.textbbox``. Vertically, the text
    is moved up by half its height. Horizontally, ``position`` is the left
    edge for ``'left'`` (or any unrecognised value), the centre for
    ``'center'`` and the right edge for ``'right'``.

    Args:
        draw: Drawing object providing ``textbbox`` and ``text``.
        text: String to draw.
        font: Font passed through to the drawing calls.
        position: ``(x, y)`` anchor point.
        align: ``'left'``, ``'center'`` or ``'right'``.
    """
    left, top, right, bottom = draw.textbbox((0, 0), text, font=font)
    text_width = right - left
    text_height = bottom - top

    anchor_x, anchor_y = position
    top_y = anchor_y - text_height // 2
    if align == 'center':
        left_x = anchor_x - text_width // 2
    elif align == 'right':
        left_x = anchor_x - text_width
    else:
        left_x = anchor_x
    draw.text((left_x, top_y), text, font=font, fill="#fff")
def linear_interpolate(start, stop, progress):
    """Return the point ``progress`` of the way from ``start`` to ``stop``.

    Args:
        start: Value returned for ``progress == 0``.
        stop: Value returned for ``progress == 1``.
        progress: Fraction of the distance to travel; not limited to [0, 1].

    Returns:
        ``start + progress * (stop - start)``.
    """
    span = stop - start
    return start + progress * span
def filecount(p):
    """Return the number of entries in directory ``p``.

    The argument used to be ignored, so the count was always taken from the
    current working directory; it is now passed to ``os.listdir``.

    Args:
        p: Directory to inspect.

    Returns:
        Number of directory entries (files and sub-directories alike).
    """
    return len(os.listdir(p))
def render_frame(params):
    """Render a single video frame and save it as a PNG.

    Draws, on a background derived from the cover's dominant colour: the
    waveform window for this frame, the resized cover image, the title and
    artist text, and a progress bar. The result is written to
    ``path + 'out/<name>/<n>.png'``.

    Args:
        params: Tuple of ``(n, samples_array, cover_img, title, artist,
            dominant_color, width, height, fps, name, oscres, sr)`` where
            ``n`` is the frame number, ``samples_array`` the sample list,
            ``oscres`` the waveform window size and ``sr`` the sample rate.
            A single tuple is used so the function can be mapped over a
            process pool.

    Returns:
        ``1`` when a frame was written, ``None`` when the frame's first
        sample lies beyond the end of the audio.
    """
    (n, samples_array, cover_img, title, artist, dominant_color,
     width, height, fps, name, oscres, sr) = params

    # First sample of this frame; bail out before allocating anything.
    s = (sr // fps) * n
    if s > len(samples_array):
        return

    img = Image.new('RGB', (width, height), normalizeColour(dominant_color))
    d = ImageDraw.Draw(img)

    # Waveform, aligned on a trigger point so consecutive frames line up.
    trigger = getTrigger(s, samples_array, max=oscres)
    wave = getRenderCords(samples_array, trigger, res=oscres, size=(width, height))
    d.line(center_to_top_left(wave, width=width, height=height), fill='#fff', width=2)

    # Cover art: square, half of the shorter frame side, horizontally centred.
    short_side = min(width, height)
    cs = math.floor(short_side / 2)
    cov = cover_img.resize((cs, cs))
    img.paste(cov, (((width // 2) - cs // 2), math.floor(height * .1)))

    # Font sizes scale with the frame (50 / 40 px at 720). They are converted
    # to int because integer sizes are accepted by every Pillow release,
    # whereas float sizes are not.
    scale = short_side / 720
    fontT = ImageFont.truetype(path+'Lexend-Bold.ttf', int(50 * scale // 1))
    fontA = ImageFont.truetype(path+'Lexend-Bold.ttf', int(40 * scale // 1))
    stamp_text(d, title, fontT, totopleft((0, short_side * .3 // -2), width=width, height=height), 'center')
    stamp_text(d, artist, fontA, totopleft((0, short_side * .44 // -2), width=width, height=height), 'center')

    # Progress bar: a wide tinted track with a white fill on top whose right
    # end moves with the playback position s / len(samples_array).
    bar_y = height * .95 // -2
    track = [(width * .96 // -2, bar_y), (width * .96 // 2, bar_y)]
    d.line(center_to_top_left(track, width=width, height=height),
           fill=normalizeColourBar(dominant_color), width=15 * height // 360)
    fill_end = linear_interpolate(width * .95 // -2, width * .95 // 2, s / len(samples_array))
    fill = [(width * .95 // -2, bar_y), (fill_end, bar_y)]
    d.line(center_to_top_left(fill, width=width, height=height),
           fill='#fff', width=10 * height // 360)

    img.save(path+f'out/{name}/{str(n)}.png', 'PNG',)
    return 1  # Indicate one frame processed
def RenderVid(af, n, fps=30):
    """Encode the rendered frames plus an audio file into ``<n>.mp4``.

    Frames are read from ``path + 'out/<n>/%d.png'``. Video is encoded with
    libx264 / yuv420p, audio with AAC, and the output stops at the shorter
    of the two inputs.

    The two inputs are created with separate ``ffmpeg.input`` calls and
    handed to ``ffmpeg.output`` together; ffmpeg-python stream objects do
    not offer an ``.input()`` method to chain a second input onto the first.
    The former trailing ``gr.Interface.download(...)`` call was dropped
    because it does not appear to be part of Gradio's API; the output file
    name is returned instead so a caller can serve it.

    Args:
        af: Path of the audio file.
        n: Job name; selects the frame directory and names the output.
        fps: Frame rate used for reading the frames and for the output.

    Returns:
        The output file name, ``n + '.mp4'``.
    """
    video = ffmpeg.input(path+f'out/{n}/%d.png', framerate=fps)
    audio = ffmpeg.input(af)
    output_file = n + '.mp4'
    (ffmpeg
        .output(video, audio, output_file, vcodec='libx264', r=fps,
                pix_fmt='yuv420p', acodec='aac', shortest=None)
        .run()
    )
    return output_file
# Zero-width characters removed from lyric files before parsing:
# U+200B ZERO WIDTH SPACE and U+FEFF (byte-order mark / zero width no-break
# space).
invisible_chars = ["\u200B", "\uFEFF"]


def remove_bom(data: str) -> str:
    """Return ``data`` without any leading U+FEFF byte-order marks."""
    BOM = '\ufeff'
    return data.lstrip(BOM)


def stripinvisibles(s):
    """Return ``s`` with every character listed in ``invisible_chars`` removed.

    ``str.replace`` returns a new string; the result has to be assigned back,
    otherwise nothing is removed (which is what used to happen here).

    Args:
        s: Text to clean.

    Returns:
        The cleaned text.
    """
    e = remove_bom(s)
    for i in invisible_chars:
        e = e.replace(i, "")
    return e
def main(file, name, fps=30, res: tuple=(1280,720), oscres=512, sr=11025, lyrics=None):
    """Render a visualisation video for an MP3 file.

    Steps:
      1. Optionally convert/copy the lyric file to ``path + 'out.srt'``.
      2. Load the audio with librosa and quantise it to unsigned 8-bit.
      3. Read cover image, title and artist from the MP3 tags.
      4. Render one PNG per frame into ``path + 'out/<name>/'`` using a
         process pool.
      5. Call the ``ffmpeg`` executable to mux frames, audio and (if
         present) subtitles into ``path + '<name>.mp4'``.

    Args:
        file: Path of the MP3 file.
        name: Job name; used for the frame directory and the output file.
        fps: Frames per second for rendering and encoding.
        res: ``(width, height)`` of the video.
        oscres: Waveform window size passed to the frame renderer.
        sr: Sample rate the audio is resampled to.
        lyrics: Optional path of an LRC or SRT lyric file.

    Raises:
        gr.Error: If the MP3 has no tags or no cover image.
    """
    global iii
    p = gr.Progress()
    LRC2SRT.clear()
    srt_path = path + "out.srt"  # same location ffmpeg is pointed at below
    if os.path.exists(srt_path):
        os.remove(srt_path)
    haslyrics = False
    if lyrics:
        p(0.5,"parsing lyrics")
        try:
            with open(lyrics, encoding="UTF8") as lyric_file:
                sf = stripinvisibles(lyric_file.read())
            # The ``with`` block guarantees the subtitles are flushed and
            # closed before ffmpeg reads them.
            with open(srt_path, mode="x", encoding="UTF8") as outf:
                first = sf[:1]  # '' for an empty file instead of IndexError
                print(first)
                if first == '[':
                    gr.Info("Lyrics of LRC type was detected, converting to SRT")
                    LRC2SRT.convert_to_srt(sf)
                    outf.write('\n'.join(LRC2SRT.SRT))
                    haslyrics = True
                elif first.isdigit():
                    outf.write(sf)
                    gr.Info("Lyrics of SRT type was detected")
                    haslyrics = True
                else:
                    gr.Warning("Lyrics file is invalid, skipping")
        except Exception:
            # Lyrics are optional: report the problem and carry on without.
            print(traceback.format_exc())
            gr.Warning("Failed to parse lyrics, ensure there are no blank lines in between and invisible characters")
    os.makedirs(path+f'out/{name}/', exist_ok=True)
    iii = 0
    # Load the audio file
    p(0.25,"loading file")
    audio_path = file
    y, sr = librosa.load(audio_path, sr=sr)  # Resample to the requested rate
    # Map to 0..255. Clip first: a full-scale sample would otherwise map to
    # 256, which does not fit in uint8.
    y_u8 = (y * 128 + 128).clip(0, 255).astype('uint8')
    samples_array = y_u8.tolist()
    p(0.5,"extracting metadata")
    # Extract cover image, title, and artist
    cover_img = extract_cover_image(audio_path)
    if cover_img is None:
        raise gr.Error("Mp3 must have a cover image")
    if cover_img == -1:
        raise gr.Error("Mp3 is missing tags")
    title, artist = getTitleAndArtist(audio_path)
    if title == 'Unknown Title' or artist == 'Unknown Artist':
        gr.Warning('Missing Title or Artist')
    dominant_color = getColour(cover_img)
    # Frame rendering parameters
    width, height = res[0], res[1]
    num_frames = len(samples_array) // (sr // fps)
    # Prepare parameters for each frame
    params = [(n, samples_array, cover_img, title, artist, dominant_color, width, height, fps, name, oscres, sr) for n in range(num_frames)]
    try:
        with Pool(cpu_count()) as pool:
            # imap_unordered yields as frames finish, which drives the
            # progress display.
            for _ in pool.imap_unordered(render_frame, params):
                iii += 1  # Increment frame count for progress
                p((iii,num_frames),desc="Rendering Frames")
    except Exception:
        # Best effort: encode whatever frames were rendered.
        print('Ended in error: ' + traceback.format_exc(), iii)
    p(0.5,desc="Compiling video")
    print('FFMPEG')
    # Encode at the frame rate the frames were rendered for, so that video
    # and audio stay in sync for any fps value.
    frame_rate = str(fps)
    ffmpeg_cmd = [
        "ffmpeg",
        '-framerate', frame_rate,
        '-i', path + f'out/{name}/%d.png',  # Input PNG images
        '-i', str(file),  # Input MP3 audio
    ]
    if haslyrics:
        ffmpeg_cmd += ['-i', srt_path]  # Input SRT subtitles
    ffmpeg_cmd += [
        '-c:v', 'libx264',
        '-r', frame_rate,
        '-pix_fmt', 'yuv420p',
        '-c:a', 'aac',
    ]
    if haslyrics:
        ffmpeg_cmd += ['-c:s', 'mov_text']  # Use mov_text codec for subtitles
    ffmpeg_cmd += [
        '-y',
        path + f'{name}.mp4',  # Output MP4 filename
    ]
    subprocess.run(ffmpeg_cmd)
def gradio_interface(audio_file, lyrics, output_name, fps=30, vidwidth=1280, vidheight=720, oscres=512, sr=11025):
    """Gradio callback: render the video and return the path of the MP4.

    Args:
        audio_file: Uploaded MP3 file.
        lyrics: Optional uploaded LRC/SRT file.
        output_name: Requested output name (without extension).
        fps: Frames per second.
        vidwidth: Video width in pixels.
        vidheight: Video height in pixels.
        oscres: Waveform window size.
        sr: Sample rate the audio is resampled to.

    Returns:
        Path of the rendered ``.mp4`` file.
    """
    # The name comes straight from a text box and ends up in file paths, so
    # keep only its final path component and fall back to the text box's
    # default when nothing usable is left.
    safe_name = os.path.basename(str(output_name).strip())
    if safe_name in ("", ".", ".."):
        safe_name = "video"
    # Convert directly to int; this also accepts float values, which the
    # former string round-trip through "WxH" did not.
    res = (int(vidwidth), int(vidheight))
    try:
        main(audio_file, safe_name, fps=int(fps), res=res, oscres=int(oscres), sr=sr, lyrics=lyrics)
        # NOTE(review): pause kept from the original; ffmpeg has already
        # exited at this point, so it may be unnecessary - confirm.
        time.sleep(5)
    finally:
        # Remove the rendered frames even when rendering failed, so stale
        # frames cannot leak into a later run.
        shutil.rmtree(path + "out", ignore_errors=True)
    return path + f"{safe_name}.mp4"
# Define Gradio interface with progress bar.
# The inputs are passed positionally to gradio_interface in the order listed.
iface = gr.Interface(
    fn=gradio_interface,
    inputs=[
        gr.components.File(label="Upload your MP3 file", file_count='single', file_types=['mp3']),
        gr.components.File(label="(Optional) Upload Lyrics as LRC or SRT", file_count='single', file_types=['lrc','srt']),
        gr.components.Textbox(label="Output Video Name", value='video'),
        gr.components.Slider(label="Frames per Second", minimum=20, maximum=60, step=1, value=30),
        gr.components.Slider(label="Output Video Width", minimum=100, maximum=2000, value=1280, step=2),
        gr.components.Slider(label="Output Video Height", minimum=100, maximum=2000, value=720, step=2),
        gr.components.Slider(label="Number of Visualization Segments", minimum=256, maximum=2048, step=2, value=512),
        #gr.components.Slider(label="Scope Sample Rate", minimum=8000, maximum=44100, step=5, value=11025)
    ],
    outputs=gr.components.Video(label="Output"),
    title="MP3 to Video Visualization",
    description=""" Upload an MP3 file and configure parameters to create a visualization video.
Optionally upload a word or line synced lyric file
Ensure a blank line at the end to avoid conversion errors"""
)

# Launch Gradio interface.
# Guarded because this module uses multiprocessing: with the "spawn" start
# method, worker processes re-import the main module, and an unguarded
# launch() would then try to start another server in every worker.
if __name__ == "__main__":
    iface.launch()
|