File size: 11,644 Bytes
d7ff226
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5e5dc03
2fbfeb6
78dd807
 
d7ff226
6979b00
d7ff226
db19dc1
2c9b6e2
8091650
db19dc1
 
 
ece5f12
 
b8bb35c
ece5f12
 
f5213d4
d7ff226
 
 
 
 
 
 
4305074
d7ff226
 
 
 
 
 
 
 
 
 
 
 
db725c9
 
4305074
db725c9
 
d7ff226
 
 
71ce4d9
 
088fe02
d7ff226
 
 
 
 
 
 
 
 
 
 
 
b536a33
 
d7ff226
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e835e58
 
 
 
 
d7ff226
5e5dc03
 
d7ff226
 
 
5e5dc03
d7ff226
 
 
 
 
 
 
 
 
35abc84
 
 
d7ff226
 
 
 
 
 
 
 
 
 
1d44699
d7ff226
 
e835e58
004ce63
d7ff226
 
 
 
 
 
 
004ce63
d7ff226
7f8a1b2
 
dc8a999
 
 
 
7f8a1b2
dc8a999
7f8a1b2
dc8a999
 
7f8a1b2
78dd807
 
e32c131
78dd807
 
e7c405a
78dd807
 
 
d782c33
7f8a1b2
dc8a999
d782c33
 
 
 
e7c405a
d782c33
 
 
e7c405a
78dd807
 
dc8a999
 
 
78dd807
1d44699
d7ff226
 
 
78dd807
d7ff226
5e5dc03
d7ff226
 
78dd807
d7ff226
 
 
1d44699
d7ff226
6a8f4ca
1d44699
088fe02
06f1ea0
d7ff226
b536a33
 
 
d7ff226
 
 
 
5e5dc03
d7ff226
 
5e5dc03
78dd807
d7ff226
 
d6584d2
5e5dc03
e835e58
 
 
d6584d2
 
e835e58
d7ff226
2c9b6e2
ca1a3de
d6584d2
cc2fd5e
d7ff226
e7c405a
d782c33
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9b7f939
d782c33
 
d7ff226
 
 
011064a
e835e58
004ce63
011064a
e6606d5
2fbfeb6
 
c859ab7
d7ff226
e835e58
d7ff226
 
 
5ecacd8
011064a
e6606d5
974e529
 
 
 
450a3f3
d7ff226
2166fcd
d7ff226
2976e81
 
 
d7ff226
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
import gradio as gr
import librosa
from PIL import Image, ImageDraw, ImageFont
from mutagen.mp3 import MP3
from mutagen.id3 import ID3, APIC, TIT2, TPE1
import io
from colorthief import ColorThief
import colorsys
import math
import os
from multiprocessing import Pool, cpu_count
import tempfile
import ffmpeg
import subprocess
import traceback
import time
import shutil
import LRC
import LRC2SRT

# Prefix for the bundled font files and for everything the app generates
# (frame directory, subtitle file, final video). It is joined by plain string
# concatenation, so a non-empty value must end with a path separator; the
# empty default makes all of those paths relative to the working directory.
path = ""  # Update with your path

def safe_read(i: int, a: list):
    """Return ``a[i]``, or 128 when ``i`` falls outside the list.

    128 is the mid-scale value of the unsigned 8-bit samples this module
    works with, so an out-of-range read looks like silence.

    Negative indices are treated as out of range as well: Python would
    otherwise wrap them around to the end of the list (or raise IndexError
    below ``-len(a)``), which is never what a bounds-checked read wants.
    """
    if 0 <= i < len(a):
        return a[i]
    return 128

def getTrigger(ad: int, a: list, max: int = 1024) -> int:
    """Scan forward from ``ad`` for a rising edge and return its index.

    An edge is a sample below 126 whose neighbour four samples later is not
    below 130. The scan gives up once it has moved more than ``max`` samples
    past ``ad`` and returns the index it stopped at.

    Reads past either end of ``a`` count as 128 (mid-scale) instead of
    raising IndexError, so the function is safe near the end of the data.

    NOTE: a second ``getTrigger`` defined further down in this module rebinds
    the name, so this variant is not the one callers get at runtime.
    """
    size = len(a)

    def sample(k):
        # Out-of-range reads behave like silence.
        return a[k] if 0 <= k < size else 128

    i = ad
    while i - ad <= max and not (sample(i) < 126 and not sample(i + 4) < 130):
        i += 1
    return i

def getRenderCords(ta: list, idx: int, res: int = 1024, size: tuple = (1280, 720)) -> list:
    """Build the centre-origin polyline for one oscilloscope trace.

    Takes the ``2 * (res // 2)`` samples centred on ``idx`` and maps them to
    ``(x, y)`` points: x runs left to right across 90% of the frame width in
    steps of ``0.9 * width / res``; y is the sample's offset from 128 scaled
    by ``height / 2000`` and shifted down by 35% of the frame height.

    Args:
        ta: Sample values (128 is treated as the zero line).
        idx: Index of the sample that should sit in the middle of the trace.
        res: Number of samples spanned by the trace.
        size: ``(width, height)`` of the target frame in pixels.

    Returns:
        List of ``(x, y)`` tuples in centre-origin coordinates.
    """
    width, height = size[0], size[1]
    total = len(ta)
    y_scale = height / 2000
    y_base = height * .7 / -2

    x = width * .9 / -2
    c = []
    for i in range(idx - res // 2, idx + (res // 2)):
        # Every read is bounds-checked, including the first one: windows that
        # start before sample 0 or run past the end are padded with 128
        # (the zero line) rather than wrapping around or raising IndexError.
        level = ta[i] if 0 <= i < total else 128
        c.append((x, (level - 128) * y_scale + y_base))
        x += (width * .9) / res
    return c

def center_to_top_left(coords, width=1280, height=720):
    """Convert a sequence of centre-origin points to top-left-origin points.

    Each ``(x, y)`` pair is passed through ``totopleft`` with the given frame
    size; the converted points are returned as a new list in the same order.
    """
    return [totopleft((x, y), width=width, height=height) for x, y in coords]

def totopleft(coord, width=1280, height=720):
    """Map one centre-origin point to top-left-origin coordinates.

    x is shifted right by half the width; y is flipped and measured from
    half the height. Returns the result as an ``(x, y)`` tuple.
    """
    centre_x = coord[0]
    centre_y = coord[1]
    shifted_x = centre_x + width / 2
    flipped_y = height / 2 - centre_y
    return shifted_x, flipped_y

def getTrigger(ad: int, a: list, max: int = 1024) -> int:
    """Find the index at which the oscilloscope trace should be anchored.

    Starting at ``ad``, advance one sample at a time until either the current
    sample is below 124 while the sample two positions later is below 128,
    or the scan has moved more than ``max`` samples past ``ad``. All reads go
    through ``safe_read``. Returns the index where the scan stopped.
    """
    pos = ad
    while True:
        # Trigger condition is checked first, then the scan-length limit.
        if safe_read(pos, a) < 124 and safe_read(pos + 2, a) < 128:
            break
        if pos - ad > max:
            break
        pos += 1
    return pos

def extract_cover_image(mp3_file):
    """Return the first embedded cover picture of an MP3 as a PIL image.

    Args:
        mp3_file: Path (or file object) accepted by ``mutagen.mp3.MP3``.

    Returns:
        The image decoded from the first APIC frame, ``-1`` when the file
        has no tag block at all, or ``None`` when tags exist but none of
        them is an APIC frame. The two sentinels are kept distinct because
        the caller reports them with different messages.
    """
    audio = MP3(mp3_file, ID3=ID3)
    if audio.tags is None:
        return -1
    for tag in audio.tags.values():
        if isinstance(tag, APIC):
            return Image.open(io.BytesIO(tag.data))
    print("No cover image found in the MP3 file.")
    return None

def getTitleAndArtist(mp3_file):
    """Read the title (TIT2) and artist (TPE1) text from an MP3's tags.

    A missing frame is replaced by a placeholder frame whose text is
    'Unknown Title' or 'Unknown Artist'. Returns ``(title, artist)``, each
    being the first text entry of its frame.
    """
    audio = MP3(mp3_file, ID3=ID3)
    title_frame = audio.get('TIT2', TIT2(encoding=3, text='Unknown Title'))
    artist_frame = audio.get('TPE1', TPE1(encoding=3, text='Unknown Artist'))
    return title_frame.text[0], artist_frame.text[0]

def getColour(img):
    """Return the dominant colour of ``img`` as reported by ColorThief.

    The image is written to a temporary PNG because ColorThief is given a
    file name here. The temporary file is removed even when saving or colour
    extraction fails, so errors do not leave stray files behind.

    Args:
        img: Object with a PIL-style ``save(filename, format=...)`` method.

    Returns:
        Whatever ``ColorThief.get_color(quality=1)`` returns for the image.
    """
    # Reserve a name and close the handle straight away; the file is then
    # written and read purely by name.
    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmpfile:
        tmp_name = tmpfile.name
    try:
        img.save(tmp_name, format="PNG")
        return ColorThief(tmp_name).get_color(quality=1)
    finally:
        os.remove(tmp_name)

def clamp(number):
    """Limit ``number`` to the closed range 0..1.

    Values above 1 become 1, values that are not above 0 become 0, anything
    in between is returned unchanged.
    """
    if number > 1:
        return 1
    if number > 0:
        return number
    return 0

def normalizeColour(C) -> tuple[int, int, int]:
    """Derive the background colour from an RGB triple.

    Keeps the hue of ``C`` (channels 0-255), multiplies the saturation by
    1.3 (clamped to 1) and forces the HSV value to 0.8. Returns the result
    as floored 0-255 integer channels.
    """
    red, green, blue = C[0], C[1], C[2]
    hue, saturation, _ = colorsys.rgb_to_hsv(red / 255, green / 255, blue / 255)
    adjusted = colorsys.hsv_to_rgb(hue, clamp(1.3 * saturation), .8)
    return tuple(math.floor(channel * 255) for channel in adjusted)

def normalizeColourBar(C) -> tuple[int, int, int]:
    """Derive the progress-bar track colour from an RGB triple.

    Keeps the hue of ``C`` (channels 0-255), multiplies the saturation by
    1.4 (clamped to 1) and forces the HSV value to 0.6. Returns the result
    as floored 0-255 integer channels.
    """
    red, green, blue = C[0], C[1], C[2]
    hue, saturation, _ = colorsys.rgb_to_hsv(red / 255, green / 255, blue / 255)
    adjusted = colorsys.hsv_to_rgb(hue, clamp(1.4 * saturation), .6)
    return tuple(math.floor(channel * 255) for channel in adjusted)

def stamp_text(draw, text, font, position, align='left'):
    """Draw white text vertically centred on ``position``.

    The text size is taken from ``draw.textbbox``. Horizontally, ``position``
    is the left edge for 'left' (and any unrecognised value), the middle for
    'center' and the right edge for 'right'.
    """
    left, top, right, bottom = draw.textbbox((0, 0), text, font=font)
    box_width = right - left
    box_height = bottom - top

    # How far to pull the anchor left for each alignment mode.
    shift = {'center': box_width // 2, 'right': box_width}.get(align, 0)

    x, y = position
    draw.text((x - shift, y - box_height // 2), text, font=font, fill="#fff")

def linear_interpolate(start, stop, progress):
    """Return the point ``progress`` of the way from ``start`` to ``stop``.

    ``progress`` of 0 gives ``start`` and 1 gives ``stop``; values outside
    that range extrapolate.
    """
    span = stop - start
    return start + progress * span

def filecount(p):
    """Return the number of directory entries directly inside ``p``.

    Both files and sub-directories are counted, as reported by
    ``os.listdir``.
    """
    # The directory argument must be passed on; without it os.listdir()
    # lists the current working directory regardless of ``p``.
    return len(os.listdir(p))

def render_frame(params):
    """Render frame ``n`` of the video and save it as a PNG.

    Designed to be mapped over a process pool, hence the single tuple
    argument.

    Args:
        params: Tuple ``(n, samples_array, cover_img, title, artist,
            dominant_color, width, height, fps, name, oscres, sr)``.

    Returns:
        1 after the frame has been written to ``out/<name>/<n>.png`` under
        the module-level ``path`` prefix, or ``None`` when the frame's first
        sample lies beyond the end of ``samples_array``.
    """
    n, samples_array, cover_img, title, artist, dominant_color, width, height, fps, name, oscres, sr = params

    # Index of the first sample that belongs to this frame.
    s = (sr // fps) * n
    if s > len(samples_array):
        return

    img = Image.new('RGB', (width, height), normalizeColour(dominant_color))
    d = ImageDraw.Draw(img)

    # Oscilloscope trace, anchored at the trigger point found after ``s``.
    trigger = getTrigger(s, samples_array, max=oscres)
    trace = getRenderCords(samples_array, trigger, res=oscres, size=(width, height))
    d.line(center_to_top_left(trace, width=width, height=height), fill='#fff', width=2)

    # Square cover art, half the shorter side, horizontally centred.
    short_side = min(width, height)
    cs = math.floor(short_side / 2)
    cov = cover_img.resize((cs, cs))
    img.paste(cov, (((width // 2) - cs // 2), math.floor(height * .1)))

    # Font sizes scale with the frame (50/40 px at 720p). The floor division
    # yields a whole-number float; it is converted to int because not every
    # Pillow release accepts a float size.
    scale = short_side / 720
    fontT = ImageFont.truetype(path + 'Lexend-Bold.ttf', int(50 * scale // 1))
    fontA = ImageFont.truetype(path + 'Lexend-Bold.ttf', int(40 * scale // 1))

    stamp_text(d, title, fontT, totopleft((0, short_side * .3 // -2), width=width, height=height), 'center')
    stamp_text(d, artist, fontA, totopleft((0, short_side * .44 // -2), width=width, height=height), 'center')

    # Progress bar: a tinted track with a white fill drawn over it whose
    # length is the fraction of samples already played.
    bar_y = height * .95 // -2
    d.line(center_to_top_left([(width * .96 // -2, bar_y), (width * .96 // 2, bar_y)], width=width, height=height),
           fill=normalizeColourBar(dominant_color), width=15 * height // 360)
    fill_start = width * .95 // -2
    fill_end = linear_interpolate(fill_start, width * .95 // 2, s / len(samples_array))
    d.line(center_to_top_left([(fill_start, bar_y), (fill_end, bar_y)], width=width, height=height),
           fill='#fff', width=10 * height // 360)

    img.save(path + f'out/{name}/{n}.png', 'PNG')

    return 1  # Indicate one frame processed

def RenderVid(af, n, fps=30):
    """Mux the rendered frames of job ``n`` with audio ``af`` via ffmpeg-python.

    Args:
        af: Path of the audio file to use as the soundtrack.
        n: Job name; frames are read from ``out/<n>/%d.png`` under the
            module-level ``path`` prefix and the video is written to
            ``<n>.mp4`` in the working directory.
        fps: Frame rate used for both reading the frames and the output.

    Returns:
        The name of the written video file.
    """
    # Each input is opened on its own and both streams are handed to a
    # single output node, which is how ffmpeg-python combines video + audio.
    frames = ffmpeg.input(path + f'out/{n}/%d.png', framerate=fps)
    audio = ffmpeg.input(af)
    (ffmpeg
     .output(frames, audio, n + '.mp4', vcodec='libx264', r=fps, pix_fmt='yuv420p', acodec='aac', shortest=None)
     .run()
     )
    # NOTE(review): the previous trailing call to gr.Interface.download was
    # dropped because Gradio does not appear to offer such a function; the
    # output name is returned so a caller can serve the file itself.
    return f"{n}.mp4"

# Zero-width characters that break format detection and parsing of lyric
# files: ZERO WIDTH SPACE and the byte-order mark / ZERO WIDTH NO-BREAK SPACE.
invisible_chars = ["\u200B", "\uFEFF"]


def remove_bom(data: str) -> str:
    """Return ``data`` without any leading byte-order-mark characters."""
    BOM = '\ufeff'
    return data.lstrip(BOM)


def stripinvisibles(s):
    """Return ``s`` with every character in ``invisible_chars`` removed.

    Leading BOMs are stripped first, then each listed character is deleted
    wherever it occurs in the text.
    """
    e = remove_bom(s)
    for i in invisible_chars:
        # str.replace returns a new string; the result has to be kept.
        e = e.replace(i, "")
    return e

def _write_srt(lyrics, srt_path):
    """Write the uploaded lyrics to ``srt_path`` in SRT form.

    Text starting with ``[`` is treated as LRC and converted through
    ``LRC2SRT``; text starting with a digit is treated as SRT and copied.
    Returns True when subtitles were written, False for any other content.
    """
    with open(lyrics, encoding="UTF8") as src:
        sf = stripinvisibles(src.read())
    print(sf[0])
    if sf[0] == '[':
        gr.Info("Lyrics of LRC type was detected, converting to SRT")
        LRC2SRT.convert_to_srt(sf)
        srt_text = '\n'.join(LRC2SRT.SRT)
    elif sf[0].isdigit():
        gr.Info("Lyrics of SRT type was detected")
        srt_text = sf
    else:
        gr.Warning("Lyrics file is invalid, skipping")
        return False
    # The file is closed (and therefore flushed) before ffmpeg reads it.
    with open(srt_path, mode="w", encoding="UTF8") as outf:
        outf.write(srt_text)
    return True


def _ffmpeg_command(file, name, fps, srt_path=None):
    """Build the ffmpeg argument list that muxes frames, audio and subtitles.

    ``srt_path`` is added as a third input (encoded as mov_text) only when it
    is not None. The frame rate is applied to both the image input and the
    output.
    """
    cmd = [
        "ffmpeg",
        '-framerate', str(fps),
        '-i', path + f'out/{name}/%d.png',  # Input PNG images
        '-i', str(file),  # Input MP3 audio
    ]
    if srt_path is not None:
        cmd += ['-i', srt_path]  # Input SRT subtitles
    cmd += [
        '-c:v', 'libx264',
        '-r', str(fps),
        '-pix_fmt', 'yuv420p',
        '-c:a', 'aac',
    ]
    if srt_path is not None:
        cmd += ['-c:s', 'mov_text']  # Use mov_text codec for subtitles
    cmd += ['-y', path + f'{name}.mp4']  # Output MP4 filename
    return cmd


def main(file, name, fps=30, res: tuple=(1280,720), oscres=512, sr=11025, lyrics=None):
    """Render ``file`` into ``<name>.mp4`` (under the ``path`` prefix).

    Steps: optionally convert the lyrics to ``out.srt``, load the audio with
    librosa at ``sr`` Hz and quantise it to 8-bit samples, read cover art,
    title and artist from the tags, render one PNG per frame into
    ``out/<name>/`` using a process pool, then invoke ffmpeg to combine the
    frames, the original audio and (if present) the subtitles.

    Args:
        file: Path of the MP3 to visualise.
        name: Job name; used for the frame directory and the output file.
        fps: Video frame rate.
        res: ``(width, height)`` of the video in pixels.
        oscres: Number of samples shown in the oscilloscope trace.
        sr: Sample rate the audio is resampled to for the trace.
        lyrics: Optional path of an LRC or SRT file.

    Raises:
        gr.Error: When the MP3 has no tags or no embedded cover image.
    """
    # Kept as a module-level counter for compatibility with existing code.
    global iii

    p = gr.Progress()
    LRC2SRT.clear()

    # The subtitle file lives under the same prefix ffmpeg later reads from.
    srt_path = path + "out.srt"
    if os.path.exists(srt_path):
        os.remove(srt_path)
    haslyrics = False
    if lyrics:
        p(0.5, "parsing lyrics")
        try:
            haslyrics = _write_srt(lyrics, srt_path)
        except Exception:
            # Best effort: a broken lyrics file must not abort the render.
            print(traceback.format_exc())
            gr.Warning("Failed to parse lyrics, ensure there are no blank lines in between and invisible characters")

    os.makedirs(path + f'out/{name}/', exist_ok=True)
    iii = 0

    # Load the audio file
    p(0.25, "loading file")
    y, sr = librosa.load(file, sr=sr)
    # Map the float samples to unsigned 8-bit around 128. Clipping first keeps
    # full-scale peaks at 255 instead of letting them wrap around to 0.
    samples_array = (y * 128 + 128).clip(0, 255).astype('uint8').tolist()

    # Extract cover image, title, and artist
    p(0.5, "extracting metadata")
    cover_img = extract_cover_image(file)
    if cover_img is None:
        raise gr.Error("Mp3 must have a cover image")
    if cover_img == -1:
        raise gr.Error("Mp3 is missing tags")

    title, artist = getTitleAndArtist(file)
    if title == 'Unknown Title' or artist == 'Unknown Artist':
        gr.Warning('Missing Title or Artist')
    dominant_color = getColour(cover_img)

    # Frame rendering parameters
    width, height = res[0], res[1]
    num_frames = len(samples_array) // (sr // fps)

    # Prepare parameters for each frame
    params = [(n, samples_array, cover_img, title, artist, dominant_color, width, height, fps, name, oscres, sr) for n in range(num_frames)]

    try:
        with Pool(cpu_count()) as pool:
            # imap_unordered yields as frames finish, which drives the progress bar
            for _ in pool.imap_unordered(render_frame, params):
                iii += 1  # Increment frame count for progress
                p((iii, num_frames), desc="Rendering Frames")
    except Exception:
        # Best effort: whatever frames were rendered are still compiled below.
        print('Ended in error: ' + traceback.format_exc(), iii)

    p(0.5, desc="Compiling video")
    print('FFMPEG')
    subprocess.run(_ffmpeg_command(file, name, fps, srt_path if haslyrics else None))

def gradio_interface(audio_file, lyrics, output_name, fps=30, vidwidth=1280, vidheight=720, oscres=512, sr=11025):
    """Gradio callback: render the upload and return the video's path.

    Args:
        audio_file: Uploaded MP3.
        lyrics: Optional uploaded LRC/SRT file.
        output_name: Base name of the resulting ``.mp4``.
        fps: Video frame rate.
        vidwidth: Video width in pixels.
        vidheight: Video height in pixels.
        oscres: Number of samples shown in the oscilloscope trace.
        sr: Sample rate used for the trace.

    Returns:
        Path of the rendered video, using the same ``path`` prefix that
        ``main`` writes it under.
    """
    # Direct conversion also copes with sliders that deliver floats.
    res = (int(vidwidth), int(vidheight))
    try:
        main(audio_file, output_name, fps=fps, res=res, oscres=oscres, sr=sr, lyrics=lyrics)
    finally:
        # main() runs ffmpeg through a blocking subprocess.run, so the frames
        # can be deleted as soon as it returns. Cleaning up on failure as well
        # keeps stale frames from leaking into the next render.
        shutil.rmtree(path + "out", ignore_errors=True)
    return path + f"{output_name}.mp4"

# Define Gradio interface with progress bar.
# File extensions in ``file_types`` carry a leading dot, which is the form
# Gradio documents for extension filters.
iface = gr.Interface(
    fn=gradio_interface,
    inputs=[
        gr.components.File(label="Upload your MP3 file", file_count='single', file_types=['.mp3']),
        gr.components.File(label="(Optional) Upload Lyrics as LRC or SRT", file_count='single', file_types=['.lrc', '.srt']),
        gr.components.Textbox(label="Output Video Name", value='video'),
        gr.components.Slider(label="Frames per Second", minimum=20, maximum=60, step=1, value=30),
        gr.components.Slider(label="Output Video Width", minimum=100, maximum=2000, value=1280, step=2),
        gr.components.Slider(label="Output Video Height", minimum=100, maximum=2000, value=720, step=2),
        gr.components.Slider(label="Number of Visualization Segments", minimum=256, maximum=2048, step=2, value=512),
        #gr.components.Slider(label="Scope Sample Rate", minimum=8000, maximum=44100, step=5, value=11025)
    ],
    outputs=gr.components.Video(label="Output"),
    title="MP3 to Video Visualization",
    description=""" Upload an MP3 file and configure parameters to create a visualization video.
                    Optionally upload a word or line synced lyric file
                    Ensure a blank line at the end to avoid conversion errors"""
)

# Launch Gradio interface only when executed as a script. Rendering uses
# multiprocessing, and worker processes started with the "spawn" method
# re-import this module; without the guard each worker would try to start
# its own server.
if __name__ == "__main__":
    iface.launch()