Spaces:

ziqiangao
/

musicscopegen

Running

App Files Community

ziqiangao committed on Aug 7, 2024

Commit

e50325c

1 Parent(s): 6ecdd65

add progress tracking for ffmpeg

Browse files

Files changed (1) hide show

app.py +129 -56

app.py CHANGED Viewed

@@ -16,43 +16,51 @@ import traceback
 import shutil
 import LRC2SRT
 import sys
-flag = 1
 path = ""  # Update with your path
 def safe_read(i: int, a: list):
     if i >= len(a):
         return 128
     else:
         return a[i]
 def getRenderCords(ta: list, idx: int, res: int = 1024, size: tuple = (1280, 720)) -> list:
     i = idx - res // 2
-    x, y = size[0] * .9 / -2, (safe_read(i,ta) - 128) * (size[1] / 2000) + (size[1] * .7 / -2)
     c = []
     while i < idx + (res // 2):
         c.append((x, y))
         i += 1
-        y = (safe_read(i,ta) - 128) * (size[1] / 2000) + (size[1] * .7 / -2)
         x += (size[0] * .9) / res
     return c
 def center_to_top_left(coords, width=1280, height=720):
     new_coords = []
     for x, y in coords:
         new_coords.append(totopleft((x, y), width=width, height=height))
     return new_coords
 def totopleft(coord, width=1280, height=720):
     return coord[0] + width / 2, height / 2 - coord[1]
 def getTrigger(ad: int, a: list, max: int = 1024) -> int:
     i = ad
-    while not (safe_read(i,a) < 128 and safe_read(i+6,a) < 128 or i - ad > max):
         i += 1
     return i
 def extract_cover_image(mp3_file):
     audio = MP3(mp3_file, ID3=ID3)
     if audio.tags == None:
@@ -65,12 +73,14 @@ def extract_cover_image(mp3_file):
     print("No cover image found in the MP3 file.")
     return None
 def getTitleAndArtist(mp3_file):
     audio = MP3(mp3_file, ID3=ID3)
     title = audio.get('TIT2', TIT2(encoding=3, text='')).text[0]
     artist = audio.get('TPE1', TPE1(encoding=3, text='')).text[0]
     return title, artist
 def getColour(img):
     with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmpfile:
         img.save(tmpfile.name, format="PNG")
@@ -79,19 +89,23 @@ def getColour(img):
     os.remove(tmpfile.name)
     return dominant_color
 def clamp(number):
     return max(0, min(number, 1))
-def normalizeColour(C) -> tuple[int, int, int]:
     cc = colorsys.rgb_to_hsv(C[0] / 255, C[1] / 255, C[2] / 255)
     ccc = colorsys.hsv_to_rgb(cc[0], clamp(1.3 * cc[1]), .8)
     return math.floor(ccc[0] * 255), math.floor(ccc[1] * 255), math.floor(ccc[2] * 255)
-def normalizeColourBar(C) -> tuple[int, int, int]:
     cc = colorsys.rgb_to_hsv(C[0] / 255, C[1] / 255, C[2] / 255)
     ccc = colorsys.hsv_to_rgb(cc[0], clamp(1.4 * cc[1]), .6)
     return math.floor(ccc[0] * 255), math.floor(ccc[1] * 255), math.floor(ccc[2] * 255)
 def stamp_text(draw, text, font, position, align='left'):
     text_bbox = draw.textbbox((0, 0), text, font=font)
     text_width = text_bbox[2] - text_bbox[0]
@@ -105,14 +119,17 @@ def stamp_text(draw, text, font, position, align='left'):
     draw.text((x, y), text, font=font, fill="#fff")
 def linear_interpolate(start, stop, progress):
     return start + progress * (stop - start)
 def filecount(p):
     files = os.listdir()
     file_count = len(files)
     return file_count
 def render_frame(params):
     n, samples_array, cover_img, title, artist, dominant_color, width, height, fps, name, oscres, sr = params
     num_frames = len(samples_array) // (sr // fps)
@@ -120,71 +137,89 @@ def render_frame(params):
     d = ImageDraw.Draw(img)
     s = math.floor((sr / fps) * n)
-    e = center_to_top_left(getRenderCords(samples_array, getTrigger(s, samples_array, max=oscres),res=oscres,size=(width, height)), width=width, height=height)
-    d.line(e, fill='#fff', width=2)
     cs = math.floor(min(width, height) / 2)
     cov = cover_img.resize((cs, cs))
     img.paste(cov, (((width // 2) - cs // 2), math.floor(height * .1)))
-    fontT = ImageFont.truetype(path+'Lexend-Bold.ttf', 50*(min(width, height)/720)//1)
-    fontA = ImageFont.truetype(path+'Lexend-Bold.ttf', 40*(min(width, height)/720)//1)
-    fontD = ImageFont.truetype(path+'SpaceMono-Bold.ttf', 30*(min(width, height)/720)//1)
-    stamp_text(d, title, fontT, totopleft((0, min(width, height) * .3 // -2), width=width, height=height), 'center')
-    stamp_text(d, artist, fontA, totopleft((0, min(width, height) * .44 // -2), width=width, height=height), 'center')
     d.line(center_to_top_left([(width * .96 // -2, height * .95 // -2), (width * .96 // 2, height * .95 // -2)], width=width, height=height),
            fill=normalizeColourBar(dominant_color), width=15 * height // 360)
     d.line(center_to_top_left([(width * .95 // -2, height * .95 // -2),
                                (linear_interpolate(width * .95 // -2, width * .95 // 2, s / len(samples_array)),
-                                height * .95 // -2)],width=width, height=height), fill='#fff', width=10 * height // 360)
     img.save(path+f'out/{name}/{str(n)}.png', 'PNG')
     return 1  # Indicate one frame processed
 def RenderVid(af, n, fps=30):
-    (ffmpeg
-     .input(path+f'out/{n}/%d.png', framerate=fps)
-     .input(af)
-     .output(n + '.mp4', vcodec='libx264', r=fps, pix_fmt='yuv420p', acodec='aac', shortest=None)
      .run()
      )
     gr.Interface.download(f"{n}.mp4")
 invisible_chars = ["\u200B", "\uFEFF"]
 def remove_bom(data: str) -> str:
     BOM = '\ufeff'
     return data.lstrip(BOM)
 def stripinvisibles(s):
     e = remove_bom(s)
     for i in invisible_chars:
-        e.replace(i,"")
     return e
 def start_progress(title):
     global progress_x
-    sys.stdout.write(title + ": [" + "-"*40 + "]" + chr(8)*41)
     sys.stdout.flush()
     progress_x = 0
 def progress(x):
     global progress_x
     x = int(x * 40 // 100)
     sys.stdout.write("#" * (x - progress_x))
     sys.stdout.flush()
     progress_x = x
 def end_progress():
-    sys.stdout.write("#" * (40 - progress_x) + "]\n")
     sys.stdout.flush()
 haslyrics = False
-def main(file, name, fps=30, res: tuple=(1280,720), oscres=512, sr=11025, lyrics=None, img=None, tit=None, ast=None):
     global flag
     p = gr.Progress()
     LRC2SRT.clear()
@@ -210,8 +245,9 @@ def main(file, name, fps=30, res: tuple=(1280,720), oscres=512, sr=11025, lyrics
                 gr.Warning("Lyrics file is invalid, skipping")
         except Exception as e:
             print(traceback.format_exc())
-            gr.Warning("Failed to parse lyrics, ensure there are no blank lines in between, you may use Lyrics Editor to ensure compatability")
     os.makedirs(path + f'out/{name}/', exist_ok=True)
     global iii
     iii = 0
@@ -233,11 +269,13 @@ def main(file, name, fps=30, res: tuple=(1280,720), oscres=512, sr=11025, lyrics
     if img:
         cover_img = cover_file
     if cover_img is None:
-        raise gr.Error("Mp3 must have a cover image, upload the image under the 'Metadata' section", duration=None)
     elif cover_img == -1 and not (tit or ast or img):
-        raise gr.Error("Mp3 is missing tags, add the info under the 'Metadata' section", duration=None)
-    title, artist = getTitleAndArtist(audio_path)
     if tit and ast:
         title, artist = tit, ast
     if title == '' or artist == '':
@@ -254,9 +292,10 @@ def main(file, name, fps=30, res: tuple=(1280,720), oscres=512, sr=11025, lyrics
     num_frames = len(samples_array) // (sr // fps)
     # Prepare parameters for each frame
-    params = [(n, samples_array, cover_img, title, artist, dominant_color, width, height, fps, name, oscres, sr) for n in range(num_frames)]
     print('---------------------------------------------------------')
-    print('Info:')
     print("Title: " + title)
     print("Artist: " + artist)
     print(f'Resolution: {str(width)}x{str(height)}')
@@ -265,19 +304,23 @@ def main(file, name, fps=30, res: tuple=(1280,720), oscres=512, sr=11025, lyrics
     print('Frame Count: ' + str(num_frames))
     print('Segments per frame: ' + str(oscres))
     print('---------------------------------------------------------')
     try:
-        with Pool(cpu_count()) as pool:
             num_frames = len(samples_array) // (sr // fps)
             # Use imap to get progress updates
             for _ in pool.imap_unordered(render_frame, params):
                 iii += 1  # Increment frame count for progress
                 p((iii, num_frames), desc="Rendering Frames")
     except Exception as e:
         raise gr.Error("Something went wrong whilst rendering")
     p = gr.Progress()
-    p(0.5, desc="Compiling video")
     print('---------------------------------------------------------')
     print('FFMPEG')
     if haslyrics:
@@ -304,14 +347,32 @@ def main(file, name, fps=30, res: tuple=(1280,720), oscres=512, sr=11025, lyrics
             '-c:v', 'libx264',
             '-r', str(fps),
             '-pix_fmt', 'yuv420p',
-            '-c:a', 'aac',
             '-y',
             path + f'{name}.mp4'  # Output MP4 filename
         ]
-    subprocess.run(ffmpeg_cmd)
     print('---------------------------------------------------------')
     return f"{name}.mp4", haslyrics
 def gradio_interface(audio_file, lyrics, output_name, fps=30, vidwidth=1280, vidheight=720, oscres=512, img=None, tit=None, ast=None):
     if audio_file is None:
         raise gr.Error("Please Provide an Audio File")
@@ -320,48 +381,59 @@ def gradio_interface(audio_file, lyrics, output_name, fps=30, vidwidth=1280, vid
     resolution = f"{vidwidth}x{vidheight}"
     res = tuple(map(int, resolution.split('x')))
-    video_file, haslyrics = main(audio_file, output_name, fps=fps, res=res, oscres=oscres, lyrics=lyrics, img=img, tit=tit, ast=ast)
     # Clean up the temporary file
-    os.remove(audio_file)
     shutil.rmtree("out")
     srt_output = "out.srt" if haslyrics else None
     return video_file, srt_output, haslyrics
 def update_srt_output_visibility(haslyrics):
     return gr.update(visible=haslyrics)
 with gr.Blocks() as demo:
-    gr.Markdown('Upload an MP3 file and configure parameters to create a visualization video.')
-    gr.Markdown('Optionally upload a word or line synced lyric file in the advanced section.')
     with gr.Row():
         # Inputs on the left
         with gr.Column():
             with gr.Accordion(label="Audio Settings", open=True):
                 gr.Markdown('## Load your mp3 file here')
-                audio_file = gr.File(label="Upload your MP3 file", file_count='single', file_types=['mp3'])
             with gr.Accordion(label="Mp3 Metadata", open=False):
-                gr.Markdown('## Add Metadata here if your mp3 does not have one')
                 cover_img = gr.Image(label='Cover Art', type="filepath")
                 title_input = gr.Textbox(label='Title')
                 artist_input = gr.Textbox(label='Artists')
             with gr.Accordion(label="Video Output Settings", open=False):
                 gr.Markdown('## Configure Video Output Here')
-                output_name = gr.Textbox(label="Output Video Name", value='Output')
-                fps_slider = gr.Slider(label="Frames per Second", minimum=20, maximum=60, step=1, value=30)
-                vidwidth_slider = gr.Slider(label="Output Video Width", minimum=100, maximum=2000, value=1280, step=2)
-                vidheight_slider = gr.Slider(label="Output Video Height", minimum=100, maximum=2000, value=720, step=2)
             with gr.Accordion(label="Advanced Options", open=False):
-                oscres_slider = gr.Slider(label="Number of Visualization Segments", minimum=256, maximum=2048, step=2, value=1024)
-                gr.Markdown('If uploading LRC, ensure a blank timed line at the end to avoid conversion errors')
-                lyrics_file = gr.File(label="(Optional) Upload Lyrics as LRC or SRT", file_count='single', file_types=['lrc', 'srt'])
             # Add a submit button
             submit_btn = gr.Button("Generate Video")
@@ -375,10 +447,11 @@ with gr.Blocks() as demo:
     # Bind the button to the function
     submit_btn.click(
         fn=gradio_interface,
-        inputs=[audio_file, lyrics_file, output_name, fps_slider, vidwidth_slider, vidheight_slider, oscres_slider, cover_img, title_input, artist_input],
         outputs=[output_video, srt_output]
     )
 # Launch Gradio interface
 if __name__ == '__main__':
-    demo.launch()

 import shutil
 import LRC2SRT
 import sys
+import re
+flag = 1
 path = ""  # Update with your path
 def safe_read(i: int, a: list):
     """Return ``a[i]``, or 128 when ``i`` lies outside ``a``.

     Callers in this file subtract 128 from the result, so the filler value
     yields a zero offset. Indices below zero count as out of range rather
     than wrapping to the end of ``a``: ``getRenderCords`` starts its window
     at ``idx - res // 2``, which is negative near the start of the data.

     Args:
         i: Position to read.
         a: Sequence to read from.

     Returns:
         ``a[i]`` when ``0 <= i < len(a)``, otherwise 128.
     """
     if 0 <= i < len(a):
         return a[i]
     return 128
 def getRenderCords(ta: list, idx: int, res: int = 1024, size: tuple = (1280, 720)) -> list:
     """Build the (x, y) points for the window of ``res`` samples around ``idx``.

     x starts at ``-0.45 * size[0]`` and advances by ``0.9 * size[0] / res``
     per point. y is the sample's offset from 128 scaled by
     ``size[1] / 2000`` and shifted by ``-0.35 * size[1]``.

     Args:
         ta: Sample values, read through ``safe_read``.
         idx: Centre position of the window.
         res: Number of points to produce.
         size: ``(width, height)`` used for scaling.

     Returns:
         List of ``(x, y)`` tuples, one per position in
         ``[idx - res // 2, idx + res // 2)``.
     """
     frame_w, frame_h = size[0], size[1]
     pos = idx - res // 2
     end = idx + (res // 2)
     gain = frame_h / 2000
     baseline = frame_h * .7 / -2
     x = frame_w * .9 / -2
     y = (safe_read(pos, ta) - 128) * gain + baseline
     points = []
     while pos < end:
         points.append((x, y))
         pos += 1
         y = (safe_read(pos, ta) - 128) * gain + baseline
         x += (frame_w * .9) / res
     return points
 def center_to_top_left(coords, width=1280, height=720):
     """Apply ``totopleft`` to every ``(x, y)`` pair in ``coords``.

     Args:
         coords: Iterable of two-element points.
         width: Frame width forwarded to ``totopleft``.
         height: Frame height forwarded to ``totopleft``.

     Returns:
         A new list with the converted points, in the same order.
     """
     return [totopleft((x, y), width=width, height=height) for x, y in coords]
 def totopleft(coord, width=1280, height=720):
     """Convert a centre-origin, y-up point to a top-left-origin, y-down point.

     Args:
         coord: Point whose first two items are x and y.
         width: Frame width.
         height: Frame height.

     Returns:
         Tuple ``(x + width / 2, height / 2 - y)``.
     """
     x, y = coord[0], coord[1]
     return x + width / 2, height / 2 - y
 def getTrigger(ad: int, a: list, max: int = 1024) -> int:
     """Scan forward from ``ad`` for the first position meeting the trigger test.

     A position ``i`` qualifies when the sample at ``i`` is below 126 and the
     sample at ``i + 10`` is below 130 (both read through ``safe_read``).
     The scan also stops once it has moved more than ``max`` positions.
     NOTE(review): presumably this aligns successive frames on a similar
     waveform phase - confirm with the author.

     Args:
         ad: Position to start scanning from.
         a: Sample values.
         max: Largest allowed distance from ``ad`` before giving up.

     Returns:
         The first qualifying position, or ``ad + max + 1`` if none is found.
     """
     offset = 0
     while offset <= max:
         pos = ad + offset
         if safe_read(pos, a) < 126 and safe_read(pos + 10, a) < 130:
             break
         offset += 1
     return ad + offset
 def extract_cover_image(mp3_file):
     audio = MP3(mp3_file, ID3=ID3)
     if audio.tags == None:
     print("No cover image found in the MP3 file.")
     return None
 def getTitleAndArtist(mp3_file):
     """Read the first ``TIT2`` and ``TPE1`` text values from an MP3 file.

     A frame built with empty text stands in for any frame that is missing.

     Args:
         mp3_file: File passed to ``MP3``.

     Returns:
         Tuple ``(title, artist)``.
     """
     tags = MP3(mp3_file, ID3=ID3)
     title_frame = tags.get('TIT2', TIT2(encoding=3, text=''))
     artist_frame = tags.get('TPE1', TPE1(encoding=3, text=''))
     return title_frame.text[0], artist_frame.text[0]
 def getColour(img):
     with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmpfile:
         img.save(tmpfile.name, format="PNG")
     os.remove(tmpfile.name)
     return dominant_color
 def clamp(number):
     """Limit ``number`` to the closed interval [0, 1]."""
     upper_bounded = min(number, 1)
     return max(0, upper_bounded)
+def normalizeColour(C):
     cc = colorsys.rgb_to_hsv(C[0] / 255, C[1] / 255, C[2] / 255)
     ccc = colorsys.hsv_to_rgb(cc[0], clamp(1.3 * cc[1]), .8)
     return math.floor(ccc[0] * 255), math.floor(ccc[1] * 255), math.floor(ccc[2] * 255)
+def normalizeColourBar(C):
     cc = colorsys.rgb_to_hsv(C[0] / 255, C[1] / 255, C[2] / 255)
     ccc = colorsys.hsv_to_rgb(cc[0], clamp(1.4 * cc[1]), .6)
     return math.floor(ccc[0] * 255), math.floor(ccc[1] * 255), math.floor(ccc[2] * 255)
 def stamp_text(draw, text, font, position, align='left'):
     text_bbox = draw.textbbox((0, 0), text, font=font)
     text_width = text_bbox[2] - text_bbox[0]
     draw.text((x, y), text, font=font, fill="#fff")
 def linear_interpolate(start, stop, progress):
     """Return the value ``progress`` of the way from ``start`` to ``stop``.

     ``progress`` of 0 gives ``start`` and 1 gives ``stop``; values outside
     that range extrapolate.
     """
     span = stop - start
     return start + progress * span
 def filecount(p):
     """Return the number of entries (files and sub-directories) in ``p``.

     Args:
         p: Directory to inspect. ``None`` lists the current working
             directory, as ``os.listdir`` does.

     Returns:
         Count of names returned by ``os.listdir(p)``.

     Raises:
         OSError: If ``p`` cannot be listed.
     """
     # The argument must be forwarded; a bare os.listdir() always lists the
     # current working directory regardless of what the caller asked for.
     return len(os.listdir(p))
 def render_frame(params):
     n, samples_array, cover_img, title, artist, dominant_color, width, height, fps, name, oscres, sr = params
     num_frames = len(samples_array) // (sr // fps)
     d = ImageDraw.Draw(img)
     s = math.floor((sr / fps) * n)
+    e = center_to_top_left(getRenderCords(samples_array, getTrigger(
+        s, samples_array, max=oscres), res=oscres, size=(width, height)), width=width, height=height)
+    d.line(e, fill='#fff', width=round(min(2*height/720, 2*width/1280)))
     cs = math.floor(min(width, height) / 2)
     cov = cover_img.resize((cs, cs))
     img.paste(cov, (((width // 2) - cs // 2), math.floor(height * .1)))
+    fontT = ImageFont.truetype(
+        path+'Lexend-Bold.ttf', 50*(min(width, height)/720)//1)
+    fontA = ImageFont.truetype(
+        path+'Lexend-Bold.ttf', 40*(min(width, height)/720)//1)
+    fontD = ImageFont.truetype(
+        path+'SpaceMono-Bold.ttf', 30*(min(width, height)/720)//1)
+    stamp_text(d, title, fontT, totopleft(
+        (0, min(width, height) * .3 // -2), width=width, height=height), 'center')
+    stamp_text(d, artist, fontA, totopleft(
+        (0, min(width, height) * .44 // -2), width=width, height=height), 'center')
     d.line(center_to_top_left([(width * .96 // -2, height * .95 // -2), (width * .96 // 2, height * .95 // -2)], width=width, height=height),
            fill=normalizeColourBar(dominant_color), width=15 * height // 360)
     d.line(center_to_top_left([(width * .95 // -2, height * .95 // -2),
                                (linear_interpolate(width * .95 // -2, width * .95 // 2, s / len(samples_array)),
+                                height * .95 // -2)], width=width, height=height), fill='#fff', width=10 * height // 360)
     img.save(path+f'out/{name}/{str(n)}.png', 'PNG')
     return 1  # Indicate one frame processed
 def RenderVid(af, n, fps=30):
     """Encode the numbered PNG frames under ``out/<n>/`` plus ``af`` into ``<n>.mp4``.

     Args:
         af: Audio input handed to the second ``input`` call.
         n: Name of the frame sub-directory and of the output file.
         fps: Frame rate used for both the image input and the output.
     """
     frames = ffmpeg.input(path+f'out/{n}/%d.png', framerate=fps)
     # NOTE(review): this relies on the stream object exposing ``.input`` -
     # confirm against the installed ffmpeg binding.
     with_audio = frames.input(af)
     target = with_audio.output(n + '.mp4', vcodec='libx264', r=fps, pix_fmt='yuv420p', acodec='aac', shortest=None)
     target.run()
     # NOTE(review): verify that ``gr.Interface.download`` exists in the
     # Gradio version in use.
     gr.Interface.download(f"{n}.mp4")
 invisible_chars = ["\u200B", "\uFEFF"]
 def remove_bom(data: str) -> str:
     """Strip every leading U+FEFF character from ``data``."""
     lead = 0
     while lead < len(data) and data[lead] == '\ufeff':
         lead += 1
     return data[lead:]
 def stripinvisibles(s):
     """Return ``s`` without leading BOMs and without any ``invisible_chars``.

     Args:
         s: Text to clean.

     Returns:
         The cleaned string.
     """
     e = remove_bom(s)
     for i in invisible_chars:
         # str.replace returns a new string; the result has to be kept or
         # the characters are never actually removed.
         e = e.replace(i, "")
     return e
 def start_progress(title):
     """Print an empty 40-cell progress bar and reset the global fill counter.

     Writes ``<title>: [`` followed by 40 dashes and ``] 0%``, then emits 44
     backspace characters so the cursor sits on the first cell again.

     Args:
         title: Text printed in front of the bar.
     """
     global progress_x
     out = sys.stdout
     out.write(f"{title}: [{'-' * 40}] 0%")
     # 40 cells plus the four characters of "] 0%".
     out.write("\b" * 44)
     out.flush()
     progress_x = 0
 def progress(x):
     """Advance the console progress bar to ``x`` percent.

     Writes the newly filled cells, then the padding, closing bracket and
     percentage, and finally moves the cursor back to just after the last
     filled cell so the next call (or ``end_progress``) continues from there.

     Args:
         x: Completion percentage (0-100).
     """
     global progress_x
     x = int(x * 40 // 100)
     percent_complete = int(x / 40 * 100)
     tail = " " * (40 - x) + "] " + f"{percent_complete}%"
     sys.stdout.write("#" * (x - progress_x))
     sys.stdout.write(tail)
     # Back up by exactly the length of what was written after the filled
     # cells. A fixed count overshoots the bar and makes later hashes
     # overwrite the title text.
     sys.stdout.write(chr(8) * len(tail))
     sys.stdout.flush()
     progress_x = x
 def end_progress():
     """Fill the remaining cells of the progress bar and finish the line.

     Writes one ``#`` for every cell not yet recorded in ``progress_x``,
     followed by ``] 100%`` and a newline.
     """
     global progress_x
     remaining = 40 - progress_x
     closing = "#" * remaining + "] 100%\n"
     sys.stdout.write(closing)
     sys.stdout.flush()
 haslyrics = False
+def main(file, name, fps=30, res: tuple = (1280, 720), oscres=512, sr=11025, lyrics=None, img=None, tit=None, ast=None):
     global flag
     p = gr.Progress()
     LRC2SRT.clear()
                 gr.Warning("Lyrics file is invalid, skipping")
         except Exception as e:
             print(traceback.format_exc())
+            gr.Warning(
+                "Failed to parse lyrics, ensure there are no blank lines in between, you may use Lyrics Editor to ensure compatability")
     os.makedirs(path + f'out/{name}/', exist_ok=True)
     global iii
     iii = 0
     if img:
         cover_img = cover_file
     if cover_img is None:
+        raise gr.Error(
+            "Mp3 must have a cover image, upload the image under the 'Metadata' section", duration=None)
     elif cover_img == -1 and not (tit or ast or img):
+        raise gr.Error(
+            "Mp3 is missing tags, add the info under the 'Metadata' section", duration=None)
+    title, artist = getTitleAndArtist(audio_path)
     if tit and ast:
         title, artist = tit, ast
     if title == '' or artist == '':
     num_frames = len(samples_array) // (sr // fps)
     # Prepare parameters for each frame
+    params = [(n, samples_array, cover_img, title, artist, dominant_color,
+               width, height, fps, name, oscres, sr) for n in range(num_frames)]
     print('---------------------------------------------------------')
+    print('Info:' + "External" if img else "ID3")
     print("Title: " + title)
     print("Artist: " + artist)
     print(f'Resolution: {str(width)}x{str(height)}')
     print('Frame Count: ' + str(num_frames))
     print('Segments per frame: ' + str(oscres))
     print('---------------------------------------------------------')
+    start_progress("Rendering:")
     try:
+        with Pool(cpu_count()-1) as pool:
             num_frames = len(samples_array) // (sr // fps)
             # Use imap to get progress updates
             for _ in pool.imap_unordered(render_frame, params):
                 iii += 1  # Increment frame count for progress
                 p((iii, num_frames), desc="Rendering Frames")
+                progress(iii/num_frames*100)
     except Exception as e:
         raise gr.Error("Something went wrong whilst rendering")
+    finally:
+        end_progress()
     p = gr.Progress()
+    p(0, desc="Compiling video")
     print('---------------------------------------------------------')
     print('FFMPEG')
     if haslyrics:
             '-c:v', 'libx264',
             '-r', str(fps),
             '-pix_fmt', 'yuv420p',
+            '-c:a', 'aac',
             '-y',
             path + f'{name}.mp4'  # Output MP4 filename
         ]
+    process = subprocess.Popen(ffmpeg_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True)
+    # Regular expression to match frame information
+    frame_re = re.compile(r"frame=\s*(\d+)")
+    p = gr.Progress()
+    while True:
+        output = process.stderr.readline()
+        if output == '' and process.poll() is not None:
+            break
+        if output:
+            # Check if the output line contains frame information
+            match = frame_re.search(output)
+            if match:
+                frame = match.group(1)
+                p((int(frame), num_frames), desc="Compiling Video")
+                print(f"Frame: {frame}")
+    # Wait for the process to complete
+    process.wait()
     print('---------------------------------------------------------')
     return f"{name}.mp4", haslyrics
 def gradio_interface(audio_file, lyrics, output_name, fps=30, vidwidth=1280, vidheight=720, oscres=512, img=None, tit=None, ast=None):
     if audio_file is None:
         raise gr.Error("Please Provide an Audio File")
     resolution = f"{vidwidth}x{vidheight}"
     res = tuple(map(int, resolution.split('x')))
+    video_file, haslyrics = main(audio_file, output_name, fps=fps,
+                                 res=res, oscres=oscres, lyrics=lyrics, img=img, tit=tit, ast=ast)
     # Clean up the temporary file
     shutil.rmtree("out")
     srt_output = "out.srt" if haslyrics else None
     return video_file, srt_output, haslyrics
 def update_srt_output_visibility(haslyrics):
     """Build a Gradio update whose ``visible`` flag is ``haslyrics``.

     Args:
         haslyrics: Value passed as ``visible``.

     Returns:
         The object returned by ``gr.update``.
     """
     visibility_patch = gr.update(visible=haslyrics)
     return visibility_patch
 with gr.Blocks() as demo:
+    gr.Markdown(
+        'Upload an MP3 file and configure parameters to create a visualization video.')
+    gr.Markdown(
+        'Optionally upload a word or line synced lyric file in the advanced section.')
     with gr.Row():
         # Inputs on the left
         with gr.Column():
             with gr.Accordion(label="Audio Settings", open=True):
                 gr.Markdown('## Load your mp3 file here')
+                audio_file = gr.File(
+                    label="Upload your MP3 file", file_count='single', file_types=['mp3'])
             with gr.Accordion(label="Mp3 Metadata", open=False):
+                gr.Markdown(
+                    '## Add Metadata here if your mp3 does not have one')
                 cover_img = gr.Image(label='Cover Art', type="filepath")
                 title_input = gr.Textbox(label='Title')
                 artist_input = gr.Textbox(label='Artists')
             with gr.Accordion(label="Video Output Settings", open=False):
                 gr.Markdown('## Configure Video Output Here')
+                output_name = gr.Textbox(
+                    label="Output Video Name", value='Output')
+                fps_slider = gr.Slider(
+                    label="Frames per Second", minimum=20, maximum=60, step=1, value=30)
+                vidwidth_slider = gr.Slider(
+                    label="Output Video Width", minimum=100, maximum=2000, value=1280, step=2)
+                vidheight_slider = gr.Slider(
+                    label="Output Video Height", minimum=100, maximum=2000, value=720, step=2)
             with gr.Accordion(label="Advanced Options", open=False):
+                oscres_slider = gr.Slider(
+                    label="Number of Visualization Segments", minimum=256, maximum=2048, step=2, value=1024)
+                gr.Markdown(
+                    'If uploading LRC, ensure a blank timed line at the end to avoid conversion errors')
+                lyrics_file = gr.File(label="(Optional) Upload Lyrics as LRC or SRT",
+                                      file_count='single', file_types=['lrc', 'srt'])
             # Add a submit button
             submit_btn = gr.Button("Generate Video")
     # Bind the button to the function
     submit_btn.click(
         fn=gradio_interface,
+        inputs=[audio_file, lyrics_file, output_name, fps_slider, vidwidth_slider,
+                vidheight_slider, oscres_slider, cover_img, title_input, artist_input],
         outputs=[output_video, srt_output]
     )
 # Launch Gradio interface
 if __name__ == '__main__':
+    demo.launch()