Spaces:
Running
on
T4
Running
on
T4
Add Text and description to video
Browse files- app.py +15 -6
- audiocraft/utils/extend.py +86 -1
- requirements.txt +1 -0
app.py
CHANGED
@@ -13,7 +13,7 @@ import gradio as gr
|
|
13 |
import os
|
14 |
from audiocraft.models import MusicGen
|
15 |
from audiocraft.data.audio import audio_write
|
16 |
-
from audiocraft.utils.extend import generate_music_segments
|
17 |
import numpy as np
|
18 |
|
19 |
MODEL = None
|
@@ -25,7 +25,7 @@ def load_model(version):
|
|
25 |
return MusicGen.get_pretrained(version)
|
26 |
|
27 |
|
28 |
-
def predict(model, text, melody, duration, dimension, topk, topp, temperature, cfg_coef, background):
|
29 |
global MODEL
|
30 |
output_segments = None
|
31 |
topk = int(topk)
|
@@ -75,6 +75,10 @@ def predict(model, text, melody, duration, dimension, topk, topp, temperature, c
|
|
75 |
else:
|
76 |
output = output.detach().cpu().float()[0]
|
77 |
with NamedTemporaryFile("wb", suffix=".wav", delete=False) as file:
|
|
|
|
|
|
|
|
|
78 |
audio_write(
|
79 |
file.name, output, MODEL.sample_rate, strategy="loudness",
|
80 |
loudness_headroom_db=16, loudness_compressor=True, add_suffix=False)
|
@@ -102,12 +106,17 @@ def ui(**kwargs):
|
|
102 |
with gr.Row():
|
103 |
with gr.Column():
|
104 |
with gr.Row():
|
105 |
-
text = gr.Text(label="Input Text", interactive=True)
|
106 |
melody = gr.Audio(source="upload", type="numpy", label="Melody Condition (optional)", interactive=True)
|
107 |
with gr.Row():
|
108 |
submit = gr.Button("Submit")
|
109 |
with gr.Row():
|
110 |
background= gr.Image(value="./assets/background.png", source="upload", label="Background", shape=(768,512), type="filepath", interactive=True)
|
|
|
|
|
|
|
|
|
|
|
111 |
with gr.Row():
|
112 |
model = gr.Radio(["melody", "medium", "small", "large"], label="Model", value="melody", interactive=True)
|
113 |
with gr.Row():
|
@@ -116,11 +125,11 @@ def ui(**kwargs):
|
|
116 |
with gr.Row():
|
117 |
topk = gr.Number(label="Top-k", value=250, interactive=True)
|
118 |
topp = gr.Number(label="Top-p", value=0, interactive=True)
|
119 |
-
temperature = gr.Number(label="Temperature", value=1.0, interactive=True)
|
120 |
-
cfg_coef = gr.Number(label="Classifier Free Guidance", value=3.0, interactive=True)
|
121 |
with gr.Column():
|
122 |
output = gr.Video(label="Generated Music")
|
123 |
-
submit.click(predict, inputs=[model, text, melody, duration, dimension, topk, topp, temperature, cfg_coef, background], outputs=[output])
|
124 |
gr.Examples(
|
125 |
fn=predict,
|
126 |
examples=[
|
|
|
13 |
import os
|
14 |
from audiocraft.models import MusicGen
|
15 |
from audiocraft.data.audio import audio_write
|
16 |
+
from audiocraft.utils.extend import generate_music_segments, add_settings_to_image, sanitize_file_name
|
17 |
import numpy as np
|
18 |
|
19 |
MODEL = None
|
|
|
25 |
return MusicGen.get_pretrained(version)
|
26 |
|
27 |
|
28 |
+
def predict(model, text, melody, duration, dimension, topk, topp, temperature, cfg_coef, background, title, include_settings, settings_font, settings_font_color):
|
29 |
global MODEL
|
30 |
output_segments = None
|
31 |
topk = int(topk)
|
|
|
75 |
else:
|
76 |
output = output.detach().cpu().float()[0]
|
77 |
with NamedTemporaryFile("wb", suffix=".wav", delete=False) as file:
|
78 |
+
if include_settings:
|
79 |
+
video_description = f"{text}\n Duration: {str(duration)} Dimension: {dimension}\n Top-k:{topk} Top-p:{topp}\n Randomness:{temperature}\n cfg:{cfg_coef}"
|
80 |
+
background = add_settings_to_image(title, video_description, background_path=background, font=settings_font, font_color=settings_font_color)
|
81 |
+
#filename = sanitize_file_name(title) if title != "" else file.name
|
82 |
audio_write(
|
83 |
file.name, output, MODEL.sample_rate, strategy="loudness",
|
84 |
loudness_headroom_db=16, loudness_compressor=True, add_suffix=False)
|
|
|
106 |
with gr.Row():
|
107 |
with gr.Column():
|
108 |
with gr.Row():
|
109 |
+
text = gr.Text(label="Input Text", interactive=True, value="4/4 100bpm 320kbps 48khz, Industrial/Electronic Soundtrack, Dark, Intense, Sci-Fi")
|
110 |
melody = gr.Audio(source="upload", type="numpy", label="Melody Condition (optional)", interactive=True)
|
111 |
with gr.Row():
|
112 |
submit = gr.Button("Submit")
|
113 |
with gr.Row():
|
114 |
background= gr.Image(value="./assets/background.png", source="upload", label="Background", shape=(768,512), type="filepath", interactive=True)
|
115 |
+
include_settings = gr.Checkbox(label="Add Settings to background", value=True, interactive=True)
|
116 |
+
with gr.Row():
|
117 |
+
title = gr.Textbox(label="Title", value="MusicGen", interactive=True)
|
118 |
+
settings_font = gr.Text(label="Settings Font", value="arial.ttf", interactive=True)
|
119 |
+
settings_font_color = gr.ColorPicker(label="Settings Font Color", value="#ffffff", interactive=True)
|
120 |
with gr.Row():
|
121 |
model = gr.Radio(["melody", "medium", "small", "large"], label="Model", value="melody", interactive=True)
|
122 |
with gr.Row():
|
|
|
125 |
with gr.Row():
|
126 |
topk = gr.Number(label="Top-k", value=250, interactive=True)
|
127 |
topp = gr.Number(label="Top-p", value=0, interactive=True)
|
128 |
+
temperature = gr.Number(label="Randomness Temperature", value=1.0, precision=2, interactive=True)
|
129 |
+
cfg_coef = gr.Number(label="Classifier Free Guidance", value=3.0, precision=2, interactive=True)
|
130 |
with gr.Column():
|
131 |
output = gr.Video(label="Generated Music")
|
132 |
+
submit.click(predict, inputs=[model, text, melody, duration, dimension, topk, topp, temperature, cfg_coef, background, title, include_settings, settings_font, settings_font_color], outputs=[output])
|
133 |
gr.Examples(
|
134 |
fn=predict,
|
135 |
examples=[
|
audiocraft/utils/extend.py
CHANGED
@@ -2,7 +2,11 @@ import torch
|
|
2 |
import math
|
3 |
from audiocraft.models import MusicGen
|
4 |
import numpy as np
|
5 |
-
|
|
|
|
|
|
|
|
|
6 |
|
7 |
def separate_audio_segments(audio, segment_duration=30):
|
8 |
sr, audio_data = audio[0], audio[1]
|
@@ -106,6 +110,87 @@ def generate_music_segments(text, melody, MODEL, duration:int=10, segment_durati
|
|
106 |
|
107 |
# return output_segments
|
108 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
109 |
|
110 |
|
111 |
|
|
|
2 |
import math
|
3 |
from audiocraft.models import MusicGen
|
4 |
import numpy as np
|
5 |
+
from PIL import Image, ImageDraw, ImageFont, ImageColor
|
6 |
+
import string
|
7 |
+
import tempfile
|
8 |
+
import os
|
9 |
+
import textwrap
|
10 |
|
11 |
def separate_audio_segments(audio, segment_duration=30):
|
12 |
sr, audio_data = audio[0], audio[1]
|
|
|
110 |
|
111 |
# return output_segments
|
112 |
|
113 |
+
def save_image(image):
    """
    Saves a PIL image to a temporary file and returns the file path.

    Parameters:
    - image: PIL.Image
        The PIL image object to be saved.

    Returns:
    - str or None: The file path where the image was saved,
        or None if there was an error saving the image.

    """
    # Reserve a unique ".png" path in the system temp directory. The handle is
    # closed immediately so that image.save() can open the path on its own.
    temp_file = tempfile.NamedTemporaryFile(suffix=".png", dir=tempfile.gettempdir(), delete=False)
    temp_file.close()
    file_path = temp_file.name

    try:
        image.save(file_path)
    except Exception as e:
        print("Unable to save image:", str(e))
        # Do not leave the empty placeholder file behind after a failed save.
        try:
            os.remove(file_path)
        except OSError:
            pass
        # NOTE: the previous implementation returned from a `finally` clause,
        # which silently replaced this None with the path of an empty file.
        return None

    return file_path
|
139 |
+
|
140 |
+
def hex_to_rgba(hex_color):
    """
    Converts a colour string to an RGBA tuple.

    Parameters:
    - hex_color: str
        The colour specification handed to ImageColor.getcolor.

    Returns:
    - tuple: The RGBA value produced by ImageColor.getcolor, or opaque
        yellow (255, 255, 0, 255) when it raises ValueError.
    """
    try:
        return ImageColor.getcolor(hex_color, "RGBA")
    except ValueError:
        # Unparseable colour: fall back to yellow
        return (255, 255, 0, 255)
|
148 |
+
|
149 |
+
def _draw_centered_block(draw, text, font, top_y, center_x, fill):
    """Draws (possibly multi-line) text centred on center_x starting at top_y; returns the y coordinate of its bottom edge."""
    if not text:
        # Nothing to draw, so the block has no height
        return top_y
    # multiline_textbbox measures the whole wrapped block; font.getbbox only
    # measures a single line and returns (left, top, right, bottom), not a size.
    left, _, right, bottom = draw.multiline_textbbox((0, top_y), text, font=font, align="center")
    x = max(center_x - int(right - left) // 2, 0)
    draw.multiline_text((x, top_y), text, fill=fill, font=font, align="center")
    return bottom


def add_settings_to_image(title: str = "title", description: str = "", width: int = 768, height: int = 512, background_path: str = "", font: str = "arial.ttf", font_color: str = "#ffffff"):
    """
    Draws a title and a description onto a background image and saves the result.

    Parameters:
    - title: str
        Heading text, wrapped and drawn 10 pixels below the top of the overlay.
    - description: str
        Body text, wrapped and drawn 20 pixels below the title.
    - width, height: int
        Size of the transparent text overlay (and of the blank white
        background that is created when no background image is given).
    - background_path: str
        Path of the background image; an empty string or None selects a
        blank white background.
    - font: str
        Font file name or path passed to ImageFont.truetype.
    - font_color: str
        Text colour, converted with hex_to_rgba.

    Returns:
    - The value returned by save_image for the composed image.
    """
    # Transparent overlay that receives the text
    overlay = Image.new("RGBA", (width, height), (255, 255, 255, 0))

    # `not background_path` also covers None, not only the empty string
    if not background_path:
        background = Image.new("RGBA", (width, height), (255, 255, 255, 255))
    else:
        background = Image.open(background_path).convert("RGBA")

    # Convert font color to RGBA tuple
    font_color = hex_to_rgba(font_color)

    draw = ImageDraw.Draw(overlay)
    center_x = width // 2
    # textwrap.wrap rejects a width of 0, which width // 12 yields for width < 12
    wrap_columns = max(width // 12, 1)

    # Title: the wrapped text is what gets drawn (previously the unwrapped
    # title was drawn, so long titles ran off the image).
    title_font = ImageFont.truetype(font, 26)
    title_text = '\n'.join(textwrap.wrap(title, wrap_columns))
    title_bottom = _draw_centered_block(draw, title_text, title_font, 10, center_x, font_color)

    # Description: 20 pixels spacing between title and description
    description_font = ImageFont.truetype(font, 16)
    description_text = '\n'.join(textwrap.wrap(description, wrap_columns))
    _draw_centered_block(draw, description_text, description_font, title_bottom + 20, center_x, font_color)

    # Centre the overlay on the background; the overlay's own alpha channel is
    # the mask, so only the text pixels are transferred.
    bg_w, bg_h = background.size
    offset = ((bg_w - width) // 2, (bg_h - height) // 2)
    background.paste(overlay, offset, mask=overlay)

    # Save the image and return the file path
    return save_image(background)
|
188 |
+
|
189 |
+
|
190 |
+
def sanitize_file_name(filename):
    """
    Strips every character that is not on the file-name allow-list.

    Parameters:
    - filename: str
        The candidate file name.

    Returns:
    - str: The input with only ASCII letters, digits, spaces and the
        characters "-_.()" retained, in their original order.
    """
    allowed = "-_.() " + string.ascii_letters + string.digits
    kept = filter(lambda ch: ch in allowed, filename)
    return ''.join(kept)
|
194 |
|
195 |
|
196 |
|
requirements.txt
CHANGED
@@ -18,3 +18,4 @@ xformers
|
|
18 |
demucs
|
19 |
librosa
|
20 |
gradio
|
|
|
|
18 |
demucs
|
19 |
librosa
|
20 |
gradio
|
21 |
+
textwrap
|