Spaces:
Running
on
Zero
Running
on
Zero
Rex Cheng
committed on
Commit
•
6ab1a8e
1
Parent(s):
c58ca4b
faster encode/decode with a longer GPU duration
Browse files
app.py
CHANGED
@@ -58,7 +58,7 @@ def get_model() -> tuple[MMAudio, FeaturesUtils, SequenceConfig]:
|
|
58 |
net, feature_utils, seq_cfg = get_model()
|
59 |
|
60 |
|
61 |
-
@spaces.GPU
|
62 |
@torch.inference_mode()
|
63 |
def video_to_audio(video: gr.Video, prompt: str, negative_prompt: str, seed: int, num_steps: int,
|
64 |
cfg_strength: float, duration: float):
|
@@ -95,7 +95,7 @@ def video_to_audio(video: gr.Video, prompt: str, negative_prompt: str, seed: int
|
|
95 |
return video_save_path
|
96 |
|
97 |
|
98 |
-
@spaces.GPU
|
99 |
@torch.inference_mode()
|
100 |
def text_to_audio(prompt: str, negative_prompt: str, seed: int, num_steps: int, cfg_strength: float,
|
101 |
duration: float):
|
@@ -126,6 +126,12 @@ def text_to_audio(prompt: str, negative_prompt: str, seed: int, num_steps: int,
|
|
126 |
|
127 |
video_to_audio_tab = gr.Interface(
|
128 |
fn=video_to_audio,
|
|
|
|
|
|
|
|
|
|
|
|
|
129 |
inputs=[
|
130 |
gr.Video(),
|
131 |
gr.Text(label='Prompt'),
|
|
|
58 |
net, feature_utils, seq_cfg = get_model()
|
59 |
|
60 |
|
61 |
+
@spaces.GPU(duration=120)
|
62 |
@torch.inference_mode()
|
63 |
def video_to_audio(video: gr.Video, prompt: str, negative_prompt: str, seed: int, num_steps: int,
|
64 |
cfg_strength: float, duration: float):
|
|
|
95 |
return video_save_path
|
96 |
|
97 |
|
98 |
+
@spaces.GPU(duration=120)
|
99 |
@torch.inference_mode()
|
100 |
def text_to_audio(prompt: str, negative_prompt: str, seed: int, num_steps: int, cfg_strength: float,
|
101 |
duration: float):
|
|
|
126 |
|
127 |
video_to_audio_tab = gr.Interface(
|
128 |
fn=video_to_audio,
|
129 |
+
description="""
|
130 |
+
Project page: <a href="https://hkchengrex.com/MMAudio/">https://hkchengrex.com/MMAudio/</a><br>
|
131 |
+
Code: <a href="https://github.com/hkchengrex/MMAudio">https://github.com/hkchengrex/MMAudio</a><br>
|
132 |
+
|
133 |
+
NOTE: It takes longer to process high-resolution videos (>384 px on the shorter side) and does not improve results.
|
134 |
+
""",
|
135 |
inputs=[
|
136 |
gr.Video(),
|
137 |
gr.Text(label='Prompt'),
|