Spaces:
Sleeping
Sleeping
modify app
Browse files
app.py
CHANGED
@@ -83,9 +83,6 @@ def process_audio(input_audio, reference_audio):
|
|
83 |
if output_audio.shape[1] > output_audio.shape[0]:
|
84 |
output_audio = output_audio.transpose(1,0)
|
85 |
|
86 |
-
print(output_audio.shape)
|
87 |
-
print(f"sr: {sr}")
|
88 |
-
|
89 |
# Normalize output audio
|
90 |
output_audio = loudness_normalize(output_audio, sr)
|
91 |
# Denormalize the audio to int16
|
@@ -122,34 +119,53 @@ def perform_ito(input_audio, reference_audio, ito_reference_audio, num_steps, op
|
|
122 |
ito_log += result['log']
|
123 |
loss_values.append({"step": result['step'], "loss": result['loss']})
|
124 |
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
|
|
|
|
139 |
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
|
145 |
-
|
146 |
|
147 |
def update_ito_output(all_results, selected_step):
|
148 |
-
print(all_results[selected_step - 1])
|
149 |
-
print(selected_step)
|
150 |
selected_result = all_results[selected_step - 1]
|
151 |
-
|
152 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
153 |
|
154 |
|
155 |
""" APP display """
|
@@ -233,13 +249,7 @@ with gr.Blocks() as demo:
|
|
233 |
ito_log = gr.Textbox(label="ITO Log", lines=10)
|
234 |
|
235 |
all_results = gr.State([])
|
236 |
-
|
237 |
-
|
238 |
-
def on_ito_complete(results, min_step, loss_df):
|
239 |
-
all_results.value = results
|
240 |
-
min_loss_step.value = min_step
|
241 |
-
return loss_df, gr.update(maximum=len(results), value=min_step+1)
|
242 |
-
|
243 |
ito_button.click(
|
244 |
perform_ito,
|
245 |
inputs=[input_audio, reference_audio, ito_reference_audio, num_steps, optimizer, learning_rate, af_weights],
|
|
|
83 |
if output_audio.shape[1] > output_audio.shape[0]:
|
84 |
output_audio = output_audio.transpose(1,0)
|
85 |
|
|
|
|
|
|
|
86 |
# Normalize output audio
|
87 |
output_audio = loudness_normalize(output_audio, sr)
|
88 |
# Denormalize the audio to int16
|
|
|
119 |
ito_log += result['log']
|
120 |
loss_values.append({"step": result['step'], "loss": result['loss']})
|
121 |
|
122 |
+
# Return the results of the last step
|
123 |
+
last_result = all_results[-1]
|
124 |
+
current_output = last_result['audio']
|
125 |
+
ito_param_output = mastering_transfer.get_param_output_string(last_result['params'])
|
126 |
+
|
127 |
+
# Convert current_output to numpy array if it's a tensor
|
128 |
+
if isinstance(current_output, torch.Tensor):
|
129 |
+
current_output = current_output.cpu().numpy()
|
130 |
+
|
131 |
+
if current_output.ndim == 1:
|
132 |
+
current_output = current_output.reshape(-1, 1)
|
133 |
+
elif current_output.ndim > 2:
|
134 |
+
current_output = current_output.squeeze()
|
135 |
+
# Ensure the audio is in the correct shape (samples, channels)
|
136 |
+
if current_output.shape[1] > current_output.shape[0]:
|
137 |
+
current_output = current_output.transpose(1,0)
|
138 |
|
139 |
+
# Loudness normalize output audio
|
140 |
+
current_output = loudness_normalize(current_output, args.sample_rate)
|
141 |
+
# Denormalize the audio to int16
|
142 |
+
current_output = denormalize_audio(current_output, dtype=np.int16)
|
143 |
|
144 |
+
return (args.sample_rate, current_output), ito_param_output, num_steps, ito_log, pd.DataFrame(loss_values), all_results
|
145 |
|
146 |
def update_ito_output(all_results, selected_step):
    """Build the audio, parameter text and log for one selected ITO step.

    Args:
        all_results: Sequence of per-step result dicts; each entry is read
            for its 'audio', 'params' and 'log' keys.
        selected_step: 1-based step number; entry ``selected_step - 1`` of
            ``all_results`` is used.

    Returns:
        A 3-tuple ``((args.sample_rate, audio), param_string, log)`` where
        ``audio`` is the int16 array produced by ``denormalize_audio``,
        ``param_string`` comes from
        ``mastering_transfer.get_param_output_string`` and ``log`` is the
        selected result's 'log' entry.
    """
    selected_result = all_results[selected_step - 1]
    current_output = selected_result['audio']
    ito_param_output = mastering_transfer.get_param_output_string(selected_result['params'])

    # Convert current_output to numpy array if it's a tensor.
    # detach() first: Tensor.numpy() refuses tensors that still require grad.
    if isinstance(current_output, torch.Tensor):
        current_output = current_output.detach().cpu().numpy()

    # Drop singleton axes before the 1-D check: squeezing e.g. (1, 1, N)
    # yields a 1-D array, which must still be promoted to a column below,
    # otherwise the shape[1] lookup would raise IndexError.
    if current_output.ndim > 2:
        current_output = current_output.squeeze()
    if current_output.ndim <= 1:
        current_output = current_output.reshape(-1, 1)

    # Ensure the audio is in the correct shape (samples, channels)
    if current_output.shape[1] > current_output.shape[0]:
        current_output = current_output.transpose(1, 0)

    # Loudness normalize output audio
    current_output = loudness_normalize(current_output, args.sample_rate)
    # Denormalize the audio to int16
    current_output = denormalize_audio(current_output, dtype=np.int16)

    return (args.sample_rate, current_output), ito_param_output, selected_result['log']
|
169 |
|
170 |
|
171 |
""" APP display """
|
|
|
249 |
ito_log = gr.Textbox(label="ITO Log", lines=10)
|
250 |
|
251 |
all_results = gr.State([])
|
252 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
253 |
ito_button.click(
|
254 |
perform_ito,
|
255 |
inputs=[input_audio, reference_audio, ito_reference_audio, num_steps, optimizer, learning_rate, af_weights],
|