Update app.py
Browse files
app.py
CHANGED
@@ -17,31 +17,34 @@ semanticodec = load_model()
|
|
17 |
|
18 |
@spaces.GPU(duration=20)
|
19 |
def encode_audio(audio_path):
|
20 |
-
"""Encode audio file to tokens and
|
21 |
tokens = semanticodec.encode(audio_path)
|
22 |
# Move tokens to CPU before converting to numpy
|
23 |
if isinstance(tokens, torch.Tensor):
|
24 |
tokens = tokens.cpu().numpy()
|
25 |
|
26 |
-
# Save to a
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
|
31 |
-
#
|
32 |
-
if
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
|
37 |
@spaces.GPU(duration=60)
|
38 |
-
def decode_tokens(
|
39 |
"""Decode tokens to audio"""
|
40 |
-
# Ensure the
|
41 |
-
if not
|
42 |
-
return None, "Error: Empty
|
|
|
|
|
|
|
|
|
43 |
|
44 |
-
tokens = np.load(token_path)
|
45 |
# Convert to torch tensor if needed by the model
|
46 |
if hasattr(semanticodec, 'decode_requires_tensor') and semanticodec.decode_requires_tensor:
|
47 |
tokens = torch.tensor(tokens)
|
@@ -50,7 +53,7 @@ def decode_tokens(token_path):
|
|
50 |
if isinstance(waveform, torch.Tensor):
|
51 |
waveform = waveform.cpu().numpy()
|
52 |
|
53 |
-
# Create in-memory file
|
54 |
output_buffer = io.BytesIO()
|
55 |
sf.write(output_buffer, waveform[0, 0], 32000, format='WAV')
|
56 |
output_buffer.seek(0)
|
@@ -95,14 +98,14 @@ with gr.Blocks(title="Oterin Audio Codec") as demo:
|
|
95 |
with gr.Tab("Encode Audio"):
|
96 |
with gr.Row():
|
97 |
encode_input = gr.Audio(type="filepath", label="Input Audio")
|
98 |
-
encode_output = gr.File(label="Encoded Tokens (.oterin)")
|
99 |
encode_status = gr.Textbox(label="Status")
|
100 |
encode_btn = gr.Button("Encode")
|
101 |
encode_btn.click(encode_audio, inputs=encode_input, outputs=[encode_output, encode_status])
|
102 |
|
103 |
with gr.Tab("Decode Tokens"):
|
104 |
with gr.Row():
|
105 |
-
decode_input = gr.File(label="Token File (.oterin)")
|
106 |
decode_output = gr.Audio(label="Decoded Audio")
|
107 |
decode_status = gr.Textbox(label="Status")
|
108 |
decode_btn = gr.Button("Decode")
|
|
|
17 |
|
18 |
@spaces.GPU(duration=20)
def encode_audio(audio_path):
    """Encode an audio file to tokens and save them to a downloadable file.

    Args:
        audio_path: Path of the audio file to encode, as supplied by the
            ``gr.Audio(type="filepath")`` input. May be empty/``None`` when
            the user triggers encoding without providing audio.

    Returns:
        A ``(token_file_path, status_message)`` tuple. ``token_file_path`` is
        the path of a temporary ``.oterin`` file containing the tokens as
        written by ``np.save``, or ``None`` when no input was provided.
    """
    # Function-scope import keeps this block self-contained.
    import tempfile

    # Report a missing input through the status box instead of letting the
    # model call fail on it.
    if not audio_path:
        return None, "Error: No audio file provided"

    tokens = semanticodec.encode(audio_path)
    # Move tokens to CPU before converting to numpy
    if isinstance(tokens, torch.Tensor):
        tokens = tokens.cpu().numpy()

    # A gr.File output is populated from a file path, not from an in-memory
    # buffer, so persist the tokens to disk. delete=False keeps the file
    # alive after the handle closes so Gradio can serve it afterwards.
    with tempfile.NamedTemporaryFile(suffix=".oterin", delete=False) as token_file:
        # Passing an open file object stops np.save from appending ".npy".
        np.save(token_file, tokens)

    # NOTE(review): len(tokens) is the length of the first dimension; confirm
    # that this is the token count for the shape semanticodec.encode returns.
    return token_file.name, f"Encoded to {len(tokens)} tokens"
|
36 |
|
37 |
@spaces.GPU(duration=60)
|
38 |
+
def decode_tokens(token_buffer):
|
39 |
"""Decode tokens to audio"""
|
40 |
+
# Ensure the buffer has content
|
41 |
+
if not token_buffer or token_buffer.getbuffer().nbytes == 0:
|
42 |
+
return None, "Error: Empty token buffer"
|
43 |
+
|
44 |
+
# Reset buffer position to start
|
45 |
+
token_buffer.seek(0)
|
46 |
+
tokens = np.load(token_buffer)
|
47 |
|
|
|
48 |
# Convert to torch tensor if needed by the model
|
49 |
if hasattr(semanticodec, 'decode_requires_tensor') and semanticodec.decode_requires_tensor:
|
50 |
tokens = torch.tensor(tokens)
|
|
|
53 |
if isinstance(waveform, torch.Tensor):
|
54 |
waveform = waveform.cpu().numpy()
|
55 |
|
56 |
+
# Create in-memory file for audio
|
57 |
output_buffer = io.BytesIO()
|
58 |
sf.write(output_buffer, waveform[0, 0], 32000, format='WAV')
|
59 |
output_buffer.seek(0)
|
|
|
98 |
with gr.Tab("Encode Audio"):
|
99 |
with gr.Row():
|
100 |
encode_input = gr.Audio(type="filepath", label="Input Audio")
|
101 |
+
encode_output = gr.File(label="Encoded Tokens (.oterin)", file_types=[".oterin"])
|
102 |
encode_status = gr.Textbox(label="Status")
|
103 |
encode_btn = gr.Button("Encode")
|
104 |
encode_btn.click(encode_audio, inputs=encode_input, outputs=[encode_output, encode_status])
|
105 |
|
106 |
with gr.Tab("Decode Tokens"):
|
107 |
with gr.Row():
|
108 |
+
decode_input = gr.File(label="Token File (.oterin)", file_types=[".oterin"])
|
109 |
decode_output = gr.Audio(label="Decoded Audio")
|
110 |
decode_status = gr.Textbox(label="Status")
|
111 |
decode_btn = gr.Button("Decode")
|