Update app.py
Browse files
app.py
CHANGED
@@ -6,6 +6,8 @@ from semanticodec import SemantiCodec
|
|
6 |
from huggingface_hub import HfApi
|
7 |
import spaces
|
8 |
import torch
|
|
|
|
|
9 |
|
10 |
# Initialize the model
|
11 |
def load_model():
|
@@ -13,15 +15,19 @@ def load_model():
|
|
13 |
|
14 |
semanticodec = load_model()
|
15 |
|
16 |
-
@spaces.GPU(duration=
|
17 |
def encode_audio(audio_path):
|
18 |
"""Encode audio file to tokens and save them"""
|
19 |
tokens = semanticodec.encode(audio_path)
|
20 |
# Move tokens to CPU before converting to numpy
|
21 |
if isinstance(tokens, torch.Tensor):
|
22 |
tokens = tokens.cpu().numpy()
|
23 |
-
|
24 |
-
|
|
|
|
|
|
|
|
|
25 |
return token_path, f"Encoded to {len(tokens)} tokens"
|
26 |
|
27 |
@spaces.GPU(duration=60)
|
@@ -35,15 +41,35 @@ def decode_tokens(token_path):
|
|
35 |
# Move waveform to CPU if it's a tensor
|
36 |
if isinstance(waveform, torch.Tensor):
|
37 |
waveform = waveform.cpu().numpy()
|
38 |
-
|
39 |
-
|
40 |
-
|
|
|
|
|
|
|
|
|
41 |
|
|
|
42 |
def process_both(audio_path):
|
43 |
-
"""Encode and then decode the audio"""
|
44 |
-
|
45 |
-
|
46 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
47 |
|
48 |
# Create Gradio interface
|
49 |
with gr.Blocks(title="Oterin Audio Codec") as demo:
|
@@ -75,4 +101,4 @@ with gr.Blocks(title="Oterin Audio Codec") as demo:
|
|
75 |
both_btn.click(process_both, inputs=both_input, outputs=[both_output, both_status])
|
76 |
|
77 |
if __name__ == "__main__":
|
78 |
-
demo.launch()
|
|
|
6 |
from huggingface_hub import HfApi
|
7 |
import spaces
|
8 |
import torch
|
9 |
+
import tempfile
|
10 |
+
import io
|
11 |
|
12 |
# Initialize the model
|
13 |
def load_model():
|
|
|
15 |
|
16 |
semanticodec = load_model()
|
17 |
|
18 |
+
@spaces.GPU(duration=20)
def encode_audio(audio_path):
    """Encode an audio file into semantic tokens and persist them to disk.

    Args:
        audio_path: Path to the input audio file understood by
            ``semanticodec.encode``.

    Returns:
        tuple[str, str]: ``(token_path, status)`` where ``token_path`` is the
        temporary ``.oterin`` file holding the tokens (NumPy ``.npy`` format)
        and ``status`` reports the token count.
    """
    tokens = semanticodec.encode(audio_path)
    # Move tokens to CPU before converting to numpy
    if isinstance(tokens, torch.Tensor):
        tokens = tokens.cpu().numpy()

    # Save to a temporary file. BUGFIX: pass the open file OBJECT to np.save.
    # Given a path string without a ".npy" suffix, np.save silently appends
    # ".npy", so the data would land in "<name>.oterin.npy" while we return
    # "<name>.oterin" — a path that was never written. Writing through the
    # file handle keeps the saved data at exactly tmp_file.name.
    with tempfile.NamedTemporaryFile(suffix='.oterin', delete=False) as tmp_file:
        np.save(tmp_file, tokens)
        token_path = tmp_file.name

    return token_path, f"Encoded to {len(tokens)} tokens"
|
32 |
|
33 |
@spaces.GPU(duration=60)
|
|
|
41 |
# Move waveform to CPU if it's a tensor
|
42 |
if isinstance(waveform, torch.Tensor):
|
43 |
waveform = waveform.cpu().numpy()
|
44 |
+
|
45 |
+
# Create in-memory file
|
46 |
+
output_buffer = io.BytesIO()
|
47 |
+
sf.write(output_buffer, waveform[0, 0], 32000, format='WAV')
|
48 |
+
output_buffer.seek(0)
|
49 |
+
|
50 |
+
return output_buffer, f"Decoded {len(tokens)} tokens to audio"
|
51 |
|
52 |
+
@spaces.GPU(duration=80)
def process_both(audio_path):
    """Encode and then decode the audio without saving intermediate files"""
    # Encode the source audio into semantic tokens, normalized to numpy.
    tokens = semanticodec.encode(audio_path)
    tokens = tokens.cpu().numpy() if isinstance(tokens, torch.Tensor) else tokens

    # Some codec builds flag that decode wants tensor input; convert only then.
    needs_tensor = getattr(semanticodec, 'decode_requires_tensor', False)
    decode_input = torch.tensor(tokens) if needs_tensor else tokens
    waveform = semanticodec.decode(decode_input)
    if isinstance(waveform, torch.Tensor):
        waveform = waveform.cpu().numpy()

    # Serialize the decoded signal straight into an in-memory WAV buffer,
    # rewound so the caller can read it from the start.
    buffer = io.BytesIO()
    sf.write(buffer, waveform[0, 0], 32000, format='WAV')
    buffer.seek(0)

    status = f"Encoded to {len(tokens)} tokens\nDecoded {len(tokens)} tokens to audio"
    return buffer, status
|
73 |
|
74 |
# Create Gradio interface
|
75 |
with gr.Blocks(title="Oterin Audio Codec") as demo:
|
|
|
101 |
both_btn.click(process_both, inputs=both_input, outputs=[both_output, both_status])
|
102 |
|
103 |
if __name__ == "__main__":
|
104 |
+
demo.launch(share=True)
|