Update app.py
Browse files
app.py
CHANGED
@@ -5,6 +5,7 @@ import soundfile as sf
|
|
5 |
from semanticodec import SemantiCodec
|
6 |
from huggingface_hub import HfApi
|
7 |
import spaces
|
|
|
8 |
|
9 |
# Initialize the model
|
10 |
def load_model():
|
@@ -16,6 +17,9 @@ semanticodec = load_model()
|
|
16 |
def encode_audio(audio_path):
    """Encode an audio file to tokens and save them to disk.

    Args:
        audio_path: Path of the audio file; passed unchanged to
            ``semanticodec.encode``.

    Returns:
        A ``(token_path, message)`` tuple: the path of the saved token file
        and a short human-readable status string.
    """
    tokens = semanticodec.encode(audio_path)
    # The encoder may return a tensor-like object rather than an ndarray;
    # anything exposing ``detach`` is brought to host memory as a NumPy array
    # before serialization. Plain arrays pass through untouched.
    if hasattr(tokens, "detach"):
        tokens = tokens.detach().cpu().numpy()
    token_path = "encoded_audio.oterin"
    # np.save() appends ".npy" to a *string* path that lacks that suffix,
    # which would leave the returned token_path pointing at a file that was
    # never created. Saving through an open handle keeps the exact name.
    with open(token_path, "wb") as token_file:
        np.save(token_file, tokens)
    # NOTE(review): len() counts the first axis only; if the encoder output
    # is batched this is the batch size, not the token count -- confirm.
    return token_path, f"Encoded to {len(tokens)} tokens"
|
@@ -24,7 +28,13 @@ def encode_audio(audio_path):
|
|
24 |
def decode_tokens(token_path):
    """Load a saved token file, decode it, and write the audio to a WAV file.

    Args:
        token_path: Path of a token file readable by ``np.load``.

    Returns:
        A ``(output_path, message)`` tuple: the path of the written WAV file
        and a short human-readable status string.
    """
    wav_file = "output.wav"
    token_array = np.load(token_path)
    audio = semanticodec.decode(token_array)
    # Only the [0, 0] slice is written -- presumably first batch item and
    # first channel; confirm against the decoder's output layout.
    selected = audio[0, 0]
    sf.write(wav_file, selected, 32000)  # 32000 is the sample rate in Hz
    status = f"Decoded {len(token_array)} tokens to audio"
    return wav_file, status
|
|
|
5 |
from semanticodec import SemantiCodec
|
6 |
from huggingface_hub import HfApi
|
7 |
import spaces
|
8 |
+
import torch
|
9 |
|
10 |
# Initialize the model
|
11 |
def load_model():
|
|
|
17 |
def encode_audio(audio_path):
    """Encode an audio file to tokens and save them to disk.

    Args:
        audio_path: Path of the audio file; passed unchanged to
            ``semanticodec.encode``.

    Returns:
        A ``(token_path, message)`` tuple: the path of the saved token file
        and a short human-readable status string.
    """
    tokens = semanticodec.encode(audio_path)
    # Move tokens to CPU before converting to numpy. detach() additionally
    # guards against tensors that still track gradients, for which
    # Tensor.numpy() raises.
    if isinstance(tokens, torch.Tensor):
        tokens = tokens.detach().cpu().numpy()
    token_path = "encoded_audio.oterin"
    # np.save() appends ".npy" to a *string* path that lacks that suffix,
    # which would leave the returned token_path pointing at a file that was
    # never created. Saving through an open handle keeps the exact name.
    with open(token_path, "wb") as token_file:
        np.save(token_file, tokens)
    # NOTE(review): len() counts the first axis only; if the encoder output
    # is batched this is the batch size, not the token count -- confirm.
    return token_path, f"Encoded to {len(tokens)} tokens"
|
|
|
28 |
def decode_tokens(token_path):
    """Load a saved token file, decode it, and write the audio to a WAV file.

    Args:
        token_path: Path of a token file readable by ``np.load``.

    Returns:
        A ``(output_path, message)`` tuple: the path of the written WAV file
        and a short human-readable status string.
    """
    tokens = np.load(token_path)
    # Convert to a torch tensor unless the model explicitly opts out.
    # The previous guard only converted when the model exposed a truthy
    # ``decode_requires_tensor`` attribute, so a model without that attribute
    # always received a raw ndarray. Defaulting to True feeds decode() the
    # same kind of object encode_audio() unwraps from encode().
    # NOTE(review): the tensor is created on the CPU; presumably decode()
    # moves it to the model's device itself -- confirm.
    if getattr(semanticodec, "decode_requires_tensor", True):
        tokens = torch.as_tensor(tokens)
    waveform = semanticodec.decode(tokens)
    # Move waveform to CPU if it's a tensor, so it can be indexed and written
    # as a NumPy array.
    if isinstance(waveform, torch.Tensor):
        waveform = waveform.detach().cpu().numpy()
    output_path = "output.wav"
    # Only the [0, 0] slice is written -- presumably first batch item and
    # first channel; confirm against the decoder's output layout.
    sf.write(output_path, waveform[0, 0], 32000)  # 32000 is the sample rate in Hz
    return output_path, f"Decoded {len(tokens)} tokens to audio"
|