Spaces:

owiedotch
/

oac

Sleeping

owiedotch committed on Feb 27

Commit

5c44be0

verified ·

1 Parent(s): fc8b181

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -5,6 +5,7 @@ import soundfile as sf
 from semanticodec import SemantiCodec
 from huggingface_hub import HfApi
 import spaces
 # Initialize the model
 def load_model():
@@ -16,6 +17,9 @@ semanticodec = load_model()
 def encode_audio(audio_path):
     """Encode an audio file to tokens and save them to disk.

     Args:
         audio_path: Path of the audio file passed to ``semanticodec.encode``.

     Returns:
         A ``(token_path, message)`` tuple: the file the tokens were written
         to and a status string reporting ``len(tokens)``.
     """
     tokens = semanticodec.encode(audio_path)
     # Tensor-like results (anything exposing ``.cpu()``) are copied to host
     # memory and converted to a NumPy array so np.save can serialize them.
     if hasattr(tokens, "cpu"):
         tokens = tokens.cpu().numpy()
     token_path = "encoded_audio.oterin"
     # Write through an open file handle: when np.save is given a plain
     # filename it appends ".npy", so the data would end up in
     # "encoded_audio.oterin.npy" and the path returned below would not exist.
     with open(token_path, "wb") as token_file:
         np.save(token_file, tokens)
     return token_path, f"Encoded to {len(tokens)} tokens"
@@ -24,7 +28,13 @@ def encode_audio(audio_path):
 def decode_tokens(token_path):
     """Rebuild audio from a saved token file and write it out as a WAV file.

     Args:
         token_path: Path of the token file, read with ``np.load``.

     Returns:
         A ``(output_path, message)`` tuple: the WAV file that was written
         and a status string reporting ``len(tokens)``.
     """
     wav_file = "output.wav"
     codes = np.load(token_path)
     audio = semanticodec.decode(codes)
     # Only the [0, 0] slice of the decoder output is written, at 32000 Hz.
     sf.write(wav_file, audio[0, 0], 32000)
     status = f"Decoded {len(codes)} tokens to audio"
     return wav_file, status

 from semanticodec import SemantiCodec
 from huggingface_hub import HfApi
 import spaces
+import torch
 # Initialize the model
 def load_model():
 def encode_audio(audio_path):
     """Encode an audio file to tokens and save them to disk.

     Args:
         audio_path: Path of the audio file passed to ``semanticodec.encode``.

     Returns:
         A ``(token_path, message)`` tuple: the file the tokens were written
         to and a status string reporting ``len(tokens)``.
     """
     tokens = semanticodec.encode(audio_path)
     # Move tokens to CPU before converting to numpy
     if isinstance(tokens, torch.Tensor):
         tokens = tokens.cpu().numpy()
     token_path = "encoded_audio.oterin"
     # Write through an open file handle: when np.save is given a plain
     # filename it appends ".npy", so the data would end up in
     # "encoded_audio.oterin.npy" and the path returned below would not exist.
     with open(token_path, "wb") as token_file:
         np.save(token_file, tokens)
     return token_path, f"Encoded to {len(tokens)} tokens"
 def decode_tokens(token_path):
     """Rebuild audio from a saved token file and write it out as a WAV file.

     Args:
         token_path: Path of the token file, read with ``np.load``.

     Returns:
         A ``(output_path, message)`` tuple: the WAV file that was written
         and a status string reporting ``len(tokens)``.
     """
     wav_file = "output.wav"
     codes = np.load(token_path)
     # Hand the decoder a torch tensor only when the model object opts in.
     # NOTE(review): ``decode_requires_tensor`` is not defined anywhere in the
     # visible code - confirm the model really exposes this flag.
     if getattr(semanticodec, 'decode_requires_tensor', False):
         codes = torch.tensor(codes)
     audio = semanticodec.decode(codes)
     # A tensor result is copied to host memory and converted to NumPy
     # before it is sliced and written.
     if isinstance(audio, torch.Tensor):
         audio = audio.cpu().numpy()
     # Only the [0, 0] slice of the decoder output is written, at 32000 Hz.
     sf.write(wav_file, audio[0, 0], 32000)
     status = f"Decoded {len(codes)} tokens to audio"
     return wav_file, status