owiedotch committed on
Commit
5c44be0
·
verified ·
1 Parent(s): fc8b181

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -0
app.py CHANGED
@@ -5,6 +5,7 @@ import soundfile as sf
5
  from semanticodec import SemantiCodec
6
  from huggingface_hub import HfApi
7
  import spaces
 
8
 
9
  # Initialize the model
10
  def load_model():
@@ -16,6 +17,9 @@ semanticodec = load_model()
16
  def encode_audio(audio_path):
17
  """Encode audio file to tokens and save them"""
18
  tokens = semanticodec.encode(audio_path)
 
 
 
19
  token_path = "encoded_audio.oterin"
20
  np.save(token_path, tokens)
21
  return token_path, f"Encoded to {len(tokens)} tokens"
@@ -24,7 +28,13 @@ def encode_audio(audio_path):
24
  def decode_tokens(token_path):
25
  """Decode tokens to audio"""
26
  tokens = np.load(token_path)
 
 
 
27
  waveform = semanticodec.decode(tokens)
 
 
 
28
  output_path = "output.wav"
29
  sf.write(output_path, waveform[0, 0], 32000)
30
  return output_path, f"Decoded {len(tokens)} tokens to audio"
 
5
  from semanticodec import SemantiCodec
6
  from huggingface_hub import HfApi
7
  import spaces
8
+ import torch
9
 
10
  # Initialize the model
11
  def load_model():
 
17
  def encode_audio(audio_path):
18
  """Encode audio file to tokens and save them"""
19
  tokens = semanticodec.encode(audio_path)
20
+ # Move tokens to CPU before converting to numpy
21
+ if isinstance(tokens, torch.Tensor):
22
+ tokens = tokens.cpu().numpy()
23
  token_path = "encoded_audio.oterin"
24
  np.save(token_path, tokens)
25
  return token_path, f"Encoded to {len(tokens)} tokens"
 
28
  def decode_tokens(token_path):
29
  """Decode tokens to audio"""
30
  tokens = np.load(token_path)
31
+ # Convert to torch tensor if needed by the model
32
+ if hasattr(semanticodec, 'decode_requires_tensor') and semanticodec.decode_requires_tensor:
33
+ tokens = torch.tensor(tokens)
34
  waveform = semanticodec.decode(tokens)
35
+ # Move waveform to CPU if it's a tensor
36
+ if isinstance(waveform, torch.Tensor):
37
+ waveform = waveform.cpu().numpy()
38
  output_path = "output.wav"
39
  sf.write(output_path, waveform[0, 0], 32000)
40
  return output_path, f"Decoded {len(tokens)} tokens to audio"