owiedotch committed on
Commit
4ad7b57
·
verified ·
1 Parent(s): 544ae95

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +98 -67
app.py CHANGED
@@ -20,30 +20,32 @@ semanticodec = load_model()
20
  @spaces.GPU(duration=20)
21
  def encode_audio(audio_path):
22
  """Encode audio file to tokens and return them as a file"""
23
- tokens = semanticodec.encode(audio_path)
24
- # Move tokens to CPU before converting to numpy
25
- if isinstance(tokens, torch.Tensor):
26
- tokens = tokens.cpu().numpy()
27
-
28
- # Save to a BytesIO buffer
29
- buffer = io.BytesIO()
30
- np.save(buffer, tokens)
31
- buffer.seek(0)
32
-
33
- # Verify the buffer has content
34
- if buffer.getbuffer().nbytes == 0:
35
- raise Exception("Failed to create token buffer")
36
-
37
- # Create a temporary file in /tmp which is writable in Spaces
38
- temp_dir = "/tmp"
39
- os.makedirs(temp_dir, exist_ok=True)
40
- temp_file_path = os.path.join(temp_dir, f"tokens_{uuid.uuid4()}.oterin")
41
-
42
- # Write buffer to the temporary file
43
- with open(temp_file_path, "wb") as f:
44
- f.write(buffer.getvalue())
45
-
46
- return temp_file_path, f"Encoded to {len(tokens)} tokens"
 
 
47
 
48
  @spaces.GPU(duration=60)
49
  def decode_tokens(token_file):
@@ -52,53 +54,82 @@ def decode_tokens(token_file):
52
  if not token_file or not os.path.exists(token_file):
53
  return None, "Error: Empty or missing token file"
54
 
55
- # Load tokens from file
56
- tokens = np.load(token_file)
57
-
58
- # Convert to torch tensor if needed by the model
59
- if hasattr(semanticodec, 'decode_requires_tensor') and semanticodec.decode_requires_tensor:
60
- tokens = torch.tensor(tokens)
61
- waveform = semanticodec.decode(tokens)
62
- # Move waveform to CPU if it's a tensor
63
- if isinstance(waveform, torch.Tensor):
64
- waveform = waveform.cpu().numpy()
65
-
66
- # Create in-memory file for audio
67
- output_buffer = io.BytesIO()
68
- sf.write(output_buffer, waveform[0, 0], 32000, format='WAV')
69
- output_buffer.seek(0)
70
-
71
- # Verify the buffer has content
72
- if output_buffer.getbuffer().nbytes == 0:
73
- return None, "Error: Failed to generate audio"
74
-
75
- return output_buffer, f"Decoded {len(tokens)} tokens to audio"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
 
77
  @spaces.GPU(duration=80)
78
  def process_both(audio_path):
79
  """Encode and then decode the audio without saving intermediate files"""
80
- # Encode
81
- tokens = semanticodec.encode(audio_path)
82
- if isinstance(tokens, torch.Tensor):
83
- tokens = tokens.cpu().numpy()
84
-
85
- # Decode directly
86
- if hasattr(semanticodec, 'decode_requires_tensor') and semanticodec.decode_requires_tensor:
87
- tokens = torch.tensor(tokens)
88
- waveform = semanticodec.decode(tokens)
89
- if isinstance(waveform, torch.Tensor):
90
- waveform = waveform.cpu().numpy()
91
-
92
- # Create in-memory file
93
- output_buffer = io.BytesIO()
94
- sf.write(output_buffer, waveform[0, 0], 32000, format='WAV')
95
- output_buffer.seek(0)
96
-
97
- # Verify the buffer has content
98
- if output_buffer.getbuffer().nbytes == 0:
99
- return None, "Error: Failed to generate audio"
100
-
101
- return output_buffer, f"Encoded to {len(tokens)} tokens\nDecoded {len(tokens)} tokens to audio"
 
 
 
 
 
 
 
 
 
 
 
 
 
102
 
103
  # Create Gradio interface
104
  with gr.Blocks(title="Oterin Audio Codec") as demo:
 
20
@spaces.GPU(duration=20)
def encode_audio(audio_path):
    """Encode an audio file to codec tokens and save them to a temp file.

    Parameters
    ----------
    audio_path : str
        Path of the input audio file to encode.

    Returns
    -------
    tuple
        ``(token_file_path, status_message)`` on success, or
        ``(None, error_message)`` on failure.
    """
    try:
        tokens = semanticodec.encode(audio_path)
        # Move tokens to CPU before converting to numpy.
        if isinstance(tokens, torch.Tensor):
            tokens = tokens.cpu().numpy()

        # Normalize a flat token vector to [batch, seq_len, features] so the
        # decoder side can load the file without guessing the layout.
        if tokens.ndim == 1:
            tokens = tokens.reshape(1, -1, 1)

        # /tmp is writable in HF Spaces; keep the custom .oterin extension.
        temp_dir = "/tmp"
        os.makedirs(temp_dir, exist_ok=True)
        temp_file_path = os.path.join(temp_dir, f"tokens_{uuid.uuid4()}.oterin")

        # BUG FIX: np.save(path, arr) appends ".npy" when the path does not
        # already end in it, so the data would be written to
        # "<temp_file_path>.npy" and the existence check below would always
        # fail.  Passing an open file object preserves the exact filename.
        with open(temp_file_path, "wb") as f:
            np.save(f, tokens)

        # Verify the file exists and has content before handing it out.
        if not os.path.exists(temp_file_path) or os.path.getsize(temp_file_path) == 0:
            raise Exception("Failed to create token file")

        return temp_file_path, f"Encoded to {tokens.shape[1]} tokens"
    except Exception as e:
        return None, f"Error encoding audio: {str(e)}"
49
 
50
@spaces.GPU(duration=60)
def decode_tokens(token_file):
    """Decode a saved token file back to audio.

    Parameters
    ----------
    token_file : str
        Path to a ``.oterin`` token file produced by ``encode_audio``.

    Returns
    -------
    tuple
        ``(wav_buffer, status_message)`` on success, where ``wav_buffer`` is
        an in-memory WAV file, or ``(None, error_message)`` on failure.
    """
    if not token_file or not os.path.exists(token_file):
        return None, "Error: Empty or missing token file"

    try:
        # SECURITY FIX: the token file is user-supplied; loading it with
        # allow_pickle=True would allow arbitrary code execution.  The tokens
        # are a plain numeric ndarray (see encode_audio), so pickle support
        # is unnecessary — np.load refuses object arrays with pickle disabled.
        tokens = np.load(token_file, allow_pickle=False)

        # Convert to torch tensor with proper dimensions.
        if isinstance(tokens, np.ndarray):
            # Normalize a flat vector to [batch, seq_len, features].
            if tokens.ndim == 1:
                tokens = tokens.reshape(1, -1, 1)

            tokens = torch.tensor(tokens)

        # Run the decode on GPU when one is available.
        if torch.cuda.is_available():
            tokens = tokens.cuda()

        waveform = semanticodec.decode(tokens)

        # Move waveform back to host memory before writing.
        if isinstance(waveform, torch.Tensor):
            waveform = waveform.cpu().numpy()

        # Serialize element [0, 0] as a 32 kHz WAV into memory.
        # NOTE(review): assumes waveform is [batch, channel, samples] — confirm
        # against the semanticodec decode contract.
        output_buffer = io.BytesIO()
        sf.write(output_buffer, waveform[0, 0], 32000, format='WAV')
        output_buffer.seek(0)

        # Verify the buffer has content.
        if output_buffer.getbuffer().nbytes == 0:
            return None, "Error: Failed to generate audio"

        return output_buffer, f"Decoded {tokens.shape[1]} tokens to audio"
    except Exception as e:
        return None, f"Error decoding tokens: {str(e)}"
94
 
95
@spaces.GPU(duration=80)
def process_both(audio_path):
    """Encode and then decode the audio without saving intermediate files"""
    try:
        # Encode, then bring the result to host memory as a numpy array.
        encoded = semanticodec.encode(audio_path)
        encoded = encoded.cpu().numpy() if isinstance(encoded, torch.Tensor) else encoded

        # A flat token vector is normalized to [batch, seq_len, features].
        if encoded.ndim == 1:
            encoded = encoded.reshape(1, -1, 1)

        # Round-trip through the decoder, on GPU when one is available.
        device_tokens = torch.tensor(encoded)
        if torch.cuda.is_available():
            device_tokens = device_tokens.cuda()
        audio = semanticodec.decode(device_tokens)
        audio = audio.cpu().numpy() if isinstance(audio, torch.Tensor) else audio

        # Serialize element [0, 0] as a 32 kHz WAV into an in-memory file.
        # NOTE(review): assumes the decoded audio is [batch, channel, samples].
        wav_buffer = io.BytesIO()
        sf.write(wav_buffer, audio[0, 0], 32000, format='WAV')
        wav_buffer.seek(0)

        # An empty buffer means nothing was written.
        if wav_buffer.getbuffer().nbytes == 0:
            return None, "Error: Failed to generate audio"

        return wav_buffer, f"Encoded to {encoded.shape[1]} tokens\nDecoded {encoded.shape[1]} tokens to audio"
    except Exception as e:
        return None, f"Error processing audio: {str(e)}"
133
 
134
  # Create Gradio interface
135
  with gr.Blocks(title="Oterin Audio Codec") as demo: