Spaces:

owiedotch
/

oac

Sleeping

App Files Files Community

owiedotch committed 27 days ago

Commit

4ad7b57

verified ·

1 Parent(s): 544ae95

Update app.py

Browse files

Files changed (1) hide show

app.py +98 -67

app.py CHANGED Viewed

@@ -20,30 +20,32 @@ semanticodec = load_model()
 @spaces.GPU(duration=20)
 def encode_audio(audio_path):
     """Encode audio file to tokens and return them as a file"""
-    tokens = semanticodec.encode(audio_path)
-    # Move tokens to CPU before converting to numpy
-    if isinstance(tokens, torch.Tensor):
-        tokens = tokens.cpu().numpy()
-    # Save to a BytesIO buffer
-    buffer = io.BytesIO()
-    np.save(buffer, tokens)
-    buffer.seek(0)
-    # Verify the buffer has content
-    if buffer.getbuffer().nbytes == 0:
-        raise Exception("Failed to create token buffer")
-    # Create a temporary file in /tmp which is writable in Spaces
-    temp_dir = "/tmp"
-    os.makedirs(temp_dir, exist_ok=True)
-    temp_file_path = os.path.join(temp_dir, f"tokens_{uuid.uuid4()}.oterin")
-    # Write buffer to the temporary file
-    with open(temp_file_path, "wb") as f:
-        f.write(buffer.getvalue())
-    return temp_file_path, f"Encoded to {len(tokens)} tokens"
 @spaces.GPU(duration=60)
 def decode_tokens(token_file):
@@ -52,53 +54,82 @@ def decode_tokens(token_file):
     if not token_file or not os.path.exists(token_file):
         return None, "Error: Empty or missing token file"
-    # Load tokens from file
-    tokens = np.load(token_file)
-    # Convert to torch tensor if needed by the model
-    if hasattr(semanticodec, 'decode_requires_tensor') and semanticodec.decode_requires_tensor:
-        tokens = torch.tensor(tokens)
-    waveform = semanticodec.decode(tokens)
-    # Move waveform to CPU if it's a tensor
-    if isinstance(waveform, torch.Tensor):
-        waveform = waveform.cpu().numpy()
-    # Create in-memory file for audio
-    output_buffer = io.BytesIO()
-    sf.write(output_buffer, waveform[0, 0], 32000, format='WAV')
-    output_buffer.seek(0)
-    # Verify the buffer has content
-    if output_buffer.getbuffer().nbytes == 0:
-        return None, "Error: Failed to generate audio"
-    return output_buffer, f"Decoded {len(tokens)} tokens to audio"
 @spaces.GPU(duration=80)
 def process_both(audio_path):
     """Encode and then decode the audio without saving intermediate files"""
-    # Encode
-    tokens = semanticodec.encode(audio_path)
-    if isinstance(tokens, torch.Tensor):
-        tokens = tokens.cpu().numpy()
-    # Decode directly
-    if hasattr(semanticodec, 'decode_requires_tensor') and semanticodec.decode_requires_tensor:
-        tokens = torch.tensor(tokens)
-    waveform = semanticodec.decode(tokens)
-    if isinstance(waveform, torch.Tensor):
-        waveform = waveform.cpu().numpy()
-    # Create in-memory file
-    output_buffer = io.BytesIO()
-    sf.write(output_buffer, waveform[0, 0], 32000, format='WAV')
-    output_buffer.seek(0)
-    # Verify the buffer has content
-    if output_buffer.getbuffer().nbytes == 0:
-        return None, "Error: Failed to generate audio"
-    return output_buffer, f"Encoded to {len(tokens)} tokens\nDecoded {len(tokens)} tokens to audio"
 # Create Gradio interface
 with gr.Blocks(title="Oterin Audio Codec") as demo:

 @spaces.GPU(duration=20)
 def encode_audio(audio_path):
     """Encode an audio file to tokens and save them to a downloadable file.

     Args:
         audio_path: Path of the audio file passed to ``semanticodec.encode``.

     Returns:
         A ``(token_file_path, status_message)`` tuple. On any failure the
         path is ``None`` and the message describes the error.
     """
     try:
         tokens = semanticodec.encode(audio_path)
         # Move tokens to CPU before converting to numpy
         if isinstance(tokens, torch.Tensor):
             tokens = tokens.cpu().numpy()
         # Ensure tokens are in the right shape for later decoding
         if tokens.ndim == 1:
             # Reshape to [batch, seq_len, features] -- the layout the decode
             # path in this file also reshapes to (TODO confirm with the model)
             tokens = tokens.reshape(1, -1, 1)
         # Create a temporary file in /tmp which is writable in Spaces
         temp_dir = "/tmp"
         os.makedirs(temp_dir, exist_ok=True)
         temp_file_path = os.path.join(temp_dir, f"tokens_{uuid.uuid4()}.oterin")
         # Save through an open file handle: when given a *path* that does not
         # end in ".npy", np.save() appends ".npy", which would write
         # "<name>.oterin.npy" and make the existence check below fail.
         with open(temp_file_path, "wb") as token_file:
             np.save(token_file, tokens)
         # Verify the file exists and has content
         if not os.path.exists(temp_file_path) or os.path.getsize(temp_file_path) == 0:
             raise RuntimeError("Failed to create token file")
         return temp_file_path, f"Encoded to {tokens.shape[1]} tokens"
     except Exception as e:
         # UI boundary: report the failure as a status message instead of raising
         return None, f"Error encoding audio: {str(e)}"
 @spaces.GPU(duration=60)
 def decode_tokens(token_file):
     """Decode a saved token file back into audio.

     Args:
         token_file: Path to a token file in NumPy ``.npy`` format.

     Returns:
         A ``(wav_buffer, status_message)`` tuple, where ``wav_buffer`` is an
         in-memory ``io.BytesIO`` holding WAV data. On any failure the buffer
         is ``None`` and the message describes the error.
     """
     if not token_file or not os.path.exists(token_file):
         return None, "Error: Empty or missing token file"
     try:
         # SECURITY: the token file is supplied by the user. Loading it with
         # allow_pickle=True would unpickle object arrays and can execute
         # arbitrary code, so pickles are refused here. Plain numeric token
         # arrays written with np.save() load fine without pickle support.
         tokens = np.load(token_file, allow_pickle=False)
         # Convert to torch tensor with proper dimensions
         if isinstance(tokens, np.ndarray):
             # Ensure tokens are in the right shape
             if tokens.ndim == 1:
                 # Reshape to [batch, seq_len, features] (TODO confirm this is
                 # the layout the model's decode() expects)
                 tokens = tokens.reshape(1, -1, 1)
             # Convert to torch tensor
             tokens = torch.tensor(tokens)
         # Ensure tokens are on the right device
         if torch.cuda.is_available():
             tokens = tokens.cuda()
         # Decode the tokens
         waveform = semanticodec.decode(tokens)
         # Move waveform to CPU if it's a tensor
         if isinstance(waveform, torch.Tensor):
             waveform = waveform.cpu().numpy()
         # Create in-memory file for audio; only waveform[0, 0] is written,
         # with a sample rate of 32000
         output_buffer = io.BytesIO()
         sf.write(output_buffer, waveform[0, 0], 32000, format='WAV')
         output_buffer.seek(0)
         # Verify the buffer has content
         if output_buffer.getbuffer().nbytes == 0:
             return None, "Error: Failed to generate audio"
         return output_buffer, f"Decoded {tokens.shape[1]} tokens to audio"
     except Exception as e:
         # UI boundary: report the failure as a status message instead of raising
         return None, f"Error decoding tokens: {str(e)}"
 @spaces.GPU(duration=80)
 def process_both(audio_path):
     """Round-trip audio through the codec: encode to tokens, then decode.

     No intermediate token file is written. Returns a
     ``(wav_buffer, status_message)`` tuple; on any failure the buffer is
     ``None`` and the message describes the error.
     """
     try:
         # --- Encode stage ---
         encoded = semanticodec.encode(audio_path)
         if isinstance(encoded, torch.Tensor):
             encoded = encoded.cpu().numpy()
         # A flat token array is reshaped to three dimensions before decoding
         if encoded.ndim == 1:
             encoded = encoded.reshape(1, -1, 1)

         # --- Decode stage ---
         # Rebuild a tensor for the decoder, on the GPU when one is available
         decoder_input = torch.tensor(encoded)
         if torch.cuda.is_available():
             decoder_input = decoder_input.cuda()
         audio = semanticodec.decode(decoder_input)
         if isinstance(audio, torch.Tensor):
             audio = audio.cpu().numpy()

         # --- Serialise to an in-memory WAV ---
         wav_bytes = io.BytesIO()
         sf.write(wav_bytes, audio[0, 0], 32000, format='WAV')
         wav_bytes.seek(0)
         # An empty buffer means nothing was written
         if wav_bytes.getbuffer().nbytes == 0:
             return None, "Error: Failed to generate audio"

         return wav_bytes, f"Encoded to {encoded.shape[1]} tokens\nDecoded {encoded.shape[1]} tokens to audio"
     except Exception as e:
         return None, f"Error processing audio: {str(e)}"
 # Create Gradio interface
 with gr.Blocks(title="Oterin Audio Codec") as demo: