Spaces:

owiedotch
/

oac

Sleeping

owiedotch committed on Feb 27

Commit

055ea67

verified ·

1 Parent(s): f28e066

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -104,16 +104,14 @@ def decode_tokens(token_file):
         if isinstance(waveform, torch.Tensor):
             waveform = waveform.cpu().numpy()
-        # Create in-memory file for audio
-        output_buffer = io.BytesIO()
-        sf.write(output_buffer, waveform[0, 0], 32000, format='WAV')
-        output_buffer.seek(0)
-        # Verify the buffer has content
-        if output_buffer.getbuffer().nbytes == 0:
-            return None, "Error: Failed to generate audio"
-        return output_buffer, f"Decoded {tokens.shape[1]} tokens to audio"
     except Exception as e:
         print(f"Decoding error: {str(e)}")
         return None, f"Error decoding tokens: {str(e)}"
@@ -155,16 +153,14 @@ def process_both(audio_path):
         if isinstance(waveform, torch.Tensor):
             waveform = waveform.cpu().numpy()
-        # Create in-memory file
-        output_buffer = io.BytesIO()
-        sf.write(output_buffer, waveform[0, 0], 32000, format='WAV')
-        output_buffer.seek(0)
-        # Verify the buffer has content
-        if output_buffer.getbuffer().nbytes == 0:
-            return None, "Error: Failed to generate audio"
-        return output_buffer, f"Encoded to {tokens.shape[1]} tokens\nDecoded {tokens.shape[1]} tokens to audio"
     except Exception as e:
         print(f"Processing error: {str(e)}")
         return None, f"Error processing audio: {str(e)}"

         if isinstance(waveform, torch.Tensor):
             waveform = waveform.cpu().numpy()
+        # Extract audio data - this should be a numpy array
+        audio_data = waveform[0, 0]  # Shape should be [time]
+        sample_rate = 32000
+        print(f"Audio data shape: {audio_data.shape}, dtype: {audio_data.dtype}")
+        # Return in Gradio Audio compatible format: (sample_rate, audio_data)
+        return (sample_rate, audio_data), f"Decoded {tokens.shape[1]} tokens to audio"
     except Exception as e:
         print(f"Decoding error: {str(e)}")
         return None, f"Error decoding tokens: {str(e)}"
         if isinstance(waveform, torch.Tensor):
             waveform = waveform.cpu().numpy()
+        # Extract audio data - this should be a numpy array
+        audio_data = waveform[0, 0]  # Shape should be [time]
+        sample_rate = 32000
+        print(f"Audio data shape: {audio_data.shape}, dtype: {audio_data.dtype}")
+        # Return in Gradio Audio compatible format: (sample_rate, audio_data)
+        return (sample_rate, audio_data), f"Encoded to {tokens.shape[1]} tokens\nDecoded {tokens.shape[1]} tokens to audio"
     except Exception as e:
         print(f"Processing error: {str(e)}")
         return None, f"Error processing audio: {str(e)}"