owiedotch committed on
Commit
a32055a
·
verified ·
1 Parent(s): 9e8dfb9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -19
app.py CHANGED
@@ -11,9 +11,10 @@ from typing import Generator
11
  import asyncio # Import asyncio for cancellation
12
  import traceback # Import traceback for error handling
13
  import pickle
 
14
 
15
- # Load the SemantiCodec model without specifying a device
16
- semanticodec = SemantiCodec(token_rate=100, semantic_vocab_size=16384)
17
 
18
  # Move the entire model to GPU if available
19
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
@@ -27,27 +28,18 @@ cancel_stream = False
27
 
28
  @spaces.GPU(duration=30)
29
  def encode_audio(filepath):
30
- """Encode audio file and save tokens"""
31
  try:
32
- # Encode the audio
33
  tokens = semanticodec.encode(filepath)
 
34
 
35
- # Get the original audio's sample rate
36
- waveform, sample_rate = torchaudio.load(filepath)
37
-
38
- # Save tokens and sample rate
39
- data = {
40
- 'tokens': tokens.cpu().detach(),
41
- 'sample_rate': sample_rate
42
- }
43
-
44
- with open('encoded.pkl', 'wb') as f:
45
- pickle.dump(data, f)
46
-
47
- return 'encoded.pkl'
48
 
49
  except Exception as e:
50
- print(f"Encoding error: {e}")
51
  traceback.print_exc()
52
  return None
53
 
@@ -173,4 +165,8 @@ with gr.Blocks() as demo:
173
  stream_button.click(stream_decode_audio, inputs=input_encoded_stream, outputs=audio_output)
174
  cancel_stream_button.click(lambda: globals().update(cancel_stream=True), outputs=None)
175
 
176
- demo.queue().launch()
 
 
 
 
 
11
  import asyncio # Import asyncio for cancellation
12
  import traceback # Import traceback for error handling
13
  import pickle
14
+ import soundfile as sf
15
 
16
+ # Initialize model with the specified parameters
17
+ semanticodec = SemantiCodec(token_rate=100, semantic_vocab_size=32768) # 1.40 kbps
18
 
19
  # Move the entire model to GPU if available
20
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 
28
 
29
  @spaces.GPU(duration=30)
30
  def encode_audio(filepath):
31
+ """Encode and decode audio file"""
32
  try:
33
+ # Encode and decode directly as in the example
34
  tokens = semanticodec.encode(filepath)
35
+ waveform = semanticodec.decode(tokens)
36
 
37
+ # Save using soundfile
38
+ sf.write("output.wav", waveform[0,0], 16000)
39
+ return "output.wav"
 
 
 
 
 
 
 
 
 
 
40
 
41
  except Exception as e:
42
+ print(f"Error: {e}")
43
  traceback.print_exc()
44
  return None
45
 
 
165
  stream_button.click(stream_decode_audio, inputs=input_encoded_stream, outputs=audio_output)
166
  cancel_stream_button.click(lambda: globals().update(cancel_stream=True), outputs=None)
167
 
168
+ demo.queue().launch()
169
+
170
+ if __name__ == "__main__":
171
+ filepath = "test/test.wav"
172
+ encode_audio(filepath)