owiedotch committed on
Commit
85dc4b0
·
verified ·
1 Parent(s): 8ad4b15

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -18
app.py CHANGED
@@ -17,31 +17,34 @@ semanticodec = load_model()
17
 
18
  @spaces.GPU(duration=20)
19
  def encode_audio(audio_path):
20
- """Encode audio file to tokens and save them"""
21
  tokens = semanticodec.encode(audio_path)
22
  # Move tokens to CPU before converting to numpy
23
  if isinstance(tokens, torch.Tensor):
24
  tokens = tokens.cpu().numpy()
25
 
26
- # Save to a temporary file
27
- temp_file = tempfile.NamedTemporaryFile(suffix='.oterin', delete=False)
28
- temp_file.close() # Close the file before writing to it
29
- np.save(temp_file.name, tokens)
30
 
31
- # Ensure the file exists and has content
32
- if os.path.exists(temp_file.name) and os.path.getsize(temp_file.name) > 0:
33
- return temp_file.name, f"Encoded to {len(tokens)} tokens"
34
- else:
35
- raise Exception("Failed to create token file")
36
 
37
  @spaces.GPU(duration=60)
38
- def decode_tokens(token_path):
39
  """Decode tokens to audio"""
40
- # Ensure the file exists and has content
41
- if not os.path.exists(token_path) or os.path.getsize(token_path) == 0:
42
- return None, "Error: Empty or missing token file"
 
 
 
 
43
 
44
- tokens = np.load(token_path)
45
  # Convert to torch tensor if needed by the model
46
  if hasattr(semanticodec, 'decode_requires_tensor') and semanticodec.decode_requires_tensor:
47
  tokens = torch.tensor(tokens)
@@ -50,7 +53,7 @@ def decode_tokens(token_path):
50
  if isinstance(waveform, torch.Tensor):
51
  waveform = waveform.cpu().numpy()
52
 
53
- # Create in-memory file
54
  output_buffer = io.BytesIO()
55
  sf.write(output_buffer, waveform[0, 0], 32000, format='WAV')
56
  output_buffer.seek(0)
@@ -95,14 +98,14 @@ with gr.Blocks(title="Oterin Audio Codec") as demo:
95
  with gr.Tab("Encode Audio"):
96
  with gr.Row():
97
  encode_input = gr.Audio(type="filepath", label="Input Audio")
98
- encode_output = gr.File(label="Encoded Tokens (.oterin)")
99
  encode_status = gr.Textbox(label="Status")
100
  encode_btn = gr.Button("Encode")
101
  encode_btn.click(encode_audio, inputs=encode_input, outputs=[encode_output, encode_status])
102
 
103
  with gr.Tab("Decode Tokens"):
104
  with gr.Row():
105
- decode_input = gr.File(label="Token File (.oterin)")
106
  decode_output = gr.Audio(label="Decoded Audio")
107
  decode_status = gr.Textbox(label="Status")
108
  decode_btn = gr.Button("Decode")
 
17
 
18
  @spaces.GPU(duration=20)
19
  def encode_audio(audio_path):
20
+ """Encode audio file to tokens and return them as a binary buffer"""
21
  tokens = semanticodec.encode(audio_path)
22
  # Move tokens to CPU before converting to numpy
23
  if isinstance(tokens, torch.Tensor):
24
  tokens = tokens.cpu().numpy()
25
 
26
+ # Save to a BytesIO buffer
27
+ buffer = io.BytesIO()
28
+ np.save(buffer, tokens)
29
+ buffer.seek(0)
30
 
31
+ # Verify the buffer has content
32
+ if buffer.getbuffer().nbytes == 0:
33
+ raise Exception("Failed to create token buffer")
34
+
35
+ return buffer, f"Encoded to {len(tokens)} tokens"
36
 
37
  @spaces.GPU(duration=60)
38
+ def decode_tokens(token_buffer):
39
  """Decode tokens to audio"""
40
+ # Ensure the buffer has content
41
+ if not token_buffer or token_buffer.getbuffer().nbytes == 0:
42
+ return None, "Error: Empty token buffer"
43
+
44
+ # Reset buffer position to start
45
+ token_buffer.seek(0)
46
+ tokens = np.load(token_buffer)
47
 
 
48
  # Convert to torch tensor if needed by the model
49
  if hasattr(semanticodec, 'decode_requires_tensor') and semanticodec.decode_requires_tensor:
50
  tokens = torch.tensor(tokens)
 
53
  if isinstance(waveform, torch.Tensor):
54
  waveform = waveform.cpu().numpy()
55
 
56
+ # Create in-memory file for audio
57
  output_buffer = io.BytesIO()
58
  sf.write(output_buffer, waveform[0, 0], 32000, format='WAV')
59
  output_buffer.seek(0)
 
98
  with gr.Tab("Encode Audio"):
99
  with gr.Row():
100
  encode_input = gr.Audio(type="filepath", label="Input Audio")
101
+ encode_output = gr.File(label="Encoded Tokens (.oterin)", file_types=[".oterin"])
102
  encode_status = gr.Textbox(label="Status")
103
  encode_btn = gr.Button("Encode")
104
  encode_btn.click(encode_audio, inputs=encode_input, outputs=[encode_output, encode_status])
105
 
106
  with gr.Tab("Decode Tokens"):
107
  with gr.Row():
108
+ decode_input = gr.File(label="Token File (.oterin)", file_types=[".oterin"])
109
  decode_output = gr.Audio(label="Decoded Audio")
110
  decode_status = gr.Textbox(label="Status")
111
  decode_btn = gr.Button("Decode")