owiedotch committed
Commit e173c02 · verified · Parent: 5c44be0

Update app.py

Files changed (1)
  1. app.py +37 -11
app.py CHANGED
@@ -6,6 +6,8 @@ from semanticodec import SemantiCodec
 from huggingface_hub import HfApi
 import spaces
 import torch
+import tempfile
+import io
 
 # Initialize the model
 def load_model():
@@ -13,15 +15,19 @@ def load_model():
 
 semanticodec = load_model()
 
-@spaces.GPU(duration=60)
+@spaces.GPU(duration=20)
 def encode_audio(audio_path):
     """Encode audio file to tokens and save them"""
     tokens = semanticodec.encode(audio_path)
     # Move tokens to CPU before converting to numpy
     if isinstance(tokens, torch.Tensor):
         tokens = tokens.cpu().numpy()
-    token_path = "encoded_audio.oterin"
-    np.save(token_path, tokens)
+
+    # Save to a temporary file
+    with tempfile.NamedTemporaryFile(suffix='.oterin', delete=False) as tmp_file:
+        np.save(tmp_file.name, tokens)
+        token_path = tmp_file.name
+
     return token_path, f"Encoded to {len(tokens)} tokens"
 
 @spaces.GPU(duration=60)
@@ -35,15 +41,35 @@ def decode_tokens(token_path):
     # Move waveform to CPU if it's a tensor
     if isinstance(waveform, torch.Tensor):
         waveform = waveform.cpu().numpy()
-    output_path = "output.wav"
-    sf.write(output_path, waveform[0, 0], 32000)
-    return output_path, f"Decoded {len(tokens)} tokens to audio"
+
+    # Create in-memory file
+    output_buffer = io.BytesIO()
+    sf.write(output_buffer, waveform[0, 0], 32000, format='WAV')
+    output_buffer.seek(0)
+
+    return output_buffer, f"Decoded {len(tokens)} tokens to audio"
 
+@spaces.GPU(duration=80)
 def process_both(audio_path):
-    """Encode and then decode the audio"""
-    token_path, encode_msg = encode_audio(audio_path)
-    output_path, decode_msg = decode_tokens(token_path)
-    return output_path, f"{encode_msg}\n{decode_msg}"
+    """Encode and then decode the audio without saving intermediate files"""
+    # Encode
+    tokens = semanticodec.encode(audio_path)
+    if isinstance(tokens, torch.Tensor):
+        tokens = tokens.cpu().numpy()
+
+    # Decode directly
+    if hasattr(semanticodec, 'decode_requires_tensor') and semanticodec.decode_requires_tensor:
+        tokens = torch.tensor(tokens)
+    waveform = semanticodec.decode(tokens)
+    if isinstance(waveform, torch.Tensor):
+        waveform = waveform.cpu().numpy()
+
+    # Create in-memory file
+    output_buffer = io.BytesIO()
+    sf.write(output_buffer, waveform[0, 0], 32000, format='WAV')
+    output_buffer.seek(0)
+
+    return output_buffer, f"Encoded to {len(tokens)} tokens\nDecoded {len(tokens)} tokens to audio"
 
 # Create Gradio interface
 with gr.Blocks(title="Oterin Audio Codec") as demo:
@@ -75,4 +101,4 @@ with gr.Blocks(title="Oterin Audio Codec") as demo:
     both_btn.click(process_both, inputs=both_input, outputs=[both_output, both_status])
 
 if __name__ == "__main__":
-    demo.launch()
+    demo.launch(share=True)
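
For reference, a minimal round-trip sketch of how the updated functions could be called outside the Gradio UI. This is not part of the commit: "example.wav" and "roundtrip_output.wav" are placeholder filenames, and decode_tokens is assumed to accept the temporary token path returned by encode_audio, as it does inside the app.

# Hypothetical standalone usage of the updated app.py functions (illustration only).
# "example.wav" is a placeholder input file, not part of the commit.
token_path, encode_msg = encode_audio("example.wav")   # tokens written to a temporary .oterin file
wav_buffer, decode_msg = decode_tokens(token_path)     # in-memory WAV returned as an io.BytesIO buffer

# Persist the buffer if a file on disk is needed.
with open("roundtrip_output.wav", "wb") as f:
    f.write(wav_buffer.getvalue())

print(encode_msg)
print(decode_msg)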