ernestchu committed
Commit · f6c8d4d
1 Parent(s): b6ef12a
update
- app.py +10 -12
- tsmnet/interface.py +1 -1
- weights/general.pt +0 -3
- weights/speech.pt +2 -2
app.py CHANGED
@@ -6,7 +6,7 @@ import torch
 import torchaudio
 
 model_root = './weights'
-available_models = ['
+available_models = ['speech', 'pop-music', 'classical-music']
 working_sr = 22050
 
 def prepare_models():
@@ -28,19 +28,17 @@ def prepare_audio_file(rec, audio_file, yt_url):
 
 def run(rec, audio_file, yt_url, speed, model, start_time, end_time):
     audio_file = prepare_audio_file(rec, audio_file, yt_url)
-    if speed == 1:
-        return processing_utils.audio_from_file(audio_file)
-
-    model = models[model]
 
     x, sr = torchaudio.load(audio_file)
     x = torchaudio.transforms.Resample(orig_freq=sr, new_freq=working_sr)(x)
     sr = working_sr
 
-    x =
+    x = x[:, int(start_time * sr):int(end_time * sr)]
 
-
+    if speed != 1:
+        x = models[model](x, speed).cpu()
 
+    torchaudio.save(audio_file, x, sr)
     return processing_utils.audio_from_file(audio_file)
 
 
@@ -66,11 +64,11 @@ with gr.Blocks() as demo:
 
     speed_box = gr.Slider(label='Playback speed', minimum=0, maximum=2, value=1)
     with gr.Accordion('Fine-grained settings', open=False):
-        with gr.
-            gr.
-
-
-
+        with gr.Tab('Trim audio sample (sec)'):
+            # gr.Markdown('### Trim audio sample (sec)')
+            with gr.Row():
+                start_time_box = gr.Number(label='Start', value=0)
+                end_time_box = gr.Number(label='End', value=20)
         model_box = gr.Dropdown(label='Model weight', choices=available_models, value=available_models[0])
 
     submit_btn = gr.Button('Submit')
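Taken together, the app.py changes make run() trim the resampled waveform to the requested [start_time, end_time] window, call the selected model only when the playback speed differs from 1, and write the result back before returning it. A minimal sketch of that flow, assuming models is a name-to-Stretcher dict produced by prepare_models() (the loader shown here is a guess, not the actual implementation) and that processing_utils comes from gradio:

# Sketch only: mirrors the updated run() path; prepare_models() here is a
# hypothetical loader (one Stretcher per entry in available_models).
import os
import torchaudio
from gradio import processing_utils
from tsmnet.interface import Stretcher

model_root = './weights'
available_models = ['speech', 'pop-music', 'classical-music']
working_sr = 22050

def prepare_models(device='cpu'):
    return {name: Stretcher(os.path.join(model_root, f'{name}.pt'), device)
            for name in available_models}

models = prepare_models()

def run_sketch(audio_file, speed, model, start_time, end_time):
    x, sr = torchaudio.load(audio_file)
    x = torchaudio.transforms.Resample(orig_freq=sr, new_freq=working_sr)(x)
    sr = working_sr

    # Trim first so only the selected window is stretched.
    x = x[:, int(start_time * sr):int(end_time * sr)]

    if speed != 1:                      # speed == 1 now skips the model entirely
        x = models[model](x, speed).cpu()

    torchaudio.save(audio_file, x, sr)  # overwrite the input file in place
    return processing_utils.audio_from_file(audio_file)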
tsmnet/interface.py CHANGED
@@ -68,7 +68,7 @@ class Stretcher:
         self.neuralgram = Neuralgram(path, device)
 
     @torch.no_grad()
-    def __call__(self, audio, rate , interpolation=InterpolationMode.
+    def __call__(self, audio, rate , interpolation=InterpolationMode.BICUBIC): # NEAREST | BILINEAR | BICUBIC
        if rate == 1:
            return audio.numpy() if isinstance(audio, torch.Tensor) else audio
        neu = self.neuralgram(audio)
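The interface change makes bicubic interpolation the default when the neuralgram is rescaled for time-scale modification, with NEAREST and BILINEAR still selectable. As a rough illustration only (the actual resize call inside Stretcher is not shown in this diff, so the tensor layout and the use of torchvision's resize here are assumptions), rescaling a spectrogram-like tensor along its time axis with a selectable InterpolationMode could look like:

# Illustrative only: how an InterpolationMode choice is typically applied when
# stretching a (channels, features, frames) representation along its time axis.
import torch
from torchvision.transforms.functional import resize, InterpolationMode

def stretch_time_axis(neu: torch.Tensor, rate: float,
                      interpolation=InterpolationMode.BICUBIC) -> torch.Tensor:
    n_features, n_frames = neu.shape[-2], neu.shape[-1]
    new_frames = max(1, round(n_frames / rate))  # rate > 1 -> fewer frames -> faster playback
    return resize(neu, [n_features, new_frames], interpolation=interpolation)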
weights/general.pt DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:e70b0ca672ab2008da3517ae3eb524135a1ef5685d59cc034084316a665f69f6
-size 100400920
weights/speech.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:f3010d34e0d538ecb4c63c8bc89ad4023630dc36e2746bb71b799026d2b03ad4
+size 100400898