ernestchu committed
Commit · f6c8d4d
1 Parent(s): b6ef12a
update
- app.py +10 -12
- tsmnet/interface.py +1 -1
- weights/general.pt +0 -3
- weights/speech.pt +2 -2
app.py CHANGED
@@ -6,7 +6,7 @@ import torch
 import torchaudio
 
 model_root = './weights'
-available_models = ['
+available_models = ['speech', 'pop-music', 'classical-music']
 working_sr = 22050
 
 def prepare_models():
@@ -28,19 +28,17 @@ def prepare_audio_file(rec, audio_file, yt_url):
 
 def run(rec, audio_file, yt_url, speed, model, start_time, end_time):
     audio_file = prepare_audio_file(rec, audio_file, yt_url)
-    if speed == 1:
-        return processing_utils.audio_from_file(audio_file)
-
-    model = models[model]
 
     x, sr = torchaudio.load(audio_file)
     x = torchaudio.transforms.Resample(orig_freq=sr, new_freq=working_sr)(x)
     sr = working_sr
 
-    x =
+    x = x[:, int(start_time * sr):int(end_time * sr)]
 
-
+    if speed != 1:
+        x = models[model](x, speed).cpu()
 
+    torchaudio.save(audio_file, x, sr)
     return processing_utils.audio_from_file(audio_file)
 
 
@@ -66,11 +64,11 @@ with gr.Blocks() as demo:
 
     speed_box = gr.Slider(label='Playback speed', minimum=0, maximum=2, value=1)
     with gr.Accordion('Fine-grained settings', open=False):
-        with gr.
-            gr.
-
-
-
+        with gr.Tab('Trim audio sample (sec)'):
+            # gr.Markdown('### Trim audio sample (sec)')
+            with gr.Row():
+                start_time_box = gr.Number(label='Start', value=0)
+                end_time_box = gr.Number(label='End', value=20)
         model_box = gr.Dropdown(label='Model weight', choices=available_models, value=available_models[0])
 
     submit_btn = gr.Button('Submit')
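Taken together, the app.py changes make run() trim the resampled waveform to the requested [start_time, end_time] window, call the selected model only when the playback speed differs from 1, and write the result back before returning it. A minimal sketch of that flow, assuming models is a name-to-Stretcher dict produced by prepare_models() (the loader shown here is a guess, not the actual implementation) and that processing_utils comes from gradio:

# Sketch only: mirrors the updated run() path; prepare_models() here is a
# hypothetical loader (one Stretcher per entry in available_models).
import os
import torchaudio
from gradio import processing_utils
from tsmnet.interface import Stretcher

model_root = './weights'
available_models = ['speech', 'pop-music', 'classical-music']
working_sr = 22050

def prepare_models(device='cpu'):
    return {name: Stretcher(os.path.join(model_root, f'{name}.pt'), device)
            for name in available_models}

models = prepare_models()

def run_sketch(audio_file, speed, model, start_time, end_time):
    x, sr = torchaudio.load(audio_file)
    x = torchaudio.transforms.Resample(orig_freq=sr, new_freq=working_sr)(x)
    sr = working_sr

    # Trim first so only the selected window is stretched.
    x = x[:, int(start_time * sr):int(end_time * sr)]

    if speed != 1:                      # speed == 1 now skips the model entirely
        x = models[model](x, speed).cpu()

    torchaudio.save(audio_file, x, sr)  # overwrite the input file in place
    return processing_utils.audio_from_file(audio_file)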
tsmnet/interface.py CHANGED
@@ -68,7 +68,7 @@ class Stretcher:
         self.neuralgram = Neuralgram(path, device)
 
     @torch.no_grad()
-    def __call__(self, audio, rate , interpolation=InterpolationMode.
+    def __call__(self, audio, rate , interpolation=InterpolationMode.BICUBIC): # NEAREST | BILINEAR | BICUBIC
        if rate == 1:
            return audio.numpy() if isinstance(audio, torch.Tensor) else audio
        neu = self.neuralgram(audio)
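The interface change makes bicubic interpolation the default when the neuralgram is rescaled for time-scale modification, with NEAREST and BILINEAR still selectable. As a rough illustration only (the actual resize call inside Stretcher is not shown in this diff, so the tensor layout and the use of torchvision's resize here are assumptions), rescaling a spectrogram-like tensor along its time axis with a selectable InterpolationMode could look like:

# Illustrative only: how an InterpolationMode choice is typically applied when
# stretching a (channels, features, frames) representation along its time axis.
import torch
from torchvision.transforms.functional import resize, InterpolationMode

def stretch_time_axis(neu: torch.Tensor, rate: float,
                      interpolation=InterpolationMode.BICUBIC) -> torch.Tensor:
    n_features, n_frames = neu.shape[-2], neu.shape[-1]
    new_frames = max(1, round(n_frames / rate))  # rate > 1 -> fewer frames -> faster playback
    return resize(neu, [n_features, new_frames], interpolation=interpolation)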
weights/general.pt DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:e70b0ca672ab2008da3517ae3eb524135a1ef5685d59cc034084316a665f69f6
-size 100400920
weights/speech.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:f3010d34e0d538ecb4c63c8bc89ad4023630dc36e2746bb71b799026d2b03ad4
+size 100400898