|
import os |
|
import random |
|
|
|
import numpy as np |
|
import soundfile as sf |
|
import torch |
|
from cog import BasePredictor, Input, Path |
|
|
|
from audiosr import build_model, super_resolution |
|
|
|
# Let the Hugging Face tokenizers library use parallelism (and suppress its
# fork-related warning) — NOTE(review): "true" *enables* parallelism; confirm
# that is the intent rather than silencing the warning with "false".
os.environ["TOKENIZERS_PARALLELISM"] = "true"

# Trade a little float32 matmul precision for speed (enables TF32 on GPUs
# that support it — see torch.set_float32_matmul_precision docs).
torch.set_float32_matmul_precision("high")
|
|
|
class Predictor(BasePredictor):
    """Cog predictor that upsamples audio to 48 kHz with AudioSR."""

    def setup(self, model_name="basic", device="auto"):
        """Load the AudioSR model once so every prediction can reuse it.

        Args:
            model_name: AudioSR model variant passed through to ``build_model``.
            device: Device selector passed through to ``build_model``
                ("auto" presumably picks CUDA when available — confirm in
                the audiosr package).
        """
        self.model_name = model_name
        self.device = device
        self.sr = 48000  # output sample rate in Hz produced by AudioSR
        self.audiosr = build_model(model_name=self.model_name, device=self.device)

    def predict(self,
        input_file: Path = Input(description="Audio to upsample"),
        ddim_steps: int = Input(description="Number of inference steps", default=50, ge=10, le=500),
        guidance_scale: float = Input(description="Scale for classifier free guidance", default=3.5, ge=1.0, le=20.0),
        seed: int = Input(description="Random seed. Leave blank to randomize the seed", default=None)
    ) -> Path:
        """Run a single super-resolution prediction on the loaded model.

        Args:
            input_file: Path of the audio file to upsample.
            ddim_steps: Number of DDIM inference steps.
            guidance_scale: Classifier-free guidance scale.
            seed: Random seed; a fresh one is drawn when omitted.

        Returns:
            Path to "out.wav", the upsampled 16-bit PCM result.
        """
        if seed is None:
            seed = random.randint(0, 2**32 - 1)
        print(f"Setting seed to: {seed}")

        waveform = super_resolution(
            self.audiosr,
            # str() so any path-like input works regardless of Path subclass.
            str(input_file),
            seed=seed,
            guidance_scale=guidance_scale,
            ddim_steps=ddim_steps,
            latent_t_per_second=12.8,
        )
        # Clip to [-1, 1] BEFORE scaling: samples even slightly above full
        # scale would otherwise wrap around (overflow) when cast to int16,
        # producing loud clicks instead of mild clipping.
        clipped = np.clip(waveform[0], -1.0, 1.0)
        out_wav = (clipped * 32767).astype(np.int16).T
        # Use the rate configured in setup() instead of a second hard-coded
        # 48000, so the two can never drift apart.
        sf.write("out.wav", data=out_wav, samplerate=self.sr)
        return Path("out.wav")
|
|
|
|
|
if __name__ == "__main__":
    # Smoke-test the predictor end to end on a bundled example clip.
    p = Predictor()
    p.setup()
    out = p.predict(
        "example/music.wav",
        ddim_steps=50,
        guidance_scale=3.5,
        seed=42,
    )
    # Report the result path instead of silently discarding it.
    print(f"Wrote upsampled audio to: {out}")