import os
import random

import numpy as np
import soundfile as sf
import torch
from cog import BasePredictor, Input, Path

from audiosr import build_model, super_resolution

os.environ["TOKENIZERS_PARALLELISM"] = "true"
torch.set_float32_matmul_precision("high")

# Largest magnitude used when scaling float samples to signed 16-bit PCM.
_INT16_SCALE = 32767


def _to_int16_pcm(samples):
    """Convert float audio samples to int16 PCM, transposed for writing.

    Samples are clipped to [-1.0, 1.0] before scaling so that values outside
    that range saturate instead of overflowing the int16 range during the
    cast. In-range samples produce the same result as a plain
    ``(samples * 32767).astype(np.int16).T``.

    Args:
        samples: NumPy array of float samples.
            NOTE(review): presumably shaped (channels, num_samples) with a
            nominal range of [-1, 1] -- confirm against ``super_resolution``.

    Returns:
        The transposed ``np.int16`` array.
    """
    bounded = np.clip(samples, -1.0, 1.0)
    return (bounded * _INT16_SCALE).astype(np.int16).T


class Predictor(BasePredictor):
    """Cog predictor that runs AudioSR super resolution on an audio file."""

    def setup(self, model_name="basic", device="auto"):
        """Build the AudioSR model and remember the configuration.

        Args:
            model_name: Model name forwarded to ``build_model``.
            device: Device specifier forwarded to ``build_model``.
        """
        self.model_name = model_name
        self.device = device
        # Sample rate used when writing the output file.
        self.sr = 48000
        self.audiosr = build_model(
            model_name=self.model_name, device=self.device
        )

    def predict(
        self,
        input_file: Path = Input(description="Audio to upsample"),
        ddim_steps: int = Input(
            description="Number of inference steps",
            default=50,
            ge=10,
            le=500,
        ),
        guidance_scale: float = Input(
            description="Scale for classifier free guidance",
            default=3.5,
            ge=1.0,
            le=20.0,
        ),
        seed: int = Input(
            description="Random seed. Leave blank to randomize the seed",
            default=None,
        ),
    ) -> Path:
        """Run a single prediction on the model.

        Args:
            input_file: Path of the audio file passed to ``super_resolution``.
            ddim_steps: Number of inference steps.
            guidance_scale: Scale for classifier free guidance.
            seed: Random seed; when ``None`` a seed is drawn uniformly from
                ``[0, 2**32 - 1]``.

        Returns:
            Path to ``out.wav``, written as 16-bit PCM at ``self.sr`` Hz.
        """
        if seed is None:
            seed = random.randint(0, 2**32 - 1)
        print(f"Setting seed to: {seed}")

        waveform = super_resolution(
            self.audiosr,
            input_file,
            seed=seed,
            guidance_scale=guidance_scale,
            ddim_steps=ddim_steps,
            latent_t_per_second=12.8,
        )

        # Only the first item of the returned batch is written out.
        out_wav = _to_int16_pcm(waveform[0])
        # Use the sample rate stored in setup() rather than a second literal.
        sf.write("out.wav", data=out_wav, samplerate=self.sr)
        return Path("out.wav")


if __name__ == "__main__":
    p = Predictor()
    p.setup()
    out = p.predict(
        "example/music.wav",
        ddim_steps=50,
        guidance_scale=3.5,
        seed=42,
    )