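"""Gradio demo for audio enhancement with Resemble Enhance.

Takes an uploaded audio file, runs resemble_enhance's denoise and enhance
functions on it, and returns both the denoised and the enhanced audio through
a simple Gradio interface. Works both locally and on Hugging Face Spaces.
"""
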
try:
    import spaces  # For specific environments such as Hugging Face Spaces
    USING_SPACES = True
except ImportError:
    USING_SPACES = False

import gradio as gr
import torch
import torchaudio
from resemble_enhance.enhancer.inference import denoise, enhance

def gpu_decorator(func):
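    """Wrap `func` with `spaces.GPU` when running on Hugging Face Spaces; otherwise return it unchanged."""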
    if USING_SPACES:
        return spaces.GPU(func)
    else:
        return func

@gpu_decorator
def _fn(path, solver, nfe, tau, denoising, unlimited):
    """Denoise and enhance the audio file at `path`, returning both results."""
    if path is None:
        raise gr.Error("Please upload an audio file.")

    info = torchaudio.info(path)
    if not unlimited and (info.num_frames / info.sample_rate > 60):
        raise gr.Error("Only audio files shorter than 60 seconds are supported.")

    solver = solver.lower()
    nfe = int(nfe)
    lambd = 0.9 if denoising else 0.1  # denoise strength: stronger when pre-denoising is requested

    dwav, sr = torchaudio.load(path)
    dwav = dwav.mean(dim=0)  # downmix to mono

    wav1, new_sr = denoise(dwav, sr)  # device argument intentionally omitted
    wav2, new_sr = enhance(dwav, sr, nfe=nfe, solver=solver, lambd=lambd, tau=tau)  # device argument intentionally omitted

    wav1 = wav1.cpu().numpy()
    wav2 = wav2.cpu().numpy()
    return (new_sr, wav1), (new_sr, wav2)  # each output is a (sample_rate, samples) tuple for gr.Audio



# Gradio UI components
inputs = [
    gr.Audio(type="filepath", label="Input Audio"),
    gr.Dropdown(
        choices=["Midpoint", "RK4", "Euler"],
        value="Midpoint",
        label="CFM ODE Solver (Midpoint is recommended)",
    ),
    gr.Slider(
        minimum=1,
        maximum=128,
        value=64,
        step=1,
        label="CFM Number of Function Evaluations (higher values generally yield better quality but may be slower)",
    ),
    gr.Slider(
        minimum=0,
        maximum=1,
        value=0.5,
        step=0.01,
        label="CFM Prior Temperature (higher values can improve quality but may reduce stability)",
    ),
    gr.Checkbox(
        value=False,
        label="Denoise Before Enhancement (tick if your audio contains heavy background noise)",
    ),
    gr.Checkbox(
        value=False,
        label="Allow Unlimited Audio Length (supports files longer than 60 seconds)",
    ),
]
outputs = [
    gr.Audio(label="Output Denoised Audio"),
    gr.Audio(label="Output Enhanced Audio"),
]

# Gradio interface (gr.Interface is itself a Blocks app, so no extra gr.Blocks wrapper is needed)
app = gr.Interface(
    fn=_fn,
    title="Audio Enhancement",
    description="AI-driven audio enhancement powered by Resemble AI.",
    inputs=inputs,
    outputs=outputs,
).queue()


def main():
    print("Starting app...")
    app.launch(share=True)


if __name__ == "__main__":
    if not USING_SPACES:
        main()  # local run: launch with a public share link
    else:
        app.launch()  # Hugging Face Spaces: launch without sharing