M4xjunior committed on
Commit
061a1dd
·
verified ·
1 Parent(s): 0c340f6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +91 -49
app.py CHANGED
@@ -16,80 +16,122 @@ def gpu_decorator(func):
16
  else:
17
  return func
18
 
 
19
  @gpu_decorator
20
- def _fn(path, solver, nfe, tau, denoising, unlimited):
 
 
21
  if path is None:
22
  return "Please upload an audio file.", None
 
23
  info = torchaudio.info(path)
24
  if not unlimited and (info.num_frames / info.sample_rate > 60):
25
  return "Only audio files shorter than 60 seconds are supported.", None
 
26
  solver = solver.lower()
27
  nfe = int(nfe)
28
  lambd = 0.9 if denoising else 0.1
 
29
  dwav, sr = torchaudio.load(path)
30
  dwav = dwav.mean(dim=0)
31
- wav1, new_sr = denoise(dwav, sr) # Remove o argumento device
32
- wav2, new_sr = enhance(dwav, sr, nfe=nfe, solver=solver, lambd=lambd, tau=tau) # Remove o argumento device
 
 
 
 
 
 
 
33
  wav1 = wav1.cpu().numpy()
34
  wav2 = wav2.cpu().numpy()
 
35
  return (new_sr, wav1), (new_sr, wav2)
36
 
37
 
38
-
39
  with gr.Blocks() as app:
40
- inputs = [
41
- gr.Audio(type="filepath", label="Input Audio"),
42
- gr.Dropdown(
43
- choices=["Midpoint", "RK4", "Euler"],
44
- value="Midpoint",
45
- label="CFM ODE Solver (Midpoint is recommended)",
46
- ),
47
- gr.Slider(
48
- minimum=1,
49
- maximum=128,
50
- value=64,
51
- step=1,
52
- label="CFM Number of Function Evaluations (higher values in general yield better quality but may be slower)",
53
- ),
54
- gr.Slider(
55
- minimum=0,
56
- maximum=1,
57
- value=0.5,
58
- step=0.01,
59
- label="CFM Prior Temperature (higher values can improve quality but can reduce stability)",
60
- ),
61
- gr.Checkbox(
62
- value=False,
63
- label="Denoise Before Enhancement (tick if your audio contains heavy background noise)",
64
- ),
65
- gr.Checkbox(
66
- value=False,
67
- label="Allow Unlimited Audio Length (supports files longer than 60 seconds)",
68
- ),
69
- ]
70
- outputs = [
71
- gr.Audio(label="Output Denoised Audio"),
72
- gr.Audio(label="Output Enhanced Audio"),
73
- ]
74
- # Interface Gradio
75
- interface = gr.Interface(
76
- fn=_fn,
77
- title="Audio Enhancement",
78
- description="AI-driven audio enhancement powered by Resemble AI.",
79
- inputs=inputs,
80
- outputs=outputs,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
  )
82
- app = interface.queue()
83
 
84
 
85
- def main():
 
 
 
 
 
 
 
 
 
 
 
86
  global app
87
  print("Starting app...")
88
- app.launch(share=True)
89
 
90
 
91
  if __name__ == "__main__":
92
  if not USING_SPACES:
93
  main()
94
  else:
95
- app.launch()
 
16
  else:
17
  return func
18
 
19
+
@gpu_decorator
def enhance_audio(
    path, solver, nfe, tau, denoising, unlimited, show_info=gr.Info
):
    """Denoise and enhance the audio file at ``path``.

    Args:
        path: Filesystem path of the uploaded audio, or ``None`` when nothing
            was uploaded.
        solver: Name of the CFM ODE solver; lower-cased before it is passed
            to ``enhance``.
        nfe: Number of function evaluations; coerced to ``int``.
        tau: CFM prior temperature, forwarded unchanged to ``enhance``.
        denoising: When true, ``enhance`` is called with ``lambd=0.9``,
            otherwise with ``lambd=0.1``.
        unlimited: When true, the 60-second length check is skipped.
        show_info: Callable used to report progress messages; pass a falsy
            value to silence them.

    Returns:
        Two ``(sample_rate, samples)`` tuples, the denoised audio followed by
        the enhanced audio, with the samples converted to NumPy arrays.

    Raises:
        gr.Error: If no file was uploaded, or if the file is longer than 60
            seconds while ``unlimited`` is false.
    """
    # Both outputs of this handler are gr.Audio components, so a message
    # returned in their place would never be shown as text. Raising gr.Error
    # reports the problem to the user instead.
    if path is None:
        raise gr.Error("Please upload an audio file.")

    info = torchaudio.info(path)
    if not unlimited and (info.num_frames / info.sample_rate > 60):
        raise gr.Error("Only audio files shorter than 60 seconds are supported.")

    solver = solver.lower()
    nfe = int(nfe)
    lambd = 0.9 if denoising else 0.1

    dwav, sr = torchaudio.load(path)
    # Collapse dim 0 by averaging (presumably the channel axis, i.e. a mono
    # downmix -- confirm against torchaudio.load's output layout).
    dwav = dwav.mean(dim=0)

    if show_info:
        show_info("Denoising audio...")
    wav1, new_sr = denoise(dwav, sr)

    if show_info:
        show_info("Enhancing audio...")
    # NOTE: new_sr is rebound here, so both returned tuples carry the sample
    # rate reported by enhance().
    wav2, new_sr = enhance(dwav, sr, nfe=nfe, solver=solver, lambd=lambd, tau=tau)

    wav1 = wav1.cpu().numpy()
    wav2 = wav2.cpu().numpy()

    return (new_sr, wav1), (new_sr, wav2)
50
 
51
 
 
# UI definition: a title, then one row holding a column of input controls
# (plus the trigger button) and a column of audio outputs.
with gr.Blocks() as app:
    gr.Markdown("# Audio Enhancement with Resemble AI")

    with gr.Row():
        with gr.Column():
            # Inputs
            audio_input = gr.Audio(type="filepath", label="Input Audio")
            solver_dropdown = gr.Dropdown(
                choices=["Midpoint", "RK4", "Euler"],
                value="Midpoint",
                label="CFM ODE Solver",
                info="Midpoint is recommended",
            )
            nfe_slider = gr.Slider(
                minimum=1, maximum=128, value=64, step=1,
                label="CFM Number of Function Evaluations",
                info="Higher values generally yield better quality but may be slower",
            )
            tau_slider = gr.Slider(
                minimum=0, maximum=1, value=0.5, step=0.01,
                label="CFM Prior Temperature",
                info="Higher values can improve quality but can reduce stability",
            )
            denoising_checkbox = gr.Checkbox(
                value=False,
                label="Denoise Before Enhancement",
                info="Tick if your audio contains heavy background noise",
            )
            unlimited_checkbox = gr.Checkbox(
                value=False,
                label="Allow Unlimited Audio Length",
                info="Supports files longer than 60 seconds",
            )
            enhance_btn = gr.Button("Enhance Audio", variant="primary")

        with gr.Column():
            # Outputs
            denoised_audio_output = gr.Audio(label="Output Denoised Audio")
            enhanced_audio_output = gr.Audio(label="Output Enhanced Audio")

    # Wire the `enhance_btn` button to `enhance_audio`. The input list follows
    # the order of that function's positional parameters, and the two outputs
    # receive its two returned values in order.
    enhance_btn.click(
        fn=enhance_audio,
        inputs=[
            audio_input,
            solver_dropdown,
            nfe_slider,
            tau_slider,
            denoising_checkbox,
            unlimited_checkbox,
        ],
        outputs=[denoised_audio_output, enhanced_audio_output],
    )
 
114
 
115
 
@click.command()
@click.option("--port", "-p", default=None, type=int, help="Port to run the app on")
@click.option("--host", "-H", default=None, help="Host to run the app on")
@click.option(
    "--share",
    "-s",
    default=False,
    is_flag=True,
    help="Share the app via Gradio share link",
)
# A plain flag with default=True can never be switched off from the command
# line, so expose an explicit on/off pair. `--api` and `-a` keep working.
@click.option(
    "--api/--no-api",
    "-a",
    default=True,
    help="Allow API access",
)
def main(port, host, share, api):
    """Launch the Gradio app with the given server options."""
    # `app` is only read here, so no `global` declaration is needed.
    print("Starting app...")
    # `api` controls both the queue's open API and the "show API" setting.
    app.queue(api_open=api).launch(
        server_name=host, server_port=port, share=share, show_api=api
    )
131
 
132
 
# Script entry point: when USING_SPACES is set, launch the queued app with
# default settings; otherwise hand control to the click-based `main` so that
# host, port, share and API options can be passed on the command line.
if __name__ == "__main__":
    if USING_SPACES:
        app.queue().launch()
    else:
        main()