teticio committed on
Commit
30b435a
1 Parent(s): d1fdbae

fix imports

Browse files
audiodiffusion/__init__.py CHANGED
@@ -9,7 +9,7 @@ from tqdm.auto import tqdm
9
  # from diffusers import AudioDiffusionPipeline
10
  from .pipeline_audio_diffusion import AudioDiffusionPipeline
11
 
12
- VERSION = "1.4.0"
13
 
14
 
15
  class AudioDiffusion:
 
9
  # from diffusers import AudioDiffusionPipeline
10
  from .pipeline_audio_diffusion import AudioDiffusionPipeline
11
 
12
+ VERSION = "1.4.1"
13
 
14
 
15
  class AudioDiffusion:
audiodiffusion/mel.py CHANGED
@@ -23,8 +23,21 @@ from diffusers.schedulers.scheduling_utils import SchedulerMixin
23
 
24
  warnings.filterwarnings("ignore")
25
 
26
- import librosa # noqa: E402
27
  import numpy as np # noqa: E402
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  from PIL import Image # noqa: E402
29
 
30
 
@@ -61,6 +74,9 @@ class Mel(ConfigMixin, SchedulerMixin):
61
  self.set_resolution(x_res, y_res)
62
  self.audio = None
63
 
 
 
 
64
  def set_resolution(self, x_res: int, y_res: int):
65
  """Set resolution.
66
 
@@ -87,12 +103,7 @@ class Mel(ConfigMixin, SchedulerMixin):
87
 
88
  # Pad with silence if necessary.
89
  if len(self.audio) < self.x_res * self.hop_length:
90
- self.audio = np.concatenate(
91
- [
92
- self.audio,
93
- np.zeros((self.x_res * self.hop_length - len(self.audio),)),
94
- ]
95
- )
96
 
97
  def get_number_of_slices(self) -> int:
98
  """Get number of slices in audio.
@@ -131,11 +142,7 @@ class Mel(ConfigMixin, SchedulerMixin):
131
  `PIL Image`: grayscale image of x_res x y_res
132
  """
133
  S = librosa.feature.melspectrogram(
134
- y=self.get_audio_slice(slice),
135
- sr=self.sr,
136
- n_fft=self.n_fft,
137
- hop_length=self.hop_length,
138
- n_mels=self.n_mels,
139
  )
140
  log_S = librosa.power_to_db(S, ref=np.max, top_db=self.top_db)
141
  bytedata = (((log_S + self.top_db) * 255 / self.top_db).clip(0, 255) + 0.5).astype(np.uint8)
@@ -155,10 +162,6 @@ class Mel(ConfigMixin, SchedulerMixin):
155
  log_S = bytedata.astype("float") * self.top_db / 255 - self.top_db
156
  S = librosa.db_to_power(log_S)
157
  audio = librosa.feature.inverse.mel_to_audio(
158
- S,
159
- sr=self.sr,
160
- n_fft=self.n_fft,
161
- hop_length=self.hop_length,
162
- n_iter=self.n_iter,
163
  )
164
  return audio
 
23
 
24
  warnings.filterwarnings("ignore")
25
 
 
26
  import numpy as np # noqa: E402
27
+
28
+
29
+ try:
30
+ import librosa # noqa: E402
31
+
32
+ _librosa_can_be_imported = True
33
+ _import_error = ""
34
+ except Exception as e:
35
+ _librosa_can_be_imported = False
36
+ _import_error = (
37
+ f"Cannot import librosa because {e}. Make sure to correctly install librosa to be able to install it."
38
+ )
39
+
40
+
41
  from PIL import Image # noqa: E402
42
 
43
 
 
74
  self.set_resolution(x_res, y_res)
75
  self.audio = None
76
 
77
+ if not _librosa_can_be_imported:
78
+ raise ValueError(_import_error)
79
+
80
  def set_resolution(self, x_res: int, y_res: int):
81
  """Set resolution.
82
 
 
103
 
104
  # Pad with silence if necessary.
105
  if len(self.audio) < self.x_res * self.hop_length:
106
+ self.audio = np.concatenate([self.audio, np.zeros((self.x_res * self.hop_length - len(self.audio),))])
 
 
 
 
 
107
 
108
  def get_number_of_slices(self) -> int:
109
  """Get number of slices in audio.
 
142
  `PIL Image`: grayscale image of x_res x y_res
143
  """
144
  S = librosa.feature.melspectrogram(
145
+ y=self.get_audio_slice(slice), sr=self.sr, n_fft=self.n_fft, hop_length=self.hop_length, n_mels=self.n_mels
 
 
 
 
146
  )
147
  log_S = librosa.power_to_db(S, ref=np.max, top_db=self.top_db)
148
  bytedata = (((log_S + self.top_db) * 255 / self.top_db).clip(0, 255) + 0.5).astype(np.uint8)
 
162
  log_S = bytedata.astype("float") * self.top_db / 255 - self.top_db
163
  S = librosa.db_to_power(log_S)
164
  audio = librosa.feature.inverse.mel_to_audio(
165
+ S, sr=self.sr, n_fft=self.n_fft, hop_length=self.hop_length, n_iter=self.n_iter
 
 
 
 
166
  )
167
  return audio
audiodiffusion/pipeline_audio_diffusion.py CHANGED
@@ -21,13 +21,12 @@ from typing import List, Tuple, Union
21
 
22
  import numpy as np
23
  import torch
24
- from diffusers import AutoencoderKL, DDIMScheduler, DDPMScheduler, Mel, UNet2DConditionModel
25
  from diffusers.pipeline_utils import AudioPipelineOutput, BaseOutput, DiffusionPipeline, ImagePipelineOutput
26
  from PIL import Image
27
 
28
  from .mel import Mel
29
 
30
-
31
  class AudioDiffusionPipeline(DiffusionPipeline):
32
  """
33
  This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods the
 
21
 
22
  import numpy as np
23
  import torch
24
+ from diffusers import AutoencoderKL, DDIMScheduler, DDPMScheduler, UNet2DConditionModel
25
  from diffusers.pipeline_utils import AudioPipelineOutput, BaseOutput, DiffusionPipeline, ImagePipelineOutput
26
  from PIL import Image
27
 
28
  from .mel import Mel
29
 
 
30
  class AudioDiffusionPipeline(DiffusionPipeline):
31
  """
32
  This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods the