TheStinger Lusbert committed on
Commit
ccd4978
1 Parent(s): 763202b

Added Mel spectrogram (#1)

Browse files

- Added Mel spectrogram (ee40ca2679686ea68ecdb4546b7d2fdac3a8c0ae)


Co-authored-by: Armin <Lusbert@users.noreply.huggingface.co>

Files changed (1) hide show
  1. app.py +35 -39
app.py CHANGED
@@ -1,10 +1,11 @@
1
  import gradio as gr
2
  import matplotlib.pyplot as plt
3
- from matplotlib.colors import LinearSegmentedColormap
4
  import numpy as np
5
  import os
6
  import soundfile as sf
7
  import requests
 
 
8
 
9
  def download_file(url):
10
  file_id = url.split('/')[-2]
@@ -14,16 +15,23 @@ def download_file(url):
14
  open(local_filename, 'wb').write(response.content)
15
  return local_filename
16
 
 
17
  def main():
18
  with gr.Blocks() as app:
19
  gr.Markdown(
20
  """
21
- Audio Analyzer Software by Ilaria, Help me on Ko-Fi!\n
22
- Special thanks to Alex Murkoff for helping me coding it!
23
- Need help with AI? Join Join AI Hub!
 
 
 
24
  """
25
  )
26
-
 
 
 
27
  with gr.Row():
28
  with gr.Column():
29
  audio_input = gr.Audio(type='filepath')
@@ -31,56 +39,44 @@ def main():
31
 
32
  with gr.Column():
33
  output_markdown = gr.Markdown(value="", visible=True)
34
- image_output = gr.Image(type='filepath', interactive=False)
35
-
36
  with gr.Accordion('Audio Downloader', open=False):
37
  url_input = gr.Textbox(value='', label='Google Drive Audio URL')
38
  download_butt = gr.Button(value='Download audio', variant='primary')
39
-
40
  download_butt.click(fn=download_file, inputs=[url_input], outputs=[audio_input])
41
- create_spec_butt.click(fn=create_spectrogram_and_get_info, inputs=[audio_input], outputs=[output_markdown, image_output])
42
-
 
43
  download_butt.click(fn=download_file, inputs=[url_input], outputs=[audio_input])
44
- create_spec_butt.click(fn=create_spectrogram_and_get_info, inputs=[audio_input], outputs=[output_markdown, image_output])
45
-
46
- app.queue(max_size=1022).launch(share=True)
47
 
48
- from matplotlib.colors import LinearSegmentedColormap
49
 
50
  def create_spectrogram_and_get_info(audio_file):
51
  plt.clf()
52
-
53
- cdict = {'red': [(0.0, 0.0, 0.0),
54
- (0.5, 1.0, 1.0),
55
- (1.0, 1.0, 1.0)],
56
- 'green': [(0.0, 0.0, 0.0),
57
- (0.25, 0.0, 0.0),
58
- (0.75, 1.0, 1.0),
59
- (1.0, 1.0, 1.0)],
60
- 'blue': [(0.0, 0.0, 0.0),
61
- (0.5, 0.0, 0.0),
62
- (1.0, 0.0, 0.0)]}
63
- custom_cmap = LinearSegmentedColormap('CustomMap', cdict)
64
-
65
- fig = plt.figure(figsize=(15, 5))
66
- fig.patch.set_facecolor('black') # Imposta il colore di sfondo su nero
67
- audio_data, sample_rate = sf.read(audio_file)
68
- if len(audio_data.shape) > 1:
69
- audio_data = np.mean(audio_data, axis=1)
70
- plt.specgram(audio_data, Fs=sample_rate / 1, NFFT=4096, sides='onesided',
71
- cmap=custom_cmap, scale_by_freq=True, scale='dB', mode='magnitude', window=np.hanning(4096)) # Usa la mappa di colori personalizzata
72
- plt.savefig('spectrogram.png', dpi=300)
73
  audio_info = sf.info(audio_file)
74
  bit_depth = {'PCM_16': 16, 'FLOAT': 32}.get(audio_info.subtype, 0)
75
  minutes, seconds = divmod(audio_info.duration, 60)
76
  seconds, milliseconds = divmod(seconds, 1)
77
  milliseconds *= 1000
78
- bitrate = audio_info.samplerate * audio_info.channels * bit_depth / 8 / 1024 / 1024
 
79
  speed_in_kbps = audio_info.samplerate * bit_depth / 1000
80
  filename_without_extension, _ = os.path.splitext(os.path.basename(audio_file))
81
  info_table = f"""
82
 
83
-
84
  | Information | Value |
85
  | :---: | :---: |
86
  | File Name | {filename_without_extension} |
@@ -89,9 +85,9 @@ def create_spectrogram_and_get_info(audio_file):
89
  | Audio Channels | {audio_info.channels} |
90
  | Samples per second | {audio_info.samplerate} Hz |
91
  | Bit per second | {audio_info.samplerate * audio_info.channels * bit_depth} bit/s |
92
-
93
  """
94
-
95
  # Return the PNG file of the spectrogram and the info table
96
  return info_table, 'spectrogram.png'
97
 
 
1
  import gradio as gr
2
  import matplotlib.pyplot as plt
 
3
  import numpy as np
4
  import os
5
  import soundfile as sf
6
  import requests
7
+ import librosa.display
8
+
9
 
10
  def download_file(url):
11
  file_id = url.split('/')[-2]
 
15
  open(local_filename, 'wb').write(response.content)
16
  return local_filename
17
 
18
+
19
  def main():
20
  with gr.Blocks() as app:
21
  gr.Markdown(
22
  """
23
+ <h1><center>Audio Analyzer by Ilaria</center></h1>\n
24
+ <h3><center>Help me on <a href="https://ko-fi.com/ilariaowo/shop">Ko-Fi</a>!</center></h3>\n
25
+ ## Special thanks to Alex Murkoff for helping me code it!
26
+ #### Need help with AI? Join [AI Hub](https://discord.gg/aihub)!\n
27
+ **Note**: Try to keep the audio length under **2 minutes**,
28
+ since long audio files dont work well with a static spectrogram
29
  """
30
  )
31
+
32
+ with gr.Row():
33
+ image_output = gr.Image(type='filepath', interactive=False)
34
+
35
  with gr.Row():
36
  with gr.Column():
37
  audio_input = gr.Audio(type='filepath')
 
39
 
40
  with gr.Column():
41
  output_markdown = gr.Markdown(value="", visible=True)
42
+
 
43
  with gr.Accordion('Audio Downloader', open=False):
44
  url_input = gr.Textbox(value='', label='Google Drive Audio URL')
45
  download_butt = gr.Button(value='Download audio', variant='primary')
46
+
47
  download_butt.click(fn=download_file, inputs=[url_input], outputs=[audio_input])
48
+ create_spec_butt.click(fn=create_spectrogram_and_get_info, inputs=[audio_input],
49
+ outputs=[output_markdown, image_output])
50
+
51
  download_butt.click(fn=download_file, inputs=[url_input], outputs=[audio_input])
52
+ create_spec_butt.click(fn=create_spectrogram_and_get_info, inputs=[audio_input],
53
+ outputs=[output_markdown, image_output])
 
54
 
55
+ app.queue(max_size=1022).launch(share=True)
56
 
57
  def create_spectrogram_and_get_info(audio_file):
58
  plt.clf()
59
+
60
+ y, sr = librosa.load(audio_file, sr=None)
61
+ S = librosa.feature.melspectrogram(y, sr=sr, n_mels=256)
62
+ log_S = librosa.amplitude_to_db(S, ref=np.max, top_db=256)
63
+ plt.figure(figsize=(12, 5.5))
64
+ librosa.display.specshow(log_S, sr=sr, x_axis='time')
65
+ plt.colorbar(format='%+2.0f dB', pad=0.01)
66
+ plt.tight_layout(pad=0.5)
67
+ plt.savefig('spectrogram.png', dpi=500)
 
 
 
 
 
 
 
 
 
 
 
 
68
  audio_info = sf.info(audio_file)
69
  bit_depth = {'PCM_16': 16, 'FLOAT': 32}.get(audio_info.subtype, 0)
70
  minutes, seconds = divmod(audio_info.duration, 60)
71
  seconds, milliseconds = divmod(seconds, 1)
72
  milliseconds *= 1000
73
+ # bitrate = audio_info.samplerate * audio_info.channels * bit_depth / 8 / 1024 / 1024
74
+ # this bitrate one doesnt seem to be used anywhere so i just removed it
75
  speed_in_kbps = audio_info.samplerate * bit_depth / 1000
76
  filename_without_extension, _ = os.path.splitext(os.path.basename(audio_file))
77
  info_table = f"""
78
 
79
+
80
  | Information | Value |
81
  | :---: | :---: |
82
  | File Name | {filename_without_extension} |
 
85
  | Audio Channels | {audio_info.channels} |
86
  | Samples per second | {audio_info.samplerate} Hz |
87
  | Bit per second | {audio_info.samplerate * audio_info.channels * bit_depth} bit/s |
88
+
89
  """
90
+
91
  # Return the PNG file of the spectrogram and the info table
92
  return info_table, 'spectrogram.png'
93