Pattr committed on
Commit
c3c8722
·
1 Parent(s): 6b86393

Upload 5 files

Browse files
Beat60 (4).wav ADDED
Binary file (706 kB). View file
 
__pycache__/app.cpython-39.pyc ADDED
Binary file (398 Bytes). View file
 
__pycache__/main.cpython-39.pyc ADDED
Binary file (423 Bytes). View file
 
app.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import numpy
3
+ import librosa
4
+ from pydub import *
5
+ import fastbook
6
+ from fastbook import *
7
+ import pathlib
8
import os

# A fastai learner pickles pathlib objects of the OS it was exported on.
# Loading it on the other OS family fails because the foreign Path class
# cannot be instantiated, so alias the foreign class to the native one for
# the duration of the load only.
# The original code unconditionally set PosixPath = WindowsPath, which breaks
# on every non-Windows host (WindowsPath cannot be instantiated there).
temp = pathlib.PosixPath  # original PosixPath, kept so it can be restored
_original_windows_path = pathlib.WindowsPath
if os.name == "nt":
    pathlib.PosixPath = pathlib.WindowsPath
else:
    pathlib.WindowsPath = pathlib.PosixPath
try:
    # NOTE(review): load_learner unpickles the file; only load trusted models.
    model = load_learner("thedrum.pkl")
finally:
    # Undo the monkey-patch so the rest of the process sees the real classes.
    pathlib.PosixPath = temp
    pathlib.WindowsPath = _original_windows_path
11
+
12
def play(x, y, ti, beat, audio):
    """Classify consecutive half-beat slices of an audio file.

    The file is cut into ``int(ti * beat * 2)`` consecutive slices, each
    ``beat / 2`` seconds long. Every slice is exported to ``Half.wav``,
    rendered as a mel spectrogram image named ``<n>.png`` (n counts from 1)
    in the working directory, and that image is passed to the module-level
    ``model``.

    Args:
        x: Unused; kept for backward compatibility with existing callers.
        y: Unused; kept for backward compatibility with existing callers.
        ti: Total duration of the audio, in seconds.
        beat: Scale factor for slicing; each slice spans ``beat / 2`` seconds.
            NOTE(review): the slice count scales with ``beat`` while the
            slice length also scales with ``beat``, which only lines up with
            ``ti`` when ``beat == 1`` -- confirm the intended meaning.
        audio: Path to the audio file to analyse.

    Returns:
        A list with the first element of ``model.predict(...)`` for each
        slice, in playback order. Empty when the audio is shorter than one
        slice.
    """
    # Older librosa releases do not expose ``librosa.display`` unless the
    # submodule is imported explicitly.
    import librosa.display

    # Decode the source once: it does not change between iterations.
    # from_file lets ffmpeg detect the container, so WAV/OGG/... uploads work
    # as well as MP3 (from_mp3 forces the MP3 demuxer).
    sound = AudioSegment.from_file(audio)

    predictions = []
    slice_count = int(ti * beat * 2)
    for index in range(slice_count):
        start_ms = beat * index / 2 * 1000
        end_ms = beat * (index + 1) / 2 * 1000
        sound[start_ms:end_ms].export("Half.wav", format="wav")

        samples, sample_rate = librosa.load('Half.wav')
        figure = plt.figure(figsize=(12, 4))
        try:
            mel = librosa.feature.melspectrogram(
                y=samples, sr=sample_rate, n_mels=550
            )
            mel_db = librosa.power_to_db(mel, ref=np.max)
            librosa.display.specshow(
                mel_db, sr=sample_rate, x_axis='time', y_axis='mel'
            )
            image_name = f'{index + 1}.png'
            plt.savefig(image_name)
        finally:
            # Without this every slice leaks an open matplotlib figure.
            plt.close(figure)

        prediction = model.predict(image_name)
        predictions.append(prediction[0])
    return predictions
41
+
42
def transcribe(audio):
    """Return the per-slice predictions for the audio file at ``audio``.

    Loads the file with librosa to measure its duration, then delegates to
    ``play`` with a fixed ``beat`` of 1.
    """
    samples, sample_rate = librosa.load(audio)
    duration = librosa.get_duration(y=samples, sr=sample_rate)
    beat = 1
    return play(samples, sample_rate, duration, beat, audio)
48
+
49
# Build the Gradio UI: one audio input handed to ``transcribe`` as a file
# path, and a plain-text output showing the returned predictions.
demo = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(type="filepath"),
    outputs="text",
)
# Start the web server (runs at import time, as in the original script).
demo.launch()
thedrum.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:957dedb8f9fd2a9648b5564f4cf168d83a121624eede0091b67557c0465b1de9
3
+ size 87805227