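"""Gradio demo for tabla taal classification.

Loads a pretrained Keras model (TTM_model.h5), extracts time-averaged librosa
features (chroma, RMS, spectral centroid/bandwidth/rolloff, zero-crossing rate,
and 20 MFCCs) from an uploaded clip, and returns class scores for eight taals.
"""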
import gradio as gr
import numpy as np
import pandas as pd
import csv
import librosa
import tensorflow as tf

# Pretrained taal-classification model; in Colab it can be fetched with:
#   !gdown https://drive.google.com/uc?id=1hKQdsTZ35KQmNV9Zrqg-ksTLSmPapR53
model = tf.keras.models.load_model('TTM_model.h5')

def config_audio(audio):
    """Extract the feature vector for one audio file and return it as a one-row DataFrame."""
    # Column names must match the features the model was trained on:
    # 6 spectral features + 20 MFCCs + a (dummy) label column.
    header = 'ChromaSTFT RMS SpectralCentroid SpectralBandwidth Rolloff ZeroCrossingRate'
    for i in range(1, 21):
        header += f' mfcc{i}'
    header += ' label'
    header = header.split()

    # Load up to 30 seconds of mono audio and compute frame-level features with librosa.
    y, sr = librosa.load(audio, mono=True, duration=30)
    rms = librosa.feature.rms(y=y)
    chroma = librosa.feature.chroma_stft(y=y, sr=sr)
    spec_centroid = librosa.feature.spectral_centroid(y=y, sr=sr)
    spec_bandwidth = librosa.feature.spectral_bandwidth(y=y, sr=sr)
    rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)
    zcr = librosa.feature.zero_crossing_rate(y)
    mfcc = librosa.feature.mfcc(y=y, sr=sr)

    # Average each feature over time so that one file becomes one row of 26 numbers.
    to_append = f'{np.mean(chroma)} {np.mean(rms)} {np.mean(spec_centroid)} {np.mean(spec_bandwidth)} {np.mean(rolloff)} {np.mean(zcr)}'
    for e in mfcc:
        to_append += f' {np.mean(e)}'

    # Write header and feature row to a CSV and read it back; the empty 'label'
    # column is parsed as NaN and dropped before prediction.
    with open('predict_file.csv', 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(header)
        writer.writerow(to_append.split())
    predict_file = pd.read_csv('predict_file.csv')
    X_predict = predict_file.drop('label', axis=1)
    return X_predict
    
def predict_audio(Input_Audio, Playable_Audio):
    """Gradio callback: Input_Audio is the uploaded file; Playable_Audio is only for in-browser playback."""
    audio = Input_Audio.name  # Gradio passes a temp file object; .name is its path on disk
    X_predict = config_audio(audio)
    taals = ['addhatrital', 'bhajani', 'dadra', 'deepchandi', 'ektal', 'jhaptal', 'rupak', 'trital']
    pred = model.predict(X_predict).flatten()
    # Map every class score to its taal name.
    return {taals[i]: float(pred[i]) for i in range(len(taals))}
    
# Two inputs: the uploaded file (fed to the model) and an audio player for the same clip.
audio = gr.inputs.Audio(source="upload", optional=False)
label = gr.outputs.Label()

gr.Interface(predict_audio, inputs=["file", audio], outputs=label).launch(debug=True)
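# Minimal sketch of running the pipeline without the web UI, assuming a local
# clip 'example.wav' exists (hypothetical path, not part of this repo):
#
#     X = config_audio('example.wav')
#     scores = model.predict(X).flatten()
#     print(dict(zip(['addhatrital', 'bhajani', 'dadra', 'deepchandi',
#                     'ektal', 'jhaptal', 'rupak', 'trital'], scores)))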