Browse files<audio controls src=""></audio>
@@ -0,0 +1,103 @@
1 |
from flask import Flask, request, jsonify, render_template
2 |
import os
3 |
import torch
4 |
import librosa
5 |
import numpy as np
6 |
from torchvision import models
7 |
from scipy.ndimage import zoom
8 |
from sklearn.decomposition import PCA
9 |
import joblib
10 |
from keras.utils import to_categorical
11 |
12 |
# Assuming you already have the 'ann_model' trained and 'pca' instance from the previous code
13 |
# Maps each supported language name to the class index used by the classifier.
language_mapping = {
    'malayalam': 0,
    'english': 1,
    'tamil': 2,
    'hindi': 3,
    'kannada': 4,
    'telugu': 5,
}

app = Flask(__name__)

# Load the trained classifier and the fitted PCA instance.
# NOTE(review): torch.load and joblib.load both unpickle arbitrary objects;
# only deploy artifacts that come from a trusted source.
# map_location='cpu': the feature tensors fed to the classifier are built
# from NumPy arrays and therefore live on the CPU.
ann_model = torch.load('ann_model.pth', map_location='cpu')
# Inference only: disable training-time behaviour (dropout, batch-norm
# statistics updates). Assumes the checkpoint holds a full nn.Module, which
# is how it is used when it is called on the feature tensor.
ann_model.eval()
pca = joblib.load('pca.pkl')

# Load the convolutional feature extractor of VGG16 (classifier head dropped).
vgg16 = models.vgg16(pretrained=True).features
vgg16.eval()
23 |
def preprocess_single_audio_vgg16(audio_file, vgg16_model, pca_instance):
    """Convert one audio file into the PCA-reduced VGG16 feature tensor.

    The audio is turned into a 128-band log-mel spectrogram, standardised,
    resized to 128x128, replicated to three channels, passed through
    ``vgg16_model`` and finally projected with ``pca_instance``.

    Args:
        audio_file: Path (or other source accepted by ``librosa.load``) of
            the audio to process.
        vgg16_model: Callable feature extractor applied to a
            ``(1, 3, 128, 128)`` float tensor.
        pca_instance: Fitted transformer exposing ``transform``.

    Returns:
        A float32 ``torch.Tensor`` holding the PCA-transformed features for
        the single input (one row).

    Raises:
        ValueError: If the audio file contains no samples.
    """
    # Load at the file's native sample rate (sr=None disables resampling).
    y, sr = librosa.load(audio_file, sr=None)
    if y.size == 0:
        raise ValueError('Audio file contains no samples')

    mel_spec = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128)
    log_mel_spec = librosa.power_to_db(mel_spec, ref=np.max)

    # Standardise to zero mean / unit variance. A constant spectrogram
    # (e.g. pure silence) has zero std; dividing by it would fill the
    # features with NaN, so fall back to a divisor of 1.
    std = np.std(log_mel_spec)
    if std == 0:
        std = 1.0
    norm_mel_spec = (log_mel_spec - np.mean(log_mel_spec)) / std

    # Resize the spectrogram to the target shape (128, 128) using zoom.
    target_shape = (128, 128)
    zoom_factors = (
        target_shape[0] / norm_mel_spec.shape[0],
        target_shape[1] / norm_mel_spec.shape[1],
    )
    resized_mel_spec = zoom(norm_mel_spec, zoom_factors, mode='nearest')

    # Replicate the single channel three times so the input has the three
    # channels the VGG16 convolution stack expects.
    mel_spec_rgb = np.stack([resized_mel_spec] * 3, axis=-1)

    # (H, W, C) -> (C, H, W), then add the batch dimension.
    mel_spec_tensor = (
        torch.from_numpy(mel_spec_rgb).permute(2, 0, 1).unsqueeze(0).float()
    )

    # Extract features without tracking gradients.
    with torch.no_grad():
        features = vgg16_model(mel_spec_tensor)

    # Flatten everything into a single row for the PCA transformer.
    features_flattened = features.detach().cpu().numpy().reshape(1, -1)

    features_pca = pca_instance.transform(features_flattened)

    return torch.from_numpy(features_pca).float()
56 |
57 |
58 |
59 |
# Register the view on the site root; without a route decorator the function
# is never reachable through the Flask app.
@app.route('/')
def home():
    """Render and return the ``index.html`` template."""
    return render_template('index.html')
61 |
62 |
@app.route('/predict', methods=['POST'])
def predict():
    """Classify the language of an uploaded audio file.

    Expects a multipart POST with the audio under the form field ``file``.

    Returns:
        ``{'prediction': <language>}`` on success,
        ``{'error': ...}`` with status 400 when the ``file`` field is absent,
        ``{'error': ...}`` with status 500 when processing fails.
    """
    import tempfile

    # Only the upload lookup is allowed to produce the 400 response, so an
    # unrelated KeyError further down is not misreported as a missing file.
    try:
        audio_file = request.files['file']
    except KeyError:
        return jsonify({'error': 'Audio file not found in the request'}), 400

    audio_file_path = None
    try:
        # Persist the upload to a unique temporary file: the preprocessing
        # step reads from a path, and a per-request name keeps concurrent
        # requests from overwriting each other's audio.
        fd, audio_file_path = tempfile.mkstemp(suffix='.wav')
        os.close(fd)
        audio_file.save(audio_file_path)

        # Preprocess the audio file
        preprocessed_features = preprocess_single_audio_vgg16(audio_file_path, vgg16, pca)

        # Make a prediction
        with torch.no_grad():
            output = ann_model(preprocessed_features)
            _, predicted_class = torch.max(output, 1)

        # Map predicted class index to actual label
        index_to_language = {v: k for k, v in language_mapping.items()}
        predicted_label = index_to_language[predicted_class.item()]

        # Return the prediction
        return jsonify({'prediction': predicted_label})
    except Exception as e:
        return jsonify({'error': str(e)}), 500
    finally:
        # Delete the temporary audio file, whether or not prediction worked.
        if audio_file_path is not None and os.path.exists(audio_file_path):
            os.remove(audio_file_path)
91 |
if __name__ == '__main__':
    # NOTE(review): the original call was truncated to "+'', port=8000)",
    # which is not valid Python. Only the port survived; the host argument
    # could not be recovered, so Flask's default host is used here.
    # TODO confirm the intended bind address before deploying.
    app.run(port=8000)
93 |
94 |
95 |
96 |
97 |
98 |
# Assuming you already have the 'ann_model' trained and 'pca' instance from the previous code
99 |
100 |
# Function to load and preprocess a single audio file
101 |
102 |
# Load VGG16 model
103 |