saronium committed · verified
Commit 6abefd9 · 1 Parent(s): 66ca40c

Audio: https://cdn-uploads.huggingface.co/production/uploads/6586f45c65df457a558c1a24/ZQ0AmeGvvtVf02fGsHMuW.mpga

Files changed (1)
  1. app.py +103 -0
app.py ADDED
@@ -0,0 +1,103 @@
+ from flask import Flask, request, jsonify, render_template
+ import os
+ import torch
+ import librosa
+ import numpy as np
+ from torchvision import models
+ from scipy.ndimage import zoom
+ import joblib
+
+ # Mapping from language name to the class index used during training;
+ # the 'ann_model' and 'pca' artifacts come from the earlier training code.
+ language_mapping = {'malayalam': 0, 'english': 1, 'tamil': 2, 'hindi': 3, 'kannada': 4, 'telugu': 5}
+
+ app = Flask(__name__)
+
+ # Load the trained classifier and the fitted PCA instance
+ ann_model = torch.load('ann_model.pth')
+ pca = joblib.load('pca.pkl')
+
+ # Load the VGG16 convolutional layers as a fixed feature extractor
+ vgg16 = models.vgg16(pretrained=True).features
+
+
+ def preprocess_single_audio_vgg16(audio_file, vgg16_model, pca_instance):
+     # Load the audio and compute a log-scaled, normalized Mel spectrogram
+     y, sr = librosa.load(audio_file, sr=None)
+     mel_spec = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128)
+     log_mel_spec = librosa.power_to_db(mel_spec, ref=np.max)
+     norm_mel_spec = (log_mel_spec - np.mean(log_mel_spec)) / np.std(log_mel_spec)
+
+     # Resize the Mel spectrogram to the target shape (128, 128) using zoom
+     target_shape = (128, 128)
+     resized_mel_spec = zoom(norm_mel_spec,
+                             (target_shape[0] / norm_mel_spec.shape[0],
+                              target_shape[1] / norm_mel_spec.shape[1]),
+                             mode='nearest')
+
+     # Stack the single channel three times to match VGG16's RGB input layout
+     mel_spec_rgb = np.stack([resized_mel_spec] * 3, axis=-1)
+
+     # HWC -> CHW, add a batch dimension, and cast to float for the model
+     mel_spec_tensor = torch.from_numpy(mel_spec_rgb).permute(2, 0, 1).unsqueeze(0).float()
+
+     # Extract convolutional features with VGG16 (no gradients needed at inference)
+     vgg16_model.eval()
+     with torch.no_grad():
+         features = vgg16_model(mel_spec_tensor)
+
+     # Flatten the feature map into a single row vector
+     features_np = features.squeeze().detach().numpy()
+     features_flattened = features_np.flatten().reshape(1, -1)
+
+     # Reduce dimensionality with the PCA fitted at training time
+     features_pca = pca_instance.transform(features_flattened)
+
+     # Convert back to a PyTorch tensor for the classifier
+     features_tensor = torch.from_numpy(features_pca).float()
+
+     return features_tensor
+
+
+ @app.route('/')
+ def home():
+     return render_template('index.html')
+
+
+ @app.route('/predict', methods=['POST'])
+ def predict():
+     try:
+         # Get the uploaded audio file and save it to a temporary path
+         audio_file = request.files['file']
+         audio_file_path = 'temp.wav'
+         audio_file.save(audio_file_path)
+
+         # Preprocess the audio into a PCA-reduced feature vector
+         preprocessed_features = preprocess_single_audio_vgg16(audio_file_path, vgg16, pca)
+
+         # Run the classifier and take the most likely class
+         ann_model.eval()
+         with torch.no_grad():
+             output = ann_model(preprocessed_features)
+         _, predicted_class = torch.max(output, 1)
+
+         # Map the predicted class index back to its language label
+         predicted_label = {v: k for k, v in language_mapping.items()}[predicted_class.item()]
+
+         # Delete the temporary audio file
+         os.remove(audio_file_path)
+
+         # Return the prediction as JSON
+         return jsonify({'prediction': predicted_label})
+     except KeyError:
+         return jsonify({'error': 'Audio file not found in the request'}), 400
+     except Exception as e:
+         return jsonify({'error': str(e)}), 500
+
+
+ if __name__ == '__main__':
+     app.run(host='0.0.0.0', port=8000)
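The app assumes 'ann_model.pth' and 'pca.pkl' already exist from the earlier training code. Below is a minimal sketch of how compatible artifacts might be produced; the LanguageANN class, the 256-component PCA, and the random stand-in features are all illustrative assumptions, not the actual training pipeline. Note that torch.load() on a full pickled module only succeeds if the module's class is importable where app.py runs (and, on recent PyTorch versions, it may require weights_only=False).

import numpy as np
import torch
import torch.nn as nn
import joblib
from sklearn.decomposition import PCA

# Hypothetical classifier head; the real architecture lives in the training code.
class LanguageANN(nn.Module):
    def __init__(self, input_dim=256, num_classes=6):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim, 128),
            nn.ReLU(),
            nn.Linear(128, num_classes),
        )

    def forward(self, x):
        return self.net(x)

# For a (1, 3, 128, 128) input, vgg16.features emits a (1, 512, 4, 4) map,
# i.e. 512 * 4 * 4 = 8192 values once flattened; random stand-in features here.
X_features = np.random.randn(300, 8192).astype(np.float32)

pca = PCA(n_components=256).fit(X_features)
joblib.dump(pca, 'pca.pkl')

ann_model = LanguageANN(input_dim=256, num_classes=6)
# ... training loop omitted ...

# Saving the whole module (not just a state_dict) is what lets app.py restore it
# with a bare torch.load('ann_model.pth'); the class must be importable at load time.
torch.save(ann_model, 'ann_model.pth')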
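Once the server is running, the /predict endpoint can be exercised with any HTTP client. A minimal sketch follows, assuming a local speech clip exists ('clip.wav' is a hypothetical filename); the multipart form field must be named 'file' to match request.files['file'] in app.py.

import requests

# Any short speech clip that librosa can decode; 'clip.wav' is a placeholder
with open('clip.wav', 'rb') as f:
    response = requests.post('http://localhost:8000/predict', files={'file': f})

print(response.status_code)  # 200 on success, 400 if the 'file' field is missing
print(response.json())       # e.g. {'prediction': 'tamil'}

The same request from the command line: curl -F 'file=@clip.wav' http://localhost:8000/predict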