Spaces:

b1nay
/

testa

Sleeping

b1nay committed on Jun 12, 2024

Commit

60620c4

1 Parent(s): 161a13b

speech analysis code complete

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,6 +1,7 @@
 import streamlit as st
 import torch
 import librosa
 from transformers import HubertForSequenceClassification, Wav2Vec2FeatureExtractor
 from transformers import pipeline
@@ -22,23 +23,22 @@ if uploaded_file is not None:
     # Display audio player
     st.audio(uploaded_file, format='audio/wav')
-    # Process the audio
-    inputs = feature_extractor(speech, sampling_rate=16000, padding=True, return_tensors="pt")
-    # Predict emotion
     with torch.no_grad():
-        logits = model(**inputs).logits
         predicted_ids = torch.argmax(logits, dim=-1)
         labels = [model.config.id2label[_id] for _id in predicted_ids.tolist()]
-    # Display the result
-    # st.write("Predicted Emotion:", labels[0])
-    # Alternatively using the pipeline
-    results = classifier(uploaded_file, top_k=5)
     st.write("Top 5 Predicted Emotions:")
     for result in results:
         st.write(f"{result['label']}: {result['score']:.4f}")

 import streamlit as st
 import torch
 import librosa
+import numpy as np
 from transformers import HubertForSequenceClassification, Wav2Vec2FeatureExtractor
 from transformers import pipeline
     # Display audio player
     st.audio(uploaded_file, format='audio/wav')
+    # Convert the audio file to the format expected by the classifier
+    inputs = feature_extractor(speech, sampling_rate=16000, padding=True, return_tensors="np")
+    # Predict emotion using the model directly
     with torch.no_grad():
+        inputs_pt = feature_extractor(speech, sampling_rate=16000, padding=True, return_tensors="pt")
+        logits = model(**inputs_pt).logits
         predicted_ids = torch.argmax(logits, dim=-1)
         labels = [model.config.id2label[_id] for _id in predicted_ids.tolist()]
+    # Display the result from the model directly
+    st.write("Predicted Emotion:", labels[0])
+    # Alternatively, using the pipeline
+    inputs_ndarray = inputs["input_values"][0]
+    results = classifier(inputs_ndarray, top_k=5)
     st.write("Top 5 Predicted Emotions:")
     for result in results:
         st.write(f"{result['label']}: {result['score']:.4f}")