b1nay committed on
Commit
60620c4
1 Parent(s): 161a13b

speech analysis code complete

Browse files
Files changed (1) hide show
  1. app.py +12 -12
app.py CHANGED
@@ -1,6 +1,7 @@
1
  import streamlit as st
2
  import torch
3
  import librosa
 
4
  from transformers import HubertForSequenceClassification, Wav2Vec2FeatureExtractor
5
  from transformers import pipeline
6
 
@@ -22,23 +23,22 @@ if uploaded_file is not None:
22
  # Display audio player
23
  st.audio(uploaded_file, format='audio/wav')
24
 
25
- # Process the audio
26
- inputs = feature_extractor(speech, sampling_rate=16000, padding=True, return_tensors="pt")
27
-
28
- # Predict emotion
29
  with torch.no_grad():
30
- logits = model(**inputs).logits
 
31
  predicted_ids = torch.argmax(logits, dim=-1)
32
  labels = [model.config.id2label[_id] for _id in predicted_ids.tolist()]
33
 
34
- # Display the result
35
- # st.write("Predicted Emotion:", labels[0])
36
 
37
- # Alternatively using the pipeline
38
- results = classifier(uploaded_file, top_k=5)
 
39
  st.write("Top 5 Predicted Emotions:")
40
  for result in results:
41
  st.write(f"{result['label']}: {result['score']:.4f}")
42
-
43
-
44
-
 
1
  import streamlit as st
2
  import torch
3
  import librosa
4
+ import numpy as np
5
  from transformers import HubertForSequenceClassification, Wav2Vec2FeatureExtractor
6
  from transformers import pipeline
7
 
 
23
  # Display audio player
24
  st.audio(uploaded_file, format='audio/wav')
25
 
26
+ # Convert the audio file to the format expected by the classifier
27
+ inputs = feature_extractor(speech, sampling_rate=16000, padding=True, return_tensors="np")
28
+
29
+ # Predict emotion using the model directly
30
  with torch.no_grad():
31
+ inputs_pt = feature_extractor(speech, sampling_rate=16000, padding=True, return_tensors="pt")
32
+ logits = model(**inputs_pt).logits
33
  predicted_ids = torch.argmax(logits, dim=-1)
34
  labels = [model.config.id2label[_id] for _id in predicted_ids.tolist()]
35
 
36
+ # Display the result from the model directly
37
+ st.write("Predicted Emotion:", labels[0])
38
 
39
+ # Alternatively, using the pipeline
40
+ inputs_ndarray = inputs["input_values"][0]
41
+ results = classifier(inputs_ndarray, top_k=5)
42
  st.write("Top 5 Predicted Emotions:")
43
  for result in results:
44
  st.write(f"{result['label']}: {result['score']:.4f}")