Demosthene-OR
committed on
Commit
•
a545669
1
Parent(s):
d3cbfff
Identification de la langue parlée uniquement avec whisper
Browse files- tabs/modelisation_seq2seq_tab.py +20 -13
tabs/modelisation_seq2seq_tab.py
CHANGED
@@ -7,7 +7,7 @@ from transformers import pipeline
|
|
7 |
# from translate import Translator
|
8 |
from deep_translator import GoogleTranslator
|
9 |
from audio_recorder_streamlit import audio_recorder
|
10 |
-
import speech_recognition as sr
|
11 |
import whisper
|
12 |
import io
|
13 |
import wavio
|
@@ -488,32 +488,39 @@ def run():
|
|
488 |
st.write("## **"+tr("Résultats")+" :**\n")
|
489 |
st.audio(audio_bytes, format="audio/wav")
|
490 |
try:
|
491 |
-
if detection:
|
492 |
-
|
493 |
-
|
494 |
|
495 |
-
|
496 |
-
|
497 |
|
498 |
-
|
499 |
-
|
500 |
|
501 |
-
|
502 |
-
|
503 |
-
|
504 |
-
|
505 |
result = model_speech.transcribe(audio_input)
|
506 |
st.write(tr("Langue détectée")+" : "+result["language"])
|
507 |
Lang_detected = result["language"]
|
508 |
# Transcription Whisper (si result a été préalablement calculé)
|
509 |
custom_sentence = result["text"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
510 |
else:
|
511 |
Lang_detected = l_src
|
512 |
# Transcription google
|
513 |
audio_stream = sr.AudioData(audio_bytes, 32000, 2)
|
514 |
r = sr.Recognizer()
|
515 |
custom_sentence = r.recognize_google(audio_stream, language = Lang_detected)
|
516 |
-
|
517 |
if custom_sentence!="":
|
518 |
# Lang_detected = lang_classifier (custom_sentence)[0]['label']
|
519 |
#st.write('Langue détectée : **'+Lang_detected+'**')
|
|
|
7 |
# from translate import Translator
|
8 |
from deep_translator import GoogleTranslator
|
9 |
from audio_recorder_streamlit import audio_recorder
|
10 |
+
# import speech_recognition as sr
|
11 |
import whisper
|
12 |
import io
|
13 |
import wavio
|
|
|
488 |
st.write("## **"+tr("Résultats")+" :**\n")
|
489 |
st.audio(audio_bytes, format="audio/wav")
|
490 |
try:
|
491 |
+
# if detection:
|
492 |
+
# Create a BytesIO object from the audio stream
|
493 |
+
audio_stream_bytesio = io.BytesIO(audio_bytes)
|
494 |
|
495 |
+
# Read the WAV stream using wavio
|
496 |
+
wav = wavio.read(audio_stream_bytesio)
|
497 |
|
498 |
+
# Extract the audio data from the wavio.Wav object
|
499 |
+
audio_data = wav.data
|
500 |
|
501 |
+
# Convert the audio data to a NumPy array
|
502 |
+
audio_input = np.array(audio_data, dtype=np.float32)
|
503 |
+
audio_input = np.mean(audio_input, axis=1)/32768
|
504 |
+
if detection:
|
505 |
result = model_speech.transcribe(audio_input)
|
506 |
st.write(tr("Langue détectée")+" : "+result["language"])
|
507 |
Lang_detected = result["language"]
|
508 |
# Transcription Whisper (si result a été préalablement calculé)
|
509 |
custom_sentence = result["text"]
|
510 |
+
else:
|
511 |
+
Lang_detected = l_src
|
512 |
+
model_speech.set_language(Lang_detected)
|
513 |
+
result = model_speech.transcribe(audio_input)
|
514 |
+
custom_sentence = result["text"]
|
515 |
+
model_speech.reset_language()
|
516 |
+
"""
|
517 |
else:
|
518 |
Lang_detected = l_src
|
519 |
# Transcription google
|
520 |
audio_stream = sr.AudioData(audio_bytes, 32000, 2)
|
521 |
r = sr.Recognizer()
|
522 |
custom_sentence = r.recognize_google(audio_stream, language = Lang_detected)
|
523 |
+
"""
|
524 |
if custom_sentence!="":
|
525 |
# Lang_detected = lang_classifier (custom_sentence)[0]['label']
|
526 |
#st.write('Langue détectée : **'+Lang_detected+'**')
|