Commit ec595bb · 1 parent: 14f4ac2
Revert changes to requirements and seq2seq
Files changed:
- requirements.txt (+1, -0)
- tabs/exploration_tab.py (+1, -0)
- tabs/modelisation_seq2seq_tab.py (+15, -3)
requirements.txt  CHANGED

@@ -21,6 +21,7 @@ tensorflow==2.12.0
 sentencepiece==0.1.99
 openai-whisper==20231117
 torch==2.2.0
+speechrecognition==3.10.1
 audio_recorder_streamlit==0.0.8
 whisper==1.1.10
 wavio==0.0.8
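
A note on the new pin: it targets the PyPI package SpeechRecognition, whose distribution name differs from its import name, which is an easy thing to trip over. A minimal sanity check, assuming a standard Python environment:

    # Quick check that the pinned dependency resolves; the PyPI name
    # (speechrecognition) differs from the import name (speech_recognition).
    import speech_recognition as sr

    print(sr.__version__)   # expected: 3.10.1 under this pin
    r = sr.Recognizer()     # the object the seq2seq tab constructs below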
tabs/exploration_tab.py  CHANGED

@@ -7,6 +7,7 @@ from nltk import download
 from ast import literal_eval
 from translate_app import tr
 if st.session_state.Cloud == 0:
+    # import nltk
     import contextlib
     import re
     from nltk.corpus import stopwords
tabs/modelisation_seq2seq_tab.py  CHANGED

@@ -7,7 +7,7 @@ from transformers import pipeline
 # from translate import Translator
 from deep_translator import GoogleTranslator
 from audio_recorder_streamlit import audio_recorder
-
+import speech_recognition as sr
 import whisper
 import io
 import wavio
@@ -19,6 +19,7 @@ from tensorflow import keras
 from keras_nlp.layers import TransformerEncoder
 from tensorflow.keras import layers
 from tensorflow.keras.utils import plot_model
+# from PIL import Image
 from gtts import gTTS
 from extra_streamlit_components import tab_bar, TabBarItemData
 from translate_app import tr
@@ -488,7 +489,6 @@ def run():
         st.write("## **"+tr("Résultats")+" :**\n")
         st.audio(audio_bytes, format="audio/wav")
         try:
-            # if detection:
             # Create a BytesIO object from the audio stream
             audio_stream_bytesio = io.BytesIO(audio_bytes)
 
@@ -501,16 +501,28 @@ def run():
             # Convert the audio data to a NumPy array
             audio_input = np.array(audio_data, dtype=np.float32)
             audio_input = np.mean(audio_input, axis=1)/32768
-
+
+            if detection:
                 result = model_speech.transcribe(audio_input)
                 st.write(tr("Langue détectée")+" : "+result["language"])
                 Lang_detected = result["language"]
                 # Whisper transcription (if result was computed beforehand)
                 custom_sentence = result["text"]
             else:
+                # Using Google's speech_recognition library
+                Lang_detected = l_src
+                # Google transcription
+                audio_stream = sr.AudioData(audio_bytes, 32000, 2)
+                r = sr.Recognizer()
+                custom_sentence = r.recognize_google(audio_stream, language = Lang_detected)
+
+                # Without the speech_recognition library, with Whisper only
+                '''
                 Lang_detected = l_src
                 result = model_speech.transcribe(audio_input, language=Lang_detected)
                 custom_sentence = result["text"]
+                '''
+
             if custom_sentence!="":
                 # Lang_detected = lang_classifier (custom_sentence)[0]['label']
                 #st.write('Langue détectée : **'+Lang_detected+'**')
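
Taken together, the new branch transcribes with Whisper when language detection is requested, and otherwise sends the raw recording to Google's recognizer through speech_recognition, keeping the Whisper-only variant as a commented-out alternative. Below is a minimal, self-contained sketch of those two paths, not the app's exact code: it assumes 16-bit stereo WAV bytes at 32 kHz (matching the sr.AudioData(audio_bytes, 32000, 2) call above), and the function name transcribe_recording and the "base" checkpoint are illustrative choices.

    import io
    import numpy as np
    import speech_recognition as sr
    import wavio
    import whisper

    model_speech = whisper.load_model("base")  # illustrative checkpoint choice

    def transcribe_recording(audio_bytes: bytes, detection: bool, l_src: str):
        """Return (language, text) for a recorded WAV byte stream."""
        # Decode the WAV container, then fold stereo int16 samples into
        # mono float32 in [-1, 1], the array format Whisper accepts.
        wav = wavio.read(io.BytesIO(audio_bytes))
        audio_input = np.mean(wav.data.astype(np.float32), axis=1) / 32768
        # Note: Whisper assumes 16 kHz input for raw arrays; resample first
        # if the recorder produces another rate.

        if detection:
            # Whisper detects the language and transcribes in one call.
            result = model_speech.transcribe(audio_input)
            return result["language"], result["text"]

        # Google path: wrap the recording (32 kHz, 2-byte samples) and let
        # the online recognizer transcribe in the caller-supplied language.
        audio_stream = sr.AudioData(audio_bytes, 32000, 2)
        r = sr.Recognizer()
        return l_src, r.recognize_google(audio_stream, language=l_src)

One caveat: recognize_google is an online call and raises sr.UnknownValueError or sr.RequestError on failure, which is presumably why the tab keeps this logic inside a try block.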