frogcho123 committed on
Commit
bbee8bf
1 Parent(s): a287ca7

Update app.py

Files changed (1)
  1. app.py +53 -39
app.py CHANGED
@@ -1,49 +1,63 @@
  import gradio as gr
- import os
  import whisper
- from pydub import AudioSegment
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+ from gtts import gTTS
+ import sounddevice as sd
+ import scipy.io.wavfile as wav
+ import os

- # Load the Whisper model
- model = whisper.load_model("base")
-
- # Function to process the uploaded audio file and perform transcription
- def process_audio(upload):
-     # Save the uploaded audio file
-     file_path = "uploaded_audio"
-     upload_path = f"{file_path}.mp3"
-     upload.save(upload_path)
-
-     # Convert the audio file to WAV format
-     wav_path = f"{file_path}.wav"
-     audio = AudioSegment.from_file(upload_path)
-     audio.export(wav_path, format="wav")
-
-     # Load the audio file and perform preprocessing
-     audio = whisper.load_audio(wav_path)
+ def translate_speech_to_speech(input_audio):
+     # Save the input audio to a temporary file
+     input_file = "input_audio" + os.path.splitext(input_audio.name)[1]
+     input_audio.save(input_file)
+
+     # Language detection and translation code from the first code snippet
+     model = whisper.load_model("base")
+     audio = whisper.load_audio(input_file)
      audio = whisper.pad_or_trim(audio)
      mel = whisper.log_mel_spectrogram(audio).to(model.device)
-
-     # Detect the spoken language
      _, probs = model.detect_language(mel)
-     detected_language = max(probs, key=probs.get)
-
-     # Perform transcription using Whisper ASR
+
      options = whisper.DecodingOptions()
      result = whisper.decode(model, mel, options)
-     transcription = result.text
-
-     # Delete the temporary audio files
-     os.remove(upload_path)
-     os.remove(wav_path)
-
-     return transcription
-
- # Create a file input component for uploading the audio file
- audio_input = gr.inputs.File(label="Upload Audio", accept=".wav, .mp3")
-
- # Create a text output component for displaying the transcription
- text_output = gr.outputs.Textbox(label="Transcription")
-
- # Create a Gradio interface
- gr.Interface(fn=process_audio, inputs=audio_input, outputs=text_output, title="Audio Transcription").launch()
+
+     text = result.text
+     lang = max(probs, key=probs.get)
+
+     # Translation code from the first code snippet
+     to_lang = 'ru'
+     tokenizer = AutoTokenizer.from_pretrained("alirezamsh/small100")
+     model = AutoModelForSeq2SeqLM.from_pretrained("alirezamsh/small100")
+
+     tokenizer.src_lang = lang
+     encoded_bg = tokenizer(text, return_tensors="pt")
+     generated_tokens = model.generate(**encoded_bg)
+     translated_text = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
+
+     # Text-to-speech (TTS) code from the first code snippet
+     tts = gTTS(text=translated_text, lang=to_lang)
+     output_file = "translated_speech.mp3"
+     tts.save(output_file)
+
+     # Load the translated audio and return as an output
+     translated_audio = open(output_file, "rb")
+
+     return translated_audio
+
+ title = "Speech-to-Speech Translator"
+
+ input_audio = gr.inputs.Audio(type=["mp3", "wav"])
+ output_audio = gr.outputs.Audio(type=["mp3", "wav"])
+
+ stt_demo = gr.Interface(
+     fn=translate_speech_to_speech,
+     inputs=input_audio,
+     outputs=output_audio,
+     title=title,
+     description="Speak in any language, and the translator will convert it to speech in the target language.",
+ )
+
+ if __name__ == "__main__":
+     stt_demo.launch()
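
A note on the detect-and-decode block carried over into this commit: openai-whisper also exposes that whole pipeline as model.transcribe(), which loads, pads, computes the log-mel spectrogram, detects the language, and decodes internally, and which chunks the full file rather than decoding only the first 30-second window that pad_or_trim leaves. A minimal sketch using only that documented API; the input path is a placeholder for this example:

import whisper

model = whisper.load_model("base")

# transcribe() runs load/pad/log-mel/detect/decode internally and
# processes the whole file in 30-second windows.
result = model.transcribe("input_audio.mp3")  # placeholder path

text = result["text"]      # the transcription
lang = result["language"]  # detected language code, e.g. "en"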
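The translation step is worth double-checking: the commit sets tokenizer.src_lang, but nothing in the generate() call ever requests Russian; to_lang is only passed to gTTS. As I recall the alirezamsh/small100 model card, SMALL-100 is a target-language-conditioned M2M100 variant that pairs M2M100ForConditionalGeneration with a dedicated SMALL100Tokenizer (shipped as tokenization_small100.py in the model repo), whose tgt_lang selects the output language. A sketch under those assumptions; treat the tokenizer class and its import as details to confirm against the model card:

# Sketch assuming the SMALL-100 model card's usage: the tokenizer is
# conditioned on the *target* language, so tgt_lang (not src_lang)
# is what selects Russian output.
from transformers import M2M100ForConditionalGeneration
from tokenization_small100 import SMALL100Tokenizer  # file from the model repo

model = M2M100ForConditionalGeneration.from_pretrained("alirezamsh/small100")
tokenizer = SMALL100Tokenizer.from_pretrained("alirezamsh/small100", tgt_lang="ru")

encoded = tokenizer("Hello, how are you?", return_tensors="pt")
generated = model.generate(**encoded)
print(tokenizer.batch_decode(generated, skip_special_tokens=True)[0])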
 
 
 
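Finally, the interface wiring is likely to fail on any recent Gradio: the gr.inputs and gr.outputs namespaces are deprecated (and later removed), Audio's type parameter takes "filepath" or "numpy" rather than a list of extensions, and with type="filepath" the handler receives a plain path string, which has no .save() or .name to call. A minimal sketch of the same interface against that API, with a placeholder body standing in for the Whisper -> SMALL-100 -> gTTS pipeline:

import gradio as gr

def translate_speech_to_speech(input_path: str) -> str:
    # Placeholder: echo the input back. The commit's pipeline would run
    # here and return the path of the generated translated_speech.mp3.
    return input_path

demo = gr.Interface(
    fn=translate_speech_to_speech,
    inputs=gr.Audio(type="filepath", label="Source speech"),
    outputs=gr.Audio(type="filepath", label="Translated speech"),
    title="Speech-to-Speech Translator",
    description="Speak in any language, and the translator will convert it to speech in the target language.",
)

if __name__ == "__main__":
    demo.launch()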