artificialguybr committed on
Commit
fbd6bad
1 Parent(s): 3364e9c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -6
app.py CHANGED
@@ -6,7 +6,7 @@ import uuid
6
  from googletrans import Translator
7
  from TTS.api import TTS
8
  import ffmpeg
9
- import whisper
10
  from scipy.signal import wiener
11
  import soundfile as sf
12
  from pydub import AudioSegment
@@ -26,6 +26,9 @@ ZipFile("ffmpeg.zip").extractall()
26
  st = os.stat('ffmpeg')
27
  os.chmod('ffmpeg', st.st_mode | stat.S_IEXEC)
28
 
 
 
 
29
  def process_video(radio, video, target_language):
30
  # Check video duration
31
  video_info = ffmpeg.probe(video)
@@ -60,11 +63,9 @@ def process_video(radio, video, target_language):
60
  shell_command = f"ffmpeg -y -i {run_uuid}_output_audio.wav -af lowpass=3000,highpass=100 {run_uuid}_output_audio_final.wav".split(" ")
61
  subprocess.run([item for item in shell_command], capture_output=False, text=True, check=True)
62
 
63
- model = whisper.load_model("base")
64
- result = model.transcribe(f"{run_uuid}_output_audio_final.wav")
65
- whisper_text = result["text"]
66
- whisper_language = result['language']
67
-
68
  print(whisper_text)
69
 
70
  language_mapping = {'English': 'en', 'Spanish': 'es', 'French': 'fr', 'German': 'de', 'Italian': 'it', 'Portuguese': 'pt', 'Polish': 'pl', 'Turkish': 'tr', 'Russian': 'ru', 'Dutch': 'nl', 'Czech': 'cs', 'Arabic': 'ar', 'Chinese (Simplified)': 'zh-cn'}
 
6
  from googletrans import Translator
7
  from TTS.api import TTS
8
  import ffmpeg
9
+ from faster_whisper import WhisperModel
10
  from scipy.signal import wiener
11
  import soundfile as sf
12
  from pydub import AudioSegment
 
26
  st = os.stat('ffmpeg')
27
  os.chmod('ffmpeg', st.st_mode | stat.S_IEXEC)
28
 
29
+ model_size = "small"
30
+ model = WhisperModel(model_size, device="cuda", compute_type="int8")
31
+
32
  def process_video(radio, video, target_language):
33
  # Check video duration
34
  video_info = ffmpeg.probe(video)
 
63
  shell_command = f"ffmpeg -y -i {run_uuid}_output_audio.wav -af lowpass=3000,highpass=100 {run_uuid}_output_audio_final.wav".split(" ")
64
  subprocess.run([item for item in shell_command], capture_output=False, text=True, check=True)
65
 
66
+ segments, info = model.transcribe(f"{run_uuid}_output_audio_final.wav", beam_size=5)
67
+ whisper_text = " ".join(segment.text for segment in segments)
68
+ whisper_language = info.language
 
 
69
  print(whisper_text)
70
 
71
  language_mapping = {'English': 'en', 'Spanish': 'es', 'French': 'fr', 'German': 'de', 'Italian': 'it', 'Portuguese': 'pt', 'Polish': 'pl', 'Turkish': 'tr', 'Russian': 'ru', 'Dutch': 'nl', 'Czech': 'cs', 'Arabic': 'ar', 'Chinese (Simplified)': 'zh-cn'}