Spaces:
Paused
Paused
artificialguybr
committed on
Commit
•
fbd6bad
1
Parent(s):
3364e9c
Update app.py
Browse files
app.py
CHANGED
@@ -6,7 +6,7 @@ import uuid
|
|
6 |
from googletrans import Translator
|
7 |
from TTS.api import TTS
|
8 |
import ffmpeg
|
9 |
-
import
|
10 |
from scipy.signal import wiener
|
11 |
import soundfile as sf
|
12 |
from pydub import AudioSegment
|
@@ -26,6 +26,9 @@ ZipFile("ffmpeg.zip").extractall()
|
|
26 |
st = os.stat('ffmpeg')
|
27 |
os.chmod('ffmpeg', st.st_mode | stat.S_IEXEC)
|
28 |
|
|
|
|
|
|
|
29 |
def process_video(radio, video, target_language):
|
30 |
# Check video duration
|
31 |
video_info = ffmpeg.probe(video)
|
@@ -60,11 +63,9 @@ def process_video(radio, video, target_language):
|
|
60 |
shell_command = f"ffmpeg -y -i {run_uuid}_output_audio.wav -af lowpass=3000,highpass=100 {run_uuid}_output_audio_final.wav".split(" ")
|
61 |
subprocess.run([item for item in shell_command], capture_output=False, text=True, check=True)
|
62 |
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
whisper_language = result['language']
|
67 |
-
|
68 |
print(whisper_text)
|
69 |
|
70 |
language_mapping = {'English': 'en', 'Spanish': 'es', 'French': 'fr', 'German': 'de', 'Italian': 'it', 'Portuguese': 'pt', 'Polish': 'pl', 'Turkish': 'tr', 'Russian': 'ru', 'Dutch': 'nl', 'Czech': 'cs', 'Arabic': 'ar', 'Chinese (Simplified)': 'zh-cn'}
|
|
|
6 |
from googletrans import Translator
|
7 |
from TTS.api import TTS
|
8 |
import ffmpeg
|
9 |
+
from faster_whisper import WhisperModel
|
10 |
from scipy.signal import wiener
|
11 |
import soundfile as sf
|
12 |
from pydub import AudioSegment
|
|
|
26 |
st = os.stat('ffmpeg')
|
27 |
os.chmod('ffmpeg', st.st_mode | stat.S_IEXEC)
|
28 |
|
29 |
+
model_size = "small"
|
30 |
+
model = WhisperModel(model_size, device="cuda", compute_type="int8")
|
31 |
+
|
32 |
def process_video(radio, video, target_language):
|
33 |
# Check video duration
|
34 |
video_info = ffmpeg.probe(video)
|
|
|
63 |
shell_command = f"ffmpeg -y -i {run_uuid}_output_audio.wav -af lowpass=3000,highpass=100 {run_uuid}_output_audio_final.wav".split(" ")
|
64 |
subprocess.run([item for item in shell_command], capture_output=False, text=True, check=True)
|
65 |
|
66 |
+
segments, info = model.transcribe(f"{run_uuid}_output_audio_final.wav", beam_size=5)
|
67 |
+
whisper_text = " ".join(segment.text for segment in segments)
|
68 |
+
whisper_language = info.language
|
|
|
|
|
69 |
print(whisper_text)
|
70 |
|
71 |
language_mapping = {'English': 'en', 'Spanish': 'es', 'French': 'fr', 'German': 'de', 'Italian': 'it', 'Portuguese': 'pt', 'Polish': 'pl', 'Turkish': 'tr', 'Russian': 'ru', 'Dutch': 'nl', 'Czech': 'cs', 'Arabic': 'ar', 'Chinese (Simplified)': 'zh-cn'}
|