Spaces:
Runtime error
Runtime error
artificialguybr
committed on
Commit
•
88a4625
1
Parent(s):
f64cb13
Update appf.py
Browse files
appf.py
CHANGED
@@ -1,75 +1,66 @@
|
|
1 |
import gradio as gr
|
2 |
import subprocess
|
3 |
-
import whisper
|
4 |
-
from googletrans import Translator
|
5 |
-
import asyncio
|
6 |
-
import edge_tts
|
7 |
import os
|
|
|
|
|
|
|
|
|
|
|
8 |
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
# Translate Text
|
18 |
-
def translate_text(whisper_text, whisper_language, target_language):
    """Translate *whisper_text* into *target_language* via googletrans.

    whisper_language is the source-language code reported by Whisper;
    target_language is a human-readable name that must be a key of the
    mapping below (KeyError otherwise). Returns the translated string.
    """
    # Human-readable language name -> googletrans destination code.
    codes = {
        'English': 'en',
        'Spanish': 'es',
        # ... (other mappings)
    }
    dest_code = codes[target_language]
    return Translator().translate(
        whisper_text, src=whisper_language, dest=dest_code
    ).text
|
28 |
-
|
29 |
-
# Generate Voice
|
30 |
-
async def generate_voice(translated_text, target_language):
    """Synthesize *translated_text* with edge-tts and save it to
    "output_synth.wav".

    target_language must be a key of the voice map below (KeyError
    otherwise). Returns the output filename.
    """
    # Target-language name -> edge-tts neural voice identifier.
    voices = {
        'English': 'en-GB-SoniaNeural',
        'Spanish': 'es-ES-PabloNeural',
        # ... (other mappings)
    }
    speaker = edge_tts.Communicate(translated_text, voices[target_language])
    await speaker.save("output_synth.wav")
    return "output_synth.wav"
|
40 |
|
41 |
-
|
42 |
-
def generate_lip_synced_video(video_path, output_audio_path):
    """Produce a lip-synced video combining *video_path* with
    *output_audio_path*.

    NOTE(review): this is a placeholder — no lip-sync work is performed
    here; it only returns the expected output path.
    """
    # Your lip-synced video generation code here
    # ...
    return "output_high_qual.mp4"
|
46 |
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
f.write(video.read())
|
52 |
|
53 |
-
|
54 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
55 |
|
56 |
-
|
57 |
-
|
58 |
|
59 |
-
|
60 |
-
loop = asyncio.get_event_loop()
|
61 |
-
output_audio_path = loop.run_until_complete(generate_voice(translated_text, target_language))
|
62 |
|
63 |
-
|
64 |
-
output_video_path = generate_lip_synced_video(video_path, output_audio_path)
|
65 |
|
66 |
-
|
|
|
67 |
|
68 |
-
# Gradio Interface
|
69 |
iface = gr.Interface(
|
70 |
-
fn=process_video,
|
71 |
-
inputs=[
|
72 |
-
|
|
|
|
|
|
|
|
|
73 |
live=False
|
74 |
)
|
75 |
-
|
|
|
|
1 |
import gradio as gr
|
2 |
import subprocess
|
|
|
|
|
|
|
|
|
3 |
import os
|
4 |
+
from googletrans import Translator
|
5 |
+
from TTS.api import TTS
|
6 |
+
from IPython.display import Audio, display
|
7 |
+
import ffmpeg
|
8 |
+
import whisper
|
9 |
|
10 |
+
def process_video(video, high_quality, target_language):
    """Dub *video* into *target_language*.

    Pipeline: optionally rescale to 720p, extract the audio track,
    transcribe it with Whisper, translate the transcript, synthesize
    dubbed speech with XTTS (cloning the original speaker's voice),
    then lip-sync the video to the new audio via an external
    inference.py script.

    Parameters
    ----------
    video : str
        Path to the input video file.
    high_quality : bool
        If True, rescale the video to 720p before processing.
    target_language : str
        Human-readable language name; must be a key of language_mapping.

    Returns
    -------
    str
        Path to the dubbed video on success, or the error message on
        failure (the Gradio UI surfaces either value).
    """
    try:
        output_filename = "resized_video.mp4"
        if high_quality:
            # Height 720, width -1 keeps the aspect ratio.
            ffmpeg.input(video).output(output_filename, vf='scale=-1:720').run()
            video_path = output_filename
        else:
            video_path = video

        # Extract the audio stream as 24-bit PCM @ 48 kHz; it serves both
        # as Whisper input and as the XTTS speaker-cloning reference.
        ffmpeg.input(video_path).output(
            'output_audio.wav', acodec='pcm_s24le', ar=48000, map='a'
        ).run()

        model = whisper.load_model("base")
        result = model.transcribe("output_audio.wav")
        whisper_text = result["text"]
        whisper_language = result['language']

        # Human-readable name -> ISO code used by googletrans and XTTS.
        language_mapping = {
            'English': 'en',
            'Spanish': 'es',
            'French': 'fr',
            'German': 'de',
            'Italian': 'it',
            'Portuguese': 'pt',
            'Polish': 'pl',
            'Turkish': 'tr',
            'Russian': 'ru',
            'Dutch': 'nl',
            'Czech': 'cs',
            'Arabic': 'ar',
            'Chinese (Simplified)': 'zh-cn'
        }
        target_language_code = language_mapping[target_language]
        translator = Translator()
        translated_text = translator.translate(
            whisper_text, src=whisper_language, dest=target_language_code
        ).text

        tts = TTS("tts_models/multilingual/multi-dataset/xtts_v1", gpu=True)
        tts.tts_to_file(translated_text, speaker_wav='output_audio.wav',
                        file_path="output_synth.wav",
                        language=target_language_code)

        # Run the lip-sync script with an argument list (shell=False) so a
        # user-controlled video_path cannot be interpreted by the shell,
        # and check=True so a failed run raises instead of silently
        # returning a stale/missing output file.
        subprocess.run(
            ["python", "inference.py",
             "--face", video_path,
             "--audio", "output_synth.wav",
             "--outfile", "output_high_qual.mp4"],
            check=True,
        )

        return "output_high_qual.mp4"
    except Exception as e:
        # Preserved best-effort contract: the UI shows the error message
        # in place of the output file.
        return str(e)
|
54 |
|
|
|
55 |
# Gradio UI: video + options in, dubbed video file out.
# Uses the top-level component classes consistently — the original mixed
# gr.Video() with the deprecated gr.inputs/gr.outputs aliases, which are
# removed in newer Gradio releases.
iface = gr.Interface(
    fn=process_video,
    inputs=[
        gr.Video(),
        gr.Checkbox(label="High Quality"),
        gr.Dropdown(
            choices=["English", "Spanish", "French", "German", "Italian",
                     "Portuguese", "Polish", "Turkish", "Russian", "Dutch",
                     "Czech", "Arabic", "Chinese (Simplified)"],
            label="Target Language for Dubbing",
        ),
    ],
    outputs=gr.File(),
    live=False,
)

# share=True publishes a temporary public link alongside the local server.
iface.launch(share=True)
|