import gradio as gr
import subprocess
import whisper
from googletrans import Translator
import asyncio
import edge_tts
import os
# Extract and Transcribe Audio
def extract_and_transcribe_audio(video_path):
    # Pull the audio track out of the video as a 48 kHz, 24-bit WAV file.
    # Passing the arguments as a list avoids shell quoting/injection issues.
    ffmpeg_command = [
        "ffmpeg", "-i", video_path,
        "-acodec", "pcm_s24le", "-ar", "48000",
        "-q:a", "0", "-map", "a", "-y", "output_audio.wav",
    ]
    subprocess.run(ffmpeg_command, check=True)
    # Whisper transcribes the audio and detects the source language
    model = whisper.load_model("base")
    result = model.transcribe("output_audio.wav")
    return result["text"], result["language"]
# Translate Text
def translate_text(whisper_text, whisper_language, target_language):
    # Map the human-readable dropdown choice to a googletrans language code
    language_mapping = {
        'English': 'en',
        'Spanish': 'es',
        # ... (other mappings)
    }
    target_language_code = language_mapping[target_language]
    # Whisper already reports the detected language as a short code
    # (e.g. 'en'), which googletrans accepts directly as `src`
    translator = Translator()
    translated_text = translator.translate(whisper_text, src=whisper_language, dest=target_language_code).text
    return translated_text
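# A quick, isolated sanity check of the translation step. The sample sentence
# and the expected output below are illustrative assumptions, not app data,
# and googletrans calls a live web service, so results can vary:
# >>> translate_text("Hello world", "en", "Spanish")
# 'Hola mundo'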
# Generate Voice
async def generate_voice(translated_text, target_language):
    # Map the dropdown choice to an edge-tts neural voice name
    VOICE_MAPPING = {
        'English': 'en-GB-SoniaNeural',
        'Spanish': 'es-ES-PabloNeural',
        # ... (other mappings)
    }
    voice = VOICE_MAPPING[target_language]
    communicate = edge_tts.Communicate(translated_text, voice)
    await communicate.save("output_synth.wav")
    return "output_synth.wav"
# Generate Lip-synced Video (Placeholder)
def generate_lip_synced_video(video_path, output_audio_path):
    # Your lip-synced video generation code here
    # ...
    return "output_high_qual.mp4"
# Main function to be called by Gradio
def process_video(video_path, target_language):
    # Gradio's Video component passes the upload as a path on disk,
    # so it can be handed straight to ffmpeg
    # Step 1: Extract and Transcribe Audio
    whisper_text, whisper_language = extract_and_transcribe_audio(video_path)
    # Step 2: Translate Text
    translated_text = translate_text(whisper_text, whisper_language, target_language)
    # Step 3: Generate Voice (runs the async TTS call to completion)
    output_audio_path = asyncio.run(generate_voice(translated_text, target_language))
    # Step 4: Generate Lip-synced Video
    output_video_path = generate_lip_synced_video(video_path, output_audio_path)
    return output_video_path
# Gradio Interface
iface = gr.Interface(
    fn=process_video,
    inputs=[
        gr.Video(label="Input video"),
        gr.Dropdown(choices=["English", "Spanish"], label="Target language"),
    ],
    outputs=gr.Video(label="Translated video"),
    live=False,
)
iface.launch()
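# On a hosted runtime such as a Hugging Face Space, the server usually needs
# to bind to all network interfaces. This is an assumption about the
# deployment target, not part of the original snippet:
# iface.launch(server_name="0.0.0.0", server_port=7860)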