"""Streamlit app: transcribe an uploaded video, optionally translate the
resulting subtitles, and render them onto the video with ffmpeg."""

import functools
import logging
import os

import ffmpeg
import pandas as pd
import pysrt
import streamlit as st
from faster_whisper import WhisperModel
from transformers import MarianMTModel, MarianTokenizer

# Initial configuration and data loading.
# The language list is a pipe-delimited markdown table; the first two rows
# (header + separator) are skipped and the empty edge columns are dropped.
url = "https://huggingface.co/Lenylvt/LanguageISO/resolve/main/iso.md"
df = pd.read_csv(url, delimiter="|", skiprows=2, header=None).dropna(axis=1, how='all')
df.columns = ['ISO 639-1', 'ISO 639-2', 'Language Name', 'Native Name']
df['ISO 639-1'] = df['ISO 639-1'].str.strip()
language_options = df['ISO 639-1'].tolist()
model_size_options = ["tiny", "base", "small", "medium", "large", "large-v2", "large-v3"]

logging.basicConfig(level=logging.DEBUG)


def text_to_srt(text, output_path='/tmp/output.srt'):
    """Convert ``[start -> end] text`` lines into an SRT file.

    Args:
        text: Transcription with one ``[HH:MM:SS.mmm -> HH:MM:SS.mmm] text``
            entry per line (``MM:SS.mmm`` timestamps are also accepted).
        output_path: Where the SRT file is written.

    Returns:
        The path of the written SRT file.

    Blank lines and lines that do not match the expected shape are skipped.
    Cues are numbered sequentially (1, 2, 3, ...) regardless of skipped lines.
    """
    cues = []
    for line in text.split('\n'):
        if line.strip() == "":
            continue
        try:
            times, content = line.split(']', 1)
            # times[1:] drops the leading '['.
            start, end = times[1:].split(' -> ')
        except ValueError:
            # Malformed line (no ']' or no single ' -> ' separator).
            continue
        # SRT requires an hours field; add it when only MM:SS is present.
        if start.count(":") == 1:
            start = "00:" + start
        if end.count(":") == 1:
            end = "00:" + end
        index = len(cues) + 1
        cues.append(
            f"{index}\n{start.replace('.', ',')} --> {end.replace('.', ',')}\n{content.strip()}\n\n"
        )

    with open(output_path, 'w', encoding='utf-8') as file:
        file.write("".join(cues))
    return output_path


def format_timestamp(seconds):
    """Format a duration in seconds as ``HH:MM:SS.mmm``.

    The value is rounded to whole milliseconds *before* being split into
    fields, so a value such as 59.9996 yields ``00:01:00.000`` rather than
    the invalid ``00:00:60.000``.
    """
    total_ms = int(round(seconds * 1000))
    hours, rest = divmod(total_ms, 3_600_000)
    minutes, rest = divmod(rest, 60_000)
    secs, millis = divmod(rest, 1000)
    return f"{hours:02d}:{minutes:02d}:{secs:02d}.{millis:03d}"


@functools.lru_cache(maxsize=2)
def _load_translation_model(source_language_code, target_language_code):
    """Load (and cache) the Marian tokenizer/model for a language pair."""
    model_name = f"Helsinki-NLP/opus-mt-{source_language_code}-{target_language_code}"
    tokenizer = MarianTokenizer.from_pretrained(model_name)
    model = MarianMTModel.from_pretrained(model_name)
    return tokenizer, model


def translate_text(text, source_language_code, target_language_code):
    """Translate a single piece of text with a Helsinki-NLP Marian model.

    Args:
        text: The text to translate (truncated to 512 tokens).
        source_language_code: Language code used in the model name.
        target_language_code: Language code used in the model name.

    Returns:
        The translated text. When the two codes are equal, or the model
        cannot be loaded, a human-readable message is returned instead
        (no exception is raised).
    """
    if source_language_code == target_language_code:
        return "Translation between the same languages is not supported."

    try:
        tokenizer, model = _load_translation_model(source_language_code, target_language_code)
    except Exception as e:
        return f"Failed to load model for {source_language_code} to {target_language_code}: {str(e)}"

    translated = model.generate(**tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512))
    translated_text = tokenizer.decode(translated[0], skip_special_tokens=True)
    return translated_text


def translate_srt(srt_path, source_language_code, target_language_code, batch_size=16):
    """Translate every cue of an SRT file and write the result to a new file.

    Args:
        srt_path: Path of the SRT file to translate.
        source_language_code: Language code of the existing subtitles.
        target_language_code: Language code to translate into.
        batch_size: Number of cues sent to the model per ``generate`` call.

    Returns:
        Path of the translated SRT file (``<name>_<target><ext>`` next to
        the input). Timings and cue numbers are preserved.

    Raises:
        ValueError: If source and target language codes are identical.
        RuntimeError: If the translation model cannot be loaded.
    """
    if source_language_code == target_language_code:
        raise ValueError("Translation between the same languages is not supported.")

    try:
        tokenizer, model = _load_translation_model(source_language_code, target_language_code)
    except Exception as exc:
        raise RuntimeError(
            f"Failed to load model for {source_language_code} to {target_language_code}: {exc}"
        ) from exc

    subtitles = pysrt.open(srt_path, encoding='utf-8')
    # Empty cues are left untouched; there is nothing to translate.
    pending = [sub for sub in subtitles if sub.text.strip()]

    for offset in range(0, len(pending), batch_size):
        batch = pending[offset:offset + batch_size]
        # Join multi-line cues so each cue is translated as one sentence.
        sources = [sub.text.replace("\n", " ") for sub in batch]
        encoded = tokenizer(sources, return_tensors="pt", padding=True, truncation=True, max_length=512)
        generated = model.generate(**encoded)
        decoded = tokenizer.batch_decode(generated, skip_special_tokens=True)
        for sub, translated in zip(batch, decoded):
            sub.text = translated

    root, ext = os.path.splitext(srt_path)
    translated_path = f"{root}_{target_language_code}{ext or '.srt'}"
    subtitles.save(translated_path, encoding='utf-8')
    return translated_path


def transcribe(audio_file_path, model_size="base"):
    """Transcribe a media file with faster-whisper on CPU (int8).

    Args:
        audio_file_path: Path of the audio/video file to transcribe.
        model_size: Whisper model size name passed to ``WhisperModel``.

    Returns:
        One line per segment, formatted ``[start -> end] text`` with
        timestamps produced by ``format_timestamp``.
    """
    device = "cpu"
    compute_type = "int8"
    model = WhisperModel(model_size, device=device, compute_type=compute_type)

    segments, _ = model.transcribe(audio_file_path)
    return "\n".join(
        f"[{format_timestamp(segment.start)} -> {format_timestamp(segment.end)}] {segment.text}"
        for segment in segments
    )


def add_subtitle_to_video(input_video, subtitle_file, subtitle_language, soft_subtitle=False):
    """Attach subtitles to a video and write ``/tmp/<name>_subtitled.mp4``.

    Args:
        input_video: Path of the source video.
        subtitle_file: Path of the subtitle file to add.
        subtitle_language: Language tag written as metadata on the subtitle
            track; only used when ``soft_subtitle`` is true.
        soft_subtitle: If true, mux the subtitles as a ``mov_text`` track
            while copying the other streams; otherwise render them onto the
            video frames with the ``subtitles`` filter.

    Returns:
        Path of the output video. An existing output file is overwritten.
    """
    video_input_stream = ffmpeg.input(input_video)
    input_video_name = os.path.splitext(os.path.basename(input_video))[0]
    output_video = f"/tmp/{input_video_name}_subtitled.mp4"

    if soft_subtitle:
        subtitle_input_stream = ffmpeg.input(subtitle_file)
        stream = ffmpeg.output(
            video_input_stream,
            subtitle_input_stream,
            output_video,
            **{
                "c": "copy",
                "c:s": "mov_text",
                "metadata:s:s:0": f"language={subtitle_language}",
            },
        )
    else:
        # NOTE: the path is interpolated into a filter expression; callers
        # should pass paths without filter-special characters (':', ',', "'").
        stream = ffmpeg.output(video_input_stream, output_video, vf=f"subtitles={subtitle_file}")

    ffmpeg.run(stream, overwrite_output=True)
    return output_video


st.title("Video Subtitle Creation")
st.write("For API use please visit [this space](https://huggingface.co/spaces/Lenylvt/VideoSubtitleCreation-API)")

uploaded_file = st.file_uploader("📹 Upload Video", type=["mp4", "avi", "mov"])
action = st.radio("🧷 Select Action", ["Transcribe and Add Subtitles", "Transcribe, Translate and Add Subtitles"])
source_language = st.selectbox("1️⃣ Source Language", options=language_options, index=language_options.index('en'))
target_language = st.selectbox("2️⃣ Target Language", options=language_options, index=language_options.index('fr'))
model_size = st.selectbox("📜 Model Size", options=model_size_options)

if st.button("📁 Process Video"):
    if uploaded_file is None:
        st.warning("Please upload a video file first.")
    elif action != "Transcribe and Add Subtitles" and source_language == target_language:
        st.error("Translation between the same languages is not supported.")
    else:
        output_video_path = None
        with st.spinner('Processing...'):
            # basename() keeps the upload inside /tmp even if the client
            # supplied a name containing directory components.
            audio_file_path = f"/tmp/{os.path.basename(uploaded_file.name)}"
            with open(audio_file_path, "wb") as f:
                f.write(uploaded_file.getvalue())

            try:
                transcription = transcribe(audio_file_path, model_size)
                srt_path = text_to_srt(transcription)

                if action == "Transcribe and Add Subtitles":
                    output_video_path = add_subtitle_to_video(
                        audio_file_path, srt_path, subtitle_language="eng", soft_subtitle=False
                    )
                else:  # Transcribe, Translate and Add Subtitles
                    # Translate the SRT *contents* cue by cue and pass the
                    # resulting file (not translated text) to ffmpeg.
                    translated_srt_path = translate_srt(srt_path, source_language, target_language)
                    output_video_path = add_subtitle_to_video(
                        audio_file_path, translated_srt_path, target_language, soft_subtitle=False
                    )
            except Exception as exc:  # top-level UI boundary: report, don't crash
                logging.exception("Video processing failed")
                st.error(f"🔴 Processing failed: {exc}")

        if output_video_path is not None:
            st.video(output_video_path)
            st.success("🟢 Processing Completed")