Spaces:

Lenylvt
/

SRT_Translation-API

Runtime error

File size: 2,992 Bytes

41ec54b
 
7cb0b8e
41ec54b
 
9c9b591
7cb0b8e
41ec54b
 
 
 
 
 
9dc25d9
41ec54b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d4af723
41ec54b
 
362c063
 
 
 
 
 
9c9b591
 
 
 
 
 
362c063
41ec54b
 
362c063
 
41ec54b
 
 
 
d4af723
9895fa7
41ec54b
 
608d9e3
41ec54b
 
 
 
 
 
9895fa7
 
41ec54b

import functools
import io
import os
import tempfile

import gradio as gr
import pandas as pd
import pysrt
import requests
from transformers import MarianMTModel, MarianTokenizer

# Fetch and parse language options
url = "https://huggingface.co/Lenylvt/LanguageISO/resolve/main/iso.md"
# A timeout keeps start-up from hanging indefinitely on a stalled connection,
# and raise_for_status() stops an HTTP error page from being parsed as the table.
response = requests.get(url, timeout=30)
response.raise_for_status()
# The document is read as pipe-delimited text. The first two lines are skipped
# (presumably the Markdown header and separator rows - confirm against the
# hosted file) and columns that contain no values at all are dropped.
df = pd.read_csv(io.StringIO(response.text), delimiter="|", skiprows=2, header=None).dropna(axis=1, how='all')
df.columns = ['ISO 639-1', 'ISO 639-2', 'Language Name', 'Native Name']
df['ISO 639-1'] = df['ISO 639-1'].str.strip()

# Prepare language options for the dropdown.
# Gradio interprets a tuple choice as (displayed name, value): the readable
# "code - name" label is what the user sees, and the bare ISO 639-1 code is
# what gets passed to the callback to build the model name.
language_options = [
    (f"{code} - {name.strip()}", code)
    for code, name in zip(df['ISO 639-1'], df['Language Name'])
]

@functools.lru_cache(maxsize=4)
def _load_marian(model_name):
    """Load the tokenizer/model pair for ``model_name``, memoising the result.

    The cache is bounded so that only a few language pairs are kept in memory
    at once. A failed load raises and is therefore not cached.
    """
    tokenizer = MarianTokenizer.from_pretrained(model_name)
    model = MarianMTModel.from_pretrained(model_name)
    return tokenizer, model


def translate_text(text, source_language_code, target_language_code):
    """Translate ``text`` with the Helsinki-NLP opus-mt model for the language pair.

    Args:
        text: The text to translate.
        source_language_code: ISO 639-1 code of the source language.
        target_language_code: ISO 639-1 code of the target language.

    Returns:
        The translated text. If both codes are equal, or the model for the
        pair cannot be loaded, a human-readable error message is returned
        instead of raising. Empty or whitespace-only ``text`` is returned
        unchanged.
    """
    # Check if source and target languages are the same
    if source_language_code == target_language_code:
        return "Translation between the same languages is not supported."

    # Nothing to translate: skip the model entirely.
    if not text or not text.strip():
        return text

    # Construct model name using ISO 639-1 codes
    model_name = f"Helsinki-NLP/opus-mt-{source_language_code}-{target_language_code}"

    # Load tokenizer and model (cached, so repeated calls for the same
    # language pair do not reload the weights every time).
    try:
        tokenizer, model = _load_marian(model_name)
    except Exception as e:
        return f"Failed to load model for {source_language_code} to {target_language_code}: {str(e)}"

    # Translate text; input longer than 512 tokens is truncated by the tokenizer.
    encoded = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)
    translated = model.generate(**encoded)
    return tokenizer.decode(translated[0], skip_special_tokens=True)

def _read_srt_text(file_info):
    """Return the content of an uploaded subtitle file as text.

    Accepts the shapes an upload may arrive in: raw bytes, a dict carrying
    the bytes under ``'content'`` (or a location under ``'path'``/``'name'``),
    a filesystem path, or a file-like object exposing its path as ``.name``.
    """
    if file_info is None:
        raise ValueError("No SRT file was provided.")

    if isinstance(file_info, (bytes, bytearray)):
        raw = file_info
    elif isinstance(file_info, dict) and "content" in file_info:
        raw = file_info["content"]
    else:
        if isinstance(file_info, dict):
            path = file_info.get("path") or file_info.get("name")
        elif isinstance(file_info, (str, os.PathLike)):
            path = file_info
        else:
            # Temp-file wrappers expose the on-disk location as ``.name``.
            path = file_info.name
        with open(path, "rb") as handle:
            raw = handle.read()

    if isinstance(raw, str):
        return raw
    try:
        # "utf-8-sig" also strips a leading byte-order mark if present.
        return bytes(raw).decode("utf-8-sig")
    except UnicodeDecodeError:
        # latin-1 maps every byte to a character, so this cannot fail.
        return bytes(raw).decode("latin-1")


def translate_srt(file_info, source_language_code, target_language_code):
    """Translate every subtitle of an uploaded SRT file and save the result.

    Args:
        file_info: The uploaded file: a path, bytes, a dict with ``'content'``,
            or an object whose ``.name`` is a path.
        source_language_code: ISO 639-1 code of the subtitles' language.
        target_language_code: ISO 639-1 code of the language to translate into.

    Returns:
        Path of a newly created UTF-8 ``.srt`` file holding the translated
        subtitles.

    Raises:
        ValueError: If ``file_info`` is ``None``.
    """
    # Parse from text: pysrt.open() expects a filesystem path, not a buffer.
    subs = pysrt.from_string(_read_srt_text(file_info))

    # Translate each subtitle in place; timing and index are left untouched.
    for sub in subs:
        sub.text = translate_text(sub.text, source_language_code, target_language_code)

    # Reserve a unique output file so simultaneous requests do not overwrite
    # each other and no pre-existing directory is required.
    with tempfile.NamedTemporaryFile(prefix="translated_", suffix=".srt", delete=False) as tmp:
        output_path = tmp.name

    # SubRipFile.save() takes a path and opens the file itself.
    subs.save(output_path, encoding="utf-8")

    return output_path

# Both dropdowns are populated from the same list of language options.
source_language_dropdown = gr.Dropdown(label="Source Language", choices=language_options)
target_language_dropdown = gr.Dropdown(label="Target Language", choices=language_options)

# Wire the UI: an uploaded SRT file plus the two language selections go in,
# and the file returned by translate_srt is offered for download.
iface = gr.Interface(
    translate_srt,
    [gr.File(label="Upload SRT File"), source_language_dropdown, target_language_dropdown],
    gr.File(label="Download Translated SRT File"),
    description="Translate SubRip Text (SRT) subtitle files. This tool uses models from the Language Technology Research Group at the University of Helsinki.",
    title="SRT Translator",
)

# Start the web app.
iface.launch()