"""Gradio app that translates the dialogue lines of an uploaded SRT file to English."""

import os
import re
from collections.abc import Mapping
from pathlib import Path
from typing import Iterator

import gradio as gr
from huggingface_hub import InferenceClient

client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")

# An SRT timing line such as "00:00:01,000 --> 00:00:04,000". Matched only at
# the start of the line so trailing positioning hints are tolerated; "." is
# accepted as the millisecond separator in addition to the standard ",".
_TIMESTAMP_RE = re.compile(
    r"\d{2}:\d{2}:\d{2}[,.]\d{3}\s*-->\s*\d{2}:\d{2}:\d{2}[,.]\d{3}"
)


def parse_srt(content: str) -> Iterator[str]:
    """Yield the dialogue lines of an SRT document.

    Blank lines, timing lines, and cue numbers are skipped. A digits-only
    line is treated as a cue number only when the next line is a timing
    line, so numeric dialogue (e.g. "1984") is preserved.

    Args:
        content: Full text of the SRT file. LF and CRLF line endings and a
            leading byte-order mark are all accepted.

    Yields:
        Each dialogue line with surrounding whitespace removed.
    """
    # splitlines() handles "\r\n"; the BOM must be removed explicitly because
    # str.strip() does not treat U+FEFF as whitespace.
    lines = [raw.strip() for raw in content.lstrip("\ufeff").splitlines()]
    for index, line in enumerate(lines):
        if not line or _TIMESTAMP_RE.match(line):
            continue
        following = lines[index + 1] if index + 1 < len(lines) else ""
        if line.isdigit() and _TIMESTAMP_RE.match(following):
            continue  # cue number introducing the next timing line
        yield line


def translate_line(line, temperature=0.9, max_new_tokens=256, top_p=0.95, repetition_penalty=1.0):
    """Translate one line of text to English with the module-level client.

    Args:
        line: Text to translate.
        temperature: Forwarded to ``client.text_generation``.
        max_new_tokens: Forwarded to ``client.text_generation``.
        top_p: Forwarded to ``client.text_generation``.
        repetition_penalty: Forwarded to ``client.text_generation``.

    Returns:
        The generated text with special tokens removed and surrounding
        whitespace stripped.
    """
    generate_kwargs = dict(
        temperature=temperature,
        max_new_tokens=max_new_tokens,
        top_p=top_p,
        repetition_penalty=repetition_penalty,
        do_sample=True,
        seed=42,
    )
    stream = client.text_generation(
        f"Translate to English: {line}",
        **generate_kwargs,
        stream=True,
        details=True,
        return_full_text=False,
    )
    # Special tokens (such as the end-of-sequence marker) are part of the
    # stream but not part of the translation, so they are left out.
    pieces = [
        chunk.token.text
        for chunk in stream
        if not getattr(chunk.token, "special", False)
    ]
    return "".join(pieces).strip()


def _read_upload(file_info) -> str:
    """Return the text of an uploaded file, whatever shape the upload takes.

    Depending on the Gradio version and the ``gr.File`` ``type`` setting, the
    callback may receive a path string, an object exposing the temp-file path
    as ``.name``, or raw bytes. A mapping with a ``"content"`` entry is also
    accepted for backward compatibility with earlier callers.

    Raises:
        gr.Error: If no file was provided.
        TypeError: If the upload has an unrecognised shape.
    """
    if file_info is None:
        raise gr.Error("Please upload an SRT file.")

    if isinstance(file_info, (bytes, bytearray)):
        payload = bytes(file_info)
    elif isinstance(file_info, (str, os.PathLike)):
        payload = Path(file_info).read_bytes()
    elif isinstance(file_info, Mapping) and "content" in file_info:
        payload = file_info["content"]
    elif hasattr(file_info, "name"):
        payload = Path(file_info.name).read_bytes()
    else:
        raise TypeError(f"Unsupported upload type: {type(file_info).__name__}")

    if isinstance(payload, str):
        return payload
    # "utf-8-sig" decodes plain UTF-8 and also drops a leading BOM if present.
    return bytes(payload).decode("utf-8-sig")


def translate_srt(file_info):
    """Translate every dialogue line of an uploaded SRT file.

    Args:
        file_info: The value Gradio passes for the ``gr.File`` input (see
            ``_read_upload`` for the accepted shapes).

    Returns:
        The translated lines joined with newlines, in file order.
    """
    content = _read_upload(file_info)
    return "\n".join(translate_line(line) for line in parse_srt(content))


demo = gr.Interface(
    fn=translate_srt,
    inputs=gr.File(label="Upload SRT File"),
    outputs=gr.Textbox(label="Translated Text"),
    title="SRT Translator",
    description="Upload an SRT file to translate its content line by line.",
)

# Launch only when run as a script so importing this module has no side effect.
if __name__ == "__main__":
    demo.launch()