Lenylvt committed on
Commit
9895fa7
·
verified ·
1 Parent(s): ed6aa07

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +48 -35
app.py CHANGED
@@ -4,42 +4,55 @@ import re
4
 
5
  # Module-level inference client bound to the Mixtral-8x7B-Instruct-v0.1 model id.
  # NOTE(review): InferenceClient is presumably huggingface_hub's; its import is outside this view.
  client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
6
 
7
def parse_srt(content):
    """Yield the subtitle text lines found in SRT file content.

    Cue index lines (digits only), timestamp lines of the form
    ``HH:MM:SS,mmm --> HH:MM:SS,mmm`` and blank lines are skipped.

    Args:
        content: Full text of an SRT file, with LF or CRLF line endings.

    Yields:
        Each remaining line, without its line terminator.

    Note:
        A dialogue line made up only of digits cannot be told apart from a
        cue index by this line-based filter and is skipped as well.
    """
    timestamp = re.compile(r'\d{2}:\d{2}:\d{2},\d{3} --> \d{2}:\d{2}:\d{2},\d{3}')
    for raw_line in content.split("\n"):
        # Files saved with Windows line endings leave a trailing carriage
        # return on every line; without removing it, a cue index fails the
        # isdigit() check and is emitted as if it were subtitle text.
        line = raw_line.rstrip("\r")
        if not line.strip():
            continue
        if line.isdigit() or timestamp.match(line):
            continue
        yield line
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
 
14
def translate_line(line, temperature=0.9, max_new_tokens=256, top_p=0.95, repetition_penalty=1.0):
    """Ask the module-level ``client`` to translate one line of text into English.

    The line is embedded in a "Translate to English: ..." prompt and sent
    through ``client.text_generation`` in streaming mode; the text of every
    streamed token is concatenated into the returned string.

    Args:
        line: Text to translate.
        temperature: Sampling temperature forwarded to the generation call.
        max_new_tokens: Upper bound on generated tokens, forwarded as-is.
        top_p: Nucleus-sampling value, forwarded as-is.
        repetition_penalty: Repetition penalty, forwarded as-is.

    Returns:
        The concatenated text of all streamed tokens.
    """
    prompt = f"Translate to English: {line}"
    # Sampling is enabled, but the seed is pinned to 42 on every call.
    sampling = {
        "temperature": temperature,
        "max_new_tokens": max_new_tokens,
        "top_p": top_p,
        "repetition_penalty": repetition_penalty,
        "do_sample": True,
        "seed": 42,
    }
    chunks = client.text_generation(prompt, **sampling, stream=True, details=True, return_full_text=False)
    return "".join(chunk.token.text for chunk in chunks)
 
29
 
30
def translate_srt(file_info):
    """Translate every text line of an uploaded SRT file and join the results.

    Args:
        file_info: Mapping whose ``"content"`` entry holds the file as
            UTF-8 encoded bytes.

    Returns:
        The translated lines joined with newline characters. Cue numbers
        and timestamps are not part of the output because ``parse_srt``
        filters them out.
    """
    # The upload is read from the "content" key as bytes, so decode it to text first.
    srt_text = file_info["content"].decode("utf-8")
    return "\n".join(translate_line(text_line) for text_line in parse_srt(srt_text))
38
 
39
# Build the Gradio UI (one file upload in, one textbox out) around
# translate_srt and start serving it as soon as this module is executed.
gr.Interface(
    fn=translate_srt,
    inputs=gr.File(label="Upload SRT File"),
    outputs=gr.Textbox(label="Translated Text"),
    title="SRT Translator",
    description="Upload an SRT file to translate its content line by line."
).launch()
 
 
 
4
 
5
  # Module-level inference client bound to the Mixtral-8x7B-Instruct-v0.1 model id.
  # NOTE(review): InferenceClient is presumably huggingface_hub's; its import is outside this view.
  client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
6
 
7
def translate_subtitles(srt_content, target_language):
    """Translate the subtitle text of every cue in SRT content.

    The content is split into cues on blank lines. For each cue with at
    least three lines, the first line (index) and second line (time range)
    are kept verbatim and the remaining lines are sent to the module-level
    ``client`` for translation. Cues with fewer than three lines are
    dropped from the output.

    Args:
        srt_content: Full text of an SRT file; LF, CRLF or CR line endings.
        target_language: Language code forwarded to the translation call.

    Returns:
        The translated cues, joined by a single blank line.
    """
    # Normalise line endings first: with CRLF input a plain split on two
    # consecutive LF characters never matches, so the whole file would be
    # treated as one cue and everything after the first timestamp would be
    # sent off as its "text".
    normalized = srt_content.replace('\r\n', '\n').replace('\r', '\n')
    translated_blocks = []
    # Cues are separated by one or more blank lines (possibly holding stray
    # spaces); splitting on the whole run keeps a leading empty line from
    # shifting the index/time fields of the following cue.
    for block in re.split(r'\n\s*\n', normalized.strip()):
        lines = block.split('\n')
        if len(lines) < 3:
            continue
        index, time_range, *text_lines = lines
        subtitle_text = '\n'.join(text_lines)
        # NOTE(review): the keyword name and the shape of the returned value
        # are kept exactly as written; confirm both against the installed
        # client library's translation API.
        translation = client.translation(subtitle_text, target_language=target_language)
        translated_text = translation[0]['translation_text']
        translated_blocks.append(f"{index}\n{time_range}\n{translated_text}")
    return '\n\n'.join(translated_blocks)
27
 
28
def read_srt_file(file_path):
    """Return the entire text of the file at *file_path*, decoded as UTF-8."""
    with open(file_path, mode='r', encoding='utf-8') as srt_handle:
        text = srt_handle.read()
    return text
 
 
 
34
 
35
def save_translated_srt(content, output_path):
    """Write *content* to *output_path* as UTF-8 text.

    The file is opened in write mode, so an existing file at that path is
    overwritten.
    """
    with open(output_path, mode='w', encoding='utf-8') as srt_out:
        srt_out.write(content)
41
 
42
def translate_srt_interface(srt_file, target_language):
    """Gradio callback: load the uploaded SRT file and translate its cues.

    NOTE(review): the shape of the upload depends on the installed Gradio
    version and on the File component's ``type`` setting. It may be a path
    string, an object exposing the path as ``.name``, or raw bytes. All of
    those are accepted here; confirm which one this app actually receives.

    Args:
        srt_file: The uploaded file as delivered by the File input, or
            ``None`` when nothing was uploaded.
        target_language: Language code passed on to ``translate_subtitles``.

    Returns:
        The translated SRT text, or an empty string when no file was given.
    """
    if srt_file is None:
        # Submitting the form without a file should not crash the callback.
        return ""

    if isinstance(srt_file, (bytes, bytearray)):
        srt_content = bytes(srt_file).decode('utf-8')
    elif isinstance(srt_file, str):
        srt_content = read_srt_file(srt_file)
    elif isinstance(getattr(srt_file, 'name', None), str):
        # Re-open by path so the text is always decoded as UTF-8, whatever
        # mode the upload object itself was opened in.
        srt_content = read_srt_file(srt_file.name)
    else:
        # Plain file-like object without a path: read it directly, as before.
        srt_content = srt_file.read()
        if isinstance(srt_content, (bytes, bytearray)):
            srt_content = bytes(srt_content).decode('utf-8')

    return translate_subtitles(srt_content, target_language)
 
49
 
50
# Build the Gradio UI: an SRT upload plus a free-text language code in,
# translated SRT text out.
iface = gr.Interface(
    fn=translate_srt_interface,
    # gr.File restricts uploads through `file_types`, a list of extensions
    # or type names; the singular `file_type="srt"` is not that parameter.
    inputs=[gr.File(file_types=[".srt"]), gr.Textbox(label="Target Language Code")],
    outputs="text",
    title="SRT File Translator",
    description="Translate SRT subtitle files to your desired language."
)

# Start serving the interface as soon as this module is executed.
iface.launch()