Spaces:
Sleeping
Sleeping
Yuhan-Lu
committed on
Commit
·
090e123
1
Parent(s):
ac6e110
update docString of translate, set default value for 'attempts_count' of translate
Browse files- pipeline.py +23 -7
pipeline.py
CHANGED
@@ -233,7 +233,24 @@ def get_response(model_name, sentence):
|
|
233 |
|
234 |
|
235 |
# Translate and save
|
236 |
-
def translate(srt, script_arr, range_arr, model_name, video_name, video_link):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
237 |
logging.info("start translating...")
|
238 |
previous_length = 0
|
239 |
for sentence, range in tqdm(zip(script_arr, range_arr)):
|
@@ -249,15 +266,14 @@ def translate(srt, script_arr, range_arr, model_name, video_name, video_link):
|
|
249 |
try:
|
250 |
translate = get_response(model_name, sentence)
|
251 |
# detect merge sentence issue and try to solve for five times:
|
252 |
-
|
253 |
-
while not check_translation(sentence, translate) and attempt_left > 0:
|
254 |
translate = get_response(model_name, sentence)
|
255 |
-
|
256 |
|
257 |
# if failure still happen, split into smaller tokens
|
258 |
-
if
|
259 |
single_sentences = sentence.split("\n\n")
|
260 |
-
|
261 |
translate = ""
|
262 |
for i, single_sentence in enumerate(single_sentences):
|
263 |
if i == len(single_sentences) - 1:
|
@@ -265,7 +281,7 @@ def translate(srt, script_arr, range_arr, model_name, video_name, video_link):
|
|
265 |
else:
|
266 |
translate += get_response(model_name, single_sentence) + "\n\n"
|
267 |
# print(single_sentence, translate.split("\n\n")[-2])
|
268 |
-
|
269 |
|
270 |
except Exception as e:
|
271 |
logging.debug("An error has occurred during translation:",e)
|
|
|
233 |
|
234 |
|
235 |
# Translate and save
|
236 |
+
def translate(srt, script_arr, range_arr, model_name, video_name, video_link, attempts_count = 5):
|
237 |
+
"""
|
238 |
+
Translates the given script array into another language using ChatGPT and writes the result to the SRT file.
|
239 |
+
|
240 |
+
This function takes a script array, a range array, a model name, a video name, and a video link as input. It iterates
|
241 |
+
through the sentences and ranges in the script and range arrays. If the translation check fails five times, the function
|
242 |
+
will attempt to resolve merge sentence issues and split the sentence into smaller tokens for a better translation.
|
243 |
+
|
244 |
+
Args:
|
245 |
+
srt (Subtitle): An instance of the Subtitle class representing the SRT file.
|
246 |
+
script_arr (list): A list of strings representing the original script sentences to be translated.
|
247 |
+
range_arr (list): A list of tuples representing the start and end positions of sentences in the script.
|
248 |
+
model_name (str): The name of the translation model to be used.
|
249 |
+
video_name (str): The name of the video.
|
250 |
+
video_link (str): The link to the video.
|
251 |
+
attempts_count (int): Number of retry attempts allowed for unmatched sentences.
|
252 |
+
"""
|
253 |
+
|
254 |
logging.info("start translating...")
|
255 |
previous_length = 0
|
256 |
for sentence, range in tqdm(zip(script_arr, range_arr)):
|
|
|
266 |
try:
|
267 |
translate = get_response(model_name, sentence)
|
268 |
# detect merge sentence issue and try to solve for five times:
|
269 |
+
while not check_translation(sentence, translate) and attempts_count > 0:
|
|
|
270 |
translate = get_response(model_name, sentence)
|
271 |
+
attempts_count -= 1
|
272 |
|
273 |
# if failure still happen, split into smaller tokens
|
274 |
+
if attempts_count == 0:
|
275 |
single_sentences = sentence.split("\n\n")
|
276 |
+
logging.info("merge sentence issue found for range", range)
|
277 |
translate = ""
|
278 |
for i, single_sentence in enumerate(single_sentences):
|
279 |
if i == len(single_sentences) - 1:
|
|
|
281 |
else:
|
282 |
translate += get_response(model_name, single_sentence) + "\n\n"
|
283 |
# print(single_sentence, translate.split("\n\n")[-2])
|
284 |
+
logging.info("solved by individually translation!")
|
285 |
|
286 |
except Exception as e:
|
287 |
logging.debug("An error has occurred during translation:",e)
|