Spaces:

StarPigeon
/

ViDove

Sleeping

App Files Files Community

Yuhan-Lu committed on Apr 16, 2023

Commit

090e123

1 Parent(s): ac6e110

update docString of translate, set default value for 'attempts_count' of translate

Browse files

Files changed (1) hide show

pipeline.py +23 -7

pipeline.py CHANGED Viewed

@@ -233,7 +233,24 @@ def get_response(model_name, sentence):
 # Translate and save
-def translate(srt, script_arr, range_arr, model_name, video_name, video_link):
     logging.info("start translating...")
     previous_length = 0
     for sentence, range in tqdm(zip(script_arr, range_arr)):
@@ -249,15 +266,14 @@ def translate(srt, script_arr, range_arr, model_name, video_name, video_link):
             try:
                 translate = get_response(model_name, sentence)
                 # detect merge sentence issue and try to solve for five times:
-                attempt_left = 5
-                while not check_translation(sentence, translate) and attempt_left > 0:
                     translate = get_response(model_name, sentence)
-                    attempt_left -= 1
                 # if failure still happen, split into smaller tokens
-                if attempt_left == 0:
                     single_sentences = sentence.split("\n\n")
-                    print("merge sentence issue found for range", range)
                     translate = ""
                     for i, single_sentence in enumerate(single_sentences):
                         if i == len(single_sentences) - 1:
@@ -265,7 +281,7 @@ def translate(srt, script_arr, range_arr, model_name, video_name, video_link):
                         else:
                             translate += get_response(model_name, single_sentence) + "\n\n"
                             # print(single_sentence, translate.split("\n\n")[-2])
-                    print("solved by individually translation!")
             except Exception as e:
                 logging.debug("An error has occurred during translation:",e)

 # Translate and save
+def translate(srt, script_arr, range_arr, model_name, video_name, video_link, attempts_count = 5):
+    """
+    Translates the given script array into another language using ChatGPT and writes the result to the SRT file.
+    This function takes a script array, a range array, a model name, a video name, and a video link as input. It iterates
+    through the sentences and ranges in the script and range arrays. If the translation check fails, the translation is
+    retried up to attempts_count times; if it still fails, the sentence is split into smaller pieces that are translated individually.
+    Args:
+    srt (Subtitle): An instance of the Subtitle class representing the SRT file.
+    script_arr (list): A list of strings representing the original script sentences to be translated.
+    range_arr (list): A list of tuples representing the start and end positions of sentences in the script.
+    model_name (str): The name of the translation model to be used.
+    video_name (str): The name of the video.
+    video_link (str): The link to the video.
+    attempts_count (int): Maximum number of retry attempts when the translation check fails for a sentence.
+    """
     logging.info("start translating...")
     previous_length = 0
     for sentence, range in tqdm(zip(script_arr, range_arr)):
             try:
                 translate = get_response(model_name, sentence)
                 # detect merge sentence issue and retry up to attempts_count times:
+                while not check_translation(sentence, translate) and attempts_count > 0:
                     translate = get_response(model_name, sentence)
+                    attempts_count -= 1
                 # if failure still happen, split into smaller tokens
+                if attempts_count == 0:
                     single_sentences = sentence.split("\n\n")
+                    logging.info("merge sentence issue found for range", range)
                     translate = ""
                     for i, single_sentence in enumerate(single_sentences):
                         if i == len(single_sentences) - 1:
                         else:
                             translate += get_response(model_name, single_sentence) + "\n\n"
                             # print(single_sentence, translate.split("\n\n")[-2])
+                    logging.info("solved by individually translation!")
             except Exception as e:
                 logging.debug("An error has occurred during translation:",e)