Spaces:
Sleeping
Sleeping
worldqwq
committed on
Commit
•
259f806
1
Parent(s):
491821e
Prompt update and removed sentence number passing with prompt
Browse files
- SRT.py +11 -10
- pipeline.py +9 -6
SRT.py
CHANGED
@@ -417,19 +417,20 @@ class SRT_script():
|
|
417 |
[real_word, pos] = self.get_real_word(word)
|
418 |
if not dict.check(word[:pos]):
|
419 |
suggest = term_spellDict.suggest(real_word)
|
420 |
-
|
421 |
if suggest and enchant.utils.levenshtein(word, suggest[0]) < (len(word)+len(suggest[0]))/4: # relax spell check
|
422 |
|
423 |
-
|
424 |
-
|
425 |
-
|
426 |
-
|
427 |
-
print(word + ":" + suggest[0] + ":---:levenshtein:" + str(enchant.utils.levenshtein(word, suggest[0])))
|
428 |
-
|
429 |
-
|
430 |
new_word = word.replace(word[:pos],suggest[0])
|
431 |
else:
|
432 |
new_word = word
|
|
|
|
|
|
|
|
|
433 |
else:
|
434 |
new_word = word
|
435 |
ready_words[i] = new_word
|
@@ -489,8 +490,8 @@ class SRT_script():
|
|
489 |
# return a string with pure source text
|
490 |
result = ""
|
491 |
for i, seg in enumerate(self.segments):
|
492 |
-
result
|
493 |
-
|
494 |
return result
|
495 |
|
496 |
def reform_src_str(self):
|
|
|
417 |
[real_word, pos] = self.get_real_word(word)
|
418 |
if not dict.check(word[:pos]):
|
419 |
suggest = term_spellDict.suggest(real_word)
|
|
|
420 |
if suggest and enchant.utils.levenshtein(word, suggest[0]) < (len(word)+len(suggest[0]))/4: # relax spell check
|
421 |
|
422 |
+
with open("dislog.log","a") as log:
|
423 |
+
if not os.path.exists("dislog.log"):
|
424 |
+
log.write("word \t suggest \t levenshtein \n")
|
425 |
+
log.write(word + "\t" + suggest[0] + "\t" + str(enchant.utils.levenshtein(word, suggest[0]))+'\n')
|
426 |
+
#print(word + ":" + suggest[0] + ":---:levenshtein:" + str(enchant.utils.levenshtein(word, suggest[0])))
|
|
|
|
|
427 |
new_word = word.replace(word[:pos],suggest[0])
|
428 |
else:
|
429 |
new_word = word
|
430 |
+
else:
|
431 |
+
new_word = word
|
432 |
+
else:
|
433 |
+
new_word = word
|
434 |
else:
|
435 |
new_word = word
|
436 |
ready_words[i] = new_word
|
|
|
490 |
# return a string with pure source text
|
491 |
result = ""
|
492 |
for i, seg in enumerate(self.segments):
|
493 |
+
result+=f'{seg.source_text}\n\n\n'#f'SENTENCE {i+1}: {seg.source_text}\n\n\n'
|
494 |
+
|
495 |
return result
|
496 |
|
497 |
def reform_src_str(self):
|
pipeline.py
CHANGED
@@ -82,7 +82,7 @@ def get_sources(args, download_path, result_path, video_name):
|
|
82 |
|
83 |
return audio_path, audio_file, video_path, video_name
|
84 |
|
85 |
-
def get_srt_class(srt_file_en, result_path, video_name, audio_path, audio_file = None, whisper_model = '
|
86 |
# Instead of using the script_en variable directly, we'll use script_input
|
87 |
if srt_file_en is not None:
|
88 |
srt = SRT_script.parse_from_srt_file(srt_file_en)
|
@@ -181,11 +181,14 @@ def get_response(model_name, sentence):
|
|
181 |
response = openai.ChatCompletion.create(
|
182 |
model=model_name,
|
183 |
messages = [
|
184 |
-
{"role": "system", "content": "You are a helpful assistant that translates English to Chinese and have decent background in starcraft2."},
|
185 |
-
{"role": "system", "content": "Your translation has to keep the orginal format and be as accurate as possible."},
|
186 |
-
{"role": "system", "content": "Your translation needs to be consistent with the number of sentences in the original."},
|
187 |
-
{"role": "system", "content": "There is no need for you to add any comments or notes."},
|
188 |
-
{"role": "user", "content": 'Translate the following English text to Chinese: "{}"'.format(sentence)}
|
|
|
|
|
|
|
189 |
],
|
190 |
temperature=0.15
|
191 |
)
|
|
|
82 |
|
83 |
return audio_path, audio_file, video_path, video_name
|
84 |
|
85 |
+
def get_srt_class(srt_file_en, result_path, video_name, audio_path, audio_file = None, whisper_model = 'large', method = "stable"):
|
86 |
# Instead of using the script_en variable directly, we'll use script_input
|
87 |
if srt_file_en is not None:
|
88 |
srt = SRT_script.parse_from_srt_file(srt_file_en)
|
|
|
181 |
response = openai.ChatCompletion.create(
|
182 |
model=model_name,
|
183 |
messages = [
|
184 |
+
#{"role": "system", "content": "You are a helpful assistant that translates English to Chinese and have decent background in starcraft2."},
|
185 |
+
#{"role": "system", "content": "Your translation has to keep the orginal format and be as accurate as possible."},
|
186 |
+
#{"role": "system", "content": "Your translation needs to be consistent with the number of sentences in the original."},
|
187 |
+
#{"role": "system", "content": "There is no need for you to add any comments or notes."},
|
188 |
+
#{"role": "user", "content": 'Translate the following English text to Chinese: "{}"'.format(sentence)}
|
189 |
+
|
190 |
+
{"role": "system", "content": "你是一个翻译助理,你的任务是翻译星际争霸视频,你会被提供一个按行分割的英文段落,你需要在保证句意和行数的情况下输出翻译后的文本。"},
|
191 |
+
{"role": "user", "content": sentence}
|
192 |
],
|
193 |
temperature=0.15
|
194 |
)
|