Spaces:
Sleeping
Sleeping
JiaenLiu
committed on
Commit
•
ae0ed1b
1
Parent(s):
87a205e
add gpt3.5 and text input
Browse filesFormer-commit-id: 0a72cc1d3f69a74fc35bb0981462370dbe566518
- README.md +2 -0
- pipeline.py +61 -36
README.md
CHANGED
@@ -16,6 +16,8 @@ example online: python3 pipeline.py --link https://www.youtube.com/watch?v=XbgFI
|
|
16 |
|
17 |
example offline: python3 pipeline.py --local_path test_translation.m4a --result ./results --video_name test_translation
|
18 |
|
|
|
|
|
19 |
options:
|
20 |
-h, --help show this help message and exit
|
21 |
--link LINK youtube video link here
|
|
|
16 |
|
17 |
example offline: python3 pipeline.py --local_path test_translation.m4a --result ./results --video_name test_translation
|
18 |
|
19 |
+
example text input: python pipeline.py --text_file "/home/jiaenliu/project-t/results/huanghe_translation_en.txt" --result "/home/jiaenliu/project-t/results" --video_name "huanghe_test"
|
20 |
+
|
21 |
options:
|
22 |
-h, --help show this help message and exit
|
23 |
--link LINK youtube video link here
|
pipeline.py
CHANGED
@@ -4,32 +4,33 @@ import argparse
|
|
4 |
import os
|
5 |
import io
|
6 |
|
|
|
|
|
7 |
parser = argparse.ArgumentParser()
|
8 |
parser.add_argument("--link", help="youtube video link here", default=None, type=str, required=False)
|
9 |
parser.add_argument("--local_path", help="local video path here", default=None, type=str, required=False)
|
|
|
10 |
parser.add_argument("--download", help="download path", default='./downloads', type=str, required=False)
|
11 |
parser.add_argument("--result", help="translate result path", default='./results', type=str, required=False)
|
12 |
parser.add_argument("--video_name", help="video name", default='placeholder', type=str, required=False)
|
|
|
13 |
args = parser.parse_args()
|
14 |
|
15 |
-
if args.link is None and args.local_path is None:
|
16 |
-
print("need video source")
|
17 |
exit()
|
18 |
|
19 |
-
|
20 |
-
openai.api_key = "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"
|
21 |
-
# openai.api_key = os.getenv("OPENAI_API_KEY")
|
22 |
-
|
23 |
DOWNLOAD_PATH = args.download
|
24 |
RESULT_PATH = args.result
|
25 |
VIDEO_NAME = args.video_name
|
26 |
-
n_threshold =
|
27 |
# model_name = "text-davinci-003" # replace this to our own fintune model
|
28 |
-
model_name =
|
29 |
-
|
30 |
|
31 |
# get source audio
|
32 |
-
if args.link is not None:
|
33 |
# Download audio from YouTube
|
34 |
video_link = args.link
|
35 |
try:
|
@@ -42,25 +43,33 @@ if args.link is not None:
|
|
42 |
print(e)
|
43 |
audio_file = open('{}/{}'.format(DOWNLOAD_PATH, audio.default_filename), "rb")
|
44 |
VIDEO_NAME = audio.default_filename.split('.')[0]
|
45 |
-
|
46 |
# Read from local
|
47 |
audio_file= open(args.local_path, "rb")
|
48 |
|
49 |
-
# perform speech-to-text and save it in <video name>_en.txt under RESULT PATH.
|
50 |
-
if not os.path.exists("{}/{}_en.txt".format(RESULT_PATH, VIDEO_NAME)):
|
51 |
-
transcript = openai.Audio.transcribe("whisper-1", audio_file)
|
52 |
-
with open("{}/{}_en.txt".format(RESULT_PATH, VIDEO_NAME), 'w') as f:
|
53 |
-
f.write(transcript['text'])
|
54 |
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
60 |
|
61 |
# Split the video script by sentences and create chunks within the token limit
|
62 |
-
n_threshold =
|
63 |
-
script_split =
|
64 |
|
65 |
script_arr = []
|
66 |
script = ""
|
@@ -75,17 +84,33 @@ if script.strip():
|
|
75 |
|
76 |
# Translate and save
|
77 |
for s in script_arr:
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
88 |
|
89 |
-
|
90 |
-
|
91 |
-
|
|
|
4 |
import os
|
5 |
import io
|
6 |
|
7 |
+
|
8 |
+
|
9 |
# Command-line interface. Exactly one input source is expected:
# --link (YouTube URL), --local_path (local media file), or --text_file
# (pre-transcribed English script); the remaining flags configure
# output locations and the OpenAI model.
parser = argparse.ArgumentParser()
parser.add_argument("--link", help="youtube video link here", default=None, type=str, required=False)
parser.add_argument("--local_path", help="local video path here", default=None, type=str, required=False)
parser.add_argument("--text_file", help="text file path here", default=None, type=str, required=False)  # New argument
parser.add_argument("--download", help="download path", default='./downloads', type=str, required=False)
parser.add_argument("--result", help="translate result path", default='./results', type=str, required=False)
parser.add_argument("--video_name", help="video name", default='placeholder', type=str, required=False)
# FIX: the original passed `default=` twice ('placeholder' and "gpt-3.5-turbo"),
# which is a SyntaxError ("keyword argument repeated"); keep only the intended
# model default.
parser.add_argument("--model_name", help="model name only support text-davinci-003 and gpt-3.5-turbo", default="gpt-3.5-turbo", type=str, required=False)
args = parser.parse_args()
|
18 |
|
19 |
+
# Abort early when no input source was provided on the command line.
if args.link is None and args.local_path is None and args.text_file is None:
    # FIX: the original used print() + exit(), which terminates with status 0
    # (success) and relies on the interactive-only `site` builtin; SystemExit
    # with a message writes it to stderr and exits with status 1.
    raise SystemExit("need video source or text file")

# Read the API key from the environment; never hard-code secrets in source.
openai.api_key = os.getenv("OPENAI_API_KEY")
|
|
|
|
|
|
|
24 |
# Paths and model configuration derived from the parsed CLI arguments.
DOWNLOAD_PATH = args.download   # where YouTube audio gets downloaded
RESULT_PATH = args.result       # where transcripts/translations are written
VIDEO_NAME = args.video_name    # base name for the result files
n_threshold = 1000  # Token limit for the GPT-3.5 model
# NOTE(review): n_threshold is reassigned to 4096 further down before it is
# used — confirm which limit is actually intended.
# model_name = "text-davinci-003"  # replace this with our own fine-tuned model
model_name = args.model_name
# model_name = "davinci"
|
31 |
|
32 |
# get source audio
|
33 |
+
if args.link is not None and args.local_path is None:
|
34 |
# Download audio from YouTube
|
35 |
video_link = args.link
|
36 |
try:
|
|
|
43 |
print(e)
|
44 |
audio_file = open('{}/{}'.format(DOWNLOAD_PATH, audio.default_filename), "rb")
|
45 |
VIDEO_NAME = audio.default_filename.split('.')[0]
|
46 |
+
elif args.local_path is not None:
|
47 |
# Read from local
|
48 |
audio_file= open(args.local_path, "rb")
|
49 |
|
|
|
|
|
|
|
|
|
|
|
50 |
|
51 |
+
|
52 |
+
# Obtain the English script to translate: either read it straight from
# --text_file, or transcribe the audio source with Whisper. The transcript
# is cached as <video name>_en.txt under RESULT_PATH and reused on reruns.
if args.text_file is not None:
    with open(args.text_file, 'r') as f:
        script_input = f.read()
else:
    transcript_file = "{}/{}_en.txt".format(RESULT_PATH, VIDEO_NAME)
    # perform speech-to-text only when no cached transcript exists yet
    if not os.path.exists(transcript_file):
        transcript = openai.Audio.transcribe("whisper-1", audio_file)
        with open(transcript_file, 'w') as f:
            f.write(transcript['text'])
    # load the (possibly pre-existing) transcript as the translation input
    with open(transcript_file, 'r') as f:
        script_en = f.read()
    script_input = script_en
|
69 |
|
70 |
# Break the full script into sentences; they are reassembled further down
# into chunks that stay within the model's token limit.
n_threshold = 4096  # Token limit for the GPT-3 model
script_split = script_input.split('.')

script_arr = []  # finished chunks, one translation request each
script = ""      # chunk currently being assembled
|
|
|
84 |
|
85 |
# Translate each chunk with the selected model and append the Chinese
# translation (one chunk per line) to <video name>_zh.txt under RESULT_PATH.
out_path = f"{RESULT_PATH}/{VIDEO_NAME}_zh.txt"
for s in script_arr:
    if model_name == "gpt-3.5-turbo":
        # chat-style API for the gpt-3.5 model
        print(s + "\n")
        response = openai.ChatCompletion.create(
            model=model_name,
            messages=[
                {"role": "system", "content": "You are a helpful assistant that translates English to Chinese and have decent background in starcraft2."},
                {"role": "user", "content": 'Translate the following English text to Chinese: "{}"'.format(s)}
            ],
            temperature=0.1
        )
        translation = response['choices'][0]['message']['content'].strip()
    elif model_name == "text-davinci-003":
        # completion-style API for the davinci model
        prompt = f"Please help me translate this into Chinese:\n\n{s}\n\n"
        print(prompt)
        response = openai.Completion.create(
            model=model_name,
            prompt=prompt,
            temperature=0.1,
            max_tokens=2000,
            top_p=1.0,
            frequency_penalty=0.0,
            presence_penalty=0.0
        )
        translation = response['choices'][0]['text'].strip()
    else:
        # FIX: the original used two independent `if`s and silently skipped
        # every chunk for any other model name (including the leftover
        # 'placeholder' default), producing an empty output file. Fail loudly.
        raise ValueError("unsupported model: {}".format(model_name))
    # single shared write path instead of duplicating it in both branches
    with open(out_path, 'a+') as f:
        f.write(translation)
        f.write('\n')
|