Spaces:
Running
Running
solve conflict
Browse files
Former-commit-id: e28f8408800aef82cd6bb3e5d4e980e96f286297
- README.md +8 -2
- pipeline.py +11 -8
README.md
CHANGED
@@ -8,11 +8,11 @@ pip install -r requirement.txt
|
|
8 |
|
9 |
## Usage
|
10 |
```
|
11 |
-
usage: pipeline.py [-h] [--link LINK] [--local_path LOCAL_PATH] [--download DOWNLOAD] [--result RESULT] [--video_name VIDEO_NAME]
|
12 |
|
13 |
quick start:
|
14 |
|
15 |
-
example online: python3 pipeline.py --link https://www.youtube.com/watch?v=
|
16 |
|
17 |
example offline: python3 pipeline.py --local_path test_translation.m4a --result ./results --video_name test_translation
|
18 |
|
@@ -27,4 +27,10 @@ options:
|
|
27 |
--result RESULT translate result path
|
28 |
--video_name VIDEO_NAME
|
29 |
video name
|
|
|
|
|
|
|
|
|
|
|
|
|
30 |
```
|
|
|
8 |
|
9 |
## Usage
|
10 |
```
|
11 |
+
usage: pipeline.py [-h] [--link LINK] [--local_path LOCAL_PATH] [--download DOWNLOAD] [--result RESULT] [--video_name VIDEO_NAME] [--model_name MODEL_NAME]
|
12 |
|
13 |
quick start:
|
14 |
|
15 |
+
example online: python3 pipeline.py --link https://www.youtube.com/watch?v=61c4dn6851g --download ./downloads --result ./results --video_name SO_I_CHOSE_RANDOM
|
16 |
|
17 |
example offline: python3 pipeline.py --local_path test_translation.m4a --result ./results --video_name test_translation
|
18 |
|
|
|
27 |
--result RESULT translate result path
|
28 |
--video_name VIDEO_NAME
|
29 |
video name
|
30 |
+
--model_name MODEL_NAME
|
31 |
+
model name
|
32 |
+
|
33 |
+
If you cannot download the YouTube video, please follow the link below.
|
34 |
+
https://github.com/pytube/pytube/issues/1498
|
35 |
+
|
36 |
```
|
pipeline.py
CHANGED
@@ -21,24 +21,27 @@ if args.link is None and args.local_path is None and args.srt_file is None:
|
|
21 |
print("need video source or srt file")
|
22 |
exit()
|
23 |
|
|
|
24 |
openai.api_key = os.getenv("OPENAI_API_KEY")
|
25 |
DOWNLOAD_PATH = args.download
|
26 |
RESULT_PATH = args.output_dir
|
27 |
VIDEO_NAME = args.video_name
|
28 |
-
n_threshold = 1000 # Token limit for the GPT-3.5 model
|
29 |
-
# model_name = "text-davinci-003" # replace this to our own fintune model
|
30 |
model_name = args.model_name
|
31 |
-
# model_name = "davinci"
|
32 |
|
33 |
# get source audio
|
34 |
if args.link is not None and args.local_path is None:
|
35 |
# Download audio from YouTube
|
36 |
video_link = args.link
|
|
|
|
|
37 |
try:
|
38 |
video = YouTube(video_link)
|
39 |
audio = video.streams.filter(only_audio=True, file_extension='mp4').first()
|
40 |
-
audio
|
41 |
-
|
|
|
|
|
|
|
42 |
except Exception as e:
|
43 |
print("Connection Error")
|
44 |
print(e)
|
@@ -84,7 +87,7 @@ assSub_en = srt2ass(srt_file_en, "default", "No", "Modest")
|
|
84 |
print('ASS subtitle saved as: ' + assSub_en)
|
85 |
|
86 |
# Split the video script by sentences and create chunks within the token limit
|
87 |
-
n_threshold =
|
88 |
script_split = script_input.split('.')
|
89 |
|
90 |
script_arr = []
|
@@ -109,7 +112,7 @@ for s in script_arr:
|
|
109 |
{"role": "system", "content": "You are a helpful assistant that translates English to Chinese and have decent background in starcraft2."},
|
110 |
{"role": "user", "content": 'Translate the following English text to Chinese: "{}"'.format(s)}
|
111 |
],
|
112 |
-
temperature=0.
|
113 |
)
|
114 |
with open(f"{RESULT_PATH}/{VIDEO_NAME}_zh.srt", 'a+') as f:
|
115 |
f.write(response['choices'][0]['message']['content'].strip())
|
@@ -131,4 +134,4 @@ for s in script_arr:
|
|
131 |
f.write(response['choices'][0]['text'].strip())
|
132 |
|
133 |
assSub_zh = srt2ass(f"{RESULT_PATH}/{VIDEO_NAME}_zh.srt", "default", "No", "Modest")
|
134 |
-
print('ASS subtitle saved as: ' + assSub_zh)
|
|
|
21 |
print("need video source or srt file")
|
22 |
exit()
|
23 |
|
24 |
+
# set openai api key
|
25 |
openai.api_key = os.getenv("OPENAI_API_KEY")
|
26 |
DOWNLOAD_PATH = args.download
|
27 |
RESULT_PATH = args.output_dir
|
28 |
VIDEO_NAME = args.video_name
|
|
|
|
|
29 |
model_name = args.model_name
|
|
|
30 |
|
31 |
# get source audio
|
32 |
if args.link is not None and args.local_path is None:
|
33 |
# Download audio from YouTube
|
34 |
video_link = args.link
|
35 |
+
video = None
|
36 |
+
audio = None
|
37 |
try:
|
38 |
video = YouTube(video_link)
|
39 |
audio = video.streams.filter(only_audio=True, file_extension='mp4').first()
|
40 |
+
if audio:
|
41 |
+
audio.download(DOWNLOAD_PATH)
|
42 |
+
print('Download Completed!')
|
43 |
+
else:
|
44 |
+
print("Error: Audio stream not found")
|
45 |
except Exception as e:
|
46 |
print("Connection Error")
|
47 |
print(e)
|
|
|
87 |
print('ASS subtitle saved as: ' + assSub_en)
|
88 |
|
89 |
# Split the video script by sentences and create chunks within the token limit
|
90 |
+
n_threshold = 1500 # Token limit for the GPT-3 model
|
91 |
script_split = script_input.split('.')
|
92 |
|
93 |
script_arr = []
|
|
|
112 |
{"role": "system", "content": "You are a helpful assistant that translates English to Chinese and have decent background in starcraft2."},
|
113 |
{"role": "user", "content": 'Translate the following English text to Chinese: "{}"'.format(s)}
|
114 |
],
|
115 |
+
temperature=0.15
|
116 |
)
|
117 |
with open(f"{RESULT_PATH}/{VIDEO_NAME}_zh.srt", 'a+') as f:
|
118 |
f.write(response['choices'][0]['message']['content'].strip())
|
|
|
134 |
f.write(response['choices'][0]['text'].strip())
|
135 |
|
136 |
assSub_zh = srt2ass(f"{RESULT_PATH}/{VIDEO_NAME}_zh.srt", "default", "No", "Modest")
|
137 |
+
print('ASS subtitle saved as: ' + assSub_zh)
|