Spaces:
Sleeping
Sleeping
Eason Lu
committed on
Commit
·
4f95b2f
1
Parent(s):
4ef2b46
change file dirs settings/add -ass option
Browse files
Former-commit-id: 52f749ea859ba9779df851eae14a00cfe295fbed
- pipeline.py +27 -14
pipeline.py
CHANGED
@@ -12,8 +12,9 @@ parser.add_argument("--local_path", help="local video path here", default=None,
|
|
12 |
parser.add_argument("--srt_file", help="srt file input path here", default=None, type=str, required=False) # New argument
|
13 |
parser.add_argument("--download", help="download path", default='./downloads', type=str, required=False)
|
14 |
parser.add_argument("--output_dir", help="translate result path", default='./results', type=str, required=False)
|
15 |
-
parser.add_argument("--video_name", help="video name", default='placeholder', type=str, required=False)
|
16 |
parser.add_argument("--model_name", help="model name only support text-davinci-003 and gpt-3.5-turbo", type=str, required=False, default="gpt-3.5-turbo")
|
|
|
17 |
args = parser.parse_args()
|
18 |
|
19 |
# input should be either video file or youtube video link.
|
@@ -21,10 +22,18 @@ if args.link is None and args.local_path is None and args.srt_file is None:
|
|
21 |
print("need video source or srt file")
|
22 |
exit()
|
23 |
|
24 |
-
# set
|
25 |
openai.api_key = os.getenv("OPENAI_API_KEY")
|
26 |
DOWNLOAD_PATH = args.download
|
|
|
|
|
|
|
|
|
|
|
27 |
RESULT_PATH = args.output_dir
|
|
|
|
|
|
|
28 |
VIDEO_NAME = args.video_name
|
29 |
model_name = args.model_name
|
30 |
|
@@ -38,21 +47,24 @@ if args.link is not None and args.local_path is None:
|
|
38 |
video = YouTube(video_link)
|
39 |
audio = video.streams.filter(only_audio=True, file_extension='mp4').first()
|
40 |
if audio:
|
41 |
-
audio.download(DOWNLOAD_PATH)
|
42 |
print('Download Completed!')
|
43 |
else:
|
44 |
print("Error: Audio stream not found")
|
45 |
except Exception as e:
|
46 |
print("Connection Error")
|
47 |
print(e)
|
48 |
-
audio_path = '{}/{}'.format(DOWNLOAD_PATH, audio.default_filename)
|
49 |
audio_file = open(audio_path, "rb")
|
50 |
-
VIDEO_NAME
|
|
|
51 |
elif args.local_path is not None:
|
52 |
# Read from local
|
53 |
audio_file= open(args.local_path, "rb")
|
54 |
audio_path = args.local_path
|
55 |
|
|
|
|
|
56 |
|
57 |
# Instead of using the script_en variable directly, we'll use script_input
|
58 |
srt_file_en = args.srt_file
|
@@ -61,7 +73,7 @@ if srt_file_en is not None:
|
|
61 |
script_input = f.read()
|
62 |
else:
|
63 |
# using whisper to perform speech-to-text and save it in <video name>_en.txt under RESULT PATH.
|
64 |
-
srt_file_en = "{}/{}_en.srt".format(RESULT_PATH, VIDEO_NAME)
|
65 |
if not os.path.exists(srt_file_en):
|
66 |
# use OpenAI API for transcribe
|
67 |
# transcript = openai.Audio.transcribe("whisper-1", audio_file)
|
@@ -82,9 +94,10 @@ else:
|
|
82 |
script_en = f.read()
|
83 |
script_input = script_en
|
84 |
|
85 |
-
|
86 |
-
|
87 |
-
|
|
|
88 |
|
89 |
# Split the video script by sentences and create chunks within the token limit
|
90 |
n_threshold = 1500 # Token limit for the GPT-3 model
|
@@ -114,7 +127,7 @@ for s in script_arr:
|
|
114 |
],
|
115 |
temperature=0.15
|
116 |
)
|
117 |
-
with open(f"{RESULT_PATH}/{VIDEO_NAME}_zh.srt", 'a+') as f:
|
118 |
f.write(response['choices'][0]['message']['content'].strip())
|
119 |
|
120 |
if model_name == "text-davinci-003":
|
@@ -130,8 +143,8 @@ for s in script_arr:
|
|
130 |
presence_penalty=0.0
|
131 |
)
|
132 |
|
133 |
-
with open(f"{RESULT_PATH}/{VIDEO_NAME}_zh.srt", 'a+') as f:
|
134 |
f.write(response['choices'][0]['text'].strip())
|
135 |
-
|
136 |
-
assSub_zh = srt2ass(f"{RESULT_PATH}/{VIDEO_NAME}_zh.srt", "default", "No", "Modest")
|
137 |
-
print('ASS subtitle saved as: ' + assSub_zh)
|
|
|
12 |
parser.add_argument("--srt_file", help="srt file input path here", default=None, type=str, required=False) # New argument
|
13 |
parser.add_argument("--download", help="download path", default='./downloads', type=str, required=False)
|
14 |
parser.add_argument("--output_dir", help="translate result path", default='./results', type=str, required=False)
|
15 |
+
parser.add_argument("--video_name", help="video name, if use video link as input, the name will auto-filled by youtube video name", default='placeholder', type=str, required=False)
|
16 |
parser.add_argument("--model_name", help="model name only support text-davinci-003 and gpt-3.5-turbo", type=str, required=False, default="gpt-3.5-turbo")
|
17 |
+
parser.add_argument("-ass", help="set script output to .ass file", action='store_true')
|
18 |
args = parser.parse_args()
|
19 |
|
20 |
# input should be either video file or youtube video link.
|
|
|
22 |
print("need video source or srt file")
|
23 |
exit()
|
24 |
|
25 |
+
# set up
|
26 |
openai.api_key = os.getenv("OPENAI_API_KEY")
|
27 |
DOWNLOAD_PATH = args.download
|
28 |
+
if not os.path.exists(DOWNLOAD_PATH):
|
29 |
+
os.mkdir(DOWNLOAD_PATH)
|
30 |
+
os.mkdir(f'{DOWNLOAD_PATH}/audio')
|
31 |
+
os.mkdir(f'{DOWNLOAD_PATH}/video')
|
32 |
+
|
33 |
RESULT_PATH = args.output_dir
|
34 |
+
if not os.path.exists(RESULT_PATH):
|
35 |
+
os.mkdir(RESULT_PATH)
|
36 |
+
|
37 |
VIDEO_NAME = args.video_name
|
38 |
model_name = args.model_name
|
39 |
|
|
|
47 |
video = YouTube(video_link)
|
48 |
audio = video.streams.filter(only_audio=True, file_extension='mp4').first()
|
49 |
if audio:
|
50 |
+
audio.download(f'{DOWNLOAD_PATH}/audio')
|
51 |
print('Download Completed!')
|
52 |
else:
|
53 |
print("Error: Audio stream not found")
|
54 |
except Exception as e:
|
55 |
print("Connection Error")
|
56 |
print(e)
|
57 |
+
audio_path = '{}/audio/{}'.format(DOWNLOAD_PATH, audio.default_filename)
|
58 |
audio_file = open(audio_path, "rb")
|
59 |
+
if VIDEO_NAME == 'placeholder':
|
60 |
+
VIDEO_NAME = audio.default_filename.split('.')[0]
|
61 |
elif args.local_path is not None:
|
62 |
# Read from local
|
63 |
audio_file= open(args.local_path, "rb")
|
64 |
audio_path = args.local_path
|
65 |
|
66 |
+
if not os.path.exists(f'{RESULT_PATH}/{VIDEO_NAME}'):
|
67 |
+
os.mkdir(f'{RESULT_PATH}/{VIDEO_NAME}')
|
68 |
|
69 |
# Instead of using the script_en variable directly, we'll use script_input
|
70 |
srt_file_en = args.srt_file
|
|
|
73 |
script_input = f.read()
|
74 |
else:
|
75 |
# using whisper to perform speech-to-text and save it in <video name>_en.txt under RESULT PATH.
|
76 |
+
srt_file_en = "{}/{}/{}_en.srt".format(RESULT_PATH, VIDEO_NAME, VIDEO_NAME)
|
77 |
if not os.path.exists(srt_file_en):
|
78 |
# use OpenAI API for transcribe
|
79 |
# transcript = openai.Audio.transcribe("whisper-1", audio_file)
|
|
|
94 |
script_en = f.read()
|
95 |
script_input = script_en
|
96 |
|
97 |
+
if args.ass:
|
98 |
+
from srt2ass import srt2ass
|
99 |
+
assSub_en = srt2ass(srt_file_en, "default", "No", "Modest")
|
100 |
+
print('ASS subtitle saved as: ' + assSub_en)
|
101 |
|
102 |
# Split the video script by sentences and create chunks within the token limit
|
103 |
n_threshold = 1500 # Token limit for the GPT-3 model
|
|
|
127 |
],
|
128 |
temperature=0.15
|
129 |
)
|
130 |
+
with open(f"{RESULT_PATH}/{VIDEO_NAME}/{VIDEO_NAME}_zh.srt", 'a+') as f:
|
131 |
f.write(response['choices'][0]['message']['content'].strip())
|
132 |
|
133 |
if model_name == "text-davinci-003":
|
|
|
143 |
presence_penalty=0.0
|
144 |
)
|
145 |
|
146 |
+
with open(f"{RESULT_PATH}/{VIDEO_NAME}/{VIDEO_NAME}_zh.srt", 'a+') as f:
|
147 |
f.write(response['choices'][0]['text'].strip())
|
148 |
+
if args.ass:
|
149 |
+
assSub_zh = srt2ass(f"{RESULT_PATH}/{VIDEO_NAME}/{VIDEO_NAME}_zh.srt", "default", "No", "Modest")
|
150 |
+
print('ASS subtitle saved as: ' + assSub_zh)
|