Spaces:
Running
Running
JiaenLiu
committed on
Commit
·
66791b6
1
Parent(s):
7a7c7ac
small_fix increase token size
Browse files
Former-commit-id: 98d31138827b8cd27577205c3c21155a6f686772
- README.md +6 -0
- pipeline.py +28 -20
- requirement.txt +0 -1
README.md
CHANGED
@@ -10,6 +10,12 @@ pip install -r requirement.txt
|
|
10 |
```
|
11 |
usage: pipeline.py [-h] [--link LINK] [--local_path LOCAL_PATH] [--download DOWNLOAD] [--result RESULT] [--video_name VIDEO_NAME]
|
12 |
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
options:
|
14 |
-h, --help show this help message and exit
|
15 |
--link LINK youtube video link here
|
|
|
10 |
```
|
11 |
usage: pipeline.py [-h] [--link LINK] [--local_path LOCAL_PATH] [--download DOWNLOAD] [--result RESULT] [--video_name VIDEO_NAME]
|
12 |
|
13 |
+
quick start:
|
14 |
+
|
15 |
+
example online: python3 pipeline.py --link https://www.youtube.com/watch?v=XbgFIkhMM3s --download ./downloads --result ./results --video_name uncle_roger_test
|
16 |
+
|
17 |
+
example offline: python3 pipeline.py --local_path test_translation.m4a --result ./results --video_name test_translation
|
18 |
+
|
19 |
options:
|
20 |
-h, --help show this help message and exit
|
21 |
--link LINK youtube video link here
|
pipeline.py
CHANGED
@@ -16,8 +16,8 @@ if args.link is None and args.local_path is None:
|
|
16 |
print("need video source")
|
17 |
exit()
|
18 |
|
19 |
-
|
20 |
-
openai.api_key = os.getenv("OPENAI_API_KEY")
|
21 |
|
22 |
DOWNLOAD_PATH = args.download
|
23 |
RESULT_PATH = args.result
|
@@ -25,6 +25,7 @@ VIDEO_NAME = args.video_name
|
|
25 |
n_threshold = 5000
|
26 |
model_name = "text-davinci-003" # replace this to our own fintune model
|
27 |
|
|
|
28 |
# get source audio
|
29 |
if args.link is not None:
|
30 |
# Download audio from YouTube
|
@@ -34,8 +35,9 @@ if args.link is not None:
|
|
34 |
audio = video.streams.filter(only_audio=True, file_extension='mp4').first()
|
35 |
audio.download(DOWNLOAD_PATH)
|
36 |
print('Download Completed!')
|
37 |
-
except:
|
38 |
-
print("Connection Error")
|
|
|
39 |
audio_file = open('{}/{}'.format(DOWNLOAD_PATH, audio.default_filename), "rb")
|
40 |
VIDEO_NAME = audio.default_filename.split('.')[0]
|
41 |
else:
|
@@ -54,28 +56,34 @@ with open("{}/{}_en.txt".format(RESULT_PATH, VIDEO_NAME), 'r') as f:
|
|
54 |
N = len(script_en)
|
55 |
script_split = script_en.split('.')
|
56 |
|
|
|
|
|
|
|
|
|
57 |
script_arr = []
|
58 |
script = ""
|
59 |
for sentence in script_split:
|
60 |
-
if len(script) <= n_threshold:
|
61 |
-
|
62 |
-
script+=sentence
|
63 |
else:
|
64 |
-
script_arr.append(script)
|
65 |
-
script =
|
66 |
-
|
|
|
67 |
|
68 |
-
#
|
69 |
for s in script_arr:
|
|
|
70 |
response = openai.Completion.create(
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
)
|
79 |
|
80 |
-
with open("{}/{}_zh.txt"
|
81 |
-
f.write(response['choices'][0]['text'])
|
|
|
|
16 |
print("need video source")
|
17 |
exit()
|
18 |
|
19 |
+
openai.api_key = "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"
|
20 |
+
# openai.api_key = os.getenv("OPENAI_API_KEY")
|
21 |
|
22 |
DOWNLOAD_PATH = args.download
|
23 |
RESULT_PATH = args.result
|
|
|
25 |
n_threshold = 5000
|
26 |
model_name = "text-davinci-003" # replace this to our own fintune model
|
27 |
|
28 |
+
|
29 |
# get source audio
|
30 |
if args.link is not None:
|
31 |
# Download audio from YouTube
|
|
|
35 |
audio = video.streams.filter(only_audio=True, file_extension='mp4').first()
|
36 |
audio.download(DOWNLOAD_PATH)
|
37 |
print('Download Completed!')
|
38 |
+
except Exception as e:
|
39 |
+
print("Connection Error")
|
40 |
+
print(e)
|
41 |
audio_file = open('{}/{}'.format(DOWNLOAD_PATH, audio.default_filename), "rb")
|
42 |
VIDEO_NAME = audio.default_filename.split('.')[0]
|
43 |
else:
|
|
|
56 |
N = len(script_en)
|
57 |
script_split = script_en.split('.')
|
58 |
|
59 |
+
# Split the video script by sentences and create chunks within the token limit
|
60 |
+
n_threshold = 4096 # Token limit for the GPT-3 model
|
61 |
+
script_split = script_en.split('.')
|
62 |
+
|
63 |
script_arr = []
|
64 |
script = ""
|
65 |
for sentence in script_split:
|
66 |
+
if len(script) + len(sentence) + 1 <= n_threshold:
|
67 |
+
script += sentence + '.'
|
|
|
68 |
else:
|
69 |
+
script_arr.append(script.strip())
|
70 |
+
script = sentence + '.'
|
71 |
+
if script.strip():
|
72 |
+
script_arr.append(script.strip())
|
73 |
|
74 |
+
# Translate and save
|
75 |
for s in script_arr:
|
76 |
+
prompt = f"Please help me translate this into Chinese:\n\n{s}\n\n"
|
77 |
response = openai.Completion.create(
|
78 |
+
model=model_name,
|
79 |
+
prompt=prompt,
|
80 |
+
temperature=0.1,
|
81 |
+
max_tokens=2000,
|
82 |
+
top_p=1.0,
|
83 |
+
frequency_penalty=0.0,
|
84 |
+
presence_penalty=0.0
|
85 |
)
|
86 |
|
87 |
+
with open(f"{RESULT_PATH}/{VIDEO_NAME}_zh.txt", 'a+') as f:
|
88 |
+
f.write(response['choices'][0]['text'].strip())
|
89 |
+
f.write('\n')
|
requirement.txt
CHANGED
@@ -13,7 +13,6 @@ panda==0.3.1
|
|
13 |
pandas==1.5.3
|
14 |
python-dateutil==2.8.2
|
15 |
pytube==12.1.2
|
16 |
-
pytube3==9.6.4
|
17 |
pytz==2022.7.1
|
18 |
requests==2.28.2
|
19 |
six==1.16.0
|
|
|
13 |
pandas==1.5.3
|
14 |
python-dateutil==2.8.2
|
15 |
pytube==12.1.2
|
|
|
16 |
pytz==2022.7.1
|
17 |
requests==2.28.2
|
18 |
six==1.16.0
|