Eason Lu committed on
Commit
54d9321
·
2 Parent(s): 04ae3b4 147a645

solve conflict

Browse files

Former-commit-id: e28f8408800aef82cd6bb3e5d4e980e96f286297

Files changed (2) hide show
  1. README.md +8 -2
  2. pipeline.py +11 -8
README.md CHANGED
@@ -8,11 +8,11 @@ pip install -r requirement.txt
8
 
9
  ## Usage
10
  ```
11
- usage: pipeline.py [-h] [--link LINK] [--local_path LOCAL_PATH] [--download DOWNLOAD] [--result RESULT] [--video_name VIDEO_NAME]
12
 
13
  quick start:
14
 
15
- example online: python3 pipeline.py --link https://www.youtube.com/watch?v=XbgFIkhMM3s --download ./downloads --result ./results --video_name uncle_roger_test
16
 
17
  example offline: python3 pipeline.py --local_path test_translation.m4a --result ./results --video_name test_translation
18
 
@@ -27,4 +27,10 @@ options:
27
  --result RESULT translate result path
28
  --video_name VIDEO_NAME
29
  video name
 
 
 
 
 
 
30
  ```
 
8
 
9
  ## Usage
10
  ```
11
+ usage: pipeline.py [-h] [--link LINK] [--local_path LOCAL_PATH] [--download DOWNLOAD] [--result RESULT] [--video_name VIDEO_NAME] [--model_name MODEL_NAME]
12
 
13
  quick start:
14
 
15
+ example online: python3 pipeline.py --link https://www.youtube.com/watch?v=61c4dn6851g --download ./downloads --result ./results --video_name SO_I_CHOSE_RANDOM
16
 
17
  example offline: python3 pipeline.py --local_path test_translation.m4a --result ./results --video_name test_translation
18
 
 
27
  --result RESULT translate result path
28
  --video_name VIDEO_NAME
29
  video name
30
+ --model_name MODEL_NAME
31
+ model name
32
+
33
+ If you cannot download the YouTube video, please follow the link below.
34
+ https://github.com/pytube/pytube/issues/1498
35
+
36
  ```
pipeline.py CHANGED
@@ -21,24 +21,27 @@ if args.link is None and args.local_path is None and args.srt_file is None:
21
  print("need video source or srt file")
22
  exit()
23
 
 
24
  openai.api_key = os.getenv("OPENAI_API_KEY")
25
  DOWNLOAD_PATH = args.download
26
  RESULT_PATH = args.output_dir
27
  VIDEO_NAME = args.video_name
28
- n_threshold = 1000 # Token limit for the GPT-3.5 model
29
- # model_name = "text-davinci-003" # replace this to our own fintune model
30
  model_name = args.model_name
31
- # model_name = "davinci"
32
 
33
  # get source audio
34
  if args.link is not None and args.local_path is None:
35
  # Download audio from YouTube
36
  video_link = args.link
 
 
37
  try:
38
  video = YouTube(video_link)
39
  audio = video.streams.filter(only_audio=True, file_extension='mp4').first()
40
- audio.download(DOWNLOAD_PATH)
41
- print('Download Completed!')
 
 
 
42
  except Exception as e:
43
  print("Connection Error")
44
  print(e)
@@ -84,7 +87,7 @@ assSub_en = srt2ass(srt_file_en, "default", "No", "Modest")
84
  print('ASS subtitle saved as: ' + assSub_en)
85
 
86
  # Split the video script by sentences and create chunks within the token limit
87
- n_threshold = 4096 # Token limit for the GPT-3 model
88
  script_split = script_input.split('.')
89
 
90
  script_arr = []
@@ -109,7 +112,7 @@ for s in script_arr:
109
  {"role": "system", "content": "You are a helpful assistant that translates English to Chinese and have decent background in starcraft2."},
110
  {"role": "user", "content": 'Translate the following English text to Chinese: "{}"'.format(s)}
111
  ],
112
- temperature=0.1
113
  )
114
  with open(f"{RESULT_PATH}/{VIDEO_NAME}_zh.srt", 'a+') as f:
115
  f.write(response['choices'][0]['message']['content'].strip())
@@ -131,4 +134,4 @@ for s in script_arr:
131
  f.write(response['choices'][0]['text'].strip())
132
 
133
  assSub_zh = srt2ass(f"{RESULT_PATH}/{VIDEO_NAME}_zh.srt", "default", "No", "Modest")
134
- print('ASS subtitle saved as: ' + assSub_zh)
 
21
  print("need video source or srt file")
22
  exit()
23
 
24
+ # set openai api key
25
  openai.api_key = os.getenv("OPENAI_API_KEY")
26
  DOWNLOAD_PATH = args.download
27
  RESULT_PATH = args.output_dir
28
  VIDEO_NAME = args.video_name
 
 
29
  model_name = args.model_name
 
30
 
31
  # get source audio
32
  if args.link is not None and args.local_path is None:
33
  # Download audio from YouTube
34
  video_link = args.link
35
+ video = None
36
+ audio = None
37
  try:
38
  video = YouTube(video_link)
39
  audio = video.streams.filter(only_audio=True, file_extension='mp4').first()
40
+ if audio:
41
+ audio.download(DOWNLOAD_PATH)
42
+ print('Download Completed!')
43
+ else:
44
+ print("Error: Audio stream not found")
45
  except Exception as e:
46
  print("Connection Error")
47
  print(e)
 
87
  print('ASS subtitle saved as: ' + assSub_en)
88
 
89
  # Split the video script by sentences and create chunks within the token limit
90
+ n_threshold = 1500 # Token limit for the GPT-3 model
91
  script_split = script_input.split('.')
92
 
93
  script_arr = []
 
112
  {"role": "system", "content": "You are a helpful assistant that translates English to Chinese and have decent background in starcraft2."},
113
  {"role": "user", "content": 'Translate the following English text to Chinese: "{}"'.format(s)}
114
  ],
115
+ temperature=0.15
116
  )
117
  with open(f"{RESULT_PATH}/{VIDEO_NAME}_zh.srt", 'a+') as f:
118
  f.write(response['choices'][0]['message']['content'].strip())
 
134
  f.write(response['choices'][0]['text'].strip())
135
 
136
  assSub_zh = srt2ass(f"{RESULT_PATH}/{VIDEO_NAME}_zh.srt", "default", "No", "Modest")
137
+ print('ASS subtitle saved as: ' + assSub_zh)