JiaenLiu committed on
Commit
66791b6
·
1 Parent(s): 7a7c7ac

small_fix increase token size

Browse files

Former-commit-id: 98d31138827b8cd27577205c3c21155a6f686772

Files changed (3) hide show
  1. README.md +6 -0
  2. pipeline.py +28 -20
  3. requirement.txt +0 -1
README.md CHANGED
@@ -10,6 +10,12 @@ pip install -r requirement.txt
10
  ```
11
  usage: pipeline.py [-h] [--link LINK] [--local_path LOCAL_PATH] [--download DOWNLOAD] [--result RESULT] [--video_name VIDEO_NAME]
12
 
 
 
 
 
 
 
13
  options:
14
  -h, --help show this help message and exit
15
  --link LINK youtube video link here
 
10
  ```
11
  usage: pipeline.py [-h] [--link LINK] [--local_path LOCAL_PATH] [--download DOWNLOAD] [--result RESULT] [--video_name VIDEO_NAME]
12
 
13
+ quick start:
14
+
15
+ example online: python3 pipeline.py --link https://www.youtube.com/watch?v=XbgFIkhMM3s --download ./downloads --result ./results --video_name uncle_roger_test
16
+
17
+ example offline: python3 pipeline.py --local_path test_translation.m4a --result ./results --video_name test_translation
18
+
19
  options:
20
  -h, --help show this help message and exit
21
  --link LINK youtube video link here
pipeline.py CHANGED
@@ -16,8 +16,8 @@ if args.link is None and args.local_path is None:
16
  print("need video source")
17
  exit()
18
 
19
- # openai.api_key = "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"
20
- openai.api_key = os.getenv("OPENAI_API_KEY")
21
 
22
  DOWNLOAD_PATH = args.download
23
  RESULT_PATH = args.result
@@ -25,6 +25,7 @@ VIDEO_NAME = args.video_name
25
  n_threshold = 5000
26
  model_name = "text-davinci-003" # replace this with our own fine-tuned model
27
 
 
28
  # get source audio
29
  if args.link is not None:
30
  # Download audio from YouTube
@@ -34,8 +35,9 @@ if args.link is not None:
34
  audio = video.streams.filter(only_audio=True, file_extension='mp4').first()
35
  audio.download(DOWNLOAD_PATH)
36
  print('Download Completed!')
37
- except:
38
- print("Connection Error")
 
39
  audio_file = open('{}/{}'.format(DOWNLOAD_PATH, audio.default_filename), "rb")
40
  VIDEO_NAME = audio.default_filename.split('.')[0]
41
  else:
@@ -54,28 +56,34 @@ with open("{}/{}_en.txt".format(RESULT_PATH, VIDEO_NAME), 'r') as f:
54
  N = len(script_en)
55
  script_split = script_en.split('.')
56
 
 
 
 
 
57
  script_arr = []
58
  script = ""
59
  for sentence in script_split:
60
- if len(script) <= n_threshold:
61
- n = len(sentence)
62
- script+=sentence
63
  else:
64
- script_arr.append(script)
65
- script = ""
66
- script_arr.append(script)
 
67
 
68
- # translate and save
69
  for s in script_arr:
 
70
  response = openai.Completion.create(
71
- model=model_name,
72
- prompt="Please healp me translate this into Chinese:\n\n{}\n\n".format(s),
73
- temperature=0.1,
74
- max_tokens=2000,
75
- top_p=1.0,
76
- frequency_penalty=0.0,
77
- presence_penalty=0.0
78
  )
79
 
80
- with open("{}/{}_zh.txt".format(RESULT_PATH, VIDEO_NAME), 'a+') as f:
81
- f.write(response['choices'][0]['text'])
 
 
16
  print("need video source")
17
  exit()
18
 
19
+ openai.api_key = "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"
20
+ # openai.api_key = os.getenv("OPENAI_API_KEY")
21
 
22
  DOWNLOAD_PATH = args.download
23
  RESULT_PATH = args.result
 
25
  n_threshold = 5000
26
  model_name = "text-davinci-003" # replace this with our own fine-tuned model
27
 
28
+
29
  # get source audio
30
  if args.link is not None:
31
  # Download audio from YouTube
 
35
  audio = video.streams.filter(only_audio=True, file_extension='mp4').first()
36
  audio.download(DOWNLOAD_PATH)
37
  print('Download Completed!')
38
+ except Exception as e:
39
+ print("Connection Error")
40
+ print(e)
41
  audio_file = open('{}/{}'.format(DOWNLOAD_PATH, audio.default_filename), "rb")
42
  VIDEO_NAME = audio.default_filename.split('.')[0]
43
  else:
 
56
  N = len(script_en)
57
  script_split = script_en.split('.')
58
 
59
+ # Split the video script by sentences and create chunks within the token limit
60
+ n_threshold = 4096 # Chunk size limit in characters (compared against len(), not a token count)
61
+ script_split = script_en.split('.')
62
+
63
  script_arr = []
64
  script = ""
65
  for sentence in script_split:
66
+ if len(script) + len(sentence) + 1 <= n_threshold:
67
+ script += sentence + '.'
 
68
  else:
69
+ script_arr.append(script.strip())
70
+ script = sentence + '.'
71
+ if script.strip():
72
+ script_arr.append(script.strip())
73
 
74
+ # Translate and save
75
  for s in script_arr:
76
+ prompt = f"Please help me translate this into Chinese:\n\n{s}\n\n"
77
  response = openai.Completion.create(
78
+ model=model_name,
79
+ prompt=prompt,
80
+ temperature=0.1,
81
+ max_tokens=2000,
82
+ top_p=1.0,
83
+ frequency_penalty=0.0,
84
+ presence_penalty=0.0
85
  )
86
 
87
+ with open(f"{RESULT_PATH}/{VIDEO_NAME}_zh.txt", 'a+') as f:
88
+ f.write(response['choices'][0]['text'].strip())
89
+ f.write('\n')
requirement.txt CHANGED
@@ -13,7 +13,6 @@ panda==0.3.1
13
  pandas==1.5.3
14
  python-dateutil==2.8.2
15
  pytube==12.1.2
16
- pytube3==9.6.4
17
  pytz==2022.7.1
18
  requests==2.28.2
19
  six==1.16.0
 
13
  pandas==1.5.3
14
  python-dateutil==2.8.2
15
  pytube==12.1.2
 
16
  pytz==2022.7.1
17
  requests==2.28.2
18
  six==1.16.0