Spaces:

StarPigeon
/

ViDove

Sleeping

App Files Files Community

Eason Lu committed on Mar 20, 2023

Commit

4f95b2f

1 Parent(s): 4ef2b46

change file dirs settings/add -ass option

Browse files

Former-commit-id: 52f749ea859ba9779df851eae14a00cfe295fbed

Files changed (1) hide show

pipeline.py +27 -14

pipeline.py CHANGED Viewed

@@ -12,8 +12,9 @@ parser.add_argument("--local_path", help="local video path here", default=None,
 parser.add_argument("--srt_file", help="srt file input path here", default=None, type=str, required=False)  # New argument
 parser.add_argument("--download", help="download path", default='./downloads', type=str, required=False)
 parser.add_argument("--output_dir", help="translate result path", default='./results', type=str, required=False)
-parser.add_argument("--video_name", help="video name", default='placeholder', type=str, required=False)
 parser.add_argument("--model_name", help="model name only support text-davinci-003 and gpt-3.5-turbo", type=str, required=False, default="gpt-3.5-turbo")
 args = parser.parse_args()
 # input should be a video file, a YouTube video link, or an srt file.
@@ -21,10 +22,18 @@ if args.link is None and args.local_path is None and args.srt_file is None:
     print("need video source or srt file")
     exit()
-# set openai api key
 openai.api_key = os.getenv("OPENAI_API_KEY")
 DOWNLOAD_PATH = args.download
 RESULT_PATH = args.output_dir
 VIDEO_NAME = args.video_name
 model_name = args.model_name
@@ -38,21 +47,24 @@ if args.link is not None and args.local_path is None:
         video = YouTube(video_link)
         audio = video.streams.filter(only_audio=True, file_extension='mp4').first()
         if audio:
-            audio.download(DOWNLOAD_PATH)
             print('Download Completed!')
         else:
             print("Error: Audio stream not found")
     except Exception as e:
         print("Connection Error")
         print(e)
-    audio_path = '{}/{}'.format(DOWNLOAD_PATH, audio.default_filename)
     audio_file = open(audio_path, "rb")
-    VIDEO_NAME = audio.default_filename.split('.')[0]
 elif args.local_path is not None:
     # Read from local
     audio_file= open(args.local_path, "rb")
     audio_path = args.local_path
 # Instead of using the script_en variable directly, we'll use script_input
 srt_file_en = args.srt_file
@@ -61,7 +73,7 @@ if srt_file_en is not None:
         script_input = f.read()
 else:
     # use whisper to perform speech-to-text and save the result as <video name>_en.srt under RESULT_PATH.
-    srt_file_en = "{}/{}_en.srt".format(RESULT_PATH, VIDEO_NAME)
     if not os.path.exists(srt_file_en):
         # use OpenAI API for transcribe
         # transcript = openai.Audio.transcribe("whisper-1", audio_file)
@@ -82,9 +94,10 @@ else:
         script_en = f.read()
         script_input = script_en
-from srt2ass import srt2ass
-assSub_en = srt2ass(srt_file_en, "default", "No", "Modest")
-print('ASS subtitle saved as: ' + assSub_en)
 # Split the video script by sentences and create chunks within the token limit
 n_threshold = 1500  # Token limit for the GPT-3 model
@@ -114,7 +127,7 @@ for s in script_arr:
             ],
             temperature=0.15
         )
-        with open(f"{RESULT_PATH}/{VIDEO_NAME}_zh.srt", 'a+') as f:
             f.write(response['choices'][0]['message']['content'].strip())
     if model_name == "text-davinci-003":
@@ -130,8 +143,8 @@ for s in script_arr:
             presence_penalty=0.0
         )
-        with open(f"{RESULT_PATH}/{VIDEO_NAME}_zh.srt", 'a+') as f:
             f.write(response['choices'][0]['text'].strip())
-assSub_zh = srt2ass(f"{RESULT_PATH}/{VIDEO_NAME}_zh.srt", "default", "No", "Modest")
-print('ASS subtitle saved as: ' + assSub_zh)

 parser.add_argument("--srt_file", help="srt file input path here", default=None, type=str, required=False)  # New argument
 parser.add_argument("--download", help="download path", default='./downloads', type=str, required=False)
 parser.add_argument("--output_dir", help="translate result path", default='./results', type=str, required=False)
+parser.add_argument("--video_name", help="video name, if use video link as input, the name will auto-filled by youtube video name", default='placeholder', type=str, required=False)
 parser.add_argument("--model_name", help="model name only support text-davinci-003 and gpt-3.5-turbo", type=str, required=False, default="gpt-3.5-turbo")
+parser.add_argument("-ass", help="set script output to .ass file", action='store_true')
 args = parser.parse_args()
 # input should be a video file, a YouTube video link, or an srt file.
     print("need video source or srt file")
     exit()
+# set up
 openai.api_key = os.getenv("OPENAI_API_KEY")
 DOWNLOAD_PATH = args.download
+if not os.path.exists(DOWNLOAD_PATH):
+    os.mkdir(DOWNLOAD_PATH)
+    os.mkdir(f'{DOWNLOAD_PATH}/audio')
+    os.mkdir(f'{DOWNLOAD_PATH}/video')
 RESULT_PATH = args.output_dir
+if not os.path.exists(RESULT_PATH):
+    os.mkdir(RESULT_PATH)
 VIDEO_NAME = args.video_name
 model_name = args.model_name
         video = YouTube(video_link)
         audio = video.streams.filter(only_audio=True, file_extension='mp4').first()
         if audio:
+            audio.download(f'{DOWNLOAD_PATH}/audio')
             print('Download Completed!')
         else:
             print("Error: Audio stream not found")
     except Exception as e:
         print("Connection Error")
         print(e)
+    audio_path = '{}/audio/{}'.format(DOWNLOAD_PATH, audio.default_filename)
     audio_file = open(audio_path, "rb")
+    if VIDEO_NAME == 'placeholder':
+        VIDEO_NAME = audio.default_filename.split('.')[0]
 elif args.local_path is not None:
     # Read from local
     audio_file= open(args.local_path, "rb")
     audio_path = args.local_path
+if not os.path.exists(f'{RESULT_PATH}/{VIDEO_NAME}'):
+    os.mkdir(f'{RESULT_PATH}/{VIDEO_NAME}')
 # Instead of using the script_en variable directly, we'll use script_input
 srt_file_en = args.srt_file
         script_input = f.read()
 else:
     # use whisper to perform speech-to-text and save the result as <video name>/<video name>_en.srt under RESULT_PATH.
+    srt_file_en = "{}/{}/{}_en.srt".format(RESULT_PATH, VIDEO_NAME, VIDEO_NAME)
     if not os.path.exists(srt_file_en):
         # use OpenAI API for transcribe
         # transcript = openai.Audio.transcribe("whisper-1", audio_file)
         script_en = f.read()
         script_input = script_en
+if args.ass:
+    from srt2ass import srt2ass
+    assSub_en = srt2ass(srt_file_en, "default", "No", "Modest")
+    print('ASS subtitle saved as: ' + assSub_en)
 # Split the video script by sentences and create chunks within the token limit
 n_threshold = 1500  # Token limit for the GPT-3 model
             ],
             temperature=0.15
         )
+        with open(f"{RESULT_PATH}/{VIDEO_NAME}/{VIDEO_NAME}_zh.srt", 'a+') as f:
             f.write(response['choices'][0]['message']['content'].strip())
     if model_name == "text-davinci-003":
             presence_penalty=0.0
         )
+        with open(f"{RESULT_PATH}/{VIDEO_NAME}/{VIDEO_NAME}_zh.srt", 'a+') as f:
             f.write(response['choices'][0]['text'].strip())
+if args.ass:
+    assSub_zh = srt2ass(f"{RESULT_PATH}/{VIDEO_NAME}/{VIDEO_NAME}_zh.srt", "default", "No", "Modest")
+    print('ASS subtitle saved as: ' + assSub_zh)