Spaces:
Sleeping
Merge branch 'eason/main' of https://github.com/project-kxkg/project-t into eason/main
Browse files — pipeline.py (+3, −3)
pipeline.py
CHANGED
@@ -98,21 +98,21 @@ def get_srt_class(srt_file_en, result_path, video_name, audio_path, audio_file =
|
|
98 |
# using whisper to perform speech-to-text and save it in <video name>_en.txt under RESULT PATH.
|
99 |
srt_file_en = "{}/{}/{}_en.srt".format(result_path, video_name, video_name)
|
100 |
if not os.path.exists(srt_file_en):
|
101 |
-
|
|
|
102 |
# use OpenAI API for transcribe
|
103 |
if method == "api":
|
104 |
transcript = openai.Audio.transcribe("whisper-1", audio_file)
|
105 |
|
106 |
# use local whisper model
|
107 |
elif method == "basic":
|
108 |
-
model = whisper.load_model(whisper_model) # using base model in local machine (may use large model on our server)
|
109 |
transcript = model.transcribe(audio_path)
|
110 |
|
111 |
# use stable-whisper
|
112 |
elif method == "stable":
|
113 |
|
114 |
# use cuda if available
|
115 |
-
devices = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
|
116 |
model = stable_whisper.load_model(whisper_model, device = devices)
|
117 |
transcript = model.transcribe(audio_path, regroup = False, initial_prompt="Hello, welcome to my lecture. Are you good my friend?")
|
118 |
(
|
|
|
98 |
# using whisper to perform speech-to-text and save it in <video name>_en.txt under RESULT PATH.
|
99 |
srt_file_en = "{}/{}/{}_en.srt".format(result_path, video_name, video_name)
|
100 |
if not os.path.exists(srt_file_en):
|
101 |
+
|
102 |
+
devices = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
|
103 |
# use OpenAI API for transcribe
|
104 |
if method == "api":
|
105 |
transcript = openai.Audio.transcribe("whisper-1", audio_file)
|
106 |
|
107 |
# use local whisper model
|
108 |
elif method == "basic":
|
109 |
+
model = whisper.load_model(whisper_model, device = devices) # using base model in local machine (may use large model on our server)
|
110 |
transcript = model.transcribe(audio_path)
|
111 |
|
112 |
# use stable-whisper
|
113 |
elif method == "stable":
|
114 |
|
115 |
# use cuda if available
|
|
|
116 |
model = stable_whisper.load_model(whisper_model, device = devices)
|
117 |
transcript = model.transcribe(audio_path, regroup = False, initial_prompt="Hello, welcome to my lecture. Are you good my friend?")
|
118 |
(
|