import argparse
import os

import openai
from pytube import YouTube

parser = argparse.ArgumentParser()
parser.add_argument("--link", help="youtube video link here", default=None, type=str, required=False)
parser.add_argument("--local_path", help="local video path here", default=None, type=str, required=False)
parser.add_argument("--download", help="download path", default='./downloads', type=str, required=False)
parser.add_argument("--result", help="translate result path", default='./results', type=str, required=False)
parser.add_argument("--video_name", help="video name", default='placeholder', type=str, required=False)
args = parser.parse_args()
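# Example invocation (the script name below is illustrative; adjust to the actual file name):
#   OPENAI_API_KEY=sk-... python translate_video.py --link "https://www.youtube.com/watch?v=<id>"
#   OPENAI_API_KEY=sk-... python translate_video.py --local_path ./downloads/audio.mp4 --video_name my_talk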

if args.link is None and args.local_path is None:
    print("need a video source: pass --link or --local_path")
    exit()

# The API key is read from the OPENAI_API_KEY environment variable.
openai.api_key = os.getenv("OPENAI_API_KEY")

DOWNLOAD_PATH = args.download
RESULT_PATH = args.result
VIDEO_NAME = args.video_name
n_threshold = 5000  # approximate character limit per translation request
model_name = "text-davinci-003"  # replace this with our own fine-tuned model

# make sure the output directories exist
os.makedirs(DOWNLOAD_PATH, exist_ok=True)
os.makedirs(RESULT_PATH, exist_ok=True)

# get the source audio
if args.link is not None:
    # Download the audio track from YouTube
    video_link = args.link
    try:
        video = YouTube(video_link)
        audio = video.streams.filter(only_audio=True, file_extension='mp4').first()
        audio.download(DOWNLOAD_PATH)
        print('Download Completed!')
    except Exception as e:
        print("Connection Error: {}".format(e))
        exit()
    audio_file = open('{}/{}'.format(DOWNLOAD_PATH, audio.default_filename), "rb")
    VIDEO_NAME = audio.default_filename.split('.')[0]
else:
    # Read from a local file
    audio_file = open(args.local_path, "rb")

# perform speech-to-text and save it as <video name>_en.txt under RESULT_PATH
if not os.path.exists("{}/{}_en.txt".format(RESULT_PATH, VIDEO_NAME)):
    transcript = openai.Audio.transcribe("whisper-1", audio_file)
    with open("{}/{}_en.txt".format(RESULT_PATH, VIDEO_NAME), 'w') as f:
        f.write(transcript['text'])

# split the transcript into chunks (OpenAI prompt limit: roughly 5000 characters)
with open("{}/{}_en.txt".format(RESULT_PATH, VIDEO_NAME), 'r') as f:
    script_en = f.read()
    script_split = script_en.split('.')

script_arr = []
script = ""
for sentence in script_split:
    # keep adding sentences until the chunk would exceed the threshold
    if len(script) + len(sentence) <= n_threshold:
        script += sentence + '.'
    else:
        script_arr.append(script)
        script = sentence + '.'
script_arr.append(script)

# translate each chunk and append the result
for s in script_arr:
    response = openai.Completion.create(
        model=model_name,
        prompt="Please help me translate this into Chinese:\n\n{}\n\n".format(s),
        temperature=0.1,
        max_tokens=2000,
        top_p=1.0,
        frequency_penalty=0.0,
        presence_penalty=0.0
    )

    with open("{}/{}_zh.txt".format(RESULT_PATH, VIDEO_NAME), 'a+') as f:
        f.write(response['choices'][0]['text'])
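# Note: the translation file is opened in append mode, so repeated runs keep adding to
# <RESULT_PATH>/<VIDEO_NAME>_zh.txt; delete that file first if you want a fresh translation.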