File size: 2,622 Bytes

0b43703

from pytube import YouTube
import whisper
import os
import subprocess
from openai import OpenAI
import ssl


def download_youtube_audio(url, destination="."):
    """Download the audio track of a YouTube video and convert it to MP3.

    Args:
        url: URL of the YouTube video.
        destination: Directory the audio stream is downloaded into.

    Returns:
        Path of the resulting ``.mp3`` file.

    Raises:
        ValueError: If the video offers no audio-only stream.
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    # NOTE(review): this replaces the process-wide default HTTPS context
    # factory with one that skips certificate verification. It is kept because
    # the download presumably relies on it in the author's environment, but it
    # is a security risk and should be removed once certificates are set up.
    # It is applied before the YouTube object is created so that it is in
    # place no matter when the library first touches the network.
    ssl._create_default_https_context = ssl._create_unverified_context

    yt = YouTube(url)

    # .first() returns None when no stream matches the filter.
    audio_stream = yt.streams.filter(only_audio=True).first()
    if audio_stream is None:
        raise ValueError(f"No audio-only stream available for {url}")

    out_file = audio_stream.download(output_path=destination)

    base, ext = os.path.splitext(out_file)
    audio_file = base + '.mp3'

    if ext.lower() == '.mp3':
        # Already MP3: converting would make ffmpeg read and write the same
        # path, and the cleanup below would then delete the only copy.
        audio_file = out_file
    else:
        try:
            # -y: overwrite a stale MP3 instead of blocking on ffmpeg's
            # interactive prompt; check=True: surface conversion failures
            # rather than returning a path to a missing/corrupt file.
            subprocess.run(['ffmpeg', '-y', '-i', out_file, audio_file], check=True)
        finally:
            # The raw download is never needed again, success or failure.
            os.remove(out_file)

    print(f"Downloaded and converted to MP3: {audio_file}")
    return audio_file


def transcribe_audio(audio_file, model_name="base"):
    """Transcribe an audio file to text with a Whisper model.

    Args:
        audio_file: Path of the audio file to transcribe.
        model_name: Model identifier passed to ``whisper.load_model``.
            Defaults to ``"base"``, the model this function always used
            before the parameter existed.

    Returns:
        The ``"text"`` entry of the transcription result.
    """
    model = whisper.load_model(model_name)
    result = model.transcribe(audio_file)
    return result["text"]


def write_text_to_file(text, filename="transcribed_text.txt"):
    """Write ``text`` to ``filename``, replacing any existing content.

    Args:
        text: The string to write.
        filename: Path of the file to create or overwrite.
    """
    # Explicit UTF-8: transcripts routinely contain non-ASCII characters, and
    # the platform default encoding (e.g. cp1252) may be unable to encode them.
    with open(filename, "w", encoding="utf-8") as file:
        file.write(text)


def delete_file(file_path):
    """Remove the file at ``file_path`` from disk.

    Args:
        file_path: Path of the file to delete.
    """
    # os.unlink is the same operation as os.remove under its other name.
    os.unlink(file_path)


def process(url):
    """Download a YouTube video's audio, transcribe it, and summarize it.

    Args:
        url: URL of the YouTube video.

    Returns:
        The summary produced by ``summarize_text`` for the transcription.
    """
    file_path = download_youtube_audio(url)

    try:
        transcript = transcribe_audio(file_path)
    finally:
        # Remove the downloaded audio even when transcription fails, so a
        # failed run does not leave the file behind.
        delete_file(file_path)

    return summarize_text(transcript)


def summarize_text(prompt):
    """Ask the OpenAI chat API for a corrected summary of a transcription.

    Args:
        prompt: The transcription text, sent as the user message.

    Returns:
        The message content of the first choice in the API response.
    """
    # System instructions for the model; sent ahead of the transcription.
    system_instructions = (
        'You are a model that receives a transcription of a YouTube video. Your task is to correct any words '
        'that '
        'may be incorrect based on the context, and transform it into a well-structured summary of the entire '
        'video. Your summary should highlight important details and provide additional context when '
        'necessary. '
        'Aim to be detailed, particularly when addressing non-trivial aspects of the content. The summary '
        'should '
        'encompass at least 20-30% of the original text length.'
    )
    conversation = [
        {"role": "system", "content": system_instructions},
        {"role": "user", "content": prompt},
    ]

    # The API key is read from the OPENAI_API_KEY environment variable.
    api_client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))
    completion = api_client.chat.completions.create(
        model="gpt-4-turbo",
        messages=conversation,
    )

    return completion.choices[0].message.content


#def main():
#    print(process("https://www.youtube.com/watch?v=reUZRyXxUs4"))


#if __name__ == "__main__":
#    main()