File size: 1,041 Bytes
b7b243c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import os

from langchain.document_loaders.parsers.audio import (
    OpenAIWhisperParser,
    OpenAIWhisperParserLocal,
)
from langchain_community.document_loaders.blob_loaders.youtube_audio import (
    YoutubeAudioLoader,
)
from langchain_community.document_loaders.generic import GenericLoader


def youtube_transcriber(youtube_video_link, local=True):
    urls = [youtube_video_link]

    save_dir = os.path.expanduser("~/Downloads/YouTube")
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    if local:
        loader = GenericLoader(
            YoutubeAudioLoader(urls, save_dir), OpenAIWhisperParserLocal()
        )
    else:
        loader = GenericLoader(
            YoutubeAudioLoader(urls, save_dir), OpenAIWhisperParser()
        )

    docs = loader.load()

    for file_name in os.listdir(save_dir):
        file_path = os.path.join(save_dir, file_name)
        if os.path.isfile(file_path):
            os.remove(file_path)

    if not os.listdir(save_dir):
        os.rmdir(save_dir)

    return docs