Chat_with_youtube / src /youtube_audio_loader.py
Boltuzamaki's picture
init
b7b243c
import os
from langchain.document_loaders.parsers.audio import (
OpenAIWhisperParser,
OpenAIWhisperParserLocal,
)
from langchain_community.document_loaders.blob_loaders.youtube_audio import (
YoutubeAudioLoader,
)
from langchain_community.document_loaders.generic import GenericLoader
def youtube_transcriber(youtube_video_link: str, local: bool = True):
    """Download a YouTube video's audio and transcribe it with Whisper.

    The audio is fetched into ``~/Downloads/YouTube`` by
    ``YoutubeAudioLoader`` and passed to a Whisper parser through
    ``GenericLoader``. The download directory is cleaned up afterwards,
    even when loading raises.

    Args:
        youtube_video_link: Link of the video to transcribe; it is passed
            to ``YoutubeAudioLoader`` as the only URL.
        local: When true (the default), parse with
            ``OpenAIWhisperParserLocal``; otherwise parse with
            ``OpenAIWhisperParser``.

    Returns:
        Whatever ``GenericLoader.load()`` returns for the downloaded audio.

    Note:
        Cleanup deletes *every* regular file found in
        ``~/Downloads/YouTube`` (not only files written by this call) and
        removes the directory itself if it ends up empty. Do not keep
        unrelated files there.
    """
    save_dir = os.path.expanduser("~/Downloads/YouTube")
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(save_dir, exist_ok=True)

    audio_loader = YoutubeAudioLoader([youtube_video_link], save_dir)
    # Only the parser differs between the local and the non-local mode.
    parser = OpenAIWhisperParserLocal() if local else OpenAIWhisperParser()
    loader = GenericLoader(audio_loader, parser)

    try:
        return loader.load()
    finally:
        # Run cleanup on failure too, so a broken download or transcription
        # does not leave audio files behind.
        for file_name in os.listdir(save_dir):
            file_path = os.path.join(save_dir, file_name)
            if os.path.isfile(file_path):
                os.remove(file_path)
        # Sub-directories are never deleted above, so only drop the
        # directory when nothing is left in it.
        if not os.listdir(save_dir):
            os.rmdir(save_dir)