Spaces:
Runtime error
Runtime error
File size: 4,826 Bytes
54af26b 926eaab 54af26b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 |
import warnings
warnings.filterwarnings("ignore")
import os, requests, openai, cohere
import gradio as gr
from pathlib import Path
from langchain.document_loaders import YoutubeLoader
from langchain.docstore.document import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import CohereEmbeddings
from langchain.vectorstores import Qdrant
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.chains.summarize import load_summarize_chain
# Credentials and endpoints for the external services, read from the
# environment when the module is imported. Indexing os.environ directly means a
# missing variable raises KeyError immediately instead of failing later.
COHERE_API_KEY = os.environ["COHERE_API_KEY"]  # used for the Cohere embeddings
QDRANT_API_KEY = os.environ["QDRANT_API_KEY"]
QDRANT_CLUSTER_URL = os.environ["QDRANT_CLUSTER_URL"]
QDRANT_COLLECTION_NAME = os.environ["QDRANT_COLLECTION_NAME"]
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]  # used for the ChatOpenAI model
# Text file holding the question-answering prompt; yt_loader reads it and
# declares "question" and "context" as the template's input variables.
prompt_file = "prompt_template.txt"
def yt_loader(yt_url):
    """Index a YouTube video's transcript and yield progress, then a summary.

    This is a generator: every yielded string is a status message, and the
    last one is the final report (video id, title, channel and summary).

    The steps are:
      1. Validate the URL against YouTube's oEmbed endpoint.
      2. Load the transcript and split it into token-bounded chunks.
      3. Embed the chunks with Cohere and upload them to Qdrant.
      4. Build a RetrievalQA chain over that collection and publish it as the
         module-level ``qa`` so that ``chat`` can use it.
      5. Summarize the chunks with a map-reduce chain.

    Args:
        yt_url: URL of the YouTube video, as typed by the user.

    Yields:
        str: progress messages, an error message (after which the generator
        stops), or the final formatted summary.
    """
    yt_url = (yt_url or "").strip()
    if not yt_url:
        yield "Invalid Youtube URL. Kindly, paste here a valid Youtube URL."
        return

    try:
        # Send the video URL through ``params`` so it is percent-encoded; when
        # interpolated raw, its own "?" and "&" leak into the oEmbed query.
        res = requests.get(
            "https://www.youtube.com/oembed",
            params={"url": yt_url},
            timeout=10,  # never hang the UI on an unresponsive network
        )
    except requests.RequestException:
        yield "Could not reach Youtube to validate the URL. Kindly, try again later."
        return
    if res.status_code != 200:
        yield "Invalid Youtube URL. Kindly, paste here a valid Youtube URL."
        return

    yield "Extracting transcript from youtube url..."
    loader = YoutubeLoader.from_youtube_url(yt_url, add_video_info=True)
    transcript = loader.load()
    if not transcript:
        # Without this guard the metadata lookups below raise IndexError.
        yield "No transcript could be extracted from this Youtube video."
        return

    metadata = transcript[0].metadata
    video_id = metadata["source"]
    title = metadata["title"]
    author = metadata["author"]

    # Keep only the text: the stored chunks carry no video metadata.
    docs = [Document(page_content=part.page_content) for part in transcript]

    yield "Splitting transcript into chunks of text..."
    text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
        model_name="gpt-3.5-turbo",
        chunk_size=1024,
        chunk_overlap=64,
        separators=["\n\n", "\n", " "],
    )
    docs_splitter = text_splitter.split_documents(docs)
    cohere_embeddings = CohereEmbeddings(model="large", cohere_api_key=COHERE_API_KEY)

    yield "Uploading chunks of text into Qdrant..."
    qdrant = Qdrant.from_documents(
        docs_splitter,
        cohere_embeddings,
        url=QDRANT_CLUSTER_URL,
        prefer_grpc=True,
        api_key=QDRANT_API_KEY,
        collection_name=QDRANT_COLLECTION_NAME,
    )

    # Explicit encoding so the prompt reads the same on every platform.
    with open(prompt_file, "r", encoding="utf-8") as file:
        prompt_template = file.read()
    PROMPT = PromptTemplate(
        template=prompt_template, input_variables=["question", "context"]
    )
    llm = ChatOpenAI(
        model_name="gpt-3.5-turbo", temperature=0, openai_api_key=OPENAI_API_KEY
    )

    # ``chat`` reads this module-level chain, so it is replaced on every run.
    global qa
    qa = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=qdrant.as_retriever(),
        chain_type_kwargs={"prompt": PROMPT},
    )

    yield "Generating summarized text from transcript..."
    chain = load_summarize_chain(llm=llm, chain_type="map_reduce")
    summarized_text = chain.run(docs_splitter)

    # f-string formatting also tolerates non-string metadata values, which
    # plain "+" concatenation would reject with TypeError.
    yield (
        f"Video ID: {video_id}\n"
        f"Video Title: {title}\n"
        f"Channel Name: {author}\n"
        f"Summarized Text: {summarized_text}"
    )
def chat(chat_history, query):
    """Answer *query* with the retrieval chain and stream the reply.

    This is a generator. Each yielded value is a new history list: the
    previous turns plus one ``(query, partial_answer)`` pair whose answer
    grows by one character per yield, which produces a typing effect.

    Args:
        chat_history: list of ``(user, bot)`` pairs shown so far; ``None`` is
            treated as an empty history. The input list is never mutated.
        query: the user's question.

    Yields:
        list: the updated chat history to display.
    """
    history = list(chat_history or [])

    # Nothing to ask: leave the conversation as it is.
    if not query or not query.strip():
        yield history
        return

    # ``qa`` is created as a module global by ``yt_loader``; until a video has
    # been processed the name does not exist, so report that instead of
    # crashing with NameError.
    try:
        chain = qa
    except NameError:
        yield history + [
            (
                query,
                "The AI Assistant is not ready yet. Load a video in the "
                "'Load/Summarize Youtube Video' tab and click 'Build AI Bot!' first.",
            )
        ]
        return

    answer = chain.run(query)
    shown = ""
    for char in answer:
        shown += char
        yield history + [(query, shown)]
# Gradio front end: one tab builds the bot from a video URL, the other chats
# with it.
with gr.Blocks() as demo:
    gr.HTML(value="<h1>Welcome to AI Youtube Assistant</h1>")
    gr.Markdown(
        value=(
            "Generate transcript from youtube url. Get a summarized text of the video transcript and also ask questions to AI Youtube Assistant.<br>"
            "Click on 'Build AI Bot' to extract transcript from youtube url and get a summarized text.<br>"
            "After summarized text is generated, click on 'AI Assistant' tab and ask queries to the AI Assistant regarding information in the youtube video."
        )
    )

    # Tab 1: paste a URL, stream yt_loader's progress/summary into the output box.
    with gr.Tab(label="Load/Summarize Youtube Video"):
        text_input = gr.Textbox(
            placeholder="https://www.youtube.com/watch?v=AeJ9q45PfD0",
            label="Paste a valid youtube url",
        )
        text_output = gr.Textbox(
            label="Summarized transcript of the youtube video",
        )
        text_button = gr.Button("Build AI Bot!")
        text_button.click(fn=yt_loader, inputs=text_input, outputs=text_output)

    # Tab 2: question answering over the indexed transcript.
    with gr.Tab(label="AI Assistant"):
        chatbot = gr.Chatbot()
        query = gr.Textbox(
            label="Type your query here, then press 'enter' and scroll up for response",
        )
        chat_button = gr.Button("Submit Query!")
        clear = gr.Button("Clear Chat History!")

        # Pressing enter and clicking the button trigger the same handler.
        query.submit(fn=chat, inputs=[chatbot, query], outputs=chatbot)
        chat_button.click(fn=chat, inputs=[chatbot, query], outputs=chatbot)
        # Resetting the chatbot value to None empties the conversation.
        clear.click(fn=lambda: None, inputs=None, outputs=chatbot, queue=False)

# Queuing is enabled before launch because both handlers are generators.
demo.queue()
demo.launch()
|