Update app.py
app.py
CHANGED
@@ -1,39 +1,42 @@
 import gradio as gr
 import shutil, openai, os
 
+from langchain.chains import RetrievalQA
+from langchain.chat_models import ChatOpenAI
+from langchain.document_loaders.blob_loaders.youtube_audio import YoutubeAudioLoader
 from langchain.document_loaders.generic import GenericLoader
 from langchain.document_loaders.parsers import OpenAIWhisperParser
-from langchain.
+from langchain.embeddings.openai import OpenAIEmbeddings
+from langchain.prompts import PromptTemplate
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.vectorstores import Chroma
 
 from dotenv import load_dotenv, find_dotenv
 _ = load_dotenv(find_dotenv())
 
 #openai.api_key = os.environ["OPENAI_API_KEY"]
 
-
-
-
-
-Question: {question}
-Helpful Answer:"""
+template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know,
+don't try to make up an answer. Use three sentences maximum. Keep the answer as concise as possible. Always say "Thanks, Bernd Straehle 🚀"
+at the end of the answer. {context} Question: {question} Helpful Answer: """
+
 QA_CHAIN_PROMPT = PromptTemplate(input_variables = ["context", "question"], template = template)
-###
 
 def invoke(openai_api_key, youtube_url, prompt):
     openai.api_key = openai_api_key
-
-
-    loader = GenericLoader(
-        YoutubeAudioLoader([url], save_dir),
-        OpenAIWhisperParser()
-    )
+    youtube_dir = "docs/youtube/"
+    loader = GenericLoader(YoutubeAudioLoader([youtube_url], youtube_dir), OpenAIWhisperParser())
     docs = loader.load()
-
-
-
-
-
-
+    text_splitter = RecursiveCharacterTextSplitter(chunk_size = 1500, chunk_overlap = 150)
+    splits = text_splitter.split_documents(docs)
+    chroma_dir = "docs/chroma/"
+    vectordb = Chroma.from_documents(documents = splits, embedding = OpenAIEmbeddings(), persist_directory = chroma_dir)
+    llm = ChatOpenAI(model_name = "gpt-4", temperature = 0)
+    qa_chain = RetrievalQA.from_chain_type(llm, retriever = vectordb.as_retriever(), return_source_documents = True, chain_type_kwargs = {"prompt": QA_CHAIN_PROMPT})
+    result = qa_chain({"query": prompt})
+    shutil.rmtree(youtube_dir)
+    shutil.rmtree(chroma_dir)
+    return result["result"]
 
 description = """The app demonstrates how to use a <strong>Large Language Model</strong> (LLM) with <strong>Retrieval Augmented Generation</strong> (RAG) on external data.
 Enter an OpenAI API key, YouTube URL (external data), and prompt to search the video, analyse its sentiment, summarize it, and/or translate it, etc.\n\n