bstraehle committed on
Commit
6f02f68
1 Parent(s): 752918c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -20
app.py CHANGED
@@ -1,39 +1,42 @@
1
  import gradio as gr
2
  import shutil, openai, os
3
 
 
 
 
4
  from langchain.document_loaders.generic import GenericLoader
5
  from langchain.document_loaders.parsers import OpenAIWhisperParser
6
- from langchain.document_loaders.blob_loaders.youtube_audio import YoutubeAudioLoader
 
 
 
7
 
8
  from dotenv import load_dotenv, find_dotenv
9
  _ = load_dotenv(find_dotenv())
10
 
11
  #openai.api_key = os.environ["OPENAI_API_KEY"]
12
 
13
###
from langchain.prompts import PromptTemplate
# Prompt template for the QA chain: {context} is filled with retrieved document
# chunks and {question} with the user's prompt. The "Thanks, Bernd Straehle 🚀"
# sign-off is a deliberate watermark to show the custom prompt is in effect.
template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. Use three sentences maximum. Keep the answer as concise as possible. Always say "Thanks, Bernd Straehle 🚀" at the end of the answer.
{context}
Question: {question}
Helpful Answer:"""
# Shared prompt object consumed by the RetrievalQA chain built in invoke().
QA_CHAIN_PROMPT = PromptTemplate(input_variables = ["context", "question"], template = template)
###
21
 
22
def invoke(openai_api_key, youtube_url, prompt):
    """Download a YouTube video's audio, transcribe it with OpenAI Whisper, and
    return the transcript text.

    Args:
        openai_api_key: OpenAI API key used for the Whisper transcription call.
        youtube_url: URL of the YouTube video to transcribe.
        prompt: user prompt (currently unused here — the RAG step is still TODO).

    Returns:
        The transcript of the video as a single string.
    """
    openai.api_key = openai_api_key
    url = youtube_url
    save_dir = "docs/youtube/"
    loader = GenericLoader(
        YoutubeAudioLoader([url], save_dir),
        OpenAIWhisperParser()
    )
    try:
        docs = loader.load()
        # docs[0] holds the full transcript produced by the Whisper parser.
        content = docs[0].page_content
    finally:
        # Always remove the downloaded audio, even if loading/parsing raised;
        # ignore_errors avoids a secondary failure if the dir was never created.
        shutil.rmtree(save_dir, ignore_errors = True)
    #####
    #TODO
    #####
    return content
 
 
 
 
37
 
38
  description = """The app demonstrates how to use a <strong>Large Language Model</strong> (LLM) with <strong>Retrieval Augmented Generation</strong> (RAG) on external data.
39
  Enter an OpenAI API key, YouTube URL (external data), and prompt to search the video, analyse its sentiment, summarize it, and/or translate it, etc.\n\n
 
1
  import gradio as gr
2
  import shutil, openai, os
3
 
4
+ from langchain.chains import RetrievalQA
5
+ from langchain.chat_models import ChatOpenAI
6
+ from langchain.document_loaders.blob_loaders.youtube_audio import YoutubeAudioLoader
7
  from langchain.document_loaders.generic import GenericLoader
8
  from langchain.document_loaders.parsers import OpenAIWhisperParser
9
+ from langchain.embeddings.openai import OpenAIEmbeddings
10
+ from langchain.prompts import PromptTemplate
11
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
12
+ from langchain.vectorstores import Chroma
13
 
14
  from dotenv import load_dotenv, find_dotenv
15
  _ = load_dotenv(find_dotenv())
16
 
17
  #openai.api_key = os.environ["OPENAI_API_KEY"]
18
 
19
# Prompt template for the RetrievalQA chain: {context} is filled with retrieved
# transcript chunks and {question} with the user's prompt. The
# "Thanks, Bernd Straehle 🚀" sign-off is a deliberate watermark showing the
# custom prompt is in effect.
template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know,
don't try to make up an answer. Use three sentences maximum. Keep the answer as concise as possible. Always say "Thanks, Bernd Straehle 🚀"
at the end of the answer. {context} Question: {question} Helpful Answer: """

# Shared prompt object consumed by the RetrievalQA chain built in invoke().
QA_CHAIN_PROMPT = PromptTemplate(input_variables = ["context", "question"], template = template)
 
24
 
25
def invoke(openai_api_key, youtube_url, prompt):
    """Answer a prompt about a YouTube video using RAG.

    Downloads the video's audio, transcribes it with OpenAI Whisper, splits the
    transcript into chunks, embeds them into a Chroma vector store, and runs a
    RetrievalQA chain (GPT-4) with QA_CHAIN_PROMPT against the user's prompt.

    Args:
        openai_api_key: OpenAI API key for Whisper, embeddings, and chat.
        youtube_url: URL of the YouTube video to analyze.
        prompt: the user's question about the video.

    Returns:
        The chain's answer string.
    """
    openai.api_key = openai_api_key
    youtube_dir = "docs/youtube/"
    chroma_dir = "docs/chroma/"
    loader = GenericLoader(YoutubeAudioLoader([youtube_url], youtube_dir), OpenAIWhisperParser())
    try:
        docs = loader.load()
        text_splitter = RecursiveCharacterTextSplitter(chunk_size = 1500, chunk_overlap = 150)
        splits = text_splitter.split_documents(docs)
        vectordb = Chroma.from_documents(documents = splits, embedding = OpenAIEmbeddings(), persist_directory = chroma_dir)
        llm = ChatOpenAI(model_name = "gpt-4", temperature = 0)
        qa_chain = RetrievalQA.from_chain_type(llm, retriever = vectordb.as_retriever(), return_source_documents = True, chain_type_kwargs = {"prompt": QA_CHAIN_PROMPT})
        result = qa_chain({"query": prompt})
    finally:
        # Clean up downloaded audio and the on-disk vector store even when any
        # step above raises; ignore_errors avoids a secondary failure if a
        # directory was never created.
        shutil.rmtree(youtube_dir, ignore_errors = True)
        shutil.rmtree(chroma_dir, ignore_errors = True)
    return result["result"]
40
 
41
  description = """The app demonstrates how to use a <strong>Large Language Model</strong> (LLM) with <strong>Retrieval Augmented Generation</strong> (RAG) on external data.
42
  Enter an OpenAI API key, YouTube URL (external data), and prompt to search the video, analyse its sentiment, summarize it, and/or translate it, etc.\n\n