Update app.py
app.py CHANGED
@@ -3,6 +3,7 @@ import openai, os
 
 from langchain.chains import LLMChain, RetrievalQA
 from langchain.chat_models import ChatOpenAI
+from langchain.document_loaders import PyPDFLoader
 from langchain.document_loaders.blob_loaders.youtube_audio import YoutubeAudioLoader
 from langchain.document_loaders.generic import GenericLoader
 from langchain.document_loaders.parsers import OpenAIWhisperParser
@@ -38,6 +39,8 @@ YOUTUBE_URL_5 = "https://www.youtube.com/shorts/3x95mw35dJY"
 YOUTUBE_URL_6 = "https://www.youtube.com/shorts/zg-DS23wq0c"
 YOUTUBE_URL_7 = "https://www.youtube.com/shorts/cS4fyhKZ8bQ"
 
+PDF_URL = "https://arxiv.org/pdf/2303.08774.pdf"
+
 MODEL_NAME = "gpt-4"
 
 def invoke(openai_api_key, use_rag, prompt):
@@ -46,6 +49,7 @@ def invoke(openai_api_key, use_rag, prompt):
                      temperature = 0)
     if (use_rag):
         # Document loading, splitting, and storage
+        docs = []
         #loader = GenericLoader(YoutubeAudioLoader([YOUTUBE_URL_1,
         #                                           YOUTUBE_URL_2,
         #                                           YOUTUBE_URL_3,
@@ -55,12 +59,16 @@ def invoke(openai_api_key, use_rag, prompt):
         #                                           YOUTUBE_URL_7], YOUTUBE_DIR),
         #                        OpenAIWhisperParser())
         #docs = loader.load()
-
-
-
-        #
-
-
+        ###docs.extend(loader.load())
+        loader = PyPDFLoader(PDF_URL)
+        docs.extend(loader.load())
+        #
+        text_splitter = RecursiveCharacterTextSplitter(chunk_overlap = 150,
+                                                       chunk_size = 1500)
+        splits = text_splitter.split_documents(docs)
+        vector_db = Chroma.from_documents(documents = splits,
+                                          embedding = OpenAIEmbeddings(),
+                                          persist_directory = CHROMA_DIR)
         # Document retrieval
         vector_db = Chroma(embedding_function = OpenAIEmbeddings(),
                            persist_directory = CHROMA_DIR)
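In short, the commit replaces the commented-out YouTube/Whisper ingestion with a PDF ingestion path: the GPT-4 technical report is loaded with PyPDFLoader, split by RecursiveCharacterTextSplitter (chunk_size 1500, chunk_overlap 150), and persisted to Chroma, which the unchanged "Document retrieval" lines then reopen from CHROMA_DIR. Below is a minimal, self-contained sketch of that path, a reading of the diff rather than the Space's exact code: it assumes the pre-0.1 langchain import layout used in app.py, pypdf installed, OPENAI_API_KEY set in the environment, and a placeholder CHROMA_DIR (the real value is defined elsewhere in app.py and does not appear in this diff).

# Sketch of the ingestion path introduced by this commit (assumptions noted above).
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma

PDF_URL = "https://arxiv.org/pdf/2303.08774.pdf"
CHROMA_DIR = "docs/chroma"  # placeholder; the real value is not shown in the diff

# Document loading: PyPDFLoader downloads a remote URL to a temp file first.
docs = []
loader = PyPDFLoader(PDF_URL)
docs.extend(loader.load())

# Splitting, with the same parameters as the diff.
text_splitter = RecursiveCharacterTextSplitter(chunk_overlap = 150,
                                               chunk_size = 1500)
splits = text_splitter.split_documents(docs)

# Storage: embed the chunks and persist them under CHROMA_DIR.
vector_db = Chroma.from_documents(documents = splits,
                                  embedding = OpenAIEmbeddings(),
                                  persist_directory = CHROMA_DIR)

# Retrieval: query the store through a RetrievalQA chain, as the imports in
# app.py suggest the rest of invoke() does.
llm = ChatOpenAI(model_name = "gpt-4", temperature = 0)
rag_chain = RetrievalQA.from_chain_type(llm, retriever = vector_db.as_retriever())
print(rag_chain.run("What does the GPT-4 technical report say about evaluation?"))

One design note visible in the diff itself: Chroma.from_documents(...) already returns a usable store, but the pre-existing lines under "# Document retrieval" immediately rebind vector_db to a fresh Chroma client over the same persist_directory, so retrieval reads from the persisted directory rather than from the object created during ingestion.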