Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -14,18 +14,13 @@ from langchain.document_loaders.blob_loaders.youtube_audio import YoutubeAudioLo
|
|
14 |
from langchain.document_loaders.generic import GenericLoader
|
15 |
from langchain.document_loaders.parsers import OpenAIWhisperParser
|
16 |
from langchain.schema import AIMessage, HumanMessage
|
17 |
-
from langchain.llms import HuggingFaceHub
|
18 |
-
from langchain.llms import HuggingFaceTextGenInference
|
19 |
from langchain.embeddings import HuggingFaceInstructEmbeddings, HuggingFaceEmbeddings, HuggingFaceBgeEmbeddings, HuggingFaceInferenceAPIEmbeddings
|
20 |
|
21 |
from langchain.embeddings.openai import OpenAIEmbeddings
|
22 |
-
from langchain.prompts import PromptTemplate
|
23 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
24 |
from langchain.vectorstores import Chroma
|
25 |
from chromadb.errors import InvalidDimensionException
|
26 |
|
27 |
-
#from langchain.vectorstores import MongoDBAtlasVectorSearch
|
28 |
-
#from pymongo import MongoClient
|
29 |
|
30 |
from dotenv import load_dotenv, find_dotenv
|
31 |
_ = load_dotenv(find_dotenv())
|
@@ -44,14 +39,6 @@ template = """Antworte in deutsch, wenn es nicht explizit anders gefordert wird.
|
|
44 |
llm_template = "Beantworte die Frage am Ende. " + template + "Frage: {question} Hilfreiche Antwort: "
|
45 |
rag_template = "Nutze die folgenden Kontext Teile, um die Frage zu beantworten am Ende. " + template + "{context} Frage: {question} Hilfreiche Antwort: "
|
46 |
|
47 |
-
#################################################
|
48 |
-
#Prompts - Zusammensetzung
|
49 |
-
#################################################
|
50 |
-
LLM_CHAIN_PROMPT = PromptTemplate(input_variables = ["question"],
|
51 |
-
template = llm_template)
|
52 |
-
#mit RAG
|
53 |
-
RAG_CHAIN_PROMPT = PromptTemplate(input_variables = ["context", "question"],
|
54 |
-
template = rag_template)
|
55 |
|
56 |
#################################################
|
57 |
# Konstanten
|
@@ -170,12 +157,6 @@ def document_storage_chroma(splits):
|
|
170 |
#HF embeddings--------------------------------------
|
171 |
#Chroma.from_documents(documents = splits, embedding = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2", model_kwargs={"device": "cpu"}, encode_kwargs={'normalize_embeddings': False}), persist_directory = PATH_WORK + CHROMA_DIR)
|
172 |
|
173 |
-
#Mongo DB die splits ablegen - vektorisiert...
|
174 |
-
def document_storage_mongodb(splits):
|
175 |
-
MongoDBAtlasVectorSearch.from_documents(documents = splits,
|
176 |
-
embedding = OpenAIEmbeddings(disallowed_special = ()),
|
177 |
-
collection = MONGODB_COLLECTION,
|
178 |
-
index_name = MONGODB_INDEX_NAME)
|
179 |
|
180 |
#Vektorstore vorbereiten...
|
181 |
#dokumente in chroma db vektorisiert ablegen können - die Db vorbereiten dafür
|
@@ -193,14 +174,6 @@ def document_retrieval_chroma(llm, prompt):
|
|
193 |
db = Chroma(embedding_function = embeddings, persist_directory = PATH_WORK + CHROMA_DIR)
|
194 |
return db
|
195 |
|
196 |
-
#dokumente in mongo db vektorisiert ablegen können - die Db vorbereiten dafür
|
197 |
-
def document_retrieval_mongodb(llm, prompt):
|
198 |
-
db = MongoDBAtlasVectorSearch.from_connection_string(MONGODB_URI,
|
199 |
-
MONGODB_DB_NAME + "." + MONGODB_COLLECTION_NAME,
|
200 |
-
OpenAIEmbeddings(disallowed_special = ()),
|
201 |
-
index_name = MONGODB_INDEX_NAME)
|
202 |
-
return db
|
203 |
-
|
204 |
|
205 |
###############################################
|
206 |
#Langchain anlegen
|
|
|
14 |
from langchain.document_loaders.generic import GenericLoader
|
15 |
from langchain.document_loaders.parsers import OpenAIWhisperParser
|
16 |
from langchain.schema import AIMessage, HumanMessage
|
|
|
|
|
17 |
from langchain.embeddings import HuggingFaceInstructEmbeddings, HuggingFaceEmbeddings, HuggingFaceBgeEmbeddings, HuggingFaceInferenceAPIEmbeddings
|
18 |
|
19 |
from langchain.embeddings.openai import OpenAIEmbeddings
|
|
|
20 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
21 |
from langchain.vectorstores import Chroma
|
22 |
from chromadb.errors import InvalidDimensionException
|
23 |
|
|
|
|
|
24 |
|
25 |
from dotenv import load_dotenv, find_dotenv
|
26 |
_ = load_dotenv(find_dotenv())
|
|
|
39 |
llm_template = "Beantworte die Frage am Ende. " + template + "Frage: {question} Hilfreiche Antwort: "
|
40 |
rag_template = "Nutze die folgenden Kontext Teile, um die Frage zu beantworten am Ende. " + template + "{context} Frage: {question} Hilfreiche Antwort: "
|
41 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
42 |
|
43 |
#################################################
|
44 |
# Konstanten
|
|
|
157 |
#HF embeddings--------------------------------------
|
158 |
#Chroma.from_documents(documents = splits, embedding = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2", model_kwargs={"device": "cpu"}, encode_kwargs={'normalize_embeddings': False}), persist_directory = PATH_WORK + CHROMA_DIR)
|
159 |
|
|
|
|
|
|
|
|
|
|
|
|
|
160 |
|
161 |
#Vektorstore vorbereiten...
|
162 |
#dokumente in chroma db vektorisiert ablegen können - die Db vorbereiten dafür
|
|
|
174 |
db = Chroma(embedding_function = embeddings, persist_directory = PATH_WORK + CHROMA_DIR)
|
175 |
return db
|
176 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
177 |
|
178 |
###############################################
|
179 |
#Langchain anlegen
|