alexkueck committed
Commit: f981623
Parent: 670e2d1

Update app.py

Files changed (1)
  1. app.py +0 -27
app.py CHANGED
@@ -14,18 +14,13 @@ from langchain.document_loaders.blob_loaders.youtube_audio import YoutubeAudioLoader
 from langchain.document_loaders.generic import GenericLoader
 from langchain.document_loaders.parsers import OpenAIWhisperParser
 from langchain.schema import AIMessage, HumanMessage
-from langchain.llms import HuggingFaceHub
-from langchain.llms import HuggingFaceTextGenInference
 from langchain.embeddings import HuggingFaceInstructEmbeddings, HuggingFaceEmbeddings, HuggingFaceBgeEmbeddings, HuggingFaceInferenceAPIEmbeddings
 
 from langchain.embeddings.openai import OpenAIEmbeddings
-from langchain.prompts import PromptTemplate
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain.vectorstores import Chroma
 from chromadb.errors import InvalidDimensionException
 
-#from langchain.vectorstores import MongoDBAtlasVectorSearch
-#from pymongo import MongoClient
 
 from dotenv import load_dotenv, find_dotenv
 _ = load_dotenv(find_dotenv())
@@ -44,14 +39,6 @@ template = """Antworte in deutsch, wenn es nicht explizit anders gefordert wird.
 llm_template = "Beantworte die Frage am Ende. " + template + "Frage: {question} Hilfreiche Antwort: "
 rag_template = "Nutze die folgenden Kontext Teile, um die Frage zu beantworten am Ende. " + template + "{context} Frage: {question} Hilfreiche Antwort: "
 
-#################################################
-#Prompts - Zusammensetzung
-#################################################
-LLM_CHAIN_PROMPT = PromptTemplate(input_variables = ["question"],
-                                  template = llm_template)
-#mit RAG
-RAG_CHAIN_PROMPT = PromptTemplate(input_variables = ["context", "question"],
-                                  template = rag_template)
 
 #################################################
 # Konstanten
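For context on what this hunk drops: LLM_CHAIN_PROMPT and RAG_CHAIN_PROMPT wrapped the llm_template and rag_template strings that remain in app.py. A minimal sketch of how such prompt objects are typically wired into LangChain chains follows; the LLMChain/RetrievalQA usage, the llm object and the db retriever are assumptions for illustration, not taken from this commit.

from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain, RetrievalQA

# Rebuild the prompt objects removed in this hunk (template strings as kept in app.py).
LLM_CHAIN_PROMPT = PromptTemplate(input_variables=["question"], template=llm_template)
RAG_CHAIN_PROMPT = PromptTemplate(input_variables=["context", "question"], template=rag_template)

# Plain LLM chain: {question} is filled directly from the user input.
# `llm` is assumed to be a LangChain LLM configured elsewhere in the app.
llm_chain = LLMChain(llm=llm, prompt=LLM_CHAIN_PROMPT)
answer = llm_chain.run(question="Was ist ein Vektorstore?")

# RAG variant: the retriever fills {context}, the user query fills {question}.
# `db` is assumed to be the Chroma store returned by document_retrieval_chroma.
rag_chain = RetrievalQA.from_chain_type(llm=llm,
                                        retriever=db.as_retriever(),
                                        chain_type_kwargs={"prompt": RAG_CHAIN_PROMPT})
answer = rag_chain.run("Was ist ein Vektorstore?")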
@@ -170,12 +157,6 @@ def document_storage_chroma(splits):
 #HF embeddings--------------------------------------
 #Chroma.from_documents(documents = splits, embedding = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2", model_kwargs={"device": "cpu"}, encode_kwargs={'normalize_embeddings': False}), persist_directory = PATH_WORK + CHROMA_DIR)
 
-#Mongo DB die splits ablegen - vektorisiert...
-def document_storage_mongodb(splits):
-    MongoDBAtlasVectorSearch.from_documents(documents = splits,
-                                            embedding = OpenAIEmbeddings(disallowed_special = ()),
-                                            collection = MONGODB_COLLECTION,
-                                            index_name = MONGODB_INDEX_NAME)
 
 #Vektorstore vorbereiten...
 #dokumente in chroma db vektorisiert ablegen können - die Db vorbereiten daüfur
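The removed document_storage_mongodb referenced MONGODB_* constants and a pymongo collection that never appear in this diff. A hypothetical sketch of that plumbing, with placeholder connection values rather than values from this repository, would look roughly like this:

from pymongo import MongoClient
from langchain.vectorstores import MongoDBAtlasVectorSearch
from langchain.embeddings.openai import OpenAIEmbeddings

# Placeholder connection settings - not taken from this repository.
MONGODB_URI = "mongodb+srv://<user>:<password>@<cluster>.mongodb.net"
MONGODB_DB_NAME = "chatbot"
MONGODB_COLLECTION_NAME = "splits"
MONGODB_INDEX_NAME = "default"

# The Atlas collection the vectorised splits would be written to.
MONGODB_COLLECTION = MongoClient(MONGODB_URI)[MONGODB_DB_NAME][MONGODB_COLLECTION_NAME]

def document_storage_mongodb(splits):
    # Embed the splits with OpenAI embeddings and store them in Atlas.
    MongoDBAtlasVectorSearch.from_documents(documents=splits,
                                            embedding=OpenAIEmbeddings(disallowed_special=()),
                                            collection=MONGODB_COLLECTION,
                                            index_name=MONGODB_INDEX_NAME)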
@@ -193,14 +174,6 @@ def document_retrieval_chroma(llm, prompt):
     db = Chroma(embedding_function = embeddings, persist_directory = PATH_WORK + CHROMA_DIR)
     return db
 
-#dokumente in mongo db vektorisiert ablegen können - die Db vorbereiten daüfür
-def document_retrieval_mongodb(llm, prompt):
-    db = MongoDBAtlasVectorSearch.from_connection_string(MONGODB_URI,
-                                                         MONGODB_DB_NAME + "." + MONGODB_COLLECTION_NAME,
-                                                         OpenAIEmbeddings(disallowed_special = ()),
-                                                         index_name = MONGODB_INDEX_NAME)
-    return db
-
 
 ###############################################
 #Langchain anlegen
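After this change the app stores and retrieves documents only through Chroma. A simplified usage sketch of that path follows; the embedding choice, the PATH_WORK/CHROMA_DIR values and the example query are assumptions for illustration, and the function signatures are reduced compared to app.py.

from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma

PATH_WORK = "."         # placeholder working directory
CHROMA_DIR = "/chroma"  # placeholder persist directory
embeddings = OpenAIEmbeddings(disallowed_special=())  # assumed embedding model

def document_storage_chroma(splits):
    # Vectorise the splits and persist them on disk.
    Chroma.from_documents(documents=splits, embedding=embeddings,
                          persist_directory=PATH_WORK + CHROMA_DIR)

def document_retrieval_chroma():
    # Re-open the persisted store for querying.
    return Chroma(embedding_function=embeddings,
                  persist_directory=PATH_WORK + CHROMA_DIR)

db = document_retrieval_chroma()
relevant_splits = db.similarity_search("Wie funktioniert der Chatbot?", k=4)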
 