heikowagner committed on
Commit
aeb550e
·
1 Parent(s): 39b12fb
Dockerfile CHANGED
@@ -9,14 +9,14 @@ RUN pip install -r requirements.txt
9
  #RUN python load_docs.py
10
  RUN --mount=type=secret,id=OPENAI_API_KEY \
11
  cat /run/secrets/OPENAI_API_KEY > .openaiapikey
12
- RUN mkdir /app/.cache
13
  RUN mkdir /nltk_data
14
  RUN mkdir /VectorStore
15
  RUN ls -la
16
  RUN python run.py
17
  RUN chmod 777 /VectorStore
18
  RUN chmod 777 /nltk_data
19
- RUN chmod 777 /app/.cache
20
  CMD ["streamlit", "run", "app.py", "--server.port=7860"]
21
  #CMD ls -la
22
  EXPOSE 7860
 
9
  #RUN python load_docs.py
10
  RUN --mount=type=secret,id=OPENAI_API_KEY \
11
  cat /run/secrets/OPENAI_API_KEY > .openaiapikey
12
+ RUN mkdir /.cache
13
  RUN mkdir /nltk_data
14
  RUN mkdir /VectorStore
15
  RUN ls -la
16
  RUN python run.py
17
  RUN chmod 777 /VectorStore
18
  RUN chmod 777 /nltk_data
19
+ RUN chmod 777 /.cache
20
  CMD ["streamlit", "run", "app.py", "--server.port=7860"]
21
  #CMD ls -la
22
  EXPOSE 7860
app/VectorStore/chroma-collections.parquet CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:79cf0a7bde715ca62bcfb8bf4f9a737f550dc282abdbde3a3d861114be54c984
3
  size 967
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9770f8b53664f3a358faee66aa23720c091943c176225f0bf2487bd1767d872a
3
  size 967
app/app.py CHANGED
@@ -44,7 +44,7 @@ else:
44
 
45
  st.write('You selected:', option['name'])
46
 
47
- chain = load_model.create_chain(llm, collection=option['name'], model_name=option['model_name'])
48
  try:
49
  query = st.text_area('Ask a question:', 'Hallo how are you today?')
50
  result = chain({"query": query})
 
44
 
45
  st.write('You selected:', option['name'])
46
 
47
+ chain = load_model.create_chain(llm, collection=option['name'], model_name=option['model_name'], metadata= option['metadata'])
48
  try:
49
  query = st.text_area('Ask a question:', 'Hallo how are you today?')
50
  result = chain({"query": query})
app/load_model.py CHANGED
@@ -88,7 +88,7 @@ def load_openai_model():
88
  def load_openai_embedding():
89
  return OpenAIEmbeddings()
90
 
91
- @st.cache_resource
92
  def load_embedding(model_name):
93
  embeddings = HuggingFaceInstructEmbeddings(
94
  query_instruction="Represent the query for retrieval: ",
@@ -113,7 +113,7 @@ def load_vectorstore(model_name, collection, metadata):
113
  )
114
  return vectorstore
115
 
116
- def create_chain(_llm, collection, model_name, metadata=None):
117
  vectorstore = load_vectorstore(model_name, collection, metadata=metadata)
118
  retriever = vectorstore.as_retriever(search_kwargs={"k": 4})
119
  chain = RetrievalQA.from_chain_type(llm=_llm, chain_type="stuff", retriever=retriever, return_source_documents=True)
 
88
  def load_openai_embedding():
89
  return OpenAIEmbeddings()
90
 
91
+ #@st.cache_resource
92
  def load_embedding(model_name):
93
  embeddings = HuggingFaceInstructEmbeddings(
94
  query_instruction="Represent the query for retrieval: ",
 
113
  )
114
  return vectorstore
115
 
116
+ def create_chain(_llm, collection, model_name, metadata):
117
  vectorstore = load_vectorstore(model_name, collection, metadata=metadata)
118
  retriever = vectorstore.as_retriever(search_kwargs={"k": 4})
119
  chain = RetrievalQA.from_chain_type(llm=_llm, chain_type="stuff", retriever=retriever, return_source_documents=True)
app/load_vectors.py CHANGED
@@ -52,7 +52,7 @@ def create_and_add(collection_name, sub_docs, model_name, metadata):
52
  vectorstore2 = load_vectorstore(model_name, collection_name, metadata = metadata)
53
  print( vectorstore2.similarity_search_with_score(query="What are AXAs green Goals?", k=4) )
54
 
55
- return vectorstore
56
 
57
  def load_from_file(files):
58
 
 
52
  vectorstore2 = load_vectorstore(model_name, collection_name, metadata = metadata)
53
  print( vectorstore2.similarity_search_with_score(query="What are AXAs green Goals?", k=4) )
54
 
55
+ return True
56
 
57
  def load_from_file(files):
58
 
app/utils.py CHANGED
@@ -22,7 +22,7 @@ def format_result_set(result):
22
  for document in source_documents:
23
  st.write(format_document(document))
24
 
25
- #@st.cache_resource
26
  def get_chroma_client():
27
  return chromadb.Client(Settings(chroma_db_impl="duckdb+parquet",
28
  persist_directory=persist_directory
@@ -31,9 +31,7 @@ def get_chroma_client():
31
  def retrieve_collections():
32
  client = get_chroma_client()
33
  all_collections = client.list_collections()
34
- print(all_collections)
35
- print(all_collections[0].metadata)
36
- collections = tuple( [{'name': collection.name, 'model_name': collection.metadata['model_name']} for collection in all_collections] )
37
  return collections
38
 
39
  def load_files():
@@ -69,11 +67,7 @@ def load_files():
69
  if st.button('Upload'):
70
  docs = load_from_file(uploaded_files)
71
  sub_docs = load_and_split(docs, chunk_size=int(chunk_size))
72
- print(sub_docs)
73
- #create_and_add(selected_collection["name"], sub_docs, selected_collection['model_name'], {"model_name": selected_collection['model_name']})
74
- create_and_add(selected_collection["name"], sub_docs, "hkunlp/instructor-large", metadata={"model_name": "hkunlp/instructor-large"})
75
-
76
- uploaded_files=None
77
  st.write("Upload succesful")
78
  else:
79
  st.write('Urls of Source Documents (Comma separated):')
@@ -84,9 +78,7 @@ def load_files():
84
  if st.button('Upload'):
85
  docs = load_from_web(urls)
86
  sub_docs = load_and_split(docs, chunk_size=int(chunk_size))
87
- print(selected_collection['model_name'])
88
- create_and_add(selected_collection["name"], sub_docs, selected_collection['model_name'], {"model_name": selected_collection['model_name']})
89
- uploaded_files=None
90
  st.write("Upload succesful")
91
  else:
92
  collection = st.text_area('Name of your new collection:', '')
 
22
  for document in source_documents:
23
  st.write(format_document(document))
24
 
25
+ @st.cache_resource
26
  def get_chroma_client():
27
  return chromadb.Client(Settings(chroma_db_impl="duckdb+parquet",
28
  persist_directory=persist_directory
 
31
  def retrieve_collections():
32
  client = get_chroma_client()
33
  all_collections = client.list_collections()
34
+ collections = tuple( [{'name': collection.name, 'model_name': collection.metadata['model_name'], "metadata": collection.metadata} for collection in all_collections] )
 
 
35
  return collections
36
 
37
  def load_files():
 
67
  if st.button('Upload'):
68
  docs = load_from_file(uploaded_files)
69
  sub_docs = load_and_split(docs, chunk_size=int(chunk_size))
70
+ vec1 = create_and_add(selected_collection["name"], sub_docs, selected_collection['model_name'], selected_collection['metadata'])
 
 
 
 
71
  st.write("Upload succesful")
72
  else:
73
  st.write('Urls of Source Documents (Comma separated):')
 
78
  if st.button('Upload'):
79
  docs = load_from_web(urls)
80
  sub_docs = load_and_split(docs, chunk_size=int(chunk_size))
81
+ vec2 = create_and_add(selected_collection["name"], sub_docs, selected_collection['model_name'], selected_collection['metadata'])
 
 
82
  st.write("Upload succesful")
83
  else:
84
  collection = st.text_area('Name of your new collection:', '')