John Graham Reynolds committed on
Commit f8977f5 · 1 Parent(s): a1495e2

only cache chat model, vector store retriever, and embedding model for retriever

Files changed (1)
  1. chain.py +42 -30
chain.py CHANGED
@@ -33,31 +33,42 @@ class ChainBuilder:
     def extract_chat_history(chat_messages_array):
         return chat_messages_array[:-1]
 
-    # ** working logic for querying glossary embeddings
-    # Same embedding model we used to create embeddings of terms
-    # make sure we cache this so that it doesn't redownload each time, hindering Space start time if sleeping
-    # try adding this st caching decorator to ensure the embeddings class gets cached after downloading the entirety of the model
-    # does this cache to the given folder though? It does appear to populate the folder as expected after being run
-    @st.cache_resource  # will this work here? https://docs.streamlit.io/develop/concepts/architecture/caching
     def load_embedding_model(self):
-        embeddings = HuggingFaceEmbeddings(model_name=self.retriever_config.get("embedding_model"), cache_folder="./langchain_cache/")  # this cache isn't working because we're in the Docker container
+        model_name = self.retriever_config.get("embedding_model")
+
+        # make sure we cache this so that it doesn't redownload each time, hindering Space start time if sleeping
+        # add this st caching decorator to ensure the embeddings model gets cached once it has downloaded in full
+        # we cannot directly use @st.cache_resource on a method (a function within a class) that has a self argument
+        # does this cache to the given folder though? It does appear to populate the folder as expected after being run
+        @st.cache_resource  # will this work here? https://docs.streamlit.io/develop/concepts/architecture/caching
+        def load_and_cache_embedding_model(model_name):
+            embeddings = HuggingFaceEmbeddings(model_name=model_name, cache_folder="./langchain_cache/")  # this cache isn't working because we're in the Docker container
         # update this to read from a presaved cache of bge-large
-        return embeddings
+            return embeddings
+
+        return load_and_cache_embedding_model(model_name)
 
     def get_retriever(self):
+        endpoint = self.databricks_resources.get("vector_search_endpoint_name")
+        index_name = self.retriever_config.get("vector_search_index")
         embeddings = self.load_embedding_model()
-        # instantiate the vector store for similarity search in our chain
-        # need to make this a function and decorate it with @st.experimental_memo as above?
-        # We are only calling this initially when the Space starts and builds the chain. Can we expedite this process for users when opening up this Space?
-        # @st.cache_data  # TODO add this in
-        vector_search_as_retriever = DatabricksVectorSearch(
-            endpoint=self.databricks_resources.get("vector_search_endpoint_name"),
-            index_name=self.retriever_config.get("vector_search_index"),
-            embedding=embeddings,
-            text_column="name",
-            columns=["name", "description"],
-        ).as_retriever(search_kwargs=self.retriever_config.get("parameters"))
-        return vector_search_as_retriever
+        search_kwargs = self.retriever_config.get("parameters")
+
+        # we cannot directly use @st.cache_resource on a method (a function within a class) that has a self argument:
+        # Streamlit's caching hashes the function's code and input parameters, and self (the class instance) is not hashable by default
+        @st.cache_resource  # cache the Databricks vector store retriever
+        def get_and_cache_retriever(endpoint, index_name, embeddings, search_kwargs):
+            vector_search_as_retriever = DatabricksVectorSearch(
+                endpoint=endpoint,
+                index_name=index_name,
+                embedding=embeddings,
+                text_column="name",
+                columns=["name", "description"],
+            ).as_retriever(search_kwargs=search_kwargs)
+
+            return vector_search_as_retriever
+
+        return get_and_cache_retriever(endpoint, index_name, embeddings, search_kwargs)
 
     # # *** TODO Evaluate this block as it relates to "RAG Studio Review App" ***
     # # Enable the RAG Studio Review App to properly display retrieved chunks and evaluation suite to measure the retriever
@@ -70,7 +81,6 @@ class ChainBuilder:
     #     )
 
     # Method to format the terms and definitions returned by the retriever into the prompt
-    # TODO double check the contents here
     def format_context(self, retrieved_terms):
         chunk_template = self.retriever_config.get("chunk_template")
         chunk_contents = [
@@ -125,16 +135,20 @@ class ChainBuilder:
         )
         return query_rewrite_prompt
 
-    @st.cache_resource
     def get_model(self):
-        # Foundation Model for generation
-        model = ChatDatabricks(
-            endpoint=self.databricks_resources.get("llm_endpoint_name"),
-            extra_params=self.llm_config.get("llm_parameters"),
-        )
-        return model
+        endpoint = self.databricks_resources.get("llm_endpoint_name")
+        extra_params = self.llm_config.get("llm_parameters")
+
+        @st.cache_resource  # cache the DBRX Instruct model we load for repeated use in our chain for chat completion
+        def get_and_cache_model(endpoint, extra_params):
+            model = ChatDatabricks(
+                endpoint=endpoint,
+                extra_params=extra_params,
+            )
+            return model
+
+        return get_and_cache_model(endpoint, extra_params)
 
-    @st.cache_resource
     def build_chain(self):
         model = self.get_model()
         prompt = self.get_prompt()
@@ -169,7 +183,6 @@ class ChainBuilder:
             | model  # prompt passed to model
             | StrOutputParser()
         )
-
        return chain
 
     # ## Tell MLflow logging where to find your chain.
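The pattern this commit applies in all three methods deserves a plain statement: Streamlit's @st.cache_resource cannot decorate a bound method, because self is hashed along with the other inputs and a class instance is not hashable by default. The workaround is to pull the needed values off self, define a nested function that takes only those values, and cache that. A minimal, self-contained sketch of the idea; ExpensiveClient, Builder, and their parameters are hypothetical stand-ins, not code from chain.py:

import streamlit as st

class ExpensiveClient:
    """Hypothetical stand-in for a resource that is slow to construct."""
    def __init__(self, endpoint: str):
        self.endpoint = endpoint

class Builder:
    def __init__(self, endpoint: str):
        self.endpoint = endpoint

    def get_client(self) -> ExpensiveClient:
        endpoint = self.endpoint  # pull hashable values off self first

        @st.cache_resource  # keyed on the nested function's code and arguments, never on self
        def get_and_cache_client(endpoint: str) -> ExpensiveClient:
            return ExpensiveClient(endpoint)

        return get_and_cache_client(endpoint)

Repeated calls with the same endpoint then return the one cached instance, which is exactly the behavior the commit wants for the chat model, retriever, and embedding model.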
 
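One loose end the new comments gesture at: st.cache_resource also hashes every argument of the cached function, and the HuggingFaceEmbeddings instance passed into get_and_cache_retriever may not hash cleanly either. Streamlit's documented escape hatch is to prefix a parameter name with an underscore, which excludes that argument from the cache key. A hedged variant of the commit's function; the underscore parameter and the langchain_community import path are my assumptions, not code from this commit:

import streamlit as st
from langchain_community.vectorstores import DatabricksVectorSearch  # import path assumed

@st.cache_resource  # _embeddings is skipped when Streamlit computes the cache key
def get_and_cache_retriever(endpoint, index_name, _embeddings, search_kwargs):
    return DatabricksVectorSearch(
        endpoint=endpoint,
        index_name=index_name,
        embedding=_embeddings,
        text_column="name",
        columns=["name", "description"],
    ).as_retriever(search_kwargs=search_kwargs)

The trade-off is that swapping the embeddings object alone will not invalidate the cache; here that seems acceptable because the embedding model is fixed by retriever_config.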
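The remaining TODO, "update this to read from a presaved cache of bge-large", could be closed by downloading the model once at image build time and loading from the local snapshot, so a sleeping Space does not re-download on wake. A sketch under stated assumptions: the BAAI/bge-large-en-v1.5 repo id and the reuse of ./langchain_cache/ are guesses, since chain.py actually reads the model name from retriever_config:

from huggingface_hub import snapshot_download
from langchain_community.embeddings import HuggingFaceEmbeddings  # import path assumed

# run once at Docker build time (e.g. in a RUN step) so the weights are baked into the image
local_dir = snapshot_download(
    repo_id="BAAI/bge-large-en-v1.5",  # assumed model id; chain.py reads it from retriever_config
    cache_dir="./langchain_cache/",    # reuse the folder the commit already writes to
)

# at runtime, point the loader at the local snapshot instead of the Hub
embeddings = HuggingFaceEmbeddings(model_name=local_dir)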