Spaces:

ruslanmv
/

WatsonX-WebChat

Running

App Files Community

ruslanmv committed on May 28

Commit

af69422

•

1 Parent(s): 7e496b6

minor fixes

Browse files

Files changed (3) hide show

app.py +3 -2
utils.py +13 -2
webchat.py +11 -24

app.py CHANGED Viewed

@@ -10,6 +10,7 @@ watsonx_project_id = ""
 api_key = ""
 def main():
     utils.get_credentials()
     st.set_page_config(layout="wide", page_title="RAG Web Demo", page_icon="")
     utils.load_css("styles.css")
     # Streamlit app title with style
@@ -44,7 +45,7 @@ def main():
         #collection_name = utils.create_collection_name(user_url)
         if button_clicked and user_url:
             # Invoke the LLM when the button is clicked
-            response = webchat.answer_questions_from_web(api_key, watsonx_project_id, user_url, question, collection_name)
             st.write(response)
     else:
         st.warning("Please provide API Key and Project ID in the sidebar.")
@@ -55,7 +56,7 @@ def main():
     clean_button_clicked = st.sidebar.button("Clean Memory")
     if clean_button_clicked :
         if collection_name:  # Check if collection_name is defined and not empty
-            utils.clear_collection(collection_name)
             st.sidebar.success("Memory cleared successfully!")
             print("Memory cleared successfully!")
         else:

 api_key = ""
 def main():
     utils.get_credentials()
+    client=utils.chromadb_client()
     st.set_page_config(layout="wide", page_title="RAG Web Demo", page_icon="")
     utils.load_css("styles.css")
     # Streamlit app title with style
         #collection_name = utils.create_collection_name(user_url)
         if button_clicked and user_url:
             # Invoke the LLM when the button is clicked
+            response = webchat.answer_questions_from_web(api_key, watsonx_project_id, user_url, question, collection_name,client)
             st.write(response)
     else:
         st.warning("Please provide API Key and Project ID in the sidebar.")
     clean_button_clicked = st.sidebar.button("Clean Memory")
     if clean_button_clicked :
         if collection_name:  # Check if collection_name is defined and not empty
+            utils.clear_collection(collection_name, client)
             st.sidebar.success("Memory cleared successfully!")
             print("Memory cleared successfully!")
         else:

utils.py CHANGED Viewed

@@ -19,9 +19,20 @@ def create_collection_name(url):
         return domain_parts[-2]  # Extracting the second-level domain
     else:
         return "base"
-def clear_collection(collection_name):
-    client = chromadb.Client()
     try:
         collection = client.get_collection(collection_name)
         if collection:

         return domain_parts[-2]  # Extracting the second-level domain
     else:
         return "base"
+def chromadb_client():
+    import chromadb
+    # Set up cache directory (consider user-defined location)
+    current_dir = os.getcwd()
+    # Replace 'my_custom_cache_path' with your desired location
+    custom_cache_path = os.path.join(current_dir, ".cache")
+    # Create settings object with custom cache path
+    settings = chromadb.Settings(persist_directory=custom_cache_path)
+     # Initialize client with custom settings
+    client = chromadb.Client(settings)
+    return client
+def clear_collection(collection_name,client):
     try:
         collection = client.get_collection(collection_name)
         if collection:

webchat.py CHANGED Viewed

@@ -15,6 +15,7 @@ from bs4 import BeautifulSoup
 import spacy
 import chromadb
 import en_core_web_md
 # Important: hardcoding the API key in Python code is not a best practice. We are using
 # this approach for the ease of demo setup. In a production application these variables
@@ -79,8 +80,6 @@ def get_model_test(model_type, max_tokens, min_tokens, decoding, temperature):
     return model
 # Set up cache directory (consider user-defined location)
 current_dir = os.getcwd()
 cache_dir = os.path.join(current_dir, ".cache")
@@ -95,6 +94,7 @@ model_name = 'sentence-transformers/all-MiniLM-L6-v2'
 model = SentenceTransformer(model_name, cache_folder=cache_dir)
 # Print confirmation message
 print(f"Model '{model_name}' downloaded and loaded from cache directory: {cache_dir}")
 # Embedding function
 class MiniLML6V2EmbeddingFunction(EmbeddingFunction):
     MODEL = model
@@ -122,7 +122,6 @@ def extract_text(url):
             # remove \xa0 which is used in html to avoid words break acorss lines.
             cleaned_text = raw_web_text.replace("\xa0", " ")
             return cleaned_text
         else:
             print(f"Failed to retrieve the page. Status code: {response.status_code}")
@@ -137,22 +136,10 @@ def split_text_into_sentences(text):
     cleaned_sentences = [s.strip() for s in sentences]
     return cleaned_sentences
-def create_embedding(url, collection_name):
-    # Set up cache directory (consider user-defined location)
-    current_dir = os.getcwd()
-    # Replace 'my_custom_cache_path' with your desired location
-    custom_cache_path = os.path.join(current_dir, ".cache")
-    # Create settings object with custom cache path
-    settings = chromadb.Settings(persist_directory=custom_cache_path)
     cleaned_text = extract_text(url)
     cleaned_sentences = split_text_into_sentences(cleaned_text)
-     # Initialize client with custom settings
-    client = chromadb.Client(settings)
     collection = client.get_or_create_collection(collection_name)
     # Upload text to chroma
     collection.upsert(
         documents=cleaned_sentences,
@@ -163,9 +150,9 @@ def create_embedding(url, collection_name):
     return collection
-def create_prompt_old(url, question, collection_name):
     # Create embeddings for the text file
-    collection = create_embedding(url, collection_name)
     # query relevant information
     relevant_chunks = collection.query(
@@ -181,10 +168,10 @@ def create_prompt_old(url, question, collection_name):
     return prompt
-def create_prompt(url, question, collection_name):
   try:
     # Create embeddings for the text file
-    collection = create_embedding(url, collection_name)
   except Exception as e:
     return f"Error creating embeddings: {e}"
@@ -222,7 +209,7 @@ def main():
     # Get the API key and project id and update global variables
     get_credentials()
     # Try diffrent URLs and questions
     url = "https://www.usbank.com/financialiq/manage-your-household/buy-a-car/own-electric-vehicles-learned-buying-driving-EVs.html"
@@ -231,10 +218,10 @@ def main():
     # question = "Can an EV be plugged in to a household outlet?"
     collection_name = "test_web_RAG"
-    answer_questions_from_web(api_key, watsonx_project_id, url, question, collection_name)
-def answer_questions_from_web(request_api_key, request_project_id, url, question, collection_name):
     # Update the global variable
     globals()["api_key"] = request_api_key
     globals()["watsonx_project_id"] = request_project_id
@@ -253,7 +240,7 @@ def answer_questions_from_web(request_api_key, request_project_id, url, question
     model = get_model(model_type, max_tokens, min_tokens, decoding, temperature, top_k, top_p)
     # Get the prompt
-    complete_prompt = create_prompt(url, question, collection_name)
     # Let's review the prompt
     print("----------------------------------------------------------------------------------------------------")

 import spacy
 import chromadb
 import en_core_web_md
+from utils import chromadb_client
 # Important: hardcoding the API key in Python code is not a best practice. We are using
 # this approach for the ease of demo setup. In a production application these variables
     return model
 # Set up cache directory (consider user-defined location)
 current_dir = os.getcwd()
 cache_dir = os.path.join(current_dir, ".cache")
 model = SentenceTransformer(model_name, cache_folder=cache_dir)
 # Print confirmation message
 print(f"Model '{model_name}' downloaded and loaded from cache directory: {cache_dir}")
 # Embedding function
 class MiniLML6V2EmbeddingFunction(EmbeddingFunction):
     MODEL = model
             # remove \xa0 which is used in html to avoid words break acorss lines.
             cleaned_text = raw_web_text.replace("\xa0", " ")
             return cleaned_text
         else:
             print(f"Failed to retrieve the page. Status code: {response.status_code}")
     cleaned_sentences = [s.strip() for s in sentences]
     return cleaned_sentences
+def create_embedding(url, collection_name,client):
     cleaned_text = extract_text(url)
     cleaned_sentences = split_text_into_sentences(cleaned_text)
     collection = client.get_or_create_collection(collection_name)
     # Upload text to chroma
     collection.upsert(
         documents=cleaned_sentences,
     return collection
+def create_prompt_old(url, question, collection_name, client):
     # Create embeddings for the text file
+    collection = create_embedding(url, collection_name, client)
     # query relevant information
     relevant_chunks = collection.query(
     return prompt
+def create_prompt(url, question, collection_name,client):
   try:
     # Create embeddings for the text file
+    collection = create_embedding(url, collection_name,client)
   except Exception as e:
     return f"Error creating embeddings: {e}"
     # Get the API key and project id and update global variables
     get_credentials()
+    client=chromadb_client()
     # Try diffrent URLs and questions
     url = "https://www.usbank.com/financialiq/manage-your-household/buy-a-car/own-electric-vehicles-learned-buying-driving-EVs.html"
     # question = "Can an EV be plugged in to a household outlet?"
     collection_name = "test_web_RAG"
+    answer_questions_from_web(api_key, watsonx_project_id, url, question, collection_name,client)
+def answer_questions_from_web(request_api_key, request_project_id, url, question, collection_name,client):
     # Update the global variable
     globals()["api_key"] = request_api_key
     globals()["watsonx_project_id"] = request_project_id
     model = get_model(model_type, max_tokens, min_tokens, decoding, temperature, top_k, top_p)
     # Get the prompt
+    complete_prompt = create_prompt(url, question, collection_name,client)
     # Let's review the prompt
     print("----------------------------------------------------------------------------------------------------")