ruslanmv committed
Commit af69422
Parent(s): 7e496b6

minor fixes

Files changed (3):
  1. app.py +3 -2
  2. utils.py +13 -2
  3. webchat.py +11 -24
app.py CHANGED
@@ -10,6 +10,7 @@ watsonx_project_id = ""
 api_key = ""
 def main():
     utils.get_credentials()
+    client=utils.chromadb_client()
     st.set_page_config(layout="wide", page_title="RAG Web Demo", page_icon="")
     utils.load_css("styles.css")
     # Streamlit app title with style
@@ -44,7 +45,7 @@ def main():
     #collection_name = utils.create_collection_name(user_url)
     if button_clicked and user_url:
         # Invoke the LLM when the button is clicked
-        response = webchat.answer_questions_from_web(api_key, watsonx_project_id, user_url, question, collection_name)
+        response = webchat.answer_questions_from_web(api_key, watsonx_project_id, user_url, question, collection_name,client)
         st.write(response)
     else:
         st.warning("Please provide API Key and Project ID in the sidebar.")
@@ -55,7 +56,7 @@ def main():
     clean_button_clicked = st.sidebar.button("Clean Memory")
     if clean_button_clicked :
         if collection_name: # Check if collection_name is defined and not empty
-            utils.clear_collection(collection_name)
+            utils.clear_collection(collection_name, client)
             st.sidebar.success("Memory cleared successfully!")
             print("Memory cleared successfully!")
         else:
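With this change app.py builds the ChromaDB client once in main() via utils.chromadb_client() and passes it to both webchat.answer_questions_from_web and utils.clear_collection, instead of letting those functions construct their own clients. A minimal sketch of the resulting calling pattern follows; the credential, URL, question, and collection values are placeholders, not values from the repo.

    # Sketch of the new calling pattern in app.py (placeholder values throughout)
    import utils, webchat

    api_key = "<watsonx-api-key>"            # placeholder credential
    watsonx_project_id = "<project-id>"      # placeholder project id
    user_url = "https://example.com/page"    # hypothetical page to index
    question = "What is this page about?"    # hypothetical question
    collection_name = "example"              # hypothetical collection name

    client = utils.chromadb_client()         # created once per app run
    response = webchat.answer_questions_from_web(
        api_key, watsonx_project_id, user_url, question, collection_name, client
    )
    utils.clear_collection(collection_name, client)  # "Clean Memory" reuses the same client

Because Streamlit re-runs main() on every widget interaction, wrapping utils.chromadb_client() in st.cache_resource would keep a single client alive across reruns; that refinement is not part of this commit.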
utils.py CHANGED
@@ -19,9 +19,20 @@ def create_collection_name(url):
         return domain_parts[-2] # Extracting the second-level domain
     else:
         return "base"
+
+def chromadb_client():
+    import chromadb
+    # Set up cache directory (consider user-defined location)
+    current_dir = os.getcwd()
+    # Replace 'my_custom_cache_path' with your desired location
+    custom_cache_path = os.path.join(current_dir, ".cache")
+    # Create settings object with custom cache path
+    settings = chromadb.Settings(persist_directory=custom_cache_path)
+    # Initialize client with custom settings
+    client = chromadb.Client(settings)
+    return client
 
-def clear_collection(collection_name):
-    client = chromadb.Client()
+def clear_collection(collection_name,client):
     try:
         collection = client.get_collection(collection_name)
         if collection:
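utils.py gains a chromadb_client() factory that configures the client with persist_directory set to ./.cache under the current working directory, and clear_collection() now receives that client instead of creating a fresh chromadb.Client() of its own. A short usage sketch with an illustrative collection name is below. One caveat worth checking against the installed chromadb version: on newer releases a Settings object that only sets persist_directory may still yield an in-memory client, with chromadb.PersistentClient(path=...) being the documented persistent entry point; the sketch simply exercises the helpers as committed.

    # Usage sketch for the new helpers ("demo" is an illustrative collection name)
    from utils import chromadb_client, clear_collection

    client = chromadb_client()                     # configured with persist_directory=./.cache
    col = client.get_or_create_collection("demo")  # same call webchat.create_embedding uses
    col.upsert(documents=["hello world"], ids=["id-1"])
    clear_collection("demo", client)               # intended to clear what was stored for "demo"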
webchat.py CHANGED
@@ -15,6 +15,7 @@ from bs4 import BeautifulSoup
 import spacy
 import chromadb
 import en_core_web_md
+from utils import chromadb_client
 
 # Important: hardcoding the API key in Python code is not a best practice. We are using
 # this approach for the ease of demo setup. In a production application these variables
@@ -79,8 +80,6 @@ def get_model_test(model_type, max_tokens, min_tokens, decoding, temperature):
 
     return model
 
-
-
 # Set up cache directory (consider user-defined location)
 current_dir = os.getcwd()
 cache_dir = os.path.join(current_dir, ".cache")
@@ -95,6 +94,7 @@ model_name = 'sentence-transformers/all-MiniLM-L6-v2'
 model = SentenceTransformer(model_name, cache_folder=cache_dir)
 # Print confirmation message
 print(f"Model '{model_name}' downloaded and loaded from cache directory: {cache_dir}")
+
 # Embedding function
 class MiniLML6V2EmbeddingFunction(EmbeddingFunction):
     MODEL = model
@@ -122,7 +122,6 @@ def extract_text(url):
         # remove \xa0 which is used in html to avoid words break acorss lines.
         cleaned_text = raw_web_text.replace("\xa0", " ")
         return cleaned_text
-
     else:
         print(f"Failed to retrieve the page. Status code: {response.status_code}")
 
@@ -137,22 +136,10 @@ def split_text_into_sentences(text):
     cleaned_sentences = [s.strip() for s in sentences]
     return cleaned_sentences
 
-
-def create_embedding(url, collection_name):
-    # Set up cache directory (consider user-defined location)
-    current_dir = os.getcwd()
-    # Replace 'my_custom_cache_path' with your desired location
-    custom_cache_path = os.path.join(current_dir, ".cache")
-    # Create settings object with custom cache path
-    settings = chromadb.Settings(persist_directory=custom_cache_path)
-
+def create_embedding(url, collection_name,client):
     cleaned_text = extract_text(url)
     cleaned_sentences = split_text_into_sentences(cleaned_text)
-    # Initialize client with custom settings
-    client = chromadb.Client(settings)
-
     collection = client.get_or_create_collection(collection_name)
-
     # Upload text to chroma
     collection.upsert(
         documents=cleaned_sentences,
@@ -163,9 +150,9 @@ def create_embedding(url, collection_name):
     return collection
 
 
-def create_prompt_old(url, question, collection_name):
+def create_prompt_old(url, question, collection_name, client):
     # Create embeddings for the text file
-    collection = create_embedding(url, collection_name)
+    collection = create_embedding(url, collection_name, client)
 
     # query relevant information
     relevant_chunks = collection.query(
@@ -181,10 +168,10 @@ def create_prompt_old(url, question, collection_name):
 
     return prompt
 
-def create_prompt(url, question, collection_name):
+def create_prompt(url, question, collection_name,client):
     try:
         # Create embeddings for the text file
-        collection = create_embedding(url, collection_name)
+        collection = create_embedding(url, collection_name,client)
     except Exception as e:
         return f"Error creating embeddings: {e}"
 
@@ -222,7 +209,7 @@ def main():
 
     # Get the API key and project id and update global variables
     get_credentials()
-
+    client=chromadb_client()
     # Try diffrent URLs and questions
     url = "https://www.usbank.com/financialiq/manage-your-household/buy-a-car/own-electric-vehicles-learned-buying-driving-EVs.html"
 
@@ -231,10 +218,10 @@ def main():
     # question = "Can an EV be plugged in to a household outlet?"
     collection_name = "test_web_RAG"
 
-    answer_questions_from_web(api_key, watsonx_project_id, url, question, collection_name)
+    answer_questions_from_web(api_key, watsonx_project_id, url, question, collection_name,client)
 
 
-def answer_questions_from_web(request_api_key, request_project_id, url, question, collection_name):
+def answer_questions_from_web(request_api_key, request_project_id, url, question, collection_name,client):
     # Update the global variable
     globals()["api_key"] = request_api_key
     globals()["watsonx_project_id"] = request_project_id
@@ -253,7 +240,7 @@ def answer_questions_from_web(request_api_key, request_project_id, url, question
     model = get_model(model_type, max_tokens, min_tokens, decoding, temperature, top_k, top_p)
 
     # Get the prompt
-    complete_prompt = create_prompt(url, question, collection_name)
+    complete_prompt = create_prompt(url, question, collection_name,client)
 
     # Let's review the prompt
     print("----------------------------------------------------------------------------------------------------")