sayakpaul HF staff committed on
Commit
ec46884
1 Parent(s): 0876e5f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -4
app.py CHANGED
@@ -2,6 +2,7 @@ import os
2
  import json
3
  import re
4
  from sentence_transformers import SentenceTransformer, CrossEncoder
 
5
  from openai import OpenAI
6
  import hnswlib
7
  import numpy as np
@@ -19,11 +20,12 @@ MAX_INPUT_TOKEN_LENGTH = 4000
19
  EMBED_DIM = 1024
20
  K = 10
21
  EF = 100
22
- SEARCH_INDEX = "search_index.bin"
23
- EMBEDDINGS_FILE = "embeddings.npy"
24
- DOCUMENT_DATASET = "chunked_data.parquet"
25
  COSINE_THRESHOLD = 0.7
26
 
 
 
 
 
27
  torch_device = "cuda" if torch.cuda.is_available() else "cpu"
28
  print("Running on device:", torch_device)
29
  print("CPU threads:", torch.get_num_threads())
@@ -294,7 +296,7 @@ def check_input_token_length(message: str, chat_history: list[tuple[str, str]],
294
  )
295
 
296
 
297
- search_index = create_hnsw_index(EMBEDDINGS_FILE) # load_hnsw_index(SEARCH_INDEX)
298
  data_df = pd.read_parquet(DOCUMENT_DATASET).reset_index()
299
  with gr.Blocks(css="style.css") as demo:
300
  gr.Markdown(DESCRIPTION)
 
2
  import json
3
  import re
4
  from sentence_transformers import SentenceTransformer, CrossEncoder
5
+ from huggingface_hub import hf_hub_download
6
  from openai import OpenAI
7
  import hnswlib
8
  import numpy as np
 
20
  EMBED_DIM = 1024
21
  K = 10
22
  EF = 100
 
 
 
23
  COSINE_THRESHOLD = 0.7
24
 
25
+ SEARCH_INDEX = hf_hub_download(repo_id="sayakpaul/diffusers-qa-chatbot-artifacts", filename="search_index.bin", repo_type="dataset")
26
+ EMBEDDINGS_FILE = hf_hub_download(repo_id="sayakpaul/diffusers-qa-chatbot-artifacts", filename="embeddings.npy", repo_type="dataset")
27
+ DOCUMENT_DATASET = hf_hub_download(repo_id="sayakpaul/diffusers-qa-chatbot-artifacts", filename="chunked_data.parquet", repo_type="dataset")
28
+
29
  torch_device = "cuda" if torch.cuda.is_available() else "cpu"
30
  print("Running on device:", torch_device)
31
  print("CPU threads:", torch.get_num_threads())
 
296
  )
297
 
298
 
299
+ search_index = load_hnsw_index(SEARCH_INDEX) # create_hnsw_index(EMBEDDINGS_FILE)
300
  data_df = pd.read_parquet(DOCUMENT_DATASET).reset_index()
301
  with gr.Blocks(css="style.css") as demo:
302
  gr.Markdown(DESCRIPTION)