yalaa committed on
Commit
ae1fd26
1 Parent(s): 7faa7d5

Update build_quora_index.py

Browse files
Files changed (1) hide show
  1. build_quora_index.py +4 -2
build_quora_index.py CHANGED
@@ -6,6 +6,9 @@ from sentence_transformers import SentenceTransformer
6
  from qdrant_client import QdrantClient, models
7
 
8
 
 
 
 
9
  def compute_embedding(sentences, emb_model):
10
  return emb_model.encode(sentences=sentences)
11
 
@@ -27,8 +30,6 @@ def build_index():
27
  api_key=os.environ['QDRANT_API_KEY'],
28
  )
29
  encoder = SentenceTransformer(model_name_or_path='BAAI/bge-small-en-v1.5')
30
- MAX_QUESTIONS = 1000
31
- BATCH_SIZE = 100
32
 
33
  quora_ds = load_dataset(path='quora', split='train', streaming=True)
34
  quora_questions = get_questions(ds=quora_ds)
@@ -41,6 +42,7 @@ def build_index():
41
  )
42
  )
43
 
 
44
  question_batch = []
45
  for idx, entry in enumerate(tqdm(quora_questions, desc='Uploading vector embeddings in batch size of {}'.format(BATCH_SIZE))):
46
  if len(question_batch) < BATCH_SIZE:
 
6
  from qdrant_client import QdrantClient, models
7
 
8
 
9
+ MAX_QUESTIONS = 1000
10
+
11
+
12
  def compute_embedding(sentences, emb_model):
13
  return emb_model.encode(sentences=sentences)
14
 
 
30
  api_key=os.environ['QDRANT_API_KEY'],
31
  )
32
  encoder = SentenceTransformer(model_name_or_path='BAAI/bge-small-en-v1.5')
 
 
33
 
34
  quora_ds = load_dataset(path='quora', split='train', streaming=True)
35
  quora_questions = get_questions(ds=quora_ds)
 
42
  )
43
  )
44
 
45
+ BATCH_SIZE = 100
46
  question_batch = []
47
  for idx, entry in enumerate(tqdm(quora_questions, desc='Uploading vector embeddings in batch size of {}'.format(BATCH_SIZE))):
48
  if len(question_batch) < BATCH_SIZE: