Spaces:
Runtime error
Runtime error
Update build_quora_index.py
Browse files- build_quora_index.py +4 -2
build_quora_index.py
CHANGED
@@ -6,6 +6,9 @@ from sentence_transformers import SentenceTransformer
|
|
6 |
from qdrant_client import QdrantClient, models
|
7 |
|
8 |
|
|
|
|
|
|
|
9 |
def compute_embedding(sentences, emb_model):
|
10 |
return emb_model.encode(sentences=sentences)
|
11 |
|
@@ -27,8 +30,6 @@ def build_index():
|
|
27 |
api_key=os.environ['QDRANT_API_KEY'],
|
28 |
)
|
29 |
encoder = SentenceTransformer(model_name_or_path='BAAI/bge-small-en-v1.5')
|
30 |
-
MAX_QUESTIONS = 1000
|
31 |
-
BATCH_SIZE = 100
|
32 |
|
33 |
quora_ds = load_dataset(path='quora', split='train', streaming=True)
|
34 |
quora_questions = get_questions(ds=quora_ds)
|
@@ -41,6 +42,7 @@ def build_index():
|
|
41 |
)
|
42 |
)
|
43 |
|
|
|
44 |
question_batch = []
|
45 |
for idx, entry in enumerate(tqdm(quora_questions, desc='Uploading vector embeddings in batch size of {}'.format(BATCH_SIZE))):
|
46 |
if len(question_batch) < BATCH_SIZE:
|
|
|
6 |
from qdrant_client import QdrantClient, models
|
7 |
|
8 |
|
9 |
+
MAX_QUESTIONS = 1000
|
10 |
+
|
11 |
+
|
12 |
def compute_embedding(sentences, emb_model):
|
13 |
return emb_model.encode(sentences=sentences)
|
14 |
|
|
|
30 |
api_key=os.environ['QDRANT_API_KEY'],
|
31 |
)
|
32 |
encoder = SentenceTransformer(model_name_or_path='BAAI/bge-small-en-v1.5')
|
|
|
|
|
33 |
|
34 |
quora_ds = load_dataset(path='quora', split='train', streaming=True)
|
35 |
quora_questions = get_questions(ds=quora_ds)
|
|
|
42 |
)
|
43 |
)
|
44 |
|
45 |
+
BATCH_SIZE = 100
|
46 |
question_batch = []
|
47 |
for idx, entry in enumerate(tqdm(quora_questions, desc='Uploading vector embeddings in batch size of {}'.format(BATCH_SIZE))):
|
48 |
if len(question_batch) < BATCH_SIZE:
|