Spaces:
Runtime error
Runtime error
File size: 844 Bytes
d6585f5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 |
#!/bin/bash
#
# Encode a range of corpus shards into dense embeddings with
# `python -m pyserini.encode`, one invocation per shard.
#
# Shards START_IDX..END_IDX (inclusive) are processed sequentially on the
# CUDA device selected by GPU_ID. Input is read from
#   $SHARDED_CORPUS_PREFIX/shard_<idx>
# and output is written to
#   $SHARDED_EMBEDDING_PREFIX/shard_<idx>
#
# A failing shard does not prevent later shards from being attempted, but it
# is reported on stderr and the script exits non-zero if any shard failed.

# Catch typos in variable names instead of silently expanding to "".
set -u

#SHARD_NUM=8
SHARDED_CORPUS_PREFIX="/root/Corpus/CAsT22_msmarcov2_kilt_flattened_8shards"
SHARDED_EMBEDDING_PREFIX="/ssd3/geonminkim/indexes/CAsT_21_22_msmarcov2_kilt/dense"
START_IDX=2
END_IDX=3
SHARD_NUM=8
GPU_ID=1

# Set to 1 as soon as any shard fails; used as the script's exit status.
failed=0

for ((SHARD_IDX = START_IDX; SHARD_IDX <= END_IDX; SHARD_IDX++)); do
  echo "encoding for SHARD_IDX = $SHARD_IDX"

  # Build the argument list as an array so every path stays a single word
  # even if it ever contains whitespace or glob characters.
  # NOTE(review): the corpus is read from a per-shard directory AND
  # --shard-id/--shard-num are passed as well; confirm the encoder does not
  # sub-shard each directory a second time.
  encode_args=(
    input
    --corpus "${SHARDED_CORPUS_PREFIX}/shard_${SHARD_IDX}"
    --fields text
    --shard-id "${SHARD_IDX}"
    --shard-num "${SHARD_NUM}"
    output
    --embeddings "${SHARDED_EMBEDDING_PREFIX}/shard_${SHARD_IDX}"
    --to-faiss
    encoder
    --encoder castorini/tct_colbert-v2-msmarco-cqe
    --fields text
    --device "cuda:${GPU_ID}"
    --batch 128
    --fp16
  )

  if ! python -m pyserini.encode "${encode_args[@]}"; then
    printf 'encoding failed for SHARD_IDX = %s\n' "$SHARD_IDX" >&2
    failed=1
  fi
done

exit "$failed"