ArthurChen189
committed on
Commit
•
23cefb2
1
Parent(s):
07df3b7
update app
Browse files
app.py
CHANGED
@@ -9,13 +9,14 @@ sys.path.append(str(path_root))
|
|
9 |
|
10 |
|
11 |
encoder_index_map = {
|
12 |
-
'uniCOIL': ('UniCoil', 'index-unicoil'),
|
13 |
-
'SPLADE++ Ensemble Distil': ('SpladePlusPlusEnsembleDistil', 'index-splade-pp-ed'),
|
14 |
-
'SPLADE++ Self Distil': ('SpladePlusPlusSelfDistil', 'index-splade-pp-sd')
|
15 |
}
|
16 |
|
17 |
index = 'index-splade-pp-ed'
|
18 |
encoder = 'SpladePlusPlusEnsembleDistil'
|
|
|
19 |
|
20 |
st.set_page_config(page_title="Pyserini with ONNX Runtime",
|
21 |
page_icon='πΈ', layout="centered")
|
@@ -24,14 +25,33 @@ cola, colb, colc = st.columns([5, 4, 5])
|
|
24 |
with colb:
|
25 |
st.image("logo.jpeg")
|
26 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
27 |
colaa, colbb, colcc = st.columns([1, 8, 1])
|
28 |
with colbb:
|
29 |
encoder = st.select_slider(
|
30 |
-
'Select a query encoder
|
31 |
options=['uniCOIL', 'SPLADE++ Ensemble Distil', 'SPLADE++ Self Distil'])
|
32 |
st.write('Now Running Encoder: ', encoder)
|
33 |
|
34 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
|
36 |
col1, col2 = st.columns([9, 1])
|
37 |
with col1:
|
@@ -41,8 +61,6 @@ with col2:
|
|
41 |
st.write('#')
|
42 |
button_clicked = st.button("π")
|
43 |
|
44 |
-
searcher = LuceneImpactSearcher(
|
45 |
-
f'indexes/{index}', f'{encoder}', encoder_type='onnx')
|
46 |
|
47 |
if search_query or button_clicked:
|
48 |
num_results = None
|
@@ -55,10 +73,13 @@ if search_query or button_clicked:
|
|
55 |
for i, result in enumerate(search_results[:10]):
|
56 |
result_score = result.score
|
57 |
result_id = result.docid
|
58 |
-
|
|
|
59 |
|
60 |
try:
|
61 |
st.write(output, unsafe_allow_html=True)
|
|
|
|
|
62 |
|
63 |
except:
|
64 |
pass
|
|
|
9 |
|
10 |
|
11 |
encoder_index_map = {
|
12 |
+
'uniCOIL': ('UniCoil', 'castorini/unicoil-noexp-msmarco-passage', 'index-unicoil'),
|
13 |
+
'SPLADE++ Ensemble Distil': ('SpladePlusPlusEnsembleDistil', 'naver/splade-cocondenser-ensembledistil', 'index-splade-pp-ed'),
|
14 |
+
'SPLADE++ Self Distil': ('SpladePlusPlusSelfDistil', 'naver/splade-cocondenser-ensembledistil', 'index-splade-pp-sd')
|
15 |
}
|
16 |
|
17 |
index = 'index-splade-pp-ed'
|
18 |
encoder = 'SpladePlusPlusEnsembleDistil'
|
19 |
+
encoder_index = 0
|
20 |
|
21 |
st.set_page_config(page_title="Pyserini with ONNX Runtime",
|
22 |
page_icon='πΈ', layout="centered")
|
|
|
25 |
with colb:
|
26 |
st.image("logo.jpeg")
|
27 |
|
28 |
+
|
29 |
+
colaa, colbb, colcc = st.columns([1, 8, 1])
|
30 |
+
with colbb:
|
31 |
+
runtime = st.select_slider(
|
32 |
+
'Select a runtime type',
|
33 |
+
options=['PyTorch', 'ONNX Runtime'])
|
34 |
+
st.write('Now using: ', runtime)
|
35 |
+
|
36 |
+
|
37 |
colaa, colbb, colcc = st.columns([1, 8, 1])
|
38 |
with colbb:
|
39 |
encoder = st.select_slider(
|
40 |
+
'Select a query encoder',
|
41 |
options=['uniCOIL', 'SPLADE++ Ensemble Distil', 'SPLADE++ Self Distil'])
|
42 |
st.write('Now Running Encoder: ', encoder)
|
43 |
|
44 |
+
if runtime == 'PyTorch':
|
45 |
+
runtime = 'pytorch'
|
46 |
+
runtime_index = 1
|
47 |
+
else:
|
48 |
+
runtime = 'onnx'
|
49 |
+
runtime_index = 0
|
50 |
+
|
51 |
+
encoder, index = encoder_index_map[encoder][runtime_index], encoder_index_map[encoder][2]
|
52 |
+
|
53 |
+
searcher = LuceneImpactSearcher(
|
54 |
+
f'indexes/{index}', f'{encoder}', encoder_type=f'{runtime}')
|
55 |
|
56 |
col1, col2 = st.columns([9, 1])
|
57 |
with col1:
|
|
|
61 |
st.write('#')
|
62 |
button_clicked = st.button("π")
|
63 |
|
|
|
|
|
64 |
|
65 |
if search_query or button_clicked:
|
66 |
num_results = None
|
|
|
73 |
for i, result in enumerate(search_results[:10]):
|
74 |
result_score = result.score
|
75 |
result_id = result.docid
|
76 |
+
contents = json.loads(result.raw)["contents"]
|
77 |
+
output = f'<div class="row"> <b>Rank</b>: {i+1} | <b>Document ID</b>: {result_id} | <b>Score</b>:{result_score:.2f}</div>'
|
78 |
|
79 |
try:
|
80 |
st.write(output, unsafe_allow_html=True)
|
81 |
+
st.write(
|
82 |
+
f'<div class="row">{contents}</div>', unsafe_allow_html=True)
|
83 |
|
84 |
except:
|
85 |
pass
|
pyserini/search/lucene/__pycache__/_impact_searcher.cpython-310.pyc
CHANGED
Binary files a/pyserini/search/lucene/__pycache__/_impact_searcher.cpython-310.pyc and b/pyserini/search/lucene/__pycache__/_impact_searcher.cpython-310.pyc differ
|
|
pyserini/search/lucene/_impact_searcher.py
CHANGED
@@ -142,7 +142,7 @@ class LuceneImpactSearcher:
|
|
142 |
|
143 |
encoded_query = self.encode(q)
|
144 |
|
145 |
-
jquery =
|
146 |
if self.encoder_type == 'pytorch':
|
147 |
for (token, weight) in encoded_query.items():
|
148 |
if token in self.idf and self.idf[token] > self.min_idf:
|
|
|
142 |
|
143 |
encoded_query = self.encode(q)
|
144 |
|
145 |
+
jquery = JHashMap()
|
146 |
if self.encoder_type == 'pytorch':
|
147 |
for (token, weight) in encoded_query.items():
|
148 |
if token in self.idf and self.idf[token] > self.min_idf:
|