ArthurChen189 committed on
Commit
23cefb2
‒
1 Parent(s): 07df3b7

update app

Browse files
app.py CHANGED
@@ -9,13 +9,14 @@ sys.path.append(str(path_root))
9
 
10
 
11
  encoder_index_map = {
12
- 'uniCOIL': ('UniCoil', 'index-unicoil'),
13
- 'SPLADE++ Ensemble Distil': ('SpladePlusPlusEnsembleDistil', 'index-splade-pp-ed'),
14
- 'SPLADE++ Self Distil': ('SpladePlusPlusSelfDistil', 'index-splade-pp-sd')
15
  }
16
 
17
  index = 'index-splade-pp-ed'
18
  encoder = 'SpladePlusPlusEnsembleDistil'
 
19
 
20
  st.set_page_config(page_title="Pyserini with ONNX Runtime",
21
  page_icon='🌸', layout="centered")
@@ -24,14 +25,33 @@ cola, colb, colc = st.columns([5, 4, 5])
24
  with colb:
25
  st.image("logo.jpeg")
26
 
 
 
 
 
 
 
 
 
 
27
  colaa, colbb, colcc = st.columns([1, 8, 1])
28
  with colbb:
29
  encoder = st.select_slider(
30
- 'Select a query encoder with ONNX Runtime',
31
  options=['uniCOIL', 'SPLADE++ Ensemble Distil', 'SPLADE++ Self Distil'])
32
  st.write('Now Running Encoder: ', encoder)
33
 
34
- encoder, index = encoder_index_map[encoder]
 
 
 
 
 
 
 
 
 
 
35
 
36
  col1, col2 = st.columns([9, 1])
37
  with col1:
@@ -41,8 +61,6 @@ with col2:
41
  st.write('#')
42
  button_clicked = st.button("🔎")
43
 
44
- searcher = LuceneImpactSearcher(
45
- f'indexes/{index}', f'{encoder}', encoder_type='onnx')
46
 
47
  if search_query or button_clicked:
48
  num_results = None
@@ -55,10 +73,13 @@ if search_query or button_clicked:
55
  for i, result in enumerate(search_results[:10]):
56
  result_score = result.score
57
  result_id = result.docid
58
- output = f'<div class="row"> <div class="column"> <b>Rank</b>: {i+1} </div><div class="column"><b>Document ID</b>: {result_id}</div><div class="column"><b>Score</b>:{result_score:.2f}</div></div>'
 
59
 
60
  try:
61
  st.write(output, unsafe_allow_html=True)
 
 
62
 
63
  except:
64
  pass
 
9
 
10
 
11
  encoder_index_map = {
12
+ 'uniCOIL': ('UniCoil', 'castorini/unicoil-noexp-msmarco-passage', 'index-unicoil'),
13
+ 'SPLADE++ Ensemble Distil': ('SpladePlusPlusEnsembleDistil', 'naver/splade-cocondenser-ensembledistil', 'index-splade-pp-ed'),
14
+ 'SPLADE++ Self Distil': ('SpladePlusPlusSelfDistil', 'naver/splade-cocondenser-ensembledistil', 'index-splade-pp-sd')
15
  }
16
 
17
  index = 'index-splade-pp-ed'
18
  encoder = 'SpladePlusPlusEnsembleDistil'
19
+ encoder_index = 0
20
 
21
  st.set_page_config(page_title="Pyserini with ONNX Runtime",
22
  page_icon='🌸', layout="centered")
 
25
  with colb:
26
  st.image("logo.jpeg")
27
 
28
+
29
+ colaa, colbb, colcc = st.columns([1, 8, 1])
30
+ with colbb:
31
+ runtime = st.select_slider(
32
+ 'Select a runtime type',
33
+ options=['PyTorch', 'ONNX Runtime'])
34
+ st.write('Now using: ', runtime)
35
+
36
+
37
  colaa, colbb, colcc = st.columns([1, 8, 1])
38
  with colbb:
39
  encoder = st.select_slider(
40
+ 'Select a query encoder',
41
  options=['uniCOIL', 'SPLADE++ Ensemble Distil', 'SPLADE++ Self Distil'])
42
  st.write('Now Running Encoder: ', encoder)
43
 
44
+ if runtime == 'PyTorch':
45
+ runtime = 'pytorch'
46
+ runtime_index = 1
47
+ else:
48
+ runtime = 'onnx'
49
+ runtime_index = 0
50
+
51
+ encoder, index = encoder_index_map[encoder][runtime_index], encoder_index_map[encoder][2]
52
+
53
+ searcher = LuceneImpactSearcher(
54
+ f'indexes/{index}', f'{encoder}', encoder_type=f'{runtime}')
55
 
56
  col1, col2 = st.columns([9, 1])
57
  with col1:
 
61
  st.write('#')
62
  button_clicked = st.button("🔎")
63
 
 
 
64
 
65
  if search_query or button_clicked:
66
  num_results = None
 
73
  for i, result in enumerate(search_results[:10]):
74
  result_score = result.score
75
  result_id = result.docid
76
+ contents = json.loads(result.raw)["contents"]
77
+ output = f'<div class="row"> <b>Rank</b>: {i+1} | <b>Document ID</b>: {result_id} | <b>Score</b>:{result_score:.2f}</div>'
78
 
79
  try:
80
  st.write(output, unsafe_allow_html=True)
81
+ st.write(
82
+ f'<div class="row">{contents}</div>', unsafe_allow_html=True)
83
 
84
  except:
85
  pass
pyserini/search/lucene/__pycache__/_impact_searcher.cpython-310.pyc CHANGED
Binary files a/pyserini/search/lucene/__pycache__/_impact_searcher.cpython-310.pyc and b/pyserini/search/lucene/__pycache__/_impact_searcher.cpython-310.pyc differ
 
pyserini/search/lucene/_impact_searcher.py CHANGED
@@ -142,7 +142,7 @@ class LuceneImpactSearcher:
142
 
143
  encoded_query = self.encode(q)
144
 
145
- jquery = encoded_query
146
  if self.encoder_type == 'pytorch':
147
  for (token, weight) in encoded_query.items():
148
  if token in self.idf and self.idf[token] > self.min_idf:
 
142
 
143
  encoded_query = self.encode(q)
144
 
145
+ jquery = JHashMap()
146
  if self.encoder_type == 'pytorch':
147
  for (token, weight) in encoded_query.items():
148
  if token in self.idf and self.idf[token] > self.min_idf: