Spaces:

castorini
/

ONNX-Demo

Build error

App Files Files Community

ArthurChen189 committed on Aug 5, 2023

Commit

4c54fb1

•

1 Parent(s): 23cefb2

update app

Browse files

Files changed (3) hide show

app.py +7 -3
pyserini/search/lucene/__pycache__/_impact_searcher.cpython-310.pyc +0 -0
pyserini/search/lucene/_impact_searcher.py +2 -1

app.py CHANGED Viewed

@@ -11,7 +11,7 @@ sys.path.append(str(path_root))
 encoder_index_map = {
     'uniCOIL': ('UniCoil', 'castorini/unicoil-noexp-msmarco-passage', 'index-unicoil'),
     'SPLADE++ Ensemble Distil': ('SpladePlusPlusEnsembleDistil', 'naver/splade-cocondenser-ensembledistil', 'index-splade-pp-ed'),
-    'SPLADE++ Self Distil': ('SpladePlusPlusSelfDistil', 'naver/splade-cocondenser-ensembledistil', 'index-splade-pp-sd')
 }
 index = 'index-splade-pp-ed'
@@ -65,7 +65,6 @@ with col2:
 if search_query or button_clicked:
     num_results = None
     t_0 = time.time()
-    print("search query is:\t", search_query)
     search_results = searcher.search(search_query, k=10)
     search_time = time.time() - t_0
     st.write(
@@ -73,7 +72,12 @@ if search_query or button_clicked:
     for i, result in enumerate(search_results[:10]):
         result_score = result.score
         result_id = result.docid
-        contents = json.loads(result.raw)["contents"]
         output = f'<div class="row"> <b>Rank</b>: {i+1} | <b>Document ID</b>: {result_id} | <b>Score</b>:{result_score:.2f}</div>'
         try:

 encoder_index_map = {
     'uniCOIL': ('UniCoil', 'castorini/unicoil-noexp-msmarco-passage', 'index-unicoil'),
     'SPLADE++ Ensemble Distil': ('SpladePlusPlusEnsembleDistil', 'naver/splade-cocondenser-ensembledistil', 'index-splade-pp-ed'),
+    'SPLADE++ Self Distil': ('SpladePlusPlusSelfDistil', 'naver/splade-cocondenser-selfdistil', 'index-splade-pp-sd')
 }
 index = 'index-splade-pp-ed'
 if search_query or button_clicked:
     num_results = None
     t_0 = time.time()
     search_results = searcher.search(search_query, k=10)
     search_time = time.time() - t_0
     st.write(
     for i, result in enumerate(search_results[:10]):
         result_score = result.score
         result_id = result.docid
+        if result.raw:
+            contents = json.loads(result.raw)
+            contents = contents['contents'] if 'contents' in contents else contents['content']
+        else:
+            contents = ''
         output = f'<div class="row"> <b>Rank</b>: {i+1} | <b>Document ID</b>: {result_id} | <b>Score</b>:{result_score:.2f}</div>'
         try:

pyserini/search/lucene/__pycache__/_impact_searcher.cpython-310.pyc CHANGED Viewed

Binary files a/pyserini/search/lucene/__pycache__/_impact_searcher.cpython-310.pyc and b/pyserini/search/lucene/__pycache__/_impact_searcher.cpython-310.pyc differ

pyserini/search/lucene/_impact_searcher.py CHANGED Viewed

@@ -142,8 +142,9 @@ class LuceneImpactSearcher:
         encoded_query = self.encode(q)
-        jquery = JHashMap()
         if self.encoder_type == 'pytorch':
             for (token, weight) in encoded_query.items():
                 if token in self.idf and self.idf[token] > self.min_idf:
                     jquery.put(token, JFloat(weight))

         encoded_query = self.encode(q)
+        jquery = encoded_query
         if self.encoder_type == 'pytorch':
+            jquery = JHashMap()
             for (token, weight) in encoded_query.items():
                 if token in self.idf and self.idf[token] > self.min_idf:
                     jquery.put(token, JFloat(weight))