File size: 1,134 Bytes
11383a0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import gradio as gr
from typing import TypedDict, List
from nlp4web_codebase.ir.data_loaders.sciq import load_sciq

# Load the SciQ dataset once at import time; `search` below reads `sciq.corpus`.
sciq = load_sciq()
# NOTE(review): bare attribute access whose value is discarded -- looks like
# leftover notebook output. Confirm `corpus` has no lazy-loading side effect
# before removing it.
sciq.corpus

class Hit(TypedDict):
    """One search result as returned by `search`."""

    # Identifier of the matched document (taken from `collection_id`).
    cid: str
    # Relevance score assigned by the retriever.
    score: float
    # Text of the matched document.
    text: str

# Alias for the "list of hits" result type. Nothing in the visible code
# references this name; `search` spells out List[Hit] directly.
return_type = List[Hit]

## YOUR_CODE_STARTS_HERE
# Where the BM25 index is persisted and later reopened by the retriever.
_BM25_INDEX_DIR = "output/bm25_index"
# Value passed to the index builder as `ndocs`.
# NOTE(review): hard-coded; presumably the SciQ corpus size -- confirm.
_SCIQ_NDOCS = 12160

# Process-wide caches, filled lazily on the first query so that the expensive
# setup runs once instead of on every call to `search`.
# NOTE(review): two concurrent first requests may both build the index; the
# result is the same, only the work is duplicated.
_retriever = None
_docs_by_cid = None


def _get_retriever():
    """Return the shared BM25 retriever, building and saving the index on first use."""
    global _retriever
    if _retriever is None:
        # NOTE(review): BM25Index and BM25Retriever are not imported anywhere
        # in the visible file; they must be brought into scope for this to run.
        bm25_index = BM25Index.build_from_documents(
            documents=iter(sciq.corpus),
            ndocs=_SCIQ_NDOCS,
            show_progress_bar=True,
        )
        bm25_index.save(_BM25_INDEX_DIR)
        _retriever = BM25Retriever(index_dir=_BM25_INDEX_DIR)
    return _retriever


def _get_docs_by_cid():
    """Return a cached mapping from collection id to its corpus document."""
    global _docs_by_cid
    if _docs_by_cid is None:
        lookup = {}
        for doc in sciq.corpus:
            # setdefault keeps the FIRST document seen for an id, which is
            # what a first-match linear scan over the corpus would return.
            lookup.setdefault(doc.collection_id, doc)
        _docs_by_cid = lookup
    return _docs_by_cid


def search(query: str) -> List[Hit]:
    """Search the SciQ corpus with BM25 and return the matching documents.

    The index is built and saved on the first non-blank query only; later
    queries reuse the cached retriever and an id -> document lookup table.

    Args:
        query: Free-text query string.

    Returns:
        One hit per ranked id that exists in the corpus, in the order the
        retriever returned them, each with keys "cid", "score" and "text".
        A missing or whitespace-only query yields an empty list.
    """
    # Nothing to search for: skip index construction and retrieval entirely.
    if not query or not query.strip():
        return []

    ranking = _get_retriever().retrieve(query=query)
    docs_by_cid = _get_docs_by_cid()

    hits: List[Hit] = []
    for cid, score in ranking.items():
        doc = docs_by_cid.get(cid)
        # Ranked ids with no corresponding corpus document are dropped.
        if doc is not None:
            hits.append({"cid": cid, "score": score, "text": doc.text})
    return hits

# Gradio UI: a two-line text box feeds `search`, whose result is shown as JSON.
_query_box = gr.Textbox(lines=2, placeholder="Enter your query here...")
_results_view = gr.JSON(label="Search Results")

demo = gr.Interface(
    fn=search,
    inputs=_query_box,
    outputs=_results_view,
    title="SciQ Search Engine",
    description="Enter a query to search the SciQ dataset using BM25.",
)
## YOUR_CODE_ENDS_HERE
# Launch the Gradio interface. There is no `__main__` guard, so this runs
# whenever the module is executed or imported.
demo.launch()