# Snapshot of revision 11383a0 (file size: 1,134 bytes).
import gradio as gr
from typing import TypedDict, List
from nlp4web_codebase.ir.data_loaders.sciq import load_sciq
# Load the SciQ dataset once at import time; the code below reads `sciq.corpus`.
sciq = load_sciq()
# NOTE(review): bare attribute access whose value is discarded. This looks like
# leftover notebook cell output; confirm it is not relied on for a side effect
# (e.g. a lazily-loading property) before removing it.
sciq.corpus
class Hit(TypedDict):
    """One search result as returned by ``search``.

    A plain ``dict`` at runtime; the annotations below only describe the
    expected keys and value types.
    """

    # Identifier of the matched document (taken from the retriever's ranking).
    cid: str
    # Relevance score the retriever assigned to the document.
    score: float
    # Text of the matched document.
    text: str
# Alias for the list-of-Hit shape that `search` is annotated to return.
# Not referenced elsewhere in the visible code.
return_type = List[Hit]
## YOUR_CODE_STARTS_HERE
# Lazily-built singletons shared by every call to search(). The corpus is
# loaded once at import time and never changes, so the index and the
# cid -> document lookup only need to be built once per process.
_bm25_retriever = None
_docs_by_cid = None


def _get_bm25_retriever():
    """Return the shared BM25 retriever, building and saving the index on first use."""
    global _bm25_retriever
    if _bm25_retriever is None:
        # NOTE(review): BM25Index and BM25Retriever are not imported in the
        # visible part of this file; they must be in scope when this runs.
        bm25_index = BM25Index.build_from_documents(
            documents=iter(sciq.corpus),
            # Value kept from the original code; presumably the number of
            # documents in the SciQ corpus -- TODO confirm.
            ndocs=12160,
            show_progress_bar=True,
        )
        bm25_index.save("output/bm25_index")
        _bm25_retriever = BM25Retriever(index_dir="output/bm25_index")
    return _bm25_retriever


def _get_docs_by_cid():
    """Return a dict mapping collection_id to document, built once from the corpus."""
    global _docs_by_cid
    if _docs_by_cid is None:
        lookup = {}
        for doc in sciq.corpus:
            # setdefault keeps the first document for a repeated id, matching
            # the first-match behaviour of a linear scan over the corpus.
            lookup.setdefault(doc.collection_id, doc)
        _docs_by_cid = lookup
    return _docs_by_cid


def search(query: str) -> List[Hit]:
    """Retrieve SciQ documents for *query* using BM25.

    The index is built and saved to ``output/bm25_index`` on the first
    non-empty query and reused afterwards, instead of being rebuilt per call.

    Args:
        query: Free-text query string.

    Returns:
        One ``Hit`` per entry of the retriever's ranking, in the order the
        retriever yields them. Ranked ids with no matching document in the
        corpus are skipped. An empty or whitespace-only query returns ``[]``.
    """
    # Nothing to match against; avoid building the index or calling the
    # retriever with an empty query.
    if not query or not query.strip():
        return []

    ranking = _get_bm25_retriever().retrieve(query=query)
    docs_by_cid = _get_docs_by_cid()

    hits = []
    for cid, score in ranking.items():
        doc = docs_by_cid.get(cid)
        if not doc:
            continue
        hits.append({"cid": cid, "score": score, "text": doc.text})
    return hits
# Build the web UI: a two-line query box wired to `search`, with the returned
# hits rendered as JSON.
_query_box = gr.Textbox(lines=2, placeholder="Enter your query here...")
_results_view = gr.JSON(label="Search Results")
demo = gr.Interface(
    fn=search,
    inputs=_query_box,
    outputs=_results_view,
    title="SciQ Search Engine",
    description="Enter a query to search the SciQ dataset using BM25.",
)
## YOUR_CODE_ENDS_HERE
# Start the Gradio app defined above.
demo.launch()