Spaces:
Sleeping
Sleeping
File size: 1,937 Bytes
8a06424 e87c5c0 8a06424 e87c5c0 8a06424 aeddad8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 |
import gradio as gr
import pandas as pd
from model.search.hybrid import HybridSearchClient
from model.data.notion_db import fetch_sakurap_corpus
def search(search_client: "HybridSearchClient"):
    """Create the query callback used by the search UI.

    The returned function closes over ``search_client`` so it can be handed
    to an event handler that supplies only the query text.

    Args:
        search_client: Object exposing ``search_top_n(query)``. The first
            element of its return value must be a DataFrame containing the
            columns ``rank``, ``title``, ``content``, ``rank_sparse`` and
            ``rank_dense``. The annotation is quoted so it is not evaluated
            when the function is defined.

    Returns:
        A function ``_search(query) -> pd.DataFrame`` that builds the
        display table for one query.
    """
    source_columns = ["rank", "title", "content", "rank_sparse", "rank_dense"]
    display_columns = ["rank", "title", "rap lyric", "rank: surface", "rank: vector"]

    def _search(query: str) -> pd.DataFrame:
        """Run ``query`` and return the first result frame, formatted for display."""
        results = search_client.search_top_n(query)
        # Copy the selected columns before editing: the frame returned by the
        # client belongs to the client, and writing into it would shift its
        # "rank" column a little further on every call if it is ever reused.
        table = results[0][source_columns].copy()
        # Displayed rank is the client's rank plus one.
        table["rank"] = table["rank"] + 1
        table.columns = display_columns
        return table

    return _search
if __name__ == "__main__":
    # Corpus and search backend are prepared once, before any UI exists.
    corpus = fetch_sakurap_corpus("./data/sakurap_corpus.csv")
    # NOTE(review): "content" is presumably the column that gets indexed;
    # confirm against HybridSearchClient.from_dataframe.
    client = HybridSearchClient.from_dataframe(corpus, "content")
    # Callback bound to the client; wired to the textbox submit event below.
    on_submit = search(client)

    with gr.Blocks() as demo:
        # Page header.
        gr.Markdown("""
# π Cobalt
Demo app for hybrid search with vector and surface search using [Ruri](https://huggingface.co/cl-nagoya/ruri-large), [BM25](https://github.com/dorianbrown/rank_bm25) and [Voyager](https://spotify.github.io/voyager/).
You can search ARASHI's songs with rap lyrics by Sho Sakurai.
""")

        # Query box.
        query_box = gr.Textbox(label="Sakurap Words", submit_btn=True)

        gr.Markdown("""
## Search Results
""")

        # Read-only result table; one width/datatype entry per displayed column.
        results_view = gr.DataFrame(
            label="Result",
            column_widths=["5%", "20%", "65%", "5%", "5%"],
            wrap=True,
            datatype=["str", "str", "markdown", "str", "str"],
            interactive=False,
        )

        # Submitting the query fills the result table.
        query_box.submit(fn=on_submit, inputs=query_box, outputs=results_view)

    # Enable the queue, then serve on all network interfaces.
    demo.queue().launch(server_name="0.0.0.0")
|