File size: 1,937 Bytes
8a06424
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e87c5c0
 
8a06424
 
e87c5c0
8a06424
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
aeddad8
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
import gradio as gr
import pandas as pd

from model.search.hybrid import HybridSearchClient
from model.data.notion_db import fetch_sakurap_corpus


def search(search_client: HybridSearchClient):
    """Build a single-query search callback bound to ``search_client``.

    The returned callable is what the UI wires to the textbox submit event:
    it takes the query string and returns the table to display.

    Args:
        search_client: Object exposing ``search_top_n(query)``. The first
            element of its return value must be a DataFrame containing at
            least the columns ``rank``, ``title``, ``content``,
            ``rank_sparse`` and ``rank_dense``.

    Returns:
        A function ``_search(query: str) -> pd.DataFrame`` producing a frame
        with the columns ``rank``, ``title``, ``rap lyric``,
        ``rank: surface`` and ``rank: vector`` (in that order).
    """
    # Source column -> header shown in the UI; dict order is the display order.
    display_columns = {
        "rank": "rank",
        "title": "title",
        "content": "rap lyric",
        "rank_sparse": "rank: surface",
        "rank_dense": "rank: vector",
    }

    def _search(query: str) -> pd.DataFrame:
        results = search_client.search_top_n(query)
        # Work on an independent copy: the frame belongs to the client, and
        # modifying it in place would bump "rank" again on every call if the
        # client ever hands back the same object.
        table = results[0][list(display_columns)].copy()
        # Shift rank by one for display (presumably 0-based -> 1-based;
        # confirm against HybridSearchClient).
        table["rank"] = table["rank"] + 1
        table.columns = list(display_columns.values())
        return table

    return _search


if __name__ == "__main__":
    # Load the lyric corpus from the CSV path given to the project loader.
    sakurap_df = fetch_sakurap_corpus("./data/sakurap_corpus.csv")
    # Build the hybrid search client from the corpus, keyed on its "content"
    # column (presumably the text that gets indexed; confirm in from_dataframe).
    search_client = HybridSearchClient.from_dataframe(sakurap_df, "content")

    with gr.Blocks() as search_interface:
        # Page header and short description of the demo.
        gr.Markdown("""
        # 💎 Cobalt
        Demo app for hybrid search with vector and surface search using [Ruri](https://huggingface.co/cl-nagoya/ruri-large), [BM25](https://github.com/dorianbrown/rank_bm25) and [Voyager](https://spotify.github.io/voyager/).
        
        You can search ARASHI's songs with rap lyrics by Sho Sakurai.
        """)
        # Query input; submit_btn=True adds a submit button to the textbox.
        search_query = gr.Textbox(label="Sakurap Words", submit_btn=True)

        gr.Markdown("""
        ## Search Results

        """)
        # Result table. The five widths/datatypes line up with the five
        # columns returned by the search callback; the third column (the
        # lyric text) is rendered as markdown and given most of the width.
        result_table = gr.DataFrame(label="Result",
                                    column_widths=["5%", "20%", "65%", "5%", "5%"],
                                    wrap=True,
                                    datatype=["str", "str", "markdown", "str", "str"],
                                    interactive=False)

        # Run a search when the query is submitted and show it in the table.
        search_query.submit(fn=search(search_client), inputs=search_query, outputs=result_table)

    # Enable the request queue, then serve on all network interfaces.
    search_interface.queue()
    search_interface.launch(server_name="0.0.0.0")