File size: 1,918 Bytes
d82542d
45d6b11
0d70765
2014880
d82542d
2014880
45d6b11
2014880
0d70765
 
 
 
45d6b11
39e199d
45d6b11
2014880
9a26bab
45d6b11
2014880
 
 
d69bc63
45d6b11
 
2014880
 
 
 
 
 
 
 
 
45d6b11
 
d82542d
 
45d6b11
 
cb97f50
 
410ed66
73129f8
cb97f50
5e0ab83
2014880
 
9a26bab
d82542d
73129f8
804f1ab
d82542d
73129f8
d82542d
 
9a26bab
d82542d
2014880
 
d82542d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import argparse
import html
import logging

import datasets
import gradio as gr
import sentence_transformers

# Suppress every log record of severity CRITICAL and below, process-wide.
logging.disable(logging.CRITICAL)


# Sentence-embedding model used to encode search queries; pinned to the CPU.
model = sentence_transformers.SentenceTransformer(
    model_name_or_path="dangvantuan/sentence-camembert-large",
    device="cpu",
)


# Load the local JSON file as its "train" split, then attach the FAISS index
# stored on disk under the index name "embeddings".
dataset = datasets.load_dataset(
    path="json",
    data_files=["./dataset.json"],
    split="train",
)
dataset.load_faiss_index(index_name="embeddings", file="index.faiss")

def _render_hit(text, start, end, title, url):
    """Render one retrieved example as an HTML fragment with escaped fields."""
    transcript = f"[{start} ====> {end}] {text}"
    # The fragment is displayed by an HTML component, so every dataset value
    # is escaped: a stray "<" or "&" in a title or transcript would otherwise
    # corrupt the markup or inject tags.
    return (
        f"Title: {html.escape(str(title))}<br/>"
        f"Transcript: {html.escape(transcript)}<br/>"
        f"Link: {html.escape(str(url))}"
    )


def search(query, k):
    """Return the nearest dataset passages to *query* as one HTML string.

    The query is encoded with the module-level ``model`` and looked up in the
    "embeddings" index of the module-level ``dataset``.

    Args:
        query: Search text handed to ``model.encode``.
        k: Number of neighbours to retrieve. It is coerced with ``int()``
            (presumably because the numeric UI field may deliver a float).

    Returns:
        One fragment per hit, showing its title, its transcript prefixed with
        ``[start ====> end]``, and its link, joined by ``<hr/>``. The string
        is empty when no example is retrieved.
    """
    query_embedding = model.encode(query)
    # The first element of the returned pair (the scores, per the datasets
    # API) is not shown to the user, so it is discarded.
    _, retrieved_examples = dataset.get_nearest_examples(
        "embeddings",
        query_embedding,
        k=int(k),
    )
    # The retrieved examples are column-oriented; zip turns them into rows.
    hits = zip(
        retrieved_examples["text"],
        retrieved_examples["start"],
        retrieved_examples["end"],
        retrieved_examples["title"],
        retrieved_examples["url"],
    )
    return "<hr/>".join(
        _render_hit(text, start, end, title, url)
        for text, start, end, title, url in hits
    )

# Input widgets: the free-text query, pre-filled with a sample question, and
# the number of results to retrieve.
_query_input = gr.inputs.Textbox(
    label="Query",
    default="Qu'est-ce qui t'a le plus fait progresser ?",
)
_k_input = gr.inputs.Number(label="K", default=3)

# search() returns an HTML string, so the result goes to an HTML component.
_result_output = gr.outputs.HTML(label="Result")

iface = gr.Interface(
    fn=search,
    inputs=[_query_input, _k_input],
    outputs=_result_output,
    title="Camembert and Faiss-powered Search Engine",
    description="Search through a dataset using Camembert and Faiss",
    theme="light",
    layout="vertical",
)

# Start the web UI as soon as the script runs.
iface.launch()