"""Gradio demo: semantic search over a local JSON dataset.

Queries are embedded with a CamemBERT sentence-transformer model and matched
against a pre-built FAISS index attached to the dataset's "embeddings" column.
"""
import argparse
import logging

import datasets
import gradio as gr
import sentence_transformers

# Disable every log record up to and including CRITICAL for the whole process.
logging.disable(logging.CRITICAL)

# Sentence encoder used to embed incoming queries; pinned to CPU.
model = sentence_transformers.SentenceTransformer(
    "dangvantuan/sentence-camembert-large", device="cpu"
)

# Load the records from ./dataset.json and attach the FAISS index stored in
# index.faiss under the index name "embeddings".
dataset = datasets.load_dataset(
    "json", data_files=["./dataset.json"], split="train"
)
dataset.load_faiss_index("embeddings", "index.faiss")


def search(query, k):
    """Return the ``k`` nearest dataset entries for ``query`` as one string.

    The query is encoded with the module-level ``model`` and looked up in the
    dataset's "embeddings" index. Each hit is rendered as three lines
    (title, transcript with its start/end markers, link) and the hits are
    joined with a newline.

    Args:
        query: Text to search for.
        k: Number of results to retrieve; converted with ``int()`` before use.

    Returns:
        A newline-separated string describing every retrieved example.
    """
    query_embedding = model.encode(query)
    # The first element of the returned pair is not needed here.
    _, retrieved_examples = dataset.get_nearest_examples(
        "embeddings",
        query_embedding,
        k=int(k),
    )

    # retrieved_examples is column-oriented; zip the columns back into rows.
    rows = zip(
        retrieved_examples["text"],
        retrieved_examples["start"],
        retrieved_examples["end"],
        retrieved_examples["title"],
        retrieved_examples["url"],
    )
    entries = [
        f"Title: {title}\n"
        f"Transcript: [{start} ====> {end}] {text}\n"
        f"Link: {url}"
        for text, start, end, title, url in rows
    ]

    # NOTE(review): this plain text is shown through an HTML output component;
    # HTML normally collapses newlines and the text is not escaped here --
    # confirm that it renders as intended.
    return "\n".join(entries)


iface = gr.Interface(
    search,
    inputs=[
        # Pre-filled example query so the demo can be tried immediately.
        gr.inputs.Textbox(
            label="Query",
            default="Qu'est-ce qui t'a le plus fait progresser ?",
        ),
        gr.inputs.Number(label="K", default=3),
    ],
    # All results are returned as one string in a single HTML component.
    outputs=gr.outputs.HTML(label="Result"),
    title="Camembert and Faiss-powered Search Engine",
    description="Search through a dataset using Camembert and Faiss",
    theme="light",
    layout="vertical",
)

iface.launch()