madoss committed on
Commit
fe0547a
1 Parent(s): 45d6b11

Delete query_index.py

Browse files
Files changed (1) hide show
  1. query_index.py +0 -54
query_index.py DELETED
@@ -1,54 +0,0 @@
1
- import logging
2
- import gradio as gr
3
- import datasets
4
- import sentence_transformers
5
-
6
# Suppress every log record of severity CRITICAL and below, process-wide.
logging.disable(logging.CRITICAL)

# Model whose ``encode`` output is used as the query vector for the index;
# explicitly placed on the CUDA device.
model = sentence_transformers.SentenceTransformer(
    model_name_or_path="dangvantuan/sentence-camembert-large",
    device="cuda",
)

# Load the JSON records as a single "train" split, then attach the FAISS
# index stored in ``index.faiss`` under the name "embeddings".
dataset = datasets.load_dataset(
    path="json",
    data_files=["./data/dataset.json"],
    split="train",
)
dataset.load_faiss_index(index_name="embeddings", file="index.faiss")
14
-
15
def search(query: str, k: int):
    """Return the ``k`` dataset entries nearest to ``query``.

    The query is encoded with the module-level ``model`` and looked up in
    the "embeddings" index of the module-level ``dataset``.

    Args:
        query: Free-text search string.
        k: Number of results to retrieve. It is coerced with ``int()``
            because this function is wired to a Gradio "number" input,
            which may deliver a float (e.g. ``5.0``) -- TODO confirm for
            the installed Gradio version.

    Returns:
        A list of dicts, one per retrieved example, with the keys
        "title", "transcript" (``"[start ====> end] text"``) and "link".
    """
    query_embedding = model.encode(query)
    _, retrieved_examples = dataset.get_nearest_examples(
        "embeddings",
        query_embedding,
        k=int(k),  # the index lookup needs an integral neighbour count
    )

    # The retrieved examples are column-oriented; zip the columns back
    # into one record per hit.
    return [
        {
            "title": title,
            "transcript": f"[{start} ====> {end}] {text}",
            "link": url,
        }
        for text, start, end, title, url in zip(
            retrieved_examples["text"],
            retrieved_examples["start"],
            retrieved_examples["end"],
            retrieved_examples["title"],
            retrieved_examples["url"],
        )
    ]
40
-
41
# Gradio UI around ``search``: a text input for the query, a number input
# for ``k``, and the result list rendered as JSON.
iface = gr.Interface(
    fn=search,
    inputs=["text", "number"],
    # String shortcut for the JSON output component, consistent with the
    # shortcuts used for ``inputs``; replaces the deprecated
    # ``gr.outputs.JSON()``.
    outputs="json",
    title="Search Dataset",
    description="Search a dataset using Camembert and Faiss.",
    # The keyword is ``examples`` (plural). With more than one input
    # component each example is its own row: [query, k].
    examples=[
        ["Enter a query to search for.", 5],
    ],
)

if __name__ == "__main__":
    # Start the web server only when run as a script, not on import.
    iface.launch()