Spaces:
Sleeping
Sleeping
File size: 2,179 Bytes
c532148 702c8d6 c532148 702c8d6 4e8a334 702c8d6 c532148 46d9ba6 c532148 4bfcca9 c532148 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 |
import gradio as gr
import duckdb
from annoy import AnnoyIndex
from sentence_transformers import SentenceTransformer
# Identifiers of the on-disk / remote resources the app depends on.
EMBEDDING_MODEL_NAME = "sentence-transformers/LaBSE"
ANNOY_INDEX_PATH = "definitions.ann"
DATABASE_PATH = "sonajaht.db"

# Vector length and metric the Annoy index is declared with.
# NOTE(review): 768 is presumably the LaBSE embedding size and must match the
# dimension used when "definitions.ann" was built -- confirm.
EMBEDDING_DIM = 768
ANNOY_METRIC = "angular"

# Sentence-embedding model used to vectorise search queries.
model = SentenceTransformer(EMBEDDING_MODEL_NAME)

# Pre-built nearest-neighbour index, loaded from disk at import time.
annoy_index = AnnoyIndex(EMBEDDING_DIM, ANNOY_METRIC)
annoy_index.load(ANNOY_INDEX_PATH)

# DuckDB connection queried for the `words` and `definitions` tables.
conn = duckdb.connect(DATABASE_PATH)
def search_query(query, top_k=10):
    """Return the dictionary definitions closest in meaning to *query*.

    The query is embedded with the module-level ``model``, its nearest
    neighbours are looked up in ``annoy_index``, and the rows whose
    ``entry_id`` matches those neighbours are read through ``conn``.

    Args:
        query: Free-text search phrase.
        top_k: Maximum number of nearest neighbours to request from the index.

    Returns:
        A pandas DataFrame with the columns ``#`` (1-based row number),
        ``sõna`` and ``definitsioon``, ordered from the nearest neighbour to
        the farthest. The frame is empty when the query is blank, when
        ``top_k`` is below 1, or when the index returns no neighbours.
    """
    # pandas is already required by DuckDB's ``fetchdf``; it is imported
    # locally only to build the empty result frame.
    import pandas as pd

    columns = ["#", "sõna", "definitsioon"]

    # Nothing to search for: skip the model, the index and the database.
    if query is None or not str(query).strip() or top_k < 1:
        return pd.DataFrame(columns=columns)

    query_vector = model.encode(query)
    # Distances were never used, so only the neighbour ids are requested.
    neighbour_ids = annoy_index.get_nns_by_vector(query_vector, top_k)

    # An empty id list would render as "IN ()" and an empty CASE expression,
    # both of which are SQL syntax errors.
    if not neighbour_ids:
        return pd.DataFrame(columns=columns)

    # Coercing to int guarantees that only integer literals are ever
    # interpolated into the SQL text below.
    neighbour_ids = [int(item_id) for item_id in neighbour_ids]
    id_list = ", ".join(map(str, neighbour_ids))
    # Map every id to its position in the neighbour list so that the rows
    # come back in nearest-first order.
    rank_cases = " ".join(
        f"WHEN {item_id} THEN {rank}"
        for rank, item_id in enumerate(neighbour_ids)
    )
    sql_query = f"""
        SELECT w.value AS sõna, d.value AS definitsioon
        FROM definitions d
        JOIN words w ON d.word_id = w.word_id
        WHERE d.entry_id IN ({id_list})
        ORDER BY CASE d.entry_id
            {rank_cases}
        END
    """
    results = conn.execute(sql_query).fetchdf()

    # Number the rows actually returned, starting from 1.
    results["#"] = list(range(1, len(results) + 1))
    return results[columns]
# Sample queries offered in the UI; the phrases span several languages.
examples = [
    # Estonian
    "väga vana mees",
    # Russian
    "очень старый дед",
    # French
    "un très vieil homme",
    # English
    "a clear material that you can see through used to make windows",
    "to have a rule that you need a specific object or thing in some situation",
    "something that makes you happy or makes you laugh",
    "when an event happens or takes place",
    # Russian
    "часть стерео системы, из которой исходит музыка",
    "кто-то, кто использует что-то",
]
def handle_example(example):
    """Run a search for *example* and return it together with its results.

    Returns:
        A ``(example, results)`` tuple, where ``results`` is whatever
        ``search_query(example)`` returns.
    """
    search_results = search_query(example)
    return example, search_results
# Build the UI. Components are created in the order they should appear:
# title, query box, search button, example queries, results table.
with gr.Blocks() as demo:
    gr.Markdown("# Sõnajaht Demo")

    query_input = gr.Textbox(label="Sisestage teie otsingupäring")
    search_button = gr.Button("Otsi")

    # Selecting an example fills the query box; no search is wired to it here.
    gr.Examples(examples=examples, inputs=query_input, label="Otsi päringunäiteid")

    results_output = gr.Dataframe(label="Otsingutulemused")

    # Clicking the button runs the search and shows the returned table.
    search_button.click(
        fn=search_query,
        inputs=query_input,
        outputs=results_output,
    )

# Start the Gradio server only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()
|