Spaces:
Sleeping
Sleeping
import gradio as gr | |
import duckdb | |
from annoy import AnnoyIndex | |
from sentence_transformers import SentenceTransformer | |
# Resources shared by every search request; loaded once at import time.
EMBEDDING_MODEL_NAME = "sentence-transformers/LaBSE"
EMBEDDING_DIM = 768  # vector length the Annoy index is created with
ANNOY_INDEX_PATH = "definitions.ann"
DATABASE_PATH = "sonajaht.db"

# Sentence encoder that turns a query string into a vector.
model = SentenceTransformer(EMBEDDING_MODEL_NAME)

# Prebuilt nearest-neighbour index using the angular metric.
annoy_index = AnnoyIndex(EMBEDDING_DIM, "angular")
annoy_index.load(ANNOY_INDEX_PATH)

# DuckDB database queried for the `words` and `definitions` tables.
conn = duckdb.connect(DATABASE_PATH)
def search_query(query, top_k=10):
    """Return the dictionary entries whose definitions best match *query*.

    The query is embedded with the module-level ``model``, its nearest
    neighbours are looked up in ``annoy_index``, and the matching words and
    definitions are fetched through the DuckDB connection ``conn``.

    Args:
        query: Free-text search phrase.
        top_k: Maximum number of neighbours to request from the index.

    Returns:
        A DataFrame with the columns ``#`` (1-based row number), ``sõna``
        and ``definitsioon``, sorted in the order the index returned the
        neighbours. The frame is empty when the query is blank, when
        ``top_k`` is not positive, or when the index returns no neighbours.
    """
    # Local import keeps the block self-contained; DuckDB's fetchdf()
    # already depends on pandas, so this adds no new requirement.
    import pandas as pd

    columns = ["#", "sõna", "definitsioon"]

    # A blank query has no meaningful embedding; skip the model and the DB.
    if not query or not query.strip() or top_k <= 0:
        return pd.DataFrame(columns=columns)

    query_vector = model.encode(query)
    # Cast to int so that only numeric literals are interpolated into SQL.
    similar_item_ids = [
        int(item_id)
        for item_id in annoy_index.get_nns_by_vector(query_vector, top_k)
    ]

    # "IN ()" and a CASE without WHEN branches are SQL syntax errors.
    if not similar_item_ids:
        return pd.DataFrame(columns=columns)

    id_list = ", ".join(map(str, similar_item_ids))
    # Map each id to its position so the rows keep the index's ordering.
    rank_cases = " ".join(
        f"WHEN {item_id} THEN {rank}"
        for rank, item_id in enumerate(similar_item_ids)
    )
    sql_query = f"""
        SELECT w.value AS sõna, d.value AS definitsioon
        FROM definitions d
        JOIN words w ON d.word_id = w.word_id
        WHERE d.entry_id IN ({id_list})
        ORDER BY CASE d.entry_id {rank_cases} END
    """

    results = conn.execute(sql_query).fetchdf()
    results["#"] = range(1, len(results) + 1)
    return results[columns]
# Sample queries offered in the UI; they mix several languages.
examples = [
    # Short phrases
    "väga vana mees",
    "очень старый дед",
    "un très vieil homme",
    # Longer descriptive definitions
    "a clear material that you can see through used to make windows",
    "to have a rule that you need a specific object or thing in some situation",
    "something that makes you happy or makes you laugh",
    "when an event happens or takes place",
    "часть стерео системы, из которой исходит музыка",
    "кто-то, кто использует что-то",
]
def handle_example(example):
    """Run a search for *example* and return it together with the results.

    Returns:
        A ``(example, results)`` tuple, where ``results`` is whatever
        ``search_query(example)`` returns.
    """
    found = search_query(example)
    return example, found
# Page layout: title, query box, search button, example queries, results.
with gr.Blocks() as demo:
    gr.Markdown("# Sõnajaht Demo")

    query_input = gr.Textbox(label="Sisestage teie otsingupäring")
    search_button = gr.Button("Otsi")

    # The examples are bound to the query box only.
    gr.Examples(
        label="Otsi päringunäiteid",
        inputs=query_input,
        examples=examples,
    )

    results_output = gr.Dataframe(label="Otsingutulemused")

    # Pressing the button runs the search and shows the resulting table.
    search_button.click(
        fn=search_query,
        inputs=query_input,
        outputs=results_output,
    )


# Start the web server only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()