"""Gradio demo: semantic search over a transcript using precomputed embeddings.

At start-up the script loads the transcript entries from
``new_transcript.json``, the sentence-embedding model, and the stored
embeddings from ``embeddings.pkl``; it then serves ``semantic_search``
through a simple text-in / text-out Gradio interface.
"""
import json
import os
import pickle

import gradio as gr
import pandas as pd
import torch
from sentence_transformers import SentenceTransformer, util
from tqdm import tqdm

# Transcript entries; search hits are looked up in this object by row index.
with open('new_transcript.json', 'r', encoding='utf-8') as openfile:
    json_object1 = json.load(openfile)

# Model used to embed incoming queries (CPU only).
model = SentenceTransformer('keepitreal/vietnamese-sbert', device='cpu')

# Load sentences & embeddings from disk.
# NOTE(review): pickle.load can execute arbitrary code while unpickling --
# only ship an embeddings.pkl that was produced by a trusted pipeline.
with open('embeddings.pkl', "rb") as fIn:
    stored_data = pickle.load(fIn)

stored_sentences = stored_data['sentences']
stored_embeddings = stored_data['embeddings']

# torch.from_numpy shares memory with the stored array (no copy is made).
emb = torch.from_numpy(stored_embeddings)


def semantic_search(query, top_k=20):
    """Return the transcript entries most similar to *query*.

    The query is embedded with ``model`` and compared against every stored
    embedding using cosine similarity.

    Args:
        query: Text to search for.
        top_k: Maximum number of hits to return. Values larger than the
            number of stored embeddings are clamped, so a small corpus
            yields fewer hits instead of raising.

    Returns:
        One line per hit, best match first, formatted as
        ``"<entry> - (Score: <cosine similarity, 4 decimals>)"``; every
        line (including the last) ends with a newline.
    """
    query_embedding = model.encode(query, convert_to_tensor=True)

    # Cosine similarity of the query against every stored embedding.
    cos_scores = util.cos_sim(query_embedding, emb)[0]

    # torch.topk raises when k exceeds the number of scores; clamp it.
    k = max(0, min(top_k, cos_scores.shape[0]))
    scores, indices = torch.topk(cos_scores, k=k)

    # Assumes row i of the embeddings corresponds to json_object1[i]
    # -- TODO confirm against the script that produced embeddings.pkl.
    return "".join(
        "{!s} - (Score: {:.4f})\n".format(json_object1[idx], score)
        for score, idx in zip(scores.tolist(), indices.tolist())
    )


demo = gr.Interface(
    fn=semantic_search,
    inputs=gr.Textbox(lines=2, placeholder="Input text query..."),
    outputs="text",
)

demo.launch(share=True)