Spaces:
Runtime error
Runtime error
init application
Browse files
- app.py +48 -0
- embeddings.pkl +3 -0
- requirements.txt +5 -0
app.py
ADDED
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from sentence_transformers import SentenceTransformer, util
|
2 |
+
import os
|
3 |
+
from tqdm import tqdm
|
4 |
+
import pandas as pd
|
5 |
+
import json
|
6 |
+
import pickle
|
7 |
+
import torch
|
8 |
+
import gradio as gr
|
9 |
+
|
10 |
+
|
11 |
+
# Load the transcript records; semantic_search indexes into this object with
# the positions of the best-matching embeddings.
with open('new_transcript.json', 'r', encoding='utf-8') as openfile:
    json_object1 = json.load(openfile)

# Sentence-embedding model used to encode incoming queries (pinned to CPU).
model = SentenceTransformer('keepitreal/vietnamese-sbert', device='cpu')

# Load the pre-computed sentences & embeddings from disk.
# NOTE(review): pickle.load can execute arbitrary code while unpickling;
# only deploy an embeddings.pkl that comes from a trusted source.
with open('embeddings.pkl', "rb") as fIn:
    stored_data = pickle.load(fIn)

stored_sentences = stored_data['sentences']
stored_embeddings = stored_data['embeddings']

# torch.from_numpy requires a NumPy array, so the pickled embeddings are
# expected to have been saved in that form.
emb = torch.from_numpy(stored_embeddings)
|
27 |
+
|
28 |
+
|
29 |
+
def semantic_search(query, top_k=20):
    """Return the transcript entries most similar to *query*.

    The query is embedded with the module-level ``model`` and compared by
    cosine similarity against the pre-computed embeddings in ``emb``.

    Args:
        query: Text to search for.
        top_k: Maximum number of results to return. It is clamped to the
            number of stored embeddings, because ``torch.topk`` raises when
            ``k`` exceeds the size of the dimension it searches.

    Returns:
        A string with one line per hit in the order produced by
        ``torch.topk``, each formatted as
        ``"<record> - (Score: <similarity with 4 decimals>)"`` and
        terminated by a newline.
    """
    query_embedding = model.encode(query, convert_to_tensor=True)

    # Cosine similarity between the query and every stored embedding.
    cos_scores = util.cos_sim(query_embedding, emb)[0]

    # Never ask for more hits than there are embeddings to rank.
    k = min(top_k, cos_scores.shape[0])
    top_results = torch.topk(cos_scores, k=k)

    # Build the output in one pass instead of repeated string concatenation.
    return "".join(
        f"{json_object1[int(idx)]!s} - (Score: {float(score):.4f})\n"
        for score, idx in zip(top_results[0], top_results[1])
    )
|
41 |
+
|
42 |
+
|
43 |
+
# Single-textbox UI: the typed query is passed to semantic_search and the
# returned string is rendered as plain text.
_query_box = gr.Textbox(lines=2, placeholder="Input text query...")
demo = gr.Interface(fn=semantic_search, inputs=_query_box, outputs="text")

# NOTE(review): share=True requests a public share link from Gradio; confirm
# it is needed (and supported) in the environment this app is deployed to.
demo.launch(share=True)
|
embeddings.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f8f424f4016bb2d019a4e2bc611cea027eb722212b18f7350614adfbaf89c687
|
3 |
+
size 143095812
|
requirements.txt
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
sentence-transformers==2.2.2
|
2 |
+
torch==2.0.1+cu118
|
3 |
+
tqdm==4.66.1
|
4 |
+
gradio==3.46.1
|
5 |
+
pandas==1.5.3
|