mtoan65 committed on
Commit
a5ba329
1 Parent(s): 800776f

init application

Browse files
Files changed (3) hide show
  1. app.py +48 -0
  2. embeddings.pkl +3 -0
  3. requirements.txt +5 -0
app.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from sentence_transformers import SentenceTransformer, util
2
+ import os
3
+ from tqdm import tqdm
4
+ import pandas as pd
5
+ import json
6
+ import pickle
7
+ import torch
8
+ import gradio as gr
9
+
10
+
11
# Transcript records loaded once at startup. semantic_search() indexes this
# object with row numbers of the stored embeddings, so entry i is presumably
# aligned with embedding row i -- verify against how embeddings.pkl was built.
with open('new_transcript.json', 'r', encoding='utf-8') as openfile:
    json_object1 = json.load(openfile)

# Sentence encoder used to embed incoming queries; pinned to CPU.
model = SentenceTransformer('keepitreal/vietnamese-sbert', device='cpu')

# Load precomputed sentences & embeddings from disk.
# NOTE(security): pickle.load can execute arbitrary code contained in the
# file. Only ship an embeddings.pkl that was produced by a trusted process.
with open('embeddings.pkl', 'rb') as fIn:
    stored_data = pickle.load(fIn)
stored_sentences = stored_data['sentences']
stored_embeddings = stored_data['embeddings']

# torch.as_tensor accepts a NumPy array (sharing its memory, like
# torch.from_numpy) as well as an already-materialised tensor, so either
# pickled representation of the embeddings works.
emb = torch.as_tensor(stored_embeddings)
27
+
28
+
29
def semantic_search(query, top_k=20):
    """Return the transcript entries most similar to *query*, one per line.

    The query is embedded with the module-level ``model`` and scored by
    cosine similarity against the precomputed embeddings in ``emb``. Every
    result line has the form ``"<entry> - (Score: 0.1234)"`` followed by a
    newline, where ``<entry>`` is ``str(json_object1[i])`` for the matching
    embedding row ``i``.

    Args:
        query: Free-text search string.
        top_k: Maximum number of results to return. It is clamped to the
            number of available scores, because ``torch.topk`` raises when
            asked for more elements than exist.

    Returns:
        The formatted results joined into one string, or ``""`` when the
        query is empty/blank or there is nothing to rank.
    """
    # A missing or whitespace-only query carries nothing to search for.
    if not query or not query.strip():
        return ""

    query_embedding = model.encode(query, convert_to_tensor=True)

    # Cosine similarity of the single query against every stored embedding;
    # row 0 of the result is the score vector for that query.
    cos_scores = util.cos_sim(query_embedding, emb)[0]

    # Keep k within [0, number of scores] so torch.topk cannot fail.
    k = max(0, min(int(top_k), cos_scores.shape[0]))
    if k == 0:
        return ""
    scores, indices = torch.topk(cos_scores, k=k)

    # .tolist() yields plain Python floats/ints, which index json_object1 and
    # format without relying on tensor dunder methods.
    return "".join(
        f"{json_object1[idx]!s} - (Score: {score:.4f})\n"
        for score, idx in zip(scores.tolist(), indices.tolist())
    )
41
+
42
+
43
# Web UI: a two-line textbox feeds the typed query to semantic_search and the
# returned result string is displayed as plain text.
_query_box = gr.Textbox(lines=2, placeholder="Input text query...")
demo = gr.Interface(fn=semantic_search, inputs=_query_box, outputs="text")

# Start serving immediately when this module is executed.
demo.launch(share=True)
embeddings.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8f424f4016bb2d019a4e2bc611cea027eb722212b18f7350614adfbaf89c687
3
+ size 143095812
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
# The "+cu118" torch build is a local-version wheel that is published on the
# PyTorch wheel index, not on PyPI, so pip needs the extra index to resolve it.
--extra-index-url https://download.pytorch.org/whl/cu118
sentence-transformers==2.2.2
torch==2.0.1+cu118
tqdm==4.66.1
gradio==3.46.1
pandas==1.5.3