shakhovak
committed on
Commit
•
decf09e
1
Parent(s):
bddcd9a
updates
Browse files- data/low_score_sripts.json +37 -0
- retrieve_bot.py +12 -22
data/low_score_sripts.json
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{"generic":[
|
2 |
+
"What does it mean?",
|
3 |
+
"You have two strikes. Three strikes and you' re out. It' s a sports metaphor. Explain again!",
|
4 |
+
"Again, urban slang. In which, I believe I' m gaining remarkable fluency. So, could you repeat?",
|
5 |
+
"I'm confused.",
|
6 |
+
"I can't comment without violating our agreement that I don' t criticize you.",
|
7 |
+
"Oh!",
|
8 |
+
"I need to use the restroom.",
|
9 |
+
"Move. Move. Move!",
|
10 |
+
"I was going to mention it at the time, but then I thought, some day maybe...",
|
11 |
+
"Well...",
|
12 |
+
"Apparently... I have no idea!?",
|
13 |
+
"I'm not sure...",
|
14 |
+
"Nothing. I say nothing.",
|
15 |
+
"Well, my friend. Focus and repeat!",
|
16 |
+
"I don't follow.",
|
17 |
+
"Thank you. Can we just talk about something else",
|
18 |
+
"Aw…",
|
19 |
+
"I have insufficient data to proceed. Excuse me?"
|
20 |
+
|
21 |
+
],
|
22 |
+
"greetings": [
|
23 |
+
"Hello.",
|
24 |
+
"Hello to you, insufficiently intelligent person.",
|
25 |
+
"Hello, my friend.",
|
26 |
+
"Hi,my friend. I’ m sorry I’ m late, but your companion left the most indecipherable invitation.",
|
27 |
+
"Hi,my friend. It’ s me, Sheldon. In the living room. I just, I wanted you to know I saw the tie. Message received. You’ re welcome. You carry on.",
|
28 |
+
"Hi, uh,my friend, this circular is addressed to occupant, but with our apartment switch, it’ s unclear whether it’ s yours or mine.",
|
29 |
+
"Hi",
|
30 |
+
"Hi. Hello. Oh, and a special hello to my friend, who needs to be mentioned by name.",
|
31 |
+
"Hello. So I guess you’ re really holding up the other four fingers?",
|
32 |
+
"Hi. Um, I’ ve reconsidered. Uh, you can’ t work where I work. Enjoy the rest of your evening."
|
33 |
+
|
34 |
+
]
|
35 |
+
|
36 |
+
|
37 |
+
}
|
retrieve_bot.py
CHANGED
@@ -12,29 +12,12 @@ from utils import (
|
|
12 |
from collections import deque
|
13 |
from transformers import pipeline
|
14 |
import torch
|
|
|
15 |
from transformers import AutoTokenizer
|
16 |
from dialog_tag import DialogTag
|
17 |
|
18 |
# this class represents the main functions of the retrieve bot
|
19 |
|
20 |
-
low_scoring_list = [
|
21 |
-
"What does it mean?",
|
22 |
-
"You have two strikes. Three strikes and you’ re out. It’ s a sports metaphor. Explain again!",
|
23 |
-
"Again, urban slang. In which, I believe I’ m gaining remarkable fluency. So, could you repeat?",
|
24 |
-
"I’m confused.",
|
25 |
-
"I can’t comment without violating our agreement that I don’ t criticize you.",
|
26 |
-
"Oh!",
|
27 |
-
"I need to use the restroom.",
|
28 |
-
"Move. Move. Move!",
|
29 |
-
"I was going to mention it at the time, but then I thought, some day maybe...",
|
30 |
-
"Well...",
|
31 |
-
"Apparently... I have no idea!?",
|
32 |
-
"I’m not sure...",
|
33 |
-
"Nothing. I say nothing.",
|
34 |
-
"Well, my friend. Focus and repeat!",
|
35 |
-
]
|
36 |
-
|
37 |
-
|
38 |
class ChatBot:
|
39 |
def __init__(self):
|
40 |
self.vect_data = []
|
@@ -45,6 +28,7 @@ class ChatBot:
|
|
45 |
self.reranking_model = None
|
46 |
self.device = None
|
47 |
self.tokenizer = None
|
|
|
48 |
|
49 |
def load(self):
|
50 |
""" "This method is called first to load all datasets and
|
@@ -54,6 +38,8 @@ class ChatBot:
|
|
54 |
with open("data/scripts_vectors.pkl", "rb") as fp:
|
55 |
self.vect_data = pickle.load(fp)
|
56 |
self.scripts = pd.read_pickle("data/scripts.pkl")
|
|
|
|
|
57 |
self.tag_model = DialogTag("distilbert-base-uncased")
|
58 |
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
59 |
self.ranking_model = SentenceTransformer(
|
@@ -83,11 +69,15 @@ class ChatBot:
|
|
83 |
query_encoding,
|
84 |
)
|
85 |
top_scores, top_indexes = top_candidates(
|
86 |
-
bot_cosine_scores, intent=intent, initial_data=self.scripts, top=
|
87 |
)
|
88 |
-
if top_scores[0] < 0.
|
89 |
-
|
90 |
-
|
|
|
|
|
|
|
|
|
91 |
else:
|
92 |
# test candidates and collects them with label 0 to dictionary
|
93 |
|
|
|
12 |
from collections import deque
|
13 |
from transformers import pipeline
|
14 |
import torch
|
15 |
+
import json
|
16 |
from transformers import AutoTokenizer
|
17 |
from dialog_tag import DialogTag
|
18 |
|
19 |
# this class represents the main functions of the retrieve bot
|
20 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
21 |
class ChatBot:
|
22 |
def __init__(self):
|
23 |
self.vect_data = []
|
|
|
28 |
self.reranking_model = None
|
29 |
self.device = None
|
30 |
self.tokenizer = None
|
31 |
+
self.low_scoring_list = None
|
32 |
|
33 |
def load(self):
|
34 |
""" "This method is called first to load all datasets and
|
|
|
38 |
with open("data/scripts_vectors.pkl", "rb") as fp:
|
39 |
self.vect_data = pickle.load(fp)
|
40 |
self.scripts = pd.read_pickle("data/scripts.pkl")
|
41 |
+
with open('low_score_sripts.json', 'r') as f:
|
42 |
+
self.low_scoring_list = json.load(f)
|
43 |
self.tag_model = DialogTag("distilbert-base-uncased")
|
44 |
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
45 |
self.ranking_model = SentenceTransformer(
|
|
|
69 |
query_encoding,
|
70 |
)
|
71 |
top_scores, top_indexes = top_candidates(
|
72 |
+
bot_cosine_scores, intent=intent, initial_data=self.scripts, top=5
|
73 |
)
|
74 |
+
if top_scores[0] < 0.9:
|
75 |
+
if intent == "greetings":
|
76 |
+
answer = random.choice(self.low_scoring_list['greetings'])
|
77 |
+
self.conversation_history.clear()
|
78 |
+
else:
|
79 |
+
answer = random.choice(self.low_scoring_list['generic'])
|
80 |
+
self.conversation_history.clear()
|
81 |
else:
|
82 |
# test candidates and collects them with label 0 to dictionary
|
83 |
|