File size: 4,276 Bytes
3fb88a6
 
8e44dd8
3fb88a6
8e44dd8
 
 
 
 
 
 
3fb88a6
 
 
 
8e44dd8
3fb88a6
 
 
8e44dd8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3fb88a6
 
 
 
 
 
8e44dd8
3fb88a6
 
 
 
 
 
 
 
 
 
 
 
8e44dd8
 
3fb88a6
8e44dd8
 
 
 
 
3fb88a6
 
 
8e44dd8
3fb88a6
 
 
 
 
8e44dd8
 
3fb88a6
8e44dd8
 
 
 
3fb88a6
8e44dd8
 
 
 
 
 
3fb88a6
8e44dd8
 
 
 
 
3fb88a6
8e44dd8
 
 
 
 
 
3fb88a6
8e44dd8
 
 
 
 
 
 
 
 
 
 
 
3fb88a6
 
 
 
 
8e44dd8
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
import pandas as pd
import pickle
import random
from sentence_transformers import SentenceTransformer
from utils import (
    encode,
    cosine_sim,
    top_candidates,
    candidates_reranking,
    intent_classification,
)
from collections import deque
from transformers import pipeline
import torch
from transformers import AutoTokenizer
from dialog_tag import DialogTag

# this class representes main functions of retrieve bot

low_scoring_list = [
    "What does it mean?",
    "You have two strikes. Three strikes and you’ re out. It’ s a sports metaphor. Explain again!",
    "Again, urban slang. In which, I believe I’ m gaining remarkable fluency. So, could you repeat?",
    "I’m confused.",
    "I can’t comment without violating our agreement that I don’ t criticize you.",
    "Oh!",
    "I need to use the restroom.",
    "Move. Move. Move!",
    "I was going to mention it at the time, but then I thought, some day maybe...",
    "Well...",
    "Apparently... I have no idea!?",
    "I’m not sure...",
    "Nothing. I say nothing.",
    "Well, my friend. Focus and repeat!",
]


class ChatBot:
    def __init__(self):
        self.vect_data = []
        self.scripts = []
        self.conversation_history = deque([], maxlen=5)
        self.tag_model = None
        self.ranking_model = None
        self.reranking_model = None
        self.device = None
        self.tokenizer = None

    def load(self):
        """ "This method is called first to load all datasets and
        model used by the chat bot; all the data to be saved in
        tha data folder, models to be loaded from hugging face"""

        with open("data/scripts_vectors.pkl", "rb") as fp:
            self.vect_data = pickle.load(fp)
        self.scripts = pd.read_pickle("data/scripts.pkl")
        self.tag_model = DialogTag("distilbert-base-uncased")
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.ranking_model = SentenceTransformer(
         "Shakhovak/chatbot_sentence-transformer"
         )  # # sentence-transformers/LaBSE  or sentence-transformers/all-mpnet-base-v2 or Shakhovak/chatbot_sentence-transformer

        self.tokenizer_reranking = AutoTokenizer.from_pretrained("bert-base-uncased")
        self.reranking_model = pipeline(
            model="Shakhovak/RerankerModel_chat_bot",
            device=self.device,
            tokenizer=self.tokenizer_reranking,
        )

    def generate_response(self, utterance: str) -> str:
        """this functions identifies potential
        candidates for answer and ranks them"""

        intent = intent_classification(utterance, utterance, self.tag_model)
        query_encoding = encode(
            texts=utterance,
            intent=intent,
            model=self.ranking_model,
            contexts=self.conversation_history,
        )
        bot_cosine_scores = cosine_sim(
            self.vect_data,
            query_encoding,
        )
        top_scores, top_indexes = top_candidates(
            bot_cosine_scores, intent=intent, initial_data=self.scripts, top=10
        )
        if top_scores[0] < 0.9:
            answer = random.choice(low_scoring_list)
            self.conversation_history.clear()
        else:
            # test candidates and collects them with label 0 to dictionary

            reranked_dict = candidates_reranking(
                top_indexes,
                self.conversation_history,
                utterance,
                self.scripts,
                self.reranking_model,
            )
            # if any candidates were selected, range them and pick up the top
            # else keep up the initial top 1

            if len(reranked_dict) >= 1:
                updated_top_candidates = dict(
                    sorted(reranked_dict.items(), key=lambda item: item[1])
                )
                answer = self.scripts.iloc[list(updated_top_candidates.keys())[0]][
                    "answer"
                ]
            else:
                answer = self.scripts.iloc[top_indexes[0]]["answer"]

        self.conversation_history.append(utterance)
        self.conversation_history.append(answer)

        return answer


# katya = ChatBot()
# katya.load()
# katya.generate_response("hi man!")