File size: 7,021 Bytes
2dcc710
fca97ef
 
 
 
2dcc710
 
 
fca97ef
 
 
2dcc710
 
fca97ef
 
2dcc710
 
 
 
 
fca97ef
 
 
 
 
 
 
 
 
2dcc710
 
 
 
 
 
 
 
fca97ef
 
 
 
 
 
 
aaec9ae
fca97ef
aaec9ae
fca97ef
aaec9ae
fca97ef
 
 
 
 
 
aaec9ae
fca97ef
 
 
 
 
 
 
 
 
 
2dcc710
 
 
 
 
 
 
053ffc5
fca97ef
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0f938f7
fca97ef
 
0f938f7
fca97ef
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0f938f7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
import logging
import json
from contextlib import asynccontextmanager
from typing import Any, List, Tuple
import random

from fastapi import FastAPI
from pydantic import BaseModel
from FlagEmbedding import BGEM3FlagModel, FlagReranker
from starlette.requests import Request
import torch


# Fixed seed so the randomly sampled canned phrasings are reproducible.
random.seed(42)

# Module-level logger; INFO so startup/model-loading progress is visible.
logging.basicConfig()
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)


def get_data(model):
    """Load the FAQ dataset and pre-compute ColBERT embeddings for its questions.

    Args:
        model: 1-tuple wrapping the BGE-M3 retriever (indexed with [0] below;
            see load_models, which builds it that way).

    Returns:
        Tuple of (question ColBERT vectors, question texts, answer bodies),
        restricted to the English ('en') entries of the FAQ file.
    """
    with open("data/paris-2024-faq.json") as fh:
        raw_entries = json.load(fh)
    english = [entry for entry in raw_entries if entry['lang'] == 'en']
    labels = [entry['label'] for entry in english]
    # Only the ColBERT (multi-vector) representation is needed for retrieval.
    encoded = model[0].encode(labels, return_dense=False, return_sparse=False, return_colbert_vecs=True)
    bodies = [entry['body'] for entry in english]
    return encoded['colbert_vecs'], labels, bodies


class InputLoad(BaseModel):
    """Request body for POST /answer/: the user's free-text question."""
    question: str


class ResponseLoad(BaseModel):
    """Response body for POST /answer/: the formatted answer text."""
    answer: str


class ML(BaseModel):
    """Container for the loaded models and pre-computed FAQ data, stored on app.ml."""
    # NOTE(review): retriever is a 1-tuple wrapping the BGEM3FlagModel — a
    # trailing comma in load_models creates it; consumers index it with [0].
    retriever: Any
    # FlagReranker cross-encoder used to re-score retrieved candidates.
    ranker: Any
    # (question ColBERT vectors, question texts, answer bodies) — see get_data.
    data: Tuple[List[Any], List[str], List[str]]


def load_models(app: FastAPI) -> FastAPI:
    """Load the retriever and reranker, pre-compute FAQ embeddings, attach to app.

    Args:
        app: the FastAPI application to decorate with an `ml` attribute.

    Returns:
        The same app instance, with `app.ml` set to a populated ML container.
    """
    logger.info("Loading embedding model...")
    # Deliberately a 1-tuple: the original code had an accidental trailing
    # comma here, and every consumer (get_data, get_candidates) now indexes
    # the retriever with [0]. Kept explicit for backward compatibility.
    retriever = (BGEM3FlagModel('BAAI/bge-m3', use_fp16=True),)
    logger.info("Loading ranker model...")
    ranker = FlagReranker('BAAI/bge-reranker-v2-m3', use_fp16=True)
    logger.info("Done loading models!")
    ml = ML(
        retriever=retriever,
        ranker=ranker,
        data=get_data(retriever)
    )
    app.ml = ml
    logger.info("Done with startup steps!")
    return app


@asynccontextmanager
async def lifespan(app: FastAPI):
    """FastAPI lifespan hook: load models once at startup; no shutdown cleanup."""
    app = load_models(app=app)
    yield


# Application instance; the lifespan hook loads the ML models before serving.
app = FastAPI(lifespan=lifespan)


@app.get("/health")
def health_check():
    return {"server": "running"}


@app.post("/answer/")
async def receive(input_load: InputLoad, request: Request) -> ResponseLoad:
    ml: ML = request.app.ml
    candidate_indices, candidate_scores = get_candidates(input_load.question, ml)
    answer_candidate, rank_score, retriever_score = rerank_candidates(input_load.question, candidate_indices, candidate_scores, ml)
    answer = get_final_answer(answer_candidate, retriever_score)
    return ResponseLoad(answer=answer)


def get_candidates(question, ml, topk=5):
    """Retrieve the top-k FAQ entries most similar to the question.

    Args:
        question: raw user question text.
        ml: ML container; ml.retriever[0] is the BGE-M3 model, ml.data[0]
            holds the pre-computed ColBERT vectors for all FAQ questions.
        topk: number of candidates to return.

    Returns:
        (indices, scores) — parallel lists, best candidates first.
    """
    retriever = ml.retriever[0]
    encoded = retriever.encode([question], return_dense=False, return_sparse=False, return_colbert_vecs=True)
    query_vecs = encoded['colbert_vecs'][0]
    # Late-interaction similarity against every pre-computed FAQ embedding.
    similarities = torch.stack(
        [retriever.colbert_score(query_vecs, faq_vecs) for faq_vecs in ml.data[0]]
    )
    best_scores, best_indices = torch.topk(similarities, topk)
    return best_indices.tolist(), best_scores.tolist()


def rerank_candidates(question, indices, values, ml):
    """Cross-encode each retrieved candidate with the question and keep the best.

    Args:
        question: raw user question text.
        indices: FAQ indices produced by the retriever, aligned with `values`.
        values: retriever scores for those indices.
        ml: ML container; ml.data[2] holds answer bodies, ml.ranker the reranker.

    Returns:
        (best answer text, its reranker score, its retriever score)
    """
    answers = [ml.data[2][idx] for idx in indices]
    pair_scores = ml.ranker.compute_score([[question, answer] for answer in answers])
    # max() returns the first index among ties, matching list.index on the max.
    best = max(range(len(pair_scores)), key=pair_scores.__getitem__)
    return answers[best], pair_scores[best], values[best]


def get_final_answer(answer, retriever_score, not_found_threshold=0.65, caution_threshold=0.8):
    """Wrap the reranked answer in a canned intro/outro based on retrieval confidence.

    Args:
        answer: the best answer body selected by the reranker.
        retriever_score: the retriever (ColBERT) score of that answer.
        not_found_threshold: below this the answer is considered irrelevant.
        caution_threshold: below this (but above not_found_threshold) the
            answer is returned with a hedging introduction.

    Returns:
        The user-facing response string.
    """
    # Lazy %-style args so formatting only happens if INFO is enabled.
    logger.info("Retriever score: %s", retriever_score)
    if retriever_score < not_found_threshold:
        # Nothing relevant found — apologize without an answer.
        return random.choice(NOT_FOUND_ANSWERS)
    if retriever_score < caution_threshold:
        # Might be relevant, but hedge the introduction.
        return f"{random.choice(ROUGH_MATCH_INTROS)}\n\n{answer}"
    # Good match — confident intro and a friendly sign-off.
    return f"{random.choice(GOOD_MATCH_INTROS)}\n\n{answer}\n\n{random.choice(GOOD_MATCH_ENDS)}"


# Canned apologies used when the retriever score falls below the
# "not found" threshold in get_final_answer.
NOT_FOUND_ANSWERS = [
    "I'm sorry, but I couldn't find any information related to your question in my knowledge base.",
    "Apologies, but I don't have the information you're looking for at the moment.",
    "I’m sorry, I couldn’t locate any relevant details in my current data.",
    "Unfortunately, I wasn't able to find an answer to your query. Can I help with something else?",
    "I'm afraid I don't have the information you need right now. Please feel free to ask another question.",
    "Sorry, I couldn't find anything that matches your question in my knowledge base.",
    "I apologize, but I wasn't able to retrieve information related to your query.",
    "I'm sorry, but it looks like I don't have an answer for that. Is there anything else I can assist with?",
    "Regrettably, I couldn't find the information you requested. Can I help you with anything else?",
    "I’m sorry, but I don't have the details you're seeking in my knowledge database."
]

# Confident introductions prepended when the retriever score is high.
GOOD_MATCH_INTROS = [
    "I was able to find the following answer to your question:",
    "I located the following information for your query:",
    "I found the following details that should address your question:",
    "I have gathered the following information for you:",
    "Here is the answer I found to your question:",
    "I came across the following details relevant to your inquiry:",
    "The following information should help answer your question:",
    "I found this information that might be useful to you:",
    "I identified the following details for your query:",
    "Here is the response I was able to find for your question:"
]

# Friendly sign-offs appended after a high-confidence answer.
GOOD_MATCH_ENDS = [
    "I hope this answers your question. Feel free to ask any follow-up questions.",
    "I trust this information is helpful. If you have any more questions, please don't hesitate to ask.",
    "I hope you found this answer useful. Let me know if there's anything else I can assist you with.",
    "I trust this resolves your query. If you need further assistance, feel free to reach out.",
    "I hope this information meets your needs. Please ask if you have any additional questions.",
    "I hope this helps. If you have any more queries, I'm here to assist you.",
    "I trust this answers your question. Don't hesitate to ask if you need more information.",
    "I hope this response is helpful. I'm available for any further questions you might have.",
    "I hope this information is what you were looking for. Feel free to ask more questions if needed.",
    "I trust this provides the clarity you needed. Let me know if there's anything else I can help with."
]

# Hedging introductions used for middling retriever scores, where the
# answer may or may not actually address the question.
ROUGH_MATCH_INTROS = [
    "I found some information that might be relevant to your question:",
    "Here is what I found, though it may not be exactly what you're looking for:",
    "I located the following details, but I'm not entirely sure if they address your query:",
    "I have some information that could be related to your question:",
    "This is what I found, but it may not fully answer your question:",
    "I identified some details that might help, though they may not be completely relevant:",
    "Here is some information that could be pertinent, but I'm unsure if it fully addresses your query:",
    "I came across the following information, but it might not be exactly what you need:",
    "I found some details that may be useful, but they might not directly answer your question:",
    "This information could be relevant, though I'm not certain it fully resolves your query:"
]