Artteiv tosanoob committed on
Commit
f3de9b9
1 Parent(s): 6542d1b

fix query bugs no.1 (#10)

Browse files

- fix query bugs no.1 (ca0f0a7af6fd5c4b7cf9e8f01ba12534151b22e6)


Co-authored-by: Trương Tấn Cường <tosanoob@users.noreply.huggingface.co>

Files changed (2) hide show
  1. chat/consumers.py +1 -1
  2. chat/model_manage.py +172 -269
chat/consumers.py CHANGED
@@ -1,5 +1,5 @@
1
  import json
2
- from . import model_manage2 as md
3
  from channels.generic.websocket import WebsocketConsumer
4
  from .database_manage import DataManage
5
 
 
1
  import json
2
+ from . import model_manage as md
3
  from channels.generic.websocket import WebsocketConsumer
4
  from .database_manage import DataManage
5
 
chat/model_manage.py CHANGED
@@ -1,271 +1,174 @@
1
- # # my_app/model_manager.py
2
- # import google.generativeai as genai
3
- # import chat.arxiv_bot.arxiv_bot_utils as utils
4
- # import json
5
-
6
- # model = None
7
-
8
- # model_retrieval = None
9
-
10
- # model_answer = None
11
-
12
- # RETRIEVAL_INSTRUCT = """You are an auto chatbot that response with only one action below based on user question.
13
- # 1. If the guest question is asking about a science topic, you need to respond the information in JSON schema below:
14
- # {
15
- # "keywords": [a list of string keywords about the topic],
16
- # "description": "a paragraph describing the topic in about 50 to 100 words"
17
- # }
18
- # 2. If the guest is not asking for any informations or documents, you need to respond in JSON schema below:
19
- # {
20
- # "answer": "your answer to the user question"
21
- # }"""
22
-
23
- # ANSWER_INSTRUCT = """You are a library assistant that help answering customer question based on the information given.
24
- # You always answer in a conversational form naturally and politely.
25
- # You must introduce all the records given, each must contain title, authors and the link to the pdf file."""
26
-
27
- # def create_model():
28
- # with open("apikey.txt","r") as apikey:
29
- # key = apikey.readline()
30
- # genai.configure(api_key=key)
31
- # for m in genai.list_models():
32
- # if 'generateContent' in m.supported_generation_methods:
33
- # print(m.name)
34
- # print("He was there")
35
- # config = genai.GenerationConfig(max_output_tokens=2048,
36
- # temperature=1.0)
37
- # safety_settings = [
38
- # {
39
- # "category": "HARM_CATEGORY_DANGEROUS",
40
- # "threshold": "BLOCK_NONE",
41
- # },
42
- # {
43
- # "category": "HARM_CATEGORY_HARASSMENT",
44
- # "threshold": "BLOCK_NONE",
45
- # },
46
- # {
47
- # "category": "HARM_CATEGORY_HATE_SPEECH",
48
- # "threshold": "BLOCK_NONE",
49
- # },
50
- # {
51
- # "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
52
- # "threshold": "BLOCK_NONE",
53
- # },
54
- # {
55
- # "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
56
- # "threshold": "BLOCK_NONE",
57
- # },
58
- # ]
59
- # global model, model_retrieval, model_answer
60
- # model = genai.GenerativeModel("gemini-1.5-pro-latest",
61
- # generation_config=config,
62
- # safety_settings=safety_settings)
63
- # model_retrieval = genai.GenerativeModel("gemini-1.5-pro-latest",
64
- # generation_config=config,
65
- # safety_settings=safety_settings,
66
- # system_instruction=RETRIEVAL_INSTRUCT)
67
- # model_answer = genai.GenerativeModel("gemini-1.5-pro-latest",
68
- # generation_config=config,
69
- # safety_settings=safety_settings,
70
- # system_instruction=ANSWER_INSTRUCT)
71
- # return model, model_answer, model_retrieval
72
-
73
- # def get_model():
74
- # global model, model_answer, model_retrieval
75
- # if model is None:
76
- # # Khởi tạo model ở đây
77
- # model, model_answer, model_retrieval = create_model() # Giả sử create_model là hàm tạo model của bạn
78
- # return model, model_answer, model_retrieval
79
-
80
- # def extract_keyword_prompt(query):
81
- # """A prompt that return a JSON block as arguments for querying database"""
82
-
83
- # prompt = """[INST] SYSTEM: You are an auto chatbot that response with only one action below based on user question.
84
- # 1. If the guest question is asking about a science topic, you need to respond the information in JSON schema below:
85
- # {
86
- # "keywords": [a list of string keywords about the topic],
87
- # "description": "a paragraph describing the topic in about 50 to 100 words"
88
- # }
89
- # 2. If the guest is not asking for any informations or documents, you need to respond in JSON schema below:
90
- # {
91
- # "answer": "your answer to the user question"
92
- # }
93
- # QUESTION: """ + query + """[/INST]
94
- # ANSWER: """
95
- # return prompt
96
-
97
- # def make_answer_prompt(input, contexts):
98
- # """A prompt that return the final answer, based on the queried context"""
99
-
100
- # prompt = (
101
- # """[INST] You are a library assistant that help answering customer QUESTION based on the INFORMATION given.
102
- # You always answer in a conversational form naturally and politely.
103
- # You must introduce all the records given, each must contain title, authors and the link to the pdf file.
104
- # QUESTION: {input}
105
- # INFORMATION: '{contexts}'
106
- # [/INST]
107
- # ANSWER:
108
- # """
109
- # ).format(input=input, contexts=contexts)
110
- # return prompt
111
-
112
- # def retrieval_chat_template(question):
113
- # return {
114
- # "role":"user",
115
- # "parts":[f"QUESTION: {question} \n ANSWER:"]
116
- # }
117
-
118
- # def answer_chat_template(question, contexts):
119
- # return {
120
- # "role":"user",
121
- # "parts":[f"QUESTION: {question} \n INFORMATION: {contexts} \n ANSWER:"]
122
- # }
123
-
124
- # def response(args, db_instance):
125
- # """Create response context, based on input arguments"""
126
- # keys = list(dict.keys(args))
127
- # if "answer" in keys:
128
- # return args['answer'], None # trả lời trực tiếp
129
-
130
- # if "keywords" in keys:
131
- # # perform query
132
- # query_texts = args["description"]
133
- # keywords = args["keywords"]
134
- # results = utils.db.query_relevant(keywords=keywords, query_texts=query_texts)
135
- # # print(results)
136
- # ids = results['metadatas'][0]
137
- # if len(ids) == 0:
138
- # # go crawl some
139
- # new_records = utils.crawl_arxiv(keyword_list=keywords, max_results=10)
140
- # print("Got new records: ",len(new_records))
141
- # if type(new_records) == str:
142
- # return "Error occured, information not found", new_records
143
- # utils.db.add(new_records)
144
- # db_instance.add(new_records)
145
- # results = utils.db.query_relevant(keywords=keywords, query_texts=query_texts)
146
- # ids = results['metadatas'][0]
147
- # print("Re-queried on chromadb, results: ",ids)
148
- # paper_id = [id['paper_id'] for id in ids]
149
- # paper_info = db_instance.query_id(paper_id)
150
- # print(paper_info)
151
- # records = [] # get title (2), author (3), link (6)
152
- # result_string = ""
153
- # if paper_info:
154
- # for i in range(len(paper_info)):
155
- # result_string += "Record no.{} - Title: {}, Author: {}, Link: {}, ".format(i+1,paper_info[i][2],paper_info[i][3],paper_info[i][6])
156
- # id = paper_info[i][0]
157
- # selected_document = utils.db.query_exact(id)["documents"]
158
- # doc_str = "Summary:"
159
- # for doc in selected_document:
160
- # doc_str+= doc + " "
161
- # result_string += doc_str
162
- # records.append([paper_info[i][2],paper_info[i][3],paper_info[i][6]])
163
- # return result_string, records
164
- # else:
165
- # return "Information not found", "Information not found"
166
- # # invoke llm and return result
167
-
168
- # # if "title" in keys:
169
- # # title = args['title']
170
- # # authors = utils.authors_str_to_list(args['author'])
171
- # # paper_info = db_instance.query(title = title,author = authors)
172
- # # # if query not found then go crawl brh
173
- # # # print(paper_info)
174
-
175
- # # if len(paper_info) == 0:
176
- # # new_records = utils.crawl_exact_paper(title=title,author=authors)
177
- # # print("Got new records: ",len(new_records))
178
- # # if type(new_records) == str:
179
- # # # print(new_records)
180
- # # return "Error occured, information not found", "Information not found"
181
- # # utils.db.add(new_records)
182
- # # db_instance.add(new_records)
183
- # # paper_info = db_instance.query(title = title,author = authors)
184
- # # print("Re-queried on chromadb, results: ",paper_info)
185
- # # # -------------------------------------
186
- # # records = [] # get title (2), author (3), link (6)
187
- # # result_string = ""
188
- # # for i in range(len(paper_info)):
189
- # # result_string += "Title: {}, Author: {}, Link: {}".format(paper_info[i][2],paper_info[i][3],paper_info[i][6])
190
- # # records.append([paper_info[i][2],paper_info[i][3],paper_info[i][6]])
191
- # # # process results:
192
- # # if len(result_string) == 0:
193
- # # return "Information not found", "Information not found"
194
- # # return result_string, records
195
- # # invoke llm and return result
196
-
197
- # def full_chain_single_question(input_prompt, db_instance):
198
- # try:
199
- # first_prompt = extract_keyword_prompt(input_prompt)
200
- # temp_answer = model.generate_content(first_prompt).text
201
-
202
- # args = json.loads(utils.trimming(temp_answer))
203
- # contexts, results = response(args, db_instance)
204
- # if not results:
205
- # # print(contexts)
206
- # return "Random question, direct return", contexts
207
- # else:
208
- # output_prompt = make_answer_prompt(input_prompt,contexts)
209
- # answer = model.generate_content(output_prompt).text
210
- # return temp_answer, answer
211
- # except Exception as e:
212
- # # print(e)
213
- # return temp_answer, "Error occured: " + str(e)
214
 
215
-
216
- # def format_chat_history_from_web(chat_history: list):
217
- # temp_chat = []
218
- # for message in chat_history:
219
- # temp_chat.append(
220
- # {
221
- # "role": message["role"],
222
- # "parts": [message["content"]]
223
- # }
224
- # )
225
- # return temp_chat
226
-
227
- # # def full_chain_history_question(chat_history: list, db_instance):
228
- # # try:
229
- # # temp_chat = format_chat_history_from_web(chat_history)
230
- # # print('Extracted temp chat: ',temp_chat)
231
- # # first_prompt = extract_keyword_prompt(temp_chat[-1]["parts"][0])
232
- # # temp_answer = model.generate_content(first_prompt).text
233
-
234
- # # args = json.loads(utils.trimming(temp_answer))
235
- # # contexts, results = response(args, db_instance)
236
- # # print('Context extracted: ',contexts)
237
- # # if not results:
238
- # # return "Random question, direct return", contexts
239
- # # else:
240
- # # QA_Prompt = make_answer_prompt(temp_chat[-1]["parts"][0], contexts)
241
- # # temp_chat[-1]["parts"] = QA_Prompt
242
- # # print(temp_chat)
243
- # # answer = model.generate_content(temp_chat).text
244
- # # return temp_answer, answer
245
- # # except Exception as e:
246
- # # # print(e)
247
- # # return temp_answer, "Error occured: " + str(e)
248
-
249
- # def full_chain_history_question(chat_history: list, db_instance):
250
- # try:
251
- # temp_chat = format_chat_history_from_web(chat_history)
252
- # question = temp_chat[-1]['parts'][0]
253
- # first_answer = model_retrieval.generate_content(temp_chat).text
254
 
255
- # print(first_answer)
256
- # args = json.loads(utils.trimming(first_answer))
257
-
258
- # contexts, results = response(args, db_instance)
259
- # if not results:
260
- # return "Random question, direct return", contexts
261
- # else:
262
- # print('Context to answers: ',contexts)
263
- # answer_chat = answer_chat_template(question, contexts)
264
- # temp_chat[-1] = answer_chat
265
- # answer = model_answer.generate_content(temp_chat).text
266
- # return first_answer, answer
267
- # except Exception as e:
268
- # if first_answer:
269
- # return first_answer, "Error occured: " + str(e)
270
- # else:
271
- # return "No answer", "Error occured: " + str(e)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import chat.arxiv_bot.arxiv_bot_utils as utils
2
+ import google.generativeai as genai
3
+ import json
4
+ import os
5
+ from google.generativeai.types import content_types
6
+ from collections.abc import Iterable
7
+ from IPython import display
8
+ from IPython.display import Markdown
9
+
10
+ # ----------------------- define instructions -----------------------
11
# System prompt handed to the Gemini model (see init_model). The text is part of
# the program's runtime behavior, so it is kept exactly as written.
system_instruction = """You are a library chatbot that help people to find relevant articles about a topic, or find a specific article with given title and authors.
Your job is to analyze the user question, generate enough parameters based on the user question and use the tools that are given to you.
Also, after the function call is done, you must post-process the results in a more conversational form, providing some explanation about the paper based on its summary to avoid recitation.
You must provide the link to its Arxiv pdf page."""
15
+
16
+ # --------------------------- define tools --------------------------
17
def search_for_relevant_article(keywords: list[str], topic_description: str) -> str:
    """This tool is used to search for articles from the database which is relevant to a topic, using a list of more than 3 keywords and a long sentence topic description.
    If there is not enough 3 keywords from the question, the model must generate more keywords related to the topic.
    If there is no description about the topic, the model must generate a description for the function call.
    \nThe result is a string describe the records found from the database: 'Record no. - Title: <title>, Author: <authors>, Link: <link to the pdf file>, Summary: <summary of the article>'. There can be many records.
    \nIf the result is 'Information not found' it means some error has occured, or the database has no relevant article"""
    # NOTE(review): this function is registered in `tools`, so the docstring above
    # is presumably what the SDK sends to the model as the tool description.
    # It is intentionally left verbatim - reword it only as a deliberate prompt change.

    print('Keywords: {}, description: {}'.format(keywords,topic_description))

    results = utils.ArxivChroma.query_relevant(keywords=keywords, query_texts=topic_description)
    matches = results['metadatas'][0]
    if not matches:
        # Nothing relevant is stored yet: crawl arXiv, persist the new records in
        # both stores, then run the same query once more.
        new_records = utils.crawl_arxiv(keyword_list=keywords, max_results=10)
        if isinstance(new_records, str):
            # The crawler signals failure by returning a string instead of records.
            return "Information not found"

        utils.ArxivChroma.add(new_records)
        utils.ArxivSQL.add(new_records)
        results = utils.ArxivChroma.query_relevant(keywords=keywords, query_texts=topic_description)
        matches = results['metadatas'][0]
        if not matches:
            # Still nothing after crawling: stop here rather than querying the
            # SQL store with an empty id list.
            return "Information not found"

    paper_ids = [match['paper_id'] for match in matches]
    paper_info = utils.ArxivSQL.query_id(paper_ids)
    if not paper_info:
        return "Information not found"

    # Row layout used below: id (0), title (2), author (3), link (6).
    chunks = []
    for record_no, row in enumerate(paper_info, start=1):
        chunks.append(
            "Record no.{} - Title: {}, Author: {}, Link: {}, ".format(record_no, row[2], row[3], row[6])
        )
        documents = utils.ArxivChroma.query_exact(row[0])["documents"]
        # Each stored document chunk is appended followed by a single space.
        chunks.append("Summary:" + "".join(doc + " " for doc in documents))
    return "".join(chunks)
60
+
61
def search_for_specific_article(title: str, authors: list[str]) -> str:
    """This tool is used to search for a specific article from the database, with its name and authors given.
    \nThe result is a string describe the records found from the database: 'Record no. - Title: <title>, Author: <authors>, Link: <link to the pdf file>, Summary: <summary of the article>'. There can be many records.
    \nIf the result is 'Information not found' it means some error has occured, or the database has no relevant article"""
    # NOTE(review): this function is registered in `tools`, so the docstring above
    # is presumably what the SDK sends to the model as the tool description.
    # It is intentionally left verbatim - reword it only as a deliberate prompt change.

    print('Title: {}, authors: {}'.format(title,authors))

    paper_info = utils.ArxivSQL.query(title=title, author=authors)
    if not paper_info:
        # Only crawl when the paper is NOT already stored. The previous check was
        # inverted: it re-crawled known papers and never fetched missing ones.
        new_records = utils.crawl_exact_paper(title=title, author=authors)
        if isinstance(new_records, str):
            # The crawler signals failure by returning a string instead of records.
            return "Information not found"
        utils.ArxivChroma.add(new_records)
        utils.ArxivSQL.add(new_records)
        paper_info = utils.ArxivSQL.query(title=title, author=authors)

    if not paper_info:
        return "Information not found"

    # Row layout used below: id (0), title (2), author (3), link (6).
    chunks = []
    for record_no, row in enumerate(paper_info, start=1):
        chunks.append(
            "Record no.{} - Title: {}, Author: {}, Link: {}, ".format(record_no, row[2], row[3], row[6])
        )
        documents = utils.ArxivChroma.query_exact(row[0])["documents"]
        # Each stored document chunk is appended followed by a single space.
        chunks.append("Summary:" + "".join(doc + " " for doc in documents))
    return "".join(chunks)
96
+
97
def answer_others_questions(question: str) -> str:
    """This tool is the default option for other questions that are not related to article or paper request. The model will response the question with its own answer."""
    # Pass-through tool: the question is handed back unchanged so the model can
    # answer it itself. The docstring is kept verbatim because this function is
    # registered in `tools`.
    return question
100
+
101
# Callables offered to the model as tools, and their names in the same order.
tools = [search_for_relevant_article, search_for_specific_article, answer_others_questions]
tools_name = ['search_for_relevant_article', 'search_for_specific_article', 'answer_others_questions']

# load key, prepare config ------------------------
with open("apikey.txt","r") as apikey:
    # readline() keeps the trailing newline; strip it so the configured key
    # does not end with "\n" or stray spaces.
    key = apikey.readline().strip()
genai.configure(api_key=key)

# Sampling settings passed to every model created by init_model().
generation_config = {
    "temperature": 1,
    "top_p": 1,
    "top_k": 0,
    "max_output_tokens": 2048,
    "response_mime_type": "text/plain",
}

# Every listed harm category is set to BLOCK_NONE.
# NOTE(review): both HARM_CATEGORY_DANGEROUS and HARM_CATEGORY_DANGEROUS_CONTENT
# are listed - confirm against the SDK which of the two this model expects.
safety_settings = [
    {
        "category": "HARM_CATEGORY_DANGEROUS",
        "threshold": "BLOCK_NONE",
    },
    {
        "category": "HARM_CATEGORY_HARASSMENT",
        "threshold": "BLOCK_NONE",
    },
    {
        "category": "HARM_CATEGORY_HATE_SPEECH",
        "threshold": "BLOCK_NONE",
    },
    {
        "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
        "threshold": "BLOCK_NONE",
    },
    {
        "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
        "threshold": "BLOCK_NONE",
    },
]
137
+ # this function return a tool_config with mode 'none', 'any', 'auto'
138
def tool_config_from_mode(mode: str, fns: Iterable[str] = ()):
    """Build a tool config for the given function-calling mode.

    `mode` is the function-calling mode string ('none', 'any' or 'auto');
    `fns` is forwarded as the allowed function names.
    """
    calling_config = {"mode": mode, "allowed_function_names": fns}
    return content_types.to_tool_config({"function_calling_config": calling_config})
143
+
144
def init_model(mode = "auto"):
    """Create a model together with its own chat session.

    Every socket session is meant to hold its own model, so this should be
    called when the socket is initialised. The returned chat session is
    already started with automatic function calling enabled.

    Returns a `(model, chat_instance)` tuple.
    """
    model_options = dict(
        model_name="gemini-1.5-flash-latest",
        safety_settings=safety_settings,
        generation_config=generation_config,
        tools=tools,
        tool_config=tool_config_from_mode(mode),
        system_instruction=system_instruction,
    )
    model = genai.GenerativeModel(**model_options)
    chat_instance = model.start_chat(enable_automatic_function_calling=True)
    return model, chat_instance
156
+
157
+ # handle tool call and chatsession
158
def full_chain_history_question(user_input, chat_instance: genai.ChatSession, mode="auto"):
    """Send one user message through the chat session and return the reply.

    Returns `(text, history)`: the reply text and the session history. If
    anything raises during the call, the exception is printed and an error
    message string is returned in place of the reply text.
    """
    try:
        tool_config = tool_config_from_mode(mode)
        reply = chat_instance.send_message(user_input, tool_config=tool_config)
        answer = reply.text
        return answer, chat_instance.history
    except Exception as e:
        # Top-level boundary for the chat call: report the failure to the caller
        # as text instead of letting it propagate.
        print(e)
        return f'Error occured during call: {e}', chat_instance.history
165
+
166
+ # for printing log session
167
def print_history(history):
    """Print a readable log of a chat session's history.

    For each content entry, every part is printed as ``<role> -> <dict>``
    (the dict is produced by the part type's ``to_dict``), followed by an
    80-character dashed separator line.
    """
    for content in history:
        # Walk all parts instead of indexing parts[0]: an entry without parts no
        # longer raises IndexError, and entries with several parts are logged
        # completely.
        for part in content.parts:
            print(content.role, "->", type(part).to_dict(part))
        print('-'*80)
172
+
173
# Connect both stores at import time; the tool functions above call
# utils.ArxivChroma and utils.ArxivSQL directly.
utils.ArxivChroma.connect()
utils.ArxivSQL.connect()