Artteiv AnhLedger committed on
Commit
d496453
1 Parent(s): 76c62c3

Upload 2 files (#4)


- Upload 2 files (15df57044ac33e5949c35e4173aab31a66d3e812)


Co-authored-by: Nguyen Nguyen Anh <AnhLedger@users.noreply.huggingface.co>

Files changed (2)
  1. consumers.py +21 -0
  2. model_manage.py +201 -0
consumers.py ADDED
@@ -0,0 +1,21 @@
+ import json
+
+ from channels.generic.websocket import WebsocketConsumer
+
+ from . import model_manage as md
+ from chat.arxiv_bot.arxiv_bot_utils import ArxivSQL
+
+
+ class ChatConsumer(WebsocketConsumer):
+     def connect(self):
+         # Accept the WebSocket connection and open a per-session database handle.
+         self.accept()
+         self.db_instance = ArxivSQL()
+
+     def disconnect(self, close_code):
+         pass
+
+     def receive(self, text_data):
+         # The client sends a JSON payload whose "messages" key holds the chat history.
+         text_data_json = json.loads(text_data)
+         message = text_data_json["messages"]
+         print(message)
+         record, answer = md.full_chain_history_question(message, self.db_instance)
+         print("First answer: ", record)
+         self.send(text_data=json.dumps({"message": answer}))
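For reference, a minimal client sketch (not part of this commit) that exercises the consumer above; the ws://localhost:8000/ws/chat/ route and host are assumptions, since the routing configuration is not included here:

# Hypothetical client; the /ws/chat/ route and host are assumptions, not in this commit.
import asyncio
import json

import websockets

async def ask(history):
    async with websockets.connect("ws://localhost:8000/ws/chat/") as ws:
        # ChatConsumer.receive expects {"messages": [...]} and replies with {"message": ...}.
        await ws.send(json.dumps({"messages": history}))
        return json.loads(await ws.recv())["message"]

print(asyncio.run(ask([{"role": "user", "content": "Find papers about graph neural networks"}])))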
model_manage.py ADDED
@@ -0,0 +1,201 @@
+ # my_app/model_manage.py
+ import json
+
+ import google.generativeai as genai
+
+ import chat.arxiv_bot.arxiv_bot_utils as utils
+
+ model = None
+
+ def create_model():
+     # Read the Gemini API key from a local file and configure the client.
+     with open("apikey.txt", "r") as apikey:
+         key = apikey.readline().strip()
+     genai.configure(api_key=key)
+     for m in genai.list_models():
+         if 'generateContent' in m.supported_generation_methods:
+             print(m.name)
+     config = genai.GenerationConfig(max_output_tokens=2048,
+                                     temperature=0.7)
+     # Disable all safety filters so library queries are never blocked.
+     safety_settings = [
+         {
+             "category": "HARM_CATEGORY_DANGEROUS",
+             "threshold": "BLOCK_NONE",
+         },
+         {
+             "category": "HARM_CATEGORY_HARASSMENT",
+             "threshold": "BLOCK_NONE",
+         },
+         {
+             "category": "HARM_CATEGORY_HATE_SPEECH",
+             "threshold": "BLOCK_NONE",
+         },
+         {
+             "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
+             "threshold": "BLOCK_NONE",
+         },
+         {
+             "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
+             "threshold": "BLOCK_NONE",
+         },
+     ]
+     global model
+     model = genai.GenerativeModel("gemini-pro",
+                                   generation_config=config,
+                                   safety_settings=safety_settings)
+     return model
+
+ def get_model():
+     # Lazily create the shared model instance on first use.
+     global model
+     if model is None:
+         model = create_model()
+     return model
+
+ def extract_keyword_prompt(query):
+     """A prompt that returns a JSON block used as arguments for querying the database."""
+     prompt = (
+         """[INST] SYSTEM: You are an assistant that chooses only one action below based on the guest's question.
+ 1. If the guest asks for a single specific document or article with an explicit title, respond in JSON format with the two keys "title" and "author", if found. Authors are separated by the word 'and'.
+ 2. If the guest asks for relevant information about a topic, respond in JSON format with the two keys "keywords" and "description": a list of keywords representing the main academic topic, and a description of that topic. You may paraphrase the keywords to add more.
+ 3. If the guest is not asking for any information or documents, respond with a polite answer in JSON format with the single key "answer".
+ QUESTION: '{query}'
+ [/INST]
+ ANSWER:
+ """
+     ).format(query=query)
+     return prompt
+
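+ # Illustrative examples (assumed, not from the commit) of the JSON this prompt is meant
+ # to elicit, one per action; response() below branches on which of these keys are present:
+ #   {"title": "Attention Is All You Need", "author": "Ashish Vaswani and Noam Shazeer"}
+ #   {"keywords": ["graph neural networks"], "description": "Neural networks over graph-structured data."}
+ #   {"answer": "Hello! How can I help you find a paper today?"}
+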
+ def make_answer_prompt(question, contexts):
+     """A prompt that returns the final answer, based on the queried context."""
+     prompt = (
+         """[INST] You are a library assistant that helps to search for articles and documents based on the user's question.
+ From the guest's question, you have found some records and documents that may help. Now you need to answer the guest with the information found.
+ If no information is found in the database, you may generate some other recommendations related to the user's question using your own knowledge. Each article or paper must have a link to its pdf download page.
+ You should answer politely, in a conversational form.
+ QUESTION: '{input}'
+ INFORMATION: '{contexts}'
+ [/INST]
+ ANSWER:
+ """
+     ).format(input=question, contexts=contexts)
+     return prompt
+
+ def response(args, db_instance):
+     """Create the response context, based on the extracted arguments."""
+     keys = list(args.keys())
+     if "answer" in keys:
+         return args['answer'], None  # direct answer, no retrieval needed
+
+     if "keywords" in keys:
+         # Topic search: query the vector store, crawling arXiv if nothing is cached yet.
+         query_texts = args["description"]
+         keywords = args["keywords"]
+         results = utils.db.query_relevant(keywords=keywords, query_texts=query_texts)
+         ids = results['metadatas'][0]
+         if len(ids) == 0:
+             new_records = utils.crawl_arxiv(keyword_list=keywords, max_results=10)
+             print("Got new records: ", len(new_records))
+             if isinstance(new_records, str):
+                 return "Error occurred, information not found", new_records
+             utils.db.add(new_records)
+             db_instance.add(new_records)
+             results = utils.db.query_relevant(keywords=keywords, query_texts=query_texts)
+             ids = results['metadatas'][0]
+             print("Re-queried on chromadb, results: ", ids)
+         paper_id = [entry['paper_id'] for entry in ids]
+         paper_info = db_instance.query_id(paper_id)
+         print(paper_info)
+         records = []  # each record holds title (col 2), author (col 3), link (col 6)
+         result_string = ""
+         if paper_info:
+             for row in paper_info:
+                 result_string += "Title: {}, Author: {}, Link: {}\n".format(row[2], row[3], row[6])
+                 records.append([row[2], row[3], row[6]])
+             return result_string, records
+         else:
+             return "Information not found", "Information not found"
+
+     if "title" in keys:
+         # Exact lookup: search by title and authors, crawling the exact paper if missing.
+         title = args['title']
+         authors = utils.authors_str_to_list(args['author'])
+         paper_info = db_instance.query(title=title, author=authors)
+         if len(paper_info) == 0:
+             new_records = utils.crawl_exact_paper(title=title, author=authors)
+             print("Got new records: ", len(new_records))
+             if isinstance(new_records, str):
+                 return "Error occurred, information not found", "Information not found"
+             utils.db.add(new_records)
+             db_instance.add(new_records)
+             paper_info = db_instance.query(title=title, author=authors)
+             print("Re-queried on chromadb, results: ", paper_info)
+         records = []  # each record holds title (col 2), author (col 3), link (col 6)
+         result_string = ""
+         for row in paper_info:
+             result_string += "Title: {}, Author: {}, Link: {}\n".format(row[2], row[3], row[6])
+             records.append([row[2], row[3], row[6]])
+         if len(result_string) == 0:
+             return "Information not found", "Information not found"
+         return result_string, records
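+ # Note on the contract (comment added for clarity, not in the original commit): the
+ # retrieval branches return (result_string, records) while the "answer" branch returns
+ # (text, None), so the callers below treat a falsy second element as "no retrieval happened".
+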
+ def full_chain_single_question(input_prompt, db_instance):
+     temp_answer = ""
+     try:
+         # Step 1: ask the LLM to classify the question and emit query arguments as JSON.
+         llm = get_model()
+         first_prompt = extract_keyword_prompt(input_prompt)
+         temp_answer = llm.generate_content(first_prompt).text
+         # Step 2: retrieve context from the database, then answer with it.
+         args = json.loads(utils.trimming(temp_answer))
+         contexts, results = response(args, db_instance)
+         if not results:
+             return "Random question, direct return", contexts
+         else:
+             output_prompt = make_answer_prompt(input_prompt, contexts)
+             answer = llm.generate_content(output_prompt).text
+             return temp_answer, answer
+     except Exception as e:
+         return temp_answer, "Error occurred: " + str(e)
+
+
+ def format_chat_history_from_web(chat_history: list):
+     # Convert web-client messages ({"role", "content"}) into Gemini's ({"role", "parts"}) format.
+     temp_chat = []
+     for message in chat_history:
+         temp_chat.append(
+             {
+                 "role": message["role"],
+                 "parts": [message["content"]]
+             }
+         )
+     return temp_chat
+
+ def full_chain_history_question(chat_history: list, db_instance):
+     temp_answer = ""
+     try:
+         temp_chat = format_chat_history_from_web(chat_history)
+         # Classify the latest user turn and extract query arguments as JSON.
+         llm = get_model()
+         first_prompt = extract_keyword_prompt(temp_chat[-1]["parts"][0])
+         temp_answer = llm.generate_content(first_prompt).text
+         args = json.loads(utils.trimming(temp_answer))
+         contexts, results = response(args, db_instance)
+         if not results:
+             return "Random question, direct return", contexts
+         else:
+             # Rewrite the last turn as an answering prompt and generate over the whole history.
+             QA_Prompt = make_answer_prompt(temp_chat[-1]["parts"][0], contexts)
+             temp_chat[-1]["parts"] = [QA_Prompt]
+             answer = llm.generate_content(temp_chat).text
+             return temp_answer, answer
+     except Exception as e:
+         return temp_answer, "Error occurred: " + str(e)
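
Taken together, a minimal end-to-end sketch of how these two modules are exercised outside the WebSocket layer (assumptions: model_manage lives in the chat package alongside consumers.py, and apikey.txt holds a valid Gemini key):

from chat import model_manage as md  # assumed import path, mirroring consumers.py
from chat.arxiv_bot.arxiv_bot_utils import ArxivSQL

db = ArxivSQL()
history = [
    {"role": "user", "content": "Can you find documents about diffusion models?"},
]
# Returns the raw extracted arguments and the final conversational answer.
record, answer = md.full_chain_history_question(history, db)
print("Extracted arguments:", record)
print("Answer:", answer)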