# my_app/model_manager.py
import json

import google.generativeai as genai

import chat.arxiv_bot.arxiv_bot_utils as utils

model = None
model_retrieval = None
model_answer = None

RETRIEVAL_INSTRUCT = """You are an auto chatbot that responds with only one action below based on the user question.
1. If the guest question is asking about a science topic, you need to respond with the information in the JSON schema below:
{
    "keywords": [a list of string keywords about the topic],
    "description": "a paragraph describing the topic in about 50 to 100 words"
}
2. If the guest is not asking for any information or documents, you need to respond in the JSON schema below:
{
    "answer": "your answer to the user question"
}"""

ANSWER_INSTRUCT = """You are a library assistant that helps answer customer questions based on the information given.
You always answer in a conversational form, naturally and politely.
You must introduce all the records given; each must contain the title, authors and the link to the pdf file."""


def create_model():
    """Configure the Gemini API key and build the three module-level models."""
    with open("apikey.txt", "r") as apikey:
        key = apikey.readline().strip()  # strip the trailing newline so the key is valid
    genai.configure(api_key=key)
    # List the models that support text generation (debug output).
    for m in genai.list_models():
        if 'generateContent' in m.supported_generation_methods:
            print(m.name)
    config = genai.GenerationConfig(max_output_tokens=2048, temperature=1.0)
    safety_settings = [
        {"category": "HARM_CATEGORY_DANGEROUS", "threshold": "BLOCK_NONE"},
        {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_NONE"},
        {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_NONE"},
        {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_NONE"},
        {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_NONE"},
    ]
    global model, model_retrieval, model_answer
    model = genai.GenerativeModel("gemini-1.5-pro-latest",
                                  generation_config=config,
                                  safety_settings=safety_settings)
    model_retrieval = genai.GenerativeModel("gemini-1.5-pro-latest",
                                            generation_config=config,
                                            safety_settings=safety_settings,
                                            system_instruction=RETRIEVAL_INSTRUCT)
    model_answer = genai.GenerativeModel("gemini-1.5-pro-latest",
                                         generation_config=config,
                                         safety_settings=safety_settings,
                                         system_instruction=ANSWER_INSTRUCT)
    return model, model_answer, model_retrieval


def get_model():
    """Lazily initialize the three models on first use and return them."""
    global model, model_answer, model_retrieval
    if model is None:
        # Initialize the models here; create_model builds all three.
        model, model_answer, model_retrieval = create_model()
    return model, model_answer, model_retrieval


def extract_keyword_prompt(query):
    """Build a prompt asking the model for a JSON block of database query arguments."""
    prompt = """[INST] SYSTEM: You are an auto chatbot that responds with only one action below based on the user question.
    1. If the guest question is asking about a science topic, you need to respond with the information in the JSON schema below:
    {
        "keywords": [a list of string keywords about the topic],
        "description": "a paragraph describing the topic in about 50 to 100 words"
    }
    2. If the guest is not asking for any information or documents, you need to respond in the JSON schema below:
    {
        "answer": "your answer to the user question"
    }
    QUESTION: """ + query + """[/INST]
    ANSWER: """
    return prompt


def make_answer_prompt(question, contexts):
    """Build the final answering prompt from the question and the queried context."""
    prompt = (
        """[INST] You are a library assistant that helps answer customer QUESTION based on the INFORMATION given.
        You always answer in a conversational form, naturally and politely.
        You must introduce all the records given; each must contain the title, authors and the link to the pdf file.
        QUESTION: {input}
        INFORMATION: '{contexts}'
        [/INST]
        ANSWER:
        """
    ).format(input=question, contexts=contexts)
    return prompt


def retrieval_chat_template(question):
    """Wrap a bare question as a Gemini chat message for the retrieval model."""
    return {
        "role": "user",
        "parts": [f"QUESTION: {question} \n ANSWER:"]
    }


def answer_chat_template(question, contexts):
    """Wrap a question plus retrieved context as a Gemini chat message for the answer model."""
    return {
        "role": "user",
        "parts": [f"QUESTION: {question} \n INFORMATION: {contexts} \n ANSWER:"]
    }


def response(args, db_instance):
    """Create the response context, based on the parsed JSON arguments."""
    keys = list(args.keys())
    if "answer" in keys:
        return args['answer'], None  # answer directly, no retrieval needed
    if "keywords" in keys:
        # Perform a relevance query against the vector store.
        query_texts = args["description"]
        keywords = args["keywords"]
        results = utils.db.query_relevant(keywords=keywords, query_texts=query_texts)
        # print(results)
        ids = results['metadatas'][0]
        if len(ids) == 0:
            # Nothing indexed yet: crawl arXiv for new records.
            new_records = utils.crawl_arxiv(keyword_list=keywords, max_results=10)
            print("Got new records: ", len(new_records))
            if isinstance(new_records, str):  # the crawler returns an error string on failure
                return "Error occurred, information not found", new_records
            utils.db.add(new_records)
            db_instance.add(new_records)
            results = utils.db.query_relevant(keywords=keywords, query_texts=query_texts)
            ids = results['metadatas'][0]
            print("Re-queried on chromadb, results: ", ids)
        paper_id = [meta['paper_id'] for meta in ids]
        paper_info = db_instance.query_id(paper_id)
        print(paper_info)
        records = []  # each record holds title (index 2), author (index 3), link (index 6)
        result_string = ""
        if paper_info:
            for i in range(len(paper_info)):
                result_string += "Record no.{} - Title: {}, Author: {}, Link: {}, ".format(
                    i + 1, paper_info[i][2], paper_info[i][3], paper_info[i][6])
                record_id = paper_info[i][0]
                selected_document = utils.db.query_exact(record_id)["documents"]
                doc_str = "Summary:"
                for doc in selected_document:
                    doc_str += doc + " "
                result_string += doc_str
                records.append([paper_info[i][2], paper_info[i][3], paper_info[i][6]])
            return result_string, records
        else:
            return "Information not found", "Information not found"
    # Exact title/author lookup, kept for reference but currently disabled:
    # if "title" in keys:
    #     title = args['title']
    #     authors = utils.authors_str_to_list(args['author'])
    #     paper_info = db_instance.query(title=title, author=authors)
    #     # if the query finds nothing, crawl for the exact paper
    #     if len(paper_info) == 0:
    #         new_records = utils.crawl_exact_paper(title=title, author=authors)
    #         print("Got new records: ", len(new_records))
    #         if isinstance(new_records, str):
    #             return "Error occurred, information not found", "Information not found"
    #         utils.db.add(new_records)
    #         db_instance.add(new_records)
    #         paper_info = db_instance.query(title=title, author=authors)
    #         print("Re-queried on chromadb, results: ", paper_info)
    #     # -------------------------------------
    #     records = []  # title (2), author (3), link (6)
    #     result_string = ""
    #     for i in range(len(paper_info)):
    #         result_string += "Title: {}, Author: {}, Link: {}".format(paper_info[i][2], paper_info[i][3], paper_info[i][6])
    #         records.append([paper_info[i][2], paper_info[i][3], paper_info[i][6]])
    #     # process results:
    #     if len(result_string) == 0:
    #         return "Information not found", "Information not found"
    #     return result_string, records


def full_chain_single_question(input_prompt, db_instance):
    """Single-turn pipeline: extract query arguments, retrieve context, then answer."""
    temp_answer = "No answer"  # keep defined in case generation fails before assignment
    try:
        first_prompt = extract_keyword_prompt(input_prompt)
        temp_answer = model.generate_content(first_prompt).text
        args = json.loads(utils.trimming(temp_answer))
        contexts, results = response(args, db_instance)
        if not results:
            # print(contexts)
            return "Random question, direct return", contexts
        else:
            output_prompt = make_answer_prompt(input_prompt, contexts)
            answer = model.generate_content(output_prompt).text
            return temp_answer, answer
    except Exception as e:
        # print(e)
        return temp_answer, "Error occurred: " + str(e)


def format_chat_history_from_web(chat_history: list):
    """Convert web-format messages ({'role', 'content'}) into Gemini chat format."""
    temp_chat = []
    for message in chat_history:
        temp_chat.append({
            "role": message["role"],
            "parts": [message["content"]]
        })
    return temp_chat


def full_chain_history_question(chat_history: list, db_instance):
    """Multi-turn pipeline: retrieve using the whole chat history, then answer."""
    first_answer = None  # keep defined in case retrieval fails before assignment
    try:
        temp_chat = format_chat_history_from_web(chat_history)
        question = temp_chat[-1]['parts'][0]
        first_answer = model_retrieval.generate_content(temp_chat).text
        print(first_answer)
        args = json.loads(utils.trimming(first_answer))
        contexts, results = response(args, db_instance)
        if not results:
            return "Random question, direct return", contexts
        else:
            print('Context to answers: ', contexts)
            # Replace the last user turn with the question plus the retrieved context.
            answer_chat = answer_chat_template(question, contexts)
            temp_chat[-1] = answer_chat
            answer = model_answer.generate_content(temp_chat).text
            return first_answer, answer
    except Exception as e:
        if first_answer:
            return first_answer, "Error occurred: " + str(e)
        else:
            return "No answer", "Error occurred: " + str(e)
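

# --- Usage sketch, not part of the module API ---
# A minimal example of how this module is expected to be driven, assuming an
# SQL-backed record store is available. ArxivSQL is a hypothetical name for
# whatever class in chat.arxiv_bot.arxiv_bot_utils provides the .add() and
# .query_id() methods that db_instance is used with above.
if __name__ == "__main__":
    get_model()  # lazily builds model, model_retrieval and model_answer
    db_instance = utils.ArxivSQL()  # hypothetical db wrapper with .add()/.query_id()
    raw_args, final_answer = full_chain_single_question(
        "What are recent advances in diffusion models?", db_instance)
    print("Extracted arguments:", raw_args)
    print("Answer:", final_answer)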