shaocongma committed on
Commit
d165b85
1 Parent(s): 0bf0857

add new functions.

Dockerfile ADDED
@@ -0,0 +1,11 @@
+ FROM python:3.10.10
+ RUN useradd -m -u 1000 user
+ USER user
+ ENV HOME=/home/user \
+     PATH=/home/user/.local/bin:$PATH
+ WORKDIR $HOME/app
+ COPY --chown=user . $HOME/app
+ COPY ./requirements.txt $HOME/app/requirements.txt
+ RUN pip install -r requirements.txt
+ COPY . .
+ CMD ["chainlit", "run", "cyber-supervisor-openai.py", "--port", "7860"]

auto_draft.py DELETED
@@ -1,145 +0,0 @@
- # from utils.references import References
- # from utils.prompts import generate_paper_prompts, generate_keywords_prompts, generate_experiments_prompts
- # from utils.gpt_interaction import get_responses, extract_responses, extract_keywords, extract_json
- # from utils.tex_processing import replace_title
- # from utils.figures import generate_random_figures
- # import datetime
- # import shutil
- # import time
- # import logging
- # import os
- #
- # TOTAL_TOKENS = 0
- # TOTAL_PROMPTS_TOKENS = 0
- # TOTAL_COMPLETION_TOKENS = 0
- #
- # def make_archive(source, destination):
- #     base = os.path.basename(destination)
- #     name = base.split('.')[0]
- #     format = base.split('.')[1]
- #     archive_from = os.path.dirname(source)
- #     archive_to = os.path.basename(source.strip(os.sep))
- #     shutil.make_archive(name, format, archive_from, archive_to)
- #     shutil.move('%s.%s'%(name,format), destination)
- #     return destination
- #
- #
- # def log_usage(usage, generating_target, print_out=True):
- #     global TOTAL_TOKENS
- #     global TOTAL_PROMPTS_TOKENS
- #     global TOTAL_COMPLETION_TOKENS
- #
- #     prompts_tokens = usage['prompt_tokens']
- #     completion_tokens = usage['completion_tokens']
- #     total_tokens = usage['total_tokens']
- #
- #     TOTAL_TOKENS += total_tokens
- #     TOTAL_PROMPTS_TOKENS += prompts_tokens
- #     TOTAL_COMPLETION_TOKENS += completion_tokens
- #
- #     message = f"For generating {generating_target}, {total_tokens} tokens have been used ({prompts_tokens} for prompts; {completion_tokens} for completion). " \
- #               f"{TOTAL_TOKENS} tokens have been used in total."
- #     if print_out:
- #         print(message)
- #     logging.info(message)
- #
- # def pipeline(paper, section, save_to_path, model):
- #     """
- #     The main pipeline of generating a section.
- #         1. Generate prompts.
- #         2. Get responses from AI assistant.
- #         3. Extract the section text.
- #         4. Save the text to .tex file.
- #     :return usage
- #     """
- #     print(f"Generating {section}...")
- #     prompts = generate_paper_prompts(paper, section)
- #     gpt_response, usage = get_responses(prompts, model)
- #     output = extract_responses(gpt_response)
- #     paper["body"][section] = output
- #     tex_file = save_to_path + f"{section}.tex"
- #     if section == "abstract":
- #         with open(tex_file, "w") as f:
- #             f.write(r"\begin{abstract}")
- #         with open(tex_file, "a") as f:
- #             f.write(output)
- #         with open(tex_file, "a") as f:
- #             f.write(r"\end{abstract}")
- #     else:
- #         with open(tex_file, "w") as f:
- #             f.write(f"\section{{{section}}}\n")
- #         with open(tex_file, "a") as f:
- #             f.write(output)
- #     time.sleep(5)
- #     print(f"{section} has been generated. Saved to {tex_file}.")
- #     return usage
- #
- #
- #
- # def generate_draft(title, description="", template="ICLR2022", model="gpt-4"):
- #     """
- #     The main pipeline of generating a paper.
- #         1. Copy everything to the output folder.
- #         2. Create references.
- #         3. Generate each section using `pipeline`.
- #         4. Post-processing: check common errors, fill the title, ...
- #     """
- #     paper = {}
- #     paper_body = {}
- #
- #     # Create a copy in the outputs folder.
- #     # todo: use copy_templates function instead.
- #     now = datetime.datetime.now()
- #     target_name = now.strftime("outputs_%Y%m%d_%H%M%S")
- #     source_folder = f"latex_templates/{template}"
- #     destination_folder = f"outputs/{target_name}"
- #     shutil.copytree(source_folder, destination_folder)
- #
- #     bibtex_path = destination_folder + "/ref.bib"
- #     save_to_path = destination_folder + "/"
- #     replace_title(save_to_path, title)
- #     logging.basicConfig(level=logging.INFO, filename=save_to_path + "generation.log")
- #
- #     # Generate keywords and references
- #     print("Initialize the paper information ...")
- #     prompts = generate_keywords_prompts(title, description)
- #     gpt_response, usage = get_responses(prompts, model)
- #     keywords = extract_keywords(gpt_response)
- #     log_usage(usage, "keywords")
- #     ref = References(load_papers="")  # todo: allow users to upload bibfile.
- #     ref.collect_papers(keywords, method="arxiv")  # todo: add more methods to find related papers
- #     all_paper_ids = ref.to_bibtex(bibtex_path)  # todo: this will be used to check if all citations are in this list
- #
- #     print(f"The paper information has been initialized. References are saved to {bibtex_path}.")
- #
- #     paper["title"] = title
- #     paper["description"] = description
- #     paper["references"] = ref.to_prompts()  # todo: see if these prompts can be compressed.
- #     paper["body"] = paper_body
- #     paper["bibtex"] = bibtex_path
- #
- #     print("Generating figures ...")
- #     prompts = generate_experiments_prompts(paper)
- #     gpt_response, usage = get_responses(prompts, model)
- #     list_of_methods = list(extract_json(gpt_response))
- #     log_usage(usage, "figures")
- #     generate_random_figures(list_of_methods, save_to_path + "comparison.png")
- #
- #     for section in ["introduction", "related works", "backgrounds", "methodology", "experiments", "conclusion", "abstract"]:
- #         try:
- #             usage = pipeline(paper, section, save_to_path, model=model)
- #             log_usage(usage, section)
- #         except Exception as e:
- #             print(f"Failed to generate {section} due to the error: {e}")
- #     print(f"The paper {title} has been generated. Saved to {save_to_path}.")
- #     return make_archive(destination_folder, "output.zip")
- #
- # if __name__ == "__main__":
- #     # title = "Training Adversarial Generative Neural Network with Adaptive Dropout Rate"
- #     title = "Playing Atari Game with Deep Reinforcement Learning"
- #     description = ""
- #     template = "ICLR2022"
- #     model = "gpt-4"
- #     # model = "gpt-3.5-turbo"
- #
- #     generate_draft(title, description, template, model)

chainlit.md ADDED
@@ -0,0 +1,15 @@
+ # GPT Cyber-Supervisor 🚀🤖
+ 
+ Let ChatGPT power your research: propose research topics, find references, and help analyze a paper's novelty.
+ 
+ ## Running this project with the OpenAI API
+ 1. Add `OPENAI_API_KEY` to your environment variables.
+ 2. The default model is `gpt-3.5-turbo-16k`; it can be changed via the `DEFAULT_MODEL` environment variable.
+ 3. Run `chainlit run cyber-supervisor-openai.py` from the command line.
+ 
+ ## How it works
+ Three functions are currently provided:
+ 1. `find_research_directions`: find research directions for your research topic
+ 2. `find_references`: find references for your paper
+ 3. `judge_novelty`: have the cyber-supervisor assess the novelty of your proposed idea
+ Based on the OpenAI Function Calling API, ChatGPT decides on its own which tool to call.
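
To make the dispatch described above concrete, here is a minimal, non-streaming sketch of the function-calling loop. It reuses the `functions` schema and `TOOLS` mapping added in `tools.py` in this commit; the streaming version actually used by the app is `cyber-supervisor-openai.py` below.

```python
import json
import os

import openai

from tools import functions, TOOLS  # added in this commit

openai.api_key = os.getenv("OPENAI_API_KEY")

def ask_supervisor(user_message: str, model: str = "gpt-3.5-turbo-16k") -> str:
    """Sketch: let the model pick one of the three tools, run it, and return the result."""
    messages = [{"role": "user", "content": user_message}]
    response = openai.ChatCompletion.create(
        model=model, messages=messages,
        functions=functions, function_call="auto",
    )
    message = response["choices"][0]["message"]
    if not message.get("function_call"):
        return message["content"]  # the model answered directly, no tool needed
    name = message["function_call"]["name"]
    arguments = json.loads(message["function_call"]["arguments"])
    if name == "find_research_directions":
        result = TOOLS[name](research_field=arguments.get("research_description"))
    else:  # find_references / judge_novelty both take a title and contributions
        result = TOOLS[name](title=arguments.get("title"),
                             contributions=arguments.get("contributions"))
    return str(result)
```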
cyber-supervisor-langchain.py ADDED
@@ -0,0 +1,48 @@
+ import os
+ 
+ from tools import FindResearchDirectionsTool, JudgeNoveltyTool, FindReferencesTool
+ from langchain.chat_models import ChatOpenAI
+ from langchain.agents import initialize_agent
+ from langchain.agents import AgentType
+ import openai
+ from langchain.schema import SystemMessage
+ from langchain.memory import ConversationBufferMemory
+ 
+ openai.api_key = os.getenv("OPENAI_API_KEY")
+ print(os.getenv("OPENAI_API_KEY"))
+ default_model = os.getenv("DEFAULT_MODEL")
+ if default_model is None:
+     default_model = "gpt-3.5-turbo-16k"
+ 
+ import chainlit as cl
+ 
+ agent_kwargs = {
+     "system_message": SystemMessage(content="You are a mighty cyber professor. "
+                                             "Your task is to assist your student in finding a research idea, including: "
+                                             "1. Search related references. "
+                                             "2. Propose potential research directions. "
+                                             "3. Evaluate the novelty of any research direction. "
+                                             "Follow the following instructions: "
+                                             "1. You always respond in the same language as your student. "
+                                             "2. Ask your student for further information if necessary to provide more assistance. ")
+ }
+ memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
+ 
+ @cl.langchain_factory(use_async=False)
+ def main():
+     tools = [FindResearchDirectionsTool(), JudgeNoveltyTool(), FindReferencesTool()]
+     llm = ChatOpenAI(temperature=0.9, model=default_model, streaming=True)
+     open_ai_agent = initialize_agent(tools,
+                                      llm,
+                                      agent=AgentType.OPENAI_FUNCTIONS,
+                                      verbose=True,
+                                      agent_kwargs=agent_kwargs,
+                                      memory=memory)
+     return open_ai_agent
+ 
+ 
+ @cl.langchain_run
+ async def run(agent, input_str):
+     res = await cl.make_async(agent)(input_str, callbacks=[cl.LangchainCallbackHandler()])
+     print(res)
+     await cl.Message(content=res["output"]).send()

cyber-supervisor-openai.py ADDED
@@ -0,0 +1,116 @@
+ import os
+ import openai
+ import ast
+ from tools import functions, TOOLS
+ 
+ MAX_ITER = 5
+ 
+ openai.api_key = os.getenv("OPENAI_API_KEY")
+ default_model = os.getenv("DEFAULT_MODEL")
+ if default_model is None:
+     default_model = "gpt-3.5-turbo-16k"
+ 
+ import chainlit as cl
+ 
+ async def process_new_delta(new_delta, openai_message, content_ui_message, function_ui_message):
+     if "role" in new_delta:
+         openai_message["role"] = new_delta["role"]
+     if "content" in new_delta:
+         new_content = new_delta.get("content") or ""
+         openai_message["content"] += new_content
+         await content_ui_message.stream_token(new_content)
+     if "function_call" in new_delta:
+         if "name" in new_delta["function_call"]:
+             openai_message["function_call"] = {
+                 "name": new_delta["function_call"]["name"]}
+             await content_ui_message.send()
+             function_ui_message = cl.Message(
+                 author=new_delta["function_call"]["name"],
+                 content="", indent=1, language="json")
+             await function_ui_message.stream_token(new_delta["function_call"]["name"])
+ 
+         if "arguments" in new_delta["function_call"]:
+             if "arguments" not in openai_message["function_call"]:
+                 openai_message["function_call"]["arguments"] = ""
+             openai_message["function_call"]["arguments"] += new_delta["function_call"]["arguments"]
+             await function_ui_message.stream_token(new_delta["function_call"]["arguments"])
+     return openai_message, content_ui_message, function_ui_message
+ 
+ 
+ system_message = "You are a mighty cyber professor. Follow the following instructions: " \
+                  "1. You always respond in the same language as your student. " \
+                  "2. Ask your student for further information if necessary to provide more assistance. " \
+                  "3. If your student asks you to do something out of your responsibility, please say no. "
+ 
+ @cl.on_chat_start
+ def start_chat():
+     cl.user_session.set(
+         "message_history",
+         [{"role": "system", "content": system_message}],
+     )
+ 
+ 
+ @cl.on_message
+ async def run_conversation(user_message: str):
+     message_history = cl.user_session.get("message_history")
+     message_history.append({"role": "user", "content": user_message})
+ 
+     cur_iter = 0
+ 
+     while cur_iter < MAX_ITER:
+         # OpenAI call
+         openai_message = {"role": "", "content": ""}
+         function_ui_message = None
+         content_ui_message = cl.Message(content="")
+         async for stream_resp in await openai.ChatCompletion.acreate(
+             model=default_model,
+             messages=message_history,
+             stream=True,
+             function_call="auto",
+             functions=functions,
+             temperature=0.9
+         ):
+ 
+             new_delta = stream_resp.choices[0]["delta"]
+             openai_message, content_ui_message, function_ui_message = await process_new_delta(
+                 new_delta, openai_message, content_ui_message, function_ui_message)
+ 
+         message_history.append(openai_message)
+         if function_ui_message is not None:
+             await function_ui_message.send()
+ 
+         if stream_resp.choices[0]["finish_reason"] == "stop":
+             break
+ 
+         elif stream_resp.choices[0]["finish_reason"] != "function_call":
+             raise ValueError(stream_resp.choices[0]["finish_reason"])
+ 
+         # if code arrives here, it means there is a function call
+         function_name = openai_message.get("function_call").get("name")
+         arguments = ast.literal_eval(
+             openai_message.get("function_call").get("arguments"))
+ 
+         if function_name == "find_research_directions":
+             function_response = TOOLS[function_name](
+                 research_field=arguments.get("research_description"),
+             )
+         else:
+             function_response = TOOLS[function_name](
+                 title=arguments.get("title"),
+                 contributions=arguments.get("contributions"),
+             )
+         message_history.append(
+             {
+                 "role": "function",
+                 "name": function_name,
+                 "content": f"{function_response}",
+             }
+         )
+ 
+         await cl.Message(
+             author=function_name,
+             content=str(function_response),
+             language='json',
+             indent=1,
+         ).send()
+         cur_iter += 1

initialization.py DELETED
@@ -1,7 +0,0 @@
- """Generate necessary components of prompts. """
- from utils.prompts import SYSTEM
- 
- def get_keywords(model, title):
-     pass
- 
- 
kdb_test.py CHANGED
@@ -7,14 +7,27 @@ import os
  import json
  from models import EMBEDDINGS
 
- HF_TOKEN = os.getenv("HF_TOKEN")
- REPO_ID = os.getenv("KDB_REPO")
- 
- snapshot_download(REPO_ID, repo_type="dataset", local_dir="knowledge_databases/",
-                   local_dir_use_symlinks=False, token=HF_TOKEN)
+ # todo: this feature is not finished yet
+ 
+ HF_TOKEN = None  # os.getenv("HF_TOKEN")
+ REPO_ID = None  # os.getenv("KDB_REPO")
+ if HF_TOKEN is not None and REPO_ID is not None:
+     snapshot_download(REPO_ID, repo_type="dataset", local_dir="knowledge_databases/",
+                       local_dir_use_symlinks=False, token=HF_TOKEN)
  ALL_KDB = ["(None)"] + list_folders("knowledge_databases")
 
+ ANNOUNCEMENT = """
+ # Evaluate the quality of data retrieved from the FAISS database
+ 
+ Use this space to test the performance of some pre-constructed vector databases hosted at `shaocongma/kdb`. To use this space with your own FAISS database, follow these instructions:
+ 1. Duplicate this space.
+ 2. Add the secret key `HF_TOKEN` with your own Huggingface User Access Token.
+ 3. Create a Huggingface Dataset and put your FAISS database in it.
+ 4. Add the secret key `REPO_ID` with your dataset's address.
+ """
+ AUTODRAFT = """
+ AutoDraft is a GPT-based project to generate an academic paper using the title and contributions. When generating specific sections, AutoDraft queries the pre-constructed vector database for the necessary background in related fields.
+ """
 
  def query_from_kdb(input, kdb, query_counts):
      if kdb == "(None)":
@@ -37,25 +50,36 @@ def query_from_kdb(input, kdb, query_counts):
          raise RuntimeError(f"Failed to query from FAISS.")
      return domain_knowledge, ""
 
- ANNOUNCEMENT = """"""
- 
  with gr.Blocks() as demo:
-     gr.HTML(ANNOUNCEMENT)
      with gr.Row():
          with gr.Column():
-             kdb_dropdown = gr.Dropdown(choices=ALL_KDB, value="(None)")
-             user_input = gr.Textbox(label="Input")
-             button_retrieval = gr.Button("Query", variant="primary")
+             gr.Markdown(ANNOUNCEMENT)
+ 
+             kdb_dropdown = gr.Dropdown(choices=ALL_KDB, value="(None)", label="Knowledge Databases",
+                                        info="Pre-defined knowledge databases utilized to aid in the generation of academic writing content. "
+                                             "Hosted at `shaocongma/kdb`.")
+             with gr.Tab("User's Input"):
+                 user_input = gr.Textbox(label="Input", info="Input anything you like to test what will be retrieved from the vector database.")
+                 with gr.Row():
+                     button_clear = gr.Button("Clear")
+                     button_retrieval = gr.Button("Retrieve", variant="primary")
+             with gr.Tab("AutoDraft"):
+                 gr.Markdown(AUTODRAFT)
+                 title_input = gr.Textbox(label="Title")
+                 contribution_input = gr.Textbox(label="Contributions", lines=5)
+                 with gr.Row():
+                     button_clear_2 = gr.Button("Clear")
+                     button_retrieval_2 = gr.Button("Retrieve", variant="primary")
 
          with gr.Accordion("Advanced Setting", open=False):
-             query_counts_slider = gr.Slider(minimum=1, maximum=20, value=10, step=1,
-                                             interactive=True, label="QUERY_COUNTS",
-                                             info="How many entries to retrieve from the knowledge database.")
+             query_counts_slider = gr.Slider(minimum=1, maximum=50, value=10, step=1,
+                                             interactive=True, label="QUERY_COUNTS",
+                                             info="How many contents will be retrieved from the vector database.")
 
          retrieval_output = gr.JSON(label="Output")
 
- 
      button_retrieval.click(fn=query_from_kdb, inputs=[user_input, kdb_dropdown, query_counts_slider], outputs=[retrieval_output, user_input])
+ 
  demo.queue(concurrency_count=1, max_size=5, api_open=False)
  demo.launch(show_error=True)
 
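For context, a query against one of these knowledge databases is essentially a FAISS similarity search. The sketch below is illustrative only: the real `query_from_kdb` lives in the unchanged part of `kdb_test.py`, and the embedding key shown here is hypothetical (use whichever entry of `models.EMBEDDINGS` the database was built with).

```python
# Illustrative sketch of a knowledge-database query (not the code in this commit).
from langchain.vectorstores import FAISS
from models import EMBEDDINGS  # assumed: mapping of embedding name -> embedding object

def query_kdb_sketch(query: str, kdb_path: str, query_counts: int = 10) -> list[str]:
    embeddings = EMBEDDINGS["default"]           # hypothetical key
    db = FAISS.load_local(kdb_path, embeddings)  # load the pre-built FAISS index
    docs = db.similarity_search(query, k=query_counts)
    return [doc.page_content for doc in docs]
```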
requirements.txt CHANGED
Binary files a/requirements.txt and b/requirements.txt differ
 
tools.py ADDED
@@ -0,0 +1,206 @@
+ import os
+ import openai
+ from utils.references import References
+ from utils.gpt_interaction import GPTModel
+ from utils.prompts import SYSTEM
+ from langchain.tools import BaseTool
+ from pydantic import BaseModel, Field
+ from typing import Optional, Type
+ 
+ MAX_TOKENS = 2048
+ 
+ openai.api_key = os.getenv("OPENAI_API_KEY")
+ default_model = os.getenv("DEFAULT_MODEL")
+ if default_model is None:
+     default_model = "gpt-3.5-turbo-16k"
+ llm = GPTModel(model=default_model, delay=1)
+ 
+ paper_system_prompt = '''You are an assistant designed to propose choices of research direction.
+ The user will input questions or some keywords of a field. You need to generate some paper titles and main contributions. Ensure you follow the following instructions:
+ Instruction:
+ - Your response should follow the JSON format.
+ - Your response should have the following structure:
+ {
+   "your suggested paper title":
+   {
+     "summary": "an overview introducing what this paper will include",
+     "contributions": {
+       "contribution1": {"statement": "briefly describe this contribution", "reason": "reason why this contribution can make this paper outstanding"},
+       "contribution2": {"statement": "briefly describe this contribution", "reason": "reason why this contribution can make this paper outstanding"},
+       ...
+     }
+   }
+   "your suggested paper title":
+   {
+     "summary": "an overview introducing what this paper will include",
+     "contributions": {
+       "contribution1": {"statement": "briefly describe this contribution", "reason": "reason why this contribution can make this paper outstanding"},
+       "contribution2": {"statement": "briefly describe this contribution", "reason": "reason why this contribution can make this paper outstanding"},
+       ...
+     }
+   }
+   ...
+ }
+ - Please list three to five suggested titles and at least three contributions for each paper.
+ '''
+ 
+ 
+ contribution_system_prompt = '''You are an assistant designed to criticize the contributions of a paper. You will be provided with the Paper's Title, References, and Contributions. Ensure you follow the following instructions:
+ Instruction:
+ - Your response should follow the JSON format.
+ - Your response should have the following structure:
+ {
+   "title": "the title provided by the user",
+   "comment": "your thoughts on whether this title clearly reflects the key ideas of this paper and explain why",
+   "contributions": {
+     "contribution1": {"statement": "briefly describe what the contribution is",
+                       "reason": "reason why the user claims it is a contribution",
+                       "judge": "your thoughts on whether this is a novel contribution and explain why",
+                       "suggestion": "your suggestion on how to modify the research direction to enhance the novelty"},
+     "contribution2": {"statement": "briefly describe what the contribution is",
+                       "reason": "reason why the user claims it is a contribution",
+                       "judge": "your thoughts on whether this is a novel contribution and explain why",
+                       "suggestion": "your suggestion on how to modify the research direction to enhance the novelty"},
+     ...
+   }
+ }
+ - You need to carefully check whether the claimed contribution has already been made in the provided references, which would make the contribution not novel.
+ - You also need to raise your concerns if any of the contributions could be incremental or just a mild modification of an existing work.
+ '''
+ 
+ 
+ def find_research_directions(research_field):
+     output, _ = llm(systems=paper_system_prompt, prompts=research_field, return_json=False)
+     return output
+ 
+ def find_references(title, contributions):
+     max_tokens = MAX_TOKENS
+     ref = References(title=title, description=f"{contributions}")
+     keywords, _ = llm(systems=SYSTEM["keywords"], prompts=title, return_json=True)
+     keywords = {keyword: 10 for keyword in keywords}
+     ref.collect_papers(keywords)
+     ref_prompt = ref.to_prompts(max_tokens=max_tokens)
+     return ref_prompt
+ 
+ 
+ def judge_novelty(title, contributions):
+     max_tokens = MAX_TOKENS
+     ref = References(title=title, description=f"{contributions}")
+     keywords, _ = llm(systems=SYSTEM["keywords"], prompts=title, return_json=True)
+     keywords = {keyword: 10 for keyword in keywords}
+     ref.collect_papers(keywords)
+     ref_prompt = ref.to_prompts(max_tokens=max_tokens)
+     prompt = f"Title: {title}\n References: {ref_prompt}\n Contributions: {contributions}"
+     output, _ = llm(systems=contribution_system_prompt, prompts=prompt, return_json=False)
+     return output
+ 
+ 
+ functions = [
+     {
+         "name": "find_research_directions",
+         "description": "when your student has already shown interest in a specific topic and provided a rough description of potential contributions, help your student dive deeper into this direction",
+         "parameters": {
+             "type": "object",
+             "properties": {
+                 "research_description": {
+                     "type": "string",
+                     "description": "a paragraph with details in English describing "
+                                    "(1) what is the main problem you are trying to solve "
+                                    "(2) what is the main novelty of this idea (3) how to complete this research."
+                 }
+             },
+             "required": ["research_description"],
+         },
+     },
+     {
+         "name": "find_references",
+         "description": "find references for given details of a paper",
+         "parameters": {
+             "type": "object",
+             "properties": {
+                 "title": {
+                     "type": "string",
+                     "description": "the title (in English) of the academic paper your student will write.",
+                 },
+                 "contributions": {"type": "string",
+                                   "description": "a general description of the contributions of this paper in English. "
+                                                  "If there are multiple contributions, index them with numbers."},
+             },
+             "required": ["title", "contributions"],
+         },
+     },
+     {
+         "name": "judge_novelty",
+         "description": "evaluate the novelty of a paper given its title and main contributions",
+         "parameters": {
+             "type": "object",
+             "properties": {
+                 "title": {
+                     "type": "string",
+                     "description": "the title (in English) of the academic paper your student will write.",
+                 },
+                 "contributions": {"type": "string",
+                                   "description": "a general description of the contributions of this paper in English. "
+                                                  "If there are multiple contributions, index them with numbers."},
+             },
+             "required": ["title", "contributions"],
+         },
+     }
+ ]
+ 
+ TOOLS = {"find_research_directions": find_research_directions, "find_references": find_references, "judge_novelty": judge_novelty}
+ 
+ class FindResearchDirectionsCheckInput(BaseModel):
+     research_description: str = Field(..., description="a paragraph with details in English describing (1) what is the main problem you are trying to solve "
+                                                        "(2) what is the main novelty of this idea (3) how to complete this research.")
+ 
+ class TitleDescriptionCheckInput(BaseModel):
+     title: str = Field(..., description="the title of the academic paper your student will write in English.")
+     contributions: str = Field(..., description="a general description of the contributions of this paper in English. "
+                                                 "If there are multiple contributions, index them with numbers.")
+ 
+ 
+ class FindResearchDirectionsTool(BaseTool):
+     name = "find_research_directions"
+     description = """Useful when your student has already shown interest in a specific topic and provided a rough description of
+     potential contributions, and you need to help your student dive deeper into this direction.
+     """
+ 
+     def _run(self, research_description: str):
+         response = find_research_directions(research_description)
+         return response
+ 
+     def _arun(self, research_field: str):
+         raise NotImplementedError("This tool does not support async")
+ 
+     args_schema: Optional[Type[BaseModel]] = FindResearchDirectionsCheckInput
+ 
+ 
+ class JudgeNoveltyTool(BaseTool):
+     name = "judge_novelty"
+     description = """Useful when you need to evaluate the novelty of your student's idea.
+     """
+ 
+     def _run(self, title: str, contributions: str):
+         response = judge_novelty(title, contributions)
+         return response
+ 
+     def _arun(self, title: str, contributions: str):
+         raise NotImplementedError("This tool does not support async")
+ 
+     args_schema: Optional[Type[BaseModel]] = TitleDescriptionCheckInput
+ 
+ 
+ class FindReferencesTool(BaseTool):
+     name = "find_references"
+     description = """Useful when you need to find references for a paper.
+     """
+ 
+     def _run(self, title: str, contributions: str):
+         response = find_references(title, contributions)
+         return response
+ 
+     def _arun(self, title: str, contributions: str):
+         raise NotImplementedError("This tool does not support async")
+ 
+     args_schema: Optional[Type[BaseModel]] = TitleDescriptionCheckInput