Commit d165b85 • shaocongma committed
Parent(s): 0bf0857

add new functions.
Files changed:
- Dockerfile +11 -0
- auto_draft.py +0 -145
- chainlit.md +15 -0
- cyber-supervisor-langchain.py +48 -0
- cyber-supervisor-openai.py +116 -0
- initialization.py +0 -7
- kdb_test.py +38 -14
- requirements.txt +0 -0
- tools.py +206 -0
Dockerfile
ADDED
@@ -0,0 +1,11 @@
+FROM python:3.10.10
+RUN useradd -m -u 1000 user
+USER user
+ENV HOME=/home/user \
+    PATH=/home/user/.local/bin:$PATH
+WORKDIR $HOME/app
+COPY --chown=user . $HOME/app
+COPY ./requirements.txt ~/app/requirements.txt
+RUN pip install -r requirements.txt
+COPY . .
+CMD ["chainlit", "run", "cyber-supervisor-openai.py", "--port", "7860"]
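To try this image outside the Space, a standard build-and-run sequence should work: `docker build -t cyber-supervisor .` followed by `docker run -e OPENAI_API_KEY=<your-key> -p 7860:7860 cyber-supervisor` (the tag name is an arbitrary choice; the port matches the `CMD` above).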
auto_draft.py
DELETED
@@ -1,145 +0,0 @@
-# from utils.references import References
-# from utils.prompts import generate_paper_prompts, generate_keywords_prompts, generate_experiments_prompts
-# from utils.gpt_interaction import get_responses, extract_responses, extract_keywords, extract_json
-# from utils.tex_processing import replace_title
-# from utils.figures import generate_random_figures
-# import datetime
-# import shutil
-# import time
-# import logging
-# import os
-#
-# TOTAL_TOKENS = 0
-# TOTAL_PROMPTS_TOKENS = 0
-# TOTAL_COMPLETION_TOKENS = 0
-#
-# def make_archive(source, destination):
-#     base = os.path.basename(destination)
-#     name = base.split('.')[0]
-#     format = base.split('.')[1]
-#     archive_from = os.path.dirname(source)
-#     archive_to = os.path.basename(source.strip(os.sep))
-#     shutil.make_archive(name, format, archive_from, archive_to)
-#     shutil.move('%s.%s' % (name, format), destination)
-#     return destination
-#
-#
-# def log_usage(usage, generating_target, print_out=True):
-#     global TOTAL_TOKENS
-#     global TOTAL_PROMPTS_TOKENS
-#     global TOTAL_COMPLETION_TOKENS
-#
-#     prompts_tokens = usage['prompt_tokens']
-#     completion_tokens = usage['completion_tokens']
-#     total_tokens = usage['total_tokens']
-#
-#     TOTAL_TOKENS += total_tokens
-#     TOTAL_PROMPTS_TOKENS += prompts_tokens
-#     TOTAL_COMPLETION_TOKENS += completion_tokens
-#
-#     message = f"For generating {generating_target}, {total_tokens} tokens have been used ({prompts_tokens} for prompts; {completion_tokens} for completion). " \
-#               f"{TOTAL_TOKENS} tokens have been used in total."
-#     if print_out:
-#         print(message)
-#     logging.info(message)
-#
-# def pipeline(paper, section, save_to_path, model):
-#     """
-#     The main pipeline of generating a section.
-#         1. Generate prompts.
-#         2. Get responses from AI assistant.
-#         3. Extract the section text.
-#         4. Save the text to .tex file.
-#     :return usage
-#     """
-#     print(f"Generating {section}...")
-#     prompts = generate_paper_prompts(paper, section)
-#     gpt_response, usage = get_responses(prompts, model)
-#     output = extract_responses(gpt_response)
-#     paper["body"][section] = output
-#     tex_file = save_to_path + f"{section}.tex"
-#     if section == "abstract":
-#         with open(tex_file, "w") as f:
-#             f.write(r"\begin{abstract}")
-#         with open(tex_file, "a") as f:
-#             f.write(output)
-#         with open(tex_file, "a") as f:
-#             f.write(r"\end{abstract}")
-#     else:
-#         with open(tex_file, "w") as f:
-#             f.write(f"\section{{{section}}}\n")
-#         with open(tex_file, "a") as f:
-#             f.write(output)
-#     time.sleep(5)
-#     print(f"{section} has been generated. Saved to {tex_file}.")
-#     return usage
-#
-#
-#
-# def generate_draft(title, description="", template="ICLR2022", model="gpt-4"):
-#     """
-#     The main pipeline of generating a paper.
-#         1. Copy everything to the output folder.
-#         2. Create references.
-#         3. Generate each section using `pipeline`.
-#         4. Post-processing: check common errors, fill the title, ...
-#     """
-#     paper = {}
-#     paper_body = {}
-#
-#     # Create a copy in the outputs folder.
-#     # todo: use copy_templates function instead.
-#     now = datetime.datetime.now()
-#     target_name = now.strftime("outputs_%Y%m%d_%H%M%S")
-#     source_folder = f"latex_templates/{template}"
-#     destination_folder = f"outputs/{target_name}"
-#     shutil.copytree(source_folder, destination_folder)
-#
-#     bibtex_path = destination_folder + "/ref.bib"
-#     save_to_path = destination_folder + "/"
-#     replace_title(save_to_path, title)
-#     logging.basicConfig(level=logging.INFO, filename=save_to_path + "generation.log")
-#
-#     # Generate keywords and references
-#     print("Initialize the paper information ...")
-#     prompts = generate_keywords_prompts(title, description)
-#     gpt_response, usage = get_responses(prompts, model)
-#     keywords = extract_keywords(gpt_response)
-#     log_usage(usage, "keywords")
-#     ref = References(load_papers="")  # todo: allow users to upload bibfile.
-#     ref.collect_papers(keywords, method="arxiv")  # todo: add more methods to find related papers
-#     all_paper_ids = ref.to_bibtex(bibtex_path)  # todo: this will be used to check if all citations are in this list
-#
-#     print(f"The paper information has been initialized. References are saved to {bibtex_path}.")
-#
-#     paper["title"] = title
-#     paper["description"] = description
-#     paper["references"] = ref.to_prompts()  # todo: see if these prompts can be compressed.
-#     paper["body"] = paper_body
-#     paper["bibtex"] = bibtex_path
-#
-#     print("Generating figures ...")
-#     prompts = generate_experiments_prompts(paper)
-#     gpt_response, usage = get_responses(prompts, model)
-#     list_of_methods = list(extract_json(gpt_response))
-#     log_usage(usage, "figures")
-#     generate_random_figures(list_of_methods, save_to_path + "comparison.png")
-#
-#     for section in ["introduction", "related works", "backgrounds", "methodology", "experiments", "conclusion", "abstract"]:
-#         try:
-#             usage = pipeline(paper, section, save_to_path, model=model)
-#             log_usage(usage, section)
-#         except Exception as e:
-#             print(f"Failed to generate {section} due to the error: {e}")
-#     print(f"The paper {title} has been generated. Saved to {save_to_path}.")
-#     return make_archive(destination_folder, "output.zip")
-#
-# if __name__ == "__main__":
-#     # title = "Training Adversarial Generative Neural Network with Adaptive Dropout Rate"
-#     title = "Playing Atari Game with Deep Reinforcement Learning"
-#     description = ""
-#     template = "ICLR2022"
-#     model = "gpt-4"
-#     # model = "gpt-3.5-turbo"
-#
-#     generate_draft(title, description, template, model)
chainlit.md
ADDED
@@ -0,0 +1,15 @@
+# GPT Cyber-Supervisor 🚀🤖
+
+Let ChatGPT power your research! It can suggest research topics, collect references, and help analyze the novelty of a paper.
+
+## Running this project with the OpenAI API
+1. Add `OPENAI_API_KEY` to your environment variables.
+2. The default model is `gpt-3.5-turbo-16k`; override it by setting the `DEFAULT_MODEL` environment variable.
+3. Run `chainlit run cyber-supervisor-openai.py` from the command line.
+
+## How it works
+Three functions are currently provided:
+1. `find_research_directions`: find research directions for your topic
+2. `find_references`: collect references for your paper
+3. `judge_novelty`: let the cyber-supervisor analyze the novelty of your proposed idea
+Built on the OpenAI API's function-calling feature, ChatGPT decides on its own which tool to invoke.
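The function-calling flow this README describes boils down to one API call plus a dispatch table. A minimal, non-streaming sketch of it, using the same pre-1.0 `openai` SDK as the rest of this commit (the user prompt is illustrative; `cyber-supervisor-openai.py` below implements the streaming version):

```python
import json
import os

import openai

from tools import functions, TOOLS  # JSON schemas and the matching Python callables

openai.api_key = os.getenv("OPENAI_API_KEY")

response = openai.ChatCompletion.create(
    model=os.getenv("DEFAULT_MODEL") or "gpt-3.5-turbo-16k",
    messages=[{"role": "user", "content": "Find references for a paper on offline RL."}],
    functions=functions,      # describe the three tools to the model
    function_call="auto",     # let ChatGPT pick a tool, or answer directly
)
message = response["choices"][0]["message"]
if message.get("function_call"):
    name = message["function_call"]["name"]
    args = json.loads(message["function_call"]["arguments"])
    # find_research_directions takes its argument as `research_field`, so the
    # app remaps it; the other two tools accept their schema names directly.
    if name == "find_research_directions":
        print(TOOLS[name](research_field=args.get("research_description")))
    else:
        print(TOOLS[name](title=args.get("title"), contributions=args.get("contributions")))
```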
cyber-supervisor-langchain.py
ADDED
@@ -0,0 +1,48 @@
+import os
+
+from tools import FindResearchDirectionsTool, JudgeNoveltyTool, FindReferencesTool
+from langchain.chat_models import ChatOpenAI
+from langchain.agents import initialize_agent
+from langchain.agents import AgentType
+import openai
+from langchain.schema import SystemMessage
+from langchain.memory import ConversationBufferMemory
+
+openai.api_key = os.getenv("OPENAI_API_KEY")
+print(os.getenv("OPENAI_API_KEY"))
+default_model = os.getenv("DEFAULT_MODEL")
+if default_model is None:
+    default_model = "gpt-3.5-turbo-16k"
+
+import chainlit as cl
+
+agent_kwargs = {
+    "system_message": SystemMessage(content="You are a mighty cyber professor. "
+                                            "Your task is to assist your student to find an idea of research including: "
+                                            "1. Search related references. "
+                                            "2. Propose potential research directions. "
+                                            "3. Evaluate the novelty of any research direction. "
+                                            "Follow the following instructions: "
+                                            "1. You always respond in the same language as your student. "
+                                            "2. Ask your student for further information if necessary to provide more assistance.")
+}
+memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
+
+@cl.langchain_factory(use_async=False)
+def main():
+    tools = [FindResearchDirectionsTool(), JudgeNoveltyTool(), FindReferencesTool()]
+    llm = ChatOpenAI(temperature=0.9, model=default_model, streaming=True)
+    open_ai_agent = initialize_agent(tools,
+                                     llm,
+                                     agent=AgentType.OPENAI_FUNCTIONS,
+                                     verbose=True,
+                                     agent_kwargs=agent_kwargs,
+                                     memory=memory)
+    return open_ai_agent
+
+
+@cl.langchain_run
+async def run(agent, input_str):
+    res = await cl.make_async(agent)(input_str, callbacks=[cl.LangchainCallbackHandler()])
+    print(res)
+    await cl.Message(content=res["output"]).send()
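Like its OpenAI-API sibling, this LangChain variant should launch through Chainlit's standard CLI, e.g. `chainlit run cyber-supervisor-langchain.py`; here the agent framework, rather than the hand-rolled loop below, handles tool selection and iteration.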
cyber-supervisor-openai.py
ADDED
@@ -0,0 +1,116 @@
+import os
+import openai
+import ast
+from tools import functions, TOOLS
+
+MAX_ITER = 5
+
+openai.api_key = os.getenv("OPENAI_API_KEY")
+default_model = os.getenv("DEFAULT_MODEL")
+if default_model is None:
+    default_model = "gpt-3.5-turbo-16k"
+
+import chainlit as cl
+
+async def process_new_delta(new_delta, openai_message, content_ui_message, function_ui_message):
+    if "role" in new_delta:
+        openai_message["role"] = new_delta["role"]
+    if "content" in new_delta:
+        new_content = new_delta.get("content") or ""
+        openai_message["content"] += new_content
+        await content_ui_message.stream_token(new_content)
+    if "function_call" in new_delta:
+        if "name" in new_delta["function_call"]:
+            openai_message["function_call"] = {
+                "name": new_delta["function_call"]["name"]}
+            await content_ui_message.send()
+            function_ui_message = cl.Message(
+                author=new_delta["function_call"]["name"],
+                content="", indent=1, language="json")
+            await function_ui_message.stream_token(new_delta["function_call"]["name"])
+
+        if "arguments" in new_delta["function_call"]:
+            if "arguments" not in openai_message["function_call"]:
+                openai_message["function_call"]["arguments"] = ""
+            openai_message["function_call"]["arguments"] += new_delta["function_call"]["arguments"]
+            await function_ui_message.stream_token(new_delta["function_call"]["arguments"])
+    return openai_message, content_ui_message, function_ui_message
+
+
+system_message = "You are a mighty cyber professor. Follow the following instructions: " \
+                 "1. You always respond in the same language as your student. " \
+                 "2. Ask your student for further information if necessary to provide more assistance. " \
+                 "3. If your student asks you to do something out of your responsibility, please say no. "
+
+@cl.on_chat_start
+def start_chat():
+    cl.user_session.set(
+        "message_history",
+        [{"role": "system", "content": system_message}],
+    )
+
+
+@cl.on_message
+async def run_conversation(user_message: str):
+    message_history = cl.user_session.get("message_history")
+    message_history.append({"role": "user", "content": user_message})
+
+    cur_iter = 0
+
+    while cur_iter < MAX_ITER:
+        # OpenAI call
+        openai_message = {"role": "", "content": ""}
+        function_ui_message = None
+        content_ui_message = cl.Message(content="")
+        async for stream_resp in await openai.ChatCompletion.acreate(
+            model=default_model,
+            messages=message_history,
+            stream=True,
+            function_call="auto",
+            functions=functions,
+            temperature=0.9
+        ):
+            new_delta = stream_resp.choices[0]["delta"]
+            openai_message, content_ui_message, function_ui_message = await process_new_delta(
+                new_delta, openai_message, content_ui_message, function_ui_message)
+
+        message_history.append(openai_message)
+        if function_ui_message is not None:
+            await function_ui_message.send()
+
+        if stream_resp.choices[0]["finish_reason"] == "stop":
+            break
+
+        elif stream_resp.choices[0]["finish_reason"] != "function_call":
+            raise ValueError(stream_resp.choices[0]["finish_reason"])
+
+        # if code arrives here, it means there is a function call
+        function_name = openai_message.get("function_call").get("name")
+        arguments = ast.literal_eval(
+            openai_message.get("function_call").get("arguments"))
+
+        if function_name == "find_research_directions":
+            function_response = TOOLS[function_name](
+                research_field=arguments.get("research_description"),
+            )
+        else:
+            function_response = TOOLS[function_name](
+                title=arguments.get("title"),
+                contributions=arguments.get("contributions"),
+            )
+        message_history.append(
+            {
+                "role": "function",
+                "name": function_name,
+                "content": f"{function_response}",
+            }
+        )
+
+        await cl.Message(
+            author=function_name,
+            content=str(function_response),
+            language='json',
+            indent=1,
+        ).send()
+        cur_iter += 1
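One caveat in the dispatch above: the accumulated `arguments` string is JSON, while `ast.literal_eval` only parses Python literals, so arguments containing JSON's `true`, `false`, or `null` would raise `ValueError`. A stricter sketch of that one step (the sample string is made up):

```python
import json

# Sample accumulated "arguments" string from a streamed function_call delta.
raw = '{"title": "Some Paper", "contributions": "1. ...", "draft": null}'
try:
    arguments = json.loads(raw)  # accepts true/false/null, which ast.literal_eval rejects
except json.JSONDecodeError:
    arguments = {}  # malformed arguments: skip the call, or ask the model to retry
print(arguments)
```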
initialization.py
DELETED
@@ -1,7 +0,0 @@
-"""Generate necessary components of prompts. """
-from utils.prompts import SYSTEM
-
-def get_keywords(model, title):
-    pass
-
-
kdb_test.py
CHANGED
@@ -7,14 +7,27 @@ import os
 import json
 from models import EMBEDDINGS
 
-
-REPO_ID = os.getenv("KDB_REPO")
+# todo: feature not implemented yet
 
-
-
+HF_TOKEN = None  # os.getenv("HF_TOKEN")
+REPO_ID = None  # os.getenv("KDB_REPO")
+if HF_TOKEN is not None and REPO_ID is not None:
+    snapshot_download(REPO_ID, repo_type="dataset", local_dir="knowledge_databases/",
+                      local_dir_use_symlinks=False, token=HF_TOKEN)
 ALL_KDB = ["(None)"] + list_folders("knowledge_databases")
 
+ANNOUNCEMENT = """
+# Evaluate the quality of retrieved data from the FAISS database
+
+Use this space to test the performance of some pre-constructed vector databases hosted at `shaocongma/kdb`. To use this space for your own FAISS database, follow these instructions:
+1. Duplicate this space.
+2. Add the secret key `HF_TOKEN` with your own Huggingface User Access Token.
+3. Create a Huggingface Dataset. Put your FAISS database in it.
+4. Add the secret key `REPO_ID` as your dataset's address.
+"""
+AUTODRAFT = """
+AutoDraft is a GPT-based project that generates an academic paper from a title and contributions. When generating specific sections, AutoDraft queries the pre-constructed vector database for the necessary background in related fields.
+"""
 
 def query_from_kdb(input, kdb, query_counts):
     if kdb == "(None)":
@@ -37,25 +50,36 @@ def query_from_kdb(input, kdb, query_counts):
         raise RuntimeError(f"Failed to query from FAISS.")
     return domain_knowledge, ""
 
-ANNOUNCEMENT = """"""
-
 with gr.Blocks() as demo:
-    gr.HTML(ANNOUNCEMENT)
     with gr.Row():
         with gr.Column():
-
-
-
+            gr.Markdown(ANNOUNCEMENT)
+
+            kdb_dropdown = gr.Dropdown(choices=ALL_KDB, value="(None)", label="Knowledge Databases",
+                                       info="Pre-defined knowledge databases utilized to aid in the generation of academic writing content. "
+                                            "Hosted at `shaocongma/kdb`.")
+            with gr.Tab("User's Input"):
+                user_input = gr.Textbox(label="Input", info="Input anything you like to test what will be retrieved from the vector database.")
+                with gr.Row():
+                    button_clear = gr.Button("Clear")
+                    button_retrieval = gr.Button("Retrieve", variant="primary")
+            with gr.Tab("AutoDraft"):
+                gr.Markdown(AUTODRAFT)
+                title_input = gr.Textbox(label="Title")
+                contribution_input = gr.Textbox(label="Contributions", lines=5)
+                with gr.Row():
+                    button_clear_2 = gr.Button("Clear")
+                    button_retrieval_2 = gr.Button("Retrieve", variant="primary")
 
     with gr.Accordion("Advanced Setting", open=False):
-        query_counts_slider = gr.Slider(minimum=1, maximum=
-
-
+        query_counts_slider = gr.Slider(minimum=1, maximum=50, value=10, step=1,
+                                        interactive=True, label="QUERY_COUNTS",
+                                        info="How many contents will be retrieved from the vector database.")
 
     retrieval_output = gr.JSON(label="Output")
 
-
     button_retrieval.click(fn=query_from_kdb, inputs=[user_input, kdb_dropdown, query_counts_slider], outputs=[retrieval_output, user_input])
+
 demo.queue(concurrency_count=1, max_size=5, api_open=False)
 demo.launch(show_error=True)
requirements.txt
CHANGED
Binary files a/requirements.txt and b/requirements.txt differ
tools.py
ADDED
@@ -0,0 +1,206 @@
+import os
+import openai
+from utils.references import References
+from utils.gpt_interaction import GPTModel
+from utils.prompts import SYSTEM
+from langchain.tools import BaseTool
+from pydantic import BaseModel, Field
+from typing import Optional, Type
+
+MAX_TOKENS = 2048
+
+openai.api_key = os.getenv("OPENAI_API_KEY")
+default_model = os.getenv("DEFAULT_MODEL")
+if default_model is None:
+    default_model = "gpt-3.5-turbo-16k"
+llm = GPTModel(model=default_model, delay=1)
+
+paper_system_prompt = '''You are an assistant designed to propose choices of research direction.
+The user will input questions or some keywords of a field. You need to generate some paper titles and main contributions. Ensure you follow these instructions:
+Instruction:
+- Your response should follow the JSON format.
+- Your response should have the following structure:
+{
+  "your suggested paper title":
+    {
+      "summary": "an overview introducing what this paper will include",
+      "contributions": {
+        "contribution1": {"statement": "briefly describe this contribution", "reason": "reason why this contribution can make this paper outstanding"},
+        "contribution2": {"statement": "briefly describe this contribution", "reason": "reason why this contribution can make this paper outstanding"},
+        ...
+      }
+    }
+  "your suggested paper title":
+    {
+      "summary": "an overview introducing what this paper will include",
+      "contributions": {
+        "contribution1": {"statement": "briefly describe this contribution", "reason": "reason why this contribution can make this paper outstanding"},
+        "contribution2": {"statement": "briefly describe this contribution", "reason": "reason why this contribution can make this paper outstanding"},
+        ...
+      }
+    }
+  ...
+}
+- Please list three to five suggested titles and at least three contributions for each paper.
+'''
+
+
+contribution_system_prompt = '''You are an assistant designed to criticize the contributions of a paper. You will be provided the paper's title, references, and contributions. Ensure you follow these instructions:
+Instruction:
+- Your response should follow the JSON format.
+- Your response should have the following structure:
+{
+  "title": "the title provided by the user",
+  "comment": "your thoughts on whether this title clearly reflects the key ideas of this paper and explain why"
+  "contributions": {
+    "contribution1": {"statement": "briefly describe what the contribution is",
+                      "reason": "reason why the user claims it is a contribution",
+                      "judge": "your thought about if this is a novel contribution and explain why",
+                      "suggestion": "your suggestion on how to modify the research direction to enhance the novelty"},
+    "contribution2": {"statement": "briefly describe what the contribution is",
+                      "reason": "reason why the user claims it is a contribution",
+                      "judge": "your thought about if this is a novel contribution and explain why",
+                      "suggestion": "your suggestion on how to modify the research direction to enhance the novelty"},
+    ...
+  }
+}
+- You need to carefully check if the claimed contribution has been made in the provided references, which would make the contribution not novel.
+- You also need to raise your concerns if any of the contributions could be incremental or just a mild modification of an existing work.
+'''
+
+
+def find_research_directions(research_field):
+    output, _ = llm(systems=paper_system_prompt, prompts=research_field, return_json=False)
+    return output
+
+def find_references(title, contributions):
+    max_tokens = MAX_TOKENS
+    ref = References(title=title, description=f"{contributions}")
+    keywords, _ = llm(systems=SYSTEM["keywords"], prompts=title, return_json=True)
+    keywords = {keyword: 10 for keyword in keywords}
+    ref.collect_papers(keywords)
+    ref_prompt = ref.to_prompts(max_tokens=max_tokens)
+    return ref_prompt
+
+
+def judge_novelty(title, contributions):
+    max_tokens = MAX_TOKENS
+    ref = References(title=title, description=f"{contributions}")
+    keywords, _ = llm(systems=SYSTEM["keywords"], prompts=title, return_json=True)
+    keywords = {keyword: 10 for keyword in keywords}
+    ref.collect_papers(keywords)
+    ref_prompt = ref.to_prompts(max_tokens=max_tokens)
+    prompt = f"Title: {title}\n References: {ref_prompt}\n Contributions: {contributions}"
+    output, _ = llm(systems=contribution_system_prompt, prompts=prompt, return_json=False)
+    return output
+
+
+functions = [
+    {
+        "name": "find_research_directions",
+        "description": "when your student has already shown interest in a specific topic and provided a rough description of potential contributions, help your student dive deeper into this direction",
+        "parameters": {
+            "type": "object",
+            "properties": {
+                "research_description": {
+                    "type": "string",
+                    "description": "a paragraph with details in English describing "
+                                   "(1) what is the main problem you are trying to solve "
+                                   "(2) what is the main novelty of this idea (3) how to complete this research."
+                }
+            },
+            "required": ["research_description"],
+        },
+    },
+    {
+        "name": "find_references",
+        "description": "find references for given details of a paper",
+        "parameters": {
+            "type": "object",
+            "properties": {
+                "title": {
+                    "type": "string",
+                    "description": "the title (in English) of the academic paper your student will write.",
+                },
+                "contributions": {"type": "string",
+                                  "description": "a general description of the contributions of this paper in English. "
+                                                 "If there are multiple contributions, index them with numbers."},
+            },
+            "required": ["title", "contributions"],
+        },
+    },
+    {
+        "name": "judge_novelty",
+        "description": "evaluate the novelty of a paper given its title and main contributions",
+        "parameters": {
+            "type": "object",
+            "properties": {
+                "title": {
+                    "type": "string",
+                    "description": "the title (in English) of the academic paper your student will write.",
+                },
+                "contributions": {"type": "string",
+                                  "description": "a general description of the contributions of this paper in English. "
+                                                 "If there are multiple contributions, index them with numbers."},
+            },
+            "required": ["title", "contributions"],
+        },
+    }
+]
+
+TOOLS = {"find_research_directions": find_research_directions, "find_references": find_references, "judge_novelty": judge_novelty}
+
+class FindResearchDirectionsCheckInput(BaseModel):
+    research_description: str = Field(..., description="a paragraph with details in English describing (1) what is the main problem you are trying to solve "
+                                                       "(2) what is the main novelty of this idea (3) how to complete this research.")
+
+class TitleDescriptionCheckInput(BaseModel):
+    title: str = Field(..., description="the title of the academic paper your student will write in English.")
+    contributions: str = Field(..., description="a general description of the contributions of this paper in English. "
+                                                "If there are multiple contributions, index them with numbers.")
+
+
+class FindResearchDirectionsTool(BaseTool):
+    name = "find_research_directions"
+    description = """Useful when your student has already shown interest in a specific topic and provided a rough description of
+    potential contributions, and you need to help your student dive deeper into this direction.
+    """
+    def _run(self, research_description: str):
+        response = find_research_directions(research_description)
+        return response
+
+    def _arun(self, research_field: str):
+        raise NotImplementedError("This tool does not support async")
+
+    args_schema: Optional[Type[BaseModel]] = FindResearchDirectionsCheckInput
+
+
+class JudgeNoveltyTool(BaseTool):
+    name = "judge_novelty"
+    description = """Useful when you need to evaluate the novelty of your student's idea.
+    """
+    def _run(self, title: str, contributions: str):
+        response = judge_novelty(title, contributions)
+        return response
+
+    def _arun(self, title: str, contributions: str):
+        raise NotImplementedError("This tool does not support async")
+
+    args_schema: Optional[Type[BaseModel]] = TitleDescriptionCheckInput
+
+class FindReferencesTool(BaseTool):
+    name = "find_references"
+    description = """Useful when you need to find references for a paper.
+    """
+    def _run(self, title: str, contributions: str):
+        response = find_references(title, contributions)
+        return response
+
+    def _arun(self, title: str, contributions: str):
+        raise NotImplementedError("This tool does not support async")
+
+    args_schema: Optional[Type[BaseModel]] = TitleDescriptionCheckInput
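Since the three tools added here are plain functions collected in `TOOLS`, they can be smoke-tested outside Chainlit. A sketch, assuming `OPENAI_API_KEY` is set and the repo's `utils` package is importable; the title and contributions are made up:

```python
from tools import TOOLS

title = "Playing Atari Games with Deep Reinforcement Learning"
contributions = "1. A prioritized replay variant. 2. An adaptive exploration schedule."

# Each call goes through the shared GPTModel instance defined in tools.py.
print(TOOLS["find_references"](title=title, contributions=contributions))
print(TOOLS["judge_novelty"](title=title, contributions=contributions))
print(TOOLS["find_research_directions"](research_field="sample-efficient deep RL for Atari"))
```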