Spaces:

du-lab
/

MLR-Copilot

Running

App Files Files Community

MLR-Copilot / app.py

Lim0011

Test

a79dcfe verified 2 months ago

raw

history blame contribute delete

13.4 kB

	import gradio as gr
	from pathlib import Path
	from reactagent.environment import Environment
	from reactagent.agents.agent_research import ResearchAgent
	from reactagent.runner import create_parser
	from reactagent import llm
	from reactagent.users.user import User
	import os
	import json


	# Global variables to store session state.
	# They live at module level, so every handler in this Python process reads and
	# writes the same values.
	# env / agent start as None and are not referenced again in the visible code.
	env = None
	agent = None
	# Progress flags. Each one is set to True by the handler of the matching step
	# (example chosen, elements extracted, idea generated, agent started, run finished).
	state_example = False
	state_extract = False
	state_generate = False
	state_agent = False
	state_complete = False
	# Key (kept as a string) of the currently selected entry in example_data.
	index_ex = "1"

	# Placeholder titles. The name example_text is rebound further down to the
	# texts produced by load_example(), so these two strings are not what is shown.
	example_text = [
	"Research Paper 1: Dataset and Baseline for Automatic Student Feedback Analysis",
	"Research Paper 2: An Empirical Study on the Impact of Code Review on Software Quality"
	]

	# Load example JSON file
	def load_example_data(example_dir="example"):
	    """Load the example catalogue and inline the code files it references.

	    Reads ``example_data.json`` from *example_dir*, then replaces the
	    ``code_init`` and ``code_final`` value of every entry (file names relative
	    to *example_dir*) with the text of that file.

	    Args:
	        example_dir: Directory that holds ``example_data.json`` and the code
	            files. Defaults to ``"example"``, the location used so far.

	    Returns:
	        The parsed JSON mapping, with code file names replaced by the file
	        contents wherever the file could be read.

	    Raises:
	        FileNotFoundError: If ``example_data.json`` itself is missing.
	        KeyError: If an entry lacks ``code_init`` or ``code_final``.

	    A missing code file is reported on stdout and the entry keeps the file
	    name instead of the code.
	    """
	    base_dir = Path(example_dir)
	    # JSON is UTF-8 by specification, so do not rely on the platform default encoding.
	    with open(base_dir / "example_data.json", "r", encoding="utf-8") as json_file:
	        example_data = json.load(json_file)

	    for idx, entry in example_data.items():
	        # Both fields are handled identically, so loop instead of duplicating the code.
	        for field in ("code_init", "code_final"):
	            file = entry[field]
	            try:
	                with open(base_dir / file, "r", encoding="utf-8") as f:
	                    entry[field] = f.read()
	            except FileNotFoundError:
	                print(f"File not found: {file}. Skipping key: {idx}")
	    return example_data

	# Load the examples once at import time; the handlers below read this mapping.
	example_data = load_example_data()

	# Function to handle the selection of an example and populate the respective fields
	def load_example(example_id):
	    """Select an example and return its title and abstract as display text.

	    Stores ``str(example_id)`` in the global ``index_ex`` and formats the
	    matching ``example_data`` entry as ``Title:<tab>...`` followed by
	    ``Abstract:<tab>...``.
	    """
	    global index_ex
	    index_ex = str(example_id)
	    entry = example_data[index_ex]
	    title_part = 'Title:\t' + entry['title']
	    abstract_part = '\n\nAbstract:\t' + entry['abstract']
	    return title_part + abstract_part

	# Texts offered by gr.Examples. This rebinds example_text, and each
	# load_example() call also sets the global index_ex, which is therefore "2"
	# once this line has run.
	example_text = [load_example(1), load_example(2)]

	# Function to handle example clicks
	def load_example_and_set_index(paper_text_input):
	    """Handle a click on one of the example rows.

	    Marks an example as chosen, derives the 1-based example key from the
	    position of *paper_text_input* in ``example_text``, and returns the paper
	    text followed by six empty strings (one per remaining output component).
	    """
	    global index_ex, state_example
	    state_example = True
	    position = example_text.index(paper_text_input)
	    index_ex = str(position + 1)
	    blanks = ("",) * 6
	    return (load_example(index_ex), *blanks)



	########## Phase 1 ##############

	def extract_research_elements(paper_text):
	    """Return the stored research elements for the selected example.

	    Yields four empty strings when no example has been chosen, when the text
	    box is empty, or when *paper_text* differs from the selected example's
	    text. Otherwise returns ``(tasks, gaps, keywords, recent_works)`` with the
	    recent works joined by newlines.
	    """
	    global state_extract, index_ex, state_example
	    nothing = ("", "", "", "")
	    if not (state_example and paper_text != ""):
	        return nothing
	    # The flag is raised before the text comparison, exactly as before.
	    state_extract = True
	    if paper_text != load_example(index_ex):
	        return nothing
	    entry = example_data[index_ex]
	    return (
	        entry['research_tasks'],
	        entry['research_gaps'],
	        entry['keywords'],
	        "\n".join(entry['recent_works']),
	    )


	# Step 2: Generate Research Hypothesis and Experiment Plan
	def generate_and_store(paper_text, tasks, gaps, keywords, recent_works):
	    """Return the stored hypothesis and experiment plan of the selected example.

	    The pair is returned twice: once for the visible text boxes and once for
	    the state holders. Four empty strings are returned until an example has
	    been chosen and extracted and *paper_text* is non-empty. The remaining
	    arguments are accepted from the UI but not read.
	    """
	    global state_generate, index_ex
	    ready = state_extract and state_example and paper_text != ""
	    if not ready:
	        return "", "", "", ""
	    state_generate = True
	    entry = example_data[index_ex]
	    hypothesis = entry['hypothesis']
	    experiment_plan = entry['experiment_plan']
	    return hypothesis, experiment_plan, hypothesis, experiment_plan

	########## Phase 2 & 3 ##############
	def start_experiment_agent(hypothesis, plan):
	    """Start (or reset) the scripted ExperimentAgent run.

	    Args:
	        hypothesis: Hypothesis text from the UI state (not read by this handler).
	        plan: Experiment plan text from the UI state (not read by this handler).

	    Returns:
	        A 4-tuple ``(code, log, response, feedback)``: the selected example's
	        initial code, the predefined action log, and two empty strings. Four
	        empty strings are returned when the earlier steps have not been run.
	    """
	    global state_agent, step_index, state_complete
	    if not (state_extract and state_generate and state_example):
	        # The click event is wired to four output components, so the guard
	        # must return four values as well (it used to return three).
	        return "", "", "", ""
	    state_agent = True
	    step_index = 0
	    state_complete = False
	    return example_data[index_ex]['code_init'], predefined_action_log, "", ""

	def submit_feedback(user_feedback, history, previous_response):
	    """Advance the scripted agent run by one user-feedback round.

	    Args:
	        user_feedback: Text typed into the feedback box.
	        history: Current content of the execution log.
	        previous_response: Agent response currently displayed.

	    Returns:
	        A 4-tuple ``(log, response, code, feedback)``. The last item is always
	        an empty string, which clears the feedback box. Four empty strings are
	        returned when the agent has not been started through the earlier steps.
	    """
	    global step_index, state_complete
	    if not (state_extract and state_generate and state_agent and state_example):
	        # The click event is wired to four output components, so the guard
	        # must return four values as well (it used to return three).
	        return "", "", "", ""

	    step_index += 1
	    msg = history
	    if step_index < len(process_steps):
	        msg += previous_response + "\nUser feedback:" + user_feedback + "\n\n"
	        # Convert the step dictionary to a formatted string.
	        response = info_to_message(process_steps[step_index])
	        response += "Please provide feedback based on the history, response entries, and observation, and questions: "
	        # NOTE(review): together with the increment above, this second
	        # increment means only every other entry of process_steps is shown.
	        # Kept as is; confirm that skipping entries is intended.
	        step_index += 1
	        msg += response
	    else:
	        state_complete = True
	        response = "Agent Finished."

	    example = example_data[index_ex]
	    # NOTE(review): the initial code is shown once the run is complete and the
	    # final code while it is in progress. This looks inverted; behaviour is
	    # left unchanged until the intent is confirmed.
	    code = example['code_init'] if state_complete else example['code_final']
	    return msg, response, code, ""

	def load_phase_2_inputs(hypothesis, plan):
	    """Pass the hypothesis and plan through and add the code-panel placeholder.

	    Returns ``(hypothesis, plan, placeholder)`` where the placeholder is the
	    comment line shown before the agent has been started.
	    """
	    code_placeholder = "# Code implementation will be displayed here after Start ExperimentAgent."
	    return (hypothesis, plan, code_placeholder)



	# Canned execution-log text that start_experiment_agent returns as the initial log.
	predefined_action_log = """
	[Reasoning]: To understand the initial structure and functionality of train.py for effective improvements.
	[Action]: Inspect Script (train.py)
	Input: {"script_name": "train.py", "start_line_number": "1", "end_line_number": "74"}
	Objective: Understand the training script, including data processing, [...]
	[Observation]: The train.py script imports [...]. Sets random seeds [...]. Defines [...] Placeholder functions [...] exist without implementation. [...]
	[Feedback]: The script structure is clear, but key functions (train_model, predict) need proper implementation for proposed model training and prediction.\n
	"""


	# Canned metrics text used as the observation of the last scripted step.
	predefined_observation = """
	Epoch [1/10],
	Train MSE: 0.543,
	Test MSE: 0.688
	Epoch [2/10],
	Train MSE: 0.242,
	Test MSE: 0.493\n
	"""

	# Scripted agent steps. submit_feedback indexes this list with step_index and
	# renders the chosen entry with info_to_message.
	process_steps = [
	    {
	        "Action": "Inspect Script Lines (train.py)",
	        "Observation": (
	            "The train.py script imports necessary libraries (e.g., pandas, sklearn, torch). "
	            "Sets random seeds for reproducibility. Defines compute_metrics_for_regression function "
	            "to calculate RMSE for different dimensions. Placeholder functions train_model and "
	            "predict exist without implementations."
	        ),
	    },
	    {
	        "Action": "Execute Script (train.py)",
	        "Observation": (
	            "The script executed successfully. Generated embeddings using the BERT model. Completed "
	            "the training process without errors. Metrics calculation placeholders indicated areas needing implementation."
	        ),
	    },
	    {
	        "Action": "Edit Script (train.py)",
	        "Observation": (
	            "Edited train.py to separate data loading, model definition, training loop, and evaluation into distinct functions. "
	            # Trailing space added: adjacent literals are concatenated as-is,
	            # which previously produced "functionsfor data loading".
	            "The edited train.py now has clearly defined functions "
	            "for data loading (load_data), model definition (build_model), "
	            "training (train_model), and evaluation (evaluate_model). Similarly, eval.py is reorganized to load the model and perform predictions efficiently."
	        ),
	    },
	    {
	        "Action": "Retrieve Model",
	        "Observation": "CNN and BiLSTM retrieved.",
	    },
	    {
	        "Action": "Execute Script (train.py)",
	        "Observation": (
	            "The model trained over the specified number of epochs. Training and validation loss values are recorded for each epoch, "
	            "the decrease in loss indicates improved model performance."
	        ),
	    },
	    {
	        "Action": "Evaluation",
	        "Observation": predefined_observation,
	    },
	]
	def info_to_message(info):
	    """Render a step dictionary as a text block.

	    Each key/value pair becomes a 64-dash rule, the key followed by a colon,
	    and the value on the next line. A value that is itself a dictionary is
	    first flattened to ``key:`` / value lines and passed through
	    ``User.indent_text(..., 2)``.
	    """
	    rule = '-' * 64
	    sections = []
	    for key, value in info.items():
	        if isinstance(value, dict):
	            nested = "".join(f"{k2}:\n {v2}\n" for k2, v2 in value.items())
	            value = User.indent_text(nested, 2)
	        sections.append(f"{rule}\n{key}:\n{value}\n")
	    return "".join(sections)


	def handle_example_click(example_index):
	    """Record the clicked example index and return its display text.

	    The global ``index_ex`` is first set to *example_index* as given;
	    ``load_example`` then stores its string form and builds the text that is
	    shown in the textbox.
	    """
	    global index_ex
	    index_ex = example_index
	    paper_text = load_example(example_index)
	    return paper_text

	# Gradio Interface: builds the two-tab Blocks application bound to `app`.
	# NOTE(review): the indentation that nests these `with` blocks is not present
	# in this copy of the file, so the layout hierarchy has to be confirmed
	# against the running app before the block is re-indented.
	with gr.Blocks(theme=gr.themes.Default()) as app:
	gr.Markdown("# MLR- Copilot: Machine Learning Research based on LLM Agents [Paper Link](https://www.arxiv.org/abs/2408.14033)")
	gr.Markdown("### ")

	gr.Markdown("MLR-Copilot is a framework where LLMs mimic researchers’ thought processes, designed to enhance the productivity of machine learning research by automating the generation and implementation of research ideas. It begins with a research paper, autonomously generating and validating these ideas, while incorporating human feedback to help reach executable research outcomes.")




	# Use state variables to store generated hypothesis and experiment plan
	hypothesis_state = gr.State("")
	experiment_plan_state = gr.State("")

	########## Phase 1: Research Idea Generation Tab ##############
	with gr.Tab("💡Stage 1: Research Idea Generation"):
	gr.Markdown("### Extract Research Elements and Generate Research Ideas")

	with gr.Row():
	with gr.Column():
	# Paper text plus the four editable fields filled by extract_research_elements.
	paper_text_input = gr.Textbox(value="", lines=10, label="📑 Research Paper Text")
	extract_button = gr.Button("🔍 Extract Research Elements")
	with gr.Row():
	tasks_output = gr.Textbox(placeholder="Research task definition", label="Research Tasks", lines=2, interactive=True)
	gaps_output = gr.Textbox(placeholder="Research gaps of current works", label="Research Gaps", lines=2, interactive=True)
	keywords_output = gr.Textbox(placeholder="Paper keywords", label="Keywords", lines=2, interactive=True)
	recent_works_output = gr.Textbox(placeholder="Recent works extracted from Semantic Scholar", label="Recent Works", lines=2, interactive=True)
	with gr.Column():
	with gr.Row(): # Move the button to the top
	generate_button = gr.Button("✍️ Generate Research Hypothesis & Experiment Plan")
	with gr.Group():
	gr.Markdown("### 🌟 Research Idea")
	with gr.Row():
	# Read-only boxes filled by generate_and_store.
	hypothesis_output = gr.Textbox(label="Generated Hypothesis", lines=20, interactive=False)
	experiment_plan_output = gr.Textbox(label="Generated Experiment Plan", lines=20, interactive=False)

	# Clicking an example runs load_example_and_set_index (run_on_click) and
	# writes its seven return values to the seven listed outputs.
	gr.Examples(
	examples=example_text,
	inputs=[paper_text_input],
	outputs=[paper_text_input, tasks_output, gaps_output, keywords_output, recent_works_output, hypothesis_output, experiment_plan_output],
	fn=load_example_and_set_index,
	run_on_click = True,
	label="⬇️ Click an example to load"
	)

	# Step 1: Extract Research Elements
	extract_button.click(
	fn=extract_research_elements,
	inputs=paper_text_input,
	outputs=[tasks_output, gaps_output, keywords_output, recent_works_output]
	)

	# Step 2: show the hypothesis and plan and also store them in the two State holders.
	generate_button.click(
	fn=generate_and_store,
	inputs=[paper_text_input, tasks_output, gaps_output, keywords_output, recent_works_output],
	outputs=[hypothesis_output, experiment_plan_output, hypothesis_state, experiment_plan_state]
	)



	########## Phase 2 & 3: Experiment implementation and execution ##############
	with gr.Tab("🧪 Stage 2 & Stage 3: Experiment implementation and execution"):
	gr.Markdown("### Interact with the ExperimentAgent")

	with gr.Row():
	with gr.Column():
	with gr.Group():
	gr.Markdown("### 🌟 Generated Research Idea")
	with gr.Row():
	idea_input = gr.Textbox(label="Generated Research Hypothesis", lines=30, interactive=False)
	plan_input = gr.Textbox(label="Generated Experiment Plan", lines=30, interactive=False)

	with gr.Column():
	start_exp_agnet = gr.Button("⚙️ Start / Reset ExperimentAgent", elem_classes=["agent-btn"])
	with gr.Group():
	gr.Markdown("### Implementation + Execution Log")
	log = gr.Textbox(label="📖 Execution Log", lines=20, interactive=False)
	code_display = gr.Code(label="🧑‍💻 Implementation", language="python", interactive=False)

	with gr.Column():
	response = gr.Textbox(label="🤖 ExperimentAgent Response", lines=30, interactive=False)
	feedback = gr.Textbox(placeholder="N/A", label="🧑‍🔬 User Feedback", lines=3, interactive=True)
	submit_button = gr.Button("Submit", elem_classes=["Submit-btn"])

	# When the stored hypothesis changes, copy hypothesis and plan into the
	# read-only Stage 2 boxes and put the placeholder text into the code panel.
	hypothesis_state.change(
	fn=load_phase_2_inputs,
	inputs=[hypothesis_state, experiment_plan_state],
	outputs=[idea_input, plan_input, code_display]
	)

	# Start research agent. Four outputs are listed, so the handler has to
	# supply four values on every return path.
	start_exp_agnet.click(
	fn=start_experiment_agent,
	inputs=[hypothesis_state, experiment_plan_state],
	outputs=[code_display, log, response, feedback]
	)

	# Advance the scripted run with the user's feedback. Four outputs again; the
	# last one writes back to the feedback box.
	submit_button.click(
	fn=submit_feedback,
	inputs=[feedback, log, response],
	outputs=[log, response, code_display, feedback]
	)

	# Script entry point: initialise the step counter and launch the app.
	# step_index is only created here and in start_experiment_agent; nothing
	# defines it at import time.
	if __name__ == "__main__":
	step_index = 0
	app.launch(share=True)