Lim0011 committed on
Commit fd63d9f
1 Parent(s): f4a4a2e

Update app.py

Files changed (1)
  1. app.py +149 -164
app.py CHANGED
@@ -11,29 +11,74 @@ from reactagent.users.user import User
  # Global variables to store session state
  env = None
  agent = None
-
- # Predefined research paper text (example)
- predefined_paper_text = """
- Title:
- Dataset and Baseline for Automatic Student Feedback Analysis
-
- Abstract:
- This paper presents a student feedback corpus containing 3000 instances of feedback written by university students. The dataset has been annotated for aspect terms, opinion terms, polarities of the opinion terms towards targeted aspects, document-level opinion polarities, and sentence separations. A hierarchical taxonomy for aspect categorization covering all areas of the teaching-learning process was developed. Both implicit and explicit aspects were annotated using this taxonomy. The paper discusses the annotation methodology, difficulties faced during the annotation, and details about aspect term categorization. The annotated corpus can be used for Aspect Extraction, Aspect Level Sentiment Analysis, and Document Level Sentiment Analysis. Baseline results for all three tasks are provided.
- """
-
- # Predefined extracted elements based on the paper text
- predefined_research_tasks = "The primary research tasks include the creation of a comprehensive student feedback corpus, aspect term annotation, opinion polarity annotation, and the development of a hierarchical taxonomy."
- predefined_research_gaps = "Gaps include the lack of detailed aspect-level annotations in existing datasets and the focus on document-level sentiment analysis."
- predefined_keywords = "Student Feedback Corpus, Aspect Terms, Opinion Terms, Polarity, Hierarchical Taxonomy, Aspect Extraction, Aspect Level Sentiment Analysis, Document Level Sentiment Analysis"
- predefined_recent_works = """
- 1. "Students feedback analysis model using deep learning-based method and linguistic knowledge for intelligent educational systems."
- 2. "An Automated Approach for Analysing Students Feedback Using Sentiment Analysis Techniques."
- """

  # Extraction function to simulate the extraction of Research Tasks (t), Research Gaps (g), Keywords (k), and Recent Works (R)
  def extract_research_elements(paper_text):
-     # Returning the predefined extracted content
-     return predefined_research_tasks, predefined_research_gaps, predefined_keywords, predefined_recent_works

  # Generation function for Research Hypothesis and Experiment Plan
  def generate_research_idea_and_plan(tasks, gaps, keywords, recent_works):
@@ -131,37 +176,9 @@ Objective: Understand the training script, including data processing, [...]
  [Feedback]: The script structure is clear, but key functions (train_model, predict) need proper implementation for proposed model training and prediction.
  """

- predefined_response = """
- [Reasoning]: Execute the "final_model.py" using ExecuteScript action to evaluate performance of the final model.
- [Action]: Execute "final_model.py" using ExecuteScript action.
- Input: {"script_name": "final_model.py"}
- """
-
- predefined_observation = """
- Epoch [1/10],
- Train MSE: 0.543,
- Test MSE: 0.688
- Epoch [2/10],
- Train MSE: 0.242,
- Test MSE: 0.493
- """
-
- # # Structured input as list of dictionaries
- # process_steps = [
- #     "Action: Inspect Script Lines (train.py)\nObservation: The train.py script imports necessary libraries (e.g., pandas, sklearn, torch). Sets random seeds for reproducibility. Defines compute_metrics_for_regression function to calculate RMSE for different dimensions. Placeholder functions train_model and predict exist without implementations.\nFeedback: The script structure is clear, but key functions (train_model, predict) need proper implementation for proposed model training and prediction.",
- #     "Action: Execute Script (train.py)\nObservation: The script executed successfully. Generated embeddings using the BERT model. Completed the training process without errors. Metrics calculation placeholders indicated areas needing implementation.\nFeedback: Experimental model definition and training logic are missing.",
- #     "Action: Edit Script (train.py)\nObservation: Edited train.py to separate data loading, model definition, training loop, and evaluation into distinct functions. The edited train.py now has clearly defined functions for data loading (load_data), model definition (build_model), training (train_model), and evaluation (evaluate_model). Similarly, eval.py is reorganized to load the model and perform predictions efficiently.\nFeedback: Modify model architecture, retrieve the hybrid model of CNN, BiLSTM, and attention mechanisms, similar to the DTLP to align with the experiment design.",
- #     "Action: Retrieve Model\nObservation: CNN and BiLSTM retrieved.\nFeedback: Modify the model architecture.",
- #     "Action: Execute Script (train.py)\nObservation: The model trained over the specified number of epochs. Training and validation loss values are recorded for each epoch, the decrease in loss indicates improved model performance.\nFeedback: Continue with the next steps in model evaluation.",
- #     predefined_observation
- # ]

- action_list = [
-     predefined_response,
-     predefined_observation
- ]
  # Predefined code to display in Phase 2
  predefined_code = """import pandas as pd
  from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
@@ -474,72 +491,41 @@ if __name__ == "__main__":
  """


- class SessionInfo:
-     def __init__(self):
-         self.coro_cache = {}
-         self.parser = create_parser()
-
-     def make_session(self, prompt, session_hash):
-         id = session_hash
-
-         llm_name='claude-3-5-sonnet-20240620'
-         fastllm_name='claude-3-haiku-20240307'
-         rawargs = [
-             '--research-problem', prompt,
-             '--log-dir', str(Path('logs', id)),
-             '--work-dir', str(Path('workspaces', id)),
-             '--llm-name', llm_name,
-             '--edit-script-llm-name', llm_name,
-             '--fast-llm-name', fastllm_name,
-         ]
-
-         args = self.parser.parse_args(rawargs)
-         # llm.FAST_MODEL = args.fast_llm_name
-         env = Environment(args)
-         # agent = ResearchAgent(args, env)
-         coro = agent.run(env)
-
-         self.coro_cache[id] = coro
-         return id
-
-     def get_response(self, human_input, session_hash):
-         coro_input = human_input
-         if session_hash not in self.coro_cache:
-             self.make_session(human_input, session_hash)
-             coro_input = None
-
-         try:
-             output = self.coro_cache[session_hash].send(coro_input)
-         except StopIteration:
-             output = None
-             del self.coro_cache[session_hash]
-
-         return output
-
- session_info = SessionInfo()
-
- def info_to_message(info):
-     msg = ""
-     for k, v in info.items():
-         if isinstance(v, dict):
-             tempv = v
-             v = ""
-             for k2, v2 in tempv.items():
-                 v += f"{k2}:\n {v2}\n"
-             v = User.indent_text(v, 2)
-         msg += '-' * 64
-         msg += '\n'
-         msg += f"{k}:\n{v}\n"
-
-     msg += "Please provide feedback based on the history, response entries, and observation, and questions: "
-     return msg
-
- def predict(message, history, request: gr.Request):
-     response = session_info.get_response(message, request.session_hash)
-     if response is None:
-         return "Agent is finished. Enter a new instruction."
-     return info_to_message(response)

  # Initialize the global step_index and history
  process_steps = [
@@ -584,40 +570,35 @@ process_steps = [
      "Observation": predefined_observation,
      }
  ]

- # step_index = 0
- # def info_to_message(info):
- #     msg = "Agent Response:\n"
- #     for k, v in info.items():
- #         if isinstance(v, dict):
- #             tempv = v
- #             v = ""
- #             for k2, v2 in tempv.items():
- #                 v += f"{k2}:\n {v2}\n"
- #             v = User.indent_text(v, 2)
- #         msg += '-' * 64
- #         msg += '\n'
- #         msg += f"{k}:\n{v}\n"
-
- #     msg += "Please provide feedback based on the history, response entries, and observation, and questions: "
- #     print(msg)
- #     return msg
-
- # def predict(message, history):
- #     global step_index  # Declare the use of global variable
- #     if step_index < len(process_steps):
- #         response_info = process_steps[step_index]
- #         response = info_to_message(response_info)  # Convert dictionary to formatted string
- #         step_index += 1
- #     else:
- #         response = "Agent Finished."
-
- #     return response, "N/A"  # Return the formatted string and clear input

  # Gradio Interface
  with gr.Blocks() as app:
-     gr.Markdown("# AI Research Assistant with Research Agent")

      # Use state variables to store generated hypothesis and experiment plan
      hypothesis_state = gr.State("")
@@ -629,13 +610,13 @@ with gr.Blocks() as app:

      with gr.Row():
          with gr.Column():
-             paper_text_input = gr.Textbox(value=predefined_paper_text, lines=10, label="Research Paper Text")
              extract_button = gr.Button("Extract Research Elements")
              with gr.Row():
-                 tasks_output = gr.Textbox(placeholder="Research task definition", label="Research Tasks", lines=2, interactive=False)
-                 gaps_output = gr.Textbox(placeholder="Research gaps of current works", label="Research Gaps", lines=2, interactive=False)
-                 keywords_output = gr.Textbox(placeholder="Paper keywords", label="Keywords", lines=2, interactive=False)
-                 recent_works_output = gr.Textbox(placeholder="Recent works extracted from Semantic Scholar", label="Recent Works", lines=2, interactive=False)
          with gr.Column():
              with gr.Row():  # Move the button to the top right
                  generate_button = gr.Button("Generate Research Hypothesis & Experiment Plan")
@@ -663,6 +644,21 @@ with gr.Blocks() as app:
          outputs=[hypothesis_output, experiment_plan_output, hypothesis_state, experiment_plan_state]
      )

      # Phase 2: Interactive Session Tab
      with gr.Tab("Phase 2&3: Experiment implementation and execution"):
          gr.Markdown("### Interact with the ExperimentAgent")
@@ -680,40 +676,29 @@ with gr.Blocks() as app:
          code_display = gr.Code(label="Implementation", language="python", interactive=False)

          with gr.Column():
-             # chatbot = gr.ChatInterface(predict)
-             response = gr.Textbox(label = "ExperimentAgent Response", lines=30, interactive=False)
-             feedback = gr.Textbox(placeholder="N/A", label = "User Feedback", lines=3, interactive=True)
          submit_button = gr.Button("Submit", elem_classes=["Submit-btn"])

      def submit_feedback(user_feedback, history, previous_response):
          global step_index
          if_end = False
          step_index += 1
-         if (step_index >= len(process_steps)):
-             step_index = 0
-         msg = ""
          msg = history
          if step_index < len(process_steps):
-             msg += previous_response + "\nUser feedback:" + user_feedback +"\n\n"
              response_info = process_steps[step_index]
              response = info_to_message(response_info)  # Convert dictionary to formatted string
              step_index += 1
          else:
              if_end = True
              response = "Agent Finished."
-         msg += response

          return msg, response, predefined_code if if_end else final_code
-     # def predict(message, history):
-     #     global step_index  # Declare the use of global variable
-     #     if step_index < len(process_steps):
-     #         response_info = process_steps[step_index]
-     #         response = info_to_message(response_info)  # Convert dictionary to formatted string
-     #         step_index += 1
-     #     else:
-     #         response = "Agent Finished."
-
-     # Automatically populate the hypothesis and plan in Phase 2
      def load_phase_2_inputs(hypothesis, plan):
          return hypothesis, plan, "# Code implementation will be displayed here after Start ExperimentAgent."
@@ -734,6 +719,7 @@ with gr.Blocks() as app:
          inputs=[hypothesis_state, experiment_plan_state],
          outputs=[code_display, log]
      )
      submit_button.click(
          fn=submit_feedback,
          inputs=[feedback, log, response],
@@ -741,6 +727,5 @@ with gr.Blocks() as app:
      )

  if __name__ == "__main__":
-     # app.launch(share=True)
      step_index = 0
      app.launch()

  # Global variables to store session state
  env = None
  agent = None
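+ # Hardcoded demo papers keyed by integer id (1 and 2); load_example() and extract_research_elements() pull their fields from this dict.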
+ example_data = {
+     1: {
+         "title": "Dataset and Baseline for Automatic Student Feedback Analysis",
+         "abstract": """
+         This paper presents a student feedback corpus containing 3000 instances of feedback written by university students.
+         The dataset has been annotated for aspect terms, opinion terms, polarities of the opinion terms towards targeted aspects,
+         document-level opinion polarities, and sentence separations. A hierarchical taxonomy for aspect categorization
+         covering all areas of the teaching-learning process was developed. Both implicit and explicit aspects were annotated
+         using this taxonomy. The paper discusses the annotation methodology, difficulties faced during the annotation,
+         and details about aspect term categorization. The annotated corpus can be used for Aspect Extraction,
+         Aspect Level Sentiment Analysis, and Document Level Sentiment Analysis. Baseline results for all three tasks are provided.
+         """,
+         "research_tasks": "The primary research tasks include the creation of a comprehensive student feedback corpus, aspect term annotation, opinion polarity annotation, and the development of a hierarchical taxonomy.",
+         "research_gaps": "Gaps include the lack of detailed aspect-level annotations in existing datasets and the focus on document-level sentiment analysis.",
+         "keywords": "Student Feedback Corpus, Aspect Terms, Opinion Terms, Polarity, Hierarchical Taxonomy, Aspect Extraction, Aspect Level Sentiment Analysis, Document Level Sentiment Analysis",
+         "recent_works": [
+             "Students feedback analysis model using deep learning-based method and linguistic knowledge for intelligent educational systems.",
+             "An Automated Approach for Analysing Students Feedback Using Sentiment Analysis Techniques."
+         ]
+     },
+     2: {
+         "title": "An Empirical Study on the Impact of Code Review on Software Quality",
+         "abstract": """
+         This paper presents an empirical study examining the impact of code reviews on the quality of software projects.
+         The study involved analyzing over 500,000 code reviews across 20 open-source projects on GitHub.
+         The analysis was conducted to assess the relationship between code review practices and key software quality metrics,
+         such as defect density, code churn, and the frequency of post-release defects. The findings suggest that code reviews,
+         particularly when conducted by experienced reviewers, significantly reduce the number of defects in the codebase.
+         The paper discusses the methodology used for data collection, the statistical methods employed for analysis,
+         and the implications of these findings for software development practices.
+         """,
+         "research_tasks": "The primary research tasks include collecting and analyzing data on code reviews from open-source projects, measuring software quality metrics, and assessing the correlation between code review practices and software quality.",
+         "research_gaps": "Gaps include the lack of large-scale empirical studies that quantify the impact of code reviews on software quality and the limited focus on the role of reviewer expertise in existing literature.",
+         "keywords": "Code Reviews, Software Quality, Defect Density, Code Churn, Post-Release Defects, Empirical Study, Open-Source Projects, GitHub",
+         "recent_works": [
+             "The Effectiveness of Code Reviews in Identifying Defects: A Meta-Analysis of Empirical Studies",
+             "A Study on the Impact of Code Review Tools on Developer Productivity and Software Quality"
+         ]
+     }
+ }
+
+ # # Predefined research paper text (example)
+ # predefined_paper_text = """
+ # Title:
+ # Dataset and Baseline for Automatic Student Feedback Analysis
+
+ # Abstract:
+ # This paper presents a student feedback corpus containing 3000 instances of feedback written by university students. The dataset has been annotated for aspect terms, opinion terms, polarities of the opinion terms towards targeted aspects, document-level opinion polarities, and sentence separations. A hierarchical taxonomy for aspect categorization covering all areas of the teaching-learning process was developed. Both implicit and explicit aspects were annotated using this taxonomy. The paper discusses the annotation methodology, difficulties faced during the annotation, and details about aspect term categorization. The annotated corpus can be used for Aspect Extraction, Aspect Level Sentiment Analysis, and Document Level Sentiment Analysis. Baseline results for all three tasks are provided.
+ # """
+
+ # # Predefined extracted elements based on the paper text
+ # predefined_research_tasks = "The primary research tasks include the creation of a comprehensive student feedback corpus, aspect term annotation, opinion polarity annotation, and the development of a hierarchical taxonomy."
+ # predefined_research_gaps = "Gaps include the lack of detailed aspect-level annotations in existing datasets and the focus on document-level sentiment analysis."
+ # predefined_keywords = "Student Feedback Corpus, Aspect Terms, Opinion Terms, Polarity, Hierarchical Taxonomy, Aspect Extraction, Aspect Level Sentiment Analysis, Document Level Sentiment Analysis"
+ # predefined_recent_works = """
+ # 1. "Students feedback analysis model using deep learning-based method and linguistic knowledge for intelligent educational systems."
+ # 2. "An Automated Approach for Analysing Students Feedback Using Sentiment Analysis Techniques."
+ # """

  # Extraction function to simulate the extraction of Research Tasks (t), Research Gaps (g), Keywords (k), and Recent Works (R)
  def extract_research_elements(paper_text):
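+     # paper_text is ignored in this demo; the returned fields come from the example selected via index_ex.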
+     global index_ex
+     example = example_data[index_ex]
+     tasks = example['research_tasks']
+     gaps = example['research_gaps']
+     keywords = example['keywords']
+     recent_works = "\n".join(example['recent_works'])
+     return tasks, gaps, keywords, recent_works

  # Generation function for Research Hypothesis and Experiment Plan
  def generate_research_idea_and_plan(tasks, gaps, keywords, recent_works):
 
  [Feedback]: The script structure is clear, but key functions (train_model, predict) need proper implementation for proposed model training and prediction.
  """

  # Predefined code to display in Phase 2
  predefined_code = """import pandas as pd
  from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
 
  """

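+ # NOTE: this redefines the example_data dict already declared near the top of the file, with essentially the same content.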
+ # Example data structure
+ example_data = {
+     1: {
+         "title": "Dataset and Baseline for Automatic Student Feedback Analysis",
+         "abstract": "This paper presents a student feedback corpus containing 3000 instances of feedback written by university students. The dataset has been annotated for aspect terms, opinion terms, polarities of the opinion terms towards targeted aspects, document-level opinion polarities, and sentence separations. A hierarchical taxonomy for aspect categorization covering all areas of the teaching-learning process was developed. Both implicit and explicit aspects were annotated using this taxonomy. The paper discusses the annotation methodology, difficulties faced during the annotation, and details about aspect term categorization. The annotated corpus can be used for Aspect Extraction, Aspect Level Sentiment Analysis, and Document Level Sentiment Analysis. Baseline results for all three tasks are provided.",
+         "research_tasks": "The primary research tasks include the creation of a comprehensive student feedback corpus, aspect term annotation, opinion polarity annotation, and the development of a hierarchical taxonomy.",
+         "research_gaps": "Gaps include the lack of detailed aspect-level annotations in existing datasets and the focus on document-level sentiment analysis.",
+         "keywords": "Student Feedback Corpus, Aspect Terms, Opinion Terms, Polarity, Hierarchical Taxonomy, Aspect Extraction, Aspect Level Sentiment Analysis, Document Level Sentiment Analysis",
+         "recent_works": [
+             "Students feedback analysis model using deep learning-based method and linguistic knowledge for intelligent educational systems.",
+             "An Automated Approach for Analysing Students Feedback Using Sentiment Analysis Techniques."
+         ]
+     },
+     2: {
+         "title": "An Empirical Study on the Impact of Code Review on Software Quality",
+         "abstract": "This paper presents an empirical study examining the impact of code reviews on the quality of software projects. The study involved analyzing over 500,000 code reviews across 20 open-source projects on GitHub. The analysis was conducted to assess the relationship between code review practices and key software quality metrics, such as defect density, code churn, and the frequency of post-release defects. The findings suggest that code reviews, particularly when conducted by experienced reviewers, significantly reduce the number of defects in the codebase. The paper discusses the methodology used for data collection, the statistical methods employed for analysis, and the implications of these findings for software development practices.",
+         "research_tasks": "The primary research tasks include collecting and analyzing data on code reviews from open-source projects, measuring software quality metrics, and assessing the correlation between code review practices and software quality.",
+         "research_gaps": "Gaps include the lack of large-scale empirical studies that quantify the impact of code reviews on software quality and the limited focus on the role of reviewer expertise in existing literature.",
+         "keywords": "Code Reviews, Software Quality, Defect Density, Code Churn, Post-Release Defects, Empirical Study, Open-Source Projects, GitHub",
+         "recent_works": [
+             "The Effectiveness of Code Reviews in Identifying Defects: A Meta-Analysis of Empirical Studies",
+             "A Study on the Impact of Code Review Tools on Developer Productivity and Software Quality"
+         ]
+     }
+ }

+ predefined_observation = """
+ Epoch [1/10],
+ Train MSE: 0.543,
+ Test MSE: 0.688
+ Epoch [2/10],
+ Train MSE: 0.242,
+ Test MSE: 0.493
+ """

  # Initialize the global step_index and history
  process_steps = [
      "Observation": predefined_observation,
      }
  ]
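+ # Format one process-step dict into a readable message; nested dict values are flattened and indented, and each key is preceded by a '-' * 64 separator.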
+ def info_to_message(info):
+     msg = ""
+     for k, v in info.items():
+         if isinstance(v, dict):
+             tempv = v
+             v = ""
+             for k2, v2 in tempv.items():
+                 v += f"{k2}:\n {v2}\n"
+             v = User.indent_text(v, 2)
+         msg += '-' * 64
+         msg += '\n'
+         msg += f"{k}:\n{v}\n"
+     return msg

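+ # Module-level selector for the active example; set by load_example() and read by extract_research_elements().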
+ index_ex = 1
+ # Function to handle the selection of an example and populate the respective fields
+ def load_example(example_id):
+     global index_ex
+     index_ex = example_id
+     example = example_data[example_id]
+     paper_text = 'Title:\t' + example['title'] + '\nAbstract:\t' + example['abstract']
+     return paper_text

  # Gradio Interface
  with gr.Blocks() as app:
+     gr.Markdown("# MLR-Copilot: AI Research Assistant with Research Agent")
+     gr.Markdown("### MLR-Copilot is a framework where LLMs mimic researchers’ thought processes, designed to enhance the productivity of machine learning research by automating the generation and implementation of research ideas. It begins with a research paper, autonomously generating and validating these ideas, while incorporating human feedback to help reach executable research outcomes.")
+

      # Use state variables to store generated hypothesis and experiment plan
      hypothesis_state = gr.State("")
 

      with gr.Row():
          with gr.Column():
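+             # Seed the paper textbox with Example 1; calling load_example(1) here also initializes index_ex at UI build time.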
+             paper_text_input = gr.Textbox(value=load_example(1), lines=10, label="Research Paper Text")
              extract_button = gr.Button("Extract Research Elements")
              with gr.Row():
+                 tasks_output = gr.Textbox(placeholder="Research task definition", label="Research Tasks", lines=2, interactive=True)
+                 gaps_output = gr.Textbox(placeholder="Research gaps of current works", label="Research Gaps", lines=2, interactive=True)
+                 keywords_output = gr.Textbox(placeholder="Paper keywords", label="Keywords", lines=2, interactive=True)
+                 recent_works_output = gr.Textbox(placeholder="Recent works extracted from Semantic Scholar", label="Recent Works", lines=2, interactive=True)
          with gr.Column():
              with gr.Row():  # Move the button to the top right
                  generate_button = gr.Button("Generate Research Hypothesis & Experiment Plan")
 
          outputs=[hypothesis_output, experiment_plan_output, hypothesis_state, experiment_plan_state]
      )

+     # Example Buttons
+     with gr.Row():
+         example_1_button = gr.Button("Load Example 1:")
+         example_2_button = gr.Button("Load Example 2:")
+
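+     # Each button only refreshes the paper textbox; load_example() also updates the global index_ex so later extraction uses the chosen example.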
+     example_1_button.click(
+         fn=lambda: load_example(1),
+         outputs=[paper_text_input]
+     )
+
+     example_2_button.click(
+         fn=lambda: load_example(2),
+         outputs=[paper_text_input]
+     )
+
      # Phase 2: Interactive Session Tab
      with gr.Tab("Phase 2&3: Experiment implementation and execution"):
          gr.Markdown("### Interact with the ExperimentAgent")
 
          code_display = gr.Code(label="Implementation", language="python", interactive=False)

          with gr.Column():
+             response = gr.Textbox(label="ExperimentAgent Response", lines=30, interactive=False)
+             feedback = gr.Textbox(placeholder="N/A", label="User Feedback", lines=3, interactive=True)
          submit_button = gr.Button("Submit", elem_classes=["Submit-btn"])

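      # Replay the next scripted process step: append the user's feedback to the running history, emit the next formatted response, and signal completion once the steps are exhausted.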
      def submit_feedback(user_feedback, history, previous_response):
          global step_index
          if_end = False
          step_index += 1
          msg = history
          if step_index < len(process_steps):
+             msg += previous_response + "\nUser feedback:" + user_feedback + "\n\n"
              response_info = process_steps[step_index]
              response = info_to_message(response_info)  # Convert dictionary to formatted string
+             response += "Please provide feedback based on the history, response entries, and observation, and questions: "
              step_index += 1
+             msg += response
          else:
              if_end = True
              response = "Agent Finished."

          return msg, response, predefined_code if if_end else final_code

      def load_phase_2_inputs(hypothesis, plan):
          return hypothesis, plan, "# Code implementation will be displayed here after Start ExperimentAgent."

 
          inputs=[hypothesis_state, experiment_plan_state],
          outputs=[code_display, log]
      )
+
      submit_button.click(
          fn=submit_feedback,
          inputs=[feedback, log, response],
      )

  if __name__ == "__main__":
      step_index = 0
      app.launch()