Lim0011 committed
Commit 305c7c5
1 Parent(s): fd63d9f

Update app.py

Files changed (1):
  1. app.py +104 -551
app.py CHANGED
@@ -1,4 +1,3 @@
- import random
  import gradio as gr
  from pathlib import Path
  from reactagent.environment import Environment
@@ -6,72 +5,45 @@ from reactagent.agents.agent_research import ResearchAgent
  from reactagent.runner import create_parser
  from reactagent import llm
  from reactagent.users.user import User
+ import json
 
 
  # Global variables to store session state
  env = None
  agent = None
- example_data = {
-     1: {
-         "title": "Dataset and Baseline for Automatic Student Feedback Analysis",
-         "abstract": """
- This paper presents a student feedback corpus containing 3000 instances of feedback written by university students.
- The dataset has been annotated for aspect terms, opinion terms, polarities of the opinion terms towards targeted aspects,
- document-level opinion polarities, and sentence separations. A hierarchical taxonomy for aspect categorization
- covering all areas of the teaching-learning process was developed. Both implicit and explicit aspects were annotated
- using this taxonomy. The paper discusses the annotation methodology, difficulties faced during the annotation,
- and details about aspect term categorization. The annotated corpus can be used for Aspect Extraction,
- Aspect Level Sentiment Analysis, and Document Level Sentiment Analysis. Baseline results for all three tasks are provided.
-         """,
-         "research_tasks": "The primary research tasks include the creation of a comprehensive student feedback corpus, aspect term annotation, opinion polarity annotation, and the development of a hierarchical taxonomy.",
-         "research_gaps": "Gaps include the lack of detailed aspect-level annotations in existing datasets and the focus on document-level sentiment analysis.",
-         "keywords": "Student Feedback Corpus, Aspect Terms, Opinion Terms, Polarity, Hierarchical Taxonomy, Aspect Extraction, Aspect Level Sentiment Analysis, Document Level Sentiment Analysis",
-         "recent_works": [
-             "Students feedback analysis model using deep learning-based method and linguistic knowledge for intelligent educational systems.",
-             "An Automated Approach for Analysing Students Feedback Using Sentiment Analysis Techniques."
-         ]
-     },
-     2: {
-         "title": "An Empirical Study on the Impact of Code Review on Software Quality",
-         "abstract": """
- This paper presents an empirical study examining the impact of code reviews on the quality of software projects.
- The study involved analyzing over 500,000 code reviews across 20 open-source projects on GitHub.
- The analysis was conducted to assess the relationship between code review practices and key software quality metrics,
- such as defect density, code churn, and the frequency of post-release defects. The findings suggest that code reviews,
- particularly when conducted by experienced reviewers, significantly reduce the number of defects in the codebase.
- The paper discusses the methodology used for data collection, the statistical methods employed for analysis,
- and the implications of these findings for software development practices.
-         """,
-         "research_tasks": "The primary research tasks include collecting and analyzing data on code reviews from open-source projects, measuring software quality metrics, and assessing the correlation between code review practices and software quality.",
-         "research_gaps": "Gaps include the lack of large-scale empirical studies that quantify the impact of code reviews on software quality and the limited focus on the role of reviewer expertise in existing literature.",
-         "keywords": "Code Reviews, Software Quality, Defect Density, Code Churn, Post-Release Defects, Empirical Study, Open-Source Projects, GitHub",
-         "recent_works": [
-             "The Effectiveness of Code Reviews in Identifying Defects: A Meta-Analysis of Empirical Studies",
-             "A Study on the Impact of Code Review Tools on Developer Productivity and Software Quality"
-         ]
-     }
- }
-
- # # Predefined research paper text (example)
- # predefined_paper_text = """
- # Title:
- # Dataset and Baseline for Automatic Student Feedback Analysis
-
- # Abstract:
- # This paper presents a student feedback corpus containing 3000 instances of feedback written by university students. The dataset has been annotated for aspect terms, opinion terms, polarities of the opinion terms towards targeted aspects, document-level opinion polarities, and sentence separations. A hierarchical taxonomy for aspect categorization covering all areas of the teaching-learning process was developed. Both implicit and explicit aspects were annotated using this taxonomy. The paper discusses the annotation methodology, difficulties faced during the annotation, and details about aspect term categorization. The annotated corpus can be used for Aspect Extraction, Aspect Level Sentiment Analysis, and Document Level Sentiment Analysis. Baseline results for all three tasks are provided.
- # """
-
- # # Predefined extracted elements based on the paper text
- # predefined_research_tasks = "The primary research tasks include the creation of a comprehensive student feedback corpus, aspect term annotation, opinion polarity annotation, and the development of a hierarchical taxonomy."
- # predefined_research_gaps = "Gaps include the lack of detailed aspect-level annotations in existing datasets and the focus on document-level sentiment analysis."
- # predefined_keywords = "Student Feedback Corpus, Aspect Terms, Opinion Terms, Polarity, Hierarchical Taxonomy, Aspect Extraction, Aspect Level Sentiment Analysis, Document Level Sentiment Analysis"
- # predefined_recent_works = """
- # 1. "Students feedback analysis model using deep learning-based method and linguistic knowledge for intelligent educational systems."
- # 2. "An Automated Approach for Analysing Students Feedback Using Sentiment Analysis Techniques."
- # """
-
- # Extraction function to simulate the extraction of Research Tasks (t), Research Gaps (g), Keywords (k), and Recent Works (R)
+ state_extract = False
+ state_generate = False
+ state_agent = False
+ state_complete = False
+ index_ex = "1"
+
+
+ # Load example JSON file
+ def load_example_data():
+     with open("example/example_data.json", "r") as json_file:
+         return json.load(json_file)
+
+ example_data = load_example_data()
+
+ with open("example/ex1_init.py", "r") as f:
+     predefined_code = f.read()
+
+ with open("example/ex1_final.py", "r") as f:
+     final_code = f.read()
+
+ # Function to handle the selection of an example and populate the respective fields
+ def load_example(example_id):
+     global index_ex
+     index_ex = str(example_id)
+     example = example_data[index_ex]
+     paper_text = 'Title:\t' + example['title'] + '\nAbstract:\t' + example['abstract']
+     return paper_text
+
+ ########## Phase 1 ##############
+
  def extract_research_elements(paper_text):
+     global state_extract
+     state_extract = True
      global index_ex
      example = example_data[index_ex]
      tasks = example['research_tasks']
@@ -80,92 +52,52 @@ def extract_research_elements(paper_text):
      recent_works = "\n".join(example['recent_works'])
      return tasks, gaps, keywords, recent_works
 
- # Generation function for Research Hypothesis and Experiment Plan
- def generate_research_idea_and_plan(tasks, gaps, keywords, recent_works):
-     hypothesis = f"""
- Method: Advanced Aspect-Level Sentiment Analysis of Student Feedback Using a Hybrid Deep Learning Approach
-
- Step 1: Dataset Enhancement
-
- Data Collection and Preprocessing
- * Collect additional student feedback from multiple universities to expand the existing dataset.
- * Preprocess the data to ensure uniformity in annotation and eliminate noise, such as redundant information and grammatical errors.
- Annotation Refinement
- * Use advanced NLP techniques to further refine the aspect terms, opinion terms, and polarities.
- * Incorporate semi-supervised learning methods to improve annotation accuracy, utilizing both manual and automated processes.
-
- Step 2: Model Development
- Hybrid Model Architecture
- * Develop a hybrid model that integrates CNN, BiLSTM, and attention mechanisms, similar to the DTLP approach mentioned in the recent work by DTLP (Deep Learning and Teaching Process).
- * Incorporate a Transformer-based model (like BERT) to capture contextual nuances and improve the understanding of implicit aspects.
- Feature Integration
- * Enhance the feature set by combining statistical, linguistic, and sentiment knowledge features with word embeddings.
- * Include sentiment shifter rules and contextual polarity indicators to address challenges in sentiment analysis.
-
- Step 3: Training and Validation
- Model Training
- * Train the hybrid model using the enhanced dataset.
- * Use cross-validation techniques to ensure robustness and prevent overfitting.
- Baseline Comparisons
- * Compare the model's performance with baseline results provided in the original study and other recent works.
- * Use metrics such as accuracy, precision, recall, and F1-score to evaluate model performance across different tasks, including Aspect Extraction, Aspect Level Sentiment Analysis, and Document Level Sentiment Analysis.
-
- Step 4: Iterative Refinement
- Feedback Loop
- * Implement an iterative feedback loop where the model's predictions are reviewed and corrected, improving the model iteratively.
- * Engage domain experts in the review process to ensure the relevance and accuracy of the feedback. Continuous Learning
- * Utilize active learning techniques to continuously update the model with new data, ensuring it remains up-to-date with current trends in student feedback.
-
- Step 5: Deployment and Application
- Integration with Educational Systems
- * Deploy the model as a part of an intelligent educational system to analyze student feedback in real-time.
- * Provide actionable insights to educators and administrators to improve teaching methods and curriculum design. User Interface Development
- * Develop an intuitive user interface that allows educators to interact with the model, view feedback analysis, and generate reports.
-     """
-
-     experiment_plan = f"""
- Experiment: Validating the Hybrid Deep Learning Approach for Aspect-Level Sentiment Analysis of Student Feedback
-
- Objective:
- To validate the effectiveness of the proposed hybrid deep learning approach (combining CNN, BiLSTM, and Transformer models) for aspect-level sentiment analysis of student feedback by comparing its performance with baseline methods and recent works.
- Research Problem:
- Current sentiment analysis models for student feedback lack detailed aspect-level annotations and fail to address implicit aspects and contextual nuances in feedback data.
- Proposed Method:
- A hybrid deep learning model integrating CNN, BiLSTM, and Transformer-based models (like BERT) to enhance aspect-level sentiment analysis. The method incorporates sentiment shifter rules and contextual polarity indicators to address challenges in sentiment analysis.
-
- Experiment Design:
- 1. Dataset Preparation:
- * Existing Dataset: Use the dataset provided by Herath et al. (2022) with 3000 instances of student feedback, annotated for aspect terms, opinion terms, polarities, and document-level sentiments.
- * Data Augmentation: Expand the dataset by collecting additional feedback from multiple universities, ensuring diversity in feedback data.
- 2. Preprocessing:
- * Clean the data to remove noise and inconsistencies.
- * Tokenize the text and apply part-of-speech tagging.
- * Annotate additional feedback instances using the refined hierarchical taxonomy.
- 3. Model Training:
- * Baseline Models: Implement and train traditional machine learning models (e.g., SVM, Naive Bayes) and existing deep learning models (e.g., LSTM, BiLSTM) for sentiment analysis.
- * Proposed Hybrid Model: Train the proposed hybrid model combining CNN, BiLSTM, and Transformer (BERT) layers. Use pre-trained embeddings and fine-tune on the feedback dataset.
- 4. Feature Extraction:
- * Extract features using word embeddings, sentiment shifter rules, and contextual polarity indicators.
- * Integrate statistical, linguistic, and sentiment knowledge features with word embeddings to form a comprehensive feature set.
- 5. Evaluation Metrics:
- * Measure the performance of models using accuracy, precision, recall, and F1-score.
- * Perform aspect-level evaluation by analyzing the accuracy of aspect term extraction and sentiment classification.
- 6. Experiment Execution:
- * Training Phase: Train the baseline models and the proposed hybrid model on the training dataset.
- * Validation Phase: Validate the models using cross-validation techniques to ensure robustness and prevent overfitting.
- * Testing Phase: Evaluate the models on a held-out test set to compare their performance.
- 7. Comparison and Analysis:
- * Compare the performance of the proposed hybrid model with baseline models and recent works, such as DTLP and other sentiment analysis techniques.
- * Analyze the results to identify strengths and weaknesses of the proposed model in handling aspect-level sentiment analysis and implicit aspects.
- 8. Iterative Refinement:
- * Implement an iterative feedback loop where predictions are reviewed and corrected, improving model performance over iterations.
- * Engage domain experts to review the model's predictions and provide feedback for further refinement.
- 9. Deployment:
- * Integrate the validated model into an intelligent educational system for real-time feedback analysis.
- * Develop a user interface to allow educators to interact with the model, view feedback analysis, and generate reports.
-     """
-
-     return hypothesis, experiment_plan
+
+ # Step 2: Generate Research Hypothesis and Experiment Plan
+ def generate_and_store(tasks, gaps, keywords, recent_works):
+     if (not state_extract):
+         return "", "", "", ""
+     global state_generate
+     state_generate = True
+     global index_ex
+     hypothesis = example_data[index_ex]['hypothesis']
+     experiment_plan = example_data[index_ex]['experiment_plan']
+     return hypothesis, experiment_plan, hypothesis, experiment_plan
+
+ ########## Phase 2 & 3 ##############
+ def start_experiment_agent(hypothesis, plan):
+     if (not state_extract or not state_generate):
+         return "", ""
+     global state_agent
+     state_agent = True
+     predefined_message = f"Implement the following hypothesis and experiment plan:\n\nHypothesis:\n{hypothesis}\n\nExperiment Plan:\n{plan}"
+     return predefined_code, predefined_action_log
+
+ def submit_feedback(user_feedback, history, previous_response):
+     if (not state_extract or not state_generate or not state_agent):
+         return "", "", ""
+     global step_index
+     global state_complete
+     step_index += 1
+     msg = history
+     if step_index < len(process_steps):
+         msg += previous_response + "\nUser feedback:" + user_feedback + "\n\n"
+         response_info = process_steps[step_index]
+         response = info_to_message(response_info) # Convert dictionary to formatted string
+         response += "Please provide feedback based on the history, response entries, and observation, and questions: "
+         step_index += 1
+         msg += response
+     else:
+         state_complete = True
+         response = "Agent Finished."
+
+
+     return msg, response, predefined_code if state_complete else final_code
+
+ def load_phase_2_inputs(hypothesis, plan):
+     return hypothesis, plan, "# Code implementation will be displayed here after Start ExperimentAgent."
+
+
 
  predefined_action_log = """
  [Reasoning]: To understand the initial structure and functionality of train.py for effective improvements.
@@ -177,347 +109,6 @@ Objective: Understand the training script, including data processing, [...]
  """
 
 
-
-
- # Predefined code to display in Phase 2
- predefined_code = """import pandas as pd
- from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
- import numpy as np
- import random
- import torch
- from sklearn.model_selection import train_test_split
-
- DIMENSIONS = ["cohesion", "syntax", "vocabulary", "phraseology", "grammar", "conventions"]
- SEED = 42
-
- random.seed(SEED)
- torch.manual_seed(SEED)
- np.random.seed(SEED)
-
- def compute_metrics_for_regression(y_test, y_test_pred):
-     metrics = {}
-     for task in DIMENSIONS:
-         targets_task = [t[DIMENSIONS.index(task)] for t in y_test]
-         pred_task = [l[DIMENSIONS.index(task)] for l in y_test_pred]
-
-         rmse = mean_squared_error(targets_task, pred_task, squared=False)
-
-         metrics[f"rmse_{task}"] = rmse
-
-     return metrics
-
- def train_model(X_train, y_train, X_valid, y_valid):
-     model = None # Placeholder for model training
-     return model
-
- def predict(model, X):
-     y_pred = np.random.rand(len(X), len(DIMENSIONS))
-     return y_pred
-
- if __name__ == '__main__':
-
-     ellipse_df = pd.read_csv('train.csv',
-                              header=0, names=['text_id', 'full_text', 'Cohesion', 'Syntax',
-                                               'Vocabulary', 'Phraseology', 'Grammar', 'Conventions'],
-                              index_col='text_id')
-     ellipse_df = ellipse_df.dropna(axis=0)
-
-     data_df = ellipse_df
-     X = list(data_df.full_text.to_numpy())
-     y = np.array([data_df.drop(['full_text'], axis=1).iloc[i] for i in range(len(X))])
-
-     X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.10, random_state=SEED)
-
-     model = train_model(X_train, y_train, X_valid, y_valid)
-
-     y_valid_pred = predict(model, X_valid)
-     metrics = compute_metrics_for_regression(y_valid, y_valid_pred)
-     print(metrics)
-     print("final MCRMSE on validation set: ", np.mean(list(metrics.values())))
-
-     submission_df = pd.read_csv('test.csv', header=0, names=['text_id', 'full_text'], index_col='text_id')
-     X_submission = list(submission_df.full_text.to_numpy())
-     y_submission = predict(model, X_submission)
-     submission_df = pd.DataFrame(y_submission, columns=DIMENSIONS)
-     submission_df.index = submission_df.index.rename('text_id')
-     submission_df.to_csv('submission.csv')
- """
-
- final_code = """
- * Resulting train.py:
- import pandas as pd
- import numpy as np
- import torch
- import torch.nn as nn
- import torch.optim as optim
- from torch.utils.data import DataLoader, Dataset
- from transformers import BertTokenizer, BertModel
-
- # Define constants
- DIMENSIONS = ['cohesion', 'syntax', 'vocabulary', 'phraseology', 'grammar', 'conventions']
-
- class EssayDataset(Dataset):
-     def __init__(self, texts, targets, tokenizer, max_len):
-         self.texts = texts
-         self.targets = targets
-         self.tokenizer = tokenizer
-         self.max_len = max_len
-
-     def __len__(self):
-         return len(self.texts)
-
-     def __getitem__(self, item):
-         text = self.texts[item]
-         target = self.targets[item]
-
-         encoding = self.tokenizer.encode_plus(
-             text,
-             add_special_tokens=True,
-             max_length=self.max_len,
-             return_token_type_ids=False,
-             padding='max_length',
-             return_attention_mask=True,
-             return_tensors='pt',
-             truncation=True
-         )
-
-         return {
-             'text': text,
-             'input_ids': encoding['input_ids'].flatten(),
-             'attention_mask': encoding['attention_mask'].flatten(),
-             'targets': torch.tensor(target, dtype=torch.float)
-         }
-
- class EssayScoreRegressor(nn.Module):
-     def __init__(self, n_outputs):
-         super(EssayScoreRegressor, self).__init__()
-         self.bert = BertModel.from_pretrained('bert-base-uncased')
-         self.drop = nn.Dropout(p=0.3)
-         self.out = nn.Linear(self.bert.config.hidden_size, n_outputs)
-
-     def forward(self, input_ids, attention_mask):
-         pooled_output = self.bert(
-             input_ids=input_ids,
-             attention_mask=attention_mask
-         )['pooler_output']
-         output = self.drop(pooled_output)
-         return self.out(output)
-
- def train_epoch(model, data_loader, loss_fn, optimizer, device, scheduler, n_examples):
-     model = model.train()
-     losses = []
-
-     for d in data_loader:
-         input_ids = d['input_ids'].to(device)
-         attention_mask = d['attention_mask'].to(device)
-         targets = d['targets'].to(device)
-
-         outputs = model(input_ids=input_ids, attention_mask=attention_mask)
-         loss = loss_fn(outputs, targets)
-
-         losses.append(loss.item())
-
-         loss.backward()
-         optimizer.step()
-         scheduler.step()
-         optimizer.zero_grad()
-
-     return np.mean(losses)
-
- def train_model(train_data, val_data, tokenizer, model, optimizer, scheduler, device, epochs, batch_size, max_len):
-     train_dataset = EssayDataset(
-         texts=train_data['full_text'].to_numpy(),
-         targets=train_data[DIMENSIONS].to_numpy(),
-         tokenizer=tokenizer,
-         max_len=max_len
-     )
-
-     val_dataset = EssayDataset(
-         texts=val_data['full_text'].to_numpy(),
-         targets=val_data[DIMENSIONS].to_numpy(),
-         tokenizer=tokenizer,
-         max_len=max_len
-     )
-
-     train_data_loader = DataLoader(
-         train_dataset,
-         batch_size=batch_size,
-         shuffle=True
-     )
-
-     val_data_loader = DataLoader(
-         val_dataset,
-         batch_size=batch_size,
-         shuffle=False
-     )
-
-     loss_fn = nn.MSELoss().to(device)
-
-     for epoch in range(epochs):
-         print(f'Epoch {epoch + 1}/{epochs}')
-         print('-' * 10)
-
-         train_loss = train_epoch(
-             model,
-             train_data_loader,
-             loss_fn,
-             optimizer,
-             device,
-             scheduler,
-             len(train_dataset)
-         )
-
-         print(f'Train loss {train_loss}')
-
- if __name__ == "__main__":
-     df = pd.read_csv('train.csv')
-     tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
-     device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-     model = EssayScoreRegressor(n_outputs=len(DIMENSIONS))
-     model = model.to(device)
-
-     optimizer = optim.Adam(model.parameters(), lr=2e-5)
-     total_steps = len(df) // 16 * 5
-     scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=total_steps, gamma=0.1)
-
-     train_data = df.sample(frac=0.8, random_state=42)
-     val_data = df.drop(train_data.index)
-
-     train_model(train_data, val_data, tokenizer, model, optimizer, scheduler, device, epochs=5, batch_size=16, max_len=160)
-
- * eval.py
- import sys
- import os
- import pandas as pd
- import numpy as np
- import torch
- from torch.utils.data import DataLoader
- from transformers import BertTokenizer
- from importlib import reload
- import train
-
- # Constants
- DIMENSIONS = train.DIMENSIONS
-
- class EssayDataset(Dataset):
-     def __init__(self, texts, targets, tokenizer, max_len):
-         self.texts = texts
-         self.targets = targets
-         self.tokenizer = tokenizer
-         self.max_len = max_len
-
-     def __len__(self):
-         return len(self.texts)
-
-     def __getitem__(self, item):
-         text = self.texts[item]
-         target = self.targets[item]
-
-         encoding = self.tokenizer.encode_plus(
-             text,
-             add_special_tokens=True,
-             max_length=self.max_len,
-             return_token_type_ids=False,
-             padding='max_length',
-             return_attention_mask=True,
-             return_tensors='pt',
-             truncation=True
-         )
-
-         return {
-             'text': text,
-             'input_ids': encoding['input_ids'].flatten(),
-             'attention_mask': encoding['attention_mask'].flatten(),
-             'targets': torch.tensor(target, dtype=torch.float)
-         }
-
- def get_score(submission_folder="../env"):
-     submission_path = os.path.join(submission_folder, "submission.csv")
-     solution = pd.read_csv(os.path.join(os.path.dirname(__file__), "answer.csv"))[DIMENSIONS].to_numpy()
-     submission = pd.read_csv(submission_path)[DIMENSIONS].to_numpy()
-
-     metrics = train.compute_metrics_for_regression(solution, submission)
-     return np.mean(list(metrics.values()))
-
- def eval_model(model, data_loader, device, n_examples):
-     model = model.eval()
-     predictions = []
-
-     with torch.no_grad():
-         for d in data_loader:
-             input_ids = d['input_ids'].to(device)
-             attention_mask = d['attention_mask'].to(device)
-
-             outputs = model(input_ids=input_ids, attention_mask=attention_mask)
-             predictions.extend(outputs.cpu().numpy())
-
-     return predictions
-
- if __name__ == "__main__":
-     reload(train)
-     tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
-     device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-     model = train.EssayScoreRegressor(n_outputs=len(DIMENSIONS))
-     model.load_state_dict(torch.load('model.bin'))
-     model = model.to(device)
-
-     test_data = pd.read_csv('test.csv')
-     test_dataset = EssayDataset(
-         texts=test_data['full_text'].to_numpy(),
-         targets=np.zeros((len(test_data), len(DIMENSIONS))), # Dummy targets
-         tokenizer=tokenizer,
-         max_len=160
-     )
-
-     test_data_loader = DataLoader(
-         test_dataset,
-         batch_size=16,
-         shuffle=False
-     )
-
-     predictions = eval_model(
-         model,
-         test_data_loader,
-         device,
-         len(test_dataset)
-     )
-
-     submission = pd.DataFrame(predictions, columns=DIMENSIONS)
-     submission['text_id'] = test_data['text_id']
-     submission.to_csv(os.path.join("../env", 'submission.csv'), index=False)
-
-     print(get_score())
-
- """
-
-
- # Example data structure
- example_data = {
-     1: {
-         "title": "Dataset and Baseline for Automatic Student Feedback Analysis",
-         "abstract": "This paper presents a student feedback corpus containing 3000 instances of feedback written by university students. The dataset has been annotated for aspect terms, opinion terms, polarities of the opinion terms towards targeted aspects, document-level opinion polarities, and sentence separations. A hierarchical taxonomy for aspect categorization covering all areas of the teaching-learning process was developed. Both implicit and explicit aspects were annotated using this taxonomy. The paper discusses the annotation methodology, difficulties faced during the annotation, and details about aspect term categorization. The annotated corpus can be used for Aspect Extraction, Aspect Level Sentiment Analysis, and Document Level Sentiment Analysis. Baseline results for all three tasks are provided.",
-         "research_tasks": "The primary research tasks include the creation of a comprehensive student feedback corpus, aspect term annotation, opinion polarity annotation, and the development of a hierarchical taxonomy.",
-         "research_gaps": "Gaps include the lack of detailed aspect-level annotations in existing datasets and the focus on document-level sentiment analysis.",
-         "keywords": "Student Feedback Corpus, Aspect Terms, Opinion Terms, Polarity, Hierarchical Taxonomy, Aspect Extraction, Aspect Level Sentiment Analysis, Document Level Sentiment Analysis",
-         "recent_works": [
-             "Students feedback analysis model using deep learning-based method and linguistic knowledge for intelligent educational systems.",
-             "An Automated Approach for Analysing Students Feedback Using Sentiment Analysis Techniques."
-         ]
-     },
-     2: {
-         "title": "An Empirical Study on the Impact of Code Review on Software Quality",
-         "abstract": "This paper presents an empirical study examining the impact of code reviews on the quality of software projects. The study involved analyzing over 500,000 code reviews across 20 open-source projects on GitHub. The analysis was conducted to assess the relationship between code review practices and key software quality metrics, such as defect density, code churn, and the frequency of post-release defects. The findings suggest that code reviews, particularly when conducted by experienced reviewers, significantly reduce the number of defects in the codebase. The paper discusses the methodology used for data collection, the statistical methods employed for analysis, and the implications of these findings for software development practices.",
-         "research_tasks": "The primary research tasks include collecting and analyzing data on code reviews from open-source projects, measuring software quality metrics, and assessing the correlation between code review practices and software quality.",
-         "research_gaps": "Gaps include the lack of large-scale empirical studies that quantify the impact of code reviews on software quality and the limited focus on the role of reviewer expertise in existing literature.",
-         "keywords": "Code Reviews, Software Quality, Defect Density, Code Churn, Post-Release Defects, Empirical Study, Open-Source Projects, GitHub",
-         "recent_works": [
-             "The Effectiveness of Code Reviews in Identifying Defects: A Meta-Analysis of Empirical Studies",
-             "A Study on the Impact of Code Review Tools on Developer Productivity and Software Quality"
-         ]
-     }
- }
-
-
  predefined_observation = """
  Epoch [1/10],
  Train MSE: 0.543,
@@ -585,14 +176,6 @@ def info_to_message(info):
      return msg
 
 
- index_ex = 1
- # Function to handle the selection of an example and populate the respective fields
- def load_example(example_id):
-     global index_ex
-     index_ex = example_id
-     example = example_data[example_id]
-     paper_text = 'Title:\t' + example['title'] + '\nAbstract:\t' + example['abstract']
-     return paper_text
 
  # Gradio Interface
  with gr.Blocks() as app:
@@ -604,7 +187,7 @@ with gr.Blocks() as app:
      hypothesis_state = gr.State("")
      experiment_plan_state = gr.State("")
 
-     # Phase 1: Research Idea Generation Tab
+     ########## Phase 1: Research Idea Generation Tab ##############
      with gr.Tab("Phase 1: Research Idea Generation"):
          gr.Markdown("### Extract Research Elements and Generate Research Ideas")
 
@@ -623,9 +206,24 @@
          with gr.Group():
              gr.Markdown("### Research Idea")
              with gr.Row():
-                 hypothesis_output = gr.Textbox(label="Generated Hypothesis", lines=45, interactive=False)
-                 experiment_plan_output = gr.Textbox(label="Generated Experiment Plan", lines=45, interactive=False)
+                 hypothesis_output = gr.Textbox(label="Generated Hypothesis", lines=20, interactive=False)
+                 experiment_plan_output = gr.Textbox(label="Generated Experiment Plan", lines=20, interactive=False)
+
+         with gr.Row():
+             example_1_button = gr.Button("Load Example 1: " + example_data["1"]["title"])
+             example_2_button = gr.Button("Load Example 2: " + example_data["2"]["title"])
+
+         # Pre-step: load example
+         example_1_button.click(
+             fn=lambda: load_example(1),
+             outputs=[paper_text_input]
+         )
 
+         example_2_button.click(
+             fn=lambda: load_example(2),
+             outputs=[paper_text_input]
+         )
+
          # Step 1: Extract Research Elements
          extract_button.click(
              fn=extract_research_elements,
@@ -633,34 +231,16 @@
              outputs=[tasks_output, gaps_output, keywords_output, recent_works_output]
          )
 
-         # Step 2: Generate Research Hypothesis and Experiment Plan
-         def generate_and_store(tasks, gaps, keywords, recent_works):
-             hypothesis, experiment_plan = generate_research_idea_and_plan(tasks, gaps, keywords, recent_works)
-             return hypothesis, experiment_plan, hypothesis, experiment_plan
-
         generate_button.click(
             fn=generate_and_store,
             inputs=[tasks_output, gaps_output, keywords_output, recent_works_output],
             outputs=[hypothesis_output, experiment_plan_output, hypothesis_state, experiment_plan_state]
         )
 
-         # Example Buttons
-         with gr.Row():
-             example_1_button = gr.Button("Load Example 1:")
-             example_2_button = gr.Button("Load Example 2:")
-
-         example_1_button.click(
-             fn=lambda: load_example(1),
-             outputs=[paper_text_input]
-         )
-
-         example_2_button.click(
-             fn=lambda: load_example(2),
-             outputs=[paper_text_input]
-         )
 
-     # Phase 2: Interactive Session Tab
-     with gr.Tab("Phase 2&3: Experiment implementation and execution"):
+
+     ########## Phase 2 & 3: Experiment implementation and execution ##############
+     with gr.Tab("Phase 2 & Phase 3: Experiment implementation and execution"):
          gr.Markdown("### Interact with the ExperimentAgent")
 
          with gr.Row():
@@ -669,7 +249,7 @@
                  plan_input = gr.Textbox(label="Experiment Plan", lines=30, interactive=False)
 
              with gr.Column():
-                 execute_button = gr.Button("Start ExperimentAgent", elem_classes=["execute-btn"])
+                 start_exp_agnet = gr.Button("Start ExperimentAgent", elem_classes=["agent-btn"])
                  with gr.Group():
                      gr.Markdown("### Implementation + Execution Log")
                      log = gr.Textbox(label="Execution Log", lines=20, interactive=False)
@@ -680,42 +260,15 @@
                  feedback = gr.Textbox(placeholder="N/A", label="User Feedback", lines=3, interactive=True)
                  submit_button = gr.Button("Submit", elem_classes=["Submit-btn"])
 
-         def submit_feedback(user_feedback, history, previous_response):
-             global step_index
-             if_end = False
-             step_index += 1
-             msg = history
-             if step_index < len(process_steps):
-                 msg += previous_response + "\nUser feedback:" + user_feedback + "\n\n"
-                 response_info = process_steps[step_index]
-                 response = info_to_message(response_info) # Convert dictionary to formatted string
-                 response += "Please provide feedback based on the history, response entries, and observation, and questions: "
-                 step_index += 1
-                 msg += response
-             else:
-                 if_end = True
-                 response = "Agent Finished."
-
-
-             return msg, response, predefined_code if if_end else final_code
-
-         def load_phase_2_inputs(hypothesis, plan):
-             return hypothesis, plan, "# Code implementation will be displayed here after Start ExperimentAgent."
-
-         # Function to implement and execute with the research agent
-         def implement_and_execute(hypothesis, plan):
-             predefined_message = f"Implement the following hypothesis and experiment plan:\n\nHypothesis:\n{hypothesis}\n\nExperiment Plan:\n{plan}"
-             return predefined_code, predefined_action_log
-
          hypothesis_state.change(
              fn=load_phase_2_inputs,
              inputs=[hypothesis_state, experiment_plan_state],
              outputs=[idea_input, plan_input, code_display]
          )
 
-         # Trigger the research agent execution with the predefined hypothesis and plan
-         execute_button.click(
-             fn=implement_and_execute,
+         # Start research agent
+         start_exp_agnet.click(
+             fn=start_experiment_agent,
             inputs=[hypothesis_state, experiment_plan_state],
             outputs=[code_display, log]
         )