meghsn's picture
Result updates
d5581cc
raw
history blame
449 Bytes
[
{
"agent_name": "GenericAgent-Claude-3.5-Sonnet",
"study_id": "study_id",
"benchmark": "WorkArena-L1",
"score": 56.4,
"std_err": 2.7,
"benchmark_specific": "No",
"benchmark_tuned": "No",
"followed_evaluation_protocol": "Yes",
"reproducible": "Yes",
"comments": "NA",
"original_or_reproduced": "Original",
"date_time": "2021-01-01 12:00:00"
}
]