{ "results": { "winogrande": { "acc,none": 0.7655880031570639, "acc_stderr,none": 0.011906130106237992, "alias": "winogrande" } }, "configs": { "winogrande": { "task": "winogrande", "dataset_path": "/lustre07/scratch/gagan30/arocr/meta-llama/self_rewarding_models/eval/winogrande", "dataset_name": "winogrande_xl", "training_split": "train", "validation_split": "validation", "doc_to_text": "def doc_to_text(doc):\n answer_to_num = {\"1\": 0, \"2\": 1}\n return answer_to_num[doc[\"answer\"]]\n", "doc_to_target": "def doc_to_target(doc):\n idx = doc[\"sentence\"].index(\"_\") + 1\n return doc[\"sentence\"][idx:].strip()\n", "doc_to_choice": "def doc_to_choice(doc):\n idx = doc[\"sentence\"].index(\"_\")\n options = [doc[\"option1\"], doc[\"option2\"]]\n return [doc[\"sentence\"][:idx] + opt for opt in options]\n", "description": "", "target_delimiter": " ", "fewshot_delimiter": "\n\n", "num_fewshot": 5, "metric_list": [ { "metric": "acc", "aggregation": "mean", "higher_is_better": true } ], "output_type": "multiple_choice", "repeats": 1, "should_decontaminate": true, "doc_to_decontamination_query": "sentence", "metadata": { "version": 1.0 } } }, "versions": { "winogrande": 1.0 }, "n-shot": { "winogrande": 5 }, "config": { "model": "vllm", "model_args": "pretrained=/lustre07/scratch/gagan30/arocr/meta-llama/self_rewarding_models/Voyage-dpo-1,tensor_parallel_size=1,dtype=auto,gpu_memory_utilization=0.9,data_parallel_size=1,max_model_len=4096", "batch_size": "auto:128", "batch_sizes": [], "device": "cuda", "use_cache": "/lustre07/scratch/gagan30/arocr/cache/", "limit": null, "bootstrap_iters": 100000, "gen_kwargs": null }, "git_hash": null }