File size: 53,574 Bytes
cb6d47c |
1 |
{"Muennighoff/xstory_cloze_ar": {"Answer Given options_armt": {"arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='Answer Given options_armt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ar", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.8669755129053607}, "template_name": "Answer Given options_armt"}, "Choose Story Ending_armt": {"arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='Choose Story Ending_armt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ar", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.927862342819325}, "template_name": "Choose Story Ending_armt"}, "Generate Ending_armt": {"arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='Generate Ending_armt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ar", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.6479152878888154}, "template_name": "Generate Ending_armt"}, "Novel Correct Ending_armt": {"arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='Novel Correct Ending_armt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ar", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.9185969556585043}, "template_name": "Novel Correct Ending_armt"}, "Story Continuation and Options_armt": {"arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='Story Continuation and Options_armt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ar", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.9113170086035738}, "template_name": "Story Continuation and Options_armt"}}, "Muennighoff/xstory_cloze_es": {"Answer Given options_esmt": {"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='es', template_name='Answer Given options_esmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "es", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.9272005294506949}, "template_name": "Answer Given options_esmt"}, "Choose Story Ending_esmt": {"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='es', template_name='Choose Story Ending_esmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "es", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.9437458636664461}, "template_name": "Choose Story Ending_esmt"}, "Generate Ending_esmt": {"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='es', template_name='Generate Ending_esmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "es", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.7445400397088021}, "template_name": "Generate Ending_esmt"}, "Novel Correct Ending_esmt": {"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='es', template_name='Novel Correct Ending_esmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "es", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.9397749834546658}, "template_name": "Novel Correct Ending_esmt"}, "Story Continuation and Options_esmt": {"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='es', template_name='Story Continuation and Options_esmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "es", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.9298477829252151}, "template_name": "Story Continuation and Options_esmt"}}, "Muennighoff/xstory_cloze_eu": {"Answer Given options_eumt": {"arguments": "Namespace(config_name=None, dataset_config_name='eu', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='eu', template_name='Answer Given options_eumt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "eu", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.7452018530774321}, "template_name": "Answer Given options_eumt"}, "Choose Story Ending_eumt": {"arguments": "Namespace(config_name=None, dataset_config_name='eu', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='eu', template_name='Choose Story Ending_eumt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "eu", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.8676373262739907}, "template_name": "Choose Story Ending_eumt"}, "Generate Ending_eumt": {"arguments": "Namespace(config_name=None, dataset_config_name='eu', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='eu', template_name='Generate Ending_eumt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "eu", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.6082064857710126}, "template_name": "Generate Ending_eumt"}, "Novel Correct Ending_eumt": {"arguments": "Namespace(config_name=None, dataset_config_name='eu', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='eu', template_name='Novel Correct Ending_eumt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "eu", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.8219722038385175}, "template_name": "Novel Correct Ending_eumt"}, "Story Continuation and Options_eumt": {"arguments": "Namespace(config_name=None, dataset_config_name='eu', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='eu', template_name='Story Continuation and Options_eumt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "eu", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.814030443414957}, "template_name": "Story Continuation and Options_eumt"}}, "Muennighoff/xstory_cloze_hi": {"Answer Given options_himt": {"arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='hi', template_name='Answer Given options_himt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "hi", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.8266048974189278}, "template_name": "Answer Given options_himt"}, "Choose Story Ending_himt": {"arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='hi', template_name='Choose Story Ending_himt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "hi", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.8841826604897419}, "template_name": "Choose Story Ending_himt"}, "Generate Ending_himt": {"arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='hi', template_name='Generate Ending_himt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "hi", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.657180675049636}, "template_name": "Generate Ending_himt"}, "Novel Correct Ending_himt": {"arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='hi', template_name='Novel Correct Ending_himt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "hi", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.8669755129053607}, "template_name": "Novel Correct Ending_himt"}, "Story Continuation and Options_himt": {"arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='hi', template_name='Story Continuation and Options_himt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "hi", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.8689609530112509}, "template_name": "Story Continuation and Options_himt"}}, "Muennighoff/xstory_cloze_id": {"Answer Given options_idmt": {"arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='id', template_name='Answer Given options_idmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "id", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.8616810059563204}, "template_name": "Answer Given options_idmt"}, "Choose Story Ending_idmt": {"arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='id', template_name='Choose Story Ending_idmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "id", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.914626075446724}, "template_name": "Choose Story Ending_idmt"}, "Generate Ending_idmt": {"arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='id', template_name='Generate Ending_idmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "id", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.6730641958967571}, "template_name": "Generate Ending_idmt"}, "Novel Correct Ending_idmt": {"arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='id', template_name='Novel Correct Ending_idmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "id", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.8954334877564527}, "template_name": "Novel Correct Ending_idmt"}, "Story Continuation and Options_idmt": {"arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='id', template_name='Story Continuation and Options_idmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "id", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.8927862342819325}, "template_name": "Story Continuation and Options_idmt"}}, "Muennighoff/xstory_cloze_zh": {"Answer Given options_zhmt": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='Answer Given options_zhmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.9060225016545335}, "template_name": "Answer Given options_zhmt"}, "Choose Story Ending_zhmt": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='Choose Story Ending_zhmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.9238914626075446}, "template_name": "Choose Story Ending_zhmt"}, "Generate Ending_zhmt": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='Generate Ending_zhmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.686962276637988}, "template_name": "Generate Ending_zhmt"}, "Novel Correct Ending_zhmt": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='Novel Correct Ending_zhmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.9185969556585043}, "template_name": "Novel Correct Ending_zhmt"}, "Story Continuation and Options_zhmt": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='Story Continuation and Options_zhmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.9192587690271343}, "template_name": "Story Continuation and Options_zhmt"}}, "Muennighoff/xwinograd_fr": {"Replace_frmt": {"arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='Muennighoff/xwinograd', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='fr', template_name='Replace_frmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "fr", "dataset_name": "Muennighoff/xwinograd", "evaluation": {"accuracy": 0.6506024096385542}, "template_name": "Replace_frmt"}, "True or False_frmt": {"arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='Muennighoff/xwinograd', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='fr', template_name='True or False_frmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "fr", "dataset_name": "Muennighoff/xwinograd", "evaluation": {"accuracy": 0.5662650602409639}, "template_name": "True or False_frmt"}, "does underscore refer to_frmt": {"arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='Muennighoff/xwinograd', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='fr', template_name='does underscore refer to_frmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "fr", "dataset_name": "Muennighoff/xwinograd", "evaluation": {"accuracy": 0.5542168674698795}, "template_name": "does underscore refer to_frmt"}, "stand for_frmt": {"arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='Muennighoff/xwinograd', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='fr', template_name='stand for_frmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "fr", "dataset_name": "Muennighoff/xwinograd", "evaluation": {"accuracy": 0.4819277108433735}, "template_name": "stand for_frmt"}, "underscore refer to_frmt": {"arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='Muennighoff/xwinograd', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='fr', template_name='underscore refer to_frmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "fr", "dataset_name": "Muennighoff/xwinograd", "evaluation": {"accuracy": 0.6144578313253012}, "template_name": "underscore refer to_frmt"}}, "Muennighoff/xwinograd_pt": {"Replace_ptmt": {"arguments": "Namespace(config_name=None, dataset_config_name='pt', dataset_name='Muennighoff/xwinograd', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='pt', template_name='Replace_ptmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "pt", "dataset_name": "Muennighoff/xwinograd", "evaluation": {"accuracy": 0.6425855513307985}, "template_name": "Replace_ptmt"}, "True or False_ptmt": {"arguments": "Namespace(config_name=None, dataset_config_name='pt', dataset_name='Muennighoff/xwinograd', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='pt', template_name='True or False_ptmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "pt", "dataset_name": "Muennighoff/xwinograd", "evaluation": {"accuracy": 0.49809885931558934}, "template_name": "True or False_ptmt"}, "does underscore refer to_ptmt": {"arguments": "Namespace(config_name=None, dataset_config_name='pt', dataset_name='Muennighoff/xwinograd', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='pt', template_name='does underscore refer to_ptmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "pt", "dataset_name": "Muennighoff/xwinograd", "evaluation": {"accuracy": 0.6045627376425855}, "template_name": "does underscore refer to_ptmt"}, "stand for_ptmt": {"arguments": "Namespace(config_name=None, dataset_config_name='pt', dataset_name='Muennighoff/xwinograd', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='pt', template_name='stand for_ptmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "pt", "dataset_name": "Muennighoff/xwinograd", "evaluation": {"accuracy": 0.5095057034220533}, "template_name": "stand for_ptmt"}, "underscore refer to_ptmt": {"arguments": "Namespace(config_name=None, dataset_config_name='pt', dataset_name='Muennighoff/xwinograd', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='pt', template_name='underscore refer to_ptmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "pt", "dataset_name": "Muennighoff/xwinograd", "evaluation": {"accuracy": 0.6273764258555133}, "template_name": "underscore refer to_ptmt"}}, "Muennighoff/xwinograd_zh": {"Replace_zhmt": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xwinograd', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='zh', template_name='Replace_zhmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "Muennighoff/xwinograd", "evaluation": {"accuracy": 0.6845238095238095}, "template_name": "Replace_zhmt"}, "True or False_zhmt": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xwinograd', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='zh', template_name='True or False_zhmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "Muennighoff/xwinograd", "evaluation": {"accuracy": 0.503968253968254}, "template_name": "True or False_zhmt"}, "does underscore refer to_zhmt": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xwinograd', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='zh', template_name='does underscore refer to_zhmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "Muennighoff/xwinograd", "evaluation": {"accuracy": 0.626984126984127}, "template_name": "does underscore refer to_zhmt"}, "stand for_zhmt": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xwinograd', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='zh', template_name='stand for_zhmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "Muennighoff/xwinograd", "evaluation": {"accuracy": 0.503968253968254}, "template_name": "stand for_zhmt"}, "underscore refer to_zhmt": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xwinograd', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='zh', template_name='underscore refer to_zhmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "Muennighoff/xwinograd", "evaluation": {"accuracy": 0.7023809523809523}, "template_name": "underscore refer to_zhmt"}}, "xcopa_id": {"C1 or C2? premise_idmt": {"arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='id', template_name='C1 or C2? premise_idmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "id", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.52}, "template_name": "C1 or C2? premise_idmt"}, "best_option_idmt": {"arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='id', template_name='best_option_idmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "id", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.73}, "template_name": "best_option_idmt"}, "cause_effect_idmt": {"arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='id', template_name='cause_effect_idmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "id", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.82}, "template_name": "cause_effect_idmt"}, "i_am_hesitating_idmt": {"arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='id', template_name='i_am_hesitating_idmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "id", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.76}, "template_name": "i_am_hesitating_idmt"}, "plausible_alternatives_idmt": {"arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='id', template_name='plausible_alternatives_idmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "id", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.78}, "template_name": "plausible_alternatives_idmt"}}, "xcopa_sw": {"C1 or C2? premise_swmt": {"arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='sw', template_name='C1 or C2? premise_swmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "sw", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.59}, "template_name": "C1 or C2? premise_swmt"}, "best_option_swmt": {"arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='sw', template_name='best_option_swmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "sw", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.62}, "template_name": "best_option_swmt"}, "cause_effect_swmt": {"arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='sw', template_name='cause_effect_swmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "sw", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.64}, "template_name": "cause_effect_swmt"}, "i_am_hesitating_swmt": {"arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='sw', template_name='i_am_hesitating_swmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "sw", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.63}, "template_name": "i_am_hesitating_swmt"}, "plausible_alternatives_swmt": {"arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='sw', template_name='plausible_alternatives_swmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "sw", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.64}, "template_name": "plausible_alternatives_swmt"}}, "xcopa_ta": {"C1 or C2? premise_tamt": {"arguments": "Namespace(config_name=None, dataset_config_name='ta', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ta', template_name='C1 or C2? premise_tamt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ta", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.6}, "template_name": "C1 or C2? premise_tamt"}, "best_option_tamt": {"arguments": "Namespace(config_name=None, dataset_config_name='ta', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ta', template_name='best_option_tamt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ta", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.47}, "template_name": "best_option_tamt"}, "cause_effect_tamt": {"arguments": "Namespace(config_name=None, dataset_config_name='ta', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ta', template_name='cause_effect_tamt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ta", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.62}, "template_name": "cause_effect_tamt"}, "i_am_hesitating_tamt": {"arguments": "Namespace(config_name=None, dataset_config_name='ta', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ta', template_name='i_am_hesitating_tamt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ta", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.64}, "template_name": "i_am_hesitating_tamt"}, "plausible_alternatives_tamt": {"arguments": "Namespace(config_name=None, dataset_config_name='ta', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ta', template_name='plausible_alternatives_tamt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ta", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.63}, "template_name": "plausible_alternatives_tamt"}}, "xcopa_vi": {"C1 or C2? premise_vimt": {"arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='vi', template_name='C1 or C2? premise_vimt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "vi", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.56}, "template_name": "C1 or C2? premise_vimt"}, "best_option_vimt": {"arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='vi', template_name='best_option_vimt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "vi", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.77}, "template_name": "best_option_vimt"}, "cause_effect_vimt": {"arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='vi', template_name='cause_effect_vimt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "vi", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.84}, "template_name": "cause_effect_vimt"}, "i_am_hesitating_vimt": {"arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='vi', template_name='i_am_hesitating_vimt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "vi", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.82}, "template_name": "i_am_hesitating_vimt"}, "plausible_alternatives_vimt": {"arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='vi', template_name='plausible_alternatives_vimt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "vi", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.84}, "template_name": "plausible_alternatives_vimt"}}, "xcopa_zh": {"C1 or C2? premise_zhmt": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='C1 or C2? premise_zhmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.62}, "template_name": "C1 or C2? premise_zhmt"}, "best_option_zhmt": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='best_option_zhmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.72}, "template_name": "best_option_zhmt"}, "cause_effect_zhmt": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='cause_effect_zhmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.89}, "template_name": "cause_effect_zhmt"}, "i_am_hesitating_zhmt": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='i_am_hesitating_zhmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.9}, "template_name": "i_am_hesitating_zhmt"}, "plausible_alternatives_zhmt": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/bloomz/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='plausible_alternatives_zhmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.86}, "template_name": "plausible_alternatives_zhmt"}}} |