diff --git a/.gitattributes b/.gitattributes
index 9c24676beb68851799956f374928f1ad551c37b3..55efb38eafb56f2dd0719200f81894ef8245422e 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -177,3 +177,51 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 8b7178b178b/evaluation/generation/examples.8b7178b178b_gem_xsum_article_DOC_summary_0.jsonl filter=lfs diff=lfs merge=lfs -text
 8b7178b178b/evaluation/generation/examples.8b7178b178b_gem_xsum_article_DOC_summary_3.jsonl filter=lfs diff=lfs merge=lfs -text
 8b7178b178b/evaluation/generation/examples.8b7178b178b_gem_xsum_article_DOC_summary_4.jsonl filter=lfs diff=lfs merge=lfs -text
+8b7178b13b/evaluation/generation/examples.8b7178b13b_GEM-wiki_lingua_en_tldr_en_2.jsonl filter=lfs diff=lfs merge=lfs -text
+8b7178b13b/evaluation/generation/examples.8b7178b13b_GEM-wiki_lingua_en_tldr_en_4.jsonl filter=lfs diff=lfs merge=lfs -text
+8b7178b13b/evaluation/generation/examples.8b7178b13b_gem_xsum_article_DOC_summary_4.jsonl filter=lfs diff=lfs merge=lfs -text
+8b7178b4b/evaluation/generation/examples.8b7178b4b_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl filter=lfs diff=lfs merge=lfs -text
+8b7178b4b/evaluation/generation/examples.8b7178b4b_GEM-web_nlg_en_PALM_prompt_2.jsonl filter=lfs diff=lfs merge=lfs -text
+8b7178b13b/evaluation/generation/examples.8b7178b13b_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl filter=lfs diff=lfs merge=lfs -text
+8b7178b13b/evaluation/generation/examples.8b7178b13b_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl filter=lfs diff=lfs merge=lfs -text
+8b7178b13b/evaluation/generation/examples.8b7178b13b_GEM-web_nlg_en_PALM_prompt_0.jsonl filter=lfs diff=lfs merge=lfs -text
+8b7178b4b/evaluation/generation/examples.8b7178b4b_GEM-wiki_lingua_en_tldr_en_2.jsonl filter=lfs diff=lfs merge=lfs -text
+8b7178b4b/evaluation/generation/examples.8b7178b4b_GEM-wiki_lingua_en_tldr_en_4.jsonl filter=lfs diff=lfs merge=lfs -text
+8b7178b4b/evaluation/generation/examples.8b7178b4b_GEM-wiki_lingua_en_tldr_en_5.jsonl filter=lfs diff=lfs merge=lfs -text
+8b7178b13b/evaluation/generation/examples.8b7178b13b_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl filter=lfs diff=lfs merge=lfs -text
+8b7178b4b/evaluation/generation/examples.8b7178b4b_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl filter=lfs diff=lfs merge=lfs -text
+8b7178b4b/evaluation/generation/examples.8b7178b4b_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl filter=lfs diff=lfs merge=lfs -text
+8b7178b4b/evaluation/generation/examples.8b7178b4b_GEM-web_nlg_en_PALM_prompt_4.jsonl filter=lfs diff=lfs merge=lfs -text
+8b7178b4b/evaluation/generation/examples.8b7178b4b_GEM-web_nlg_en_PALM_prompt_5.jsonl filter=lfs diff=lfs merge=lfs -text
+8b7178b4b/evaluation/generation/examples.8b7178b4b_GEM-wiki_lingua_en_tldr_en_0.jsonl filter=lfs diff=lfs merge=lfs -text
+8b7178b4b/evaluation/generation/examples.8b7178b4b_gem_xsum_article_DOC_summary_2.jsonl filter=lfs diff=lfs merge=lfs -text
+8b7178b4b/evaluation/generation/examples.8b7178b4b_gem_xsum_article_DOC_summary_5.jsonl filter=lfs diff=lfs merge=lfs -text
+8b7178b13b/evaluation/generation/examples.8b7178b13b_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl filter=lfs diff=lfs merge=lfs -text
+8b7178b13b/evaluation/generation/examples.8b7178b13b_GEM-web_nlg_en_PALM_prompt_4.jsonl filter=lfs diff=lfs merge=lfs -text
+8b7178b4b/evaluation/generation/examples.8b7178b4b_GEM-web_nlg_en_PALM_prompt_3.jsonl filter=lfs diff=lfs merge=lfs -text
+8b7178b13b/evaluation/generation/examples.8b7178b13b_GEM-wiki_lingua_en_tldr_en_5.jsonl filter=lfs diff=lfs merge=lfs -text
+8b7178b13b/evaluation/generation/examples.8b7178b13b_gem_xsum_article_DOC_summary_1.jsonl filter=lfs diff=lfs merge=lfs -text
+8b7178b13b/evaluation/generation/examples.8b7178b13b_gem_xsum_article_DOC_summary_2.jsonl filter=lfs diff=lfs merge=lfs -text
+8b7178b4b/evaluation/generation/examples.8b7178b4b_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl filter=lfs diff=lfs merge=lfs -text
+8b7178b4b/evaluation/generation/examples.8b7178b4b_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl filter=lfs diff=lfs merge=lfs -text
+8b7178b13b/evaluation/generation/examples.8b7178b13b_GEM-web_nlg_en_PALM_prompt_1.jsonl filter=lfs diff=lfs merge=lfs -text
+8b7178b13b/evaluation/generation/examples.8b7178b13b_GEM-web_nlg_en_PALM_prompt_5.jsonl filter=lfs diff=lfs merge=lfs -text
+8b7178b13b/evaluation/generation/examples.8b7178b13b_GEM-wiki_lingua_en_tldr_en_1.jsonl filter=lfs diff=lfs merge=lfs -text
+8b7178b4b/evaluation/generation/examples.8b7178b4b_gem_xsum_article_DOC_summary_0.jsonl filter=lfs diff=lfs merge=lfs -text
+8b7178b4b/evaluation/generation/examples.8b7178b4b_gem_xsum_article_DOC_summary_4.jsonl filter=lfs diff=lfs merge=lfs -text
+8b7178b4b/evaluation/generation/examples.8b7178b4b_GEM-web_nlg_en_PALM_prompt_1.jsonl filter=lfs diff=lfs merge=lfs -text
+8b7178b4b/evaluation/generation/examples.8b7178b4b_GEM-wiki_lingua_en_tldr_en_1.jsonl filter=lfs diff=lfs merge=lfs -text
+8b7178b4b/evaluation/generation/examples.8b7178b4b_GEM-wiki_lingua_en_tldr_en_3.jsonl filter=lfs diff=lfs merge=lfs -text
+8b7178b4b/evaluation/generation/examples.8b7178b4b_gem_xsum_article_DOC_summary_1.jsonl filter=lfs diff=lfs merge=lfs -text
+8b7178b13b/evaluation/generation/examples.8b7178b13b_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl filter=lfs diff=lfs merge=lfs -text
+8b7178b13b/evaluation/generation/examples.8b7178b13b_GEM-web_nlg_en_PALM_prompt_2.jsonl filter=lfs diff=lfs merge=lfs -text
+8b7178b13b/evaluation/generation/examples.8b7178b13b_GEM-wiki_lingua_en_tldr_en_0.jsonl filter=lfs diff=lfs merge=lfs -text
+8b7178b13b/evaluation/generation/examples.8b7178b13b_GEM-wiki_lingua_en_tldr_en_3.jsonl filter=lfs diff=lfs merge=lfs -text
+8b7178b13b/evaluation/generation/examples.8b7178b13b_gem_xsum_article_DOC_summary_0.jsonl filter=lfs diff=lfs merge=lfs -text
+8b7178b4b/evaluation/generation/examples.8b7178b4b_GEM-web_nlg_en_PALM_prompt_0.jsonl filter=lfs diff=lfs merge=lfs -text
+8b7178b13b/evaluation/generation/examples.8b7178b13b_gem_xsum_article_DOC_summary_5.jsonl filter=lfs diff=lfs merge=lfs -text
+8b7178b4b/evaluation/generation/examples.8b7178b4b_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl filter=lfs diff=lfs merge=lfs -text
+8b7178b4b/evaluation/generation/examples.8b7178b4b_gem_xsum_article_DOC_summary_3.jsonl filter=lfs diff=lfs merge=lfs -text
+8b7178b13b/evaluation/generation/examples.8b7178b13b_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl filter=lfs diff=lfs merge=lfs -text
+8b7178b13b/evaluation/generation/examples.8b7178b13b_GEM-web_nlg_en_PALM_prompt_3.jsonl filter=lfs diff=lfs merge=lfs -text
+8b7178b13b/evaluation/generation/examples.8b7178b13b_gem_xsum_article_DOC_summary_3.jsonl filter=lfs diff=lfs merge=lfs -text
diff --git a/8b7178b13b/evaluation/generation/agg.8b7178b13b_GEM-web_nlg_en_PALM_prompt_0.json b/8b7178b13b/evaluation/generation/agg.8b7178b13b_GEM-web_nlg_en_PALM_prompt_0.json
new file mode 100644
index 0000000000000000000000000000000000000000..861f974eeac161f474a44020aa41ed3d3d4a9b06
--- /dev/null
+++ b/8b7178b13b/evaluation/generation/agg.8b7178b13b_GEM-web_nlg_en_PALM_prompt_0.json
@@ -0,0 +1 @@
+{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.4373157998206226, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.056741486001354216}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.07630675677312203, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0021365872076980082}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.2947830429134024, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0047665492114514684}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.10947380559420297, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0022511546576554292}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.03437396040114199, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0011646295650624351}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.13907462394894776, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0031715068123442996}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.051001464150726095, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0014146940985046523}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.07227253003813615, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.001955466598501427}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.28336049631237586, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.004569573466829386}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.10417993615217146, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.002043414019393663}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.07290032072974349, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.002034641349821296}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.2825727583996548, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.004541613699421234}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.10462419732455051, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.002115527416565905}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b13b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
\ No newline at end of file
diff --git a/8b7178b13b/evaluation/generation/agg.8b7178b13b_GEM-web_nlg_en_PALM_prompt_1.json b/8b7178b13b/evaluation/generation/agg.8b7178b13b_GEM-web_nlg_en_PALM_prompt_1.json
new file mode 100644
index 0000000000000000000000000000000000000000..a9ec0a57b8354b510ead04d5de356e87e3cadb7e
--- /dev/null
+++ b/8b7178b13b/evaluation/generation/agg.8b7178b13b_GEM-web_nlg_en_PALM_prompt_1.json
@@ -0,0 +1 @@
+{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.5816764130686213, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.054147923323820346}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.07783086970109386, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.001926390801136041}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.36711372145643695, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.005207087210896133}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.11727270364668571, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0021476260743326565}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.035941971570071736, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0011116447277521812}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.17604536516651662, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0036353372642678756}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.05455324665630359, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0013665856855669073}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.07209000868611247, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0017559014471748775}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.33901675935925657, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.004687996190515175}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.10871994085882164, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0019153699814912425}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.0735230628158407, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0018435905984185187}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.34433610068323095, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.004792623835429767}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.11053867463797286, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0020173857832705686}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b13b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
\ No newline at end of file
diff --git a/8b7178b13b/evaluation/generation/agg.8b7178b13b_GEM-web_nlg_en_PALM_prompt_2.json b/8b7178b13b/evaluation/generation/agg.8b7178b13b_GEM-web_nlg_en_PALM_prompt_2.json
new file mode 100644
index 0000000000000000000000000000000000000000..6fb91afeb3ce1148a225b5256529d72363b8c813
--- /dev/null
+++ b/8b7178b13b/evaluation/generation/agg.8b7178b13b_GEM-web_nlg_en_PALM_prompt_2.json
@@ -0,0 +1 @@
+{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.6801735381749798, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.04514863078336761}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.07666591908388082, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0015425382902699712}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.40028519820196484, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0053218199498182555}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.11972585427910692, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.001963983917319172}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.03562073396147935, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0009954366405729978}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.19724892000097954, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.004019172908229666}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.055583861631680276, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0012687258801338563}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.07037367893965144, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0013558489579657599}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.3648716666842121, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.004723267009132289}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.10999138993564177, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0017648420950003858}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.07198504559682002, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0014532243063151847}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.3737640227903558, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.004892612299725926}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.1122905686653009, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0018425170251459826}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b13b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
\ No newline at end of file
diff --git a/8b7178b13b/evaluation/generation/agg.8b7178b13b_GEM-web_nlg_en_PALM_prompt_3.json b/8b7178b13b/evaluation/generation/agg.8b7178b13b_GEM-web_nlg_en_PALM_prompt_3.json
new file mode 100644
index 0000000000000000000000000000000000000000..13ff18c86cb511025348204831e48f067d81f83b
--- /dev/null
+++ b/8b7178b13b/evaluation/generation/agg.8b7178b13b_GEM-web_nlg_en_PALM_prompt_3.json
@@ -0,0 +1 @@
+{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.8393015493701177, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.049868510474836}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.07934496701755703, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0017108996646152186}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.41957311539245606, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.005383968460349908}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.12332290364001403, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0020213255478620834}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.03700401446806371, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0010754080423489075}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.20785188427731804, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0039850136524860235}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.05769919253564089, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.001315453385968732}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.07186075331384528, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0015065682717614625}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.3792930717112658, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.004756348746075166}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.11171273283361995, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0017620828691461753}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.07439776049493438, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0016078356678010679}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.39147213524665503, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.004968164479358761}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.11543602475589927, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.001888123216509075}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b13b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
\ No newline at end of file
diff --git a/8b7178b13b/evaluation/generation/agg.8b7178b13b_GEM-web_nlg_en_PALM_prompt_4.json b/8b7178b13b/evaluation/generation/agg.8b7178b13b_GEM-web_nlg_en_PALM_prompt_4.json
new file mode 100644
index 0000000000000000000000000000000000000000..1263e84c5629dfdbe72dbe25643856f57aca8aa4
--- /dev/null
+++ b/8b7178b13b/evaluation/generation/agg.8b7178b13b_GEM-web_nlg_en_PALM_prompt_4.json
@@ -0,0 +1 @@
+{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.8436391155168642, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.049789928911136136}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.08188000384427346, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0015429926759057174}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.45225212783083296, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.005452056426418837}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.12951808355992003, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0019403013558157668}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.03789712819393572, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0009152136077591718}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.22689495723762804, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.004067188852506108}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.06050302495157815, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0012369724150331927}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.07330729359090057, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0013644419423480823}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.4045943171397325, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0048320778663538955}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.11592286526905457, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0016906240203803347}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.07637812548771711, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0014540942217052375}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.4207633762253156, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.005005614020197891}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.12060522562383363, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0018022149452879743}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b13b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
\ No newline at end of file
diff --git a/8b7178b13b/evaluation/generation/agg.8b7178b13b_GEM-web_nlg_en_PALM_prompt_5.json b/8b7178b13b/evaluation/generation/agg.8b7178b13b_GEM-web_nlg_en_PALM_prompt_5.json
new file mode 100644
index 0000000000000000000000000000000000000000..afbf69a82bd99bc9a43561f1680507437984b2dd
--- /dev/null
+++ b/8b7178b13b/evaluation/generation/agg.8b7178b13b_GEM-web_nlg_en_PALM_prompt_5.json
@@ -0,0 +1 @@
+{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.968801645723095, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.05462868115593731}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.08408541082259362, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.001415159971957011}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.4730441639084742, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.005431978662817184}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.13468892691608006, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0019240731337134673}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.038803485654806714, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0008788058278028121}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.2379631167113304, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.004152278672512853}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.06252454636994373, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0012377179103821956}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.07423644311431606, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0012142160071926726}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.4194644730807224, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0047799777603664715}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.11896213494013362, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.001650634135835398}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.07800322392434518, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0013202039315876929}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.43851085762444947, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.004968418620592047}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.12479160570822044, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0017844920178026805}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b13b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
\ No newline at end of file
diff --git a/8b7178b13b/evaluation/generation/agg.8b7178b13b_GEM-wiki_lingua_en_tldr_en_0.json b/8b7178b13b/evaluation/generation/agg.8b7178b13b_GEM-wiki_lingua_en_tldr_en_0.json
new file mode 100644
index 0000000000000000000000000000000000000000..242fae1979f78c855ef9ddf32c98ab336fd59987
--- /dev/null
+++ b/8b7178b13b/evaluation/generation/agg.8b7178b13b_GEM-wiki_lingua_en_tldr_en_0.json
@@ -0,0 +1 @@
+{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.1628352267958049, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0019076289743005436}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.28137034403026634, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.002726139405286867}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.19188833167040015, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0018702943944022418}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.035168122195033014, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0008051014260490183}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.06371619685684557, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0015314676134498058}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.04191366659253858, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.000917658619128283}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.12246239102421737, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.001285311648356421}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.2201424968967575, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0022006617198564397}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.14614932555210797, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0012956973040530264}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.14960133985735732, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0017445585328390875}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.2600296050390341, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002552101400202146}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.17662543049894286, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.001716560474974557}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 1.8310798418084737, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.06119305662626742}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b13b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
\ No newline at end of file
diff --git a/8b7178b13b/evaluation/generation/agg.8b7178b13b_GEM-wiki_lingua_en_tldr_en_1.json b/8b7178b13b/evaluation/generation/agg.8b7178b13b_GEM-wiki_lingua_en_tldr_en_1.json
new file mode 100644
index 0000000000000000000000000000000000000000..89f32caa36fd8c3ee75bcc6e3a81de12a627cee0
--- /dev/null
+++ b/8b7178b13b/evaluation/generation/agg.8b7178b13b_GEM-wiki_lingua_en_tldr_en_1.json
@@ -0,0 +1 @@
+{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.17507251962841255, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0019979193178610104}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.3008600636001003, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.002760929751246215}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.20607218590152535, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0019348192676270253}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.040814840621502924, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0008769075552676463}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.07239534384858555, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0015745819458817655}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.04827166271383884, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.000981638580890638}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.12604513290190442, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.001331288983523961}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.22410236240350495, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0021252698749924166}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.14993882097099004, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0013143528095662098}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.1634506211607229, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0018573519091366366}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.282240921650429, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002619133620350951}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.19267274247494454, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0018042087124662393}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 2.463691563435929, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.056238669871763534}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b13b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
\ No newline at end of file
diff --git a/8b7178b13b/evaluation/generation/agg.8b7178b13b_e2e_nlg_cleaned_generate_text_restaurant_0.json b/8b7178b13b/evaluation/generation/agg.8b7178b13b_e2e_nlg_cleaned_generate_text_restaurant_0.json
new file mode 100644
index 0000000000000000000000000000000000000000..07245b00c863e58761a4c09ac494c1fcc144d843
--- /dev/null
+++ b/8b7178b13b/evaluation/generation/agg.8b7178b13b_e2e_nlg_cleaned_generate_text_restaurant_0.json
@@ -0,0 +1 @@
+{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 0.3016960813220789, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.04082243698870125}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.0628362517057776, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0006647851313912593}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.137799699754724, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0013399199497499206}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.08446489060239885, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.000832880994211257}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.009897778438587298, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0002813167869752497}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.02030644827526888, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0005626168002295672}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.013098749477153882, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.00036393407871382073}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.060894085509621707, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0006165662598640652}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.1334600824136164, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0012622226683087598}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.08185345358329085, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0007758077715250189}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.05822489106803366, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.00059732648786741}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.12759747263850774, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0011920315451119745}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.07821933484300712, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.000742258462877918}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b13b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
\ No newline at end of file
diff --git a/8b7178b13b/evaluation/generation/agg.8b7178b13b_e2e_nlg_cleaned_generate_text_restaurant_1.json b/8b7178b13b/evaluation/generation/agg.8b7178b13b_e2e_nlg_cleaned_generate_text_restaurant_1.json
new file mode 100644
index 0000000000000000000000000000000000000000..c523db860077e831e24df239bb0a685733f7ea30
--- /dev/null
+++ b/8b7178b13b/evaluation/generation/agg.8b7178b13b_e2e_nlg_cleaned_generate_text_restaurant_1.json
@@ -0,0 +1 @@
+{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 9.014911506469076, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.10639404635916568}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.4220547738164344, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0029747898853502135}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.44890682882262095, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0030482256607117985}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.4071860181343587, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002172401493926826}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.18398717679998686, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.002070314538395242}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.19695708046780447, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0021253468809232486}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.17675735102863532, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0016983524897307275}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.301979337271323, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.002426170411020153}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.32120889976820555, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0024556761259021593}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.2901955244768342, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0017445755551563752}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.3498631569304881, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.00275693533807641}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.3704033862486897, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.00277359280896307}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.3366538075182426, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0020840116286440125}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b13b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
\ No newline at end of file
diff --git a/8b7178b13b/evaluation/generation/agg.8b7178b13b_e2e_nlg_cleaned_generate_text_restaurant_2.json b/8b7178b13b/evaluation/generation/agg.8b7178b13b_e2e_nlg_cleaned_generate_text_restaurant_2.json
new file mode 100644
index 0000000000000000000000000000000000000000..15db004d4aa99c7897c1c28387b487529c853355
--- /dev/null
+++ b/8b7178b13b/evaluation/generation/agg.8b7178b13b_e2e_nlg_cleaned_generate_text_restaurant_2.json
@@ -0,0 +1 @@
+{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 11.1355912496663, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.13104161087327487}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.45419518183344554, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0027266345121146297}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.48044140772752547, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.002817950150203246}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.4449026725503146, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0020303864410380614}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.2124884501715128, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0020421137596173576}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.22663601203825093, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0021799430214164035}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.20791813280930055, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0017732074653438918}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.32873823752244813, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.002262430136729236}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.34880236417169236, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0024064753259342344}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.32200623778436555, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0017788348099423016}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.38047010773438, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.002559842275120574}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.40224234667337105, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002660545444016772}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.37259609212766276, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0020305961255707534}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b13b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
\ No newline at end of file
diff --git a/8b7178b13b/evaluation/generation/agg.8b7178b13b_e2e_nlg_cleaned_generate_text_restaurant_3.json b/8b7178b13b/evaluation/generation/agg.8b7178b13b_e2e_nlg_cleaned_generate_text_restaurant_3.json
new file mode 100644
index 0000000000000000000000000000000000000000..724dce62f172eb6f393ec13619b4906e06a285de
--- /dev/null
+++ b/8b7178b13b/evaluation/generation/agg.8b7178b13b_e2e_nlg_cleaned_generate_text_restaurant_3.json
@@ -0,0 +1 @@
+{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 12.10470203084112, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.160895558301053}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.45456971607102353, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0023960763531178423}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.47953391372338766, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0027865520920584822}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.4492382386896765, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0019699301602619833}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.2163784140584202, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.001980036007401307}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.23028758666497792, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0022756293438486903}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.2138339925008178, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0018445417129634963}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.33426681511660167, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.002069523940604892}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.35354783483337526, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0024228991972543336}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.3303919839118561, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.001783388701966232}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.3827360863233357, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.002359678766192712}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.40399025031746394, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002710450542079631}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.378239342272828, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.002043462221499771}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b13b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
\ No newline at end of file
diff --git a/8b7178b13b/evaluation/generation/agg.8b7178b13b_e2e_nlg_cleaned_generate_text_restaurant_4.json b/8b7178b13b/evaluation/generation/agg.8b7178b13b_e2e_nlg_cleaned_generate_text_restaurant_4.json
new file mode 100644
index 0000000000000000000000000000000000000000..b59237d0c0d384891ba88f6b11a9a7f8fd0f04b6
--- /dev/null
+++ b/8b7178b13b/evaluation/generation/agg.8b7178b13b_e2e_nlg_cleaned_generate_text_restaurant_4.json
@@ -0,0 +1 @@
+{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 12.44085622955195, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.13067375364343567}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.45479182860495637, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0023107278825576553}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.48017725960351876, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0027316537664281945}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.4511875093908521, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.001948885960105966}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.21833473257724795, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0019502917664114363}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.23333710855434647, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.002269432186215575}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.21707781052843164, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.00185164014349205}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.3366370287941819, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0019965287497770777}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.35684028344910274, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0024057773390550254}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.33434874026181494, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0017786642192651214}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.38571523251054607, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0022865314323742805}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.40803304829345277, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0027038368009947755}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.38297835064075053, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0020438066934495387}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b13b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
\ No newline at end of file
diff --git a/8b7178b13b/evaluation/generation/agg.8b7178b13b_e2e_nlg_cleaned_generate_text_restaurant_5.json b/8b7178b13b/evaluation/generation/agg.8b7178b13b_e2e_nlg_cleaned_generate_text_restaurant_5.json
new file mode 100644
index 0000000000000000000000000000000000000000..4d6a9efd57b9a41d9dc660181d857bf82ad1b6de
--- /dev/null
+++ b/8b7178b13b/evaluation/generation/agg.8b7178b13b_e2e_nlg_cleaned_generate_text_restaurant_5.json
@@ -0,0 +1 @@
+{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 12.352678257477072, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.1229858870702429}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.4536679355694742, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.002282805690194143}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.48257839264210245, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.002694452362144022}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.4520495564262539, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0019091091728356577}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.2184937204533077, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0019084340007320115}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.2348008779716612, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0022303214323899125}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.21807002700335165, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0018243298667958761}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.33692930278278704, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0019923203298548866}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.36002100624003847, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0024053149539646364}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.336313193439231, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.001791714678974818}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.38599958036488224, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.002272531904720447}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.4113378384062774, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002667546056299814}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.38490335194628533, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.002018489316229454}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b13b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
\ No newline at end of file
diff --git a/8b7178b13b/evaluation/generation/agg.8b7178b13b_gem_xsum_article_DOC_summary_0.json b/8b7178b13b/evaluation/generation/agg.8b7178b13b_gem_xsum_article_DOC_summary_0.json
new file mode 100644
index 0000000000000000000000000000000000000000..86e22282cb6ec1ed3147295a424849271c434e16
--- /dev/null
+++ b/8b7178b13b/evaluation/generation/agg.8b7178b13b_gem_xsum_article_DOC_summary_0.json
@@ -0,0 +1 @@
+{"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.15954668519186138, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.002100617390256336}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.36309739763041776, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.004368615445871372}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.2157374708631877, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002508783327476019}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.03857312620535916, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0012905906114606838}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.09177566784631346, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0029866334470645636}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.05279448511938101, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.001664044690700437}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.11839547499241576, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.001646949604960197}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.27198898577078495, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0035222426961979128}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.1604319788556621, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0019556284658093183}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.1250746587410693, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0017464873888176696}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.28828909491111, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0038785736556512756}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.1697976651408494, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.002135233558113895}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 2.194933977793404, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.09650169688197711}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b13b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
\ No newline at end of file
diff --git a/8b7178b13b/evaluation/generation/agg.8b7178b13b_gem_xsum_article_DOC_summary_1.json b/8b7178b13b/evaluation/generation/agg.8b7178b13b_gem_xsum_article_DOC_summary_1.json
new file mode 100644
index 0000000000000000000000000000000000000000..5200f1c2007ab17b42d86e7f358388bd1fefedf2
--- /dev/null
+++ b/8b7178b13b/evaluation/generation/agg.8b7178b13b_gem_xsum_article_DOC_summary_1.json
@@ -0,0 +1 @@
+{"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.130085376048313, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0019273226219467348}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.31899443410148204, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.004375864955378675}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.18261422772647087, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.00258517997013667}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.027035766317831913, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0010834329723279196}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.06843246331973844, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.002783755254166952}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.03829439148207006, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0015268296315399182}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.09860861520075329, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0014180838773530536}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.2439616017337858, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.003388978319546548}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.13872448607191418, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0019214547149178605}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.10431817474650688, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0015880489605249845}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.25778306821268704, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0037812899736781526}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.14673205236069722, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.002156691093638724}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 1.638275312642142, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.11197811159644402}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b13b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
\ No newline at end of file
diff --git a/8b7178b13b/evaluation/generation/agg.8b7178b13b_gem_xsum_article_DOC_summary_2.json b/8b7178b13b/evaluation/generation/agg.8b7178b13b_gem_xsum_article_DOC_summary_2.json
new file mode 100644
index 0000000000000000000000000000000000000000..720e848d767777b2f3fc7e2e80a6f880ababd91e
--- /dev/null
+++ b/8b7178b13b/evaluation/generation/agg.8b7178b13b_gem_xsum_article_DOC_summary_2.json
@@ -0,0 +1 @@
+{"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.14059986141007405, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.001980010991959053}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.34704590818514364, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.004609783640663682}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.1978001315070318, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002669936318830053}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.033448694107769246, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0011984145262489562}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.08605872654054372, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.003191665944600775}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.04760922765448965, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.001702755511678876}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.10721276455620249, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0014796729903083395}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.26727650609085896, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0036995069904560843}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.1511855886298728, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0020261890172370246}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.11346372550782761, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0016303825401644061}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.28288787023938955, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.004038388892533831}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.16004925294103853, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0022364087298009884}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 2.0210914476360786, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.12454216962651855}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b13b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
\ No newline at end of file
diff --git a/8b7178b13b/evaluation/generation/examples.8b7178b13b_GEM-web_nlg_en_PALM_prompt_0.jsonl b/8b7178b13b/evaluation/generation/examples.8b7178b13b_GEM-web_nlg_en_PALM_prompt_0.jsonl
new file mode 100644
index 0000000000000000000000000000000000000000..28ee79981c9d10e71ec4577328f5f963bb531873
--- /dev/null
+++ b/8b7178b13b/evaluation/generation/examples.8b7178b13b_GEM-web_nlg_en_PALM_prompt_0.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:75f8a7429d4fa990fc951a1eb232a50cdfe103ebc4f4333f2172fdf37f20c0ec
+size 4145663
diff --git a/8b7178b13b/evaluation/generation/examples.8b7178b13b_GEM-web_nlg_en_PALM_prompt_1.jsonl b/8b7178b13b/evaluation/generation/examples.8b7178b13b_GEM-web_nlg_en_PALM_prompt_1.jsonl
new file mode 100644
index 0000000000000000000000000000000000000000..7db35f3405799b3d311fba2d32c06a95d0e47c95
--- /dev/null
+++ b/8b7178b13b/evaluation/generation/examples.8b7178b13b_GEM-web_nlg_en_PALM_prompt_1.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d8198ef33953196c4beed6a0eb43940e14ec631b3599f985f4f948213a397bf5
+size 5175400
diff --git a/8b7178b13b/evaluation/generation/examples.8b7178b13b_GEM-web_nlg_en_PALM_prompt_2.jsonl b/8b7178b13b/evaluation/generation/examples.8b7178b13b_GEM-web_nlg_en_PALM_prompt_2.jsonl
new file mode 100644
index 0000000000000000000000000000000000000000..9336634c030c5b6df5dcfe9c7f117ebc50c99130
--- /dev/null
+++ b/8b7178b13b/evaluation/generation/examples.8b7178b13b_GEM-web_nlg_en_PALM_prompt_2.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ede4ec95ed7dbcd239fc5145e4c183de6edf7c4730fa59b8a15767964b2898e1
+size 6090106
diff --git a/8b7178b13b/evaluation/generation/examples.8b7178b13b_GEM-web_nlg_en_PALM_prompt_3.jsonl b/8b7178b13b/evaluation/generation/examples.8b7178b13b_GEM-web_nlg_en_PALM_prompt_3.jsonl
new file mode 100644
index 0000000000000000000000000000000000000000..b8ebfc342b7b5919d66537186910274781ef7d14
--- /dev/null
+++ b/8b7178b13b/evaluation/generation/examples.8b7178b13b_GEM-web_nlg_en_PALM_prompt_3.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b886146a6a2b25a0004d71a5ffcf0ded8cfd777be395c92cdc43eda4e11e84e3
+size 7018891
diff --git a/8b7178b13b/evaluation/generation/examples.8b7178b13b_GEM-web_nlg_en_PALM_prompt_4.jsonl b/8b7178b13b/evaluation/generation/examples.8b7178b13b_GEM-web_nlg_en_PALM_prompt_4.jsonl
new file mode 100644
index 0000000000000000000000000000000000000000..6986e2ce6cf3c2c77439c7ec12876ab5649c658c
--- /dev/null
+++ b/8b7178b13b/evaluation/generation/examples.8b7178b13b_GEM-web_nlg_en_PALM_prompt_4.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5460e9e85d6abd142df975a4be1d4d38d3ea658d547750d1be20ba6f8260897e
+size 7906446
diff --git a/8b7178b13b/evaluation/generation/examples.8b7178b13b_GEM-web_nlg_en_PALM_prompt_5.jsonl b/8b7178b13b/evaluation/generation/examples.8b7178b13b_GEM-web_nlg_en_PALM_prompt_5.jsonl
new file mode 100644
index 0000000000000000000000000000000000000000..b19414c4aff977e52ec5f6b5c38b2a7e883e501f
--- /dev/null
+++ b/8b7178b13b/evaluation/generation/examples.8b7178b13b_GEM-web_nlg_en_PALM_prompt_5.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:92361c08ce0393304401ae18656799a36fd64cdd095286e241874604253293e6
+size 8802413
diff --git a/8b7178b13b/evaluation/generation/examples.8b7178b13b_GEM-wiki_lingua_en_tldr_en_0.jsonl b/8b7178b13b/evaluation/generation/examples.8b7178b13b_GEM-wiki_lingua_en_tldr_en_0.jsonl
new file mode 100644
index 0000000000000000000000000000000000000000..d19a9d757dcff21896e58dd348897b2f277605c3
--- /dev/null
+++ b/8b7178b13b/evaluation/generation/examples.8b7178b13b_GEM-wiki_lingua_en_tldr_en_0.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7812e51bc8a7484b2e16a2713ff42a9f1901d1a61db775f40ee84e41fd3a604f
+size 7699465
diff --git a/8b7178b13b/evaluation/generation/examples.8b7178b13b_GEM-wiki_lingua_en_tldr_en_1.jsonl b/8b7178b13b/evaluation/generation/examples.8b7178b13b_GEM-wiki_lingua_en_tldr_en_1.jsonl
new file mode 100644
index 0000000000000000000000000000000000000000..830dd7b14071defe6218e74f35c6f6e67e0449d4
--- /dev/null
+++ b/8b7178b13b/evaluation/generation/examples.8b7178b13b_GEM-wiki_lingua_en_tldr_en_1.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7cd79c8d0c5f8cbfbeb3c2542d489234b1c40a486e29cbc2cee637058b380710
+size 13315258
diff --git a/8b7178b13b/evaluation/generation/examples.8b7178b13b_GEM-wiki_lingua_en_tldr_en_2.jsonl b/8b7178b13b/evaluation/generation/examples.8b7178b13b_GEM-wiki_lingua_en_tldr_en_2.jsonl
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/8b7178b13b/evaluation/generation/examples.8b7178b13b_GEM-wiki_lingua_en_tldr_en_3.jsonl b/8b7178b13b/evaluation/generation/examples.8b7178b13b_GEM-wiki_lingua_en_tldr_en_3.jsonl
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/8b7178b13b/evaluation/generation/examples.8b7178b13b_GEM-wiki_lingua_en_tldr_en_4.jsonl b/8b7178b13b/evaluation/generation/examples.8b7178b13b_GEM-wiki_lingua_en_tldr_en_4.jsonl
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/8b7178b13b/evaluation/generation/examples.8b7178b13b_GEM-wiki_lingua_en_tldr_en_5.jsonl b/8b7178b13b/evaluation/generation/examples.8b7178b13b_GEM-wiki_lingua_en_tldr_en_5.jsonl
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/8b7178b13b/evaluation/generation/examples.8b7178b13b_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl b/8b7178b13b/evaluation/generation/examples.8b7178b13b_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl
new file mode 100644
index 0000000000000000000000000000000000000000..ad47f52b5dffeda682b8f5dd0c897026272020b5
--- /dev/null
+++ b/8b7178b13b/evaluation/generation/examples.8b7178b13b_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1abdb8dadd3b1fe934e1a1ccbbfa0ea4ca5656b2d16e5ff342efcbe5621eabd2
+size 4450074
diff --git a/8b7178b13b/evaluation/generation/examples.8b7178b13b_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl b/8b7178b13b/evaluation/generation/examples.8b7178b13b_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl
new file mode 100644
index 0000000000000000000000000000000000000000..c5c7b732cefc6018fa7bdc3e3889542bf690bd2e
--- /dev/null
+++ b/8b7178b13b/evaluation/generation/examples.8b7178b13b_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:130dc53c9e5caf7253adc9ce05994f043dee9bfaa1d249fa814e50808dd0f69c
+size 5179876
diff --git a/8b7178b13b/evaluation/generation/examples.8b7178b13b_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl b/8b7178b13b/evaluation/generation/examples.8b7178b13b_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl
new file mode 100644
index 0000000000000000000000000000000000000000..51ae2a19d189d2297dca383695ce543c08a720e5
--- /dev/null
+++ b/8b7178b13b/evaluation/generation/examples.8b7178b13b_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:135e1879d04e18711c109c15cea363ed607a6ec9fbc1daf46dbe6ebd0a4f1e08
+size 6247622
diff --git a/8b7178b13b/evaluation/generation/examples.8b7178b13b_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl b/8b7178b13b/evaluation/generation/examples.8b7178b13b_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl
new file mode 100644
index 0000000000000000000000000000000000000000..75fa7d2c24ba91a8ee8956eba39e026129a052aa
--- /dev/null
+++ b/8b7178b13b/evaluation/generation/examples.8b7178b13b_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8d2e2a0f6b09d08e84b83ae3548a2499c56aca470925d5ff3ff61b0ae00a0252
+size 7310505
diff --git a/8b7178b13b/evaluation/generation/examples.8b7178b13b_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl b/8b7178b13b/evaluation/generation/examples.8b7178b13b_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl
new file mode 100644
index 0000000000000000000000000000000000000000..242ebcba864264efc0ea681edddc8fbc4551db0d
--- /dev/null
+++ b/8b7178b13b/evaluation/generation/examples.8b7178b13b_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4c1b15d54c73679ec97ca5f57bab183661dd20887d21bf8c9f8f1efd0abbadfd
+size 8385150
diff --git a/8b7178b13b/evaluation/generation/examples.8b7178b13b_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl b/8b7178b13b/evaluation/generation/examples.8b7178b13b_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl
new file mode 100644
index 0000000000000000000000000000000000000000..617268f5cb0eb9fa5aa4f53020e03a69282ebbc7
--- /dev/null
+++ b/8b7178b13b/evaluation/generation/examples.8b7178b13b_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:617ed30496c1be4389b080e6e664675b18bb04395264dad1e9e369db533f2a6c
+size 9476118
diff --git a/8b7178b13b/evaluation/generation/examples.8b7178b13b_gem_xsum_article_DOC_summary_0.jsonl b/8b7178b13b/evaluation/generation/examples.8b7178b13b_gem_xsum_article_DOC_summary_0.jsonl
new file mode 100644
index 0000000000000000000000000000000000000000..42d882ccba59671e7ea3340953d5021b332ec98a
--- /dev/null
+++ b/8b7178b13b/evaluation/generation/examples.8b7178b13b_gem_xsum_article_DOC_summary_0.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0313d23cd7923b59e2c54d30e60aaf4ef2c75255c88c80bf65e42df6956e07bb
+size 2823747
diff --git a/8b7178b13b/evaluation/generation/examples.8b7178b13b_gem_xsum_article_DOC_summary_1.jsonl b/8b7178b13b/evaluation/generation/examples.8b7178b13b_gem_xsum_article_DOC_summary_1.jsonl
new file mode 100644
index 0000000000000000000000000000000000000000..5d436f6c828fbac946fcb3359d435771372567c3
--- /dev/null
+++ b/8b7178b13b/evaluation/generation/examples.8b7178b13b_gem_xsum_article_DOC_summary_1.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a4493325c3cc1255da093dcc419ad34b7385ad37f68b3849cf8562b4c6bc09b4
+size 5103294
diff --git a/8b7178b13b/evaluation/generation/examples.8b7178b13b_gem_xsum_article_DOC_summary_2.jsonl b/8b7178b13b/evaluation/generation/examples.8b7178b13b_gem_xsum_article_DOC_summary_2.jsonl
new file mode 100644
index 0000000000000000000000000000000000000000..689e1133b9dfd13a1abd631d3469b5c108cb56d5
--- /dev/null
+++ b/8b7178b13b/evaluation/generation/examples.8b7178b13b_gem_xsum_article_DOC_summary_2.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6aa161eabcf632abf8a3a18136f127cde3ecf7519a9319ca940666fd5ab166aa
+size 7378211
diff --git a/8b7178b13b/evaluation/generation/examples.8b7178b13b_gem_xsum_article_DOC_summary_3.jsonl b/8b7178b13b/evaluation/generation/examples.8b7178b13b_gem_xsum_article_DOC_summary_3.jsonl
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/8b7178b13b/evaluation/generation/examples.8b7178b13b_gem_xsum_article_DOC_summary_4.jsonl b/8b7178b13b/evaluation/generation/examples.8b7178b13b_gem_xsum_article_DOC_summary_4.jsonl
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/8b7178b13b/evaluation/generation/examples.8b7178b13b_gem_xsum_article_DOC_summary_5.jsonl b/8b7178b13b/evaluation/generation/examples.8b7178b13b_gem_xsum_article_DOC_summary_5.jsonl
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/8b7178b13b/evaluation/generation/slim.8b7178b13b_GEM-web_nlg_en_PALM_prompt_0.json b/8b7178b13b/evaluation/generation/slim.8b7178b13b_GEM-web_nlg_en_PALM_prompt_0.json
new file mode 100644
index 0000000000000000000000000000000000000000..0129df45eed2ec66dffc528ef27134b36390e7b5
--- /dev/null
+++ b/8b7178b13b/evaluation/generation/slim.8b7178b13b_GEM-web_nlg_en_PALM_prompt_0.json
@@ -0,0 +1,133 @@
+{
+  "results": [
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "bleu": 0.4373157998206226,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "bleu_stderr": 0.056741486001354216
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rouge1_precision": 0.07630675677312203,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge1_precision_stderr": 0.0021365872076980082
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rouge1_recall": 0.2947830429134024,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge1_recall_stderr": 0.0047665492114514684
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rouge1_fmeasure": 0.10947380559420297,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge1_fmeasure_stderr": 0.0022511546576554292
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rouge2_precision": 0.03437396040114199,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge2_precision_stderr": 0.0011646295650624351
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rouge2_recall": 0.13907462394894776,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge2_recall_stderr": 0.0031715068123442996
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rouge2_fmeasure": 0.051001464150726095,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge2_fmeasure_stderr": 0.0014146940985046523
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rougeL_precision": 0.07227253003813615,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeL_precision_stderr": 0.001955466598501427
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rougeL_recall": 0.28336049631237586,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeL_recall_stderr": 0.004569573466829386
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rougeL_fmeasure": 0.10417993615217146,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeL_fmeasure_stderr": 0.002043414019393663
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rougeLsum_precision": 0.07290032072974349,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeLsum_precision_stderr": 0.002034641349821296
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rougeLsum_recall": 0.2825727583996548,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeLsum_recall_stderr": 0.004541613699421234
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rougeLsum_fmeasure": 0.10462419732455051,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeLsum_fmeasure_stderr": 0.002115527416565905
+    }
+  ],
+  "config": {
+    "model": "hf-causal",
+    "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b13b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
+    "task_args": "",
+    "num_fewshot": 0,
+    "batch_size": 16,
+    "device": "cuda",
+    "use_cache": false,
+    "limit": 3000,
+    "bootstrap_iters": 10,
+    "seed": 1234
+  }
+}
\ No newline at end of file
diff --git a/8b7178b13b/evaluation/generation/slim.8b7178b13b_GEM-web_nlg_en_PALM_prompt_1.json b/8b7178b13b/evaluation/generation/slim.8b7178b13b_GEM-web_nlg_en_PALM_prompt_1.json
new file mode 100644
index 0000000000000000000000000000000000000000..60c0a1fe4e2248c9cbe31379cb8b848b0cfa2728
--- /dev/null
+++ b/8b7178b13b/evaluation/generation/slim.8b7178b13b_GEM-web_nlg_en_PALM_prompt_1.json
@@ -0,0 +1,133 @@
+{
+  "results": [
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "bleu": 0.5816764130686213,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "bleu_stderr": 0.054147923323820346
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rouge1_precision": 0.07783086970109386,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge1_precision_stderr": 0.001926390801136041
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rouge1_recall": 0.36711372145643695,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge1_recall_stderr": 0.005207087210896133
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rouge1_fmeasure": 0.11727270364668571,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge1_fmeasure_stderr": 0.0021476260743326565
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rouge2_precision": 0.035941971570071736,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge2_precision_stderr": 0.0011116447277521812
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rouge2_recall": 0.17604536516651662,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge2_recall_stderr": 0.0036353372642678756
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rouge2_fmeasure": 0.05455324665630359,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge2_fmeasure_stderr": 0.0013665856855669073
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rougeL_precision": 0.07209000868611247,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeL_precision_stderr": 0.0017559014471748775
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rougeL_recall": 0.33901675935925657,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeL_recall_stderr": 0.004687996190515175
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rougeL_fmeasure": 0.10871994085882164,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeL_fmeasure_stderr": 0.0019153699814912425
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rougeLsum_precision": 0.0735230628158407,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeLsum_precision_stderr": 0.0018435905984185187
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rougeLsum_recall": 0.34433610068323095,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeLsum_recall_stderr": 0.004792623835429767
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rougeLsum_fmeasure": 0.11053867463797286,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeLsum_fmeasure_stderr": 0.0020173857832705686
+    }
+  ],
+  "config": {
+    "model": "hf-causal",
+    "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b13b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
+    "task_args": "",
+    "num_fewshot": 1,
+    "batch_size": 16,
+    "device": "cuda",
+    "use_cache": false,
+    "limit": 3000,
+    "bootstrap_iters": 10,
+    "seed": 1234
+  }
+}
\ No newline at end of file
diff --git a/8b7178b13b/evaluation/generation/slim.8b7178b13b_GEM-web_nlg_en_PALM_prompt_2.json b/8b7178b13b/evaluation/generation/slim.8b7178b13b_GEM-web_nlg_en_PALM_prompt_2.json
new file mode 100644
index 0000000000000000000000000000000000000000..6ec40008415d462a3091339618a4cdb147740456
--- /dev/null
+++ b/8b7178b13b/evaluation/generation/slim.8b7178b13b_GEM-web_nlg_en_PALM_prompt_2.json
@@ -0,0 +1,133 @@
+{
+  "results": [
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "bleu": 0.6801735381749798,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "bleu_stderr": 0.04514863078336761
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rouge1_precision": 0.07666591908388082,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge1_precision_stderr": 0.0015425382902699712
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rouge1_recall": 0.40028519820196484,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge1_recall_stderr": 0.0053218199498182555
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rouge1_fmeasure": 0.11972585427910692,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge1_fmeasure_stderr": 0.001963983917319172
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rouge2_precision": 0.03562073396147935,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge2_precision_stderr": 0.0009954366405729978
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rouge2_recall": 0.19724892000097954,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge2_recall_stderr": 0.004019172908229666
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rouge2_fmeasure": 0.055583861631680276,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge2_fmeasure_stderr": 0.0012687258801338563
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rougeL_precision": 0.07037367893965144,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeL_precision_stderr": 0.0013558489579657599
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rougeL_recall": 0.3648716666842121,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeL_recall_stderr": 0.004723267009132289
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rougeL_fmeasure": 0.10999138993564177,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeL_fmeasure_stderr": 0.0017648420950003858
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rougeLsum_precision": 0.07198504559682002,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeLsum_precision_stderr": 0.0014532243063151847
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rougeLsum_recall": 0.3737640227903558,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeLsum_recall_stderr": 0.004892612299725926
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rougeLsum_fmeasure": 0.1122905686653009,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeLsum_fmeasure_stderr": 0.0018425170251459826
+    }
+  ],
+  "config": {
+    "model": "hf-causal",
+    "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b13b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
+    "task_args": "",
+    "num_fewshot": 2,
+    "batch_size": 16,
+    "device": "cuda",
+    "use_cache": false,
+    "limit": 3000,
+    "bootstrap_iters": 10,
+    "seed": 1234
+  }
+}
\ No newline at end of file
diff --git a/8b7178b13b/evaluation/generation/slim.8b7178b13b_GEM-web_nlg_en_PALM_prompt_3.json b/8b7178b13b/evaluation/generation/slim.8b7178b13b_GEM-web_nlg_en_PALM_prompt_3.json
new file mode 100644
index 0000000000000000000000000000000000000000..9eaa106978424f88c613e6feab1990be07b1f1b5
--- /dev/null
+++ b/8b7178b13b/evaluation/generation/slim.8b7178b13b_GEM-web_nlg_en_PALM_prompt_3.json
@@ -0,0 +1,133 @@
+{
+  "results": [
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "bleu": 0.8393015493701177,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "bleu_stderr": 0.049868510474836
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rouge1_precision": 0.07934496701755703,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge1_precision_stderr": 0.0017108996646152186
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rouge1_recall": 0.41957311539245606,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge1_recall_stderr": 0.005383968460349908
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rouge1_fmeasure": 0.12332290364001403,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge1_fmeasure_stderr": 0.0020213255478620834
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rouge2_precision": 0.03700401446806371,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge2_precision_stderr": 0.0010754080423489075
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rouge2_recall": 0.20785188427731804,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge2_recall_stderr": 0.0039850136524860235
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rouge2_fmeasure": 0.05769919253564089,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge2_fmeasure_stderr": 0.001315453385968732
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rougeL_precision": 0.07186075331384528,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeL_precision_stderr": 0.0015065682717614625
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rougeL_recall": 0.3792930717112658,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeL_recall_stderr": 0.004756348746075166
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rougeL_fmeasure": 0.11171273283361995,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeL_fmeasure_stderr": 0.0017620828691461753
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rougeLsum_precision": 0.07439776049493438,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeLsum_precision_stderr": 0.0016078356678010679
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rougeLsum_recall": 0.39147213524665503,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeLsum_recall_stderr": 0.004968164479358761
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rougeLsum_fmeasure": 0.11543602475589927,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeLsum_fmeasure_stderr": 0.001888123216509075
+    }
+  ],
+  "config": {
+    "model": "hf-causal",
+    "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b13b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
+    "task_args": "",
+    "num_fewshot": 3,
+    "batch_size": 16,
+    "device": "cuda",
+    "use_cache": false,
+    "limit": 3000,
+    "bootstrap_iters": 10,
+    "seed": 1234
+  }
+}
\ No newline at end of file
diff --git a/8b7178b13b/evaluation/generation/slim.8b7178b13b_GEM-web_nlg_en_PALM_prompt_4.json b/8b7178b13b/evaluation/generation/slim.8b7178b13b_GEM-web_nlg_en_PALM_prompt_4.json
new file mode 100644
index 0000000000000000000000000000000000000000..9ac8372d295e63a979c9b6d99b44524240a6e947
--- /dev/null
+++ b/8b7178b13b/evaluation/generation/slim.8b7178b13b_GEM-web_nlg_en_PALM_prompt_4.json
@@ -0,0 +1,133 @@
+{
+  "results": [
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "bleu": 0.8436391155168642,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "bleu_stderr": 0.049789928911136136
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rouge1_precision": 0.08188000384427346,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge1_precision_stderr": 0.0015429926759057174
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rouge1_recall": 0.45225212783083296,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge1_recall_stderr": 0.005452056426418837
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rouge1_fmeasure": 0.12951808355992003,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge1_fmeasure_stderr": 0.0019403013558157668
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rouge2_precision": 0.03789712819393572,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge2_precision_stderr": 0.0009152136077591718
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rouge2_recall": 0.22689495723762804,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge2_recall_stderr": 0.004067188852506108
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rouge2_fmeasure": 0.06050302495157815,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge2_fmeasure_stderr": 0.0012369724150331927
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rougeL_precision": 0.07330729359090057,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeL_precision_stderr": 0.0013644419423480823
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rougeL_recall": 0.4045943171397325,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeL_recall_stderr": 0.0048320778663538955
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rougeL_fmeasure": 0.11592286526905457,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeL_fmeasure_stderr": 0.0016906240203803347
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rougeLsum_precision": 0.07637812548771711,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeLsum_precision_stderr": 0.0014540942217052375
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rougeLsum_recall": 0.4207633762253156,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeLsum_recall_stderr": 0.005005614020197891
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rougeLsum_fmeasure": 0.12060522562383363,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeLsum_fmeasure_stderr": 0.0018022149452879743
+    }
+  ],
+  "config": {
+    "model": "hf-causal",
+    "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b13b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
+    "task_args": "",
+    "num_fewshot": 4,
+    "batch_size": 16,
+    "device": "cuda",
+    "use_cache": false,
+    "limit": 3000,
+    "bootstrap_iters": 10,
+    "seed": 1234
+  }
+}
\ No newline at end of file
diff --git a/8b7178b13b/evaluation/generation/slim.8b7178b13b_GEM-web_nlg_en_PALM_prompt_5.json b/8b7178b13b/evaluation/generation/slim.8b7178b13b_GEM-web_nlg_en_PALM_prompt_5.json
new file mode 100644
index 0000000000000000000000000000000000000000..f306e46c88c1212f2e08c302c340b32458c0d91d
--- /dev/null
+++ b/8b7178b13b/evaluation/generation/slim.8b7178b13b_GEM-web_nlg_en_PALM_prompt_5.json
@@ -0,0 +1,133 @@
+{
+  "results": [
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "bleu": 0.968801645723095,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "bleu_stderr": 0.05462868115593731
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rouge1_precision": 0.08408541082259362,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge1_precision_stderr": 0.001415159971957011
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rouge1_recall": 0.4730441639084742,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge1_recall_stderr": 0.005431978662817184
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rouge1_fmeasure": 0.13468892691608006,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge1_fmeasure_stderr": 0.0019240731337134673
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rouge2_precision": 0.038803485654806714,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge2_precision_stderr": 0.0008788058278028121
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rouge2_recall": 0.2379631167113304,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge2_recall_stderr": 0.004152278672512853
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rouge2_fmeasure": 0.06252454636994373,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge2_fmeasure_stderr": 0.0012377179103821956
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rougeL_precision": 0.07423644311431606,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeL_precision_stderr": 0.0012142160071926726
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rougeL_recall": 0.4194644730807224,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeL_recall_stderr": 0.0047799777603664715
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rougeL_fmeasure": 0.11896213494013362,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeL_fmeasure_stderr": 0.001650634135835398
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rougeLsum_precision": 0.07800322392434518,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeLsum_precision_stderr": 0.0013202039315876929
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rougeLsum_recall": 0.43851085762444947,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeLsum_recall_stderr": 0.004968418620592047
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rougeLsum_fmeasure": 0.12479160570822044,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeLsum_fmeasure_stderr": 0.0017844920178026805
+    }
+  ],
+  "config": {
+    "model": "hf-causal",
+    "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b13b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
+    "task_args": "",
+    "num_fewshot": 5,
+    "batch_size": 16,
+    "device": "cuda",
+    "use_cache": false,
+    "limit": 3000,
+    "bootstrap_iters": 10,
+    "seed": 1234
+  }
+}
\ No newline at end of file
diff --git a/8b7178b13b/evaluation/generation/slim.8b7178b13b_GEM-wiki_lingua_en_tldr_en_0.json b/8b7178b13b/evaluation/generation/slim.8b7178b13b_GEM-wiki_lingua_en_tldr_en_0.json
new file mode 100644
index 0000000000000000000000000000000000000000..de4131bae6090790e6a6e627c0de7516f37a263e
--- /dev/null
+++ b/8b7178b13b/evaluation/generation/slim.8b7178b13b_GEM-wiki_lingua_en_tldr_en_0.json
@@ -0,0 +1,133 @@
+{
+  "results": [
+    {
+      "task_name": "GEM/wiki_lingua_en",
+      "prompt_name": "tldr_en",
+      "rouge1_precision": 0.1628352267958049,
+      "dataset_path": "GEM/wiki_lingua",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge1_precision_stderr": 0.0019076289743005436
+    },
+    {
+      "task_name": "GEM/wiki_lingua_en",
+      "prompt_name": "tldr_en",
+      "rouge1_recall": 0.28137034403026634,
+      "dataset_path": "GEM/wiki_lingua",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge1_recall_stderr": 0.002726139405286867
+    },
+    {
+      "task_name": "GEM/wiki_lingua_en",
+      "prompt_name": "tldr_en",
+      "rouge1_fmeasure": 0.19188833167040015,
+      "dataset_path": "GEM/wiki_lingua",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge1_fmeasure_stderr": 0.0018702943944022418
+    },
+    {
+      "task_name": "GEM/wiki_lingua_en",
+      "prompt_name": "tldr_en",
+      "rouge2_precision": 0.035168122195033014,
+      "dataset_path": "GEM/wiki_lingua",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge2_precision_stderr": 0.0008051014260490183
+    },
+    {
+      "task_name": "GEM/wiki_lingua_en",
+      "prompt_name": "tldr_en",
+      "rouge2_recall": 0.06371619685684557,
+      "dataset_path": "GEM/wiki_lingua",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge2_recall_stderr": 0.0015314676134498058
+    },
+    {
+      "task_name": "GEM/wiki_lingua_en",
+      "prompt_name": "tldr_en",
+      "rouge2_fmeasure": 0.04191366659253858,
+      "dataset_path": "GEM/wiki_lingua",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge2_fmeasure_stderr": 0.000917658619128283
+    },
+    {
+      "task_name": "GEM/wiki_lingua_en",
+      "prompt_name": "tldr_en",
+      "rougeL_precision": 0.12246239102421737,
+      "dataset_path": "GEM/wiki_lingua",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeL_precision_stderr": 0.001285311648356421
+    },
+    {
+      "task_name": "GEM/wiki_lingua_en",
+      "prompt_name": "tldr_en",
+      "rougeL_recall": 0.2201424968967575,
+      "dataset_path": "GEM/wiki_lingua",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeL_recall_stderr": 0.0022006617198564397
+    },
+    {
+      "task_name": "GEM/wiki_lingua_en",
+      "prompt_name": "tldr_en",
+      "rougeL_fmeasure": 0.14614932555210797,
+      "dataset_path": "GEM/wiki_lingua",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeL_fmeasure_stderr": 0.0012956973040530264
+    },
+    {
+      "task_name": "GEM/wiki_lingua_en",
+      "prompt_name": "tldr_en",
+      "rougeLsum_precision": 0.14960133985735732,
+      "dataset_path": "GEM/wiki_lingua",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeLsum_precision_stderr": 0.0017445585328390875
+    },
+    {
+      "task_name": "GEM/wiki_lingua_en",
+      "prompt_name": "tldr_en",
+      "rougeLsum_recall": 0.2600296050390341,
+      "dataset_path": "GEM/wiki_lingua",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeLsum_recall_stderr": 0.002552101400202146
+    },
+    {
+      "task_name": "GEM/wiki_lingua_en",
+      "prompt_name": "tldr_en",
+      "rougeLsum_fmeasure": 0.17662543049894286,
+      "dataset_path": "GEM/wiki_lingua",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeLsum_fmeasure_stderr": 0.001716560474974557
+    },
+    {
+      "task_name": "GEM/wiki_lingua_en",
+      "prompt_name": "tldr_en",
+      "bleu": 1.8310798418084737,
+      "dataset_path": "GEM/wiki_lingua",
+      "dataset_name": "en",
+      "subset": null,
+      "bleu_stderr": 0.06119305662626742
+    }
+  ],
+  "config": {
+    "model": "hf-causal",
+    "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b13b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
+    "task_args": "",
+    "num_fewshot": 0,
+    "batch_size": 16,
+    "device": "cuda",
+    "use_cache": false,
+    "limit": 3000,
+    "bootstrap_iters": 10,
+    "seed": 1234
+  }
+}
\ No newline at end of file
diff --git a/8b7178b13b/evaluation/generation/slim.8b7178b13b_GEM-wiki_lingua_en_tldr_en_1.json b/8b7178b13b/evaluation/generation/slim.8b7178b13b_GEM-wiki_lingua_en_tldr_en_1.json
new file mode 100644
index 0000000000000000000000000000000000000000..ccce1eb11a88ad2bf28961628ec2308884b54583
--- /dev/null
+++ b/8b7178b13b/evaluation/generation/slim.8b7178b13b_GEM-wiki_lingua_en_tldr_en_1.json
@@ -0,0 +1,133 @@
+{
+  "results": [
+    {
+      "task_name": "GEM/wiki_lingua_en",
+      "prompt_name": "tldr_en",
+      "rouge1_precision": 0.17507251962841255,
+      "dataset_path": "GEM/wiki_lingua",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge1_precision_stderr": 0.0019979193178610104
+    },
+    {
+      "task_name": "GEM/wiki_lingua_en",
+      "prompt_name": "tldr_en",
+      "rouge1_recall": 0.3008600636001003,
+      "dataset_path": "GEM/wiki_lingua",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge1_recall_stderr": 0.002760929751246215
+    },
+    {
+      "task_name": "GEM/wiki_lingua_en",
+      "prompt_name": "tldr_en",
+      "rouge1_fmeasure": 0.20607218590152535,
+      "dataset_path": "GEM/wiki_lingua",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge1_fmeasure_stderr": 0.0019348192676270253
+    },
+    {
+      "task_name": "GEM/wiki_lingua_en",
+      "prompt_name": "tldr_en",
+      "rouge2_precision": 0.040814840621502924,
+      "dataset_path": "GEM/wiki_lingua",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge2_precision_stderr": 0.0008769075552676463
+    },
+    {
+      "task_name": "GEM/wiki_lingua_en",
+      "prompt_name": "tldr_en",
+      "rouge2_recall": 0.07239534384858555,
+      "dataset_path": "GEM/wiki_lingua",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge2_recall_stderr": 0.0015745819458817655
+    },
+    {
+      "task_name": "GEM/wiki_lingua_en",
+      "prompt_name": "tldr_en",
+      "rouge2_fmeasure": 0.04827166271383884,
+      "dataset_path": "GEM/wiki_lingua",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge2_fmeasure_stderr": 0.000981638580890638
+    },
+    {
+      "task_name": "GEM/wiki_lingua_en",
+      "prompt_name": "tldr_en",
+      "rougeL_precision": 0.12604513290190442,
+      "dataset_path": "GEM/wiki_lingua",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeL_precision_stderr": 0.001331288983523961
+    },
+    {
+      "task_name": "GEM/wiki_lingua_en",
+      "prompt_name": "tldr_en",
+      "rougeL_recall": 0.22410236240350495,
+      "dataset_path": "GEM/wiki_lingua",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeL_recall_stderr": 0.0021252698749924166
+    },
+    {
+      "task_name": "GEM/wiki_lingua_en",
+      "prompt_name": "tldr_en",
+      "rougeL_fmeasure": 0.14993882097099004,
+      "dataset_path": "GEM/wiki_lingua",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeL_fmeasure_stderr": 0.0013143528095662098
+    },
+    {
+      "task_name": "GEM/wiki_lingua_en",
+      "prompt_name": "tldr_en",
+      "rougeLsum_precision": 0.1634506211607229,
+      "dataset_path": "GEM/wiki_lingua",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeLsum_precision_stderr": 0.0018573519091366366
+    },
+    {
+      "task_name": "GEM/wiki_lingua_en",
+      "prompt_name": "tldr_en",
+      "rougeLsum_recall": 0.282240921650429,
+      "dataset_path": "GEM/wiki_lingua",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeLsum_recall_stderr": 0.002619133620350951
+    },
+    {
+      "task_name": "GEM/wiki_lingua_en",
+      "prompt_name": "tldr_en",
+      "rougeLsum_fmeasure": 0.19267274247494454,
+      "dataset_path": "GEM/wiki_lingua",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeLsum_fmeasure_stderr": 0.0018042087124662393
+    },
+    {
+      "task_name": "GEM/wiki_lingua_en",
+      "prompt_name": "tldr_en",
+      "bleu": 2.463691563435929,
+      "dataset_path": "GEM/wiki_lingua",
+      "dataset_name": "en",
+      "subset": null,
+      "bleu_stderr": 0.056238669871763534
+    }
+  ],
+  "config": {
+    "model": "hf-causal",
+    "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b13b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
+    "task_args": "",
+    "num_fewshot": 1,
+    "batch_size": 16,
+    "device": "cuda",
+    "use_cache": false,
+    "limit": 3000,
+    "bootstrap_iters": 10,
+    "seed": 1234
+  }
+}
\ No newline at end of file
diff --git a/8b7178b13b/evaluation/generation/slim.8b7178b13b_e2e_nlg_cleaned_generate_text_restaurant_0.json b/8b7178b13b/evaluation/generation/slim.8b7178b13b_e2e_nlg_cleaned_generate_text_restaurant_0.json
new file mode 100644
index 0000000000000000000000000000000000000000..830da6644a2d3a2788269bbf5d58afabb332d3eb
--- /dev/null
+++ b/8b7178b13b/evaluation/generation/slim.8b7178b13b_e2e_nlg_cleaned_generate_text_restaurant_0.json
@@ -0,0 +1,133 @@
+{
+  "results": [
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "bleu": 0.3016960813220789,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "bleu_stderr": 0.04082243698870125
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rouge1_precision": 0.0628362517057776,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rouge1_precision_stderr": 0.0006647851313912593
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rouge1_recall": 0.137799699754724,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rouge1_recall_stderr": 0.0013399199497499206
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rouge1_fmeasure": 0.08446489060239885,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rouge1_fmeasure_stderr": 0.000832880994211257
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rouge2_precision": 0.009897778438587298,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rouge2_precision_stderr": 0.0002813167869752497
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rouge2_recall": 0.02030644827526888,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rouge2_recall_stderr": 0.0005626168002295672
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rouge2_fmeasure": 0.013098749477153882,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rouge2_fmeasure_stderr": 0.00036393407871382073
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rougeL_precision": 0.060894085509621707,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rougeL_precision_stderr": 0.0006165662598640652
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rougeL_recall": 0.1334600824136164,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rougeL_recall_stderr": 0.0012622226683087598
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rougeL_fmeasure": 0.08185345358329085,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rougeL_fmeasure_stderr": 0.0007758077715250189
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rougeLsum_precision": 0.05822489106803366,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rougeLsum_precision_stderr": 0.00059732648786741
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rougeLsum_recall": 0.12759747263850774,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rougeLsum_recall_stderr": 0.0011920315451119745
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rougeLsum_fmeasure": 0.07821933484300712,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rougeLsum_fmeasure_stderr": 0.000742258462877918
+    }
+  ],
+  "config": {
+    "model": "hf-causal",
+    "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b13b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
+    "task_args": "",
+    "num_fewshot": 0,
+    "batch_size": 16,
+    "device": "cuda",
+    "use_cache": false,
+    "limit": 3000,
+    "bootstrap_iters": 10,
+    "seed": 1234
+  }
+}
\ No newline at end of file
diff --git a/8b7178b13b/evaluation/generation/slim.8b7178b13b_e2e_nlg_cleaned_generate_text_restaurant_1.json b/8b7178b13b/evaluation/generation/slim.8b7178b13b_e2e_nlg_cleaned_generate_text_restaurant_1.json
new file mode 100644
index 0000000000000000000000000000000000000000..b84ba9097759a6ef3c5dc348993a21c461992866
--- /dev/null
+++ b/8b7178b13b/evaluation/generation/slim.8b7178b13b_e2e_nlg_cleaned_generate_text_restaurant_1.json
@@ -0,0 +1,133 @@
+{
+  "results": [
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "bleu": 9.014911506469076,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "bleu_stderr": 0.10639404635916568
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rouge1_precision": 0.4220547738164344,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rouge1_precision_stderr": 0.0029747898853502135
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rouge1_recall": 0.44890682882262095,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rouge1_recall_stderr": 0.0030482256607117985
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rouge1_fmeasure": 0.4071860181343587,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rouge1_fmeasure_stderr": 0.002172401493926826
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rouge2_precision": 0.18398717679998686,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rouge2_precision_stderr": 0.002070314538395242
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rouge2_recall": 0.19695708046780447,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rouge2_recall_stderr": 0.0021253468809232486
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rouge2_fmeasure": 0.17675735102863532,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rouge2_fmeasure_stderr": 0.0016983524897307275
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rougeL_precision": 0.301979337271323,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rougeL_precision_stderr": 0.002426170411020153
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rougeL_recall": 0.32120889976820555,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rougeL_recall_stderr": 0.0024556761259021593
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rougeL_fmeasure": 0.2901955244768342,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rougeL_fmeasure_stderr": 0.0017445755551563752
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rougeLsum_precision": 0.3498631569304881,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rougeLsum_precision_stderr": 0.00275693533807641
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rougeLsum_recall": 0.3704033862486897,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rougeLsum_recall_stderr": 0.00277359280896307
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rougeLsum_fmeasure": 0.3366538075182426,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rougeLsum_fmeasure_stderr": 0.0020840116286440125
+    }
+  ],
+  "config": {
+    "model": "hf-causal",
+    "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b13b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
+    "task_args": "",
+    "num_fewshot": 1,
+    "batch_size": 16,
+    "device": "cuda",
+    "use_cache": false,
+    "limit": 3000,
+    "bootstrap_iters": 10,
+    "seed": 1234
+  }
+}
\ No newline at end of file
diff --git a/8b7178b13b/evaluation/generation/slim.8b7178b13b_e2e_nlg_cleaned_generate_text_restaurant_2.json b/8b7178b13b/evaluation/generation/slim.8b7178b13b_e2e_nlg_cleaned_generate_text_restaurant_2.json
new file mode 100644
index 0000000000000000000000000000000000000000..e6578da5b9343fac312eed7a233ec47886c5c1cd
--- /dev/null
+++ b/8b7178b13b/evaluation/generation/slim.8b7178b13b_e2e_nlg_cleaned_generate_text_restaurant_2.json
@@ -0,0 +1,133 @@
+{
+  "results": [
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "bleu": 11.1355912496663,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "bleu_stderr": 0.13104161087327487
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rouge1_precision": 0.45419518183344554,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rouge1_precision_stderr": 0.0027266345121146297
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rouge1_recall": 0.48044140772752547,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rouge1_recall_stderr": 0.002817950150203246
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rouge1_fmeasure": 0.4449026725503146,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rouge1_fmeasure_stderr": 0.0020303864410380614
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rouge2_precision": 0.2124884501715128,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rouge2_precision_stderr": 0.0020421137596173576
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rouge2_recall": 0.22663601203825093,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rouge2_recall_stderr": 0.0021799430214164035
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rouge2_fmeasure": 0.20791813280930055,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rouge2_fmeasure_stderr": 0.0017732074653438918
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rougeL_precision": 0.32873823752244813,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rougeL_precision_stderr": 0.002262430136729236
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rougeL_recall": 0.34880236417169236,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rougeL_recall_stderr": 0.0024064753259342344
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rougeL_fmeasure": 0.32200623778436555,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rougeL_fmeasure_stderr": 0.0017788348099423016
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rougeLsum_precision": 0.38047010773438,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rougeLsum_precision_stderr": 0.002559842275120574
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rougeLsum_recall": 0.40224234667337105,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rougeLsum_recall_stderr": 0.002660545444016772
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rougeLsum_fmeasure": 0.37259609212766276,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rougeLsum_fmeasure_stderr": 0.0020305961255707534
+    }
+  ],
+  "config": {
+    "model": "hf-causal",
+    "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b13b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
+    "task_args": "",
+    "num_fewshot": 2,
+    "batch_size": 16,
+    "device": "cuda",
+    "use_cache": false,
+    "limit": 3000,
+    "bootstrap_iters": 10,
+    "seed": 1234
+  }
+}
\ No newline at end of file
diff --git a/8b7178b13b/evaluation/generation/slim.8b7178b13b_e2e_nlg_cleaned_generate_text_restaurant_3.json b/8b7178b13b/evaluation/generation/slim.8b7178b13b_e2e_nlg_cleaned_generate_text_restaurant_3.json
new file mode 100644
index 0000000000000000000000000000000000000000..ba94275d4b2c531c8c17c3b9f0486bb7315152fe
--- /dev/null
+++ b/8b7178b13b/evaluation/generation/slim.8b7178b13b_e2e_nlg_cleaned_generate_text_restaurant_3.json
@@ -0,0 +1,133 @@
+{
+  "results": [
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "bleu": 12.10470203084112,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "bleu_stderr": 0.160895558301053
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rouge1_precision": 0.45456971607102353,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rouge1_precision_stderr": 0.0023960763531178423
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rouge1_recall": 0.47953391372338766,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rouge1_recall_stderr": 0.0027865520920584822
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rouge1_fmeasure": 0.4492382386896765,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rouge1_fmeasure_stderr": 0.0019699301602619833
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rouge2_precision": 0.2163784140584202,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rouge2_precision_stderr": 0.001980036007401307
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rouge2_recall": 0.23028758666497792,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rouge2_recall_stderr": 0.0022756293438486903
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rouge2_fmeasure": 0.2138339925008178,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rouge2_fmeasure_stderr": 0.0018445417129634963
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rougeL_precision": 0.33426681511660167,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rougeL_precision_stderr": 0.002069523940604892
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rougeL_recall": 0.35354783483337526,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rougeL_recall_stderr": 0.0024228991972543336
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rougeL_fmeasure": 0.3303919839118561,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rougeL_fmeasure_stderr": 0.001783388701966232
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rougeLsum_precision": 0.3827360863233357,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rougeLsum_precision_stderr": 0.002359678766192712
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rougeLsum_recall": 0.40399025031746394,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rougeLsum_recall_stderr": 0.002710450542079631
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rougeLsum_fmeasure": 0.378239342272828,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rougeLsum_fmeasure_stderr": 0.002043462221499771
+    }
+  ],
+  "config": {
+    "model": "hf-causal",
+    "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b13b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
+    "task_args": "",
+    "num_fewshot": 3,
+    "batch_size": 16,
+    "device": "cuda",
+    "use_cache": false,
+    "limit": 3000,
+    "bootstrap_iters": 10,
+    "seed": 1234
+  }
+}
\ No newline at end of file
diff --git a/8b7178b13b/evaluation/generation/slim.8b7178b13b_e2e_nlg_cleaned_generate_text_restaurant_4.json b/8b7178b13b/evaluation/generation/slim.8b7178b13b_e2e_nlg_cleaned_generate_text_restaurant_4.json
new file mode 100644
index 0000000000000000000000000000000000000000..51501f24602cedefee090c66612d4638d4c5e4c2
--- /dev/null
+++ b/8b7178b13b/evaluation/generation/slim.8b7178b13b_e2e_nlg_cleaned_generate_text_restaurant_4.json
@@ -0,0 +1,133 @@
+{
+  "results": [
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "bleu": 12.44085622955195,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "bleu_stderr": 0.13067375364343567
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rouge1_precision": 0.45479182860495637,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rouge1_precision_stderr": 0.0023107278825576553
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rouge1_recall": 0.48017725960351876,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rouge1_recall_stderr": 0.0027316537664281945
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rouge1_fmeasure": 0.4511875093908521,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rouge1_fmeasure_stderr": 0.001948885960105966
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rouge2_precision": 0.21833473257724795,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rouge2_precision_stderr": 0.0019502917664114363
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rouge2_recall": 0.23333710855434647,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rouge2_recall_stderr": 0.002269432186215575
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rouge2_fmeasure": 0.21707781052843164,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rouge2_fmeasure_stderr": 0.00185164014349205
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rougeL_precision": 0.3366370287941819,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rougeL_precision_stderr": 0.0019965287497770777
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rougeL_recall": 0.35684028344910274,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rougeL_recall_stderr": 0.0024057773390550254
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rougeL_fmeasure": 0.33434874026181494,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rougeL_fmeasure_stderr": 0.0017786642192651214
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rougeLsum_precision": 0.38571523251054607,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rougeLsum_precision_stderr": 0.0022865314323742805
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rougeLsum_recall": 0.40803304829345277,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rougeLsum_recall_stderr": 0.0027038368009947755
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rougeLsum_fmeasure": 0.38297835064075053,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rougeLsum_fmeasure_stderr": 0.0020438066934495387
+    }
+  ],
+  "config": {
+    "model": "hf-causal",
+    "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b13b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
+    "task_args": "",
+    "num_fewshot": 4,
+    "batch_size": 16,
+    "device": "cuda",
+    "use_cache": false,
+    "limit": 3000,
+    "bootstrap_iters": 10,
+    "seed": 1234
+  }
+}
\ No newline at end of file
diff --git a/8b7178b13b/evaluation/generation/slim.8b7178b13b_e2e_nlg_cleaned_generate_text_restaurant_5.json b/8b7178b13b/evaluation/generation/slim.8b7178b13b_e2e_nlg_cleaned_generate_text_restaurant_5.json
new file mode 100644
index 0000000000000000000000000000000000000000..a7c0e38d7ee591381ab3343c7228b8fd8a4f3459
--- /dev/null
+++ b/8b7178b13b/evaluation/generation/slim.8b7178b13b_e2e_nlg_cleaned_generate_text_restaurant_5.json
@@ -0,0 +1,133 @@
+{
+  "results": [
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "bleu": 12.352678257477072,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "bleu_stderr": 0.1229858870702429
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rouge1_precision": 0.4536679355694742,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rouge1_precision_stderr": 0.002282805690194143
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rouge1_recall": 0.48257839264210245,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rouge1_recall_stderr": 0.002694452362144022
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rouge1_fmeasure": 0.4520495564262539,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rouge1_fmeasure_stderr": 0.0019091091728356577
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rouge2_precision": 0.2184937204533077,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rouge2_precision_stderr": 0.0019084340007320115
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rouge2_recall": 0.2348008779716612,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rouge2_recall_stderr": 0.0022303214323899125
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rouge2_fmeasure": 0.21807002700335165,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rouge2_fmeasure_stderr": 0.0018243298667958761
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rougeL_precision": 0.33692930278278704,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rougeL_precision_stderr": 0.0019923203298548866
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rougeL_recall": 0.36002100624003847,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rougeL_recall_stderr": 0.0024053149539646364
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rougeL_fmeasure": 0.336313193439231,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rougeL_fmeasure_stderr": 0.001791714678974818
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rougeLsum_precision": 0.38599958036488224,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rougeLsum_precision_stderr": 0.002272531904720447
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rougeLsum_recall": 0.4113378384062774,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rougeLsum_recall_stderr": 0.002667546056299814
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rougeLsum_fmeasure": 0.38490335194628533,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rougeLsum_fmeasure_stderr": 0.002018489316229454
+    }
+  ],
+  "config": {
+    "model": "hf-causal",
+    "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b13b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
+    "task_args": "",
+    "num_fewshot": 5,
+    "batch_size": 16,
+    "device": "cuda",
+    "use_cache": false,
+    "limit": 3000,
+    "bootstrap_iters": 10,
+    "seed": 1234
+  }
+}
\ No newline at end of file
diff --git a/8b7178b13b/evaluation/generation/slim.8b7178b13b_gem_xsum_article_DOC_summary_0.json b/8b7178b13b/evaluation/generation/slim.8b7178b13b_gem_xsum_article_DOC_summary_0.json
new file mode 100644
index 0000000000000000000000000000000000000000..337f5f76a01eaa500d5328a3c0a0b8847ff98fc7
--- /dev/null
+++ b/8b7178b13b/evaluation/generation/slim.8b7178b13b_gem_xsum_article_DOC_summary_0.json
@@ -0,0 +1,133 @@
+{
+  "results": [
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rouge1_precision": 0.15954668519186138,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rouge1_precision_stderr": 0.002100617390256336
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rouge1_recall": 0.36309739763041776,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rouge1_recall_stderr": 0.004368615445871372
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rouge1_fmeasure": 0.2157374708631877,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rouge1_fmeasure_stderr": 0.002508783327476019
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rouge2_precision": 0.03857312620535916,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rouge2_precision_stderr": 0.0012905906114606838
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rouge2_recall": 0.09177566784631346,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rouge2_recall_stderr": 0.0029866334470645636
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rouge2_fmeasure": 0.05279448511938101,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rouge2_fmeasure_stderr": 0.001664044690700437
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rougeL_precision": 0.11839547499241576,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rougeL_precision_stderr": 0.001646949604960197
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rougeL_recall": 0.27198898577078495,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rougeL_recall_stderr": 0.0035222426961979128
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rougeL_fmeasure": 0.1604319788556621,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rougeL_fmeasure_stderr": 0.0019556284658093183
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rougeLsum_precision": 0.1250746587410693,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rougeLsum_precision_stderr": 0.0017464873888176696
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rougeLsum_recall": 0.28828909491111,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rougeLsum_recall_stderr": 0.0038785736556512756
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rougeLsum_fmeasure": 0.1697976651408494,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rougeLsum_fmeasure_stderr": 0.002135233558113895
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "bleu": 2.194933977793404,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "bleu_stderr": 0.09650169688197711
+    }
+  ],
+  "config": {
+    "model": "hf-causal",
+    "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b13b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
+    "task_args": "",
+    "num_fewshot": 0,
+    "batch_size": 16,
+    "device": "cuda",
+    "use_cache": false,
+    "limit": 3000,
+    "bootstrap_iters": 10,
+    "seed": 1234
+  }
+}
\ No newline at end of file
diff --git a/8b7178b13b/evaluation/generation/slim.8b7178b13b_gem_xsum_article_DOC_summary_1.json b/8b7178b13b/evaluation/generation/slim.8b7178b13b_gem_xsum_article_DOC_summary_1.json
new file mode 100644
index 0000000000000000000000000000000000000000..f1d92b8f27f2c6b36ea3110ae86cca4714070d3d
--- /dev/null
+++ b/8b7178b13b/evaluation/generation/slim.8b7178b13b_gem_xsum_article_DOC_summary_1.json
@@ -0,0 +1,133 @@
+{
+  "results": [
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rouge1_precision": 0.130085376048313,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rouge1_precision_stderr": 0.0019273226219467348
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rouge1_recall": 0.31899443410148204,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rouge1_recall_stderr": 0.004375864955378675
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rouge1_fmeasure": 0.18261422772647087,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rouge1_fmeasure_stderr": 0.00258517997013667
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rouge2_precision": 0.027035766317831913,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rouge2_precision_stderr": 0.0010834329723279196
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rouge2_recall": 0.06843246331973844,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rouge2_recall_stderr": 0.002783755254166952
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rouge2_fmeasure": 0.03829439148207006,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rouge2_fmeasure_stderr": 0.0015268296315399182
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rougeL_precision": 0.09860861520075329,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rougeL_precision_stderr": 0.0014180838773530536
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rougeL_recall": 0.2439616017337858,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rougeL_recall_stderr": 0.003388978319546548
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rougeL_fmeasure": 0.13872448607191418,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rougeL_fmeasure_stderr": 0.0019214547149178605
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rougeLsum_precision": 0.10431817474650688,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rougeLsum_precision_stderr": 0.0015880489605249845
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rougeLsum_recall": 0.25778306821268704,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rougeLsum_recall_stderr": 0.0037812899736781526
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rougeLsum_fmeasure": 0.14673205236069722,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rougeLsum_fmeasure_stderr": 0.002156691093638724
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "bleu": 1.638275312642142,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "bleu_stderr": 0.11197811159644402
+    }
+  ],
+  "config": {
+    "model": "hf-causal",
+    "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b13b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
+    "task_args": "",
+    "num_fewshot": 1,
+    "batch_size": 16,
+    "device": "cuda",
+    "use_cache": false,
+    "limit": 3000,
+    "bootstrap_iters": 10,
+    "seed": 1234
+  }
+}
\ No newline at end of file
diff --git a/8b7178b13b/evaluation/generation/slim.8b7178b13b_gem_xsum_article_DOC_summary_2.json b/8b7178b13b/evaluation/generation/slim.8b7178b13b_gem_xsum_article_DOC_summary_2.json
new file mode 100644
index 0000000000000000000000000000000000000000..b522252ce9001ae8dcfaf4c8004be09865ca8bf7
--- /dev/null
+++ b/8b7178b13b/evaluation/generation/slim.8b7178b13b_gem_xsum_article_DOC_summary_2.json
@@ -0,0 +1,133 @@
+{
+  "results": [
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rouge1_precision": 0.14059986141007405,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rouge1_precision_stderr": 0.001980010991959053
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rouge1_recall": 0.34704590818514364,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rouge1_recall_stderr": 0.004609783640663682
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rouge1_fmeasure": 0.1978001315070318,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rouge1_fmeasure_stderr": 0.002669936318830053
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rouge2_precision": 0.033448694107769246,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rouge2_precision_stderr": 0.0011984145262489562
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rouge2_recall": 0.08605872654054372,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rouge2_recall_stderr": 0.003191665944600775
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rouge2_fmeasure": 0.04760922765448965,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rouge2_fmeasure_stderr": 0.001702755511678876
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rougeL_precision": 0.10721276455620249,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rougeL_precision_stderr": 0.0014796729903083395
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rougeL_recall": 0.26727650609085896,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rougeL_recall_stderr": 0.0036995069904560843
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rougeL_fmeasure": 0.1511855886298728,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rougeL_fmeasure_stderr": 0.0020261890172370246
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rougeLsum_precision": 0.11346372550782761,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rougeLsum_precision_stderr": 0.0016303825401644061
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rougeLsum_recall": 0.28288787023938955,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rougeLsum_recall_stderr": 0.004038388892533831
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rougeLsum_fmeasure": 0.16004925294103853,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rougeLsum_fmeasure_stderr": 0.0022364087298009884
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "bleu": 2.0210914476360786,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "bleu_stderr": 0.12454216962651855
+    }
+  ],
+  "config": {
+    "model": "hf-causal",
+    "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b13b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
+    "task_args": "",
+    "num_fewshot": 2,
+    "batch_size": 16,
+    "device": "cuda",
+    "use_cache": false,
+    "limit": 3000,
+    "bootstrap_iters": 10,
+    "seed": 1234
+  }
+}
\ No newline at end of file
diff --git a/8b7178b13b/evaluation/rankeval/8b7178b13b_0.json b/8b7178b13b/evaluation/rankeval/8b7178b13b_0.json
new file mode 100644
index 0000000000000000000000000000000000000000..7d929e15d881f66702b0464d88325f7749217183
--- /dev/null
+++ b/8b7178b13b/evaluation/rankeval/8b7178b13b_0.json
@@ -0,0 +1,87 @@
+{
+    "results": {
+        "anli_r1": {
+            "acc": 0.35,
+            "acc_stderr": 0.01509065034144423
+        },
+        "anli_r2": {
+            "acc": 0.346,
+            "acc_stderr": 0.015050266127564448
+        },
+        "anli_r3": {
+            "acc": 0.355,
+            "acc_stderr": 0.013819249004047296
+        },
+        "cb": {
+            "acc": 0.26785714285714285,
+            "acc_stderr": 0.05971290310957636,
+            "f1": 0.2374338624338624
+        },
+        "copa": {
+            "acc": 0.81,
+            "acc_stderr": 0.03942772444036623
+        },
+        "hellaswag": {
+            "acc": 0.5215096594303924,
+            "acc_stderr": 0.004985162074336112,
+            "acc_norm": 0.6843258315076678,
+            "acc_norm_stderr": 0.004638339207348913
+        },
+        "rte": {
+            "acc": 0.5270758122743683,
+            "acc_stderr": 0.030052303463143706
+        },
+        "winogrande": {
+            "acc": 0.5659037095501184,
+            "acc_stderr": 0.013929882555694054
+        },
+        "storycloze_2016": {
+            "acc": 0.7455905932656334,
+            "acc_stderr": 0.010071542492663043
+        },
+        "boolq": {
+            "acc": 0.6376146788990825,
+            "acc_stderr": 0.008407308655864048
+        },
+        "arc_easy": {
+            "acc": 0.6220538720538721,
+            "acc_stderr": 0.009949405744045469,
+            "acc_norm": 0.5395622895622896,
+            "acc_norm_stderr": 0.010227616386289017
+        },
+        "arc_challenge": {
+            "acc": 0.29180887372013653,
+            "acc_stderr": 0.013284525292403496,
+            "acc_norm": 0.3216723549488055,
+            "acc_norm_stderr": 0.013650488084494162
+        },
+        "sciq": {
+            "acc": 0.87,
+            "acc_stderr": 0.010640169792499361,
+            "acc_norm": 0.807,
+            "acc_norm_stderr": 0.012486268734370145
+        },
+        "piqa": {
+            "acc": 0.7627856365614799,
+            "acc_stderr": 0.009924694933586364,
+            "acc_norm": 0.7747551686615887,
+            "acc_norm_stderr": 0.00974664347103214
+        }
+    },
+    "versions": {
+        "anli_r1": 0,
+        "anli_r2": 0,
+        "anli_r3": 0,
+        "cb": 1,
+        "copa": 0,
+        "hellaswag": 0,
+        "rte": 0,
+        "winogrande": 0,
+        "storycloze_2016": 0,
+        "boolq": 1,
+        "arc_easy": 0,
+        "arc_challenge": 0,
+        "sciq": 0,
+        "piqa": 0
+    }
+}
\ No newline at end of file
diff --git a/8b7178b13b/evaluation/rankeval/8b7178b13b_0_lm-eval_global_step84877_2023-05-15-10-07-19_0shots_backup.json b/8b7178b13b/evaluation/rankeval/8b7178b13b_0_lm-eval_global_step84877_2023-05-15-10-07-19_0shots_backup.json
new file mode 100644
index 0000000000000000000000000000000000000000..7d929e15d881f66702b0464d88325f7749217183
--- /dev/null
+++ b/8b7178b13b/evaluation/rankeval/8b7178b13b_0_lm-eval_global_step84877_2023-05-15-10-07-19_0shots_backup.json
@@ -0,0 +1,87 @@
+{
+    "results": {
+        "anli_r1": {
+            "acc": 0.35,
+            "acc_stderr": 0.01509065034144423
+        },
+        "anli_r2": {
+            "acc": 0.346,
+            "acc_stderr": 0.015050266127564448
+        },
+        "anli_r3": {
+            "acc": 0.355,
+            "acc_stderr": 0.013819249004047296
+        },
+        "cb": {
+            "acc": 0.26785714285714285,
+            "acc_stderr": 0.05971290310957636,
+            "f1": 0.2374338624338624
+        },
+        "copa": {
+            "acc": 0.81,
+            "acc_stderr": 0.03942772444036623
+        },
+        "hellaswag": {
+            "acc": 0.5215096594303924,
+            "acc_stderr": 0.004985162074336112,
+            "acc_norm": 0.6843258315076678,
+            "acc_norm_stderr": 0.004638339207348913
+        },
+        "rte": {
+            "acc": 0.5270758122743683,
+            "acc_stderr": 0.030052303463143706
+        },
+        "winogrande": {
+            "acc": 0.5659037095501184,
+            "acc_stderr": 0.013929882555694054
+        },
+        "storycloze_2016": {
+            "acc": 0.7455905932656334,
+            "acc_stderr": 0.010071542492663043
+        },
+        "boolq": {
+            "acc": 0.6376146788990825,
+            "acc_stderr": 0.008407308655864048
+        },
+        "arc_easy": {
+            "acc": 0.6220538720538721,
+            "acc_stderr": 0.009949405744045469,
+            "acc_norm": 0.5395622895622896,
+            "acc_norm_stderr": 0.010227616386289017
+        },
+        "arc_challenge": {
+            "acc": 0.29180887372013653,
+            "acc_stderr": 0.013284525292403496,
+            "acc_norm": 0.3216723549488055,
+            "acc_norm_stderr": 0.013650488084494162
+        },
+        "sciq": {
+            "acc": 0.87,
+            "acc_stderr": 0.010640169792499361,
+            "acc_norm": 0.807,
+            "acc_norm_stderr": 0.012486268734370145
+        },
+        "piqa": {
+            "acc": 0.7627856365614799,
+            "acc_stderr": 0.009924694933586364,
+            "acc_norm": 0.7747551686615887,
+            "acc_norm_stderr": 0.00974664347103214
+        }
+    },
+    "versions": {
+        "anli_r1": 0,
+        "anli_r2": 0,
+        "anli_r3": 0,
+        "cb": 1,
+        "copa": 0,
+        "hellaswag": 0,
+        "rte": 0,
+        "winogrande": 0,
+        "storycloze_2016": 0,
+        "boolq": 1,
+        "arc_easy": 0,
+        "arc_challenge": 0,
+        "sciq": 0,
+        "piqa": 0
+    }
+}
\ No newline at end of file
diff --git a/8b7178b13b/evaluation/rankeval/8b7178b13b_1.json b/8b7178b13b/evaluation/rankeval/8b7178b13b_1.json
new file mode 100644
index 0000000000000000000000000000000000000000..0b95f97c2e1d35c12d87508778037bce6f4b9928
--- /dev/null
+++ b/8b7178b13b/evaluation/rankeval/8b7178b13b_1.json
@@ -0,0 +1,87 @@
+{
+    "results": {
+        "anli_r1": {
+            "acc": 0.328,
+            "acc_stderr": 0.014853842487270334
+        },
+        "anli_r2": {
+            "acc": 0.316,
+            "acc_stderr": 0.01470919305605713
+        },
+        "anli_r3": {
+            "acc": 0.3591666666666667,
+            "acc_stderr": 0.013855141559780364
+        },
+        "cb": {
+            "acc": 0.26785714285714285,
+            "acc_stderr": 0.05971290310957635,
+            "f1": 0.21294539321104786
+        },
+        "copa": {
+            "acc": 0.79,
+            "acc_stderr": 0.040936018074033256
+        },
+        "hellaswag": {
+            "acc": 0.5201155148376817,
+            "acc_stderr": 0.004985741706385719,
+            "acc_norm": 0.6825333598884684,
+            "acc_norm_stderr": 0.004645393477680675
+        },
+        "rte": {
+            "acc": 0.4657039711191336,
+            "acc_stderr": 0.030025579819366426
+        },
+        "winogrande": {
+            "acc": 0.5872138910812944,
+            "acc_stderr": 0.013837060648682103
+        },
+        "storycloze_2016": {
+            "acc": 0.7365045430251203,
+            "acc_stderr": 0.010187168219156485
+        },
+        "boolq": {
+            "acc": 0.6477064220183486,
+            "acc_stderr": 0.00835476049390613
+        },
+        "arc_easy": {
+            "acc": 0.6422558922558923,
+            "acc_stderr": 0.009835772757343361,
+            "acc_norm": 0.6035353535353535,
+            "acc_norm_stderr": 0.010037412763064529
+        },
+        "arc_challenge": {
+            "acc": 0.3054607508532423,
+            "acc_stderr": 0.0134600804780025,
+            "acc_norm": 0.3319112627986348,
+            "acc_norm_stderr": 0.013760988200880538
+        },
+        "sciq": {
+            "acc": 0.905,
+            "acc_stderr": 0.009276910103103329,
+            "acc_norm": 0.88,
+            "acc_norm_stderr": 0.0102813280127474
+        },
+        "piqa": {
+            "acc": 0.764961915125136,
+            "acc_stderr": 0.009893146688805326,
+            "acc_norm": 0.7725788900979326,
+            "acc_norm_stderr": 0.009779850767847232
+        }
+    },
+    "versions": {
+        "anli_r1": 0,
+        "anli_r2": 0,
+        "anli_r3": 0,
+        "cb": 1,
+        "copa": 0,
+        "hellaswag": 0,
+        "rte": 0,
+        "winogrande": 0,
+        "storycloze_2016": 0,
+        "boolq": 1,
+        "arc_easy": 0,
+        "arc_challenge": 0,
+        "sciq": 0,
+        "piqa": 0
+    }
+}
\ No newline at end of file
diff --git a/8b7178b13b/evaluation/rankeval/8b7178b13b_1_lm-eval_global_step84877_2023-05-15-10-07-19_1shots_backup.json b/8b7178b13b/evaluation/rankeval/8b7178b13b_1_lm-eval_global_step84877_2023-05-15-10-07-19_1shots_backup.json
new file mode 100644
index 0000000000000000000000000000000000000000..0b95f97c2e1d35c12d87508778037bce6f4b9928
--- /dev/null
+++ b/8b7178b13b/evaluation/rankeval/8b7178b13b_1_lm-eval_global_step84877_2023-05-15-10-07-19_1shots_backup.json
@@ -0,0 +1,87 @@
+{
+    "results": {
+        "anli_r1": {
+            "acc": 0.328,
+            "acc_stderr": 0.014853842487270334
+        },
+        "anli_r2": {
+            "acc": 0.316,
+            "acc_stderr": 0.01470919305605713
+        },
+        "anli_r3": {
+            "acc": 0.3591666666666667,
+            "acc_stderr": 0.013855141559780364
+        },
+        "cb": {
+            "acc": 0.26785714285714285,
+            "acc_stderr": 0.05971290310957635,
+            "f1": 0.21294539321104786
+        },
+        "copa": {
+            "acc": 0.79,
+            "acc_stderr": 0.040936018074033256
+        },
+        "hellaswag": {
+            "acc": 0.5201155148376817,
+            "acc_stderr": 0.004985741706385719,
+            "acc_norm": 0.6825333598884684,
+            "acc_norm_stderr": 0.004645393477680675
+        },
+        "rte": {
+            "acc": 0.4657039711191336,
+            "acc_stderr": 0.030025579819366426
+        },
+        "winogrande": {
+            "acc": 0.5872138910812944,
+            "acc_stderr": 0.013837060648682103
+        },
+        "storycloze_2016": {
+            "acc": 0.7365045430251203,
+            "acc_stderr": 0.010187168219156485
+        },
+        "boolq": {
+            "acc": 0.6477064220183486,
+            "acc_stderr": 0.00835476049390613
+        },
+        "arc_easy": {
+            "acc": 0.6422558922558923,
+            "acc_stderr": 0.009835772757343361,
+            "acc_norm": 0.6035353535353535,
+            "acc_norm_stderr": 0.010037412763064529
+        },
+        "arc_challenge": {
+            "acc": 0.3054607508532423,
+            "acc_stderr": 0.0134600804780025,
+            "acc_norm": 0.3319112627986348,
+            "acc_norm_stderr": 0.013760988200880538
+        },
+        "sciq": {
+            "acc": 0.905,
+            "acc_stderr": 0.009276910103103329,
+            "acc_norm": 0.88,
+            "acc_norm_stderr": 0.0102813280127474
+        },
+        "piqa": {
+            "acc": 0.764961915125136,
+            "acc_stderr": 0.009893146688805326,
+            "acc_norm": 0.7725788900979326,
+            "acc_norm_stderr": 0.009779850767847232
+        }
+    },
+    "versions": {
+        "anli_r1": 0,
+        "anli_r2": 0,
+        "anli_r3": 0,
+        "cb": 1,
+        "copa": 0,
+        "hellaswag": 0,
+        "rte": 0,
+        "winogrande": 0,
+        "storycloze_2016": 0,
+        "boolq": 1,
+        "arc_easy": 0,
+        "arc_challenge": 0,
+        "sciq": 0,
+        "piqa": 0
+    }
+}
\ No newline at end of file
diff --git a/8b7178b13b/evaluation/rankeval/8b7178b13b_2.json b/8b7178b13b/evaluation/rankeval/8b7178b13b_2.json
new file mode 100644
index 0000000000000000000000000000000000000000..3f863650cfd91d3c3485af5f9aa82b9a4be2f723
--- /dev/null
+++ b/8b7178b13b/evaluation/rankeval/8b7178b13b_2.json
@@ -0,0 +1,87 @@
+{
+    "results": {
+        "anli_r1": {
+            "acc": 0.336,
+            "acc_stderr": 0.014944140233795018
+        },
+        "anli_r2": {
+            "acc": 0.329,
+            "acc_stderr": 0.01486539538592835
+        },
+        "anli_r3": {
+            "acc": 0.35083333333333333,
+            "acc_stderr": 0.013782212417178195
+        },
+        "cb": {
+            "acc": 0.19642857142857142,
+            "acc_stderr": 0.05357142857142858,
+            "f1": 0.1668300653594771
+        },
+        "copa": {
+            "acc": 0.78,
+            "acc_stderr": 0.04163331998932263
+        },
+        "hellaswag": {
+            "acc": 0.5200159330810595,
+            "acc_stderr": 0.004985781620467012,
+            "acc_norm": 0.6863174666401115,
+            "acc_norm_stderr": 0.004630407476835209
+        },
+        "rte": {
+            "acc": 0.48014440433212996,
+            "acc_stderr": 0.0300727231673172
+        },
+        "winogrande": {
+            "acc": 0.5864246250986582,
+            "acc_stderr": 0.013840971763195306
+        },
+        "storycloze_2016": {
+            "acc": 0.743452699091395,
+            "acc_stderr": 0.01009926092771917
+        },
+        "boolq": {
+            "acc": 0.6382262996941896,
+            "acc_stderr": 0.008404238796949254
+        },
+        "arc_easy": {
+            "acc": 0.6481481481481481,
+            "acc_stderr": 0.009799078929868706,
+            "acc_norm": 0.6212121212121212,
+            "acc_norm_stderr": 0.00995373765654204
+        },
+        "arc_challenge": {
+            "acc": 0.310580204778157,
+            "acc_stderr": 0.013522292098053059,
+            "acc_norm": 0.33532423208191126,
+            "acc_norm_stderr": 0.013796182947785562
+        },
+        "sciq": {
+            "acc": 0.913,
+            "acc_stderr": 0.00891686663074591,
+            "acc_norm": 0.889,
+            "acc_norm_stderr": 0.009938701010583726
+        },
+        "piqa": {
+            "acc": 0.7573449401523396,
+            "acc_stderr": 0.0100020025697087,
+            "acc_norm": 0.764961915125136,
+            "acc_norm_stderr": 0.009893146688805319
+        }
+    },
+    "versions": {
+        "anli_r1": 0,
+        "anli_r2": 0,
+        "anli_r3": 0,
+        "cb": 1,
+        "copa": 0,
+        "hellaswag": 0,
+        "rte": 0,
+        "winogrande": 0,
+        "storycloze_2016": 0,
+        "boolq": 1,
+        "arc_easy": 0,
+        "arc_challenge": 0,
+        "sciq": 0,
+        "piqa": 0
+    }
+}
\ No newline at end of file
diff --git a/8b7178b13b/evaluation/rankeval/8b7178b13b_2_lm-eval_global_step84877_2023-05-15-10-07-19_2shots_backup.json b/8b7178b13b/evaluation/rankeval/8b7178b13b_2_lm-eval_global_step84877_2023-05-15-10-07-19_2shots_backup.json
new file mode 100644
index 0000000000000000000000000000000000000000..3f863650cfd91d3c3485af5f9aa82b9a4be2f723
--- /dev/null
+++ b/8b7178b13b/evaluation/rankeval/8b7178b13b_2_lm-eval_global_step84877_2023-05-15-10-07-19_2shots_backup.json
@@ -0,0 +1,87 @@
+{
+    "results": {
+        "anli_r1": {
+            "acc": 0.336,
+            "acc_stderr": 0.014944140233795018
+        },
+        "anli_r2": {
+            "acc": 0.329,
+            "acc_stderr": 0.01486539538592835
+        },
+        "anli_r3": {
+            "acc": 0.35083333333333333,
+            "acc_stderr": 0.013782212417178195
+        },
+        "cb": {
+            "acc": 0.19642857142857142,
+            "acc_stderr": 0.05357142857142858,
+            "f1": 0.1668300653594771
+        },
+        "copa": {
+            "acc": 0.78,
+            "acc_stderr": 0.04163331998932263
+        },
+        "hellaswag": {
+            "acc": 0.5200159330810595,
+            "acc_stderr": 0.004985781620467012,
+            "acc_norm": 0.6863174666401115,
+            "acc_norm_stderr": 0.004630407476835209
+        },
+        "rte": {
+            "acc": 0.48014440433212996,
+            "acc_stderr": 0.0300727231673172
+        },
+        "winogrande": {
+            "acc": 0.5864246250986582,
+            "acc_stderr": 0.013840971763195306
+        },
+        "storycloze_2016": {
+            "acc": 0.743452699091395,
+            "acc_stderr": 0.01009926092771917
+        },
+        "boolq": {
+            "acc": 0.6382262996941896,
+            "acc_stderr": 0.008404238796949254
+        },
+        "arc_easy": {
+            "acc": 0.6481481481481481,
+            "acc_stderr": 0.009799078929868706,
+            "acc_norm": 0.6212121212121212,
+            "acc_norm_stderr": 0.00995373765654204
+        },
+        "arc_challenge": {
+            "acc": 0.310580204778157,
+            "acc_stderr": 0.013522292098053059,
+            "acc_norm": 0.33532423208191126,
+            "acc_norm_stderr": 0.013796182947785562
+        },
+        "sciq": {
+            "acc": 0.913,
+            "acc_stderr": 0.00891686663074591,
+            "acc_norm": 0.889,
+            "acc_norm_stderr": 0.009938701010583726
+        },
+        "piqa": {
+            "acc": 0.7573449401523396,
+            "acc_stderr": 0.0100020025697087,
+            "acc_norm": 0.764961915125136,
+            "acc_norm_stderr": 0.009893146688805319
+        }
+    },
+    "versions": {
+        "anli_r1": 0,
+        "anli_r2": 0,
+        "anli_r3": 0,
+        "cb": 1,
+        "copa": 0,
+        "hellaswag": 0,
+        "rte": 0,
+        "winogrande": 0,
+        "storycloze_2016": 0,
+        "boolq": 1,
+        "arc_easy": 0,
+        "arc_challenge": 0,
+        "sciq": 0,
+        "piqa": 0
+    }
+}
\ No newline at end of file
diff --git a/8b7178b13b/evaluation/rankeval/8b7178b13b_3.json b/8b7178b13b/evaluation/rankeval/8b7178b13b_3.json
new file mode 100644
index 0000000000000000000000000000000000000000..0f847e0ac91d717a70d667d7d481f9d4c0fc3be6
--- /dev/null
+++ b/8b7178b13b/evaluation/rankeval/8b7178b13b_3.json
@@ -0,0 +1,87 @@
+{
+    "results": {
+        "anli_r1": {
+            "acc": 0.348,
+            "acc_stderr": 0.01507060460376841
+        },
+        "anli_r2": {
+            "acc": 0.344,
+            "acc_stderr": 0.015029633724408947
+        },
+        "anli_r3": {
+            "acc": 0.3325,
+            "acc_stderr": 0.013605417345710528
+        },
+        "cb": {
+            "acc": 0.3392857142857143,
+            "acc_stderr": 0.06384226561930825,
+            "f1": 0.3185837135128588
+        },
+        "copa": {
+            "acc": 0.78,
+            "acc_stderr": 0.04163331998932263
+        },
+        "hellaswag": {
+            "acc": 0.5206134236207927,
+            "acc_stderr": 0.00498553915978342,
+            "acc_norm": 0.6902011551483768,
+            "acc_norm_stderr": 0.00461465517501001
+        },
+        "rte": {
+            "acc": 0.48014440433212996,
+            "acc_stderr": 0.0300727231673172
+        },
+        "winogrande": {
+            "acc": 0.601420678768745,
+            "acc_stderr": 0.01376035717687383
+        },
+        "storycloze_2016": {
+            "acc": 0.7504008551576697,
+            "acc_stderr": 0.010008002459430848
+        },
+        "boolq": {
+            "acc": 0.6363914373088685,
+            "acc_stderr": 0.008413404209789989
+        },
+        "arc_easy": {
+            "acc": 0.648989898989899,
+            "acc_stderr": 0.009793703885101042,
+            "acc_norm": 0.6393097643097643,
+            "acc_norm_stderr": 0.009853512108416748
+        },
+        "arc_challenge": {
+            "acc": 0.3148464163822526,
+            "acc_stderr": 0.01357265770308495,
+            "acc_norm": 0.3250853242320819,
+            "acc_norm_stderr": 0.013688147309729119
+        },
+        "sciq": {
+            "acc": 0.917,
+            "acc_stderr": 0.008728527206074792,
+            "acc_norm": 0.911,
+            "acc_norm_stderr": 0.009008893392651521
+        },
+        "piqa": {
+            "acc": 0.7584330794341676,
+            "acc_stderr": 0.009986718001804467,
+            "acc_norm": 0.7665941240478781,
+            "acc_norm_stderr": 0.009869247889520991
+        }
+    },
+    "versions": {
+        "anli_r1": 0,
+        "anli_r2": 0,
+        "anli_r3": 0,
+        "cb": 1,
+        "copa": 0,
+        "hellaswag": 0,
+        "rte": 0,
+        "winogrande": 0,
+        "storycloze_2016": 0,
+        "boolq": 1,
+        "arc_easy": 0,
+        "arc_challenge": 0,
+        "sciq": 0,
+        "piqa": 0
+    }
+}
\ No newline at end of file
diff --git a/8b7178b13b/evaluation/rankeval/8b7178b13b_3_lm-eval_global_step84877_2023-05-15-10-07-25_3shots_backup.json b/8b7178b13b/evaluation/rankeval/8b7178b13b_3_lm-eval_global_step84877_2023-05-15-10-07-25_3shots_backup.json
new file mode 100644
index 0000000000000000000000000000000000000000..0f847e0ac91d717a70d667d7d481f9d4c0fc3be6
--- /dev/null
+++ b/8b7178b13b/evaluation/rankeval/8b7178b13b_3_lm-eval_global_step84877_2023-05-15-10-07-25_3shots_backup.json
@@ -0,0 +1,87 @@
+{
+    "results": {
+        "anli_r1": {
+            "acc": 0.348,
+            "acc_stderr": 0.01507060460376841
+        },
+        "anli_r2": {
+            "acc": 0.344,
+            "acc_stderr": 0.015029633724408947
+        },
+        "anli_r3": {
+            "acc": 0.3325,
+            "acc_stderr": 0.013605417345710528
+        },
+        "cb": {
+            "acc": 0.3392857142857143,
+            "acc_stderr": 0.06384226561930825,
+            "f1": 0.3185837135128588
+        },
+        "copa": {
+            "acc": 0.78,
+            "acc_stderr": 0.04163331998932263
+        },
+        "hellaswag": {
+            "acc": 0.5206134236207927,
+            "acc_stderr": 0.00498553915978342,
+            "acc_norm": 0.6902011551483768,
+            "acc_norm_stderr": 0.00461465517501001
+        },
+        "rte": {
+            "acc": 0.48014440433212996,
+            "acc_stderr": 0.0300727231673172
+        },
+        "winogrande": {
+            "acc": 0.601420678768745,
+            "acc_stderr": 0.01376035717687383
+        },
+        "storycloze_2016": {
+            "acc": 0.7504008551576697,
+            "acc_stderr": 0.010008002459430848
+        },
+        "boolq": {
+            "acc": 0.6363914373088685,
+            "acc_stderr": 0.008413404209789989
+        },
+        "arc_easy": {
+            "acc": 0.648989898989899,
+            "acc_stderr": 0.009793703885101042,
+            "acc_norm": 0.6393097643097643,
+            "acc_norm_stderr": 0.009853512108416748
+        },
+        "arc_challenge": {
+            "acc": 0.3148464163822526,
+            "acc_stderr": 0.01357265770308495,
+            "acc_norm": 0.3250853242320819,
+            "acc_norm_stderr": 0.013688147309729119
+        },
+        "sciq": {
+            "acc": 0.917,
+            "acc_stderr": 0.008728527206074792,
+            "acc_norm": 0.911,
+            "acc_norm_stderr": 0.009008893392651521
+        },
+        "piqa": {
+            "acc": 0.7584330794341676,
+            "acc_stderr": 0.009986718001804467,
+            "acc_norm": 0.7665941240478781,
+            "acc_norm_stderr": 0.009869247889520991
+        }
+    },
+    "versions": {
+        "anli_r1": 0,
+        "anli_r2": 0,
+        "anli_r3": 0,
+        "cb": 1,
+        "copa": 0,
+        "hellaswag": 0,
+        "rte": 0,
+        "winogrande": 0,
+        "storycloze_2016": 0,
+        "boolq": 1,
+        "arc_easy": 0,
+        "arc_challenge": 0,
+        "sciq": 0,
+        "piqa": 0
+    }
+}
\ No newline at end of file
diff --git a/8b7178b13b/evaluation/rankeval/8b7178b13b_4.json b/8b7178b13b/evaluation/rankeval/8b7178b13b_4.json
new file mode 100644
index 0000000000000000000000000000000000000000..5d45f389f5a7ddf986a1b66641193fa784359241
--- /dev/null
+++ b/8b7178b13b/evaluation/rankeval/8b7178b13b_4.json
@@ -0,0 +1,87 @@
+{
+    "results": {
+        "anli_r1": {
+            "acc": 0.344,
+            "acc_stderr": 0.015029633724408947
+        },
+        "anli_r2": {
+            "acc": 0.345,
+            "acc_stderr": 0.015039986742055238
+        },
+        "anli_r3": {
+            "acc": 0.34833333333333333,
+            "acc_stderr": 0.013759437498874072
+        },
+        "cb": {
+            "acc": 0.21428571428571427,
+            "acc_stderr": 0.055328333517248834,
+            "f1": 0.1997113997113997
+        },
+        "copa": {
+            "acc": 0.79,
+            "acc_stderr": 0.040936018074033256
+        },
+        "hellaswag": {
+            "acc": 0.5206134236207927,
+            "acc_stderr": 0.004985539159783419,
+            "acc_norm": 0.6900019916351324,
+            "acc_norm_stderr": 0.004615472210316043
+        },
+        "rte": {
+            "acc": 0.48736462093862815,
+            "acc_stderr": 0.030086851767188564
+        },
+        "winogrande": {
+            "acc": 0.6077348066298343,
+            "acc_stderr": 0.013722400462000888
+        },
+        "storycloze_2016": {
+            "acc": 0.7509353287012293,
+            "acc_stderr": 0.010000841162740146
+        },
+        "boolq": {
+            "acc": 0.636085626911315,
+            "acc_stderr": 0.00841491890912884
+        },
+        "arc_easy": {
+            "acc": 0.6632996632996633,
+            "acc_stderr": 0.009697166595752475,
+            "acc_norm": 0.6447811447811448,
+            "acc_norm_stderr": 0.009820245899287124
+        },
+        "arc_challenge": {
+            "acc": 0.3199658703071672,
+            "acc_stderr": 0.013631345807016196,
+            "acc_norm": 0.3447098976109215,
+            "acc_norm_stderr": 0.01388881628678211
+        },
+        "sciq": {
+            "acc": 0.927,
+            "acc_stderr": 0.008230354715244062,
+            "acc_norm": 0.908,
+            "acc_norm_stderr": 0.009144376393151108
+        },
+        "piqa": {
+            "acc": 0.7665941240478781,
+            "acc_stderr": 0.009869247889521007,
+            "acc_norm": 0.7682263329706203,
+            "acc_norm_stderr": 0.00984514377279404
+        }
+    },
+    "versions": {
+        "anli_r1": 0,
+        "anli_r2": 0,
+        "anli_r3": 0,
+        "cb": 1,
+        "copa": 0,
+        "hellaswag": 0,
+        "rte": 0,
+        "winogrande": 0,
+        "storycloze_2016": 0,
+        "boolq": 1,
+        "arc_easy": 0,
+        "arc_challenge": 0,
+        "sciq": 0,
+        "piqa": 0
+    }
+}
\ No newline at end of file
diff --git a/8b7178b13b/evaluation/rankeval/8b7178b13b_4_lm-eval_global_step84877_2023-05-15-10-06-46_4shots_backup.json b/8b7178b13b/evaluation/rankeval/8b7178b13b_4_lm-eval_global_step84877_2023-05-15-10-06-46_4shots_backup.json
new file mode 100644
index 0000000000000000000000000000000000000000..5d45f389f5a7ddf986a1b66641193fa784359241
--- /dev/null
+++ b/8b7178b13b/evaluation/rankeval/8b7178b13b_4_lm-eval_global_step84877_2023-05-15-10-06-46_4shots_backup.json
@@ -0,0 +1,87 @@
+{
+    "results": {
+        "anli_r1": {
+            "acc": 0.344,
+            "acc_stderr": 0.015029633724408947
+        },
+        "anli_r2": {
+            "acc": 0.345,
+            "acc_stderr": 0.015039986742055238
+        },
+        "anli_r3": {
+            "acc": 0.34833333333333333,
+            "acc_stderr": 0.013759437498874072
+        },
+        "cb": {
+            "acc": 0.21428571428571427,
+            "acc_stderr": 0.055328333517248834,
+            "f1": 0.1997113997113997
+        },
+        "copa": {
+            "acc": 0.79,
+            "acc_stderr": 0.040936018074033256
+        },
+        "hellaswag": {
+            "acc": 0.5206134236207927,
+            "acc_stderr": 0.004985539159783419,
+            "acc_norm": 0.6900019916351324,
+            "acc_norm_stderr": 0.004615472210316043
+        },
+        "rte": {
+            "acc": 0.48736462093862815,
+            "acc_stderr": 0.030086851767188564
+        },
+        "winogrande": {
+            "acc": 0.6077348066298343,
+            "acc_stderr": 0.013722400462000888
+        },
+        "storycloze_2016": {
+            "acc": 0.7509353287012293,
+            "acc_stderr": 0.010000841162740146
+        },
+        "boolq": {
+            "acc": 0.636085626911315,
+            "acc_stderr": 0.00841491890912884
+        },
+        "arc_easy": {
+            "acc": 0.6632996632996633,
+            "acc_stderr": 0.009697166595752475,
+            "acc_norm": 0.6447811447811448,
+            "acc_norm_stderr": 0.009820245899287124
+        },
+        "arc_challenge": {
+            "acc": 0.3199658703071672,
+            "acc_stderr": 0.013631345807016196,
+            "acc_norm": 0.3447098976109215,
+            "acc_norm_stderr": 0.01388881628678211
+        },
+        "sciq": {
+            "acc": 0.927,
+            "acc_stderr": 0.008230354715244062,
+            "acc_norm": 0.908,
+            "acc_norm_stderr": 0.009144376393151108
+        },
+        "piqa": {
+            "acc": 0.7665941240478781,
+            "acc_stderr": 0.009869247889521007,
+            "acc_norm": 0.7682263329706203,
+            "acc_norm_stderr": 0.00984514377279404
+        }
+    },
+    "versions": {
+        "anli_r1": 0,
+        "anli_r2": 0,
+        "anli_r3": 0,
+        "cb": 1,
+        "copa": 0,
+        "hellaswag": 0,
+        "rte": 0,
+        "winogrande": 0,
+        "storycloze_2016": 0,
+        "boolq": 1,
+        "arc_easy": 0,
+        "arc_challenge": 0,
+        "sciq": 0,
+        "piqa": 0
+    }
+}
\ No newline at end of file
diff --git a/8b7178b13b/evaluation/rankeval/8b7178b13b_5.json b/8b7178b13b/evaluation/rankeval/8b7178b13b_5.json
new file mode 100644
index 0000000000000000000000000000000000000000..70f7bf98fc89840e085695409830500dfbf177d6
--- /dev/null
+++ b/8b7178b13b/evaluation/rankeval/8b7178b13b_5.json
@@ -0,0 +1,87 @@
+{
+    "results": {
+        "anli_r1": {
+            "acc": 0.34,
+            "acc_stderr": 0.014987482264363935
+        },
+        "anli_r2": {
+            "acc": 0.337,
+            "acc_stderr": 0.014955087918653605
+        },
+        "anli_r3": {
+            "acc": 0.3325,
+            "acc_stderr": 0.013605417345710526
+        },
+        "cb": {
+            "acc": 0.23214285714285715,
+            "acc_stderr": 0.0569293902400011,
+            "f1": 0.223351041141572
+        },
+        "copa": {
+            "acc": 0.76,
+            "acc_stderr": 0.042923469599092816
+        },
+        "hellaswag": {
+            "acc": 0.5209121688906593,
+            "acc_stderr": 0.004985415250690905,
+            "acc_norm": 0.689205337582155,
+            "acc_norm_stderr": 0.004618730353217064
+        },
+        "rte": {
+            "acc": 0.5054151624548736,
+            "acc_stderr": 0.030094698123239966
+        },
+        "winogrande": {
+            "acc": 0.5935280189423836,
+            "acc_stderr": 0.01380444869775337
+        },
+        "storycloze_2016": {
+            "acc": 0.7498663816141101,
+            "acc_stderr": 0.010015143382536456
+        },
+        "boolq": {
+            "acc": 0.6409785932721712,
+            "acc_stderr": 0.008390241754319908
+        },
+        "arc_easy": {
+            "acc": 0.6536195286195287,
+            "acc_stderr": 0.009763542075695738,
+            "acc_norm": 0.6401515151515151,
+            "acc_norm_stderr": 0.009848484848484836
+        },
+        "arc_challenge": {
+            "acc": 0.3310580204778157,
+            "acc_stderr": 0.013752062419817834,
+            "acc_norm": 0.3447098976109215,
+            "acc_norm_stderr": 0.013888816286782112
+        },
+        "sciq": {
+            "acc": 0.923,
+            "acc_stderr": 0.008434580140240648,
+            "acc_norm": 0.912,
+            "acc_norm_stderr": 0.008963053962592074
+        },
+        "piqa": {
+            "acc": 0.7606093579978237,
+            "acc_stderr": 0.009955884250291688,
+            "acc_norm": 0.7747551686615887,
+            "acc_norm_stderr": 0.009746643471032136
+        }
+    },
+    "versions": {
+        "anli_r1": 0,
+        "anli_r2": 0,
+        "anli_r3": 0,
+        "cb": 1,
+        "copa": 0,
+        "hellaswag": 0,
+        "rte": 0,
+        "winogrande": 0,
+        "storycloze_2016": 0,
+        "boolq": 1,
+        "arc_easy": 0,
+        "arc_challenge": 0,
+        "sciq": 0,
+        "piqa": 0
+    }
+}
\ No newline at end of file
diff --git a/8b7178b13b/evaluation/rankeval/8b7178b13b_5_lm-eval_global_step84877_2023-05-15-10-06-46_5shots_backup.json b/8b7178b13b/evaluation/rankeval/8b7178b13b_5_lm-eval_global_step84877_2023-05-15-10-06-46_5shots_backup.json
new file mode 100644
index 0000000000000000000000000000000000000000..70f7bf98fc89840e085695409830500dfbf177d6
--- /dev/null
+++ b/8b7178b13b/evaluation/rankeval/8b7178b13b_5_lm-eval_global_step84877_2023-05-15-10-06-46_5shots_backup.json
@@ -0,0 +1,87 @@
+{
+    "results": {
+        "anli_r1": {
+            "acc": 0.34,
+            "acc_stderr": 0.014987482264363935
+        },
+        "anli_r2": {
+            "acc": 0.337,
+            "acc_stderr": 0.014955087918653605
+        },
+        "anli_r3": {
+            "acc": 0.3325,
+            "acc_stderr": 0.013605417345710526
+        },
+        "cb": {
+            "acc": 0.23214285714285715,
+            "acc_stderr": 0.0569293902400011,
+            "f1": 0.223351041141572
+        },
+        "copa": {
+            "acc": 0.76,
+            "acc_stderr": 0.042923469599092816
+        },
+        "hellaswag": {
+            "acc": 0.5209121688906593,
+            "acc_stderr": 0.004985415250690905,
+            "acc_norm": 0.689205337582155,
+            "acc_norm_stderr": 0.004618730353217064
+        },
+        "rte": {
+            "acc": 0.5054151624548736,
+            "acc_stderr": 0.030094698123239966
+        },
+        "winogrande": {
+            "acc": 0.5935280189423836,
+            "acc_stderr": 0.01380444869775337
+        },
+        "storycloze_2016": {
+            "acc": 0.7498663816141101,
+            "acc_stderr": 0.010015143382536456
+        },
+        "boolq": {
+            "acc": 0.6409785932721712,
+            "acc_stderr": 0.008390241754319908
+        },
+        "arc_easy": {
+            "acc": 0.6536195286195287,
+            "acc_stderr": 0.009763542075695738,
+            "acc_norm": 0.6401515151515151,
+            "acc_norm_stderr": 0.009848484848484836
+        },
+        "arc_challenge": {
+            "acc": 0.3310580204778157,
+            "acc_stderr": 0.013752062419817834,
+            "acc_norm": 0.3447098976109215,
+            "acc_norm_stderr": 0.013888816286782112
+        },
+        "sciq": {
+            "acc": 0.923,
+            "acc_stderr": 0.008434580140240648,
+            "acc_norm": 0.912,
+            "acc_norm_stderr": 0.008963053962592074
+        },
+        "piqa": {
+            "acc": 0.7606093579978237,
+            "acc_stderr": 0.009955884250291688,
+            "acc_norm": 0.7747551686615887,
+            "acc_norm_stderr": 0.009746643471032136
+        }
+    },
+    "versions": {
+        "anli_r1": 0,
+        "anli_r2": 0,
+        "anli_r3": 0,
+        "cb": 1,
+        "copa": 0,
+        "hellaswag": 0,
+        "rte": 0,
+        "winogrande": 0,
+        "storycloze_2016": 0,
+        "boolq": 1,
+        "arc_easy": 0,
+        "arc_challenge": 0,
+        "sciq": 0,
+        "piqa": 0
+    }
+}
\ No newline at end of file
diff --git a/8b7178b4b/evaluation/generation/agg.8b7178b4b_GEM-web_nlg_en_PALM_prompt_0.json b/8b7178b4b/evaluation/generation/agg.8b7178b4b_GEM-web_nlg_en_PALM_prompt_0.json
new file mode 100644
index 0000000000000000000000000000000000000000..17550129f3162d7f72b614b8cfe4b673bfd98d31
--- /dev/null
+++ b/8b7178b4b/evaluation/generation/agg.8b7178b4b_GEM-web_nlg_en_PALM_prompt_0.json
@@ -0,0 +1 @@
+{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.5360651854033321, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.04256493048481422}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.0899710062520438, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.002733564353594117}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.33063321497259557, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.005082809158106232}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.11703731094627681, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0023099010503697163}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.039336196641808305, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.001715101531039642}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.14377777116972676, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.003290079655812811}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.050209876854654424, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.00139566067725402}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.079768875505422, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0024190846876120864}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.29935042842995513, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.004616391395045465}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.10410690412481492, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.001970211495017576}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.0826555500749783, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.002555867928952612}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.30355552506299893, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.004656831028738861}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.10721436888153839, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0021127388747576538}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b4b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
\ No newline at end of file
diff --git a/8b7178b4b/evaluation/generation/agg.8b7178b4b_GEM-web_nlg_en_PALM_prompt_1.json b/8b7178b4b/evaluation/generation/agg.8b7178b4b_GEM-web_nlg_en_PALM_prompt_1.json
new file mode 100644
index 0000000000000000000000000000000000000000..e94fc9e3dce22c16ede56c4528736f1a261d4081
--- /dev/null
+++ b/8b7178b4b/evaluation/generation/agg.8b7178b4b_GEM-web_nlg_en_PALM_prompt_1.json
@@ -0,0 +1 @@
+{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.6802224009591102, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.04492815625354331}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.07461313068432787, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0017662222515359694}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.372595398614972, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.005022894768490944}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.11390823787630369, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002092748719741799}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.03144260517505359, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0010954007812074113}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.16216117762517218, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.003540557825236836}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.04788274742708309, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.001335468400942394}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.06430823474158391, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.001450822149093989}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.32776952027123735, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.004463575595509288}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.09862949956148871, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0017348311881018063}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.06792680129399332, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0016204262837167783}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.33968125496997575, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0045445995282505924}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.10371590290401189, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.001919446152689098}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b4b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
\ No newline at end of file
diff --git a/8b7178b4b/evaluation/generation/agg.8b7178b4b_GEM-web_nlg_en_PALM_prompt_2.json b/8b7178b4b/evaluation/generation/agg.8b7178b4b_GEM-web_nlg_en_PALM_prompt_2.json
new file mode 100644
index 0000000000000000000000000000000000000000..e7ec67c96ec389e457e8b097577ec5e4854cfc04
--- /dev/null
+++ b/8b7178b4b/evaluation/generation/agg.8b7178b4b_GEM-web_nlg_en_PALM_prompt_2.json
@@ -0,0 +1 @@
+{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.7421753234818311, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.058142097599066474}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.07467676852089566, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0016365713066735295}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.39206002680015317, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.005060271412913158}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.11568167659841286, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0019756616545099706}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.03201968689196959, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0010606073690619061}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.17433943475684258, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0036664073103424713}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.049111982430960446, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0012783990550174547}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.06439207073032543, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.001373955096716673}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.34371763245797454, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.004505609280479333}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.10005506236504283, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.001651667409449542}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.06782658473689997, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0015027459055493005}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.35767707252188435, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.004646683404394275}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.1050019387672539, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0018008044515920333}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b4b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
\ No newline at end of file
diff --git a/8b7178b4b/evaluation/generation/agg.8b7178b4b_GEM-web_nlg_en_PALM_prompt_3.json b/8b7178b4b/evaluation/generation/agg.8b7178b4b_GEM-web_nlg_en_PALM_prompt_3.json
new file mode 100644
index 0000000000000000000000000000000000000000..2e3735b0458e0513f41a6aa566cc6cbe4a187d63
--- /dev/null
+++ b/8b7178b4b/evaluation/generation/agg.8b7178b4b_GEM-web_nlg_en_PALM_prompt_3.json
@@ -0,0 +1 @@
+{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.7188535617714238, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.03519944556258603}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.07388743389054267, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0014922551498186853}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.40655159407197466, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.005087707266532105}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.11579215217674567, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0018264739390039843}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.0314899543029952, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0009557711556575558}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.1826251549753492, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.003641907390766911}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.04901217699390464, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0011685479931085388}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.06252862451464879, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.00121227139148576}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.3504975578791793, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.00448181425469438}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.0983386270641459, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.001492600011551131}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.06730929990997614, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0013838254439389821}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.3708026802911801, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.004623882247190055}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.10530117384191293, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0016684330187746533}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b4b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
\ No newline at end of file
diff --git a/8b7178b4b/evaluation/generation/agg.8b7178b4b_GEM-web_nlg_en_PALM_prompt_4.json b/8b7178b4b/evaluation/generation/agg.8b7178b4b_GEM-web_nlg_en_PALM_prompt_4.json
new file mode 100644
index 0000000000000000000000000000000000000000..747a7897bfe7a3082df97790d5cae6604bcac4ab
--- /dev/null
+++ b/8b7178b4b/evaluation/generation/agg.8b7178b4b_GEM-web_nlg_en_PALM_prompt_4.json
@@ -0,0 +1 @@
+{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.7356439715076686, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.034000125678247964}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.07276465535536372, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.001402274340634281}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.4010732633333032, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.005031473102978275}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.1147943522144661, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0018063385532961329}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.030441136569863136, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0008680169230332024}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.17998860942195416, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0035899170058648713}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.04799828271736168, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0011481297831730717}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.061409018888316014, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.001162871962197699}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.34312128735122893, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.004376699961011487}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.09700958657806398, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0014957916703609423}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.06590967070216937, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0012895737406364428}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.3647408763184601, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.004596517842636764}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.10389368450136928, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0016495601390011836}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b4b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
\ No newline at end of file
diff --git a/8b7178b4b/evaluation/generation/agg.8b7178b4b_GEM-web_nlg_en_PALM_prompt_5.json b/8b7178b4b/evaluation/generation/agg.8b7178b4b_GEM-web_nlg_en_PALM_prompt_5.json
new file mode 100644
index 0000000000000000000000000000000000000000..b1b2444bd1c95aa9741d41f11f5ddfaa62076c3a
--- /dev/null
+++ b/8b7178b4b/evaluation/generation/agg.8b7178b4b_GEM-web_nlg_en_PALM_prompt_5.json
@@ -0,0 +1 @@
+{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.774498674641057, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.046025666098424615}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.07168554541147722, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0013567491666828951}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.40311382031478993, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.005093519815631301}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.1145076719006982, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0017986089153009496}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.029640249400459045, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0008248970598907547}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.17922361100140816, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0036463149274911922}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.04748834860177524, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0011381885469089572}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.05979676606804081, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0010829180120786699}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.3429993601688589, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0044165337458333025}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.09581522945526423, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0014457046424409396}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.0647765765675484, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0012251354927212512}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.36627971544729243, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.00466986591996086}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.10343462084662852, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n  ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0016190969843153712}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b4b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
\ No newline at end of file
diff --git a/8b7178b4b/evaluation/generation/agg.8b7178b4b_GEM-wiki_lingua_en_tldr_en_0.json b/8b7178b4b/evaluation/generation/agg.8b7178b4b_GEM-wiki_lingua_en_tldr_en_0.json
new file mode 100644
index 0000000000000000000000000000000000000000..deb96a1b930dcaed1c00ae37bf8e1a6e7e38ab17
--- /dev/null
+++ b/8b7178b4b/evaluation/generation/agg.8b7178b4b_GEM-wiki_lingua_en_tldr_en_0.json
@@ -0,0 +1 @@
+{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.16713881881245704, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0019675559614062317}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.2675635999010821, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.002514916375462715}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.18849953981212825, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0017651084291359991}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.03129812627625643, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0008361148272854467}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.05071754722622078, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.001311834312898096}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.03506973226946951, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.000828365798842279}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.12151132876190665, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.001367091894817583}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.20104126997571714, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0019684739493456994}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.13822063710515434, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.001209603068162531}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.15537471654065166, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0018202212529241414}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.25004836431590777, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0023701023893618026}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.17552265116019092, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.001631387985609637}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 1.5232593023758256, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.04747265065152494}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b4b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
\ No newline at end of file
diff --git a/8b7178b4b/evaluation/generation/agg.8b7178b4b_GEM-wiki_lingua_en_tldr_en_1.json b/8b7178b4b/evaluation/generation/agg.8b7178b4b_GEM-wiki_lingua_en_tldr_en_1.json
new file mode 100644
index 0000000000000000000000000000000000000000..a8e4ce9ecd911ecee6bd6bac12cceb4a3f750d25
--- /dev/null
+++ b/8b7178b4b/evaluation/generation/agg.8b7178b4b_GEM-wiki_lingua_en_tldr_en_1.json
@@ -0,0 +1 @@
+{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.17053090746783298, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0019872482776833456}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.2862592054300288, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0025208248871725165}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.1984602458949581, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.001835524387024883}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.034781757767719336, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0007914156120289567}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.05819412473089733, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0013767099922495528}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.04000846126572821, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0008452030738686245}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.11920675042988256, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0012760690917858948}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.2090473969119961, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0019879307284216374}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.14054619741164162, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0012057251064722665}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.16072751156146212, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0018664748954273353}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.2708352805729064, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0023993133135588726}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.18719993560041495, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0017213110797211495}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 1.8880118627721794, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.05942943111971925}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b4b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
\ No newline at end of file
diff --git a/8b7178b4b/evaluation/generation/agg.8b7178b4b_e2e_nlg_cleaned_generate_text_restaurant_0.json b/8b7178b4b/evaluation/generation/agg.8b7178b4b_e2e_nlg_cleaned_generate_text_restaurant_0.json
new file mode 100644
index 0000000000000000000000000000000000000000..22045b31868212c59cba09e6b4ba8b21cc3d1a30
--- /dev/null
+++ b/8b7178b4b/evaluation/generation/agg.8b7178b4b_e2e_nlg_cleaned_generate_text_restaurant_0.json
@@ -0,0 +1 @@
+{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 1.7691179321908588, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.05199631794487594}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.12404388162125599, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0014296085692360062}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.24113298891528734, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0021195221866358305}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.1597457244947749, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0015754443648131056}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.028289803328052695, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0008570729362836853}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.05442101911192778, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0014043108131977875}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.035999793072600185, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0009624756735963167}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.11310129172937744, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0012492394490123515}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.22135445621870045, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.001908698378645529}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.14600635284564262, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0013795631395537417}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.10515677540232808, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0012984677845430614}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.20544674202250837, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.001966897239298014}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.1355872624921452, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0014398399847291973}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b4b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
\ No newline at end of file
diff --git a/8b7178b4b/evaluation/generation/agg.8b7178b4b_e2e_nlg_cleaned_generate_text_restaurant_1.json b/8b7178b4b/evaluation/generation/agg.8b7178b4b_e2e_nlg_cleaned_generate_text_restaurant_1.json
new file mode 100644
index 0000000000000000000000000000000000000000..aa7e2caeaf7a040b2fd359b187913c0635bd3426
--- /dev/null
+++ b/8b7178b4b/evaluation/generation/agg.8b7178b4b_e2e_nlg_cleaned_generate_text_restaurant_1.json
@@ -0,0 +1 @@
+{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 5.651044586681334, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.07926438990816918}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.2716212972945311, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0026058040333959537}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.4236216032222433, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0026180226506166463}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.3078278785254422, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.001908062705345462}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.10952561587924448, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0016418857574103474}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.17185473066549486, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0018971444386739766}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.12259566762702832, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0013108581526452968}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.20793662314020775, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.001966736398197739}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.3306629249371328, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0021642636770467646}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.23718705321133973, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.00138256279237305}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.2271295898568511, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0023078665148769063}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.35503755430035955, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002474276440762186}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.2573509246055057, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0017483657121627276}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b4b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
\ No newline at end of file
diff --git a/8b7178b4b/evaluation/generation/agg.8b7178b4b_e2e_nlg_cleaned_generate_text_restaurant_2.json b/8b7178b4b/evaluation/generation/agg.8b7178b4b_e2e_nlg_cleaned_generate_text_restaurant_2.json
new file mode 100644
index 0000000000000000000000000000000000000000..f6521f103eb8ce5d270861bad0e2584739037bdc
--- /dev/null
+++ b/8b7178b4b/evaluation/generation/agg.8b7178b4b_e2e_nlg_cleaned_generate_text_restaurant_2.json
@@ -0,0 +1 @@
+{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 5.8424163448955415, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.08355827344599492}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.2566016107317033, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0022067745089076256}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.461194891770376, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0025273079440682293}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.31288302842111376, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0017808501976129172}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.10663824144759744, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.001372692263793003}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.1963344119197119, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0019212562311138061}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.12981003854776116, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.001232713595041452}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.19466241734213402, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0015883723269025356}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.35748703923277775, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.002135743881465239}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.23920101858831294, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0012833315214473093}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.2160451049013204, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0019568292320360082}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.38977835292938257, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002475205894007756}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.26382166088143494, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0016894009510511382}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b4b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
\ No newline at end of file
diff --git a/8b7178b4b/evaluation/generation/agg.8b7178b4b_e2e_nlg_cleaned_generate_text_restaurant_3.json b/8b7178b4b/evaluation/generation/agg.8b7178b4b_e2e_nlg_cleaned_generate_text_restaurant_3.json
new file mode 100644
index 0000000000000000000000000000000000000000..b9a515524675a83f9273b363cc0223444b30b32a
--- /dev/null
+++ b/8b7178b4b/evaluation/generation/agg.8b7178b4b_e2e_nlg_cleaned_generate_text_restaurant_3.json
@@ -0,0 +1 @@
+{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 5.810474759979567, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.07579006844458654}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.24309828934304148, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0019005898753670413}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.4670566019113305, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0023932775436402193}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.307699846077658, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0016836798456212607}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.10168781845908033, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0011883824391395869}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.20209647607764591, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0019104221912810875}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.12918070346005242, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0011654611122014701}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.18580091003866794, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0014382176327432795}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.36221484092783923, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0020685753646924234}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.23605748656654202, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0012419184728918431}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.20525158463645873, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0017314120087883877}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.3947742486081849, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002360306367469496}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.25979950139324387, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0016108185985968808}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b4b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
\ No newline at end of file
diff --git a/8b7178b4b/evaluation/generation/agg.8b7178b4b_e2e_nlg_cleaned_generate_text_restaurant_4.json b/8b7178b4b/evaluation/generation/agg.8b7178b4b_e2e_nlg_cleaned_generate_text_restaurant_4.json
new file mode 100644
index 0000000000000000000000000000000000000000..7c4df208fb995968d71c4cc657828e2ceefa3125
--- /dev/null
+++ b/8b7178b4b/evaluation/generation/agg.8b7178b4b_e2e_nlg_cleaned_generate_text_restaurant_4.json
@@ -0,0 +1 @@
+{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 5.856583823504068, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.08590752222075627}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.24083912701088211, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.001891795976143307}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.46987502312106944, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.002373970795855411}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.3072297336966097, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.001677600068476274}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.10194079575537923, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0012738284927660533}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.20486607531207773, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0019180923641256352}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.13013649575708963, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0011872264816164021}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.18407870093260886, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.00147269880081677}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.3634519313727439, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0020191238189298604}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.23535954148408947, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.001235256889547661}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.20324551641227329, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.001730862935580683}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.3970250584377568, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.00232405668117713}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.2592529609283163, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0015947975783409823}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b4b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
\ No newline at end of file
diff --git a/8b7178b4b/evaluation/generation/agg.8b7178b4b_e2e_nlg_cleaned_generate_text_restaurant_5.json b/8b7178b4b/evaluation/generation/agg.8b7178b4b_e2e_nlg_cleaned_generate_text_restaurant_5.json
new file mode 100644
index 0000000000000000000000000000000000000000..92f8fbee08451d8fc1303c7d134d93815a7cfe49
--- /dev/null
+++ b/8b7178b4b/evaluation/generation/agg.8b7178b4b_e2e_nlg_cleaned_generate_text_restaurant_5.json
@@ -0,0 +1 @@
+{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 5.701988223904371, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.05974327947875344}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.2357192890434931, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0017709653279963382}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.46793875744209246, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0023377619498614883}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.30342180002404195, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0016429215832907001}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.09834833044152837, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0011541319656096648}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.2017968785574919, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.001929650748227801}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.12711050572770574, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.001182240229485702}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.17902630726475285, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0012897932939114531}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.36086666712927185, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0019704166593789243}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.2315222472386274, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0011861982598106887}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.19964816084000173, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0016311639902271977}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.39633205907689883, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0022975718064400996}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.256915761431013, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0015851952901884037}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b4b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
\ No newline at end of file
diff --git a/8b7178b4b/evaluation/generation/agg.8b7178b4b_gem_xsum_article_DOC_summary_0.json b/8b7178b4b/evaluation/generation/agg.8b7178b4b_gem_xsum_article_DOC_summary_0.json
new file mode 100644
index 0000000000000000000000000000000000000000..c22f7126136b4a8451d94a2f7f1d85433f7bcdc1
--- /dev/null
+++ b/8b7178b4b/evaluation/generation/agg.8b7178b4b_gem_xsum_article_DOC_summary_0.json
@@ -0,0 +1 @@
+{"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.1655773024489004, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0028576845139351987}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.32851562505932314, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0039803284921081}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.2085969207130687, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002677475599144402}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.036089021388490294, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0015969906559037688}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.07051409622481412, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0025672161932226503}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.044656791266145565, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0016785808050176795}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.12315934281916667, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0022342850887312926}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.2450990337124764, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0031721420821518784}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.15500054113220843, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.002069637277533473}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.130324877139233, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.002282842578765438}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.2618161404002937, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0034400702646342324}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.1648368106728737, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0021862489460295126}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 1.7324193654708817, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.13416126334094292}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b4b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
\ No newline at end of file
diff --git a/8b7178b4b/evaluation/generation/agg.8b7178b4b_gem_xsum_article_DOC_summary_1.json b/8b7178b4b/evaluation/generation/agg.8b7178b4b_gem_xsum_article_DOC_summary_1.json
new file mode 100644
index 0000000000000000000000000000000000000000..d560771ecbceadf22ac3ee5d7e22fda8a488cf13
--- /dev/null
+++ b/8b7178b4b/evaluation/generation/agg.8b7178b4b_gem_xsum_article_DOC_summary_1.json
@@ -0,0 +1 @@
+{"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.11802826309532148, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.001749157965306058}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.2892400892348949, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.003843967130143464}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.16538496371657396, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0022823923941345326}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.017821354334989277, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0008141612570301904}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.04545518558443322, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.00206477086181511}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.025192202332776397, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0011296247751961938}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.08829393492717166, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0011978449597419942}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.219464699636153, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0029458921049705118}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.12421960300726272, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0016143054069980196}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.09409421229331506, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0013239443870656413}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.23385506502219394, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.00324955152033377}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.13238865606315464, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0017891980246226586}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 0.9718103240280566, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.0618162240060556}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b4b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
\ No newline at end of file
diff --git a/8b7178b4b/evaluation/generation/agg.8b7178b4b_gem_xsum_article_DOC_summary_2.json b/8b7178b4b/evaluation/generation/agg.8b7178b4b_gem_xsum_article_DOC_summary_2.json
new file mode 100644
index 0000000000000000000000000000000000000000..3591ce37f1d26f7bfb60a1bdee9a6fd390069f42
--- /dev/null
+++ b/8b7178b4b/evaluation/generation/agg.8b7178b4b_gem_xsum_article_DOC_summary_2.json
@@ -0,0 +1 @@
+{"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.11300176611968675, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0015556940763036152}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.28221717850396943, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.003575100110282272}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.15944247539707976, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002076895889562475}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.016338317353700334, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0007066281144508859}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.04249427144713084, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0018743700982079647}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.023271326997139498, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.000998854505955608}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.08541393540574391, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0011196684087793841}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.21475231259583918, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0027096441989616395}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.12070835773462767, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0015063108121480115}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.0910255058370105, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0012384505621305965}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.22901792345741215, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0029934117569162446}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.12867240195403112, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0016705058332119036}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 0.8276490187392188, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.06507121832672531}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b4b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
\ No newline at end of file
diff --git a/8b7178b4b/evaluation/generation/examples.8b7178b4b_GEM-web_nlg_en_PALM_prompt_0.jsonl b/8b7178b4b/evaluation/generation/examples.8b7178b4b_GEM-web_nlg_en_PALM_prompt_0.jsonl
new file mode 100644
index 0000000000000000000000000000000000000000..88e7f34fdfc729354a2b4e815977a92dd69c3198
--- /dev/null
+++ b/8b7178b4b/evaluation/generation/examples.8b7178b4b_GEM-web_nlg_en_PALM_prompt_0.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bf88e33a55c34025a07d79dab53c6766ef807950d54b473cf7f703c65dd53959
+size 4002555
diff --git a/8b7178b4b/evaluation/generation/examples.8b7178b4b_GEM-web_nlg_en_PALM_prompt_1.jsonl b/8b7178b4b/evaluation/generation/examples.8b7178b4b_GEM-web_nlg_en_PALM_prompt_1.jsonl
new file mode 100644
index 0000000000000000000000000000000000000000..a89e0ca4641f499dc92e40e884606c0fcbddc4d6
--- /dev/null
+++ b/8b7178b4b/evaluation/generation/examples.8b7178b4b_GEM-web_nlg_en_PALM_prompt_1.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e0705c6d4a948db8aec235d02c1ff2486623824312c8597fda9adb8353386406
+size 5153682
diff --git a/8b7178b4b/evaluation/generation/examples.8b7178b4b_GEM-web_nlg_en_PALM_prompt_2.jsonl b/8b7178b4b/evaluation/generation/examples.8b7178b4b_GEM-web_nlg_en_PALM_prompt_2.jsonl
new file mode 100644
index 0000000000000000000000000000000000000000..a3a6d94d1bce6f0f46064cfd54910ee9f2061464
--- /dev/null
+++ b/8b7178b4b/evaluation/generation/examples.8b7178b4b_GEM-web_nlg_en_PALM_prompt_2.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31bf8f713d4f1a9dc876e43d47675e9d088e60a72c226e67cc0252d4277c096f
+size 6063528
diff --git a/8b7178b4b/evaluation/generation/examples.8b7178b4b_GEM-web_nlg_en_PALM_prompt_3.jsonl b/8b7178b4b/evaluation/generation/examples.8b7178b4b_GEM-web_nlg_en_PALM_prompt_3.jsonl
new file mode 100644
index 0000000000000000000000000000000000000000..94a8db888fa2288918fb2b006b36848510235ddd
--- /dev/null
+++ b/8b7178b4b/evaluation/generation/examples.8b7178b4b_GEM-web_nlg_en_PALM_prompt_3.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:997d5edf5956cd3ddeccac543cd11ae720e3b04c21b0bcb075e614533cb42dfd
+size 6974107
diff --git a/8b7178b4b/evaluation/generation/examples.8b7178b4b_GEM-web_nlg_en_PALM_prompt_4.jsonl b/8b7178b4b/evaluation/generation/examples.8b7178b4b_GEM-web_nlg_en_PALM_prompt_4.jsonl
new file mode 100644
index 0000000000000000000000000000000000000000..d5ab5677839d2adff2959ba141ce4ec4e686fe12
--- /dev/null
+++ b/8b7178b4b/evaluation/generation/examples.8b7178b4b_GEM-web_nlg_en_PALM_prompt_4.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d2ea27bbdd65361eeffdf1415da7f820b120208103b474fc7e2039792359c57f
+size 7855474
diff --git a/8b7178b4b/evaluation/generation/examples.8b7178b4b_GEM-web_nlg_en_PALM_prompt_5.jsonl b/8b7178b4b/evaluation/generation/examples.8b7178b4b_GEM-web_nlg_en_PALM_prompt_5.jsonl
new file mode 100644
index 0000000000000000000000000000000000000000..6f00233236bae38dfe069ddde7cd51c7418b552a
--- /dev/null
+++ b/8b7178b4b/evaluation/generation/examples.8b7178b4b_GEM-web_nlg_en_PALM_prompt_5.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:53254de4132d7ff9a3f710073ff11235f80c15fec827cbfb21e492e54b0a51a4
+size 8774285
diff --git a/8b7178b4b/evaluation/generation/examples.8b7178b4b_GEM-wiki_lingua_en_tldr_en_0.jsonl b/8b7178b4b/evaluation/generation/examples.8b7178b4b_GEM-wiki_lingua_en_tldr_en_0.jsonl
new file mode 100644
index 0000000000000000000000000000000000000000..c9c186885f0b9aecaff2e473fb89b60d99d77f13
--- /dev/null
+++ b/8b7178b4b/evaluation/generation/examples.8b7178b4b_GEM-wiki_lingua_en_tldr_en_0.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:12c6bb90f2100ca84e9c2f2d9623a119d7d998d723deba5ef4bb73359df096ae
+size 7692206
diff --git a/8b7178b4b/evaluation/generation/examples.8b7178b4b_GEM-wiki_lingua_en_tldr_en_1.jsonl b/8b7178b4b/evaluation/generation/examples.8b7178b4b_GEM-wiki_lingua_en_tldr_en_1.jsonl
new file mode 100644
index 0000000000000000000000000000000000000000..0711b40f9c035556c94ecc23c7fb5cb72844e243
--- /dev/null
+++ b/8b7178b4b/evaluation/generation/examples.8b7178b4b_GEM-wiki_lingua_en_tldr_en_1.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:60b3a8fb612a485701a3a01e9105c0fc2f03bc2cc494a69a1a60cc602e241dee
+size 13321079
diff --git a/8b7178b4b/evaluation/generation/examples.8b7178b4b_GEM-wiki_lingua_en_tldr_en_2.jsonl b/8b7178b4b/evaluation/generation/examples.8b7178b4b_GEM-wiki_lingua_en_tldr_en_2.jsonl
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/8b7178b4b/evaluation/generation/examples.8b7178b4b_GEM-wiki_lingua_en_tldr_en_3.jsonl b/8b7178b4b/evaluation/generation/examples.8b7178b4b_GEM-wiki_lingua_en_tldr_en_3.jsonl
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/8b7178b4b/evaluation/generation/examples.8b7178b4b_GEM-wiki_lingua_en_tldr_en_4.jsonl b/8b7178b4b/evaluation/generation/examples.8b7178b4b_GEM-wiki_lingua_en_tldr_en_4.jsonl
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/8b7178b4b/evaluation/generation/examples.8b7178b4b_GEM-wiki_lingua_en_tldr_en_5.jsonl b/8b7178b4b/evaluation/generation/examples.8b7178b4b_GEM-wiki_lingua_en_tldr_en_5.jsonl
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/8b7178b4b/evaluation/generation/examples.8b7178b4b_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl b/8b7178b4b/evaluation/generation/examples.8b7178b4b_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl
new file mode 100644
index 0000000000000000000000000000000000000000..6ccd861e0ffa57a76d6dced1a57b45ad9df2b1d3
--- /dev/null
+++ b/8b7178b4b/evaluation/generation/examples.8b7178b4b_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:46ceb73aedec05124d413a0fcae8bdb8bf4fab207ade1e62df12a032988c11fc
+size 4489612
diff --git a/8b7178b4b/evaluation/generation/examples.8b7178b4b_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl b/8b7178b4b/evaluation/generation/examples.8b7178b4b_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl
new file mode 100644
index 0000000000000000000000000000000000000000..9e5a0585313656078554d59a181be9e605900f92
--- /dev/null
+++ b/8b7178b4b/evaluation/generation/examples.8b7178b4b_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:42375f7039b6762a6af14e135a9b76e6231d4224a69839655d662bd727db2874
+size 5405120
diff --git a/8b7178b4b/evaluation/generation/examples.8b7178b4b_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl b/8b7178b4b/evaluation/generation/examples.8b7178b4b_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl
new file mode 100644
index 0000000000000000000000000000000000000000..8d95cdae78e96afc1bd829c3de371c39fd82047d
--- /dev/null
+++ b/8b7178b4b/evaluation/generation/examples.8b7178b4b_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5735e8345d99ffe079a3ba23a5e2dd8c9c7b538e6bdc2baa082e1b264b5f7e7b
+size 6547631
diff --git a/8b7178b4b/evaluation/generation/examples.8b7178b4b_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl b/8b7178b4b/evaluation/generation/examples.8b7178b4b_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl
new file mode 100644
index 0000000000000000000000000000000000000000..0f917c3582e2da9c6b28778d3fdc75dd61a5f490
--- /dev/null
+++ b/8b7178b4b/evaluation/generation/examples.8b7178b4b_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:32e484d025c7c1eb3edabe941bd11e6c71fea45873e1aa031d643867cb30aaa5
+size 7655673
diff --git a/8b7178b4b/evaluation/generation/examples.8b7178b4b_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl b/8b7178b4b/evaluation/generation/examples.8b7178b4b_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl
new file mode 100644
index 0000000000000000000000000000000000000000..7d42ee62a1069cf8224fb95aeddfd40ced82880a
--- /dev/null
+++ b/8b7178b4b/evaluation/generation/examples.8b7178b4b_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5d0a314a224f5d10caa84102231a518a643725fa42ce44d84d6fbae7db49dbc5
+size 8739962
diff --git a/8b7178b4b/evaluation/generation/examples.8b7178b4b_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl b/8b7178b4b/evaluation/generation/examples.8b7178b4b_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl
new file mode 100644
index 0000000000000000000000000000000000000000..bca5a6eb0e12c36345b7518047f1a7a3a82c6abf
--- /dev/null
+++ b/8b7178b4b/evaluation/generation/examples.8b7178b4b_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5e3ff0e05dc30b2c9f35b210cd5c10a6b6b8e86f69e1b777dae2f9742318caba
+size 9833882
diff --git a/8b7178b4b/evaluation/generation/examples.8b7178b4b_gem_xsum_article_DOC_summary_0.jsonl b/8b7178b4b/evaluation/generation/examples.8b7178b4b_gem_xsum_article_DOC_summary_0.jsonl
new file mode 100644
index 0000000000000000000000000000000000000000..5a7eec43cc0050f1817d3f132d861f7a1aaa848b
--- /dev/null
+++ b/8b7178b4b/evaluation/generation/examples.8b7178b4b_gem_xsum_article_DOC_summary_0.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4f48d04f8df448365eeb3fb32c54a2214fc39a94af254f24af1f81ecdf75aa6c
+size 2799092
diff --git a/8b7178b4b/evaluation/generation/examples.8b7178b4b_gem_xsum_article_DOC_summary_1.jsonl b/8b7178b4b/evaluation/generation/examples.8b7178b4b_gem_xsum_article_DOC_summary_1.jsonl
new file mode 100644
index 0000000000000000000000000000000000000000..5652783ec3b8f44ab207f6bba6ff10e8b37435f2
--- /dev/null
+++ b/8b7178b4b/evaluation/generation/examples.8b7178b4b_gem_xsum_article_DOC_summary_1.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:edc43687a012ad536184411af971a152e682f796c1fbf3e763e36252e6913578
+size 5104164
diff --git a/8b7178b4b/evaluation/generation/examples.8b7178b4b_gem_xsum_article_DOC_summary_2.jsonl b/8b7178b4b/evaluation/generation/examples.8b7178b4b_gem_xsum_article_DOC_summary_2.jsonl
new file mode 100644
index 0000000000000000000000000000000000000000..64ee67033bfcc97e68d7a730b6f70cfb4efa5190
--- /dev/null
+++ b/8b7178b4b/evaluation/generation/examples.8b7178b4b_gem_xsum_article_DOC_summary_2.jsonl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0b67b949168f7795c80af96ef2ddb9e4ae8b5b0622e84451e181f15c5690028d
+size 7382602
diff --git a/8b7178b4b/evaluation/generation/examples.8b7178b4b_gem_xsum_article_DOC_summary_3.jsonl b/8b7178b4b/evaluation/generation/examples.8b7178b4b_gem_xsum_article_DOC_summary_3.jsonl
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/8b7178b4b/evaluation/generation/examples.8b7178b4b_gem_xsum_article_DOC_summary_4.jsonl b/8b7178b4b/evaluation/generation/examples.8b7178b4b_gem_xsum_article_DOC_summary_4.jsonl
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/8b7178b4b/evaluation/generation/examples.8b7178b4b_gem_xsum_article_DOC_summary_5.jsonl b/8b7178b4b/evaluation/generation/examples.8b7178b4b_gem_xsum_article_DOC_summary_5.jsonl
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/8b7178b4b/evaluation/generation/slim.8b7178b4b_GEM-web_nlg_en_PALM_prompt_0.json b/8b7178b4b/evaluation/generation/slim.8b7178b4b_GEM-web_nlg_en_PALM_prompt_0.json
new file mode 100644
index 0000000000000000000000000000000000000000..3442254d433669c22337ed7c7d25d147fed6f20f
--- /dev/null
+++ b/8b7178b4b/evaluation/generation/slim.8b7178b4b_GEM-web_nlg_en_PALM_prompt_0.json
@@ -0,0 +1,133 @@
+{
+  "results": [
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "bleu": 0.5360651854033321,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "bleu_stderr": 0.04256493048481422
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rouge1_precision": 0.0899710062520438,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge1_precision_stderr": 0.002733564353594117
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rouge1_recall": 0.33063321497259557,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge1_recall_stderr": 0.005082809158106232
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rouge1_fmeasure": 0.11703731094627681,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge1_fmeasure_stderr": 0.0023099010503697163
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rouge2_precision": 0.039336196641808305,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge2_precision_stderr": 0.001715101531039642
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rouge2_recall": 0.14377777116972676,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge2_recall_stderr": 0.003290079655812811
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rouge2_fmeasure": 0.050209876854654424,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge2_fmeasure_stderr": 0.00139566067725402
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rougeL_precision": 0.079768875505422,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeL_precision_stderr": 0.0024190846876120864
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rougeL_recall": 0.29935042842995513,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeL_recall_stderr": 0.004616391395045465
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rougeL_fmeasure": 0.10410690412481492,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeL_fmeasure_stderr": 0.001970211495017576
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rougeLsum_precision": 0.0826555500749783,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeLsum_precision_stderr": 0.002555867928952612
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rougeLsum_recall": 0.30355552506299893,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeLsum_recall_stderr": 0.004656831028738861
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rougeLsum_fmeasure": 0.10721436888153839,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeLsum_fmeasure_stderr": 0.0021127388747576538
+    }
+  ],
+  "config": {
+    "model": "hf-causal",
+    "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b4b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
+    "task_args": "",
+    "num_fewshot": 0,
+    "batch_size": 16,
+    "device": "cuda",
+    "use_cache": false,
+    "limit": 3000,
+    "bootstrap_iters": 10,
+    "seed": 1234
+  }
+}
\ No newline at end of file
diff --git a/8b7178b4b/evaluation/generation/slim.8b7178b4b_GEM-web_nlg_en_PALM_prompt_1.json b/8b7178b4b/evaluation/generation/slim.8b7178b4b_GEM-web_nlg_en_PALM_prompt_1.json
new file mode 100644
index 0000000000000000000000000000000000000000..50be49201c15804e6356193f5484840586d77438
--- /dev/null
+++ b/8b7178b4b/evaluation/generation/slim.8b7178b4b_GEM-web_nlg_en_PALM_prompt_1.json
@@ -0,0 +1,133 @@
+{
+  "results": [
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "bleu": 0.6802224009591102,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "bleu_stderr": 0.04492815625354331
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rouge1_precision": 0.07461313068432787,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge1_precision_stderr": 0.0017662222515359694
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rouge1_recall": 0.372595398614972,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge1_recall_stderr": 0.005022894768490944
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rouge1_fmeasure": 0.11390823787630369,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge1_fmeasure_stderr": 0.002092748719741799
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rouge2_precision": 0.03144260517505359,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge2_precision_stderr": 0.0010954007812074113
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rouge2_recall": 0.16216117762517218,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge2_recall_stderr": 0.003540557825236836
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rouge2_fmeasure": 0.04788274742708309,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge2_fmeasure_stderr": 0.001335468400942394
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rougeL_precision": 0.06430823474158391,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeL_precision_stderr": 0.001450822149093989
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rougeL_recall": 0.32776952027123735,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeL_recall_stderr": 0.004463575595509288
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rougeL_fmeasure": 0.09862949956148871,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeL_fmeasure_stderr": 0.0017348311881018063
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rougeLsum_precision": 0.06792680129399332,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeLsum_precision_stderr": 0.0016204262837167783
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rougeLsum_recall": 0.33968125496997575,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeLsum_recall_stderr": 0.0045445995282505924
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rougeLsum_fmeasure": 0.10371590290401189,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeLsum_fmeasure_stderr": 0.001919446152689098
+    }
+  ],
+  "config": {
+    "model": "hf-causal",
+    "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b4b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
+    "task_args": "",
+    "num_fewshot": 1,
+    "batch_size": 16,
+    "device": "cuda",
+    "use_cache": false,
+    "limit": 3000,
+    "bootstrap_iters": 10,
+    "seed": 1234
+  }
+}
\ No newline at end of file
diff --git a/8b7178b4b/evaluation/generation/slim.8b7178b4b_GEM-web_nlg_en_PALM_prompt_2.json b/8b7178b4b/evaluation/generation/slim.8b7178b4b_GEM-web_nlg_en_PALM_prompt_2.json
new file mode 100644
index 0000000000000000000000000000000000000000..0d6022f0384d03cf636c190f05f52982fbc439b7
--- /dev/null
+++ b/8b7178b4b/evaluation/generation/slim.8b7178b4b_GEM-web_nlg_en_PALM_prompt_2.json
@@ -0,0 +1,133 @@
+{
+  "results": [
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "bleu": 0.7421753234818311,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "bleu_stderr": 0.058142097599066474
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rouge1_precision": 0.07467676852089566,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge1_precision_stderr": 0.0016365713066735295
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rouge1_recall": 0.39206002680015317,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge1_recall_stderr": 0.005060271412913158
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rouge1_fmeasure": 0.11568167659841286,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge1_fmeasure_stderr": 0.0019756616545099706
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rouge2_precision": 0.03201968689196959,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge2_precision_stderr": 0.0010606073690619061
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rouge2_recall": 0.17433943475684258,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge2_recall_stderr": 0.0036664073103424713
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rouge2_fmeasure": 0.049111982430960446,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge2_fmeasure_stderr": 0.0012783990550174547
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rougeL_precision": 0.06439207073032543,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeL_precision_stderr": 0.001373955096716673
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rougeL_recall": 0.34371763245797454,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeL_recall_stderr": 0.004505609280479333
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rougeL_fmeasure": 0.10005506236504283,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeL_fmeasure_stderr": 0.001651667409449542
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rougeLsum_precision": 0.06782658473689997,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeLsum_precision_stderr": 0.0015027459055493005
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rougeLsum_recall": 0.35767707252188435,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeLsum_recall_stderr": 0.004646683404394275
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rougeLsum_fmeasure": 0.1050019387672539,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeLsum_fmeasure_stderr": 0.0018008044515920333
+    }
+  ],
+  "config": {
+    "model": "hf-causal",
+    "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b4b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
+    "task_args": "",
+    "num_fewshot": 2,
+    "batch_size": 16,
+    "device": "cuda",
+    "use_cache": false,
+    "limit": 3000,
+    "bootstrap_iters": 10,
+    "seed": 1234
+  }
+}
\ No newline at end of file
diff --git a/8b7178b4b/evaluation/generation/slim.8b7178b4b_GEM-web_nlg_en_PALM_prompt_3.json b/8b7178b4b/evaluation/generation/slim.8b7178b4b_GEM-web_nlg_en_PALM_prompt_3.json
new file mode 100644
index 0000000000000000000000000000000000000000..7c398fb433604b25f18289d7ad4718dca59d4ac6
--- /dev/null
+++ b/8b7178b4b/evaluation/generation/slim.8b7178b4b_GEM-web_nlg_en_PALM_prompt_3.json
@@ -0,0 +1,133 @@
+{
+  "results": [
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "bleu": 0.7188535617714238,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "bleu_stderr": 0.03519944556258603
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rouge1_precision": 0.07388743389054267,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge1_precision_stderr": 0.0014922551498186853
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rouge1_recall": 0.40655159407197466,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge1_recall_stderr": 0.005087707266532105
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rouge1_fmeasure": 0.11579215217674567,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge1_fmeasure_stderr": 0.0018264739390039843
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rouge2_precision": 0.0314899543029952,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge2_precision_stderr": 0.0009557711556575558
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rouge2_recall": 0.1826251549753492,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge2_recall_stderr": 0.003641907390766911
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rouge2_fmeasure": 0.04901217699390464,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge2_fmeasure_stderr": 0.0011685479931085388
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rougeL_precision": 0.06252862451464879,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeL_precision_stderr": 0.00121227139148576
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rougeL_recall": 0.3504975578791793,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeL_recall_stderr": 0.00448181425469438
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rougeL_fmeasure": 0.0983386270641459,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeL_fmeasure_stderr": 0.001492600011551131
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rougeLsum_precision": 0.06730929990997614,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeLsum_precision_stderr": 0.0013838254439389821
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rougeLsum_recall": 0.3708026802911801,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeLsum_recall_stderr": 0.004623882247190055
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rougeLsum_fmeasure": 0.10530117384191293,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeLsum_fmeasure_stderr": 0.0016684330187746533
+    }
+  ],
+  "config": {
+    "model": "hf-causal",
+    "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b4b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
+    "task_args": "",
+    "num_fewshot": 3,
+    "batch_size": 16,
+    "device": "cuda",
+    "use_cache": false,
+    "limit": 3000,
+    "bootstrap_iters": 10,
+    "seed": 1234
+  }
+}
\ No newline at end of file
diff --git a/8b7178b4b/evaluation/generation/slim.8b7178b4b_GEM-web_nlg_en_PALM_prompt_4.json b/8b7178b4b/evaluation/generation/slim.8b7178b4b_GEM-web_nlg_en_PALM_prompt_4.json
new file mode 100644
index 0000000000000000000000000000000000000000..8cf7f804d99fb8b1f7deb7887ce4987ea0c9893e
--- /dev/null
+++ b/8b7178b4b/evaluation/generation/slim.8b7178b4b_GEM-web_nlg_en_PALM_prompt_4.json
@@ -0,0 +1,133 @@
+{
+  "results": [
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "bleu": 0.7356439715076686,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "bleu_stderr": 0.034000125678247964
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rouge1_precision": 0.07276465535536372,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge1_precision_stderr": 0.001402274340634281
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rouge1_recall": 0.4010732633333032,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge1_recall_stderr": 0.005031473102978275
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rouge1_fmeasure": 0.1147943522144661,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge1_fmeasure_stderr": 0.0018063385532961329
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rouge2_precision": 0.030441136569863136,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge2_precision_stderr": 0.0008680169230332024
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rouge2_recall": 0.17998860942195416,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge2_recall_stderr": 0.0035899170058648713
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rouge2_fmeasure": 0.04799828271736168,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge2_fmeasure_stderr": 0.0011481297831730717
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rougeL_precision": 0.061409018888316014,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeL_precision_stderr": 0.001162871962197699
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rougeL_recall": 0.34312128735122893,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeL_recall_stderr": 0.004376699961011487
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rougeL_fmeasure": 0.09700958657806398,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeL_fmeasure_stderr": 0.0014957916703609423
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rougeLsum_precision": 0.06590967070216937,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeLsum_precision_stderr": 0.0012895737406364428
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rougeLsum_recall": 0.3647408763184601,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeLsum_recall_stderr": 0.004596517842636764
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rougeLsum_fmeasure": 0.10389368450136928,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeLsum_fmeasure_stderr": 0.0016495601390011836
+    }
+  ],
+  "config": {
+    "model": "hf-causal",
+    "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b4b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
+    "task_args": "",
+    "num_fewshot": 4,
+    "batch_size": 16,
+    "device": "cuda",
+    "use_cache": false,
+    "limit": 3000,
+    "bootstrap_iters": 10,
+    "seed": 1234
+  }
+}
\ No newline at end of file
diff --git a/8b7178b4b/evaluation/generation/slim.8b7178b4b_GEM-web_nlg_en_PALM_prompt_5.json b/8b7178b4b/evaluation/generation/slim.8b7178b4b_GEM-web_nlg_en_PALM_prompt_5.json
new file mode 100644
index 0000000000000000000000000000000000000000..c092945027e20b38eb386c19fb78636e358eb8f4
--- /dev/null
+++ b/8b7178b4b/evaluation/generation/slim.8b7178b4b_GEM-web_nlg_en_PALM_prompt_5.json
@@ -0,0 +1,133 @@
+{
+  "results": [
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "bleu": 0.774498674641057,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "bleu_stderr": 0.046025666098424615
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rouge1_precision": 0.07168554541147722,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge1_precision_stderr": 0.0013567491666828951
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rouge1_recall": 0.40311382031478993,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge1_recall_stderr": 0.005093519815631301
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rouge1_fmeasure": 0.1145076719006982,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge1_fmeasure_stderr": 0.0017986089153009496
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rouge2_precision": 0.029640249400459045,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge2_precision_stderr": 0.0008248970598907547
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rouge2_recall": 0.17922361100140816,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge2_recall_stderr": 0.0036463149274911922
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rouge2_fmeasure": 0.04748834860177524,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge2_fmeasure_stderr": 0.0011381885469089572
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rougeL_precision": 0.05979676606804081,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeL_precision_stderr": 0.0010829180120786699
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rougeL_recall": 0.3429993601688589,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeL_recall_stderr": 0.0044165337458333025
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rougeL_fmeasure": 0.09581522945526423,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeL_fmeasure_stderr": 0.0014457046424409396
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rougeLsum_precision": 0.0647765765675484,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeLsum_precision_stderr": 0.0012251354927212512
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rougeLsum_recall": 0.36627971544729243,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeLsum_recall_stderr": 0.00466986591996086
+    },
+    {
+      "task_name": "GEM/web_nlg_en",
+      "prompt_name": "PALM_prompt",
+      "rougeLsum_fmeasure": 0.10343462084662852,
+      "dataset_path": "GEM/web_nlg",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeLsum_fmeasure_stderr": 0.0016190969843153712
+    }
+  ],
+  "config": {
+    "model": "hf-causal",
+    "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b4b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
+    "task_args": "",
+    "num_fewshot": 5,
+    "batch_size": 16,
+    "device": "cuda",
+    "use_cache": false,
+    "limit": 3000,
+    "bootstrap_iters": 10,
+    "seed": 1234
+  }
+}
\ No newline at end of file
diff --git a/8b7178b4b/evaluation/generation/slim.8b7178b4b_GEM-wiki_lingua_en_tldr_en_0.json b/8b7178b4b/evaluation/generation/slim.8b7178b4b_GEM-wiki_lingua_en_tldr_en_0.json
new file mode 100644
index 0000000000000000000000000000000000000000..cbe3063a85f30fe75a705b9a2e568e19d91e7915
--- /dev/null
+++ b/8b7178b4b/evaluation/generation/slim.8b7178b4b_GEM-wiki_lingua_en_tldr_en_0.json
@@ -0,0 +1,133 @@
+{
+  "results": [
+    {
+      "task_name": "GEM/wiki_lingua_en",
+      "prompt_name": "tldr_en",
+      "rouge1_precision": 0.16713881881245704,
+      "dataset_path": "GEM/wiki_lingua",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge1_precision_stderr": 0.0019675559614062317
+    },
+    {
+      "task_name": "GEM/wiki_lingua_en",
+      "prompt_name": "tldr_en",
+      "rouge1_recall": 0.2675635999010821,
+      "dataset_path": "GEM/wiki_lingua",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge1_recall_stderr": 0.002514916375462715
+    },
+    {
+      "task_name": "GEM/wiki_lingua_en",
+      "prompt_name": "tldr_en",
+      "rouge1_fmeasure": 0.18849953981212825,
+      "dataset_path": "GEM/wiki_lingua",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge1_fmeasure_stderr": 0.0017651084291359991
+    },
+    {
+      "task_name": "GEM/wiki_lingua_en",
+      "prompt_name": "tldr_en",
+      "rouge2_precision": 0.03129812627625643,
+      "dataset_path": "GEM/wiki_lingua",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge2_precision_stderr": 0.0008361148272854467
+    },
+    {
+      "task_name": "GEM/wiki_lingua_en",
+      "prompt_name": "tldr_en",
+      "rouge2_recall": 0.05071754722622078,
+      "dataset_path": "GEM/wiki_lingua",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge2_recall_stderr": 0.001311834312898096
+    },
+    {
+      "task_name": "GEM/wiki_lingua_en",
+      "prompt_name": "tldr_en",
+      "rouge2_fmeasure": 0.03506973226946951,
+      "dataset_path": "GEM/wiki_lingua",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge2_fmeasure_stderr": 0.000828365798842279
+    },
+    {
+      "task_name": "GEM/wiki_lingua_en",
+      "prompt_name": "tldr_en",
+      "rougeL_precision": 0.12151132876190665,
+      "dataset_path": "GEM/wiki_lingua",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeL_precision_stderr": 0.001367091894817583
+    },
+    {
+      "task_name": "GEM/wiki_lingua_en",
+      "prompt_name": "tldr_en",
+      "rougeL_recall": 0.20104126997571714,
+      "dataset_path": "GEM/wiki_lingua",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeL_recall_stderr": 0.0019684739493456994
+    },
+    {
+      "task_name": "GEM/wiki_lingua_en",
+      "prompt_name": "tldr_en",
+      "rougeL_fmeasure": 0.13822063710515434,
+      "dataset_path": "GEM/wiki_lingua",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeL_fmeasure_stderr": 0.001209603068162531
+    },
+    {
+      "task_name": "GEM/wiki_lingua_en",
+      "prompt_name": "tldr_en",
+      "rougeLsum_precision": 0.15537471654065166,
+      "dataset_path": "GEM/wiki_lingua",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeLsum_precision_stderr": 0.0018202212529241414
+    },
+    {
+      "task_name": "GEM/wiki_lingua_en",
+      "prompt_name": "tldr_en",
+      "rougeLsum_recall": 0.25004836431590777,
+      "dataset_path": "GEM/wiki_lingua",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeLsum_recall_stderr": 0.0023701023893618026
+    },
+    {
+      "task_name": "GEM/wiki_lingua_en",
+      "prompt_name": "tldr_en",
+      "rougeLsum_fmeasure": 0.17552265116019092,
+      "dataset_path": "GEM/wiki_lingua",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeLsum_fmeasure_stderr": 0.001631387985609637
+    },
+    {
+      "task_name": "GEM/wiki_lingua_en",
+      "prompt_name": "tldr_en",
+      "bleu": 1.5232593023758256,
+      "dataset_path": "GEM/wiki_lingua",
+      "dataset_name": "en",
+      "subset": null,
+      "bleu_stderr": 0.04747265065152494
+    }
+  ],
+  "config": {
+    "model": "hf-causal",
+    "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b4b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
+    "task_args": "",
+    "num_fewshot": 0,
+    "batch_size": 16,
+    "device": "cuda",
+    "use_cache": false,
+    "limit": 3000,
+    "bootstrap_iters": 10,
+    "seed": 1234
+  }
+}
\ No newline at end of file
diff --git a/8b7178b4b/evaluation/generation/slim.8b7178b4b_GEM-wiki_lingua_en_tldr_en_1.json b/8b7178b4b/evaluation/generation/slim.8b7178b4b_GEM-wiki_lingua_en_tldr_en_1.json
new file mode 100644
index 0000000000000000000000000000000000000000..388cbf0abf4e90c1841a99279f635fa227495873
--- /dev/null
+++ b/8b7178b4b/evaluation/generation/slim.8b7178b4b_GEM-wiki_lingua_en_tldr_en_1.json
@@ -0,0 +1,133 @@
+{
+  "results": [
+    {
+      "task_name": "GEM/wiki_lingua_en",
+      "prompt_name": "tldr_en",
+      "rouge1_precision": 0.17053090746783298,
+      "dataset_path": "GEM/wiki_lingua",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge1_precision_stderr": 0.0019872482776833456
+    },
+    {
+      "task_name": "GEM/wiki_lingua_en",
+      "prompt_name": "tldr_en",
+      "rouge1_recall": 0.2862592054300288,
+      "dataset_path": "GEM/wiki_lingua",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge1_recall_stderr": 0.0025208248871725165
+    },
+    {
+      "task_name": "GEM/wiki_lingua_en",
+      "prompt_name": "tldr_en",
+      "rouge1_fmeasure": 0.1984602458949581,
+      "dataset_path": "GEM/wiki_lingua",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge1_fmeasure_stderr": 0.001835524387024883
+    },
+    {
+      "task_name": "GEM/wiki_lingua_en",
+      "prompt_name": "tldr_en",
+      "rouge2_precision": 0.034781757767719336,
+      "dataset_path": "GEM/wiki_lingua",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge2_precision_stderr": 0.0007914156120289567
+    },
+    {
+      "task_name": "GEM/wiki_lingua_en",
+      "prompt_name": "tldr_en",
+      "rouge2_recall": 0.05819412473089733,
+      "dataset_path": "GEM/wiki_lingua",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge2_recall_stderr": 0.0013767099922495528
+    },
+    {
+      "task_name": "GEM/wiki_lingua_en",
+      "prompt_name": "tldr_en",
+      "rouge2_fmeasure": 0.04000846126572821,
+      "dataset_path": "GEM/wiki_lingua",
+      "dataset_name": "en",
+      "subset": null,
+      "rouge2_fmeasure_stderr": 0.0008452030738686245
+    },
+    {
+      "task_name": "GEM/wiki_lingua_en",
+      "prompt_name": "tldr_en",
+      "rougeL_precision": 0.11920675042988256,
+      "dataset_path": "GEM/wiki_lingua",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeL_precision_stderr": 0.0012760690917858948
+    },
+    {
+      "task_name": "GEM/wiki_lingua_en",
+      "prompt_name": "tldr_en",
+      "rougeL_recall": 0.2090473969119961,
+      "dataset_path": "GEM/wiki_lingua",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeL_recall_stderr": 0.0019879307284216374
+    },
+    {
+      "task_name": "GEM/wiki_lingua_en",
+      "prompt_name": "tldr_en",
+      "rougeL_fmeasure": 0.14054619741164162,
+      "dataset_path": "GEM/wiki_lingua",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeL_fmeasure_stderr": 0.0012057251064722665
+    },
+    {
+      "task_name": "GEM/wiki_lingua_en",
+      "prompt_name": "tldr_en",
+      "rougeLsum_precision": 0.16072751156146212,
+      "dataset_path": "GEM/wiki_lingua",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeLsum_precision_stderr": 0.0018664748954273353
+    },
+    {
+      "task_name": "GEM/wiki_lingua_en",
+      "prompt_name": "tldr_en",
+      "rougeLsum_recall": 0.2708352805729064,
+      "dataset_path": "GEM/wiki_lingua",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeLsum_recall_stderr": 0.0023993133135588726
+    },
+    {
+      "task_name": "GEM/wiki_lingua_en",
+      "prompt_name": "tldr_en",
+      "rougeLsum_fmeasure": 0.18719993560041495,
+      "dataset_path": "GEM/wiki_lingua",
+      "dataset_name": "en",
+      "subset": null,
+      "rougeLsum_fmeasure_stderr": 0.0017213110797211495
+    },
+    {
+      "task_name": "GEM/wiki_lingua_en",
+      "prompt_name": "tldr_en",
+      "bleu": 1.8880118627721794,
+      "dataset_path": "GEM/wiki_lingua",
+      "dataset_name": "en",
+      "subset": null,
+      "bleu_stderr": 0.05942943111971925
+    }
+  ],
+  "config": {
+    "model": "hf-causal",
+    "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b4b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
+    "task_args": "",
+    "num_fewshot": 1,
+    "batch_size": 16,
+    "device": "cuda",
+    "use_cache": false,
+    "limit": 3000,
+    "bootstrap_iters": 10,
+    "seed": 1234
+  }
+}
\ No newline at end of file
diff --git a/8b7178b4b/evaluation/generation/slim.8b7178b4b_e2e_nlg_cleaned_generate_text_restaurant_0.json b/8b7178b4b/evaluation/generation/slim.8b7178b4b_e2e_nlg_cleaned_generate_text_restaurant_0.json
new file mode 100644
index 0000000000000000000000000000000000000000..c34494f2ff0c545dc907c65e0cf5acfe137322c7
--- /dev/null
+++ b/8b7178b4b/evaluation/generation/slim.8b7178b4b_e2e_nlg_cleaned_generate_text_restaurant_0.json
@@ -0,0 +1,133 @@
+{
+  "results": [
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "bleu": 1.7691179321908588,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "bleu_stderr": 0.05199631794487594
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rouge1_precision": 0.12404388162125599,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rouge1_precision_stderr": 0.0014296085692360062
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rouge1_recall": 0.24113298891528734,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rouge1_recall_stderr": 0.0021195221866358305
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rouge1_fmeasure": 0.1597457244947749,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rouge1_fmeasure_stderr": 0.0015754443648131056
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rouge2_precision": 0.028289803328052695,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rouge2_precision_stderr": 0.0008570729362836853
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rouge2_recall": 0.05442101911192778,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rouge2_recall_stderr": 0.0014043108131977875
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rouge2_fmeasure": 0.035999793072600185,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rouge2_fmeasure_stderr": 0.0009624756735963167
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rougeL_precision": 0.11310129172937744,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rougeL_precision_stderr": 0.0012492394490123515
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rougeL_recall": 0.22135445621870045,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rougeL_recall_stderr": 0.001908698378645529
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rougeL_fmeasure": 0.14600635284564262,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rougeL_fmeasure_stderr": 0.0013795631395537417
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rougeLsum_precision": 0.10515677540232808,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rougeLsum_precision_stderr": 0.0012984677845430614
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rougeLsum_recall": 0.20544674202250837,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rougeLsum_recall_stderr": 0.001966897239298014
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rougeLsum_fmeasure": 0.1355872624921452,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rougeLsum_fmeasure_stderr": 0.0014398399847291973
+    }
+  ],
+  "config": {
+    "model": "hf-causal",
+    "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b4b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
+    "task_args": "",
+    "num_fewshot": 0,
+    "batch_size": 16,
+    "device": "cuda",
+    "use_cache": false,
+    "limit": 3000,
+    "bootstrap_iters": 10,
+    "seed": 1234
+  }
+}
\ No newline at end of file
diff --git a/8b7178b4b/evaluation/generation/slim.8b7178b4b_e2e_nlg_cleaned_generate_text_restaurant_1.json b/8b7178b4b/evaluation/generation/slim.8b7178b4b_e2e_nlg_cleaned_generate_text_restaurant_1.json
new file mode 100644
index 0000000000000000000000000000000000000000..6bd726fa6bd4c0c992a4b26c8d4439dc28562c8a
--- /dev/null
+++ b/8b7178b4b/evaluation/generation/slim.8b7178b4b_e2e_nlg_cleaned_generate_text_restaurant_1.json
@@ -0,0 +1,133 @@
+{
+  "results": [
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "bleu": 5.651044586681334,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "bleu_stderr": 0.07926438990816918
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rouge1_precision": 0.2716212972945311,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rouge1_precision_stderr": 0.0026058040333959537
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rouge1_recall": 0.4236216032222433,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rouge1_recall_stderr": 0.0026180226506166463
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rouge1_fmeasure": 0.3078278785254422,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rouge1_fmeasure_stderr": 0.001908062705345462
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rouge2_precision": 0.10952561587924448,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rouge2_precision_stderr": 0.0016418857574103474
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rouge2_recall": 0.17185473066549486,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rouge2_recall_stderr": 0.0018971444386739766
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rouge2_fmeasure": 0.12259566762702832,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rouge2_fmeasure_stderr": 0.0013108581526452968
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rougeL_precision": 0.20793662314020775,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rougeL_precision_stderr": 0.001966736398197739
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rougeL_recall": 0.3306629249371328,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rougeL_recall_stderr": 0.0021642636770467646
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rougeL_fmeasure": 0.23718705321133973,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rougeL_fmeasure_stderr": 0.00138256279237305
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rougeLsum_precision": 0.2271295898568511,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rougeLsum_precision_stderr": 0.0023078665148769063
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rougeLsum_recall": 0.35503755430035955,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rougeLsum_recall_stderr": 0.002474276440762186
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rougeLsum_fmeasure": 0.2573509246055057,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rougeLsum_fmeasure_stderr": 0.0017483657121627276
+    }
+  ],
+  "config": {
+    "model": "hf-causal",
+    "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b4b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
+    "task_args": "",
+    "num_fewshot": 1,
+    "batch_size": 16,
+    "device": "cuda",
+    "use_cache": false,
+    "limit": 3000,
+    "bootstrap_iters": 10,
+    "seed": 1234
+  }
+}
\ No newline at end of file
diff --git a/8b7178b4b/evaluation/generation/slim.8b7178b4b_e2e_nlg_cleaned_generate_text_restaurant_2.json b/8b7178b4b/evaluation/generation/slim.8b7178b4b_e2e_nlg_cleaned_generate_text_restaurant_2.json
new file mode 100644
index 0000000000000000000000000000000000000000..5a1d8583363ddc1ebde6c87e123d9f97cd36bc62
--- /dev/null
+++ b/8b7178b4b/evaluation/generation/slim.8b7178b4b_e2e_nlg_cleaned_generate_text_restaurant_2.json
@@ -0,0 +1,133 @@
+{
+  "results": [
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "bleu": 5.8424163448955415,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "bleu_stderr": 0.08355827344599492
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rouge1_precision": 0.2566016107317033,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rouge1_precision_stderr": 0.0022067745089076256
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rouge1_recall": 0.461194891770376,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rouge1_recall_stderr": 0.0025273079440682293
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rouge1_fmeasure": 0.31288302842111376,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rouge1_fmeasure_stderr": 0.0017808501976129172
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rouge2_precision": 0.10663824144759744,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rouge2_precision_stderr": 0.001372692263793003
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rouge2_recall": 0.1963344119197119,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rouge2_recall_stderr": 0.0019212562311138061
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rouge2_fmeasure": 0.12981003854776116,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rouge2_fmeasure_stderr": 0.001232713595041452
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rougeL_precision": 0.19466241734213402,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rougeL_precision_stderr": 0.0015883723269025356
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rougeL_recall": 0.35748703923277775,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rougeL_recall_stderr": 0.002135743881465239
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rougeL_fmeasure": 0.23920101858831294,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rougeL_fmeasure_stderr": 0.0012833315214473093
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rougeLsum_precision": 0.2160451049013204,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rougeLsum_precision_stderr": 0.0019568292320360082
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rougeLsum_recall": 0.38977835292938257,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rougeLsum_recall_stderr": 0.002475205894007756
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rougeLsum_fmeasure": 0.26382166088143494,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rougeLsum_fmeasure_stderr": 0.0016894009510511382
+    }
+  ],
+  "config": {
+    "model": "hf-causal",
+    "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b4b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
+    "task_args": "",
+    "num_fewshot": 2,
+    "batch_size": 16,
+    "device": "cuda",
+    "use_cache": false,
+    "limit": 3000,
+    "bootstrap_iters": 10,
+    "seed": 1234
+  }
+}
\ No newline at end of file
diff --git a/8b7178b4b/evaluation/generation/slim.8b7178b4b_e2e_nlg_cleaned_generate_text_restaurant_3.json b/8b7178b4b/evaluation/generation/slim.8b7178b4b_e2e_nlg_cleaned_generate_text_restaurant_3.json
new file mode 100644
index 0000000000000000000000000000000000000000..93ccbfa926e7c960ab13ce505d5e8eacd94663a0
--- /dev/null
+++ b/8b7178b4b/evaluation/generation/slim.8b7178b4b_e2e_nlg_cleaned_generate_text_restaurant_3.json
@@ -0,0 +1,133 @@
+{
+  "results": [
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "bleu": 5.810474759979567,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "bleu_stderr": 0.07579006844458654
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rouge1_precision": 0.24309828934304148,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rouge1_precision_stderr": 0.0019005898753670413
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rouge1_recall": 0.4670566019113305,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rouge1_recall_stderr": 0.0023932775436402193
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rouge1_fmeasure": 0.307699846077658,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rouge1_fmeasure_stderr": 0.0016836798456212607
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rouge2_precision": 0.10168781845908033,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rouge2_precision_stderr": 0.0011883824391395869
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rouge2_recall": 0.20209647607764591,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rouge2_recall_stderr": 0.0019104221912810875
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rouge2_fmeasure": 0.12918070346005242,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rouge2_fmeasure_stderr": 0.0011654611122014701
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rougeL_precision": 0.18580091003866794,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rougeL_precision_stderr": 0.0014382176327432795
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rougeL_recall": 0.36221484092783923,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rougeL_recall_stderr": 0.0020685753646924234
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rougeL_fmeasure": 0.23605748656654202,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rougeL_fmeasure_stderr": 0.0012419184728918431
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rougeLsum_precision": 0.20525158463645873,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rougeLsum_precision_stderr": 0.0017314120087883877
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rougeLsum_recall": 0.3947742486081849,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rougeLsum_recall_stderr": 0.002360306367469496
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rougeLsum_fmeasure": 0.25979950139324387,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rougeLsum_fmeasure_stderr": 0.0016108185985968808
+    }
+  ],
+  "config": {
+    "model": "hf-causal",
+    "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b4b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
+    "task_args": "",
+    "num_fewshot": 3,
+    "batch_size": 16,
+    "device": "cuda",
+    "use_cache": false,
+    "limit": 3000,
+    "bootstrap_iters": 10,
+    "seed": 1234
+  }
+}
\ No newline at end of file
diff --git a/8b7178b4b/evaluation/generation/slim.8b7178b4b_e2e_nlg_cleaned_generate_text_restaurant_4.json b/8b7178b4b/evaluation/generation/slim.8b7178b4b_e2e_nlg_cleaned_generate_text_restaurant_4.json
new file mode 100644
index 0000000000000000000000000000000000000000..8d7b758739ede3ee8ad03e8b464ac21f613ede5c
--- /dev/null
+++ b/8b7178b4b/evaluation/generation/slim.8b7178b4b_e2e_nlg_cleaned_generate_text_restaurant_4.json
@@ -0,0 +1,133 @@
+{
+  "results": [
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "bleu": 5.856583823504068,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "bleu_stderr": 0.08590752222075627
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rouge1_precision": 0.24083912701088211,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rouge1_precision_stderr": 0.001891795976143307
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rouge1_recall": 0.46987502312106944,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rouge1_recall_stderr": 0.002373970795855411
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rouge1_fmeasure": 0.3072297336966097,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rouge1_fmeasure_stderr": 0.001677600068476274
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rouge2_precision": 0.10194079575537923,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rouge2_precision_stderr": 0.0012738284927660533
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rouge2_recall": 0.20486607531207773,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rouge2_recall_stderr": 0.0019180923641256352
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rouge2_fmeasure": 0.13013649575708963,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rouge2_fmeasure_stderr": 0.0011872264816164021
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rougeL_precision": 0.18407870093260886,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rougeL_precision_stderr": 0.00147269880081677
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rougeL_recall": 0.3634519313727439,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rougeL_recall_stderr": 0.0020191238189298604
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rougeL_fmeasure": 0.23535954148408947,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rougeL_fmeasure_stderr": 0.001235256889547661
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rougeLsum_precision": 0.20324551641227329,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rougeLsum_precision_stderr": 0.001730862935580683
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rougeLsum_recall": 0.3970250584377568,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rougeLsum_recall_stderr": 0.00232405668117713
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rougeLsum_fmeasure": 0.2592529609283163,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rougeLsum_fmeasure_stderr": 0.0015947975783409823
+    }
+  ],
+  "config": {
+    "model": "hf-causal",
+    "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b4b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
+    "task_args": "",
+    "num_fewshot": 4,
+    "batch_size": 16,
+    "device": "cuda",
+    "use_cache": false,
+    "limit": 3000,
+    "bootstrap_iters": 10,
+    "seed": 1234
+  }
+}
\ No newline at end of file
diff --git a/8b7178b4b/evaluation/generation/slim.8b7178b4b_e2e_nlg_cleaned_generate_text_restaurant_5.json b/8b7178b4b/evaluation/generation/slim.8b7178b4b_e2e_nlg_cleaned_generate_text_restaurant_5.json
new file mode 100644
index 0000000000000000000000000000000000000000..a4733db008512b4768ebd7aceb1ae062926d279d
--- /dev/null
+++ b/8b7178b4b/evaluation/generation/slim.8b7178b4b_e2e_nlg_cleaned_generate_text_restaurant_5.json
@@ -0,0 +1,133 @@
+{
+  "results": [
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "bleu": 5.701988223904371,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "bleu_stderr": 0.05974327947875344
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rouge1_precision": 0.2357192890434931,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rouge1_precision_stderr": 0.0017709653279963382
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rouge1_recall": 0.46793875744209246,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rouge1_recall_stderr": 0.0023377619498614883
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rouge1_fmeasure": 0.30342180002404195,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rouge1_fmeasure_stderr": 0.0016429215832907001
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rouge2_precision": 0.09834833044152837,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rouge2_precision_stderr": 0.0011541319656096648
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rouge2_recall": 0.2017968785574919,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rouge2_recall_stderr": 0.001929650748227801
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rouge2_fmeasure": 0.12711050572770574,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rouge2_fmeasure_stderr": 0.001182240229485702
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rougeL_precision": 0.17902630726475285,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rougeL_precision_stderr": 0.0012897932939114531
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rougeL_recall": 0.36086666712927185,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rougeL_recall_stderr": 0.0019704166593789243
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rougeL_fmeasure": 0.2315222472386274,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rougeL_fmeasure_stderr": 0.0011861982598106887
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rougeLsum_precision": 0.19964816084000173,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rougeLsum_precision_stderr": 0.0016311639902271977
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rougeLsum_recall": 0.39633205907689883,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rougeLsum_recall_stderr": 0.0022975718064400996
+    },
+    {
+      "task_name": "e2e_nlg_cleaned",
+      "prompt_name": "generate_text_restaurant",
+      "rougeLsum_fmeasure": 0.256915761431013,
+      "dataset_path": "e2e_nlg_cleaned",
+      "dataset_name": null,
+      "subset": null,
+      "rougeLsum_fmeasure_stderr": 0.0015851952901884037
+    }
+  ],
+  "config": {
+    "model": "hf-causal",
+    "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b4b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
+    "task_args": "",
+    "num_fewshot": 5,
+    "batch_size": 16,
+    "device": "cuda",
+    "use_cache": false,
+    "limit": 3000,
+    "bootstrap_iters": 10,
+    "seed": 1234
+  }
+}
\ No newline at end of file
diff --git a/8b7178b4b/evaluation/generation/slim.8b7178b4b_gem_xsum_article_DOC_summary_0.json b/8b7178b4b/evaluation/generation/slim.8b7178b4b_gem_xsum_article_DOC_summary_0.json
new file mode 100644
index 0000000000000000000000000000000000000000..3cbe369c57367239a3ad30912279c59073a39839
--- /dev/null
+++ b/8b7178b4b/evaluation/generation/slim.8b7178b4b_gem_xsum_article_DOC_summary_0.json
@@ -0,0 +1,133 @@
+{
+  "results": [
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rouge1_precision": 0.1655773024489004,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rouge1_precision_stderr": 0.0028576845139351987
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rouge1_recall": 0.32851562505932314,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rouge1_recall_stderr": 0.0039803284921081
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rouge1_fmeasure": 0.2085969207130687,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rouge1_fmeasure_stderr": 0.002677475599144402
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rouge2_precision": 0.036089021388490294,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rouge2_precision_stderr": 0.0015969906559037688
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rouge2_recall": 0.07051409622481412,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rouge2_recall_stderr": 0.0025672161932226503
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rouge2_fmeasure": 0.044656791266145565,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rouge2_fmeasure_stderr": 0.0016785808050176795
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rougeL_precision": 0.12315934281916667,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rougeL_precision_stderr": 0.0022342850887312926
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rougeL_recall": 0.2450990337124764,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rougeL_recall_stderr": 0.0031721420821518784
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rougeL_fmeasure": 0.15500054113220843,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rougeL_fmeasure_stderr": 0.002069637277533473
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rougeLsum_precision": 0.130324877139233,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rougeLsum_precision_stderr": 0.002282842578765438
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rougeLsum_recall": 0.2618161404002937,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rougeLsum_recall_stderr": 0.0034400702646342324
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rougeLsum_fmeasure": 0.1648368106728737,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rougeLsum_fmeasure_stderr": 0.0021862489460295126
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "bleu": 1.7324193654708817,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "bleu_stderr": 0.13416126334094292
+    }
+  ],
+  "config": {
+    "model": "hf-causal",
+    "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b4b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
+    "task_args": "",
+    "num_fewshot": 0,
+    "batch_size": 16,
+    "device": "cuda",
+    "use_cache": false,
+    "limit": 3000,
+    "bootstrap_iters": 10,
+    "seed": 1234
+  }
+}
\ No newline at end of file
diff --git a/8b7178b4b/evaluation/generation/slim.8b7178b4b_gem_xsum_article_DOC_summary_1.json b/8b7178b4b/evaluation/generation/slim.8b7178b4b_gem_xsum_article_DOC_summary_1.json
new file mode 100644
index 0000000000000000000000000000000000000000..efb309864356029687079cb4212980fad57b4ed9
--- /dev/null
+++ b/8b7178b4b/evaluation/generation/slim.8b7178b4b_gem_xsum_article_DOC_summary_1.json
@@ -0,0 +1,133 @@
+{
+  "results": [
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rouge1_precision": 0.11802826309532148,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rouge1_precision_stderr": 0.001749157965306058
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rouge1_recall": 0.2892400892348949,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rouge1_recall_stderr": 0.003843967130143464
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rouge1_fmeasure": 0.16538496371657396,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rouge1_fmeasure_stderr": 0.0022823923941345326
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rouge2_precision": 0.017821354334989277,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rouge2_precision_stderr": 0.0008141612570301904
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rouge2_recall": 0.04545518558443322,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rouge2_recall_stderr": 0.00206477086181511
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rouge2_fmeasure": 0.025192202332776397,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rouge2_fmeasure_stderr": 0.0011296247751961938
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rougeL_precision": 0.08829393492717166,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rougeL_precision_stderr": 0.0011978449597419942
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rougeL_recall": 0.219464699636153,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rougeL_recall_stderr": 0.0029458921049705118
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rougeL_fmeasure": 0.12421960300726272,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rougeL_fmeasure_stderr": 0.0016143054069980196
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rougeLsum_precision": 0.09409421229331506,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rougeLsum_precision_stderr": 0.0013239443870656413
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rougeLsum_recall": 0.23385506502219394,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rougeLsum_recall_stderr": 0.00324955152033377
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rougeLsum_fmeasure": 0.13238865606315464,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rougeLsum_fmeasure_stderr": 0.0017891980246226586
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "bleu": 0.9718103240280566,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "bleu_stderr": 0.0618162240060556
+    }
+  ],
+  "config": {
+    "model": "hf-causal",
+    "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b4b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
+    "task_args": "",
+    "num_fewshot": 1,
+    "batch_size": 16,
+    "device": "cuda",
+    "use_cache": false,
+    "limit": 3000,
+    "bootstrap_iters": 10,
+    "seed": 1234
+  }
+}
\ No newline at end of file
diff --git a/8b7178b4b/evaluation/generation/slim.8b7178b4b_gem_xsum_article_DOC_summary_2.json b/8b7178b4b/evaluation/generation/slim.8b7178b4b_gem_xsum_article_DOC_summary_2.json
new file mode 100644
index 0000000000000000000000000000000000000000..91ea4a6252a118eeadec1193dce6cf38e6618e45
--- /dev/null
+++ b/8b7178b4b/evaluation/generation/slim.8b7178b4b_gem_xsum_article_DOC_summary_2.json
@@ -0,0 +1,133 @@
+{
+  "results": [
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rouge1_precision": 0.11300176611968675,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rouge1_precision_stderr": 0.0015556940763036152
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rouge1_recall": 0.28221717850396943,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rouge1_recall_stderr": 0.003575100110282272
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rouge1_fmeasure": 0.15944247539707976,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rouge1_fmeasure_stderr": 0.002076895889562475
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rouge2_precision": 0.016338317353700334,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rouge2_precision_stderr": 0.0007066281144508859
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rouge2_recall": 0.04249427144713084,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rouge2_recall_stderr": 0.0018743700982079647
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rouge2_fmeasure": 0.023271326997139498,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rouge2_fmeasure_stderr": 0.000998854505955608
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rougeL_precision": 0.08541393540574391,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rougeL_precision_stderr": 0.0011196684087793841
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rougeL_recall": 0.21475231259583918,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rougeL_recall_stderr": 0.0027096441989616395
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rougeL_fmeasure": 0.12070835773462767,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rougeL_fmeasure_stderr": 0.0015063108121480115
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rougeLsum_precision": 0.0910255058370105,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rougeLsum_precision_stderr": 0.0012384505621305965
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rougeLsum_recall": 0.22901792345741215,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rougeLsum_recall_stderr": 0.0029934117569162446
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "rougeLsum_fmeasure": 0.12867240195403112,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "rougeLsum_fmeasure_stderr": 0.0016705058332119036
+    },
+    {
+      "task_name": "gem_xsum",
+      "prompt_name": "article_DOC_summary",
+      "bleu": 0.8276490187392188,
+      "dataset_path": "GEM/xsum",
+      "dataset_name": null,
+      "subset": "",
+      "bleu_stderr": 0.06507121832672531
+    }
+  ],
+  "config": {
+    "model": "hf-causal",
+    "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-8b7-178b-c4-repetitions/8b7178b4b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16",
+    "task_args": "",
+    "num_fewshot": 2,
+    "batch_size": 16,
+    "device": "cuda",
+    "use_cache": false,
+    "limit": 3000,
+    "bootstrap_iters": 10,
+    "seed": 1234
+  }
+}
\ No newline at end of file
diff --git a/8b7178b4b/evaluation/rankeval/8b7178b4b_0.json b/8b7178b4b/evaluation/rankeval/8b7178b4b_0.json
new file mode 100644
index 0000000000000000000000000000000000000000..cea3eebe18894dbcf770ee9a54525a4955d070ec
--- /dev/null
+++ b/8b7178b4b/evaluation/rankeval/8b7178b4b_0.json
@@ -0,0 +1,87 @@
+{
+    "results": {
+        "anli_r1": {
+            "acc": 0.333,
+            "acc_stderr": 0.014910846164229863
+        },
+        "anli_r2": {
+            "acc": 0.35,
+            "acc_stderr": 0.015090650341444231
+        },
+        "anli_r3": {
+            "acc": 0.3491666666666667,
+            "acc_stderr": 0.013767075395077249
+        },
+        "cb": {
+            "acc": 0.48214285714285715,
+            "acc_stderr": 0.0673769750864465,
+            "f1": 0.3432539682539683
+        },
+        "copa": {
+            "acc": 0.75,
+            "acc_stderr": 0.04351941398892446
+        },
+        "hellaswag": {
+            "acc": 0.45757817167894843,
+            "acc_stderr": 0.004971789638563322,
+            "acc_norm": 0.5881298546106354,
+            "acc_norm_stderr": 0.004911659884506154
+        },
+        "rte": {
+            "acc": 0.48736462093862815,
+            "acc_stderr": 0.030086851767188564
+        },
+        "winogrande": {
+            "acc": 0.5580110497237569,
+            "acc_stderr": 0.01395758407910899
+        },
+        "storycloze_2016": {
+            "acc": 0.6900053447354356,
+            "acc_stderr": 0.010695042806212553
+        },
+        "boolq": {
+            "acc": 0.5504587155963303,
+            "acc_stderr": 0.008700409761350798
+        },
+        "arc_easy": {
+            "acc": 0.5361952861952862,
+            "acc_stderr": 0.010232865550346727,
+            "acc_norm": 0.4823232323232323,
+            "acc_norm_stderr": 0.010253369805698971
+        },
+        "arc_challenge": {
+            "acc": 0.2636518771331058,
+            "acc_stderr": 0.012875929151297056,
+            "acc_norm": 0.30631399317406144,
+            "acc_norm_stderr": 0.013470584417276513
+        },
+        "sciq": {
+            "acc": 0.765,
+            "acc_stderr": 0.013414729030247116,
+            "acc_norm": 0.68,
+            "acc_norm_stderr": 0.014758652303574885
+        },
+        "piqa": {
+            "acc": 0.7334058759521219,
+            "acc_stderr": 0.010316749863541369,
+            "acc_norm": 0.733949945593036,
+            "acc_norm_stderr": 0.010310039263352827
+        }
+    },
+    "versions": {
+        "anli_r1": 0,
+        "anli_r2": 0,
+        "anli_r3": 0,
+        "cb": 1,
+        "copa": 0,
+        "hellaswag": 0,
+        "rte": 0,
+        "winogrande": 0,
+        "storycloze_2016": 0,
+        "boolq": 1,
+        "arc_easy": 0,
+        "arc_challenge": 0,
+        "sciq": 0,
+        "piqa": 0
+    }
+}
\ No newline at end of file
diff --git a/8b7178b4b/evaluation/rankeval/8b7178b4b_0_lm-eval_global_step84877_2023-05-15-10-06-37_0shots_backup.json b/8b7178b4b/evaluation/rankeval/8b7178b4b_0_lm-eval_global_step84877_2023-05-15-10-06-37_0shots_backup.json
new file mode 100644
index 0000000000000000000000000000000000000000..cea3eebe18894dbcf770ee9a54525a4955d070ec
--- /dev/null
+++ b/8b7178b4b/evaluation/rankeval/8b7178b4b_0_lm-eval_global_step84877_2023-05-15-10-06-37_0shots_backup.json
@@ -0,0 +1,87 @@
+{
+    "results": {
+        "anli_r1": {
+            "acc": 0.333,
+            "acc_stderr": 0.014910846164229863
+        },
+        "anli_r2": {
+            "acc": 0.35,
+            "acc_stderr": 0.015090650341444231
+        },
+        "anli_r3": {
+            "acc": 0.3491666666666667,
+            "acc_stderr": 0.013767075395077249
+        },
+        "cb": {
+            "acc": 0.48214285714285715,
+            "acc_stderr": 0.0673769750864465,
+            "f1": 0.3432539682539683
+        },
+        "copa": {
+            "acc": 0.75,
+            "acc_stderr": 0.04351941398892446
+        },
+        "hellaswag": {
+            "acc": 0.45757817167894843,
+            "acc_stderr": 0.004971789638563322,
+            "acc_norm": 0.5881298546106354,
+            "acc_norm_stderr": 0.004911659884506154
+        },
+        "rte": {
+            "acc": 0.48736462093862815,
+            "acc_stderr": 0.030086851767188564
+        },
+        "winogrande": {
+            "acc": 0.5580110497237569,
+            "acc_stderr": 0.01395758407910899
+        },
+        "storycloze_2016": {
+            "acc": 0.6900053447354356,
+            "acc_stderr": 0.010695042806212553
+        },
+        "boolq": {
+            "acc": 0.5504587155963303,
+            "acc_stderr": 0.008700409761350798
+        },
+        "arc_easy": {
+            "acc": 0.5361952861952862,
+            "acc_stderr": 0.010232865550346727,
+            "acc_norm": 0.4823232323232323,
+            "acc_norm_stderr": 0.010253369805698971
+        },
+        "arc_challenge": {
+            "acc": 0.2636518771331058,
+            "acc_stderr": 0.012875929151297056,
+            "acc_norm": 0.30631399317406144,
+            "acc_norm_stderr": 0.013470584417276513
+        },
+        "sciq": {
+            "acc": 0.765,
+            "acc_stderr": 0.013414729030247116,
+            "acc_norm": 0.68,
+            "acc_norm_stderr": 0.014758652303574885
+        },
+        "piqa": {
+            "acc": 0.7334058759521219,
+            "acc_stderr": 0.010316749863541369,
+            "acc_norm": 0.733949945593036,
+            "acc_norm_stderr": 0.010310039263352827
+        }
+    },
+    "versions": {
+        "anli_r1": 0,
+        "anli_r2": 0,
+        "anli_r3": 0,
+        "cb": 1,
+        "copa": 0,
+        "hellaswag": 0,
+        "rte": 0,
+        "winogrande": 0,
+        "storycloze_2016": 0,
+        "boolq": 1,
+        "arc_easy": 0,
+        "arc_challenge": 0,
+        "sciq": 0,
+        "piqa": 0
+    }
+}
\ No newline at end of file
diff --git a/8b7178b4b/evaluation/rankeval/8b7178b4b_1.json b/8b7178b4b/evaluation/rankeval/8b7178b4b_1.json
new file mode 100644
index 0000000000000000000000000000000000000000..a97390b22c5b74ef7af35988448b467b350cf7d1
--- /dev/null
+++ b/8b7178b4b/evaluation/rankeval/8b7178b4b_1.json
@@ -0,0 +1,87 @@
+{
+    "results": {
+        "anli_r1": {
+            "acc": 0.357,
+            "acc_stderr": 0.015158521721486774
+        },
+        "anli_r2": {
+            "acc": 0.334,
+            "acc_stderr": 0.014922019523732967
+        },
+        "anli_r3": {
+            "acc": 0.37416666666666665,
+            "acc_stderr": 0.01397501560175897
+        },
+        "cb": {
+            "acc": 0.39285714285714285,
+            "acc_stderr": 0.0658538889806635,
+            "f1": 0.26788664379209554
+        },
+        "copa": {
+            "acc": 0.75,
+            "acc_stderr": 0.04351941398892446
+        },
+        "hellaswag": {
+            "acc": 0.45339573790081655,
+            "acc_stderr": 0.004968058944472161,
+            "acc_norm": 0.5857398924517029,
+            "acc_norm_stderr": 0.004915870966174404
+        },
+        "rte": {
+            "acc": 0.4657039711191336,
+            "acc_stderr": 0.030025579819366422
+        },
+        "winogrande": {
+            "acc": 0.5509076558800315,
+            "acc_stderr": 0.01397945938914086
+        },
+        "storycloze_2016": {
+            "acc": 0.6878674505611972,
+            "acc_stderr": 0.010715220346279681
+        },
+        "boolq": {
+            "acc": 0.517125382262997,
+            "acc_stderr": 0.008739923994130054
+        },
+        "arc_easy": {
+            "acc": 0.5307239057239057,
+            "acc_stderr": 0.010240395584815237,
+            "acc_norm": 0.5084175084175084,
+            "acc_norm_stderr": 0.010258329515226462
+        },
+        "arc_challenge": {
+            "acc": 0.2568259385665529,
+            "acc_stderr": 0.0127669237941168,
+            "acc_norm": 0.31399317406143346,
+            "acc_norm_stderr": 0.013562691224726297
+        },
+        "sciq": {
+            "acc": 0.799,
+            "acc_stderr": 0.012679107214617328,
+            "acc_norm": 0.751,
+            "acc_norm_stderr": 0.0136816002787023
+        },
+        "piqa": {
+            "acc": 0.7399347116430903,
+            "acc_stderr": 0.010234893249061303,
+            "acc_norm": 0.7328618063112078,
+            "acc_norm_stderr": 0.01032344049261243
+        }
+    },
+    "versions": {
+        "anli_r1": 0,
+        "anli_r2": 0,
+        "anli_r3": 0,
+        "cb": 1,
+        "copa": 0,
+        "hellaswag": 0,
+        "rte": 0,
+        "winogrande": 0,
+        "storycloze_2016": 0,
+        "boolq": 1,
+        "arc_easy": 0,
+        "arc_challenge": 0,
+        "sciq": 0,
+        "piqa": 0
+    }
+}
\ No newline at end of file
diff --git a/8b7178b4b/evaluation/rankeval/8b7178b4b_1_lm-eval_global_step84877_2023-05-15-10-06-37_1shots_backup.json b/8b7178b4b/evaluation/rankeval/8b7178b4b_1_lm-eval_global_step84877_2023-05-15-10-06-37_1shots_backup.json
new file mode 100644
index 0000000000000000000000000000000000000000..a97390b22c5b74ef7af35988448b467b350cf7d1
--- /dev/null
+++ b/8b7178b4b/evaluation/rankeval/8b7178b4b_1_lm-eval_global_step84877_2023-05-15-10-06-37_1shots_backup.json
@@ -0,0 +1,87 @@
+{
+    "results": {
+        "anli_r1": {
+            "acc": 0.357,
+            "acc_stderr": 0.015158521721486774
+        },
+        "anli_r2": {
+            "acc": 0.334,
+            "acc_stderr": 0.014922019523732967
+        },
+        "anli_r3": {
+            "acc": 0.37416666666666665,
+            "acc_stderr": 0.01397501560175897
+        },
+        "cb": {
+            "acc": 0.39285714285714285,
+            "acc_stderr": 0.0658538889806635,
+            "f1": 0.26788664379209554
+        },
+        "copa": {
+            "acc": 0.75,
+            "acc_stderr": 0.04351941398892446
+        },
+        "hellaswag": {
+            "acc": 0.45339573790081655,
+            "acc_stderr": 0.004968058944472161,
+            "acc_norm": 0.5857398924517029,
+            "acc_norm_stderr": 0.004915870966174404
+        },
+        "rte": {
+            "acc": 0.4657039711191336,
+            "acc_stderr": 0.030025579819366422
+        },
+        "winogrande": {
+            "acc": 0.5509076558800315,
+            "acc_stderr": 0.01397945938914086
+        },
+        "storycloze_2016": {
+            "acc": 0.6878674505611972,
+            "acc_stderr": 0.010715220346279681
+        },
+        "boolq": {
+            "acc": 0.517125382262997,
+            "acc_stderr": 0.008739923994130054
+        },
+        "arc_easy": {
+            "acc": 0.5307239057239057,
+            "acc_stderr": 0.010240395584815237,
+            "acc_norm": 0.5084175084175084,
+            "acc_norm_stderr": 0.010258329515226462
+        },
+        "arc_challenge": {
+            "acc": 0.2568259385665529,
+            "acc_stderr": 0.0127669237941168,
+            "acc_norm": 0.31399317406143346,
+            "acc_norm_stderr": 0.013562691224726297
+        },
+        "sciq": {
+            "acc": 0.799,
+            "acc_stderr": 0.012679107214617328,
+            "acc_norm": 0.751,
+            "acc_norm_stderr": 0.0136816002787023
+        },
+        "piqa": {
+            "acc": 0.7399347116430903,
+            "acc_stderr": 0.010234893249061303,
+            "acc_norm": 0.7328618063112078,
+            "acc_norm_stderr": 0.01032344049261243
+        }
+    },
+    "versions": {
+        "anli_r1": 0,
+        "anli_r2": 0,
+        "anli_r3": 0,
+        "cb": 1,
+        "copa": 0,
+        "hellaswag": 0,
+        "rte": 0,
+        "winogrande": 0,
+        "storycloze_2016": 0,
+        "boolq": 1,
+        "arc_easy": 0,
+        "arc_challenge": 0,
+        "sciq": 0,
+        "piqa": 0
+    }
+}
\ No newline at end of file
diff --git a/8b7178b4b/evaluation/rankeval/8b7178b4b_2.json b/8b7178b4b/evaluation/rankeval/8b7178b4b_2.json
new file mode 100644
index 0000000000000000000000000000000000000000..b11af2a2b21df78c7fbe0624c9997898bb3e381a
--- /dev/null
+++ b/8b7178b4b/evaluation/rankeval/8b7178b4b_2.json
@@ -0,0 +1,87 @@
+{
+    "results": {
+        "anli_r1": {
+            "acc": 0.349,
+            "acc_stderr": 0.0150806639915631
+        },
+        "anli_r2": {
+            "acc": 0.335,
+            "acc_stderr": 0.014933117490932566
+        },
+        "anli_r3": {
+            "acc": 0.34833333333333333,
+            "acc_stderr": 0.013759437498874075
+        },
+        "cb": {
+            "acc": 0.375,
+            "acc_stderr": 0.06527912098338669,
+            "f1": 0.25396825396825395
+        },
+        "copa": {
+            "acc": 0.74,
+            "acc_stderr": 0.044084400227680794
+        },
+        "hellaswag": {
+            "acc": 0.45797649870543716,
+            "acc_stderr": 0.004972126523031945,
+            "acc_norm": 0.5928101971718781,
+            "acc_norm_stderr": 0.004903066639761946
+        },
+        "rte": {
+            "acc": 0.4981949458483754,
+            "acc_stderr": 0.030096267148976626
+        },
+        "winogrande": {
+            "acc": 0.5651144435674822,
+            "acc_stderr": 0.013932814110418013
+        },
+        "storycloze_2016": {
+            "acc": 0.6857295563869589,
+            "acc_stderr": 0.010735132285108176
+        },
+        "boolq": {
+            "acc": 0.5137614678899083,
+            "acc_stderr": 0.008741742106878655
+        },
+        "arc_easy": {
+            "acc": 0.5294612794612794,
+            "acc_stderr": 0.010241957728409683,
+            "acc_norm": 0.5105218855218855,
+            "acc_norm_stderr": 0.010257511546488227
+        },
+        "arc_challenge": {
+            "acc": 0.26791808873720135,
+            "acc_stderr": 0.012942030195136421,
+            "acc_norm": 0.3054607508532423,
+            "acc_norm_stderr": 0.013460080478002501
+        },
+        "sciq": {
+            "acc": 0.799,
+            "acc_stderr": 0.012679107214617331,
+            "acc_norm": 0.778,
+            "acc_norm_stderr": 0.013148721948877366
+        },
+        "piqa": {
+            "acc": 0.7328618063112078,
+            "acc_stderr": 0.010323440492612437,
+            "acc_norm": 0.7377584330794341,
+            "acc_norm_stderr": 0.010262502565172447
+        }
+    },
+    "versions": {
+        "anli_r1": 0,
+        "anli_r2": 0,
+        "anli_r3": 0,
+        "cb": 1,
+        "copa": 0,
+        "hellaswag": 0,
+        "rte": 0,
+        "winogrande": 0,
+        "storycloze_2016": 0,
+        "boolq": 1,
+        "arc_easy": 0,
+        "arc_challenge": 0,
+        "sciq": 0,
+        "piqa": 0
+    }
+}
\ No newline at end of file
diff --git a/8b7178b4b/evaluation/rankeval/8b7178b4b_2_lm-eval_global_step84877_2023-05-15-10-06-37_2shots_backup.json b/8b7178b4b/evaluation/rankeval/8b7178b4b_2_lm-eval_global_step84877_2023-05-15-10-06-37_2shots_backup.json
new file mode 100644
index 0000000000000000000000000000000000000000..b11af2a2b21df78c7fbe0624c9997898bb3e381a
--- /dev/null
+++ b/8b7178b4b/evaluation/rankeval/8b7178b4b_2_lm-eval_global_step84877_2023-05-15-10-06-37_2shots_backup.json
@@ -0,0 +1,87 @@
+{
+    "results": {
+        "anli_r1": {
+            "acc": 0.349,
+            "acc_stderr": 0.0150806639915631
+        },
+        "anli_r2": {
+            "acc": 0.335,
+            "acc_stderr": 0.014933117490932566
+        },
+        "anli_r3": {
+            "acc": 0.34833333333333333,
+            "acc_stderr": 0.013759437498874075
+        },
+        "cb": {
+            "acc": 0.375,
+            "acc_stderr": 0.06527912098338669,
+            "f1": 0.25396825396825395
+        },
+        "copa": {
+            "acc": 0.74,
+            "acc_stderr": 0.044084400227680794
+        },
+        "hellaswag": {
+            "acc": 0.45797649870543716,
+            "acc_stderr": 0.004972126523031945,
+            "acc_norm": 0.5928101971718781,
+            "acc_norm_stderr": 0.004903066639761946
+        },
+        "rte": {
+            "acc": 0.4981949458483754,
+            "acc_stderr": 0.030096267148976626
+        },
+        "winogrande": {
+            "acc": 0.5651144435674822,
+            "acc_stderr": 0.013932814110418013
+        },
+        "storycloze_2016": {
+            "acc": 0.6857295563869589,
+            "acc_stderr": 0.010735132285108176
+        },
+        "boolq": {
+            "acc": 0.5137614678899083,
+            "acc_stderr": 0.008741742106878655
+        },
+        "arc_easy": {
+            "acc": 0.5294612794612794,
+            "acc_stderr": 0.010241957728409683,
+            "acc_norm": 0.5105218855218855,
+            "acc_norm_stderr": 0.010257511546488227
+        },
+        "arc_challenge": {
+            "acc": 0.26791808873720135,
+            "acc_stderr": 0.012942030195136421,
+            "acc_norm": 0.3054607508532423,
+            "acc_norm_stderr": 0.013460080478002501
+        },
+        "sciq": {
+            "acc": 0.799,
+            "acc_stderr": 0.012679107214617331,
+            "acc_norm": 0.778,
+            "acc_norm_stderr": 0.013148721948877366
+        },
+        "piqa": {
+            "acc": 0.7328618063112078,
+            "acc_stderr": 0.010323440492612437,
+            "acc_norm": 0.7377584330794341,
+            "acc_norm_stderr": 0.010262502565172447
+        }
+    },
+    "versions": {
+        "anli_r1": 0,
+        "anli_r2": 0,
+        "anli_r3": 0,
+        "cb": 1,
+        "copa": 0,
+        "hellaswag": 0,
+        "rte": 0,
+        "winogrande": 0,
+        "storycloze_2016": 0,
+        "boolq": 1,
+        "arc_easy": 0,
+        "arc_challenge": 0,
+        "sciq": 0,
+        "piqa": 0
+    }
+}
\ No newline at end of file
diff --git a/8b7178b4b/evaluation/rankeval/8b7178b4b_3.json b/8b7178b4b/evaluation/rankeval/8b7178b4b_3.json
new file mode 100644
index 0000000000000000000000000000000000000000..8fccb7c1bfef42b9d3f433ddca9b0b1a4a931390
--- /dev/null
+++ b/8b7178b4b/evaluation/rankeval/8b7178b4b_3.json
@@ -0,0 +1,87 @@
+{
+    "results": {
+        "anli_r1": {
+            "acc": 0.356,
+            "acc_stderr": 0.01514904265930662
+        },
+        "anli_r2": {
+            "acc": 0.366,
+            "acc_stderr": 0.01524061272640576
+        },
+        "anli_r3": {
+            "acc": 0.35583333333333333,
+            "acc_stderr": 0.013826518748493315
+        },
+        "cb": {
+            "acc": 0.5,
+            "acc_stderr": 0.06741998624632421,
+            "f1": 0.34491725768321513
+        },
+        "copa": {
+            "acc": 0.71,
+            "acc_stderr": 0.045604802157206845
+        },
+        "hellaswag": {
+            "acc": 0.4587731527584147,
+            "acc_stderr": 0.004972790690640181,
+            "acc_norm": 0.5902210714997013,
+            "acc_norm_stderr": 0.004907877144720023
+        },
+        "rte": {
+            "acc": 0.4729241877256318,
+            "acc_stderr": 0.030052303463143706
+        },
+        "winogrande": {
+            "acc": 0.5406471981057617,
+            "acc_stderr": 0.014005973823825136
+        },
+        "storycloze_2016": {
+            "acc": 0.689470871191876,
+            "acc_stderr": 0.010700112173178448
+        },
+        "boolq": {
+            "acc": 0.5131498470948013,
+            "acc_stderr": 0.008742030090044975
+        },
+        "arc_easy": {
+            "acc": 0.5231481481481481,
+            "acc_stderr": 0.010248782484554471,
+            "acc_norm": 0.5046296296296297,
+            "acc_norm_stderr": 0.010259343705889728
+        },
+        "arc_challenge": {
+            "acc": 0.2713310580204778,
+            "acc_stderr": 0.012993807727545792,
+            "acc_norm": 0.310580204778157,
+            "acc_norm_stderr": 0.013522292098053054
+        },
+        "sciq": {
+            "acc": 0.795,
+            "acc_stderr": 0.012772554096113109,
+            "acc_norm": 0.783,
+            "acc_norm_stderr": 0.01304151375727071
+        },
+        "piqa": {
+            "acc": 0.7236126224156693,
+            "acc_stderr": 0.010434162388275624,
+            "acc_norm": 0.7312295973884657,
+            "acc_norm_stderr": 0.010343392940090011
+        }
+    },
+    "versions": {
+        "anli_r1": 0,
+        "anli_r2": 0,
+        "anli_r3": 0,
+        "cb": 1,
+        "copa": 0,
+        "hellaswag": 0,
+        "rte": 0,
+        "winogrande": 0,
+        "storycloze_2016": 0,
+        "boolq": 1,
+        "arc_easy": 0,
+        "arc_challenge": 0,
+        "sciq": 0,
+        "piqa": 0
+    }
+}
\ No newline at end of file
diff --git a/8b7178b4b/evaluation/rankeval/8b7178b4b_3_lm-eval_global_step84877_2023-05-15-10-06-37_3shots_backup.json b/8b7178b4b/evaluation/rankeval/8b7178b4b_3_lm-eval_global_step84877_2023-05-15-10-06-37_3shots_backup.json
new file mode 100644
index 0000000000000000000000000000000000000000..8fccb7c1bfef42b9d3f433ddca9b0b1a4a931390
--- /dev/null
+++ b/8b7178b4b/evaluation/rankeval/8b7178b4b_3_lm-eval_global_step84877_2023-05-15-10-06-37_3shots_backup.json
@@ -0,0 +1,87 @@
+{
+    "results": {
+        "anli_r1": {
+            "acc": 0.356,
+            "acc_stderr": 0.01514904265930662
+        },
+        "anli_r2": {
+            "acc": 0.366,
+            "acc_stderr": 0.01524061272640576
+        },
+        "anli_r3": {
+            "acc": 0.35583333333333333,
+            "acc_stderr": 0.013826518748493315
+        },
+        "cb": {
+            "acc": 0.5,
+            "acc_stderr": 0.06741998624632421,
+            "f1": 0.34491725768321513
+        },
+        "copa": {
+            "acc": 0.71,
+            "acc_stderr": 0.045604802157206845
+        },
+        "hellaswag": {
+            "acc": 0.4587731527584147,
+            "acc_stderr": 0.004972790690640181,
+            "acc_norm": 0.5902210714997013,
+            "acc_norm_stderr": 0.004907877144720023
+        },
+        "rte": {
+            "acc": 0.4729241877256318,
+            "acc_stderr": 0.030052303463143706
+        },
+        "winogrande": {
+            "acc": 0.5406471981057617,
+            "acc_stderr": 0.014005973823825136
+        },
+        "storycloze_2016": {
+            "acc": 0.689470871191876,
+            "acc_stderr": 0.010700112173178448
+        },
+        "boolq": {
+            "acc": 0.5131498470948013,
+            "acc_stderr": 0.008742030090044975
+        },
+        "arc_easy": {
+            "acc": 0.5231481481481481,
+            "acc_stderr": 0.010248782484554471,
+            "acc_norm": 0.5046296296296297,
+            "acc_norm_stderr": 0.010259343705889728
+        },
+        "arc_challenge": {
+            "acc": 0.2713310580204778,
+            "acc_stderr": 0.012993807727545792,
+            "acc_norm": 0.310580204778157,
+            "acc_norm_stderr": 0.013522292098053054
+        },
+        "sciq": {
+            "acc": 0.795,
+            "acc_stderr": 0.012772554096113109,
+            "acc_norm": 0.783,
+            "acc_norm_stderr": 0.01304151375727071
+        },
+        "piqa": {
+            "acc": 0.7236126224156693,
+            "acc_stderr": 0.010434162388275624,
+            "acc_norm": 0.7312295973884657,
+            "acc_norm_stderr": 0.010343392940090011
+        }
+    },
+    "versions": {
+        "anli_r1": 0,
+        "anli_r2": 0,
+        "anli_r3": 0,
+        "cb": 1,
+        "copa": 0,
+        "hellaswag": 0,
+        "rte": 0,
+        "winogrande": 0,
+        "storycloze_2016": 0,
+        "boolq": 1,
+        "arc_easy": 0,
+        "arc_challenge": 0,
+        "sciq": 0,
+        "piqa": 0
+    }
+}
\ No newline at end of file
diff --git a/8b7178b4b/evaluation/rankeval/8b7178b4b_4.json b/8b7178b4b/evaluation/rankeval/8b7178b4b_4.json
new file mode 100644
index 0000000000000000000000000000000000000000..de8d79ccfcb5c2bd6a45d878223d0caca2846740
--- /dev/null
+++ b/8b7178b4b/evaluation/rankeval/8b7178b4b_4.json
@@ -0,0 +1,87 @@
+{
+    "results": {
+        "anli_r1": {
+            "acc": 0.335,
+            "acc_stderr": 0.014933117490932572
+        },
+        "anli_r2": {
+            "acc": 0.344,
+            "acc_stderr": 0.015029633724408948
+        },
+        "anli_r3": {
+            "acc": 0.36916666666666664,
+            "acc_stderr": 0.01393666834928527
+        },
+        "cb": {
+            "acc": 0.4642857142857143,
+            "acc_stderr": 0.06724777654937658,
+            "f1": 0.3255198942180551
+        },
+        "copa": {
+            "acc": 0.69,
+            "acc_stderr": 0.04648231987117316
+        },
+        "hellaswag": {
+            "acc": 0.45518820952001593,
+            "acc_stderr": 0.004969701081068371,
+            "acc_norm": 0.5893248356901015,
+            "acc_norm_stderr": 0.004909509538525173
+        },
+        "rte": {
+            "acc": 0.4729241877256318,
+            "acc_stderr": 0.030052303463143706
+        },
+        "winogrande": {
+            "acc": 0.5438042620363063,
+            "acc_stderr": 0.01399845361092433
+        },
+        "storycloze_2016": {
+            "acc": 0.6964190272581507,
+            "acc_stderr": 0.010632901358518371
+        },
+        "boolq": {
+            "acc": 0.5165137614678899,
+            "acc_stderr": 0.008740284046486645
+        },
+        "arc_easy": {
+            "acc": 0.5378787878787878,
+            "acc_stderr": 0.010230299628864799,
+            "acc_norm": 0.5206228956228957,
+            "acc_norm_stderr": 0.010251052755716122
+        },
+        "arc_challenge": {
+            "acc": 0.26535836177474403,
+            "acc_stderr": 0.012902554762313967,
+            "acc_norm": 0.3037542662116041,
+            "acc_norm_stderr": 0.01343890918477876
+        },
+        "sciq": {
+            "acc": 0.806,
+            "acc_stderr": 0.012510816141264362,
+            "acc_norm": 0.777,
+            "acc_norm_stderr": 0.013169830843425672
+        },
+        "piqa": {
+            "acc": 0.719804134929271,
+            "acc_stderr": 0.01047812201557708,
+            "acc_norm": 0.7274211099020674,
+            "acc_norm_stderr": 0.010389256803296018
+        }
+    },
+    "versions": {
+        "anli_r1": 0,
+        "anli_r2": 0,
+        "anli_r3": 0,
+        "cb": 1,
+        "copa": 0,
+        "hellaswag": 0,
+        "rte": 0,
+        "winogrande": 0,
+        "storycloze_2016": 0,
+        "boolq": 1,
+        "arc_easy": 0,
+        "arc_challenge": 0,
+        "sciq": 0,
+        "piqa": 0
+    }
+}
\ No newline at end of file
diff --git a/8b7178b4b/evaluation/rankeval/8b7178b4b_4_lm-eval_global_step84877_2023-05-15-10-07-33_4shots_backup.json b/8b7178b4b/evaluation/rankeval/8b7178b4b_4_lm-eval_global_step84877_2023-05-15-10-07-33_4shots_backup.json
new file mode 100644
index 0000000000000000000000000000000000000000..de8d79ccfcb5c2bd6a45d878223d0caca2846740
--- /dev/null
+++ b/8b7178b4b/evaluation/rankeval/8b7178b4b_4_lm-eval_global_step84877_2023-05-15-10-07-33_4shots_backup.json
@@ -0,0 +1,87 @@
+{
+    "results": {
+        "anli_r1": {
+            "acc": 0.335,
+            "acc_stderr": 0.014933117490932572
+        },
+        "anli_r2": {
+            "acc": 0.344,
+            "acc_stderr": 0.015029633724408948
+        },
+        "anli_r3": {
+            "acc": 0.36916666666666664,
+            "acc_stderr": 0.01393666834928527
+        },
+        "cb": {
+            "acc": 0.4642857142857143,
+            "acc_stderr": 0.06724777654937658,
+            "f1": 0.3255198942180551
+        },
+        "copa": {
+            "acc": 0.69,
+            "acc_stderr": 0.04648231987117316
+        },
+        "hellaswag": {
+            "acc": 0.45518820952001593,
+            "acc_stderr": 0.004969701081068371,
+            "acc_norm": 0.5893248356901015,
+            "acc_norm_stderr": 0.004909509538525173
+        },
+        "rte": {
+            "acc": 0.4729241877256318,
+            "acc_stderr": 0.030052303463143706
+        },
+        "winogrande": {
+            "acc": 0.5438042620363063,
+            "acc_stderr": 0.01399845361092433
+        },
+        "storycloze_2016": {
+            "acc": 0.6964190272581507,
+            "acc_stderr": 0.010632901358518371
+        },
+        "boolq": {
+            "acc": 0.5165137614678899,
+            "acc_stderr": 0.008740284046486645
+        },
+        "arc_easy": {
+            "acc": 0.5378787878787878,
+            "acc_stderr": 0.010230299628864799,
+            "acc_norm": 0.5206228956228957,
+            "acc_norm_stderr": 0.010251052755716122
+        },
+        "arc_challenge": {
+            "acc": 0.26535836177474403,
+            "acc_stderr": 0.012902554762313967,
+            "acc_norm": 0.3037542662116041,
+            "acc_norm_stderr": 0.01343890918477876
+        },
+        "sciq": {
+            "acc": 0.806,
+            "acc_stderr": 0.012510816141264362,
+            "acc_norm": 0.777,
+            "acc_norm_stderr": 0.013169830843425672
+        },
+        "piqa": {
+            "acc": 0.719804134929271,
+            "acc_stderr": 0.01047812201557708,
+            "acc_norm": 0.7274211099020674,
+            "acc_norm_stderr": 0.010389256803296018
+        }
+    },
+    "versions": {
+        "anli_r1": 0,
+        "anli_r2": 0,
+        "anli_r3": 0,
+        "cb": 1,
+        "copa": 0,
+        "hellaswag": 0,
+        "rte": 0,
+        "winogrande": 0,
+        "storycloze_2016": 0,
+        "boolq": 1,
+        "arc_easy": 0,
+        "arc_challenge": 0,
+        "sciq": 0,
+        "piqa": 0
+    }
+}
\ No newline at end of file
diff --git a/8b7178b4b/evaluation/rankeval/8b7178b4b_5.json b/8b7178b4b/evaluation/rankeval/8b7178b4b_5.json
new file mode 100644
index 0000000000000000000000000000000000000000..a05a37eb46126eba95cca470120f827097cad7d5
--- /dev/null
+++ b/8b7178b4b/evaluation/rankeval/8b7178b4b_5.json
@@ -0,0 +1,87 @@
+{
+    "results": {
+        "anli_r1": {
+            "acc": 0.354,
+            "acc_stderr": 0.015129868238451772
+        },
+        "anli_r2": {
+            "acc": 0.36,
+            "acc_stderr": 0.015186527932040122
+        },
+        "anli_r3": {
+            "acc": 0.365,
+            "acc_stderr": 0.013903485981413582
+        },
+        "cb": {
+            "acc": 0.48214285714285715,
+            "acc_stderr": 0.0673769750864465,
+            "f1": 0.33071988595866
+        },
+        "copa": {
+            "acc": 0.71,
+            "acc_stderr": 0.045604802157206845
+        },
+        "hellaswag": {
+            "acc": 0.4561840270862378,
+            "acc_stderr": 0.004970585328297622,
+            "acc_norm": 0.5903206532563234,
+            "acc_norm_stderr": 0.004907694727935688
+        },
+        "rte": {
+            "acc": 0.5054151624548736,
+            "acc_stderr": 0.03009469812323996
+        },
+        "winogrande": {
+            "acc": 0.5406471981057617,
+            "acc_stderr": 0.014005973823825124
+        },
+        "storycloze_2016": {
+            "acc": 0.6990913949759487,
+            "acc_stderr": 0.010606289538707334
+        },
+        "boolq": {
+            "acc": 0.5116207951070336,
+            "acc_stderr": 0.008742692742551265
+        },
+        "arc_easy": {
+            "acc": 0.5269360269360269,
+            "acc_stderr": 0.01024488474062011,
+            "acc_norm": 0.5122053872053872,
+            "acc_norm_stderr": 0.010256726235129012
+        },
+        "arc_challenge": {
+            "acc": 0.27559726962457337,
+            "acc_stderr": 0.013057169655761838,
+            "acc_norm": 0.30119453924914674,
+            "acc_norm_stderr": 0.013406741767847617
+        },
+        "sciq": {
+            "acc": 0.817,
+            "acc_stderr": 0.012233587399477823,
+            "acc_norm": 0.79,
+            "acc_norm_stderr": 0.01288666233227455
+        },
+        "piqa": {
+            "acc": 0.720348204570185,
+            "acc_stderr": 0.01047189953030656,
+            "acc_norm": 0.7252448313384113,
+            "acc_norm_stderr": 0.010415033676676056
+        }
+    },
+    "versions": {
+        "anli_r1": 0,
+        "anli_r2": 0,
+        "anli_r3": 0,
+        "cb": 1,
+        "copa": 0,
+        "hellaswag": 0,
+        "rte": 0,
+        "winogrande": 0,
+        "storycloze_2016": 0,
+        "boolq": 1,
+        "arc_easy": 0,
+        "arc_challenge": 0,
+        "sciq": 0,
+        "piqa": 0
+    }
+}
\ No newline at end of file
diff --git a/8b7178b4b/evaluation/rankeval/8b7178b4b_5_lm-eval_global_step84877_2023-05-15-10-07-44_5shots_backup.json b/8b7178b4b/evaluation/rankeval/8b7178b4b_5_lm-eval_global_step84877_2023-05-15-10-07-44_5shots_backup.json
new file mode 100644
index 0000000000000000000000000000000000000000..a05a37eb46126eba95cca470120f827097cad7d5
--- /dev/null
+++ b/8b7178b4b/evaluation/rankeval/8b7178b4b_5_lm-eval_global_step84877_2023-05-15-10-07-44_5shots_backup.json
@@ -0,0 +1,87 @@
+{
+    "results": {
+        "anli_r1": {
+            "acc": 0.354,
+            "acc_stderr": 0.015129868238451772
+        },
+        "anli_r2": {
+            "acc": 0.36,
+            "acc_stderr": 0.015186527932040122
+        },
+        "anli_r3": {
+            "acc": 0.365,
+            "acc_stderr": 0.013903485981413582
+        },
+        "cb": {
+            "acc": 0.48214285714285715,
+            "acc_stderr": 0.0673769750864465,
+            "f1": 0.33071988595866
+        },
+        "copa": {
+            "acc": 0.71,
+            "acc_stderr": 0.045604802157206845
+        },
+        "hellaswag": {
+            "acc": 0.4561840270862378,
+            "acc_stderr": 0.004970585328297622,
+            "acc_norm": 0.5903206532563234,
+            "acc_norm_stderr": 0.004907694727935688
+        },
+        "rte": {
+            "acc": 0.5054151624548736,
+            "acc_stderr": 0.03009469812323996
+        },
+        "winogrande": {
+            "acc": 0.5406471981057617,
+            "acc_stderr": 0.014005973823825124
+        },
+        "storycloze_2016": {
+            "acc": 0.6990913949759487,
+            "acc_stderr": 0.010606289538707334
+        },
+        "boolq": {
+            "acc": 0.5116207951070336,
+            "acc_stderr": 0.008742692742551265
+        },
+        "arc_easy": {
+            "acc": 0.5269360269360269,
+            "acc_stderr": 0.01024488474062011,
+            "acc_norm": 0.5122053872053872,
+            "acc_norm_stderr": 0.010256726235129012
+        },
+        "arc_challenge": {
+            "acc": 0.27559726962457337,
+            "acc_stderr": 0.013057169655761838,
+            "acc_norm": 0.30119453924914674,
+            "acc_norm_stderr": 0.013406741767847617
+        },
+        "sciq": {
+            "acc": 0.817,
+            "acc_stderr": 0.012233587399477823,
+            "acc_norm": 0.79,
+            "acc_norm_stderr": 0.01288666233227455
+        },
+        "piqa": {
+            "acc": 0.720348204570185,
+            "acc_stderr": 0.01047189953030656,
+            "acc_norm": 0.7252448313384113,
+            "acc_norm_stderr": 0.010415033676676056
+        }
+    },
+    "versions": {
+        "anli_r1": 0,
+        "anli_r2": 0,
+        "anli_r3": 0,
+        "cb": 1,
+        "copa": 0,
+        "hellaswag": 0,
+        "rte": 0,
+        "winogrande": 0,
+        "storycloze_2016": 0,
+        "boolq": 1,
+        "arc_easy": 0,
+        "arc_challenge": 0,
+        "sciq": 0,
+        "piqa": 0
+    }
+}
\ No newline at end of file