diff --git a/app.py b/app.py index ec7c43ad54c24e86f78a8aab4b55bdfdf6bf8dda..396d3d4728d90d35e874dacb09f682beb4ea8d6f 100644 --- a/app.py +++ b/app.py @@ -1,9 +1,10 @@ -import os import json -import glob from collections import defaultdict +from pathlib import Path + import pandas as pd import gradio as gr + from content import * from css import * import glob @@ -16,74 +17,74 @@ BENCHMARKS = [ARC, HELLASWAG, MMLU, TRUTHFULQA] METRICS = ["acc_norm", "acc_norm", "acc_norm", "mc2"] -LANGS = 'ar,bn,ca,da,de,es,eu,fr,gu,hi,hr,hu,hy,id,it,kn,ml,mr,ne,nl,pt,ro,ru,sk,sr,sv,ta,te,uk,vi,zh'.split(',') +LANGS = "ar,bn,ca,da,de,es,eu,fr,gu,hi,hr,hu,hy,id,it,kn,ml,mr,ne,nl,pt,ro,ru,sk,sr,sv,ta,te,uk,vi,zh".split(",") LANG_NAME = { - 'ar': 'Arabic', - 'bn': 'Bengali', - 'ca': 'Catalan', - 'da': 'Danish', - 'de': 'German', - 'es': 'Spanish', - 'eu': 'Basque', - 'fr': 'French', - 'gu': 'Gujarati', - 'hi': 'Hindi', - 'hr': 'Croatian', - 'hu': 'Hungarian', - 'hy': 'Armenian', - 'id': 'Indonesian', - 'it': 'Italian', - 'kn': 'Kannada', - 'ml': 'Malayalam', - 'mr': 'Marathi', - 'ne': 'Nepali', - 'nl': 'Dutch', - 'pt': 'Portuguese', - 'ro': 'Romanian', - 'ru': 'Russian', - 'sk': 'Slovak', - 'sr': 'Serbian', - 'sv': 'Swedish', - 'ta': 'Tamil', - 'te': 'Telugu', - 'uk': 'Ukrainian', - 'vi': 'Vietnamese', - 'zh': 'Chinese' + "ar": "Arabic", + "bn": "Bengali", + "ca": "Catalan", + "da": "Danish", + "de": "German", + "es": "Spanish", + "eu": "Basque", + "fr": "French", + "gu": "Gujarati", + "hi": "Hindi", + "hr": "Croatian", + "hu": "Hungarian", + "hy": "Armenian", + "id": "Indonesian", + "it": "Italian", + "kn": "Kannada", + "ml": "Malayalam", + "mr": "Marathi", + "ne": "Nepali", + "nl": "Dutch", + "pt": "Portuguese", + "ro": "Romanian", + "ru": "Russian", + "sk": "Slovak", + "sr": "Serbian", + "sv": "Swedish", + "ta": "Tamil", + "te": "Telugu", + "uk": "Ukrainian", + "vi": "Vietnamese", + "zh": "Chinese", } def collect_results(): performance_dict = defaultdict(dict) pretrained_models = set() - for file in glob.glob('evals/*/*.json'): - with open(file, 'r') as f: - data = json.load(f) - if 'results' not in data: + for pfin in Path("evals").rglob("*.json"): + data = json.loads(pfin.read_text(encoding="utf-8")) + if "results" not in data: continue - if 'config' not in data: + if "config" not in data: continue - results = data['results'] - config = data['config'] - if 'model_args' not in config: + results = data["results"] + config = data["config"] + if "model_args" not in config: continue - model_args = config['model_args'].split(',') - pretrained = [x for x in model_args if x.startswith('pretrained=')] + model_args = config["model_args"].split(",") + pretrained = [x for x in model_args if x.startswith("pretrained=")] if len(pretrained) != 1: continue - pretrained = pretrained[0].split('=')[1] - pretrained = pretrained.split('/')[-1] + pretrained = pretrained[0].split("=")[1] + pretrained = pretrained.split("/")[-1] pretrained_models.add(pretrained) for lang_task, perfs in results.items(): - task, lang = lang_task.split('_') + task, lang = lang_task.split("_") assert task in BENCHMARKS if lang and task: metric = METRICS[BENCHMARKS.index(task)] p = round(perfs[metric] * 100, 1) performance_dict[(pretrained, lang)][task] = p + return performance_dict, pretrained_models @@ -96,15 +97,13 @@ def get_leaderboard_df(performance_dict, pretrained_models): mmlu_perf = perfs.get(MMLU, 0.0) truthfulqa_perf = perfs.get(TRUTHFULQA, 0.0) - if arc_perf * hellaswag_perf * mmlu_perf * truthfulqa_perf == 0: - continue avg = round((arc_perf + hellaswag_perf + mmlu_perf + truthfulqa_perf) / 4, 1) - notes = ' '.join([pretrained, lang_name]) - row = [pretrained, lang_name, lang, avg, arc_perf, hellaswag_perf, mmlu_perf, truthfulqa_perf, notes] + notes = " ".join([pretrained, lang_name]) + row = [pretrained, avg, arc_perf, hellaswag_perf, mmlu_perf, truthfulqa_perf, notes] df.append(row) df = pd.DataFrame.from_records(df, columns=COLS) - df = df.sort_values(by=[LANG_COL, AVERAGE_COL], ascending=False) + df = df.sort_values(by=[AVERAGE_COL], ascending=False) df = df[COLS] return df @@ -115,10 +114,7 @@ def search_table(df, query): return filtered_df - MODEL_COL = "Model" -LANG_COL = "Language" -CODE_COL = "Code" AVERAGE_COL = "Average" ARC_COL = "ARC (25-shot)" HELLASWAG_COL = "HellaSwag (10-shot)️" @@ -126,8 +122,8 @@ MMLU_COL = "MMLU (5-shot)" TRUTHFULQA_COL = "TruthfulQA (0-shot)" NOTES_COL = "Notes" # For search only -COLS = [MODEL_COL, LANG_COL, CODE_COL, AVERAGE_COL, ARC_COL, HELLASWAG_COL, MMLU_COL, TRUTHFULQA_COL, NOTES_COL] -TYPES = ["str", "str", "str", "number", "number", "number", "number", "number", "str"] +COLS = [MODEL_COL, AVERAGE_COL, ARC_COL, HELLASWAG_COL, MMLU_COL, TRUTHFULQA_COL, NOTES_COL] +TYPES = ["str", "number", "number", "number", "number", "number", "str"] args = collect_results() original_df = get_leaderboard_df(*args) @@ -139,9 +135,7 @@ with demo: gr.Markdown(HOW_TO, elem_classes="markdown-text") with gr.Box(): - search_bar = gr.Textbox( - placeholder="Search models and languages...", show_label=False, elem_id="search-bar" - ) + search_bar = gr.Textbox(placeholder="Search models and languages...", show_label=False, elem_id="search-bar") leaderboard_table = gr.components.Dataframe( value=original_df, diff --git a/css.py b/css.py index a476733d83bfe934665f06fb222097392e2db88c..d0af6b59ea2531da0624c61b07d250c76b787a70 100644 --- a/css.py +++ b/css.py @@ -1,4 +1,4 @@ -CUSTOM_CSS= """ +CUSTOM_CSS = """ /* Hides the final column */ table td:last-child, table th:last-child { @@ -10,4 +10,4 @@ table th:last-child { # overflow: auto; # white-space: nowrap; # } -""" \ No newline at end of file +""" diff --git a/evals/arc/arc_ar-bloom-7b1.json b/evals/arc/arc_ar-bloom-7b1.json deleted file mode 100644 index 66c115459f73a74be6bd4b1b3933509010a82342..0000000000000000000000000000000000000000 --- a/evals/arc/arc_ar-bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_ar": { - "acc": 0.2634730538922156, - "acc_stderr": 0.012889646336321774, - "acc_norm": 0.31394354148845166, - "acc_norm_stderr": 0.013579515768185788 - } - }, - "versions": { - "arc_ar": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc/arc_ar-llama-7B.json b/evals/arc/arc_ar-llama-7B.json deleted file mode 100644 index 31293a19637055f69dbf3fb11cadfd2fde391402..0000000000000000000000000000000000000000 --- a/evals/arc/arc_ar-llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_ar": { - "acc": 0.19760479041916168, - "acc_stderr": 0.011651221980953499, - "acc_norm": 0.24636441402908468, - "acc_norm_stderr": 0.012608059960468694 - } - }, - "versions": { - "arc_ar": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc/arc_bn-bloom-7b1.json b/evals/arc/arc_bn-bloom-7b1.json deleted file mode 100644 index b7b877a4a649f59197b24de7b3ec917785979683..0000000000000000000000000000000000000000 --- a/evals/arc/arc_bn-bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_bn": { - "acc": 0.22412318220701455, - "acc_stderr": 0.012201644195165715, - "acc_norm": 0.2617621899059025, - "acc_norm_stderr": 0.012862641889254466 - } - }, - "versions": { - "arc_bn": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc/arc_bn-llama-7B.json b/evals/arc/arc_bn-llama-7B.json deleted file mode 100644 index 1dafcad0f0dbcae9d42395e2697e1ddc5c1ba0c2..0000000000000000000000000000000000000000 --- a/evals/arc/arc_bn-llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_bn": { - "acc": 0.1899059024807528, - "acc_stderr": 0.011476660752315397, - "acc_norm": 0.2583404619332763, - "acc_norm_stderr": 0.012807875214816267 - } - }, - "versions": { - "arc_bn": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc/arc_ca-bloom-7b1.json b/evals/arc/arc_ca-bloom-7b1.json deleted file mode 100644 index f0a15e06750a49e5570198c619957cce3e35cf0c..0000000000000000000000000000000000000000 --- a/evals/arc/arc_ca-bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_ca": { - "acc": 0.31989708404802747, - "acc_stderr": 0.01366562491926326, - "acc_norm": 0.34734133790737565, - "acc_norm_stderr": 0.013949489903701517 - } - }, - "versions": { - "arc_ca": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc/arc_ca-llama-7B.json b/evals/arc/arc_ca-llama-7B.json deleted file mode 100644 index f0e3b53912555842b913d4cc78b61de1b70a2380..0000000000000000000000000000000000000000 --- a/evals/arc/arc_ca-llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_ca": { - "acc": 0.3276157804459691, - "acc_stderr": 0.01375080741597368, - "acc_norm": 0.3507718696397942, - "acc_norm_stderr": 0.013981316936172217 - } - }, - "versions": { - "arc_ca": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc/arc_da-bloom-7b1.json b/evals/arc/arc_da-bloom-7b1.json deleted file mode 100644 index 1f4e588f7cac0716c4285f186e6d2aa122ee795d..0000000000000000000000000000000000000000 --- a/evals/arc/arc_da-bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_da": { - "acc": 0.20137103684661525, - "acc_stderr": 0.011744154502532795, - "acc_norm": 0.24592973436161097, - "acc_norm_stderr": 0.012611366681285752 - } - }, - "versions": { - "arc_da": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc/arc_da-llama-7B.json b/evals/arc/arc_da-llama-7B.json deleted file mode 100644 index 814a2fb017691ccd12afbf034c490e10a646843e..0000000000000000000000000000000000000000 --- a/evals/arc/arc_da-llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_da": { - "acc": 0.286203941730934, - "acc_stderr": 0.013236574332463879, - "acc_norm": 0.3273350471293916, - "acc_norm_stderr": 0.013741887176251822 - } - }, - "versions": { - "arc_da": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc/arc_de-bloom-7b1.json b/evals/arc/arc_de-bloom-7b1.json deleted file mode 100644 index 205cbe1e5a60177701994fa2eca97338da50bd02..0000000000000000000000000000000000000000 --- a/evals/arc/arc_de-bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_de": { - "acc": 0.22241231822070145, - "acc_stderr": 0.012168377742629776, - "acc_norm": 0.262617621899059, - "acc_norm_stderr": 0.01287617552045283 - } - }, - "versions": { - "arc_de": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc/arc_de-llama-7B.json b/evals/arc/arc_de-llama-7B.json deleted file mode 100644 index f13cfc00bfd0ac6e8b6e48a5c0bc3b99c3140b69..0000000000000000000000000000000000000000 --- a/evals/arc/arc_de-llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_de": { - "acc": 0.2951240376390077, - "acc_stderr": 0.013345572865502645, - "acc_norm": 0.35072711719418304, - "acc_norm_stderr": 0.013962940383743043 - } - }, - "versions": { - "arc_de": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc/arc_es-bloom-7b1.json b/evals/arc/arc_es-bloom-7b1.json deleted file mode 100644 index 39a5c5211ff20ef49014baa232a8ea2a9d8884be..0000000000000000000000000000000000000000 --- a/evals/arc/arc_es-bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_es": { - "acc": 0.3316239316239316, - "acc_stderr": 0.013769752111910177, - "acc_norm": 0.3811965811965812, - "acc_norm_stderr": 0.01420507709573084 - } - }, - "versions": { - "arc_es": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc/arc_es-llama-7B.json b/evals/arc/arc_es-llama-7B.json deleted file mode 100644 index 11544ff8942a30c3fb128aa473ea30d88443b0e6..0000000000000000000000000000000000000000 --- a/evals/arc/arc_es-llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_es": { - "acc": 0.3606837606837607, - "acc_stderr": 0.014044746572948867, - "acc_norm": 0.3683760683760684, - "acc_norm_stderr": 0.014108074259155369 - } - }, - "versions": { - "arc_es": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc/arc_eu-bloom-7b1.json b/evals/arc/arc_eu-bloom-7b1.json deleted file mode 100644 index 156fd60ab449125d255226262654e5337e4cb697..0000000000000000000000000000000000000000 --- a/evals/arc/arc_eu-bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_eu": { - "acc": 0.22056239015817222, - "acc_stderr": 0.01229634886589257, - "acc_norm": 0.2521968365553603, - "acc_norm_stderr": 0.012879032347922939 - } - }, - "versions": { - "arc_eu": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc/arc_eu-llama-7B.json b/evals/arc/arc_eu-llama-7B.json deleted file mode 100644 index 10a039f055cb172c7978f840a54bec6cc724948c..0000000000000000000000000000000000000000 --- a/evals/arc/arc_eu-llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_eu": { - "acc": 0.20738137082601055, - "acc_stderr": 0.012023662461166562, - "acc_norm": 0.2451669595782074, - "acc_norm_stderr": 0.012757811738008544 - } - }, - "versions": { - "arc_eu": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc/arc_fr-bloom-7b1.json b/evals/arc/arc_fr-bloom-7b1.json deleted file mode 100644 index 78cbf1e3cfc337f169be33735f919ab397b8d085..0000000000000000000000000000000000000000 --- a/evals/arc/arc_fr-bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_fr": { - "acc": 0.32677502138579984, - "acc_stderr": 0.01372407602199982, - "acc_norm": 0.3669803250641574, - "acc_norm_stderr": 0.014102904772197396 - } - }, - "versions": { - "arc_fr": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc/arc_fr-llama-7B.json b/evals/arc/arc_fr-llama-7B.json deleted file mode 100644 index c79866a45e043e6b6e5e139f5ac63dfb8b522f27..0000000000000000000000000000000000000000 --- a/evals/arc/arc_fr-llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_fr": { - "acc": 0.3473053892215569, - "acc_stderr": 0.013931226499492353, - "acc_norm": 0.3729683490162532, - "acc_norm_stderr": 0.014150093168782438 - } - }, - "versions": { - "arc_fr": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc/arc_gu-bloom-7b1.json b/evals/arc/arc_gu-bloom-7b1.json deleted file mode 100644 index c78878020cb8341b5adb388627ffa309dde3ad3a..0000000000000000000000000000000000000000 --- a/evals/arc/arc_gu-bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_gu": { - "acc": 0.2206896551724138, - "acc_stderr": 0.012181604374453973, - "acc_norm": 0.2336206896551724, - "acc_norm_stderr": 0.012428989430945793 - } - }, - "versions": { - "arc_gu": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc/arc_gu-llama-7B.json b/evals/arc/arc_gu-llama-7B.json deleted file mode 100644 index afadd880b353d2482c13ab85d24811ac5ea5fd57..0000000000000000000000000000000000000000 --- a/evals/arc/arc_gu-llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_gu": { - "acc": 0.2120689655172414, - "acc_stderr": 0.012007177871292825, - "acc_norm": 0.23189655172413792, - "acc_norm_stderr": 0.012396962423413033 - } - }, - "versions": { - "arc_gu": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc/arc_hi-bloom-7b1.json b/evals/arc/arc_hi-bloom-7b1.json deleted file mode 100644 index 70136df6c1f9731ab888c323fa0128c0beb43524..0000000000000000000000000000000000000000 --- a/evals/arc/arc_hi-bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_hi": { - "acc": 0.2363013698630137, - "acc_stderr": 0.012435369590403731, - "acc_norm": 0.2919520547945205, - "acc_norm_stderr": 0.013309191484613488 - } - }, - "versions": { - "arc_hi": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc/arc_hi-llama-7B.json b/evals/arc/arc_hi-llama-7B.json deleted file mode 100644 index ddcd58ade570221ad656710d0944a241789b1d8b..0000000000000000000000000000000000000000 --- a/evals/arc/arc_hi-llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_hi": { - "acc": 0.21232876712328766, - "acc_stderr": 0.011971304657273123, - "acc_norm": 0.25, - "acc_norm_stderr": 0.012675503164084846 - } - }, - "versions": { - "arc_hi": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc/arc_hr-bloom-7b1.json b/evals/arc/arc_hr-bloom-7b1.json deleted file mode 100644 index 80efc06ef94471b0b04935089a967e72d9e2095e..0000000000000000000000000000000000000000 --- a/evals/arc/arc_hr-bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_hr": { - "acc": 0.19332763045337895, - "acc_stderr": 0.011555111310342437, - "acc_norm": 0.2369546621043627, - "acc_norm_stderr": 0.012441890624187792 - } - }, - "versions": { - "arc_hr": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc/arc_hr-llama-7B.json b/evals/arc/arc_hr-llama-7B.json deleted file mode 100644 index 9c50fa3252a0133486190ed9d5cbc497e1a17fe9..0000000000000000000000000000000000000000 --- a/evals/arc/arc_hr-llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_hr": { - "acc": 0.2754491017964072, - "acc_stderr": 0.01307174925264165, - "acc_norm": 0.330196749358426, - "acc_norm_stderr": 0.013760638974726852 - } - }, - "versions": { - "arc_hr": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc/arc_hu-bloom-7b1.json b/evals/arc/arc_hu-bloom-7b1.json deleted file mode 100644 index 3c7e8773a07af63cf8522b314bbd0611c37c7b98..0000000000000000000000000000000000000000 --- a/evals/arc/arc_hu-bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_hu": { - "acc": 0.1969178082191781, - "acc_stderr": 0.011640913614197496, - "acc_norm": 0.2585616438356164, - "acc_norm_stderr": 0.0128169339627777 - } - }, - "versions": { - "arc_hu": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc/arc_hu-llama-7B.json b/evals/arc/arc_hu-llama-7B.json deleted file mode 100644 index ac3191180768a88cd6c937d51bf005adb11c7ccf..0000000000000000000000000000000000000000 --- a/evals/arc/arc_hu-llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_hu": { - "acc": 0.2517123287671233, - "acc_stderr": 0.012704310825494622, - "acc_norm": 0.2979452054794521, - "acc_norm_stderr": 0.013388079339102703 - } - }, - "versions": { - "arc_hu": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc/arc_hy-bloom-7b1.json b/evals/arc/arc_hy-bloom-7b1.json deleted file mode 100644 index d138545e18f6bb49f13d11bd9cd3b515db23815b..0000000000000000000000000000000000000000 --- a/evals/arc/arc_hy-bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_hy": { - "acc": 0.21181818181818182, - "acc_stderr": 0.01232525683396216, - "acc_norm": 0.26181818181818184, - "acc_norm_stderr": 0.013261197012809796 - } - }, - "versions": { - "arc_hy": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc/arc_hy-llama-7B.json b/evals/arc/arc_hy-llama-7B.json deleted file mode 100644 index 35e46c981f8bc3bf9374fdf6ad4b483f4c65762b..0000000000000000000000000000000000000000 --- a/evals/arc/arc_hy-llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_hy": { - "acc": 0.19454545454545455, - "acc_stderr": 0.011940766785664334, - "acc_norm": 0.2718181818181818, - "acc_norm_stderr": 0.013420241182110736 - } - }, - "versions": { - "arc_hy": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc/arc_id-bloom-7b1.json b/evals/arc/arc_id-bloom-7b1.json deleted file mode 100644 index a2cc8cf230eda88935959ff54b9ded1986940b84..0000000000000000000000000000000000000000 --- a/evals/arc/arc_id-bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_id": { - "acc": 0.3128205128205128, - "acc_stderr": 0.013560492090917607, - "acc_norm": 0.3598290598290598, - "acc_norm_stderr": 0.014037469945597791 - } - }, - "versions": { - "arc_id": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc/arc_id-llama-7B.json b/evals/arc/arc_id-llama-7B.json deleted file mode 100644 index 59fcc7ff10a29c0f82833ce5df7a260a8d4bbd42..0000000000000000000000000000000000000000 --- a/evals/arc/arc_id-llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_id": { - "acc": 0.19316239316239317, - "acc_stderr": 0.011546413314069014, - "acc_norm": 0.26666666666666666, - "acc_norm_stderr": 0.012933850109759573 - } - }, - "versions": { - "arc_id": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc/arc_it-bloom-7b1.json b/evals/arc/arc_it-bloom-7b1.json deleted file mode 100644 index 7eda117416da15b68b1713aa6ef9ff77e69fd826..0000000000000000000000000000000000000000 --- a/evals/arc/arc_it-bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_it": { - "acc": 0.24037639007698888, - "acc_stderr": 0.01250327289928353, - "acc_norm": 0.28999144568006846, - "acc_norm_stderr": 0.01327709194338097 - } - }, - "versions": { - "arc_it": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc/arc_it-llama-7B.json b/evals/arc/arc_it-llama-7B.json deleted file mode 100644 index 76b8875276c1b0078d3d087c16397df3b3ea9200..0000000000000000000000000000000000000000 --- a/evals/arc/arc_it-llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_it": { - "acc": 0.31736526946107785, - "acc_stderr": 0.013619227292898307, - "acc_norm": 0.3575705731394354, - "acc_norm_stderr": 0.014024008839912006 - } - }, - "versions": { - "arc_it": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc/arc_kn-bloom-7b1.json b/evals/arc/arc_kn-bloom-7b1.json deleted file mode 100644 index e92b7d0d555bc117110f34dbbc68d327f5092f5f..0000000000000000000000000000000000000000 --- a/evals/arc/arc_kn-bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_kn": { - "acc": 0.2221254355400697, - "acc_stderr": 0.012273607270054452, - "acc_norm": 0.24738675958188153, - "acc_norm_stderr": 0.012740675198098838 - } - }, - "versions": { - "arc_kn": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc/arc_kn-llama-7B.json b/evals/arc/arc_kn-llama-7B.json deleted file mode 100644 index 39ae5661b6403f677d4427689194c417f1f2f8b5..0000000000000000000000000000000000000000 --- a/evals/arc/arc_kn-llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_kn": { - "acc": 0.20470383275261325, - "acc_stderr": 0.011913674295957856, - "acc_norm": 0.24738675958188153, - "acc_norm_stderr": 0.012740675198098834 - } - }, - "versions": { - "arc_kn": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc/arc_ml-bloom-7b1.json b/evals/arc/arc_ml-bloom-7b1.json deleted file mode 100644 index f7c83104b2f7701b8a7af344179886c58a0e89a0..0000000000000000000000000000000000000000 --- a/evals/arc/arc_ml-bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_ml": { - "acc": 0.2075306479859895, - "acc_stderr": 0.01200575665793095, - "acc_norm": 0.2635726795096322, - "acc_norm_stderr": 0.013042844591075362 - } - }, - "versions": { - "arc_ml": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc/arc_ml-llama-7B.json b/evals/arc/arc_ml-llama-7B.json deleted file mode 100644 index fc465c13860754471e99430d5e6c5e1df5046b2e..0000000000000000000000000000000000000000 --- a/evals/arc/arc_ml-llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_ml": { - "acc": 0.21628721541155868, - "acc_stderr": 0.012188522634632977, - "acc_norm": 0.27845884413309985, - "acc_norm_stderr": 0.013269918016014967 - } - }, - "versions": { - "arc_ml": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc/arc_mr-bloom-7b1.json b/evals/arc/arc_mr-bloom-7b1.json deleted file mode 100644 index cb854d6690652622f9f24d8c241c70b1cab749f9..0000000000000000000000000000000000000000 --- a/evals/arc/arc_mr-bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_mr": { - "acc": 0.23376623376623376, - "acc_stderr": 0.012458582396003653, - "acc_norm": 0.2727272727272727, - "acc_norm_stderr": 0.013110221561502926 - } - }, - "versions": { - "arc_mr": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc/arc_mr-llama-7B.json b/evals/arc/arc_mr-llama-7B.json deleted file mode 100644 index 0755f8ce24bf655025ef6eb6414570573beb9858..0000000000000000000000000000000000000000 --- a/evals/arc/arc_mr-llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_mr": { - "acc": 0.2051948051948052, - "acc_stderr": 0.011888050053276677, - "acc_norm": 0.2545454545454545, - "acc_norm_stderr": 0.012823020964319998 - } - }, - "versions": { - "arc_mr": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc/arc_ne-bloom-7b1.json b/evals/arc/arc_ne-bloom-7b1.json deleted file mode 100644 index 8642b825a874e720a4bb8c0f92ff6fc304357c9f..0000000000000000000000000000000000000000 --- a/evals/arc/arc_ne-bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_ne": { - "acc": 0.21300256629597947, - "acc_stderr": 0.01198002307808546, - "acc_norm": 0.223267750213858, - "acc_norm_stderr": 0.012185048029719049 - } - }, - "versions": { - "arc_ne": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc/arc_ne-llama-7B.json b/evals/arc/arc_ne-llama-7B.json deleted file mode 100644 index e20341882d82d53d339ccb9e726250d842765069..0000000000000000000000000000000000000000 --- a/evals/arc/arc_ne-llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_ne": { - "acc": 0.2172797262617622, - "acc_stderr": 0.012066782166932105, - "acc_norm": 0.24294268605645852, - "acc_norm_stderr": 0.012548588352773893 - } - }, - "versions": { - "arc_ne": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc/arc_nl_Llama-2-7b-chat-hf.json b/evals/arc/arc_nl_Llama-2-7b-chat-hf.json new file mode 100644 index 0000000000000000000000000000000000000000..d95e22e17c312755971c6aec7f376d25ab3f159e --- /dev/null +++ b/evals/arc/arc_nl_Llama-2-7b-chat-hf.json @@ -0,0 +1,23 @@ +{ + "results": { + "arc_nl": { + "acc": 0.3609923011120616, + "acc_stderr": 0.014053373664144792, + "acc_norm": 0.3618477331052181, + "acc_norm_stderr": 0.014060593893704966 + } + }, + "versions": { + "arc_nl": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=meta-llama/Llama-2-7b-chat-hf,use_accelerate=True,device_map_option=auto,dtype=bfloat16", + "batch_size": 8, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/arc/arc_nl_Llama-2-7b-hf.json b/evals/arc/arc_nl_Llama-2-7b-hf.json new file mode 100644 index 0000000000000000000000000000000000000000..3a203f2de2ff7ea6dca18093dfde7757ccf55eca --- /dev/null +++ b/evals/arc/arc_nl_Llama-2-7b-hf.json @@ -0,0 +1,23 @@ +{ + "results": { + "arc_nl": { + "acc": 0.33704020530367834, + "acc_stderr": 0.013831300903580639, + "acc_norm": 0.3567151411462789, + "acc_norm_stderr": 0.014016546277185005 + } + }, + "versions": { + "arc_nl": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=meta-llama/Llama-2-7b-hf,use_accelerate=True,device_map_option=auto,dtype=bfloat16", + "batch_size": 8, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/arc/arc_nl_Mistral-7B-v0.1.json b/evals/arc/arc_nl_Mistral-7B-v0.1.json new file mode 100644 index 0000000000000000000000000000000000000000..e69cc570fa8ab39caac1d704af5c19c2a53baf3d --- /dev/null +++ b/evals/arc/arc_nl_Mistral-7B-v0.1.json @@ -0,0 +1,23 @@ +{ + "results": { + "arc_nl": { + "acc": 0.42087254063301965, + "acc_stderr": 0.014445778557368833, + "acc_norm": 0.4294268605645851, + "acc_norm_stderr": 0.014483677397351059 + } + }, + "versions": { + "arc_nl": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=mistralai/Mistral-7B-v0.1,use_accelerate=True,device_map_option=auto,dtype=bfloat16", + "batch_size": 8, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/arc/arc_nl_zephyr-7b-beta.json b/evals/arc/arc_nl_zephyr-7b-beta.json new file mode 100644 index 0000000000000000000000000000000000000000..af6a67755466fb649a9285fcd45d0ccdf6fa1116 --- /dev/null +++ b/evals/arc/arc_nl_zephyr-7b-beta.json @@ -0,0 +1,23 @@ +{ + "results": { + "arc_nl": { + "acc": 0.43798118049615054, + "acc_stderr": 0.01451716231691793, + "acc_norm": 0.4328485885372113, + "acc_norm_stderr": 0.01449759923259859 + } + }, + "versions": { + "arc_nl": 0 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=HuggingFaceH4/zephyr-7b-beta,use_accelerate=True,device_map_option=auto,dtype=bfloat16", + "batch_size": 8, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/arc/arc_pt-bloom-7b1.json b/evals/arc/arc_pt-bloom-7b1.json deleted file mode 100644 index 880d8570463408853523eec06407b3c8ed9e5b11..0000000000000000000000000000000000000000 --- a/evals/arc/arc_pt-bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_pt": { - "acc": 0.3401709401709402, - "acc_stderr": 0.013856612397310694, - "acc_norm": 0.4, - "acc_norm_stderr": 0.014328422047021531 - } - }, - "versions": { - "arc_pt": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc/arc_pt-llama-7B.json b/evals/arc/arc_pt-llama-7B.json deleted file mode 100644 index 0a856face8fef0cab72d3cda7305f6949d011ce3..0000000000000000000000000000000000000000 --- a/evals/arc/arc_pt-llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_pt": { - "acc": 0.3367521367521368, - "acc_stderr": 0.01382247630777062, - "acc_norm": 0.37777777777777777, - "acc_norm_stderr": 0.014180244103534094 - } - }, - "versions": { - "arc_pt": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc/arc_ro-bloom-7b1.json b/evals/arc/arc_ro-bloom-7b1.json deleted file mode 100644 index 083766c1f50d79393939908a8f8837dcc7cb697d..0000000000000000000000000000000000000000 --- a/evals/arc/arc_ro-bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_ro": { - "acc": 0.2099400171379606, - "acc_stderr": 0.011926921791273557, - "acc_norm": 0.26906598114824337, - "acc_norm_stderr": 0.012987310039914976 - } - }, - "versions": { - "arc_ro": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc/arc_ro-llama-7B.json b/evals/arc/arc_ro-llama-7B.json deleted file mode 100644 index eab2e4a70b967696417355b0d11bd69cabf3ddc5..0000000000000000000000000000000000000000 --- a/evals/arc/arc_ro-llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_ro": { - "acc": 0.30077120822622105, - "acc_stderr": 0.013430077114209907, - "acc_norm": 0.32390745501285345, - "acc_norm_stderr": 0.013704533924425027 - } - }, - "versions": { - "arc_ro": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc/arc_ru-bloom-7b1.json b/evals/arc/arc_ru-bloom-7b1.json deleted file mode 100644 index 1ff9ed6089fca2642658a8e6f9f74471739e87e6..0000000000000000000000000000000000000000 --- a/evals/arc/arc_ru-bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_ru": { - "acc": 0.21043627031650983, - "acc_stderr": 0.01192703439080346, - "acc_norm": 0.2754491017964072, - "acc_norm_stderr": 0.01307174925264165 - } - }, - "versions": { - "arc_ru": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc/arc_ru-llama-7B.json b/evals/arc/arc_ru-llama-7B.json deleted file mode 100644 index f62854eef188594fdc60a93341410fac7a49fa14..0000000000000000000000000000000000000000 --- a/evals/arc/arc_ru-llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_ru": { - "acc": 0.2934131736526946, - "acc_stderr": 0.013322973103306575, - "acc_norm": 0.32078699743370404, - "acc_norm_stderr": 0.013658089444975752 - } - }, - "versions": { - "arc_ru": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc/arc_sk-bloom-7b1.json b/evals/arc/arc_sk-bloom-7b1.json deleted file mode 100644 index 4404e57e2290a69cce8029b89f0939593bbe7d8e..0000000000000000000000000000000000000000 --- a/evals/arc/arc_sk-bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_sk": { - "acc": 0.20359281437125748, - "acc_stderr": 0.011782227020010716, - "acc_norm": 0.24893071000855432, - "acc_norm_stderr": 0.012651960282598879 - } - }, - "versions": { - "arc_sk": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc/arc_sk-llama-7B.json b/evals/arc/arc_sk-llama-7B.json deleted file mode 100644 index b018df9a5453495bb3ff51f8908c88c064d888a4..0000000000000000000000000000000000000000 --- a/evals/arc/arc_sk-llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_sk": { - "acc": 0.23609923011120615, - "acc_stderr": 0.012426371635795894, - "acc_norm": 0.28999144568006846, - "acc_norm_stderr": 0.013277091943380979 - } - }, - "versions": { - "arc_sk": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc/arc_sr-bloom-7b1.json b/evals/arc/arc_sr-bloom-7b1.json deleted file mode 100644 index ca68a7fae3c2920f66e9f6948396528ea7efe421..0000000000000000000000000000000000000000 --- a/evals/arc/arc_sr-bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_sr": { - "acc": 0.2172797262617622, - "acc_stderr": 0.012066782166932079, - "acc_norm": 0.25149700598802394, - "acc_norm_stderr": 0.01269526466186626 - } - }, - "versions": { - "arc_sr": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc/arc_sr-llama-7B.json b/evals/arc/arc_sr-llama-7B.json deleted file mode 100644 index dbe0e415ecd651a7afbe25423df0f79ddbf30b59..0000000000000000000000000000000000000000 --- a/evals/arc/arc_sr-llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_sr": { - "acc": 0.25748502994011974, - "acc_stderr": 0.012794024494042348, - "acc_norm": 0.30795551753635586, - "acc_norm_stderr": 0.013507954174822524 - } - }, - "versions": { - "arc_sr": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc/arc_sv-bloom-7b1.json b/evals/arc/arc_sv-bloom-7b1.json deleted file mode 100644 index e602b4d12926dbb93b567be032a836cb50b2ff51..0000000000000000000000000000000000000000 --- a/evals/arc/arc_sv-bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_sv": { - "acc": 0.20515021459227467, - "acc_stderr": 0.011835920197074948, - "acc_norm": 0.2515021459227468, - "acc_norm_stderr": 0.012717145410329311 - } - }, - "versions": { - "arc_sv": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc/arc_sv-llama-7B.json b/evals/arc/arc_sv-llama-7B.json deleted file mode 100644 index 3cacd9bbf330b2d6be85b2903f5d124c0045cc94..0000000000000000000000000000000000000000 --- a/evals/arc/arc_sv-llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_sv": { - "acc": 0.303862660944206, - "acc_stderr": 0.013480613043590443, - "acc_norm": 0.34935622317596565, - "acc_norm_stderr": 0.013974278424227307 - } - }, - "versions": { - "arc_sv": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc/arc_ta-bloom-7b1.json b/evals/arc/arc_ta-bloom-7b1.json deleted file mode 100644 index 68a6f4875888d86505752626ba4a52fd12cc3c84..0000000000000000000000000000000000000000 --- a/evals/arc/arc_ta-bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_ta": { - "acc": 0.22942206654991243, - "acc_stderr": 0.01244752638770244, - "acc_norm": 0.24168126094570927, - "acc_norm_stderr": 0.012673733216040754 - } - }, - "versions": { - "arc_ta": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc/arc_ta-llama-7B.json b/evals/arc/arc_ta-llama-7B.json deleted file mode 100644 index d7c697739212d1bec5e84f1a4e6f0017d500ecc7..0000000000000000000000000000000000000000 --- a/evals/arc/arc_ta-llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_ta": { - "acc": 0.2075306479859895, - "acc_stderr": 0.012005756657930957, - "acc_norm": 0.27495621716287216, - "acc_norm_stderr": 0.013218161880960047 - } - }, - "versions": { - "arc_ta": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc/arc_te-bloom-7b1.json b/evals/arc/arc_te-bloom-7b1.json deleted file mode 100644 index 1be31afe5307f0b3c626e305437b1932d4457b68..0000000000000000000000000000000000000000 --- a/evals/arc/arc_te-bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_te": { - "acc": 0.20175438596491227, - "acc_stderr": 0.01189098690363561, - "acc_norm": 0.24298245614035088, - "acc_norm_stderr": 0.01270803987901337 - } - }, - "versions": { - "arc_te": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc/arc_te-llama-7B.json b/evals/arc/arc_te-llama-7B.json deleted file mode 100644 index f84a1b907c92965f5829cbd68e89759d2d1ef9d7..0000000000000000000000000000000000000000 --- a/evals/arc/arc_te-llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_te": { - "acc": 0.2026315789473684, - "acc_stderr": 0.011910259341316062, - "acc_norm": 0.2517543859649123, - "acc_norm_stderr": 0.012860230436368953 - } - }, - "versions": { - "arc_te": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc/arc_uk-bloom-7b1.json b/evals/arc/arc_uk-bloom-7b1.json deleted file mode 100644 index 05233ff08727d5cac7dd74429dbc024eb5fd5f4f..0000000000000000000000000000000000000000 --- a/evals/arc/arc_uk-bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_uk": { - "acc": 0.1958939264328486, - "acc_stderr": 0.011613035012800898, - "acc_norm": 0.2275449101796407, - "acc_norm_stderr": 0.012267293637033645 - } - }, - "versions": { - "arc_uk": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc/arc_uk-llama-7B.json b/evals/arc/arc_uk-llama-7B.json deleted file mode 100644 index 717afd73b3550c42e809f9bdb7fac834e805b5ee..0000000000000000000000000000000000000000 --- a/evals/arc/arc_uk-llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_uk": { - "acc": 0.28999144568006846, - "acc_stderr": 0.013277091943380968, - "acc_norm": 0.32934131736526945, - "acc_norm_stderr": 0.013751575689336035 - } - }, - "versions": { - "arc_uk": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc/arc_vi-bloom-7b1.json b/evals/arc/arc_vi-bloom-7b1.json deleted file mode 100644 index 4bc8e4783cc71214d4ba57feef30a0bfee5774c2..0000000000000000000000000000000000000000 --- a/evals/arc/arc_vi-bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_vi": { - "acc": 0.28974358974358977, - "acc_stderr": 0.013268054405378885, - "acc_norm": 0.3367521367521368, - "acc_norm_stderr": 0.01382247630777062 - } - }, - "versions": { - "arc_vi": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc/arc_vi-llama-7B.json b/evals/arc/arc_vi-llama-7B.json deleted file mode 100644 index 7c14775b05df6587593cb1cbb921ee6ac86a8370..0000000000000000000000000000000000000000 --- a/evals/arc/arc_vi-llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_vi": { - "acc": 0.20256410256410257, - "acc_stderr": 0.011754979539893694, - "acc_norm": 0.23675213675213674, - "acc_norm_stderr": 0.01243290160581911 - } - }, - "versions": { - "arc_vi": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc/arc_zh-bloom-7b1.json b/evals/arc/arc_zh-bloom-7b1.json deleted file mode 100644 index c4deb085367a11032bec8e265cc4cb91fe75a0f5..0000000000000000000000000000000000000000 --- a/evals/arc/arc_zh-bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_zh": { - "acc": 0.3076923076923077, - "acc_stderr": 0.013498970320941413, - "acc_norm": 0.37264957264957266, - "acc_norm_stderr": 0.014141587247061969 - } - }, - "versions": { - "arc_zh": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/arc/arc_zh-llama-7B.json b/evals/arc/arc_zh-llama-7B.json deleted file mode 100644 index 9cca2a2335f34f3b9eb36c125304f260fc3f8cd9..0000000000000000000000000000000000000000 --- a/evals/arc/arc_zh-llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "arc_zh": { - "acc": 0.2564102564102564, - "acc_stderr": 0.012771065618749024, - "acc_norm": 0.2982905982905983, - "acc_norm_stderr": 0.013381080232166387 - } - }, - "versions": { - "arc_zh": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/hellaswag/hellaswag_ar_bloom-7b1.json b/evals/hellaswag/hellaswag_ar_bloom-7b1.json deleted file mode 100644 index 69248e00b845c50b1eb8379e9d0ec05aaffc075d..0000000000000000000000000000000000000000 --- a/evals/hellaswag/hellaswag_ar_bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "hellaswag_ar": { - "acc": 0.3561464690496949, - "acc_stderr": 0.004999249661771764, - "acc_norm": 0.43341325196163905, - "acc_norm_stderr": 0.005173461992734505 - } - }, - "versions": { - "hellaswag_ar": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/hellaswag/hellaswag_ar_llama-7B.json b/evals/hellaswag/hellaswag_ar_llama-7B.json deleted file mode 100644 index 53797549241b15b072b9f0ce5f8b12ea57bce437..0000000000000000000000000000000000000000 --- a/evals/hellaswag/hellaswag_ar_llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "hellaswag_ar": { - "acc": 0.28040540540540543, - "acc_stderr": 0.004689581635445738, - "acc_norm": 0.3085222319093287, - "acc_norm_stderr": 0.004822023322058258 - } - }, - "versions": { - "hellaswag_ar": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/hellaswag/hellaswag_bn_bloom-7b1.json b/evals/hellaswag/hellaswag_bn_bloom-7b1.json deleted file mode 100644 index 7e6f1a343c04d236c977fa61b55e3bd8c74fa3f1..0000000000000000000000000000000000000000 --- a/evals/hellaswag/hellaswag_bn_bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "hellaswag_bn": { - "acc": 0.28381302748322873, - "acc_stderr": 0.004689968075947356, - "acc_norm": 0.3277429127894395, - "acc_norm_stderr": 0.004882866652334284 - } - }, - "versions": { - "hellaswag_bn": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/hellaswag/hellaswag_bn_llama-7B.json b/evals/hellaswag/hellaswag_bn_llama-7B.json deleted file mode 100644 index cb1676e09ecdce592c17a4ff25f63c87e2a2a971..0000000000000000000000000000000000000000 --- a/evals/hellaswag/hellaswag_bn_llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "hellaswag_bn": { - "acc": 0.26011685782298205, - "acc_stderr": 0.00456358696087763, - "acc_norm": 0.28251460722787275, - "acc_norm_stderr": 0.004683467388784859 - } - }, - "versions": { - "hellaswag_bn": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/hellaswag/hellaswag_ca_bloom-7b1.json b/evals/hellaswag/hellaswag_ca_bloom-7b1.json deleted file mode 100644 index fa322ff2eccfdf62925b1b79ced281791b64de0e..0000000000000000000000000000000000000000 --- a/evals/hellaswag/hellaswag_ca_bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "hellaswag_ca": { - "acc": 0.40186712983065564, - "acc_stderr": 0.005108421054557395, - "acc_norm": 0.5120495006513244, - "acc_norm_stderr": 0.005208233728494265 - } - }, - "versions": { - "hellaswag_ca": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/hellaswag/hellaswag_ca_llama-7B.json b/evals/hellaswag/hellaswag_ca_llama-7B.json deleted file mode 100644 index 4e0b22ebaf8ac031767a3f3ab1e4789d623a3c02..0000000000000000000000000000000000000000 --- a/evals/hellaswag/hellaswag_ca_llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "hellaswag_ca": { - "acc": 0.38460703430308296, - "acc_stderr": 0.0050691072999641, - "acc_norm": 0.49565783760312637, - "acc_norm_stderr": 0.005209550302588167 - } - }, - "versions": { - "hellaswag_ca": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/hellaswag/hellaswag_da_bloom-7b1.json b/evals/hellaswag/hellaswag_da_bloom-7b1.json deleted file mode 100644 index 248065e86f7721ea28ce5b176e014af8e2c365bf..0000000000000000000000000000000000000000 --- a/evals/hellaswag/hellaswag_da_bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "hellaswag_da": { - "acc": 0.2806018269747448, - "acc_stderr": 0.00465795256586935, - "acc_norm": 0.31176786673831275, - "acc_norm_stderr": 0.004802289060894963 - } - }, - "versions": { - "hellaswag_da": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/hellaswag/hellaswag_da_llama-7B.json b/evals/hellaswag/hellaswag_da_llama-7B.json deleted file mode 100644 index 158172ac8091f5c183cde64b120c8c32ef6b2da7..0000000000000000000000000000000000000000 --- a/evals/hellaswag/hellaswag_da_llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "hellaswag_da": { - "acc": 0.3730252552391188, - "acc_stderr": 0.005013710932255912, - "acc_norm": 0.46695325094035467, - "acc_norm_stderr": 0.005172309453152385 - } - }, - "versions": { - "hellaswag_da": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/hellaswag/hellaswag_de_bloom-7b1.json b/evals/hellaswag/hellaswag_de_bloom-7b1.json deleted file mode 100644 index 1a42078cb7cf48cd71502713357d1faa121702cc..0000000000000000000000000000000000000000 --- a/evals/hellaswag/hellaswag_de_bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "hellaswag_de": { - "acc": 0.2982493595217763, - "acc_stderr": 0.004726948912322779, - "acc_norm": 0.32418872758326217, - "acc_norm_stderr": 0.004836279708509382 - } - }, - "versions": { - "hellaswag_de": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/hellaswag/hellaswag_de_llama-7B.json b/evals/hellaswag/hellaswag_de_llama-7B.json deleted file mode 100644 index a027e43f548e49b4fd7dd60cc606b68dc314cb9d..0000000000000000000000000000000000000000 --- a/evals/hellaswag/hellaswag_de_llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "hellaswag_de": { - "acc": 0.39427900523001386, - "acc_stderr": 0.005049108443939032, - "acc_norm": 0.49855907780979825, - "acc_norm_stderr": 0.005165885308732062 - } - }, - "versions": { - "hellaswag_de": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/hellaswag/hellaswag_es_bloom-7b1.json b/evals/hellaswag/hellaswag_es_bloom-7b1.json deleted file mode 100644 index 7fd9710255ac60d17ca496eac2cdcfe416fd02be..0000000000000000000000000000000000000000 --- a/evals/hellaswag/hellaswag_es_bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "hellaswag_es": { - "acc": 0.4372733091529763, - "acc_stderr": 0.0051237264293392815, - "acc_norm": 0.566567100490719, - "acc_norm_stderr": 0.005118554174253425 - } - }, - "versions": { - "hellaswag_es": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/hellaswag/hellaswag_es_llama-7B.json b/evals/hellaswag/hellaswag_es_llama-7B.json deleted file mode 100644 index 571b2651d1c438f6d95ef828887f685893f506ff..0000000000000000000000000000000000000000 --- a/evals/hellaswag/hellaswag_es_llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "hellaswag_es": { - "acc": 0.4311466666666667, - "acc_stderr": 0.005115053675969629, - "acc_norm": 0.5640533333333333, - "acc_norm_stderr": 0.0051217018246512425 - } - }, - "versions": { - "hellaswag_es": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/hellaswag/hellaswag_eu_bloom-7b1.json b/evals/hellaswag/hellaswag_eu_bloom-7b1.json deleted file mode 100644 index aaa2bac442dd619e5e485a1c9bb7770c1aaad3e8..0000000000000000000000000000000000000000 --- a/evals/hellaswag/hellaswag_eu_bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "hellaswag_eu": { - "acc": 0.27380695314187, - "acc_stderr": 0.004633608505053738, - "acc_norm": 0.31235154394299286, - "acc_norm_stderr": 0.00481588516396214 - } - }, - "versions": { - "hellaswag_eu": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/hellaswag/hellaswag_eu_llama-7B.json b/evals/hellaswag/hellaswag_eu_llama-7B.json deleted file mode 100644 index f969135230558d6c41262c2194dbbe0e29c848f6..0000000000000000000000000000000000000000 --- a/evals/hellaswag/hellaswag_eu_llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "hellaswag_eu": { - "acc": 0.25847549125458863, - "acc_stderr": 0.004549288692503547, - "acc_norm": 0.28719499028287626, - "acc_norm_stderr": 0.004701591142825526 - } - }, - "versions": { - "hellaswag_eu": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/hellaswag/hellaswag_fr_bloom-7b1.json b/evals/hellaswag/hellaswag_fr_bloom-7b1.json deleted file mode 100644 index 737e5f885ea8810e330462182af605bac6f7338e..0000000000000000000000000000000000000000 --- a/evals/hellaswag/hellaswag_fr_bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "hellaswag_fr": { - "acc": 0.4255729278218034, - "acc_stderr": 0.005116827391881862, - "acc_norm": 0.5656457485542943, - "acc_norm_stderr": 0.005129684120180618 - } - }, - "versions": { - "hellaswag_fr": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/hellaswag/hellaswag_fr_llama-7B.json b/evals/hellaswag/hellaswag_fr_llama-7B.json deleted file mode 100644 index 3f0fd2446e8e689f67cfb568e162f7b4dba1a617..0000000000000000000000000000000000000000 --- a/evals/hellaswag/hellaswag_fr_llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "hellaswag_fr": { - "acc": 0.4255729278218034, - "acc_stderr": 0.00511682739188186, - "acc_norm": 0.5566502463054187, - "acc_norm_stderr": 0.005141155729141772 - } - }, - "versions": { - "hellaswag_fr": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/hellaswag/hellaswag_gu_bloom-7b1.json b/evals/hellaswag/hellaswag_gu_bloom-7b1.json deleted file mode 100644 index 0ef2b298131daf31fa9c77d37366818ba539e0bb..0000000000000000000000000000000000000000 --- a/evals/hellaswag/hellaswag_gu_bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "hellaswag_gu": { - "acc": 0.2683176189935249, - "acc_stderr": 0.004722752779022285, - "acc_norm": 0.30625922980802, - "acc_norm_stderr": 0.0049130651137809294 - } - }, - "versions": { - "hellaswag_gu": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/hellaswag/hellaswag_gu_llama-7B.json b/evals/hellaswag/hellaswag_gu_llama-7B.json deleted file mode 100644 index a610259f2ef19c9db88847c04b399dfcbcc4a463..0000000000000000000000000000000000000000 --- a/evals/hellaswag/hellaswag_gu_llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "hellaswag_gu": { - "acc": 0.2560490741792571, - "acc_stderr": 0.004652036002377334, - "acc_norm": 0.28899238895830964, - "acc_norm_stderr": 0.004831585233585411 - } - }, - "versions": { - "hellaswag_gu": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/hellaswag/hellaswag_hi_bloom-7b1.json b/evals/hellaswag/hellaswag_hi_bloom-7b1.json deleted file mode 100644 index 63eeb2a2481895efb7ecade2660f0911184073b6..0000000000000000000000000000000000000000 --- a/evals/hellaswag/hellaswag_hi_bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "hellaswag_hi": { - "acc": 0.31202209005947323, - "acc_stderr": 0.004774960194792877, - "acc_norm": 0.36363636363636365, - "acc_norm_stderr": 0.004957653483174718 - } - }, - "versions": { - "hellaswag_hi": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/hellaswag/hellaswag_hi_llama-7B.json b/evals/hellaswag/hellaswag_hi_llama-7B.json deleted file mode 100644 index 35969545033bac79e8237b539ab99ce740103734..0000000000000000000000000000000000000000 --- a/evals/hellaswag/hellaswag_hi_llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "hellaswag_hi": { - "acc": 0.2729396771452846, - "acc_stderr": 0.0045910116736375154, - "acc_norm": 0.2917374681393373, - "acc_norm_stderr": 0.004684713934059222 - } - }, - "versions": { - "hellaswag_hi": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/hellaswag/hellaswag_hr_bloom-7b1.json b/evals/hellaswag/hellaswag_hr_bloom-7b1.json deleted file mode 100644 index 2571f200efda69d65fed248bfa1462accaa0e80f..0000000000000000000000000000000000000000 --- a/evals/hellaswag/hellaswag_hr_bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "hellaswag_hr": { - "acc": 0.27478095640240685, - "acc_stderr": 0.004586771132918674, - "acc_norm": 0.3000105563179563, - "acc_norm_stderr": 0.004708614858618206 - } - }, - "versions": { - "hellaswag_hr": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/hellaswag/hellaswag_hr_llama-7B.json b/evals/hellaswag/hellaswag_hr_llama-7B.json deleted file mode 100644 index 0c8aa308a99a4d9917300d2b6bca88d4fbd44a07..0000000000000000000000000000000000000000 --- a/evals/hellaswag/hellaswag_hr_llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "hellaswag_hr": { - "acc": 0.3393856222949435, - "acc_stderr": 0.004865190903217322, - "acc_norm": 0.41148527393645096, - "acc_norm_stderr": 0.005056324888258699 - } - }, - "versions": { - "hellaswag_hr": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/hellaswag/hellaswag_hu_bloom-7b1.json b/evals/hellaswag/hellaswag_hu_bloom-7b1.json deleted file mode 100644 index cfb0859d6479ddd7e6caa9ab28436da9061fafe0..0000000000000000000000000000000000000000 --- a/evals/hellaswag/hellaswag_hu_bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "hellaswag_hu": { - "acc": 0.2749780893952673, - "acc_stderr": 0.004673697346652944, - "acc_norm": 0.30127081507449605, - "acc_norm_stderr": 0.004802517407348953 - } - }, - "versions": { - "hellaswag_hu": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/hellaswag/hellaswag_hu_llama-7B.json b/evals/hellaswag/hellaswag_hu_llama-7B.json deleted file mode 100644 index 7f1300419e0e8727d8da4787e4074336d82c6d64..0000000000000000000000000000000000000000 --- a/evals/hellaswag/hellaswag_hu_llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "hellaswag_hu": { - "acc": 0.31879929886064856, - "acc_stderr": 0.004877892181685683, - "acc_norm": 0.3785056967572305, - "acc_norm_stderr": 0.005076808255387223 - } - }, - "versions": { - "hellaswag_hu": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/hellaswag/hellaswag_hy_bloom-7b1.json b/evals/hellaswag/hellaswag_hy_bloom-7b1.json deleted file mode 100644 index b7aadfc69e7d37e7a69be9da8de7f6f479daa078..0000000000000000000000000000000000000000 --- a/evals/hellaswag/hellaswag_hy_bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "hellaswag_hy": { - "acc": 0.2517377201112141, - "acc_stderr": 0.00467165233929534, - "acc_norm": 0.2761816496756256, - "acc_norm_stderr": 0.004812620824973181 - } - }, - "versions": { - "hellaswag_hy": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/hellaswag/hellaswag_hy_llama-7B.json b/evals/hellaswag/hellaswag_hy_llama-7B.json deleted file mode 100644 index 85198baf9a0a8e2dcb229b74cd9c22b5421c95b3..0000000000000000000000000000000000000000 --- a/evals/hellaswag/hellaswag_hy_llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "hellaswag_hy": { - "acc": 0.2545180722891566, - "acc_stderr": 0.004688644596808388, - "acc_norm": 0.2849860982391103, - "acc_norm_stderr": 0.004858906279128767 - } - }, - "versions": { - "hellaswag_hy": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/hellaswag/hellaswag_id_bloom-7b1.json b/evals/hellaswag/hellaswag_id_bloom-7b1.json deleted file mode 100644 index b4bcc31e157c6a9c8fc29d08fd6088001c2a4e2b..0000000000000000000000000000000000000000 --- a/evals/hellaswag/hellaswag_id_bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "hellaswag_id": { - "acc": 0.3894849785407725, - "acc_stderr": 0.005051366474018924, - "acc_norm": 0.49484978540772534, - "acc_norm_stderr": 0.005179195541251435 - } - }, - "versions": { - "hellaswag_id": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/hellaswag/hellaswag_id_llama-7B.json b/evals/hellaswag/hellaswag_id_llama-7B.json deleted file mode 100644 index d408a6b8209abf2afa7b33e28f960ce7cf71596b..0000000000000000000000000000000000000000 --- a/evals/hellaswag/hellaswag_id_llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "hellaswag_id": { - "acc": 0.3017167381974249, - "acc_stderr": 0.004754784760510309, - "acc_norm": 0.34431330472103006, - "acc_norm_stderr": 0.004921986658657097 - } - }, - "versions": { - "hellaswag_id": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/hellaswag/hellaswag_it_bloom-7b1.json b/evals/hellaswag/hellaswag_it_bloom-7b1.json deleted file mode 100644 index f071bbb39cf2e6048f33a2ac1444d8d24657c9ab..0000000000000000000000000000000000000000 --- a/evals/hellaswag/hellaswag_it_bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "hellaswag_it": { - "acc": 0.33380465520991953, - "acc_stderr": 0.004918337887582365, - "acc_norm": 0.40765716771807703, - "acc_norm_stderr": 0.005125137013353996 - } - }, - "versions": { - "hellaswag_it": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/hellaswag/hellaswag_it_llama-7B.json b/evals/hellaswag/hellaswag_it_llama-7B.json deleted file mode 100644 index 2698d8e1b02654e67b142631369916d337041789..0000000000000000000000000000000000000000 --- a/evals/hellaswag/hellaswag_it_llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "hellaswag_it": { - "acc": 0.3975851191123681, - "acc_stderr": 0.0051045551272873, - "acc_norm": 0.5201783966061133, - "acc_norm_stderr": 0.005210879697577827 - } - }, - "versions": { - "hellaswag_it": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/hellaswag/hellaswag_kn_bloom-7b1.json b/evals/hellaswag/hellaswag_kn_bloom-7b1.json deleted file mode 100644 index ec110ed487575de37a4630739da2ee9264bd8d08..0000000000000000000000000000000000000000 --- a/evals/hellaswag/hellaswag_kn_bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "hellaswag_kn": { - "acc": 0.26337169939065674, - "acc_stderr": 0.004679154494054024, - "acc_norm": 0.30275332881967953, - "acc_norm_stderr": 0.004880859653925846 - } - }, - "versions": { - "hellaswag_kn": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/hellaswag/hellaswag_kn_llama-7B.json b/evals/hellaswag/hellaswag_kn_llama-7B.json deleted file mode 100644 index 219c76670fe5ee2040cfa43d6e6360e4684a6fe4..0000000000000000000000000000000000000000 --- a/evals/hellaswag/hellaswag_kn_llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "hellaswag_kn": { - "acc": 0.25603701196118256, - "acc_stderr": 0.004636450973386679, - "acc_norm": 0.2887610020311442, - "acc_norm_stderr": 0.0048143280788988845 - } - }, - "versions": { - "hellaswag_kn": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/hellaswag/hellaswag_ml_bloom-7b1.json b/evals/hellaswag/hellaswag_ml_bloom-7b1.json deleted file mode 100644 index a4de930d07f3cb8e48668e5b5f1b53560c0ff7f1..0000000000000000000000000000000000000000 --- a/evals/hellaswag/hellaswag_ml_bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "hellaswag_ml": { - "acc": 0.25444979290272024, - "acc_stderr": 0.004608558887983242, - "acc_norm": 0.2878092466136796, - "acc_norm_stderr": 0.004790448543019756 - } - }, - "versions": { - "hellaswag_ml": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/hellaswag/hellaswag_ml_llama-7B.json b/evals/hellaswag/hellaswag_ml_llama-7B.json deleted file mode 100644 index d0fff179c59dc9c44b1a6de207bcba30d72726a7..0000000000000000000000000000000000000000 --- a/evals/hellaswag/hellaswag_ml_llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "hellaswag_ml": { - "acc": 0.2510914586365163, - "acc_stderr": 0.004588344357712618, - "acc_norm": 0.2890406358446211, - "acc_norm_stderr": 0.004796533523475371 - } - }, - "versions": { - "hellaswag_ml": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/hellaswag/hellaswag_mr_bloom-7b1.json b/evals/hellaswag/hellaswag_mr_bloom-7b1.json deleted file mode 100644 index 5768dcee263277655dc8087f17858a884c937b53..0000000000000000000000000000000000000000 --- a/evals/hellaswag/hellaswag_mr_bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "hellaswag_mr": { - "acc": 0.2701799762905486, - "acc_stderr": 0.004610067484763786, - "acc_norm": 0.3100549628192693, - "acc_norm_stderr": 0.004801748474056546 - } - }, - "versions": { - "hellaswag_mr": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/hellaswag/hellaswag_mr_llama-7B.json b/evals/hellaswag/hellaswag_mr_llama-7B.json deleted file mode 100644 index 6c3e2cc455a43fee3f289e2eab0831003b552a30..0000000000000000000000000000000000000000 --- a/evals/hellaswag/hellaswag_mr_llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "hellaswag_mr": { - "acc": 0.2592951826705464, - "acc_stderr": 0.004549803334314971, - "acc_norm": 0.2879620648776808, - "acc_norm_stderr": 0.004701019162604622 - } - }, - "versions": { - "hellaswag_mr": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/hellaswag/hellaswag_ne_bloom-7b1.json b/evals/hellaswag/hellaswag_ne_bloom-7b1.json deleted file mode 100644 index 3b95e1d5f31b1e69f29c233339889469700c84bd..0000000000000000000000000000000000000000 --- a/evals/hellaswag/hellaswag_ne_bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "hellaswag_ne": { - "acc": 0.27441511053874224, - "acc_stderr": 0.004622852940386713, - "acc_norm": 0.30897188237819273, - "acc_norm_stderr": 0.004787064632332303 - } - }, - "versions": { - "hellaswag_ne": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/hellaswag/hellaswag_ne_llama-7B.json b/evals/hellaswag/hellaswag_ne_llama-7B.json deleted file mode 100644 index 8c4989d19a23d592896ca0b4e6fded1f62cc01f3..0000000000000000000000000000000000000000 --- a/evals/hellaswag/hellaswag_ne_llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "hellaswag_ne": { - "acc": 0.264112470487229, - "acc_stderr": 0.004567327225923831, - "acc_norm": 0.28171281390856406, - "acc_norm_stderr": 0.00466030469849661 - } - }, - "versions": { - "hellaswag_ne": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/hellaswag/hellaswag_nl_Llama-2-7b-chat-hf.json b/evals/hellaswag/hellaswag_nl_Llama-2-7b-chat-hf.json new file mode 100644 index 0000000000000000000000000000000000000000..6839e4e1493b60821794c1a23f7ba02b789cfc95 --- /dev/null +++ b/evals/hellaswag/hellaswag_nl_Llama-2-7b-chat-hf.json @@ -0,0 +1,23 @@ +{ + "results": { + "hellaswag_nl": { + "acc": 0.38467350242849435, + "acc_stderr": 0.005054749888300686, + "acc_norm": 0.4823529411764706, + "acc_norm_stderr": 0.005191586180318448 + } + }, + "versions": { + "hellaswag_nl": 1 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=meta-llama/Llama-2-7b-chat-hf,use_accelerate=True,device_map_option=auto,dtype=bfloat16", + "batch_size": 64, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/hellaswag/hellaswag_nl_Llama-2-7b-hf.json b/evals/hellaswag/hellaswag_nl_Llama-2-7b-hf.json new file mode 100644 index 0000000000000000000000000000000000000000..745826da641f1d99a1211c46e7cdb0d94765fe6f --- /dev/null +++ b/evals/hellaswag/hellaswag_nl_Llama-2-7b-hf.json @@ -0,0 +1,23 @@ +{ + "results": { + "hellaswag_nl": { + "acc": 0.3878035617916892, + "acc_stderr": 0.005062348307428708, + "acc_norm": 0.5000539665407447, + "acc_norm_stderr": 0.005194822688012659 + } + }, + "versions": { + "hellaswag_nl": 1 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=meta-llama/Llama-2-7b-hf,use_accelerate=True,device_map_option=auto,dtype=bfloat16", + "batch_size": 64, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/hellaswag/hellaswag_nl_Mistral-7B-v0.1.json b/evals/hellaswag/hellaswag_nl_Mistral-7B-v0.1.json new file mode 100644 index 0000000000000000000000000000000000000000..36155d9b2cec2a6c48e7e134b99f453d80e9b75f --- /dev/null +++ b/evals/hellaswag/hellaswag_nl_Mistral-7B-v0.1.json @@ -0,0 +1,23 @@ +{ + "results": { + "hellaswag_nl": { + "acc": 0.43486238532110094, + "acc_stderr": 0.005150551758279897, + "acc_norm": 0.5676200755531571, + "acc_norm_stderr": 0.005147097096977192 + } + }, + "versions": { + "hellaswag_nl": 1 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=mistralai/Mistral-7B-v0.1,use_accelerate=True,device_map_option=auto,dtype=bfloat16", + "batch_size": 64, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/hellaswag/hellaswag_nl_zephyr-7b-beta.json b/evals/hellaswag/hellaswag_nl_zephyr-7b-beta.json new file mode 100644 index 0000000000000000000000000000000000000000..5fa9f92c70efa809176a3aff63d8a88be3e78172 --- /dev/null +++ b/evals/hellaswag/hellaswag_nl_zephyr-7b-beta.json @@ -0,0 +1,23 @@ +{ + "results": { + "hellaswag_nl": { + "acc": 0.4478143550998381, + "acc_stderr": 0.005166450687025188, + "acc_norm": 0.575067458175931, + "acc_norm_stderr": 0.005135942094754352 + } + }, + "versions": { + "hellaswag_nl": 1 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=HuggingFaceH4/zephyr-7b-beta,use_accelerate=True,device_map_option=auto,dtype=bfloat16", + "batch_size": 64, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/hellaswag/hellaswag_pt_bloom-7b1.json b/evals/hellaswag/hellaswag_pt_bloom-7b1.json deleted file mode 100644 index 5050ad2ec66e4750cc93be5c7e0c4c942051e7a9..0000000000000000000000000000000000000000 --- a/evals/hellaswag/hellaswag_pt_bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "hellaswag_pt": { - "acc": 0.4227977028930545, - "acc_stderr": 0.005142526543466809, - "acc_norm": 0.5511973128182902, - "acc_norm_stderr": 0.005177587858629525 - } - }, - "versions": { - "hellaswag_pt": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/hellaswag/hellaswag_pt_llama-7B.json b/evals/hellaswag/hellaswag_pt_llama-7B.json deleted file mode 100644 index 7ec9536f323c0aa592fcadb1d9e1333cd323941d..0000000000000000000000000000000000000000 --- a/evals/hellaswag/hellaswag_pt_llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "hellaswag_pt": { - "acc": 0.4037273810813739, - "acc_stderr": 0.005107551363682552, - "acc_norm": 0.532343699209015, - "acc_norm_stderr": 0.005194044440586472 - } - }, - "versions": { - "hellaswag_pt": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/hellaswag/hellaswag_ro_bloom-7b1.json b/evals/hellaswag/hellaswag_ro_bloom-7b1.json deleted file mode 100644 index dafe7356bdb6ae258020ac1efcc6169d4f31dd20..0000000000000000000000000000000000000000 --- a/evals/hellaswag/hellaswag_ro_bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "hellaswag_ro": { - "acc": 0.2795024337479719, - "acc_stderr": 0.00466744369483023, - "acc_norm": 0.3182260681449432, - "acc_norm_stderr": 0.004844601996973363 - } - }, - "versions": { - "hellaswag_ro": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/hellaswag/hellaswag_ro_llama-7B.json b/evals/hellaswag/hellaswag_ro_llama-7B.json deleted file mode 100644 index 03cce6eee60bd007c3835cca157c9f654b0774a7..0000000000000000000000000000000000000000 --- a/evals/hellaswag/hellaswag_ro_llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "hellaswag_ro": { - "acc": 0.36041103299080585, - "acc_stderr": 0.004993666697380137, - "acc_norm": 0.4491076257436452, - "acc_norm_stderr": 0.005173430588992903 - } - }, - "versions": { - "hellaswag_ro": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/hellaswag/hellaswag_ru_bloom-7b1.json b/evals/hellaswag/hellaswag_ru_bloom-7b1.json deleted file mode 100644 index a1114c4bc91539820ff9a813a92206eb0b0aaf89..0000000000000000000000000000000000000000 --- a/evals/hellaswag/hellaswag_ru_bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "hellaswag_ru": { - "acc": 0.2975625539257981, - "acc_stderr": 0.004748207348707273, - "acc_norm": 0.32538826574633306, - "acc_norm_stderr": 0.004865915900810558 - } - }, - "versions": { - "hellaswag_ru": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/hellaswag/hellaswag_ru_llama-7B.json b/evals/hellaswag/hellaswag_ru_llama-7B.json deleted file mode 100644 index 9da4ad4e94c2effcad5429b563495f832b369727..0000000000000000000000000000000000000000 --- a/evals/hellaswag/hellaswag_ru_llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "hellaswag_ru": { - "acc": 0.370685936151855, - "acc_stderr": 0.005016184279255606, - "acc_norm": 0.4568593615185505, - "acc_norm_stderr": 0.005173496063169706 - } - }, - "versions": { - "hellaswag_ru": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/hellaswag/hellaswag_sk_bloom-7b1.json b/evals/hellaswag/hellaswag_sk_bloom-7b1.json deleted file mode 100644 index a452682d669ca439c37ef65351b2482280cb6a25..0000000000000000000000000000000000000000 --- a/evals/hellaswag/hellaswag_sk_bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "hellaswag_sk": { - "acc": 0.27053241960991037, - "acc_stderr": 0.004561596675422169, - "acc_norm": 0.2981549815498155, - "acc_norm_stderr": 0.004697273773957717 - } - }, - "versions": { - "hellaswag_sk": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/hellaswag/hellaswag_sk_llama-7B.json b/evals/hellaswag/hellaswag_sk_llama-7B.json deleted file mode 100644 index 7720fc7912fd16392b3c8ddc4e66fd5530405fce..0000000000000000000000000000000000000000 --- a/evals/hellaswag/hellaswag_sk_llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "hellaswag_sk": { - "acc": 0.30173958882445967, - "acc_stderr": 0.004713343422332119, - "acc_norm": 0.35888244596731683, - "acc_norm_stderr": 0.004925486913523139 - } - }, - "versions": { - "hellaswag_sk": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/hellaswag/hellaswag_sr_bloom-7b1.json b/evals/hellaswag/hellaswag_sr_bloom-7b1.json deleted file mode 100644 index 2d4dc8c27d8e4e36fa4c1a0c3d9a5431716e620d..0000000000000000000000000000000000000000 --- a/evals/hellaswag/hellaswag_sr_bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "hellaswag_sr": { - "acc": 0.27748968144777225, - "acc_stderr": 0.004606546970716383, - "acc_norm": 0.29855011112287017, - "acc_norm_stderr": 0.004708005935082949 - } - }, - "versions": { - "hellaswag_sr": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/hellaswag/hellaswag_sr_llama-7B.json b/evals/hellaswag/hellaswag_sr_llama-7B.json deleted file mode 100644 index 05dc0fdc8921cb49fe2182f475f6d81e20eb5990..0000000000000000000000000000000000000000 --- a/evals/hellaswag/hellaswag_sr_llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "hellaswag_sr": { - "acc": 0.3437400783151656, - "acc_stderr": 0.004886333271945336, - "acc_norm": 0.41147211345115886, - "acc_norm_stderr": 0.005062718548853834 - } - }, - "versions": { - "hellaswag_sr": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/hellaswag/hellaswag_sv_bloom-7b1.json b/evals/hellaswag/hellaswag_sv_bloom-7b1.json deleted file mode 100644 index 4ebba6534a5e9e09a423a4422dbf2aae81a1bc02..0000000000000000000000000000000000000000 --- a/evals/hellaswag/hellaswag_sv_bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "hellaswag_sv": { - "acc": 0.27647445735584303, - "acc_stderr": 0.0046830976447929905, - "acc_norm": 0.3101293575970182, - "acc_norm_stderr": 0.0048432182915872585 - } - }, - "versions": { - "hellaswag_sv": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/hellaswag/hellaswag_sv_llama-7B.json b/evals/hellaswag/hellaswag_sv_llama-7B.json deleted file mode 100644 index ee471bcb53bf2f1459136089da2f6e7ae0cdafd1..0000000000000000000000000000000000000000 --- a/evals/hellaswag/hellaswag_sv_llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "hellaswag_sv": { - "acc": 0.3857706643279982, - "acc_stderr": 0.005096929762325147, - "acc_norm": 0.5051523788642841, - "acc_norm_stderr": 0.005235108858635741 - } - }, - "versions": { - "hellaswag_sv": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/hellaswag/hellaswag_ta_bloom-7b1.json b/evals/hellaswag/hellaswag_ta_bloom-7b1.json deleted file mode 100644 index 584724a6119d2433aab6e11c1971faf29ca9ce8f..0000000000000000000000000000000000000000 --- a/evals/hellaswag/hellaswag_ta_bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "hellaswag_ta": { - "acc": 0.2588850588375134, - "acc_stderr": 0.004775805657688067, - "acc_norm": 0.29406870319743256, - "acc_norm_stderr": 0.0049677071891109335 - } - }, - "versions": { - "hellaswag_ta": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/hellaswag/hellaswag_ta_llama-7B.json b/evals/hellaswag/hellaswag_ta_llama-7B.json deleted file mode 100644 index 2d69d8dc8c743704b031d2ef3894db3a70bb4c9a..0000000000000000000000000000000000000000 --- a/evals/hellaswag/hellaswag_ta_llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "hellaswag_ta": { - "acc": 0.25329846665874245, - "acc_stderr": 0.004741766564082548, - "acc_norm": 0.28313324616664687, - "acc_norm_stderr": 0.004912075369610396 - } - }, - "versions": { - "hellaswag_ta": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/hellaswag/hellaswag_te_bloom-7b1.json b/evals/hellaswag/hellaswag_te_bloom-7b1.json deleted file mode 100644 index 5052ea04c62ee79955014621885295121b68fc76..0000000000000000000000000000000000000000 --- a/evals/hellaswag/hellaswag_te_bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "hellaswag_te": { - "acc": 0.26123337918386064, - "acc_stderr": 0.00470365034659896, - "acc_norm": 0.2922971114167813, - "acc_norm_stderr": 0.004869729181749992 - } - }, - "versions": { - "hellaswag_te": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/hellaswag/hellaswag_te_llama-7B.json b/evals/hellaswag/hellaswag_te_llama-7B.json deleted file mode 100644 index 7bce32700aa0c1c9e176ddae4994c8d3a2b22f3b..0000000000000000000000000000000000000000 --- a/evals/hellaswag/hellaswag_te_llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "hellaswag_te": { - "acc": 0.25767996331957815, - "acc_stderr": 0.0046827716491321504, - "acc_norm": 0.28931682714351215, - "acc_norm_stderr": 0.004855030101325898 - } - }, - "versions": { - "hellaswag_te": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/hellaswag/hellaswag_uk_bloom-7b1.json b/evals/hellaswag/hellaswag_uk_bloom-7b1.json deleted file mode 100644 index cd933afdab71857ec060d16116e0f044d23e7a50..0000000000000000000000000000000000000000 --- a/evals/hellaswag/hellaswag_uk_bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "hellaswag_uk": { - "acc": 0.2781379530237007, - "acc_stderr": 0.004619644722138738, - "acc_norm": 0.30035072802635776, - "acc_norm_stderr": 0.004726132393644123 - } - }, - "versions": { - "hellaswag_uk": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/hellaswag/hellaswag_uk_llama-7B.json b/evals/hellaswag/hellaswag_uk_llama-7B.json deleted file mode 100644 index 545af16e16507026332c8c8c7836ef6d20ccae00..0000000000000000000000000000000000000000 --- a/evals/hellaswag/hellaswag_uk_llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "hellaswag_uk": { - "acc": 0.3544720628850648, - "acc_stderr": 0.0049304266046324334, - "acc_norm": 0.4412577012959422, - "acc_norm_stderr": 0.005117854029524533 - } - }, - "versions": { - "hellaswag_uk": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/hellaswag/hellaswag_vi_bloom-7b1.json b/evals/hellaswag/hellaswag_vi_bloom-7b1.json deleted file mode 100644 index 686132db373135123f1b9720642bfd294f99f328..0000000000000000000000000000000000000000 --- a/evals/hellaswag/hellaswag_vi_bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "hellaswag_vi": { - "acc": 0.3836498581095831, - "acc_stderr": 0.0050805394682356675, - "acc_norm": 0.4827548570181183, - "acc_norm_stderr": 0.005220836527919318 - } - }, - "versions": { - "hellaswag_vi": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/hellaswag/hellaswag_vi_llama-7B.json b/evals/hellaswag/hellaswag_vi_llama-7B.json deleted file mode 100644 index 816307d9258b275603ae30ffb36851a8b3475dd9..0000000000000000000000000000000000000000 --- a/evals/hellaswag/hellaswag_vi_llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "hellaswag_vi": { - "acc": 0.27865094957432873, - "acc_stderr": 0.004684158200782215, - "acc_norm": 0.31608819035145164, - "acc_norm_stderr": 0.0048577229826674215 - } - }, - "versions": { - "hellaswag_vi": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/hellaswag/hellaswag_zh_bloom-7b1.json b/evals/hellaswag/hellaswag_zh_bloom-7b1.json deleted file mode 100644 index 30ac380919e1d6d2c44c46e941ae3dc9929982e1..0000000000000000000000000000000000000000 --- a/evals/hellaswag/hellaswag_zh_bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "hellaswag_zh": { - "acc": 0.38851715950787824, - "acc_stderr": 0.005063776486157121, - "acc_norm": 0.5115475933520397, - "acc_norm_stderr": 0.005193156826942953 - } - }, - "versions": { - "hellaswag_zh": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": "1", - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/hellaswag/hellaswag_zh_llama-7B.json b/evals/hellaswag/hellaswag_zh_llama-7B.json deleted file mode 100644 index b0d393a5879535e46dfb92d3361d469fc71f97b7..0000000000000000000000000000000000000000 --- a/evals/hellaswag/hellaswag_zh_llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "hellaswag_zh": { - "acc": 0.32358653431160983, - "acc_stderr": 0.004859949552176753, - "acc_norm": 0.3945835131635736, - "acc_norm_stderr": 0.0050772319918162435 - } - }, - "versions": { - "hellaswag_zh": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/mmlu/mmlu_ar-bloom-7b1.json b/evals/mmlu/mmlu_ar-bloom-7b1.json deleted file mode 100644 index b6e593af4922000fb94fdaab7a48477f593319ba..0000000000000000000000000000000000000000 --- a/evals/mmlu/mmlu_ar-bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "mmlu_ar": { - "acc": 0.26531559405940597, - "acc_stderr": 0.0038831388933726414, - "acc_norm": 0.2754486386138614, - "acc_norm_stderr": 0.003929217133330591 - } - }, - "versions": { - "mmlu_ar": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/mmlu/mmlu_ar-llama-7B.json b/evals/mmlu/mmlu_ar-llama-7B.json deleted file mode 100644 index f601d0a0a213c652ffd5519a7454ba2a537af3fc..0000000000000000000000000000000000000000 --- a/evals/mmlu/mmlu_ar-llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "mmlu_ar": { - "acc": 0.2589727722772277, - "acc_stderr": 0.0038529667515366556, - "acc_norm": 0.2797803217821782, - "acc_norm_stderr": 0.003948136869379606 - } - }, - "versions": { - "mmlu_ar": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/mmlu/mmlu_bn-bloom-7b1.json b/evals/mmlu/mmlu_bn-bloom-7b1.json deleted file mode 100644 index 89c8ade0841c9df16a86355a7b703e726726acfa..0000000000000000000000000000000000000000 --- a/evals/mmlu/mmlu_bn-bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "mmlu_bn": { - "acc": 0.2671137646192852, - "acc_stderr": 0.004001512896559074, - "acc_norm": 0.28150813772797906, - "acc_norm_stderr": 0.004067374934957544 - } - }, - "versions": { - "mmlu_bn": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/mmlu/mmlu_bn-llama-7B.json b/evals/mmlu/mmlu_bn-llama-7B.json deleted file mode 100644 index da3322aaf303ad70cf3667aba1a4d73764af5fdc..0000000000000000000000000000000000000000 --- a/evals/mmlu/mmlu_bn-llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "mmlu_bn": { - "acc": 0.2501022327635561, - "acc_stderr": 0.0039166757490002955, - "acc_norm": 0.28461601374008344, - "acc_norm_stderr": 0.0040809105667388166 - } - }, - "versions": { - "mmlu_bn": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/mmlu/mmlu_ca-bloom-7b1.json b/evals/mmlu/mmlu_ca-bloom-7b1.json deleted file mode 100644 index b760f91f32565b551455d9bf715837b34540ec24..0000000000000000000000000000000000000000 --- a/evals/mmlu/mmlu_ca-bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "mmlu_ca": { - "acc": 0.2785041045910611, - "acc_stderr": 0.003908294722890792, - "acc_norm": 0.28785345089692915, - "acc_norm_stderr": 0.003947525835346328 - } - }, - "versions": { - "mmlu_ca": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/mmlu/mmlu_ca-llama-7B.json b/evals/mmlu/mmlu_ca-llama-7B.json deleted file mode 100644 index 5183b4df5346ae0e0aa74c3166323602507c4598..0000000000000000000000000000000000000000 --- a/evals/mmlu/mmlu_ca-llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "mmlu_ca": { - "acc": 0.3038917604134995, - "acc_stderr": 0.004010074337091965, - "acc_norm": 0.3022955305564001, - "acc_norm_stderr": 0.004004111747979521 - } - }, - "versions": { - "mmlu_ca": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/mmlu/mmlu_da-bloom-7b1.json b/evals/mmlu/mmlu_da-bloom-7b1.json deleted file mode 100644 index 5b81f4f5ab7529c0d7efd0c3b2c040d9e4643cc2..0000000000000000000000000000000000000000 --- a/evals/mmlu/mmlu_da-bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "mmlu_da": { - "acc": 0.2557170982886567, - "acc_stderr": 0.0037964676375075402, - "acc_norm": 0.2705588368923217, - "acc_norm_stderr": 0.003865954982495375 - } - }, - "versions": { - "mmlu_da": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/mmlu/mmlu_da-llama-7B.json b/evals/mmlu/mmlu_da-llama-7B.json deleted file mode 100644 index f4957b8b53a4880a0eac49ccabcab4a8c6a584c2..0000000000000000000000000000000000000000 --- a/evals/mmlu/mmlu_da-llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "mmlu_da": { - "acc": 0.2997122520066636, - "acc_stderr": 0.003986771176689293, - "acc_norm": 0.2995608056943813, - "acc_norm_stderr": 0.003986194743561357 - } - }, - "versions": { - "mmlu_da": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/mmlu/mmlu_de-bloom-7b1.json b/evals/mmlu/mmlu_de-bloom-7b1.json deleted file mode 100644 index 40c8412a571fbf0d4f63f6290e66bfbbab5fa943..0000000000000000000000000000000000000000 --- a/evals/mmlu/mmlu_de-bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "mmlu_de": { - "acc": 0.2670085985819882, - "acc_stderr": 0.0038422837632401587, - "acc_norm": 0.2812641424045859, - "acc_norm_stderr": 0.003904983582450586 - } - }, - "versions": { - "mmlu_de": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/mmlu/mmlu_de-llama-7B.json b/evals/mmlu/mmlu_de-llama-7B.json deleted file mode 100644 index 48403f057f5a6bffdb9e4cb2644c286f80b5ccf0..0000000000000000000000000000000000000000 --- a/evals/mmlu/mmlu_de-llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "mmlu_de": { - "acc": 0.3045708251621662, - "acc_stderr": 0.003997127255569371, - "acc_norm": 0.2988384371700106, - "acc_norm_stderr": 0.003975618018830569 - } - }, - "versions": { - "mmlu_de": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/mmlu/mmlu_es-bloom-7b1.json b/evals/mmlu/mmlu_es-bloom-7b1.json deleted file mode 100644 index 1ca552b581fe950c76b7e801b8922438a03f50b6..0000000000000000000000000000000000000000 --- a/evals/mmlu/mmlu_es-bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "mmlu_es": { - "acc": 0.2846857657117144, - "acc_stderr": 0.00390811532232558, - "acc_norm": 0.28926053697315135, - "acc_norm_stderr": 0.003926773662056655 - } - }, - "versions": { - "mmlu_es": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/mmlu/mmlu_es-llama-7B.json b/evals/mmlu/mmlu_es-llama-7B.json deleted file mode 100644 index 6c5c8136a88729662690739c773310e7e60685c7..0000000000000000000000000000000000000000 --- a/evals/mmlu/mmlu_es-llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "mmlu_es": { - "acc": 0.30808459577021147, - "acc_stderr": 0.00399850416060033, - "acc_norm": 0.30268486575671216, - "acc_norm_stderr": 0.0039787436578546075 - } - }, - "versions": { - "mmlu_es": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/mmlu/mmlu_eu-bloom-7b1.json b/evals/mmlu/mmlu_eu-bloom-7b1.json deleted file mode 100644 index bd26e106ebaee3484061fd6d78bd4e9d52579fcd..0000000000000000000000000000000000000000 --- a/evals/mmlu/mmlu_eu-bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "mmlu_eu": { - "acc": 0.2576611914684972, - "acc_stderr": 0.003953719493412054, - "acc_norm": 0.2735147503473073, - "acc_norm_stderr": 0.0040298051028790725 - } - }, - "versions": { - "mmlu_eu": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/mmlu/mmlu_eu-llama-7B.json b/evals/mmlu/mmlu_eu-llama-7B.json deleted file mode 100644 index cbf5d4151c1d0c86b7232d6cbc1cc4623fafce36..0000000000000000000000000000000000000000 --- a/evals/mmlu/mmlu_eu-llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "mmlu_eu": { - "acc": 0.2668954809185258, - "acc_stderr": 0.003998838127920185, - "acc_norm": 0.27923510664378526, - "acc_norm_stderr": 0.00405566512057356 - } - }, - "versions": { - "mmlu_eu": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/mmlu/mmlu_fr-bloom-7b1.json b/evals/mmlu/mmlu_fr-bloom-7b1.json deleted file mode 100644 index 518cf70d5d420bdf6c38c7dc1d83ad8289360cb0..0000000000000000000000000000000000000000 --- a/evals/mmlu/mmlu_fr-bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "mmlu_fr": { - "acc": 0.2887479948055916, - "acc_stderr": 0.0039609687595635185, - "acc_norm": 0.29860209304102053, - "acc_norm_stderr": 0.003999989334139082 - } - }, - "versions": { - "mmlu_fr": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/mmlu/mmlu_fr-llama-7B.json b/evals/mmlu/mmlu_fr-llama-7B.json deleted file mode 100644 index e22bb03037c1bf7eebd47d64ccb10e43eca00210..0000000000000000000000000000000000000000 --- a/evals/mmlu/mmlu_fr-llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "mmlu_fr": { - "acc": 0.318997784737606, - "acc_stderr": 0.004073786574740586, - "acc_norm": 0.3054006569398824, - "acc_norm_stderr": 0.00402561598834305 - } - }, - "versions": { - "mmlu_fr": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/mmlu/mmlu_gu-bloom-7b1.json b/evals/mmlu/mmlu_gu-bloom-7b1.json deleted file mode 100644 index 08db474bfffcd53c11f37cca5a5523de19ab27b2..0000000000000000000000000000000000000000 --- a/evals/mmlu/mmlu_gu-bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "mmlu_gu": { - "acc": 0.24933390631714655, - "acc_stderr": 0.004010971174274014, - "acc_norm": 0.26566394499355395, - "acc_norm_stderr": 0.004094955673385403 - } - }, - "versions": { - "mmlu_gu": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/mmlu/mmlu_gu-llama-7B.json b/evals/mmlu/mmlu_gu-llama-7B.json deleted file mode 100644 index 2236b1f5ac01a2de4772fb6fde41222398119985..0000000000000000000000000000000000000000 --- a/evals/mmlu/mmlu_gu-llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "mmlu_gu": { - "acc": 0.24391920928233776, - "acc_stderr": 0.003981461991912142, - "acc_norm": 0.27382896433175763, - "acc_norm_stderr": 0.0041342298983896774 - } - }, - "versions": { - "mmlu_gu": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/mmlu/mmlu_hi-bloom-7b1.json b/evals/mmlu/mmlu_hi-bloom-7b1.json deleted file mode 100644 index 8402e114c7f1914a4c05f4a1f91ecb4aad9df2d8..0000000000000000000000000000000000000000 --- a/evals/mmlu/mmlu_hi-bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "mmlu_hi": { - "acc": 0.2666237838707084, - "acc_stderr": 0.00396526756671177, - "acc_norm": 0.2751467395674198, - "acc_norm_stderr": 0.004004671316183439 - } - }, - "versions": { - "mmlu_hi": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/mmlu/mmlu_hi-llama-7B.json b/evals/mmlu/mmlu_hi-llama-7B.json deleted file mode 100644 index b9c9d981a7d61e96d94d6c128b4ccfc3f3b0f0e6..0000000000000000000000000000000000000000 --- a/evals/mmlu/mmlu_hi-llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "mmlu_hi": { - "acc": 0.2549650237195465, - "acc_stderr": 0.003908303467263245, - "acc_norm": 0.27860416499155743, - "acc_norm_stderr": 0.0040201315154066415 - } - }, - "versions": { - "mmlu_hi": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/mmlu/mmlu_hr-bloom-7b1.json b/evals/mmlu/mmlu_hr-bloom-7b1.json deleted file mode 100644 index 11c2e3822a0ada199f63dd7adb04e6c604d3151e..0000000000000000000000000000000000000000 --- a/evals/mmlu/mmlu_hr-bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "mmlu_hr": { - "acc": 0.25448737450562825, - "acc_stderr": 0.0037988075329188904, - "acc_norm": 0.26954669911773654, - "acc_norm_stderr": 0.0038699014491549413 - } - }, - "versions": { - "mmlu_hr": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/mmlu/mmlu_hr-llama-7B.json b/evals/mmlu/mmlu_hr-llama-7B.json deleted file mode 100644 index b2f5ca1c97a96e3d94fd3ae5c2603632e633b975..0000000000000000000000000000000000000000 --- a/evals/mmlu/mmlu_hr-llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "mmlu_hr": { - "acc": 0.294721630666261, - "acc_stderr": 0.003976243355939721, - "acc_norm": 0.2931244295710374, - "acc_norm_stderr": 0.003969942004520753 - } - }, - "versions": { - "mmlu_hr": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/mmlu/mmlu_hu-bloom-7b1.json b/evals/mmlu/mmlu_hu-bloom-7b1.json deleted file mode 100644 index b5cd6a42f13e7a2790a24766a0455177825ac001..0000000000000000000000000000000000000000 --- a/evals/mmlu/mmlu_hu-bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "mmlu_hu": { - "acc": 0.25, - "acc_stderr": 0.0037944175097970817, - "acc_norm": 0.269041769041769, - "acc_norm_stderr": 0.0038859804834747223 - } - }, - "versions": { - "mmlu_hu": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/mmlu/mmlu_hu-llama-7B.json b/evals/mmlu/mmlu_hu-llama-7B.json deleted file mode 100644 index b74a19de5e6654aef46cf40427dc362a330fa08e..0000000000000000000000000000000000000000 --- a/evals/mmlu/mmlu_hu-llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "mmlu_hu": { - "acc": 0.27794840294840295, - "acc_stderr": 0.0039256419656824035, - "acc_norm": 0.29000307125307123, - "acc_norm_stderr": 0.0039762530331634354 - } - }, - "versions": { - "mmlu_hu": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/mmlu/mmlu_hy-bloom-7b1.json b/evals/mmlu/mmlu_hy-bloom-7b1.json deleted file mode 100644 index 5b33b978463855a30343b21fc48c4d5eeefe9ed4..0000000000000000000000000000000000000000 --- a/evals/mmlu/mmlu_hy-bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "mmlu_hy": { - "acc": 0.24754384354053807, - "acc_stderr": 0.004135735206626923, - "acc_norm": 0.2570930125791938, - "acc_norm_stderr": 0.004187920399106458 - } - }, - "versions": { - "mmlu_hy": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/mmlu/mmlu_hy-llama-7B.json b/evals/mmlu/mmlu_hy-llama-7B.json deleted file mode 100644 index c10ca85321ddad4c7be01b48cec4e49a1e214777..0000000000000000000000000000000000000000 --- a/evals/mmlu/mmlu_hy-llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "mmlu_hy": { - "acc": 0.24800293820585806, - "acc_stderr": 0.004138305469907604, - "acc_norm": 0.2746304287944174, - "acc_norm_stderr": 0.004277007917763834 - } - }, - "versions": { - "mmlu_hy": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/mmlu/mmlu_id-bloom-7b1.json b/evals/mmlu/mmlu_id-bloom-7b1.json deleted file mode 100644 index eab2b6f207224be214da56e0b7642b6e08ab6522..0000000000000000000000000000000000000000 --- a/evals/mmlu/mmlu_id-bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "mmlu_id": { - "acc": 0.26631554843141747, - "acc_stderr": 0.0038620444798720234, - "acc_norm": 0.28058926799480954, - "acc_norm_stderr": 0.003925439934317792 - } - }, - "versions": { - "mmlu_id": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/mmlu/mmlu_id-llama-7B.json b/evals/mmlu/mmlu_id-llama-7B.json deleted file mode 100644 index b6135824ebca4f3650da00511c33cc4a21bfb152..0000000000000000000000000000000000000000 --- a/evals/mmlu/mmlu_id-llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "mmlu_id": { - "acc": 0.2795969773299748, - "acc_stderr": 0.003921194198043396, - "acc_norm": 0.2895962140294634, - "acc_norm_stderr": 0.003962902849695825 - } - }, - "versions": { - "mmlu_id": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/mmlu/mmlu_it-bloom-7b1.json b/evals/mmlu/mmlu_it-bloom-7b1.json deleted file mode 100644 index f1fd4d72695bef88e7d84fea1cef3fe7a204b1d4..0000000000000000000000000000000000000000 --- a/evals/mmlu/mmlu_it-bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "mmlu_it": { - "acc": 0.26161516960036263, - "acc_stderr": 0.0038202735800333108, - "acc_norm": 0.2760444209413009, - "acc_norm_stderr": 0.0038856803174993136 - } - }, - "versions": { - "mmlu_it": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/mmlu/mmlu_it-llama-7B.json b/evals/mmlu/mmlu_it-llama-7B.json deleted file mode 100644 index 4911cc10b24667a5ceebaa64adfc01511364c093..0000000000000000000000000000000000000000 --- a/evals/mmlu/mmlu_it-llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "mmlu_it": { - "acc": 0.29848152904736724, - "acc_stderr": 0.003977405833855968, - "acc_norm": 0.29901034977713986, - "acc_norm_stderr": 0.003979426926074157 - } - }, - "versions": { - "mmlu_it": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/mmlu/mmlu_kn-bloom-7b1.json b/evals/mmlu/mmlu_kn-bloom-7b1.json deleted file mode 100644 index cdc6e7a6340ce902630293fdf1c6020b92559efd..0000000000000000000000000000000000000000 --- a/evals/mmlu/mmlu_kn-bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "mmlu_kn": { - "acc": 0.24622316459051152, - "acc_stderr": 0.0040494962676919264, - "acc_norm": 0.26716141001855287, - "acc_norm_stderr": 0.004159165326445932 - } - }, - "versions": { - "mmlu_kn": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/mmlu/mmlu_kn-llama-7B.json b/evals/mmlu/mmlu_kn-llama-7B.json deleted file mode 100644 index 606fb0050e37b38e833800c8c6787674d6157cca..0000000000000000000000000000000000000000 --- a/evals/mmlu/mmlu_kn-llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "mmlu_kn": { - "acc": 0.23933209647495363, - "acc_stderr": 0.004010635314254899, - "acc_norm": 0.27096033218482196, - "acc_norm_stderr": 0.004177761014860752 - } - }, - "versions": { - "mmlu_kn": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/mmlu/mmlu_ml-bloom-7b1.json b/evals/mmlu/mmlu_ml-bloom-7b1.json deleted file mode 100644 index 0dfd9c349dd00e3ccd1fece3fcf4c414525835bb..0000000000000000000000000000000000000000 --- a/evals/mmlu/mmlu_ml-bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "mmlu_ml": { - "acc": 0.24646354733405876, - "acc_stderr": 0.0041039285720239, - "acc_norm": 0.26414581066376497, - "acc_norm_stderr": 0.0041984507173371734 - } - }, - "versions": { - "mmlu_ml": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/mmlu/mmlu_ml-llama-7B.json b/evals/mmlu/mmlu_ml-llama-7B.json deleted file mode 100644 index 1dc1ffa8a7a5300db7121f13be137d91ddd33088..0000000000000000000000000000000000000000 --- a/evals/mmlu/mmlu_ml-llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "mmlu_ml": { - "acc": 0.24492201668480232, - "acc_stderr": 0.0040952567017621564, - "acc_norm": 0.27529923830250275, - "acc_norm_stderr": 0.004253566006101179 - } - }, - "versions": { - "mmlu_ml": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/mmlu/mmlu_mr-bloom-7b1.json b/evals/mmlu/mmlu_mr-bloom-7b1.json deleted file mode 100644 index de6dc10fd113d66213dca64afc3849f020f6285e..0000000000000000000000000000000000000000 --- a/evals/mmlu/mmlu_mr-bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "mmlu_mr": { - "acc": 0.2495736213757817, - "acc_stderr": 0.003900219801135433, - "acc_norm": 0.26289287744660117, - "acc_norm_stderr": 0.003967257688070526 - } - }, - "versions": { - "mmlu_mr": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/mmlu/mmlu_mr-llama-7B.json b/evals/mmlu/mmlu_mr-llama-7B.json deleted file mode 100644 index a68274469ffcdac51ed2534e328a082e752259d5..0000000000000000000000000000000000000000 --- a/evals/mmlu/mmlu_mr-llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "mmlu_mr": { - "acc": 0.24941119142369853, - "acc_stderr": 0.0038993723464080766, - "acc_norm": 0.2784861528465849, - "acc_norm_stderr": 0.004039799718714403 - } - }, - "versions": { - "mmlu_mr": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/mmlu/mmlu_ne-bloom-7b1.json b/evals/mmlu/mmlu_ne-bloom-7b1.json deleted file mode 100644 index 63db04e7a0d9e7387ac032f7c649cd67f1996ea4..0000000000000000000000000000000000000000 --- a/evals/mmlu/mmlu_ne-bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "mmlu_ne": { - "acc": 0.2568858909499719, - "acc_stderr": 0.003915419717331052, - "acc_norm": 0.2658797077009556, - "acc_norm_stderr": 0.0039591928340292366 - } - }, - "versions": { - "mmlu_ne": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/mmlu/mmlu_ne-llama-7B.json b/evals/mmlu/mmlu_ne-llama-7B.json deleted file mode 100644 index 5f6048f4b5b7f57e7bc90c0226fb4fb987b1f1b5..0000000000000000000000000000000000000000 --- a/evals/mmlu/mmlu_ne-llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "mmlu_ne": { - "acc": 0.245483016140689, - "acc_stderr": 0.0038567872193795804, - "acc_norm": 0.2774431863807918, - "acc_norm_stderr": 0.004012393111736023 - } - }, - "versions": { - "mmlu_ne": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/mmlu/mmlu_pt-bloom-7b1.json b/evals/mmlu/mmlu_pt-bloom-7b1.json deleted file mode 100644 index 3887b3366a9810116b594c74c02905628ee78fcf..0000000000000000000000000000000000000000 --- a/evals/mmlu/mmlu_pt-bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "mmlu_pt": { - "acc": 0.2809216451516061, - "acc_stderr": 0.0038938542873620118, - "acc_norm": 0.287676373461423, - "acc_norm_stderr": 0.0039218389764563225 - } - }, - "versions": { - "mmlu_pt": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/mmlu/mmlu_pt-llama-7B.json b/evals/mmlu/mmlu_pt-llama-7B.json deleted file mode 100644 index d5ff15ab450754ca303e55e1503611a1b7fd3d44..0000000000000000000000000000000000000000 --- a/evals/mmlu/mmlu_pt-llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "mmlu_pt": { - "acc": 0.3016361453017112, - "acc_stderr": 0.003976322071656026, - "acc_norm": 0.3007355148604023, - "acc_norm_stderr": 0.003972940683152965 - } - }, - "versions": { - "mmlu_pt": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/mmlu/mmlu_ro-bloom-7b1.json b/evals/mmlu/mmlu_ro-bloom-7b1.json deleted file mode 100644 index b9ced8c74d8ae4d628e7fe9168ff402ce98cd279..0000000000000000000000000000000000000000 --- a/evals/mmlu/mmlu_ro-bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "mmlu_ro": { - "acc": 0.2555891238670695, - "acc_stderr": 0.003790966515146354, - "acc_norm": 0.2737160120845921, - "acc_norm_stderr": 0.0038750360364507622 - } - }, - "versions": { - "mmlu_ro": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/mmlu/mmlu_ro-llama-7B.json b/evals/mmlu/mmlu_ro-llama-7B.json deleted file mode 100644 index 7474e610db1236709be35a3a648960d8b40a838e..0000000000000000000000000000000000000000 --- a/evals/mmlu/mmlu_ro-llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "mmlu_ro": { - "acc": 0.29342900302114805, - "acc_stderr": 0.003957326026204448, - "acc_norm": 0.2965256797583082, - "acc_norm_stderr": 0.003969425800928827 - } - }, - "versions": { - "mmlu_ro": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/mmlu/mmlu_ru-bloom-7b1.json b/evals/mmlu/mmlu_ru-bloom-7b1.json deleted file mode 100644 index 597b21a215ebd9c9d442c41b7c7577008553e896..0000000000000000000000000000000000000000 --- a/evals/mmlu/mmlu_ru-bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "mmlu_ru": { - "acc": 0.2525563158299377, - "acc_stderr": 0.0038097500220131194, - "acc_norm": 0.2695471669101253, - "acc_norm_stderr": 0.0038908241231695112 - } - }, - "versions": { - "mmlu_ru": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/mmlu/mmlu_ru-llama-7B.json b/evals/mmlu/mmlu_ru-llama-7B.json deleted file mode 100644 index 1cc8eed486b867ef15f762b1387fd29a6cf4416b..0000000000000000000000000000000000000000 --- a/evals/mmlu/mmlu_ru-llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "mmlu_ru": { - "acc": 0.29445683093718766, - "acc_stderr": 0.0039966925205054795, - "acc_norm": 0.3016068270931037, - "acc_norm_stderr": 0.004024377402999243 - } - }, - "versions": { - "mmlu_ru": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/mmlu/mmlu_sk-bloom-7b1.json b/evals/mmlu/mmlu_sk-bloom-7b1.json deleted file mode 100644 index c5c41d03419b8a4038c58ab0e4166ce0e96c28d9..0000000000000000000000000000000000000000 --- a/evals/mmlu/mmlu_sk-bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "mmlu_sk": { - "acc": 0.24927269943347113, - "acc_stderr": 0.003785212350164864, - "acc_norm": 0.26672791303016385, - "acc_norm_stderr": 0.003869711564658995 - } - }, - "versions": { - "mmlu_sk": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/mmlu/mmlu_sk-llama-7B.json b/evals/mmlu/mmlu_sk-llama-7B.json deleted file mode 100644 index 309a344b59b192e0dbc8e50b499a16b67538c1ef..0000000000000000000000000000000000000000 --- a/evals/mmlu/mmlu_sk-llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "mmlu_sk": { - "acc": 0.28127392436074106, - "acc_stderr": 0.003934216199449274, - "acc_norm": 0.2944418925126321, - "acc_norm_stderr": 0.003988209639409228 - } - }, - "versions": { - "mmlu_sk": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/mmlu/mmlu_sr-bloom-7b1.json b/evals/mmlu/mmlu_sr-bloom-7b1.json deleted file mode 100644 index 88c6699b6f71aadafabd08193c19c50d25887e85..0000000000000000000000000000000000000000 --- a/evals/mmlu/mmlu_sr-bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "mmlu_sr": { - "acc": 0.25650952706293173, - "acc_stderr": 0.0038050782551146203, - "acc_norm": 0.27245122599256055, - "acc_norm_stderr": 0.003879266167871199 - } - }, - "versions": { - "mmlu_sr": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/mmlu/mmlu_sr-llama-7B.json b/evals/mmlu/mmlu_sr-llama-7B.json deleted file mode 100644 index fbe389b6b884d3a9692413dc84031d5ea2363b31..0000000000000000000000000000000000000000 --- a/evals/mmlu/mmlu_sr-llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "mmlu_sr": { - "acc": 0.2902907462233356, - "acc_stderr": 0.003954858675409034, - "acc_norm": 0.2920367418203902, - "acc_norm_stderr": 0.003961851981605455 - } - }, - "versions": { - "mmlu_sr": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/mmlu/mmlu_sv-bloom-7b1.json b/evals/mmlu/mmlu_sv-bloom-7b1.json deleted file mode 100644 index 90ee3cd4e9733639263cdcf04b82e171f8485253..0000000000000000000000000000000000000000 --- a/evals/mmlu/mmlu_sv-bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "mmlu_sv": { - "acc": 0.26122788446998335, - "acc_stderr": 0.003820033520031446, - "acc_norm": 0.27491305005292604, - "acc_norm_stderr": 0.0038823517609477554 - } - }, - "versions": { - "mmlu_sv": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/mmlu/mmlu_sv-llama-7B.json b/evals/mmlu/mmlu_sv-llama-7B.json deleted file mode 100644 index d962d7acbb38d8ae28b5d3c396c6389a2ae6bf49..0000000000000000000000000000000000000000 --- a/evals/mmlu/mmlu_sv-llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "mmlu_sv": { - "acc": 0.30024194767881446, - "acc_stderr": 0.003985765983480769, - "acc_norm": 0.29321034326326934, - "acc_norm_stderr": 0.003958556933478504 - } - }, - "versions": { - "mmlu_sv": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/mmlu/mmlu_ta-bloom-7b1.json b/evals/mmlu/mmlu_ta-bloom-7b1.json deleted file mode 100644 index 227c87597c1eb663c59c29f3eb1d52a08a3d189d..0000000000000000000000000000000000000000 --- a/evals/mmlu/mmlu_ta-bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "mmlu_ta": { - "acc": 0.2531252694197776, - "acc_stderr": 0.00403738422854994, - "acc_norm": 0.2664884903871023, - "acc_norm_stderr": 0.004105359016847502 - } - }, - "versions": { - "mmlu_ta": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/mmlu/mmlu_ta-llama-7B.json b/evals/mmlu/mmlu_ta-llama-7B.json deleted file mode 100644 index c47ddc1d3941b02c8ef307b03e1af7c3f33d41f8..0000000000000000000000000000000000000000 --- a/evals/mmlu/mmlu_ta-llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "mmlu_ta": { - "acc": 0.24743512371756185, - "acc_stderr": 0.004006923901271705, - "acc_norm": 0.27752392447624796, - "acc_norm_stderr": 0.004157865121797154 - } - }, - "versions": { - "mmlu_ta": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/mmlu/mmlu_te-bloom-7b1.json b/evals/mmlu/mmlu_te-bloom-7b1.json deleted file mode 100644 index 6dda2185b223b03895db5556e33db9db1733d107..0000000000000000000000000000000000000000 --- a/evals/mmlu/mmlu_te-bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "mmlu_te": { - "acc": 0.2502857142857143, - "acc_stderr": 0.004061713740284853, - "acc_norm": 0.2618901098901099, - "acc_norm_stderr": 0.00412252643604891 - } - }, - "versions": { - "mmlu_te": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/mmlu/mmlu_te-llama-7B.json b/evals/mmlu/mmlu_te-llama-7B.json deleted file mode 100644 index d495ac0b0d562ef0467a6d5a79b03bb80ccfc6a4..0000000000000000000000000000000000000000 --- a/evals/mmlu/mmlu_te-llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "mmlu_te": { - "acc": 0.24562637362637363, - "acc_stderr": 0.00403621353648515, - "acc_norm": 0.26874725274725275, - "acc_norm_stderr": 0.004156704581054155 - } - }, - "versions": { - "mmlu_te": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/mmlu/mmlu_uk-bloom-7b1.json b/evals/mmlu/mmlu_uk-bloom-7b1.json deleted file mode 100644 index 7ad6aa7c934875a8ffa40228178610c089842e74..0000000000000000000000000000000000000000 --- a/evals/mmlu/mmlu_uk-bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "mmlu_uk": { - "acc": 0.24719188163296923, - "acc_stderr": 0.0037969053429642604, - "acc_norm": 0.2663258191959098, - "acc_norm_stderr": 0.003890709230487387 - } - }, - "versions": { - "mmlu_uk": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/mmlu/mmlu_uk-llama-7B.json b/evals/mmlu/mmlu_uk-llama-7B.json deleted file mode 100644 index 2ac08620ea865817dc03d2021d1c2a89e95bd091..0000000000000000000000000000000000000000 --- a/evals/mmlu/mmlu_uk-llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "mmlu_uk": { - "acc": 0.2894104888062592, - "acc_stderr": 0.003991508434906801, - "acc_norm": 0.2939809435277713, - "acc_norm_stderr": 0.004009944142684111 - } - }, - "versions": { - "mmlu_uk": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/mmlu/mmlu_vi-bloom-7b1.json b/evals/mmlu/mmlu_vi-bloom-7b1.json deleted file mode 100644 index 3b29824403bc095477d8a6a0acdb87f1e76c4dfb..0000000000000000000000000000000000000000 --- a/evals/mmlu/mmlu_vi-bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "mmlu_vi": { - "acc": 0.26726381871076405, - "acc_stderr": 0.003872181345366132, - "acc_norm": 0.281427040269484, - "acc_norm_stderr": 0.003934867675165376 - } - }, - "versions": { - "mmlu_vi": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/mmlu/mmlu_vi-llama-7B.json b/evals/mmlu/mmlu_vi-llama-7B.json deleted file mode 100644 index 194b2dd47470bee66f0c97bb28f1a825707dccea..0000000000000000000000000000000000000000 --- a/evals/mmlu/mmlu_vi-llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "mmlu_vi": { - "acc": 0.26052671872607563, - "acc_stderr": 0.0038406007591986315, - "acc_norm": 0.28579084366865715, - "acc_norm_stderr": 0.003953198731610307 - } - }, - "versions": { - "mmlu_vi": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/mmlu/mmlu_zh-bloom-7b1.json b/evals/mmlu/mmlu_zh-bloom-7b1.json deleted file mode 100644 index e98a766b006fc2ceed3e7d766f77be6fdaf5abe6..0000000000000000000000000000000000000000 --- a/evals/mmlu/mmlu_zh-bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "mmlu_zh": { - "acc": 0.27884542347132546, - "acc_stderr": 0.003908427008060506, - "acc_norm": 0.29137865552601594, - "acc_norm_stderr": 0.003960427300065885 - } - }, - "versions": { - "mmlu_zh": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/mmlu/mmlu_zh-llama-7B.json b/evals/mmlu/mmlu_zh-llama-7B.json deleted file mode 100644 index 963997e00a6c8204be6df0d19adfe241fd53d094..0000000000000000000000000000000000000000 --- a/evals/mmlu/mmlu_zh-llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "mmlu_zh": { - "acc": 0.2769464489175845, - "acc_stderr": 0.003900220811105949, - "acc_norm": 0.2883402962400304, - "acc_norm_stderr": 0.003948161607934338 - } - }, - "versions": { - "mmlu_zh": 0 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_ar-bloom-7b1.json b/evals/truthfulqa/truthfulqa_ar-bloom-7b1.json deleted file mode 100644 index 4ecb61811afa7d48353c2bef8d82befffceceb07..0000000000000000000000000000000000000000 --- a/evals/truthfulqa/truthfulqa_ar-bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_ar": { - "mc1": 0.26002587322121606, - "mc1_stderr": 0.015787301353849415, - "mc2": 0.4256353881905651, - "mc2_stderr": 0.015737567507798107 - } - }, - "versions": { - "truthfulqa_ar": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_ar-gpt2.json b/evals/truthfulqa/truthfulqa_ar-gpt2.json deleted file mode 100644 index f83b2bef80b7c2c4a74c05764b7e0d0996d4b489..0000000000000000000000000000000000000000 --- a/evals/truthfulqa/truthfulqa_ar-gpt2.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_ar": { - "mc1": 0.23932729624838292, - "mc1_stderr": 0.015356292760819215, - "mc2": 0.44027391572034885, - "mc2_stderr": 0.01696958534622728 - } - }, - "versions": { - "truthfulqa_ar": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=gpt2", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_ar-llama-7B.json b/evals/truthfulqa/truthfulqa_ar-llama-7B.json deleted file mode 100644 index 8eaf03b60bf7c8428a848aa8ce0dceeb1b8649da..0000000000000000000000000000000000000000 --- a/evals/truthfulqa/truthfulqa_ar-llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_ar": { - "mc1": 0.278137128072445, - "mc1_stderr": 0.016126799456170973, - "mc2": 0.4510826498021589, - "mc2_stderr": 0.01621099626555797 - } - }, - "versions": { - "truthfulqa_ar": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_bn-bloom-7b1.json b/evals/truthfulqa/truthfulqa_bn-bloom-7b1.json deleted file mode 100644 index 3f0f5acb8958dae16338d6f3538d1c45fd1d5be8..0000000000000000000000000000000000000000 --- a/evals/truthfulqa/truthfulqa_bn-bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_bn": { - "mc1": 0.26248399487836105, - "mc1_stderr": 0.015753963575796108, - "mc2": 0.48383834952509674, - "mc2_stderr": 0.01620495508989729 - } - }, - "versions": { - "truthfulqa_bn": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_bn-llama-7B.json b/evals/truthfulqa/truthfulqa_bn-llama-7B.json deleted file mode 100644 index 3c9c3b9489ea6ca298a17d5e7f442b2a42217543..0000000000000000000000000000000000000000 --- a/evals/truthfulqa/truthfulqa_bn-llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_bn": { - "mc1": 0.2765685019206146, - "mc1_stderr": 0.016015952210618845, - "mc2": 0.5123820777474262, - "mc2_stderr": 0.01680032112327857 - } - }, - "versions": { - "truthfulqa_bn": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_ca-bloom-7b1.json b/evals/truthfulqa/truthfulqa_ca-bloom-7b1.json deleted file mode 100644 index ef3e258e39add637921d92a92ce41f916a905cce..0000000000000000000000000000000000000000 --- a/evals/truthfulqa/truthfulqa_ca-bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_ca": { - "mc1": 0.24324324324324326, - "mc1_stderr": 0.015401665455019378, - "mc2": 0.4007618819736215, - "mc2_stderr": 0.015273518926419462 - } - }, - "versions": { - "truthfulqa_ca": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_ca-llama-7B.json b/evals/truthfulqa/truthfulqa_ca-llama-7B.json deleted file mode 100644 index 279d4a6dd8300c3fdf93c1251995060f831d8f3d..0000000000000000000000000000000000000000 --- a/evals/truthfulqa/truthfulqa_ca-llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_ca": { - "mc1": 0.23423423423423423, - "mc1_stderr": 0.015203455154765249, - "mc2": 0.3889981216363435, - "mc2_stderr": 0.015057090749567676 - } - }, - "versions": { - "truthfulqa_ca": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_da-bloom-7b1.json b/evals/truthfulqa/truthfulqa_da-bloom-7b1.json deleted file mode 100644 index 74bcde7ba97432b4b569a73b77198ee611a380d0..0000000000000000000000000000000000000000 --- a/evals/truthfulqa/truthfulqa_da-bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_da": { - "mc1": 0.26248399487836105, - "mc1_stderr": 0.01575396357579612, - "mc2": 0.4375025988127945, - "mc2_stderr": 0.01662443223981383 - } - }, - "versions": { - "truthfulqa_da": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_da-llama-7B.json b/evals/truthfulqa/truthfulqa_da-llama-7B.json deleted file mode 100644 index 08c1d956bd1de9206944f2438d9f56022794d2d5..0000000000000000000000000000000000000000 --- a/evals/truthfulqa/truthfulqa_da-llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_da": { - "mc1": 0.2573623559539053, - "mc1_stderr": 0.01565358047400349, - "mc2": 0.4161317873775415, - "mc2_stderr": 0.015138516880476807 - } - }, - "versions": { - "truthfulqa_da": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_de-bloom-7b1.json b/evals/truthfulqa/truthfulqa_de-bloom-7b1.json deleted file mode 100644 index 068e8c49c1d499f40d02aeb1b4037569845e3f39..0000000000000000000000000000000000000000 --- a/evals/truthfulqa/truthfulqa_de-bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_de": { - "mc1": 0.24746192893401014, - "mc1_stderr": 0.015382646812261825, - "mc2": 0.43516734073709074, - "mc2_stderr": 0.015914493454090475 - } - }, - "versions": { - "truthfulqa_de": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_de-llama-7B.json b/evals/truthfulqa/truthfulqa_de-llama-7B.json deleted file mode 100644 index 870d9cc5a8bc73c2ca376de43d027b704b474970..0000000000000000000000000000000000000000 --- a/evals/truthfulqa/truthfulqa_de-llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_de": { - "mc1": 0.233502538071066, - "mc1_stderr": 0.015080432502225448, - "mc2": 0.38322430555832593, - "mc2_stderr": 0.014662714095687 - } - }, - "versions": { - "truthfulqa_de": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_es-bloom-7b1.json b/evals/truthfulqa/truthfulqa_es-bloom-7b1.json deleted file mode 100644 index ff2caf3355fd7554ac124714fa094f7631c4b942..0000000000000000000000000000000000000000 --- a/evals/truthfulqa/truthfulqa_es-bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_es": { - "mc1": 0.24714828897338403, - "mc1_stderr": 0.015366339219335662, - "mc2": 0.4037104105160595, - "mc2_stderr": 0.014621192787404666 - } - }, - "versions": { - "truthfulqa_es": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_es-llama-7B.json b/evals/truthfulqa/truthfulqa_es-llama-7B.json deleted file mode 100644 index 57d59d5a6d7fcd5e98b4558ed333d506ab551069..0000000000000000000000000000000000000000 --- a/evals/truthfulqa/truthfulqa_es-llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_es": { - "mc1": 0.22686945500633712, - "mc1_stderr": 0.014919398735157142, - "mc2": 0.3704736235055417, - "mc2_stderr": 0.014441434139778718 - } - }, - "versions": { - "truthfulqa_es": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_eu-bloom-7b1.json b/evals/truthfulqa/truthfulqa_eu-bloom-7b1.json deleted file mode 100644 index 0af0c1ab614e35a49f6251d7b28e594279fd4640..0000000000000000000000000000000000000000 --- a/evals/truthfulqa/truthfulqa_eu-bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_eu": { - "mc1": 0.26098191214470284, - "mc1_stderr": 0.015795849655411115, - "mc2": 0.4458532690626118, - "mc2_stderr": 0.016282676760451684 - } - }, - "versions": { - "truthfulqa_eu": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_eu-llama-7B.json b/evals/truthfulqa/truthfulqa_eu-llama-7B.json deleted file mode 100644 index 173bbf1cdee4e48adcce1026ba92eea153711152..0000000000000000000000000000000000000000 --- a/evals/truthfulqa/truthfulqa_eu-llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_eu": { - "mc1": 0.22739018087855298, - "mc1_stderr": 0.015075655972442521, - "mc2": 0.4067861653338961, - "mc2_stderr": 0.016617765169363637 - } - }, - "versions": { - "truthfulqa_eu": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_fr-bloom-7b1.json b/evals/truthfulqa/truthfulqa_fr-bloom-7b1.json deleted file mode 100644 index 59d411be1a435aa79d393d5234b98b20153fa489..0000000000000000000000000000000000000000 --- a/evals/truthfulqa/truthfulqa_fr-bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_fr": { - "mc1": 0.2604828462515883, - "mc1_stderr": 0.015654976408037494, - "mc2": 0.40875422704780084, - "mc2_stderr": 0.014771598297171899 - } - }, - "versions": { - "truthfulqa_fr": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_fr-llama-7B.json b/evals/truthfulqa/truthfulqa_fr-llama-7B.json deleted file mode 100644 index f2cf1301239dab8cdd09c7e41a803f442a37aaff..0000000000000000000000000000000000000000 --- a/evals/truthfulqa/truthfulqa_fr-llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_fr": { - "mc1": 0.2388818297331639, - "mc1_stderr": 0.015209198584184304, - "mc2": 0.3992160965584639, - "mc2_stderr": 0.014275541507345014 - } - }, - "versions": { - "truthfulqa_fr": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_gu-bloom-7b1.json b/evals/truthfulqa/truthfulqa_gu-bloom-7b1.json deleted file mode 100644 index 2e428d6ce6e3db9502a089fe9c54da6bd4d4e2fa..0000000000000000000000000000000000000000 --- a/evals/truthfulqa/truthfulqa_gu-bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_gu": { - "mc1": 0.2585499316005472, - "mc1_stderr": 0.016205100857272815, - "mc2": 0.4553767987804663, - "mc2_stderr": 0.01727282663518889 - } - }, - "versions": { - "truthfulqa_gu": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_gu-llama-7B.json b/evals/truthfulqa/truthfulqa_gu-llama-7B.json deleted file mode 100644 index a439f0578967f86f0d5cd4f63d5c8655fa596680..0000000000000000000000000000000000000000 --- a/evals/truthfulqa/truthfulqa_gu-llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_gu": { - "mc1": 0.2612859097127223, - "mc1_stderr": 0.016260532228493024, - "mc2": 0.42794967344995166, - "mc2_stderr": 0.017270715140237876 - } - }, - "versions": { - "truthfulqa_gu": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_hi-bloom-7b1.json b/evals/truthfulqa/truthfulqa_hi-bloom-7b1.json deleted file mode 100644 index 8576765f053944525c9eb8954a99cd9ce76a4d1c..0000000000000000000000000000000000000000 --- a/evals/truthfulqa/truthfulqa_hi-bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_hi": { - "mc1": 0.2613195342820181, - "mc1_stderr": 0.01581268409688839, - "mc2": 0.44399239540333224, - "mc2_stderr": 0.015881067623592954 - } - }, - "versions": { - "truthfulqa_hi": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_hi-llama-7B.json b/evals/truthfulqa/truthfulqa_hi-llama-7B.json deleted file mode 100644 index e21366d36ceaf8601da21d648ee943852d911560..0000000000000000000000000000000000000000 --- a/evals/truthfulqa/truthfulqa_hi-llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_hi": { - "mc1": 0.2794307891332471, - "mc1_stderr": 0.016149769533382482, - "mc2": 0.47236250377441935, - "mc2_stderr": 0.016709755014514986 - } - }, - "versions": { - "truthfulqa_hi": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_hr-bloom-7b1.json b/evals/truthfulqa/truthfulqa_hr-bloom-7b1.json deleted file mode 100644 index 672cbb9e39a1a7e019ee45709b90eec7588d5235..0000000000000000000000000000000000000000 --- a/evals/truthfulqa/truthfulqa_hr-bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_hr": { - "mc1": 0.2808842652795839, - "mc1_stderr": 0.016217447153754203, - "mc2": 0.4793142433106635, - "mc2_stderr": 0.01663884163172186 - } - }, - "versions": { - "truthfulqa_hr": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_hr-llama-7B.json b/evals/truthfulqa/truthfulqa_hr-llama-7B.json deleted file mode 100644 index 3d1d11b77357870c8e0a53dcbafb4e8980c01f9f..0000000000000000000000000000000000000000 --- a/evals/truthfulqa/truthfulqa_hr-llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_hr": { - "mc1": 0.24187256176853056, - "mc1_stderr": 0.015451967985505181, - "mc2": 0.41709863857620866, - "mc2_stderr": 0.01546097371205123 - } - }, - "versions": { - "truthfulqa_hr": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_hu-bloom-7b1.json b/evals/truthfulqa/truthfulqa_hu-bloom-7b1.json deleted file mode 100644 index 54432301293d130afd643eb21b0db15d9f209b67..0000000000000000000000000000000000000000 --- a/evals/truthfulqa/truthfulqa_hu-bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_hu": { - "mc1": 0.26718547341115434, - "mc1_stderr": 0.015946232556288537, - "mc2": 0.49994152241197887, - "mc2_stderr": 0.01703257765685213 - } - }, - "versions": { - "truthfulqa_hu": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_hu-llama-7B.json b/evals/truthfulqa/truthfulqa_hu-llama-7B.json deleted file mode 100644 index ccaefb69215b32c9208f055af2f3a1cf9c8760bc..0000000000000000000000000000000000000000 --- a/evals/truthfulqa/truthfulqa_hu-llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_hu": { - "mc1": 0.24643320363164722, - "mc1_stderr": 0.015529773657188122, - "mc2": 0.4311628343540659, - "mc2_stderr": 0.01555491548978951 - } - }, - "versions": { - "truthfulqa_hu": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_hy-bloom-7b1.json b/evals/truthfulqa/truthfulqa_hy-bloom-7b1.json deleted file mode 100644 index debcc1a8876d402702e3c9c496eb89bc3ad0f709..0000000000000000000000000000000000000000 --- a/evals/truthfulqa/truthfulqa_hy-bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_hy": { - "mc1": 0.2585895117540687, - "mc1_stderr": 0.018636539619637415, - "mc2": 0.44943643103428205, - "mc2_stderr": 0.02033094239607556 - } - }, - "versions": { - "truthfulqa_hy": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_hy-llama-7B.json b/evals/truthfulqa/truthfulqa_hy-llama-7B.json deleted file mode 100644 index 433e953ddf49c551d21da840cc57c95f665a192a..0000000000000000000000000000000000000000 --- a/evals/truthfulqa/truthfulqa_hy-llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_hy": { - "mc1": 0.2585895117540687, - "mc1_stderr": 0.018636539619637415, - "mc2": 0.4550713950263578, - "mc2_stderr": 0.020036965332656535 - } - }, - "versions": { - "truthfulqa_hy": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_id-bloom-7b1.json b/evals/truthfulqa/truthfulqa_id-bloom-7b1.json deleted file mode 100644 index d6ab9911631d5cf4f7387d705739f249f1da7de2..0000000000000000000000000000000000000000 --- a/evals/truthfulqa/truthfulqa_id-bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_id": { - "mc1": 0.2532133676092545, - "mc1_stderr": 0.01560023256901984, - "mc2": 0.4031249320049949, - "mc2_stderr": 0.015031705347347539 - } - }, - "versions": { - "truthfulqa_id": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_id-llama-7B.json b/evals/truthfulqa/truthfulqa_id-llama-7B.json deleted file mode 100644 index 0967fc5439ed4e2c5217256c546b2f76aa443e6b..0000000000000000000000000000000000000000 --- a/evals/truthfulqa/truthfulqa_id-llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_id": { - "mc1": 0.2570694087403599, - "mc1_stderr": 0.015677933234808462, - "mc2": 0.3981714076698207, - "mc2_stderr": 0.015520404506158571 - } - }, - "versions": { - "truthfulqa_id": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_it-bloom-7b1.json b/evals/truthfulqa/truthfulqa_it-bloom-7b1.json deleted file mode 100644 index 9599a6d59070c187811a37aa2dcaec596f4e300c..0000000000000000000000000000000000000000 --- a/evals/truthfulqa/truthfulqa_it-bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_it": { - "mc1": 0.2707535121328225, - "mc1_stderr": 0.015889888362560486, - "mc2": 0.4374801864181257, - "mc2_stderr": 0.015955762711633903 - } - }, - "versions": { - "truthfulqa_it": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_it-llama-7B.json b/evals/truthfulqa/truthfulqa_it-llama-7B.json deleted file mode 100644 index 221af91b2b82bf70d904265c27c0279db93872af..0000000000000000000000000000000000000000 --- a/evals/truthfulqa/truthfulqa_it-llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_it": { - "mc1": 0.24521072796934865, - "mc1_stderr": 0.015384352284543929, - "mc2": 0.39642666716879443, - "mc2_stderr": 0.01483705265700183 - } - }, - "versions": { - "truthfulqa_it": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_kn-bloom-7b1.json b/evals/truthfulqa/truthfulqa_kn-bloom-7b1.json deleted file mode 100644 index b116af421e76c9c9f0d685f0a1156de33d48fa41..0000000000000000000000000000000000000000 --- a/evals/truthfulqa/truthfulqa_kn-bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_kn": { - "mc1": 0.28466076696165193, - "mc1_stderr": 0.017343050775840425, - "mc2": 0.49109028617714945, - "mc2_stderr": 0.017608862092749467 - } - }, - "versions": { - "truthfulqa_kn": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_kn-llama-7B.json b/evals/truthfulqa/truthfulqa_kn-llama-7B.json deleted file mode 100644 index f05f0339406ac5574d7a1dc62bddacb292f097eb..0000000000000000000000000000000000000000 --- a/evals/truthfulqa/truthfulqa_kn-llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_kn": { - "mc1": 0.275811209439528, - "mc1_stderr": 0.017176612615872052, - "mc2": 0.4635130117214921, - "mc2_stderr": 0.01825683954680752 - } - }, - "versions": { - "truthfulqa_kn": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_ml-bloom-7b1.json b/evals/truthfulqa/truthfulqa_ml-bloom-7b1.json deleted file mode 100644 index d2ada8ce66115bbf7e7e2ac501b996bc7b9ab3a1..0000000000000000000000000000000000000000 --- a/evals/truthfulqa/truthfulqa_ml-bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_ml": { - "mc1": 0.260806916426513, - "mc1_stderr": 0.01667907195342198, - "mc2": 0.47996911862138697, - "mc2_stderr": 0.017778690252427683 - } - }, - "versions": { - "truthfulqa_ml": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_ml-llama-7B.json b/evals/truthfulqa/truthfulqa_ml-llama-7B.json deleted file mode 100644 index 4dd3caeb8a76c583e812d275589a2c18156d6935..0000000000000000000000000000000000000000 --- a/evals/truthfulqa/truthfulqa_ml-llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_ml": { - "mc1": 0.2824207492795389, - "mc1_stderr": 0.01710080754090615, - "mc2": 0.5024391989231584, - "mc2_stderr": 0.017936047828800445 - } - }, - "versions": { - "truthfulqa_ml": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_mr-bloom-7b1.json b/evals/truthfulqa/truthfulqa_mr-bloom-7b1.json deleted file mode 100644 index 181033bdf126dc47bfc09557ea24531f4fead727..0000000000000000000000000000000000000000 --- a/evals/truthfulqa/truthfulqa_mr-bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_mr": { - "mc1": 0.2761780104712042, - "mc1_stderr": 0.016186321628712155, - "mc2": 0.4765064151203332, - "mc2_stderr": 0.016772466571288412 - } - }, - "versions": { - "truthfulqa_mr": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_mr-llama-7B.json b/evals/truthfulqa/truthfulqa_mr-llama-7B.json deleted file mode 100644 index a1fcd59738ae0b14a296aba32a13e2bda55370e3..0000000000000000000000000000000000000000 --- a/evals/truthfulqa/truthfulqa_mr-llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_mr": { - "mc1": 0.2905759162303665, - "mc1_stderr": 0.016436922328865435, - "mc2": 0.49306373435254724, - "mc2_stderr": 0.016980148211258952 - } - }, - "versions": { - "truthfulqa_mr": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_ne-bloom-7b1.json b/evals/truthfulqa/truthfulqa_ne-bloom-7b1.json deleted file mode 100644 index 89defff7cdf83326b83aee4c35f6b7ab666393c0..0000000000000000000000000000000000000000 --- a/evals/truthfulqa/truthfulqa_ne-bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_ne": { - "mc1": 0.28811369509043927, - "mc1_stderr": 0.0162891162717815, - "mc2": 0.46164155205805624, - "mc2_stderr": 0.016689007834004295 - } - }, - "versions": { - "truthfulqa_ne": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_ne-llama-7B.json b/evals/truthfulqa/truthfulqa_ne-llama-7B.json deleted file mode 100644 index b18b50165478e2f5e3938b2978e51ae65ffb09b0..0000000000000000000000000000000000000000 --- a/evals/truthfulqa/truthfulqa_ne-llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_ne": { - "mc1": 0.29198966408268734, - "mc1_stderr": 0.016353615824015625, - "mc2": 0.4636310825029969, - "mc2_stderr": 0.016928691048242774 - } - }, - "versions": { - "truthfulqa_ne": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_nl_Llama-2-13b-hf.json b/evals/truthfulqa/truthfulqa_nl_Llama-2-13b-hf.json new file mode 100644 index 0000000000000000000000000000000000000000..cc50ce18862c928b9b51cbda3ac9cc0c13b71b40 --- /dev/null +++ b/evals/truthfulqa/truthfulqa_nl_Llama-2-13b-hf.json @@ -0,0 +1,23 @@ +{ + "results": { + "truthfulqa_nl": { + "mc1": 0.2764331210191083, + "mc1_stderr": 0.01597262688062874, + "mc2": 0.4103755310313891, + "mc2_stderr": 0.014811313488625848 + } + }, + "versions": { + "truthfulqa_nl": 1 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=meta-llama/Llama-2-13b-hf,use_accelerate=True,device_map_option=auto,dtype=bfloat16", + "batch_size": 8, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_nl_Llama-2-7b-chat-hf.json b/evals/truthfulqa/truthfulqa_nl_Llama-2-7b-chat-hf.json new file mode 100644 index 0000000000000000000000000000000000000000..ece12d09b26076267e338648ee7a6d36c649199c --- /dev/null +++ b/evals/truthfulqa/truthfulqa_nl_Llama-2-7b-chat-hf.json @@ -0,0 +1,23 @@ +{ + "results": { + "truthfulqa_nl": { + "mc1": 0.2917197452229299, + "mc1_stderr": 0.016234071293195287, + "mc2": 0.4462996697687161, + "mc2_stderr": 0.016161710042968205 + } + }, + "versions": { + "truthfulqa_nl": 1 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=meta-llama/Llama-2-7b-chat-hf,use_accelerate=True,device_map_option=auto,dtype=bfloat16", + "batch_size": 64, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_nl_Llama-2-7b-hf.json b/evals/truthfulqa/truthfulqa_nl_Llama-2-7b-hf.json new file mode 100644 index 0000000000000000000000000000000000000000..ab4837ac43ff277111882cfd0e0a9fc9b5e05518 --- /dev/null +++ b/evals/truthfulqa/truthfulqa_nl_Llama-2-7b-hf.json @@ -0,0 +1,23 @@ +{ + "results": { + "truthfulqa_nl": { + "mc1": 0.28152866242038216, + "mc1_stderr": 0.016062309899461683, + "mc2": 0.41626070733921117, + "mc2_stderr": 0.014914193769419527 + } + }, + "versions": { + "truthfulqa_nl": 1 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=meta-llama/Llama-2-7b-hf,use_accelerate=True,device_map_option=auto,dtype=bfloat16", + "batch_size": 64, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_nl_Mistral-7B-v0.1.json b/evals/truthfulqa/truthfulqa_nl_Mistral-7B-v0.1.json new file mode 100644 index 0000000000000000000000000000000000000000..f63906c6941530a86f973e4da2bce0c2ca6724e4 --- /dev/null +++ b/evals/truthfulqa/truthfulqa_nl_Mistral-7B-v0.1.json @@ -0,0 +1,23 @@ +{ + "results": { + "truthfulqa_nl": { + "mc1": 0.3070063694267516, + "mc1_stderr": 0.01647328769082192, + "mc2": 0.45280570817630444, + "mc2_stderr": 0.015014728029135574 + } + }, + "versions": { + "truthfulqa_nl": 1 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=mistralai/Mistral-7B-v0.1,use_accelerate=True,device_map_option=auto,dtype=bfloat16", + "batch_size": 64, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_nl_falcon-40b-ft-alpaca-dolly-dutch.json b/evals/truthfulqa/truthfulqa_nl_falcon-40b-ft-alpaca-dolly-dutch.json new file mode 100644 index 0000000000000000000000000000000000000000..ae35054ea6bbd7a2539e1f37f6066ebf947d5354 --- /dev/null +++ b/evals/truthfulqa/truthfulqa_nl_falcon-40b-ft-alpaca-dolly-dutch.json @@ -0,0 +1,23 @@ +{ + "results": { + "truthfulqa_nl": { + "mc1": 0.310828025477707, + "mc1_stderr": 0.016529733724696277, + "mc2": 0.4460845208916539, + "mc2_stderr": 0.01476856418537487 + } + }, + "versions": { + "truthfulqa_nl": 1 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=BramVanroy/falcon-40b-ft-alpaca-dolly-dutch,use_accelerate=True,device_map_option=auto,dtype=bfloat16", + "batch_size": 8, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_nl_falcon-40b.json b/evals/truthfulqa/truthfulqa_nl_falcon-40b.json new file mode 100644 index 0000000000000000000000000000000000000000..48f1f48a671435726595dc9daa36cccf2f0a1daf --- /dev/null +++ b/evals/truthfulqa/truthfulqa_nl_falcon-40b.json @@ -0,0 +1,23 @@ +{ + "results": { + "truthfulqa_nl": { + "mc1": 0.2764331210191083, + "mc1_stderr": 0.01597262688062875, + "mc2": 0.4091336161450544, + "mc2_stderr": 0.014605140809282338 + } + }, + "versions": { + "truthfulqa_nl": 1 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=tiiuae/falcon-40b,use_accelerate=True,device_map_option=auto,dtype=bfloat16", + "batch_size": 8, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_nl_llama2-13b-ft-mc4_nl_cleaned_tiny.json b/evals/truthfulqa/truthfulqa_nl_llama2-13b-ft-mc4_nl_cleaned_tiny.json new file mode 100644 index 0000000000000000000000000000000000000000..649cd0e84504b0783d539643aa070c34a5218f96 --- /dev/null +++ b/evals/truthfulqa/truthfulqa_nl_llama2-13b-ft-mc4_nl_cleaned_tiny.json @@ -0,0 +1,23 @@ +{ + "results": { + "truthfulqa_nl": { + "mc1": 0.2751592356687898, + "mc1_stderr": 0.0159498029022655, + "mc2": 0.41816127879466414, + "mc2_stderr": 0.01474120131034505 + } + }, + "versions": { + "truthfulqa_nl": 1 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=BramVanroy/llama2-13b-ft-mc4_nl_cleaned_tiny,use_accelerate=True,device_map_option=auto,dtype=bfloat16", + "batch_size": 8, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_nl_zephyr-7b-beta.json b/evals/truthfulqa/truthfulqa_nl_zephyr-7b-beta.json new file mode 100644 index 0000000000000000000000000000000000000000..66cbc553a965b6d420caf50420f7c0a71edff7e9 --- /dev/null +++ b/evals/truthfulqa/truthfulqa_nl_zephyr-7b-beta.json @@ -0,0 +1,23 @@ +{ + "results": { + "truthfulqa_nl": { + "mc1": 0.3719745222929936, + "mc1_stderr": 0.0172618443903749, + "mc2": 0.5294532108691418, + "mc2_stderr": 0.016221848481192833 + } + }, + "versions": { + "truthfulqa_nl": 1 + }, + "config": { + "model": "hf-auto", + "model_args": "pretrained=HuggingFaceH4/zephyr-7b-beta,use_accelerate=True,device_map_option=auto,dtype=bfloat16", + "batch_size": 64, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_pt-bloom-7b1.json b/evals/truthfulqa/truthfulqa_pt-bloom-7b1.json deleted file mode 100644 index d9c6cefe30e562acfb981870f9e593f27f720a3d..0000000000000000000000000000000000000000 --- a/evals/truthfulqa/truthfulqa_pt-bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_pt": { - "mc1": 0.23857868020304568, - "mc1_stderr": 0.015192910034567013, - "mc2": 0.38894722340741417, - "mc2_stderr": 0.014531269277587645 - } - }, - "versions": { - "truthfulqa_pt": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_pt-llama-7B.json b/evals/truthfulqa/truthfulqa_pt-llama-7B.json deleted file mode 100644 index 1ae678becb49d878dc30174f2c390f2c1b5a1f49..0000000000000000000000000000000000000000 --- a/evals/truthfulqa/truthfulqa_pt-llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_pt": { - "mc1": 0.22842639593908629, - "mc1_stderr": 0.014964922033138022, - "mc2": 0.3823261607330551, - "mc2_stderr": 0.014633193983144183 - } - }, - "versions": { - "truthfulqa_pt": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_ro-bloom-7b1.json b/evals/truthfulqa/truthfulqa_ro-bloom-7b1.json deleted file mode 100644 index e9d6490be6beab45fd85e68d9df1e301bf2dff28..0000000000000000000000000000000000000000 --- a/evals/truthfulqa/truthfulqa_ro-bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_ro": { - "mc1": 0.26187419768934533, - "mc1_stderr": 0.015762378425124946, - "mc2": 0.4605371384706094, - "mc2_stderr": 0.016307442681458683 - } - }, - "versions": { - "truthfulqa_ro": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_ro-llama-7B.json b/evals/truthfulqa/truthfulqa_ro-llama-7B.json deleted file mode 100644 index 26abd62509f8f15981ca8051421f879ea16ddc2f..0000000000000000000000000000000000000000 --- a/evals/truthfulqa/truthfulqa_ro-llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_ro": { - "mc1": 0.22849807445442877, - "mc1_stderr": 0.015052893222788351, - "mc2": 0.37047262828252514, - "mc2_stderr": 0.015022205435273333 - } - }, - "versions": { - "truthfulqa_ro": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_ru-bloom-7b1.json b/evals/truthfulqa/truthfulqa_ru-bloom-7b1.json deleted file mode 100644 index 3347a51ef0c14c0658f692111f9112b52f876a5c..0000000000000000000000000000000000000000 --- a/evals/truthfulqa/truthfulqa_ru-bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_ru": { - "mc1": 0.30710659898477155, - "mc1_stderr": 0.016443354533552747, - "mc2": 0.49874761323987404, - "mc2_stderr": 0.016167778359600482 - } - }, - "versions": { - "truthfulqa_ru": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_ru-llama-7B.json b/evals/truthfulqa/truthfulqa_ru-llama-7B.json deleted file mode 100644 index 54b06b11d61f59c9f47d987a96a9290c09921a27..0000000000000000000000000000000000000000 --- a/evals/truthfulqa/truthfulqa_ru-llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_ru": { - "mc1": 0.24619289340101522, - "mc1_stderr": 0.015356084872692898, - "mc2": 0.40938277991151933, - "mc2_stderr": 0.015252017769860154 - } - }, - "versions": { - "truthfulqa_ru": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_sk-bloom-7b1.json b/evals/truthfulqa/truthfulqa_sk-bloom-7b1.json deleted file mode 100644 index 1132cb125d8848afa4abc9ecef17405375f5ccc0..0000000000000000000000000000000000000000 --- a/evals/truthfulqa/truthfulqa_sk-bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_sk": { - "mc1": 0.2390745501285347, - "mc1_stderr": 0.015301260856408254, - "mc2": 0.43782616190313467, - "mc2_stderr": 0.01657761354751216 - } - }, - "versions": { - "truthfulqa_sk": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_sk-llama-7B.json b/evals/truthfulqa/truthfulqa_sk-llama-7B.json deleted file mode 100644 index 71e866145020816a8524a8bd50cddf94af5042ea..0000000000000000000000000000000000000000 --- a/evals/truthfulqa/truthfulqa_sk-llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_sk": { - "mc1": 0.2275064267352185, - "mc1_stderr": 0.015039512631474048, - "mc2": 0.40729144857566124, - "mc2_stderr": 0.015845697731465 - } - }, - "versions": { - "truthfulqa_sk": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_sr-bloom-7b1.json b/evals/truthfulqa/truthfulqa_sr-bloom-7b1.json deleted file mode 100644 index 75efa51eca0c0d99414987b87632f9c19f581a21..0000000000000000000000000000000000000000 --- a/evals/truthfulqa/truthfulqa_sr-bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_sr": { - "mc1": 0.2878980891719745, - "mc1_stderr": 0.016170834614246097, - "mc2": 0.4604993074094113, - "mc2_stderr": 0.01649631560714403 - } - }, - "versions": { - "truthfulqa_sr": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_sr-llama-7B.json b/evals/truthfulqa/truthfulqa_sr-llama-7B.json deleted file mode 100644 index a65b681172e15a187d19448a27058ec125e2b1f1..0000000000000000000000000000000000000000 --- a/evals/truthfulqa/truthfulqa_sr-llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_sr": { - "mc1": 0.26878980891719745, - "mc1_stderr": 0.01583322873155152, - "mc2": 0.422701657829082, - "mc2_stderr": 0.015374851085961157 - } - }, - "versions": { - "truthfulqa_sr": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_sv-bloom-7b1.json b/evals/truthfulqa/truthfulqa_sv-bloom-7b1.json deleted file mode 100644 index 85698716bf120fd641d6dfcb551bdb145d17bc87..0000000000000000000000000000000000000000 --- a/evals/truthfulqa/truthfulqa_sv-bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_sv": { - "mc1": 0.2622739018087855, - "mc1_stderr": 0.015821052272364522, - "mc2": 0.44572489319670916, - "mc2_stderr": 0.016517364176123605 - } - }, - "versions": { - "truthfulqa_sv": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_sv-llama-7B.json b/evals/truthfulqa/truthfulqa_sv-llama-7B.json deleted file mode 100644 index f2f88649e17469e2a7fdc44f296619fe407feac6..0000000000000000000000000000000000000000 --- a/evals/truthfulqa/truthfulqa_sv-llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_sv": { - "mc1": 0.2596899224806202, - "mc1_stderr": 0.01577046983489191, - "mc2": 0.4052891370296314, - "mc2_stderr": 0.01500679891573553 - } - }, - "versions": { - "truthfulqa_sv": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_ta-bloom-7b1.json b/evals/truthfulqa/truthfulqa_ta-bloom-7b1.json deleted file mode 100644 index 956d773e26ebf10fc23669bb18d5b9df924be462..0000000000000000000000000000000000000000 --- a/evals/truthfulqa/truthfulqa_ta-bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_ta": { - "mc1": 0.2651413189771198, - "mc1_stderr": 0.016204613164182584, - "mc2": 0.48348066773619114, - "mc2_stderr": 0.016887213348384833 - } - }, - "versions": { - "truthfulqa_ta": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_ta-llama-7B.json b/evals/truthfulqa/truthfulqa_ta-llama-7B.json deleted file mode 100644 index 3edaa546d22cbb705a02af8433a7b3ecb4f29213..0000000000000000000000000000000000000000 --- a/evals/truthfulqa/truthfulqa_ta-llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_ta": { - "mc1": 0.28263795423956933, - "mc1_stderr": 0.016530366611189357, - "mc2": 0.5032626048969708, - "mc2_stderr": 0.01719880976895468 - } - }, - "versions": { - "truthfulqa_ta": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_te-bloom-7b1.json b/evals/truthfulqa/truthfulqa_te-bloom-7b1.json deleted file mode 100644 index d139c759617d41dd724dc54443b08c7eba5c2a83..0000000000000000000000000000000000000000 --- a/evals/truthfulqa/truthfulqa_te-bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_te": { - "mc1": 0.2652482269503546, - "mc1_stderr": 0.016638349265004355, - "mc2": 0.4612285746093752, - "mc2_stderr": 0.017504699336599025 - } - }, - "versions": { - "truthfulqa_te": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_te-llama-7B.json b/evals/truthfulqa/truthfulqa_te-llama-7B.json deleted file mode 100644 index b7371487cfa5f3b205258d0c63aa1d722e304a75..0000000000000000000000000000000000000000 --- a/evals/truthfulqa/truthfulqa_te-llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_te": { - "mc1": 0.2851063829787234, - "mc1_stderr": 0.01701523103469595, - "mc2": 0.4821795923320059, - "mc2_stderr": 0.01784811574301116 - } - }, - "versions": { - "truthfulqa_te": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_uk-bloom-7b1.json b/evals/truthfulqa/truthfulqa_uk-bloom-7b1.json deleted file mode 100644 index da866d1706ae757888bf53041acc427c30e98a06..0000000000000000000000000000000000000000 --- a/evals/truthfulqa/truthfulqa_uk-bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_uk": { - "mc1": 0.3090909090909091, - "mc1_stderr": 0.01666442755255745, - "mc2": 0.5143873310692731, - "mc2_stderr": 0.016755211041268873 - } - }, - "versions": { - "truthfulqa_uk": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_uk-llama-7B.json b/evals/truthfulqa/truthfulqa_uk-llama-7B.json deleted file mode 100644 index 3a420b35b0478fcc320798e8287f213c061df4fe..0000000000000000000000000000000000000000 --- a/evals/truthfulqa/truthfulqa_uk-llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_uk": { - "mc1": 0.23636363636363636, - "mc1_stderr": 0.015320412612327241, - "mc2": 0.4141829984231552, - "mc2_stderr": 0.01560702677887637 - } - }, - "versions": { - "truthfulqa_uk": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_vi-bloom-7b1.json b/evals/truthfulqa/truthfulqa_vi-bloom-7b1.json deleted file mode 100644 index f21113c3d005bd269763438b047147bb50ac5125..0000000000000000000000000000000000000000 --- a/evals/truthfulqa/truthfulqa_vi-bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_vi": { - "mc1": 0.2968152866242038, - "mc1_stderr": 0.016316229722585934, - "mc2": 0.44721474578334436, - "mc2_stderr": 0.015073430494043749 - } - }, - "versions": { - "truthfulqa_vi": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_vi-llama-7B.json b/evals/truthfulqa/truthfulqa_vi-llama-7B.json deleted file mode 100644 index bc5992da0821ee82c8ce26e99fb73e6e2f872651..0000000000000000000000000000000000000000 --- a/evals/truthfulqa/truthfulqa_vi-llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_vi": { - "mc1": 0.2445859872611465, - "mc1_stderr": 0.015351480770855935, - "mc2": 0.42975481561967727, - "mc2_stderr": 0.01625176801732652 - } - }, - "versions": { - "truthfulqa_vi": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_zh-bloom-7b1.json b/evals/truthfulqa/truthfulqa_zh-bloom-7b1.json deleted file mode 100644 index 7496dee8d8893c925eac3f5a5de1723f69d1ad77..0000000000000000000000000000000000000000 --- a/evals/truthfulqa/truthfulqa_zh-bloom-7b1.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_zh": { - "mc1": 0.22842639593908629, - "mc1_stderr": 0.014964922033138017, - "mc2": 0.38822244050439564, - "mc2_stderr": 0.014953544130092178 - } - }, - "versions": { - "truthfulqa_zh": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=bigscience/bloom-7b1", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file diff --git a/evals/truthfulqa/truthfulqa_zh-llama-7B.json b/evals/truthfulqa/truthfulqa_zh-llama-7B.json deleted file mode 100644 index eeab4eff270462460733b050ac068062679cc507..0000000000000000000000000000000000000000 --- a/evals/truthfulqa/truthfulqa_zh-llama-7B.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "results": { - "truthfulqa_zh": { - "mc1": 0.26649746192893403, - "mc1_stderr": 0.015760136800242356, - "mc2": 0.43598966702035913, - "mc2_stderr": 0.015850355717645676 - } - }, - "versions": { - "truthfulqa_zh": 1 - }, - "config": { - "model": "hf-auto", - "model_args": "pretrained=/sensei-fs/users/daclai/uoChatGPT/llama-7B", - "batch_size": 1, - "device": "cuda", - "no_cache": false, - "limit": null, - "bootstrap_iters": 100000, - "description_dict": {} - } -} \ No newline at end of file