ref model and tuple error fix
app.py CHANGED

@@ -34,7 +34,6 @@ COLS_LITE = [c.name for c in fields(AutoEvalColumn) if c.displayed_by_default an
 TYPES_LITE = [c.type for c in fields(AutoEvalColumn) if c.displayed_by_default and not c.hidden]
 
 # CONFIGURATION:
-ref_model = "mistralai/Mistral-7B-v0.1"
 test_datasets = ["truthful_qa","cais/mmlu","ai2_arc","gsm8k","Rowan/hellaswag","winogrande"]
 modelQueue = (pd.read_csv('data/queue.csv')).values.tolist()
 print(modelQueue)
@@ -48,11 +47,11 @@ def formatr(result):
     result = result.replace(" ","")
     return result
 
-def save_to_txt(model, results, model_type):
+def save_to_txt(model, results, model_type,ref_model):
     file_path = "data/code_eval_board.csv"
 
     with open(file_path, "a") as f:
-        f.write(f"\n{model_type},{model}," + str(formatr(results["arc"])) + "," + str(formatr(results["hellaswag"])) + "," + str(formatr(results["mmlu"])) + "," + str(formatr(results["truthfulQA"])) + "," + str(formatr(results["winogrande"])) + "," + str(formatr(results["gsm8k"])))
+        f.write(f"\n{model_type},{model}," + str(formatr(results["arc"])) + "," + str(formatr(results["hellaswag"])) + "," + str(formatr(results["mmlu"])) + "," + str(formatr(results["truthfulQA"])) + "," + str(formatr(results["winogrande"])) + "," + str(formatr(results["gsm8k"])) + f",{ref_model}")
         f.close()
 
 def run_test(model,ref_model,data):
@@ -67,8 +66,7 @@ def run_test(model,ref_model,data):
         ratio_gen=0.4
     ) # Call the main function in detect-pretrain-code-contamination/src/run.py
 
-def evaluate(model,model_type):
-    global ref_model
+def evaluate(model,model_type,ref_model):
     print(f"|| EVALUATING {model} ||")
     results = {
         "arc": run_test(model, ref_model, test_datasets[2]),
@@ -81,14 +79,14 @@ def evaluate(model,model_type):
     }
 
     # Save to .txt file in /Evaluations/{model}
-    save_to_txt(model, results, model_type)
+    save_to_txt(model, results, model_type,ref_model)
     return "\n".join([f"{k}:{results[k]}" for k in results])
 
 def worker_thread():
     global modelQueue, server
     while True:
         for submission in modelQueue:
-            #evaluate(submission[1],submission[0].split(" ")[0])
+            #evaluate(submission[1],submission[0].split(" ")[0],submission[2])
             #modelQueue.pop(modelQueue.index(submission))
 
             # Uncomment those lines in order to begin testing, I test these models outside of this space and later commit the results back.
@@ -110,6 +108,12 @@ def queue(model,model_type,ref_model):
         f.write(f"\n{model_type},{model},{ref_model}")
         f.close()
     print(f"QUEUE:\n{modelQueue}")
+
+    eval_entry = {
+        "model": model,
+        "model_type": model_type,
+        "ref_model": ref_model,
+    }
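In short, this commit stops treating `ref_model` as a module-level constant and instead threads it through the call chain (queue row → `evaluate` → `save_to_txt`), so each queued submission can carry its own reference model. The sketch below is only an illustration of that flow, not the space's actual code: `run_test` is stubbed out (the real function calls into detect-pretrain-code-contamination/src/run.py), and the queue row and dataset names are made up for the example.

```python
# Hedged sketch of the post-commit flow; run_test is a stand-in for the real
# contamination check in detect-pretrain-code-contamination/src/run.py.
def run_test(model, ref_model, dataset):
    # Placeholder result instead of a real contamination score.
    return f"score({model} vs {ref_model} on {dataset})"

def evaluate(model, model_type, ref_model):
    # ref_model is now an explicit parameter rather than a global.
    print(f"|| EVALUATING {model} ||")
    return {d: run_test(model, ref_model, d) for d in ("ai2_arc", "gsm8k")}

# Hypothetical queue rows mirroring data/queue.csv: [model_type, model, ref_model].
modelQueue = [["fine-tuned", "org/some-model", "mistralai/Mistral-7B-v0.1"]]

for submission in modelQueue:
    # Same indexing as the commented-out call in worker_thread().
    results = evaluate(submission[1], submission[0].split(" ")[0], submission[2])
    print(results)
```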
### bigcode/bigcode-models-leaderboard