natolambert
committed on
Commit
•
bb95637
1
Parent(s):
61c1fca
up
Browse files- src/constants.py +3 -2
src/constants.py
CHANGED
@@ -31,7 +31,7 @@ example_counts = {
|
|
31 |
"mt-bench-easy": 28,
|
32 |
"mt-bench-med": 40,
|
33 |
"mt-bench-hard": 37,
|
34 |
-
"math-prm": 984, # actual length 447, upweighting to be equal to code
|
35 |
"refusals-dangerous": 100,
|
36 |
"refusals-offensive": 100,
|
37 |
"llmbar-natural": 100,
|
@@ -54,5 +54,6 @@ subset_mapping = {
|
|
54 |
"Chat": ["alpacaeval-easy", "alpacaeval-length", "alpacaeval-hard", "mt-bench-easy", "mt-bench-med"],
|
55 |
"Chat Hard": ["mt-bench-hard", "llmbar-natural", "llmbar-adver-neighbor", "llmbar-adver-GPTInst", "llmbar-adver-GPTOut", "llmbar-adver-manual"],
|
56 |
"Safety": ["refusals-dangerous", "refusals-offensive", "xstest-should-refuse", "xstest-should-respond", "donotanswer"],
|
57 |
-
"Reasoning": ["math-prm",
|
|
|
58 |
}
|
|
|
31 |
"mt-bench-easy": 28,
|
32 |
"mt-bench-med": 40,
|
33 |
"mt-bench-hard": 37,
|
34 |
+
# "math-prm": 984, # actual length 447, upweighting to be equal to code
|
35 |
"refusals-dangerous": 100,
|
36 |
"refusals-offensive": 100,
|
37 |
"llmbar-natural": 100,
|
|
|
54 |
"Chat": ["alpacaeval-easy", "alpacaeval-length", "alpacaeval-hard", "mt-bench-easy", "mt-bench-med"],
|
55 |
"Chat Hard": ["mt-bench-hard", "llmbar-natural", "llmbar-adver-neighbor", "llmbar-adver-GPTInst", "llmbar-adver-GPTOut", "llmbar-adver-manual"],
|
56 |
"Safety": ["refusals-dangerous", "refusals-offensive", "xstest-should-refuse", "xstest-should-respond", "donotanswer"],
|
57 |
+
"Reasoning": [#"math-prm",
|
58 |
+
"hep-cpp", "hep-go", "hep-java", "hep-js", "hep-python", "hep-rust"]
|
59 |
}
|