danielz02 committed
Shorten perspective display name
- src/display/about.py +13 -13
- src/leaderboard/read_evals.py +1 -1
src/display/about.py
CHANGED
@@ -13,12 +13,12 @@ class Task:
 class Tasks(Enum):
     # task_key in the json file, metric_key in the json file, name to display in the leaderboard
     task0 = Task("toxicity", "aggregated-results", "Toxicity")
-    task1 = Task("stereotype", "aggregated-results", "Stereotype and bias")
-    task2 = Task("adv", "aggregated-results", "Adversarial robustness")
-    task3 = Task("ood", "aggregated-results", "OoD Robustness")
-    task4 = Task("adv_demo", "aggregated-results", "Robustness to Adversarial Demonstrations")
+    task1 = Task("stereotype", "aggregated-results", "Stereotype")
+    task2 = Task("adv", "aggregated-results", "Adv Robustness")
+    task3 = Task("ood", "aggregated-results", "OoD")
+    task4 = Task("adv_demo", "aggregated-results", "Adv Demo")
     task5 = Task("privacy", "aggregated-results", "Privacy")
-    task6 = Task("ethics", "aggregated-results", "Machine Ethics")
+    task6 = Task("ethics", "aggregated-results", "Ethics")
     task7 = Task("fairness", "aggregated-results", "Fairness")


@@ -41,14 +41,14 @@ limitations, and potential risks involved in deploying these state-of-the-art La

 This project is organized around the following eight primary perspectives of trustworthiness, including:

-Toxicity
-Stereotype and bias
-Adversarial robustness
-Out-of-Distribution Robustness
-Privacy
-Robustness to Adversarial Demonstrations
-Machine Ethics
-Fairness
++ Toxicity
++ Stereotype and bias
++ Adversarial robustness
++ Out-of-Distribution Robustness
++ Privacy
++ Robustness to Adversarial Demonstrations
++ Machine Ethics
++ Fairness

 ## Reproducibility
 To reproduce our results, checkout https://github.com/AI-secure/DecodingTrust
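For context: in the stock Hugging Face leaderboard template this Space builds on, Task is a small dataclass whose third argument is the column header the leaderboard renders, which is why shortening these strings narrows the results table. A minimal sketch, assuming the template's field names (benchmark, metric, col_name are not shown in this diff):

from dataclasses import dataclass
from enum import Enum

@dataclass
class Task:
    benchmark: str  # task_key in the results JSON, e.g. "adv"
    metric: str     # metric_key in the results JSON, e.g. "aggregated-results"
    col_name: str   # display name rendered as the leaderboard column header

class Tasks(Enum):
    task0 = Task("toxicity", "aggregated-results", "Toxicity")
    task2 = Task("adv", "aggregated-results", "Adv Robustness")

# Column headers come straight from col_name, so shorter strings mean a narrower table:
print([task.value.col_name for task in Tasks])  # ['Toxicity', 'Adv Robustness']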
src/leaderboard/read_evals.py
CHANGED
@@ -37,7 +37,6 @@ class EvalResult:
         data = json.load(fp)

         config = data.get("config")
-        print(config)

         # Precision
         precision = Precision.from_str(config.get("model_dtype"))
@@ -97,6 +96,7 @@ class EvalResult:
         try:
             with open(request_file, "r") as f:
                 request = json.load(f)
+            print("Read Request", request)
             self.model_type = ModelType.from_str(request.get("model_type", ""))
             self.weight_type = WeightType[request.get("weight_type", "Original")]
             self.license = request.get("license", "?")
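Both hunks sit in EvalResult's JSON-loading path: the first drops a noisy print(config) when a results file is parsed, the second adds a debug print when the matching eval request file is read back. A minimal sketch of that path, assuming the stock leaderboard schema for both files (the function name, paths, and fallback behavior here are illustrative, not taken from this diff):

import json

def load_eval_metadata(result_path: str, request_path: str) -> dict:
    # Results file: eval scores plus a "config" block describing the model.
    with open(result_path) as fp:
        data = json.load(fp)

    config = data.get("config")
    model_dtype = config.get("model_dtype")  # parsed into a Precision enum upstream

    # Request file: records what was asked for when the model was submitted.
    try:
        with open(request_path, "r") as f:
            request = json.load(f)
        print("Read Request", request)  # the debug line this commit adds
    except FileNotFoundError:
        request = {}  # illustrative fallback; the real class handles missing files elsewhere

    return {
        "model_dtype": model_dtype,
        "model_type": request.get("model_type", ""),
        "weight_type": request.get("weight_type", "Original"),
        "license": request.get("license", "?"),
    }

Calling load_eval_metadata on a matching results/request pair reproduces the same reads, with the new print surfacing exactly which request JSON was picked up for each model.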