Spaces:
Running
Running
Joshua Kravitz
committed on
Commit
·
566f3c9
1
Parent(s):
bb1a637
feat: Launch
Browse files
- dgeb/tasks/tasks.py +7 -7
- leaderboard/app.py +6 -5
dgeb/tasks/tasks.py
CHANGED
@@ -1,13 +1,13 @@
|
|
1 |
"""Task abstract class for evaluation and results."""
|
2 |
|
3 |
import logging
|
4 |
-
from
|
5 |
-
from importlib.metadata import version
|
6 |
from enum import Enum
|
|
|
|
|
|
|
7 |
import datasets
|
8 |
from pydantic import BaseModel, model_validator
|
9 |
-
from abc import ABC, abstractmethod
|
10 |
-
|
11 |
|
12 |
# HACK: if Modality is not defined, then import it from modality.py
|
13 |
try:
|
@@ -50,7 +50,7 @@ class LayerResult(BaseModel):
|
|
50 |
metrics: List[TaskMetric]
|
51 |
|
52 |
|
53 |
-
class
|
54 |
hf_name: str
|
55 |
num_layers: int
|
56 |
num_params: int
|
@@ -87,7 +87,7 @@ class TaskResult(BaseModel):
|
|
87 |
dgeb_version: str
|
88 |
task: "TaskMetadata"
|
89 |
# TODO: Convert model to ModelMetadata
|
90 |
-
model:
|
91 |
results: List[LayerResult]
|
92 |
|
93 |
@model_validator(mode="after")
|
@@ -105,7 +105,7 @@ class TaskResult(BaseModel):
|
|
105 |
def from_dict(
|
106 |
task_metadata: "TaskMetadata",
|
107 |
layer_results: LayerResult,
|
108 |
-
model_metadata:
|
109 |
):
|
110 |
return TaskResult(
|
111 |
dgeb_version=version("dgeb"),
|
|
|
1 |
"""Task abstract class for evaluation and results."""
|
2 |
|
3 |
import logging
|
4 |
+
from abc import ABC, abstractmethod
|
|
|
5 |
from enum import Enum
|
6 |
+
from importlib.metadata import version
|
7 |
+
from typing import Any, List, Literal, Optional
|
8 |
+
|
9 |
import datasets
|
10 |
from pydantic import BaseModel, model_validator
|
|
|
|
|
11 |
|
12 |
# HACK: if Modality is not defined, then import it from modality.py
|
13 |
try:
|
|
|
50 |
metrics: List[TaskMetric]
|
51 |
|
52 |
|
53 |
+
class DGEBModel(BaseModel):
|
54 |
hf_name: str
|
55 |
num_layers: int
|
56 |
num_params: int
|
|
|
87 |
dgeb_version: str
|
88 |
task: "TaskMetadata"
|
89 |
# TODO: Convert model to ModelMetadata
|
90 |
+
model: DGEBModel
|
91 |
results: List[LayerResult]
|
92 |
|
93 |
@model_validator(mode="after")
|
|
|
105 |
def from_dict(
|
106 |
task_metadata: "TaskMetadata",
|
107 |
layer_results: LayerResult,
|
108 |
+
model_metadata: DGEBModel,
|
109 |
):
|
110 |
return TaskResult(
|
111 |
dgeb_version=version("dgeb"),
|
leaderboard/app.py
CHANGED
@@ -1,10 +1,11 @@
|
|
1 |
-
import
|
2 |
import json
|
|
|
3 |
from pathlib import Path
|
4 |
-
import gradio as gr
|
5 |
from typing import List
|
|
|
|
|
6 |
import pandas as pd
|
7 |
-
import importlib.util
|
8 |
from pydantic import ValidationError, parse_obj_as
|
9 |
|
10 |
SIG_FIGS = 4
|
@@ -24,7 +25,7 @@ spec = importlib.util.spec_from_file_location("tasks", tasks_path)
|
|
24 |
tasks = importlib.util.module_from_spec(spec)
|
25 |
spec.loader.exec_module(tasks)
|
26 |
TaskResult = tasks.TaskResult
|
27 |
-
|
28 |
|
29 |
|
30 |
# Assuming the class definitions provided above are complete and imported here
|
@@ -84,7 +85,7 @@ def load_results() -> List[TaskResult]:
|
|
84 |
|
85 |
|
86 |
def task_results_to_dgeb_score(
|
87 |
-
model:
|
88 |
) -> dict:
|
89 |
best_scores_per_task = []
|
90 |
modalities_seen = set()
|
|
|
1 |
+
import importlib.util
|
2 |
import json
|
3 |
+
import math
|
4 |
from pathlib import Path
|
|
|
5 |
from typing import List
|
6 |
+
|
7 |
+
import gradio as gr
|
8 |
import pandas as pd
|
|
|
9 |
from pydantic import ValidationError, parse_obj_as
|
10 |
|
11 |
SIG_FIGS = 4
|
|
|
25 |
tasks = importlib.util.module_from_spec(spec)
|
26 |
spec.loader.exec_module(tasks)
|
27 |
TaskResult = tasks.TaskResult
|
28 |
+
DGEBModel = tasks.DGEBModel
|
29 |
|
30 |
|
31 |
# Assuming the class definitions provided above are complete and imported here
|
|
|
85 |
|
86 |
|
87 |
def task_results_to_dgeb_score(
|
88 |
+
model: DGEBModel, model_results: List[TaskResult]
|
89 |
) -> dict:
|
90 |
best_scores_per_task = []
|
91 |
modalities_seen = set()
|