RicardoDominguez committed · Commit f224572 · 1 parent: aa8c9ef

style changes

Files changed (4):
  1. README.md (+6 -1)
  2. app.py (+6 -6)
  3. src/about.py (+2 -4)
  4. src/display/utils.py (+8 -8)
README.md CHANGED
@@ -41,4 +41,9 @@ If you encounter problem on the space, don't hesitate to restart it to remove th
 You'll find
 - the main table' columns names and properties in `src/display/utils.py`
 - the logic to read all results and request files, then convert them in dataframe lines, in `src/leaderboard/read_evals.py`, and `src/populate.py`
-- the logic to allow or filter submissions in `src/submission/submit.py` and `src/submission/check_validity.py`
+- the logic to allow or filter submissions in `src/submission/submit.py` and `src/submission/check_validity.py`
+
+
+# Todo
+
+* Change background to white
app.py CHANGED
@@ -66,13 +66,13 @@ def init_leaderboard(dataframe):
         select_columns=SelectColumns(
             default_selection=[c.name for c in fields(AutoEvalColumn) if c.displayed_by_default],
             cant_deselect=[c.name for c in fields(AutoEvalColumn) if c.never_hidden],
-            label="Select Columns to Display:",
+            label="Select columns to display:",
         ),
         search_columns=[AutoEvalColumn.model.name, AutoEvalColumn.license.name],
         hide_columns=[c.name for c in fields(AutoEvalColumn) if c.hidden],
         filter_columns=[
             ColumnFilter(AutoEvalColumn.model_type.name, type="checkboxgroup", label="Model types"),
-            ColumnFilter(AutoEvalColumn.precision.name, type="checkboxgroup", label="Precision"),
+            # ColumnFilter(AutoEvalColumn.precision.name, type="checkboxgroup", label="Precision"),
             ColumnFilter(
                 AutoEvalColumn.params.name,
                 type="slider",
@@ -80,9 +80,9 @@ def init_leaderboard(dataframe):
                 max=150,
                 label="Select the number of parameters (B)",
             ),
-            ColumnFilter(
-                AutoEvalColumn.still_on_hub.name, type="boolean", label="Deleted/incomplete", default=True
-            ),
+            # ColumnFilter(
+            #     AutoEvalColumn.still_on_hub.name, type="boolean", label="Deleted/incomplete", default=True
+            # ),
         ],
         bool_checkboxgroup_label="Hide models",
         interactive=False,
@@ -95,7 +95,7 @@ with demo:
     gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
 
     with gr.Tabs(elem_classes="tab-buttons") as tabs:
-        with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
+        with gr.TabItem("🏛️ CaselawQA", elem_id="llm-benchmark-tab-table", id=0):
             leaderboard = init_leaderboard(LEADERBOARD_DF)
 
         with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
src/about.py CHANGED
@@ -25,7 +25,7 @@ TITLE = """<h1 align="center" id="space-title">CaselawQA leaderboard (WIP)</h1>"
 
 # What does your leaderboard evaluate?
 INTRODUCTION_TEXT = """
-CaselawQA is a benchmark comprising classification tasks, drawing from the Supreme Court and Songer Court of Appeals legal databases.
+CaselawQA is a benchmark comprising legal classification tasks derived from the Supreme Court and Songer Court of Appeals legal databases.
 From a technical machine learning perspective, these tasks provide highly non-trivial classification problems where even the best models leave much room for improvement.
 From a substantive legal perspective, efficient solutions to such classification problems have rich and important applications in legal research.
 """
@@ -82,8 +82,7 @@ If everything is done, check you can launch the EleutherAIHarness on your model
 
 CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
 CITATION_BUTTON_TEXT = r"""
-```bibtex
-@misc{dominguezolmedo2024lawmapowerspecializationlegal,
+@misc{dominguezolmedo2024lawma,
 title={Lawma: The Power of Specialization for Legal Tasks},
 author={Ricardo Dominguez-Olmedo and Vedant Nanda and Rediet Abebe and Stefan Bechtold and Christoph Engel and Jens Frankenreiter and Krishna Gummadi and Moritz Hardt and Michael Livermore},
 year={2024},
@@ -92,5 +91,4 @@ CITATION_BUTTON_TEXT = r"""
 primaryClass={cs.CL},
 url={https://arxiv.org/abs/2407.16615},
 }
-```
 """
src/display/utils.py CHANGED
@@ -26,19 +26,19 @@ auto_eval_column_dict = []
 auto_eval_column_dict.append(["model_type_symbol", ColumnContent, ColumnContent("T", "str", True, never_hidden=True)])
 auto_eval_column_dict.append(["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)])
 #Scores
-auto_eval_column_dict.append(["average", ColumnContent, ColumnContent("Average ⬆️", "number", True)])
+# auto_eval_column_dict.append(["average", ColumnContent, ColumnContent("Average ⬆️", "number", True)])
 for task in Tasks:
     auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])
 # Model information
 auto_eval_column_dict.append(["model_type", ColumnContent, ColumnContent("Type", "str", False)])
-auto_eval_column_dict.append(["architecture", ColumnContent, ColumnContent("Architecture", "str", False)])
-auto_eval_column_dict.append(["weight_type", ColumnContent, ColumnContent("Weight type", "str", False, True)])
-auto_eval_column_dict.append(["precision", ColumnContent, ColumnContent("Precision", "str", False)])
-auto_eval_column_dict.append(["license", ColumnContent, ColumnContent("Hub License", "str", False)])
+# auto_eval_column_dict.append(["architecture", ColumnContent, ColumnContent("Architecture", "str", False)])
+# auto_eval_column_dict.append(["weight_type", ColumnContent, ColumnContent("Weight type", "str", False, True)])
+# auto_eval_column_dict.append(["precision", ColumnContent, ColumnContent("Precision", "str", False)])
+# auto_eval_column_dict.append(["license", ColumnContent, ColumnContent("Hub License", "str", False)])
 auto_eval_column_dict.append(["params", ColumnContent, ColumnContent("#Params (B)", "number", False)])
-auto_eval_column_dict.append(["likes", ColumnContent, ColumnContent("Hub ❤️", "number", False)])
-auto_eval_column_dict.append(["still_on_hub", ColumnContent, ColumnContent("Available on the hub", "bool", False)])
-auto_eval_column_dict.append(["revision", ColumnContent, ColumnContent("Model sha", "str", False, False)])
+# auto_eval_column_dict.append(["likes", ColumnContent, ColumnContent("Hub ❤️", "number", False)])
+# auto_eval_column_dict.append(["still_on_hub", ColumnContent, ColumnContent("Available on the hub", "bool", False)])
+# auto_eval_column_dict.append(["revision", ColumnContent, ColumnContent("Model sha", "str", False, False)])
 
 # We use make dataclass to dynamically fill the scores from Tasks
 AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)
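
For reference (not part of the commit): each `auto_eval_column_dict` entry is a `(field_name, type, default)` triple that `make_dataclass` turns into a field of `AutoEvalColumn`, so commenting out an `append` removes that column from the generated class. A minimal, standalone illustration of the pattern; the exact `ColumnContent` layout is assumed to mirror the template's:

```python
# Minimal sketch, not taken from this repo: shows how (field_name, type, default)
# triples are consumed by dataclasses.make_dataclass to build AutoEvalColumn.
from dataclasses import dataclass, make_dataclass


@dataclass(frozen=True)  # frozen so instances are hashable and valid field defaults
class ColumnContent:
    name: str                    # header shown in the leaderboard table
    type: str                    # "str", "number", "markdown", "bool", ...
    displayed_by_default: bool
    hidden: bool = False
    never_hidden: bool = False


auto_eval_column_dict = [
    ["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)],
    ["params", ColumnContent, ColumnContent("#Params (B)", "number", False)],
    # Commenting an entry out here drops the column from AutoEvalColumn entirely.
]

AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)

print(AutoEvalColumn.model.name)   # -> Model
print(AutoEvalColumn.params.type)  # -> number
```

Because app.py builds its column lists with comprehensions over `fields(AutoEvalColumn)` (as seen in the diff above), dropping a column here needs no further changes downstream.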