Shane
committed on
Commit
•
ee71995
1
Parent(s):
91cb993
made changes
Browse files
app.py
CHANGED
@@ -103,7 +103,7 @@ with gr.Blocks(css=custom_css) as app:
|
|
103 |
search_1 = gr.Textbox(label="Model Search (delimit with , )",
|
104 |
# placeholder="Model Search (delimit with , )",
|
105 |
show_label=True)
|
106 |
-
category_selector_1 = gr.Dropdown(categories, label="Sorted By", value="Average", multiselect=False, show_label=True)
|
107 |
with gr.Row():
|
108 |
# reference data
|
109 |
rewardbench_table_hidden = gr.Dataframe(
|
|
|
103 |
search_1 = gr.Textbox(label="Model Search (delimit with , )",
|
104 |
# placeholder="Model Search (delimit with , )",
|
105 |
show_label=True)
|
106 |
+
category_selector_1 = gr.Dropdown(categories, label="Sorted By", value="Average", multiselect=False, show_label=True, elem_id="category_selector")
|
107 |
with gr.Row():
|
108 |
# reference data
|
109 |
rewardbench_table_hidden = gr.Dataframe(
|
src/md.py
CHANGED
@@ -24,6 +24,9 @@ For reproducibility, we use greedy decoding for all model generation as default
|
|
24 |
- **Contamination-resistant**: HREF's evaluation set is hidden and uses public models for both the baseline model and judge model, which makes it completely free of contamination.
|
25 |
- **Task Oriented**: Instead of naturally collected instructions from the user, HREF contains instructions that are written specifically targeting 8 distinct categories that are used in instruction tuning, which allows it to provide more insights about how to improve language models.
|
26 |
|
|
|
|
|
|
|
27 |
"""
|
28 |
|
29 |
# Get Pacific time zone (handles PST/PDT automatically)
|
@@ -33,7 +36,4 @@ current_time = datetime.now(pacific_tz).strftime("%H:%M %Z, %d %b %Y")
|
|
33 |
TOP_TEXT = f"""# HREF: Human Reference Guided Evaluation for Instruction Following
|
34 |
[Code]() | [Validation Set]() | [Human Agreement Set]() | [Results]() | [Paper]() | Total models: {{}} | * Unverified models | ⚠️ Dataset Contamination | Last restart (PST): {current_time}
|
35 |
|
36 |
-
## Contact Us
|
37 |
-
TODO
|
38 |
-
"""
|
39 |
|
|
|
24 |
- **Contamination-resistant**: HREF's evaluation set is hidden and uses public models for both the baseline model and judge model, which makes it completely free of contamination.
|
25 |
- **Task Oriented**: Instead of naturally collected instructions from the user, HREF contains instructions that are written specifically targeting 8 distinct categories that are used in instruction tuning, which allows it to provide more insights about how to improve language models.
|
26 |
|
27 |
+
## Contact Us
|
28 |
+
TODO
|
29 |
+
"""
|
30 |
"""
|
31 |
|
32 |
# Get Pacific time zone (handles PST/PDT automatically)
|
|
|
36 |
TOP_TEXT = f"""# HREF: Human Reference Guided Evaluation for Instruction Following
|
37 |
[Code]() | [Validation Set]() | [Human Agreement Set]() | [Results]() | [Paper]() | Total models: {{}} | * Unverified models | ⚠️ Dataset Contamination | Last restart (PST): {current_time}
|
38 |
|
|
|
|
|
|
|
39 |
|