Shane committed on
Commit
ee71995
1 Parent(s): 91cb993

made changes

Browse files
Files changed (2) hide show
  1. app.py +1 -1
  2. src/md.py +3 -3
app.py CHANGED
@@ -103,7 +103,7 @@ with gr.Blocks(css=custom_css) as app:
103
  search_1 = gr.Textbox(label="Model Search (delimit with , )",
104
  # placeholder="Model Search (delimit with , )",
105
  show_label=True)
106
- category_selector_1 = gr.Dropdown(categories, label="Sorted By", value="Average", multiselect=False, show_label=True)
107
  with gr.Row():
108
  # reference data
109
  rewardbench_table_hidden = gr.Dataframe(
 
103
  search_1 = gr.Textbox(label="Model Search (delimit with , )",
104
  # placeholder="Model Search (delimit with , )",
105
  show_label=True)
106
+ category_selector_1 = gr.Dropdown(categories, label="Sorted By", value="Average", multiselect=False, show_label=True, elem_id="category_selector")
107
  with gr.Row():
108
  # reference data
109
  rewardbench_table_hidden = gr.Dataframe(
src/md.py CHANGED
@@ -24,6 +24,9 @@ For reproductability, we use greedy decoding for all model generation as default
24
  - **Contamination-resistant**: HREF's evaluation set is hidden and uses public models for both the baseline model and judge model, which makes it completely free of contamination.
25
  - **Task Oriented**: Instead of naturally collected instructions from the user, HREF contains instructions that are written specifically targetting 8 distinct categories that are used in instruction tuning, which allows it to provide more insights about how to improve language models.
26
 
 
 
 
27
  """
28
 
29
  # Get Pacific time zone (handles PST/PDT automatically)
@@ -33,7 +36,4 @@ current_time = datetime.now(pacific_tz).strftime("%H:%M %Z, %d %b %Y")
33
  TOP_TEXT = f"""# HREF: Human Reference Guided Evaluation for Instructiong Following
34
  [Code]() | [Validation Set]() | [Human Agreement Set]() | [Results]() | [Paper]() | Total models: {{}} | * Unverified models | ⚠️ Dataset Contamination | Last restart (PST): {current_time}
35
 
36
- ## Contact Us
37
- TODO
38
- """
39
 
 
24
  - **Contamination-resistant**: HREF's evaluation set is hidden and uses public models for both the baseline model and judge model, which makes it completely free of contamination.
25
  - **Task Oriented**: Instead of naturally collected instructions from the user, HREF contains instructions that are written specifically targetting 8 distinct categories that are used in instruction tuning, which allows it to provide more insights about how to improve language models.
26
 
27
+ ## Contact Us
28
+ TODO
29
+ """
30
  """
31
 
32
  # Get Pacific time zone (handles PST/PDT automatically)
 
36
  TOP_TEXT = f"""# HREF: Human Reference Guided Evaluation for Instructiong Following
37
  [Code]() | [Validation Set]() | [Human Agreement Set]() | [Results]() | [Paper]() | Total models: {{}} | * Unverified models | ⚠️ Dataset Contamination | Last restart (PST): {current_time}
38
 
 
 
 
39