Yotam-Perlitz committed on
Commit
386e6e6
•
1 Parent(s): ad12749

fix location of holistic benchmarks list

Browse files

Signed-off-by: Yotam-Perlitz <y.perlitz@ibm.com>

Files changed (1) hide show
  1. app.py +23 -23
app.py CHANGED
@@ -5,7 +5,28 @@ import pandas as pd
5
  import plotly.express as px
6
  import streamlit as st
7
  from bat import Benchmark, Config, Reporter, Tester
8
- from bat.utils import get_holistic_benchmark
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
 
11
  def get_nice_benchmark_name(bench_name):
@@ -49,7 +70,7 @@ st.markdown(
49
  )
50
 
51
 
52
- all_scenarios_for_aggragate = get_holistic_benchmark().get_scenarios()
53
 
54
  st.subheader("The Leaderboard", divider=True)
55
  # st.subheader("🏋️‍♂️ BenchBench Leaderboard 🏋", divider=True)
@@ -157,27 +178,6 @@ def run_load(
157
  n_exps=n_exps if n_models_taken_list != [0] else 1,
158
  )
159
 
160
- holistic_scenarios = [
161
- "arena_hard",
162
- "mixeval",
163
- "agieval",
164
- "arc_c",
165
- "alpacav1",
166
- "alpacav2",
167
- "alpacaeval2_lc",
168
- "arena_elo",
169
- "bbh",
170
- "eq_benchv2",
171
- "gpt4all",
172
- "hugging_6",
173
- "llmonitor",
174
- "magi",
175
- "mmlu",
176
- "mt_bench",
177
- "biggen_mwr",
178
- "olmes_average",
179
- "mmlu_pro",
180
- ]
181
  holistic = Benchmark()
182
  holistic.load_local_catalog()
183
  holistic.df = holistic.df.query("scenario in @holistic_scenarios")
 
5
  import plotly.express as px
6
  import streamlit as st
7
  from bat import Benchmark, Config, Reporter, Tester
8
+
9
+ holistic_scenarios = [
10
+ "arena_hard",
11
+ "mixeval",
12
+ "agieval",
13
+ "arc_c",
14
+ "alpacav1",
15
+ "alpacav2",
16
+ "alpacaeval2_lc",
17
+ "arena_elo",
18
+ "bbh",
19
+ "eq_benchv2",
20
+ "gpt4all",
21
+ "hugging_6",
22
+ "llmonitor",
23
+ "magi",
24
+ "mmlu",
25
+ "mt_bench",
26
+ "biggen_mwr",
27
+ "olmes_average",
28
+ "mmlu_pro",
29
+ ]
30
 
31
 
32
  def get_nice_benchmark_name(bench_name):
 
70
  )
71
 
72
 
73
+ all_scenarios_for_aggragate = holistic_scenarios
74
 
75
  st.subheader("The Leaderboard", divider=True)
76
  # st.subheader("🏋️‍♂️ BenchBench Leaderboard 🏋", divider=True)
 
178
  n_exps=n_exps if n_models_taken_list != [0] else 1,
179
  )
180
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
181
  holistic = Benchmark()
182
  holistic.load_local_catalog()
183
  holistic.df = holistic.df.query("scenario in @holistic_scenarios")