sh1gechan committed on
Commit
0cb52bf
·
verified ·
1 Parent(s): a684f6e

Update src/populate.py

Browse files
Files changed (1) hide show
  1. src/populate.py +4 -49
src/populate.py CHANGED
@@ -8,74 +8,29 @@ from src.display.utils import AutoEvalColumn, EvalQueueColumn
8
  from src.leaderboard.read_evals import get_raw_eval_results
9
 
10
 
11
- # def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchmark_cols: list) -> pd.DataFrame:
12
- # """Creates a dataframe from all the individual experiment results"""
13
- # raw_data = get_raw_eval_results(results_path, requests_path)
14
- # all_data_json = [v.to_dict() for v in raw_data]
15
-
16
-
17
-
18
- # df = pd.DataFrame.from_records(all_data_json)
19
- # score_cols = [
20
- # 'ALT E to J BLEU', 'ALT J to E BLEU', 'WikiCorpus E to J BLEU', 'WikiCorpus J to E BLEU',
21
- # 'XL-Sum JA BLEU', 'XL-Sum ROUGE1', 'XL-Sum ROUGE2', 'XL-Sum ROUGE-Lsum'
22
- # ]
23
-
24
- # existing_score_cols = [col for col in score_cols if col in df.columns]
25
- # print(f"Existing score columns: {existing_score_cols}")
26
-
27
- # # スコア列を100で割り、.4f形式でフォーマット
28
- # df[existing_score_cols] = (df[existing_score_cols] / 100).applymap(lambda x: f'{x:.4f}')
29
- # df = df.sort_values(by=[AutoEvalColumn.AVG.name], ascending=False)
30
- # df = df[cols].round(decimals=2)
31
-
32
- # # filter out if any of the benchmarks have not been produced
33
- # df = df[has_no_nan_values(df, benchmark_cols)]
34
- # return df
35
-
36
  def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchmark_cols: list) -> pd.DataFrame:
37
  """Creates a dataframe from all the individual experiment results"""
38
  raw_data = get_raw_eval_results(results_path, requests_path)
39
-
40
- # デバッグ: Raw data の内容を確認
41
- print(f"Raw data before conversion: {raw_data}")
42
-
43
  all_data_json = [v.to_dict() for v in raw_data]
44
 
45
- # デバッグ: 全てのデータをDataFrameに変換
 
46
  df = pd.DataFrame.from_records(all_data_json)
47
- print(f"Initial DataFrame: {df}")
48
-
49
  score_cols = [
50
  'ALT E to J BLEU', 'ALT J to E BLEU', 'WikiCorpus E to J BLEU', 'WikiCorpus J to E BLEU',
51
  'XL-Sum JA BLEU', 'XL-Sum ROUGE1', 'XL-Sum ROUGE2', 'XL-Sum ROUGE-Lsum'
52
  ]
53
 
54
- # デバッグ: 存在するスコア列を確認
55
  existing_score_cols = [col for col in score_cols if col in df.columns]
56
  print(f"Existing score columns: {existing_score_cols}")
57
 
58
  # スコア列を100で割り、.4f形式でフォーマット
59
  df[existing_score_cols] = (df[existing_score_cols] / 100).applymap(lambda x: f'{x:.4f}')
60
-
61
- # デバッグ: スコア調整後のデータフレームを確認
62
- print(f"DataFrame after score adjustment: {df}")
63
-
64
- # ソート
65
  df = df.sort_values(by=[AutoEvalColumn.AVG.name], ascending=False)
66
-
67
- # デバッグ: ソート後のデータフレームを確認
68
- print(f"Sorted DataFrame: {df}")
69
-
70
- # NaNを持つ行を除外
71
- df = df[has_no_nan_values(df, benchmark_cols)]
72
-
73
- # デバッグ: NaNフィルタリング後のデータフレームを確認
74
- print(f"Final DataFrame after NaN filtering: {df}")
75
-
76
- # 必要なカラムのみに絞り込む
77
  df = df[cols].round(decimals=2)
78
 
 
 
79
  return df
80
 
81
 
 
8
  from src.leaderboard.read_evals import get_raw_eval_results
9
 
10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchmark_cols: list) -> pd.DataFrame:
12
  """Creates a dataframe from all the individual experiment results"""
13
  raw_data = get_raw_eval_results(results_path, requests_path)
 
 
 
 
14
  all_data_json = [v.to_dict() for v in raw_data]
15
 
16
+
17
+
18
  df = pd.DataFrame.from_records(all_data_json)
 
 
19
  score_cols = [
20
  'ALT E to J BLEU', 'ALT J to E BLEU', 'WikiCorpus E to J BLEU', 'WikiCorpus J to E BLEU',
21
  'XL-Sum JA BLEU', 'XL-Sum ROUGE1', 'XL-Sum ROUGE2', 'XL-Sum ROUGE-Lsum'
22
  ]
23
 
 
24
  existing_score_cols = [col for col in score_cols if col in df.columns]
25
  print(f"Existing score columns: {existing_score_cols}")
26
 
27
  # スコア列を100で割り、.4f形式でフォーマット
28
  df[existing_score_cols] = (df[existing_score_cols] / 100).applymap(lambda x: f'{x:.4f}')
 
 
 
 
 
29
  df = df.sort_values(by=[AutoEvalColumn.AVG.name], ascending=False)
 
 
 
 
 
 
 
 
 
 
 
30
  df = df[cols].round(decimals=2)
31
 
32
+ # filter out if any of the benchmarks have not been produced
33
+ df = df[has_no_nan_values(df, benchmark_cols)]
34
  return df
35
 
36