Clémentine committed on
Commit d0c2655
1 Parent(s): 52740a6

change token name

Files changed (1)
  1. app.py +8 -8
app.py CHANGED
@@ -15,7 +15,7 @@ from huggingface_hub import HfApi
 from scorer import question_scorer
 from content import format_warning, format_log, TITLE, INTRODUCTION_TEXT, CITATION_BUTTON_LABEL, CITATION_BUTTON_TEXT
 
-BALM_TOKEN = os.environ.get("WTOKEN", None)
+TOKEN = os.environ.get("TOKEN", None)
 
 OWNER="gaia-benchmark"
 DATA_DATASET = f"{OWNER}/GAIA"
@@ -30,18 +30,18 @@ YEAR_VERSION = "2023"
 os.makedirs("scored", exist_ok=True)
 
 # Display the results
-eval_results = load_dataset(RESULTS_DATASET, YEAR_VERSION, use_auth_token=BALM_TOKEN)
+eval_results = load_dataset(RESULTS_DATASET, YEAR_VERSION, use_auth_token=TOKEN)
 eval_dataframe_val = pd.DataFrame(eval_results["validation"].remove_columns("mail"))
 eval_dataframe_test = pd.DataFrame(eval_results["test"].remove_columns("mail"))
 
 # Gold answers
 gold_results = {}
-gold_dataset = load_dataset(INTERNAL_DATA_DATASET, f"{YEAR_VERSION}_all", use_auth_token=BALM_TOKEN)
+gold_dataset = load_dataset(INTERNAL_DATA_DATASET, f"{YEAR_VERSION}_all", use_auth_token=TOKEN)
 gold_results = {split: {row["task_id"]: row for row in gold_dataset[split]} for split in ["test", "validation"]}
 
 
 def restart_space():
-    api.restart_space(repo_id=LEADERBOARD_PATH, token=BALM_TOKEN)
+    api.restart_space(repo_id=LEADERBOARD_PATH, token=TOKEN)
 
 
 COLS = ["Model", "Score ⬆️", "Organisation"]
@@ -74,7 +74,7 @@ def add_new_eval(
         path_or_fileobj=path_to_file.name,
         path_in_repo=f"{organisation}/{model}/{YEAR_VERSION}_{val_or_test}_raw_{datetime.datetime.today()}.jsonl",
         repo_type="dataset",
-        token=BALM_TOKEN
+        token=TOKEN
     )
 
     # Compute score
@@ -114,7 +114,7 @@ def add_new_eval(
         path_or_fileobj=f"scored/{organisation}_{model}.jsonl",
         path_in_repo=f"{organisation}/{model}/{YEAR_VERSION}_{val_or_test}_scored_{datetime.datetime.today()}.jsonl",
         repo_type="dataset",
-        token=BALM_TOKEN
+        token=TOKEN
     )
 
     # Actual submission
@@ -129,13 +129,13 @@ def add_new_eval(
     }
     eval_results[val_or_test] = eval_results[val_or_test].add_item(eval_entry)
     print(eval_results)
-    eval_results.push_to_hub(RESULTS_DATASET, config_name = YEAR_VERSION, token=BALM_TOKEN)
+    eval_results.push_to_hub(RESULTS_DATASET, config_name = YEAR_VERSION, token=TOKEN)
 
     return format_log(f"Model {model} submitted by {organisation} successfully. \nPlease refresh the leaderboard, and wait for up to an hour to see the score displayed")
 
 
 def refresh():
-    eval_results = load_dataset(RESULTS_DATASET, YEAR_VERSION, use_auth_token=BALM_TOKEN, download_mode="force_redownload")
+    eval_results = load_dataset(RESULTS_DATASET, YEAR_VERSION, use_auth_token=TOKEN, download_mode="force_redownload")
     eval_dataframe_val = pd.DataFrame(eval_results["validation"].remove_columns("mail"))
     eval_dataframe_test = pd.DataFrame(eval_results["test"].remove_columns("mail"))
     return eval_dataframe_val, eval_dataframe_test
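
For context, `TOKEN` is the Hugging Face access token the Space reads from its environment and then passes to every Hub call in app.py (`load_dataset`, `upload_file`, `push_to_hub`, `restart_space`). Below is a minimal sketch of that authentication pattern, assuming the token is exposed as a `TOKEN` environment variable or Space secret; the repo ids are placeholders (the real `RESULTS_DATASET` and `LEADERBOARD_PATH` values are defined earlier in app.py), and the explicit guard is illustrative only.

import os

from datasets import load_dataset
from huggingface_hub import HfApi

# Placeholder repo ids for illustration; app.py builds the real ones from OWNER.
OWNER = "gaia-benchmark"
RESULTS_DATASET = f"{OWNER}/results"        # placeholder name
LEADERBOARD_PATH = f"{OWNER}/leaderboard"   # placeholder name
YEAR_VERSION = "2023"

# Read the Hub token from the TOKEN secret / environment variable.
TOKEN = os.environ.get("TOKEN", None)
if TOKEN is None:
    # Illustrative guard; app.py itself passes None through and only fails on the Hub call.
    raise RuntimeError("Set the TOKEN environment variable to a Hugging Face access token")

# Authenticate dataset reads with the token, as app.py does for the results dataset.
eval_results = load_dataset(RESULTS_DATASET, YEAR_VERSION, use_auth_token=TOKEN)

# Authenticate Hub API calls with the same token, e.g. restarting the Space.
api = HfApi()
api.restart_space(repo_id=LEADERBOARD_PATH, token=TOKEN)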