Spaces:
Runtime error
Runtime error
Refactor to match new AutoTrain API
Browse files- .gitignore +4 -1
- Makefile +8 -0
- app.py +84 -30
- requirements.txt +1 -1
- utils.py +14 -6
.gitignore
CHANGED
@@ -143,4 +143,7 @@ cython_debug/
|
|
143 |
# Submissions
|
144 |
submission_repo/
|
145 |
GEM-outputs/
|
146 |
-
sample-submissions/
|
|
|
|
|
|
|
|
143 |
# Submissions
|
144 |
submission_repo/
|
145 |
GEM-outputs/
|
146 |
+
sample-submissions/
|
147 |
+
|
148 |
+
# mac OS
|
149 |
+
.DS_Store
|
Makefile
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
style:
|
2 |
+
python -m black --line-length 119 --target-version py39 .
|
3 |
+
python -m isort .
|
4 |
+
|
5 |
+
quality:
|
6 |
+
python -m black --check --line-length 119 --target-version py39 .
|
7 |
+
python -m isort --check-only .
|
8 |
+
python -m flake8 --max-line-length 119
|
app.py
CHANGED
@@ -1,26 +1,27 @@
|
|
1 |
import json
|
2 |
import os
|
3 |
import shutil
|
|
|
4 |
from datetime import datetime
|
5 |
from pathlib import Path
|
6 |
|
7 |
import jsonlines
|
8 |
import streamlit as st
|
9 |
from dotenv import load_dotenv
|
10 |
-
from huggingface_hub import
|
11 |
|
12 |
-
from utils import http_post, validate_json
|
13 |
|
14 |
if Path(".env").is_file():
|
15 |
load_dotenv(".env")
|
16 |
|
17 |
HF_TOKEN = os.getenv("HF_TOKEN")
|
18 |
-
|
19 |
-
|
20 |
LOCAL_REPO = "submission_repo"
|
21 |
LOGS_REPO = "submission-logs"
|
22 |
|
23 |
-
|
24 |
# 1. Add check that fields are nested under `tasks` field correctly
|
25 |
# 2. Add check that names of tasks and datasets are valid
|
26 |
|
@@ -68,9 +69,9 @@ def get_submission_names():
|
|
68 |
return [score["submission_name"] for score in scores_data]
|
69 |
|
70 |
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
st.title("GEM Submissions")
|
75 |
st.markdown(
|
76 |
"""
|
@@ -144,8 +145,7 @@ with st.form(key="form"):
|
|
144 |
example_submission = json.load(f)
|
145 |
st.json(example_submission)
|
146 |
|
147 |
-
user_name = st.text_input("Enter your 🤗 Hub username
|
148 |
-
|
149 |
submit_button = st.form_submit_button("Make Submission")
|
150 |
|
151 |
if submit_button and submission_errors == 0:
|
@@ -155,8 +155,8 @@ if submit_button and submission_errors == 0:
|
|
155 |
submission_time = str(int(datetime.now().timestamp()))
|
156 |
|
157 |
# Create submission dataset under benchmarks ORG
|
158 |
-
submission_repo_id = f"{user_name}__{submission_name_formatted}__{submission_time}"
|
159 |
-
dataset_repo_url = f"https://huggingface.co/datasets/
|
160 |
repo = Repository(
|
161 |
local_dir=LOCAL_REPO,
|
162 |
clone_from=dataset_repo_url,
|
@@ -176,22 +176,72 @@ if submit_button and submission_errors == 0:
|
|
176 |
else:
|
177 |
commit_sha = repo.git_head_commit_url().split("/")[-1]
|
178 |
|
179 |
-
submission_id = submission_name + "__" +
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
180 |
|
|
|
181 |
payload = {
|
182 |
-
"
|
183 |
-
"
|
184 |
-
"
|
185 |
-
"model": "gem",
|
186 |
-
"submission_dataset": f"GEM-submissions/{submission_repo_id}",
|
187 |
-
"submission_id": submission_id,
|
188 |
-
"col_mapping": {},
|
189 |
-
"split": "test",
|
190 |
-
"config": None,
|
191 |
}
|
192 |
-
|
193 |
-
path="/
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
194 |
).json()
|
|
|
195 |
|
196 |
logs_repo_url = f"https://huggingface.co/datasets/GEM-submissions/{LOGS_REPO}"
|
197 |
logs_repo = Repository(
|
@@ -201,25 +251,29 @@ if submit_button and submission_errors == 0:
|
|
201 |
private=True,
|
202 |
use_auth_token=HF_TOKEN,
|
203 |
)
|
204 |
-
|
|
|
|
|
|
|
|
|
205 |
with jsonlines.open(f"{LOGS_REPO}/logs.jsonl") as r:
|
206 |
lines = []
|
207 |
for obj in r:
|
208 |
lines.append(obj)
|
209 |
|
210 |
-
lines.append(
|
211 |
with jsonlines.open(f"{LOGS_REPO}/logs.jsonl", mode="w") as writer:
|
212 |
for job in lines:
|
213 |
writer.write(job)
|
214 |
-
logs_repo.push_to_hub(commit_message=f"Submission with job ID {
|
215 |
|
216 |
-
if
|
217 |
st.success(
|
218 |
-
f"✅ Submission {submission_name} was successfully submitted for evaluation
|
219 |
)
|
220 |
st.markdown(
|
221 |
f"""
|
222 |
-
Evaluation
|
223 |
|
224 |
* 📊 Click [here](https://huggingface.co/spaces/GEM/results) to view the results from your submission
|
225 |
* 💾 Click [here]({dataset_repo_url}) to view your submission file on the Hugging Face Hub
|
|
|
1 |
import json
|
2 |
import os
|
3 |
import shutil
|
4 |
+
import uuid
|
5 |
from datetime import datetime
|
6 |
from pathlib import Path
|
7 |
|
8 |
import jsonlines
|
9 |
import streamlit as st
|
10 |
from dotenv import load_dotenv
|
11 |
+
from huggingface_hub import Repository, cached_download, hf_hub_url
|
12 |
|
13 |
+
from utils import http_get, http_post, validate_json
|
14 |
|
15 |
if Path(".env").is_file():
|
16 |
load_dotenv(".env")
|
17 |
|
18 |
HF_TOKEN = os.getenv("HF_TOKEN")
|
19 |
+
AUTOTRAIN_USERNAME = os.getenv("AUTOTRAIN_USERNAME")
|
20 |
+
AUTOTRAIN_BACKEND_API = os.getenv("AUTOTRAIN_BACKEND_API")
|
21 |
LOCAL_REPO = "submission_repo"
|
22 |
LOGS_REPO = "submission-logs"
|
23 |
|
24 |
+
# TODO
|
25 |
# 1. Add check that fields are nested under `tasks` field correctly
|
26 |
# 2. Add check that names of tasks and datasets are valid
|
27 |
|
|
|
69 |
return [score["submission_name"] for score in scores_data]
|
70 |
|
71 |
|
72 |
+
#######
|
73 |
+
# APP #
|
74 |
+
#######
|
75 |
st.title("GEM Submissions")
|
76 |
st.markdown(
|
77 |
"""
|
|
|
145 |
example_submission = json.load(f)
|
146 |
st.json(example_submission)
|
147 |
|
148 |
+
user_name = st.text_input("Enter your 🤗 Hub username", help="This field is required to track your submission and cannot be empty")
|
|
|
149 |
submit_button = st.form_submit_button("Make Submission")
|
150 |
|
151 |
if submit_button and submission_errors == 0:
|
|
|
155 |
submission_time = str(int(datetime.now().timestamp()))
|
156 |
|
157 |
# Create submission dataset under benchmarks ORG
|
158 |
+
submission_repo_id = f"GEM-submissions/{user_name}__{submission_name_formatted}__{submission_time}"
|
159 |
+
dataset_repo_url = f"https://huggingface.co/datasets/{submission_repo_id}"
|
160 |
repo = Repository(
|
161 |
local_dir=LOCAL_REPO,
|
162 |
clone_from=dataset_repo_url,
|
|
|
176 |
else:
|
177 |
commit_sha = repo.git_head_commit_url().split("/")[-1]
|
178 |
|
179 |
+
submission_id = submission_name + "__" + str(uuid.uuid4())[:6] + "__" + submission_time
|
180 |
+
|
181 |
+
# Define AutoTrain payload
|
182 |
+
project_config = {}
|
183 |
+
# Need a dummy dataset to use the dataset loader in AutoTrain
|
184 |
+
project_config["dataset_name"] = "lewtun/imdb-dummy"
|
185 |
+
project_config["dataset_config"] = "lewtun--imdb-dummy"
|
186 |
+
project_config["dataset_split"] = "train"
|
187 |
+
project_config["col_mapping"] = {"text": "text", "label": "target"}
|
188 |
+
# Specify benchmark parameters
|
189 |
+
project_config["model"] = "gem"
|
190 |
+
project_config["dataset"] = "GEM/references"
|
191 |
+
project_config["submission_dataset"] = submission_repo_id
|
192 |
+
project_id = str(uuid.uuid4()).split("-")[0]
|
193 |
+
project_payload = {
|
194 |
+
"username": AUTOTRAIN_USERNAME,
|
195 |
+
"proj_name": f"benchmark-gem-{project_id}",
|
196 |
+
"task": 1,
|
197 |
+
"config": {
|
198 |
+
"language": "en",
|
199 |
+
"max_models": 5,
|
200 |
+
"instance": {
|
201 |
+
"provider": "aws",
|
202 |
+
"instance_type": "ml.g4dn.4xlarge",
|
203 |
+
"max_runtime_seconds": 172800,
|
204 |
+
"num_instances": 1,
|
205 |
+
"disk_size_gb": 150,
|
206 |
+
},
|
207 |
+
"benchmark": {
|
208 |
+
"dataset": project_config["dataset"],
|
209 |
+
"model": project_config["model"],
|
210 |
+
"submission_dataset": project_config["submission_dataset"],
|
211 |
+
},
|
212 |
+
},
|
213 |
+
}
|
214 |
+
project_json_resp = http_post(
|
215 |
+
path="/projects/create", payload=project_payload, token=HF_TOKEN, domain=AUTOTRAIN_BACKEND_API
|
216 |
+
).json()
|
217 |
+
print(f"Project creation: {project_json_resp}")
|
218 |
|
219 |
+
# Upload data
|
220 |
payload = {
|
221 |
+
"split": 4,
|
222 |
+
"col_mapping": project_config["col_mapping"],
|
223 |
+
"load_config": {"max_size_bytes": 0, "shuffle": False},
|
|
|
|
|
|
|
|
|
|
|
|
|
224 |
}
|
225 |
+
data_json_resp = http_post(
|
226 |
+
path=f"/projects/{project_json_resp['id']}/data/{project_config['dataset_name']}",
|
227 |
+
payload=payload,
|
228 |
+
token=HF_TOKEN,
|
229 |
+
domain=AUTOTRAIN_BACKEND_API,
|
230 |
+
params={
|
231 |
+
"type": "dataset",
|
232 |
+
"config_name": project_config["dataset_config"],
|
233 |
+
"split_name": project_config["dataset_split"],
|
234 |
+
},
|
235 |
+
).json()
|
236 |
+
print(f"Dataset creation: {data_json_resp}")
|
237 |
+
|
238 |
+
# Run training
|
239 |
+
train_json_resp = http_get(
|
240 |
+
path=f"/projects/{project_json_resp['id']}/data/start_process",
|
241 |
+
token=HF_TOKEN,
|
242 |
+
domain=AUTOTRAIN_BACKEND_API,
|
243 |
).json()
|
244 |
+
print(f"Training job response: {train_json_resp}")
|
245 |
|
246 |
logs_repo_url = f"https://huggingface.co/datasets/GEM-submissions/{LOGS_REPO}"
|
247 |
logs_repo = Repository(
|
|
|
251 |
private=True,
|
252 |
use_auth_token=HF_TOKEN,
|
253 |
)
|
254 |
+
evaluation_log = {}
|
255 |
+
evaluation_log["payload"] = project_payload
|
256 |
+
evaluation_log["project_creation_response"] = project_json_resp
|
257 |
+
evaluation_log["dataset_creation_response"] = data_json_resp
|
258 |
+
evaluation_log["autotrain_job_response"] = train_json_resp
|
259 |
with jsonlines.open(f"{LOGS_REPO}/logs.jsonl") as r:
|
260 |
lines = []
|
261 |
for obj in r:
|
262 |
lines.append(obj)
|
263 |
|
264 |
+
lines.append(evaluation_log)
|
265 |
with jsonlines.open(f"{LOGS_REPO}/logs.jsonl", mode="w") as writer:
|
266 |
for job in lines:
|
267 |
writer.write(job)
|
268 |
+
logs_repo.push_to_hub(commit_message=f"Submission with job ID {project_json_resp['id']}")
|
269 |
|
270 |
+
if train_json_resp["success"] == 1:
|
271 |
st.success(
|
272 |
+
f"✅ Submission {submission_name} was successfully submitted for evaluation!"
|
273 |
)
|
274 |
st.markdown(
|
275 |
f"""
|
276 |
+
Evaluation can take up to 1 hour to complete, so grab a ☕ or 🍵 while you wait:
|
277 |
|
278 |
* 📊 Click [here](https://huggingface.co/spaces/GEM/results) to view the results from your submission
|
279 |
* 💾 Click [here]({dataset_repo_url}) to view your submission file on the Hugging Face Hub
|
requirements.txt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
python-dotenv
|
2 |
-
huggingface-hub==0.
|
3 |
jsonlines
|
|
|
1 |
python-dotenv
|
2 |
+
huggingface-hub==0.8.1
|
3 |
jsonlines
|
utils.py
CHANGED
@@ -2,7 +2,6 @@ import json
|
|
2 |
|
3 |
import jsonschema
|
4 |
import requests
|
5 |
-
import streamlit as st
|
6 |
|
7 |
|
8 |
def load_schema():
|
@@ -28,17 +27,26 @@ def get_auth_headers(token: str, prefix: str = "autonlp"):
|
|
28 |
return {"Authorization": f"{prefix} {token}"}
|
29 |
|
30 |
|
31 |
-
def http_post(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
32 |
path: str,
|
33 |
token: str,
|
34 |
-
payload=None,
|
35 |
domain: str = None,
|
36 |
) -> requests.Response:
|
37 |
"""HTTP POST request to the AutoNLP API, raises UnreachableAPIError if the API cannot be reached"""
|
38 |
try:
|
39 |
-
response = requests.
|
40 |
-
url=domain + path, json=payload, headers=get_auth_headers(token=token), allow_redirects=True
|
41 |
-
)
|
42 |
except requests.exceptions.ConnectionError:
|
43 |
print("❌ Failed to reach AutoNLP API, check your internet connection")
|
44 |
response.raise_for_status()
|
|
|
2 |
|
3 |
import jsonschema
|
4 |
import requests
|
|
|
5 |
|
6 |
|
7 |
def load_schema():
|
|
|
27 |
return {"Authorization": f"{prefix} {token}"}
|
28 |
|
29 |
|
30 |
+
def http_post(path: str, token: str, payload=None, domain: str = None, params=None) -> requests.Response:
|
31 |
+
"""HTTP POST request to the AutoNLP API, raises UnreachableAPIError if the API cannot be reached"""
|
32 |
+
try:
|
33 |
+
response = requests.post(
|
34 |
+
url=domain + path, json=payload, headers=get_auth_headers(token=token), allow_redirects=True, params=params
|
35 |
+
)
|
36 |
+
except requests.exceptions.ConnectionError:
|
37 |
+
print("❌ Failed to reach AutoNLP API, check your internet connection")
|
38 |
+
response.raise_for_status()
|
39 |
+
return response
|
40 |
+
|
41 |
+
|
42 |
+
def http_get(
|
43 |
path: str,
|
44 |
token: str,
|
|
|
45 |
domain: str = None,
|
46 |
) -> requests.Response:
|
47 |
"""HTTP POST request to the AutoNLP API, raises UnreachableAPIError if the API cannot be reached"""
|
48 |
try:
|
49 |
+
response = requests.get(url=domain + path, headers=get_auth_headers(token=token), allow_redirects=True)
|
|
|
|
|
50 |
except requests.exceptions.ConnectionError:
|
51 |
print("❌ Failed to reach AutoNLP API, check your internet connection")
|
52 |
response.raise_for_status()
|