Spaces:
Runtime error
Runtime error
File size: 4,608 Bytes
cf4f63b 0e74637 cf4f63b 0e74637 cf4f63b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 |
import datetime
from urllib import parse
import requests
import wandb
URL_QUICKSEARCH = "https://huggingface.co/api/quicksearch?"
WANDB_REPO = "learning-at-home/Worker_logs"
def get_new_bubble_data():
    """Collect everything needed to render the bubble chart.

    Fetches the per-run data points, aggregates them into the serialized
    chart payload, and looks up a profile entry for every key of the data
    points mapping.

    Returns:
        A ``(serialized_data, profiles)`` tuple as produced by
        ``get_serialized_data`` and ``get_profiles`` respectively.
    """
    points_by_name, newest_timestamp = get_serialized_data_points()
    chart_payload = get_serialized_data(points_by_name, newest_timestamp)
    # Profiles are resolved after the payload is built, as before.
    return chart_payload, get_profiles(points_by_name)
def get_profiles(serialized_data_points, *, timeout=10):
    """Build one profile entry (avatar + link) per key of the data points.

    Each key of ``serialized_data_points`` is treated as a Hugging Face
    username and looked up through the quicksearch endpoint. The lookup is
    best-effort: if the request fails, times out, returns something that is
    not JSON, or the response has no usable ``avatarUrl`` for the first
    user, a default avatar is used instead of aborting the whole listing.

    Args:
        serialized_data_points: Mapping whose keys are the usernames to
            resolve (values are not read here).
        timeout: Seconds to wait for each quicksearch request before giving
            up and using the default avatar.

    Returns:
        A list of dicts with the keys ``id``, ``name``, ``src`` (absolute
        avatar URL) and ``url`` (profile page), in the mapping's key order.
    """
    default_avatar = "/avatars/57584cb934354663ac65baa04e6829bf.svg"
    profiles = []
    for username in serialized_data_points:
        query = parse.urlencode({"type": "user", "q": username})
        try:
            # Without a timeout a single stalled request would block forever.
            response = requests.get(URL_QUICKSEARCH + query, timeout=timeout)
            avatar_url = response.json()["users"][0]["avatarUrl"]
        except (requests.RequestException, ValueError, KeyError, IndexError, TypeError):
            # Network failure, non-JSON body, or no matching user.
            avatar_url = default_avatar
        if not isinstance(avatar_url, str):
            # e.g. a null avatarUrl in the response
            avatar_url = default_avatar
        # Site-relative avatar paths must be made absolute for the frontend.
        if avatar_url.startswith("/avatars/"):
            avatar_url = f"https://huggingface.co{avatar_url}"
        profiles.append(
            {
                "id": username,
                "name": username,
                "src": avatar_url,
                "url": f"https://huggingface.co/{username}",
            }
        )
    return profiles
def _naive_utc_from_timestamp(timestamp):
    """Convert a POSIX timestamp to a naive datetime expressed in UTC."""
    # Same result as the deprecated datetime.utcfromtimestamp(): an aware UTC
    # datetime with the tzinfo stripped, so isoformat() output is unchanged.
    aware = datetime.datetime.fromtimestamp(timestamp, tz=datetime.timezone.utc)
    return aware.replace(tzinfo=None)


def _summary_to_data_point(run_summary):
    """Turn one run summary mapping into ``(timestamp, data_point)``.

    Raises KeyError / TypeError / ZeroDivisionError / ValueError /
    OverflowError / OSError when a required field is missing or unusable.
    """
    timestamp = run_summary["_timestamp"]
    steps = run_summary["_step"]
    runtime = run_summary["_runtime"]
    data_point = {
        "batches": steps,
        "runtime": runtime,
        "loss": run_summary["train/loss"],
        "velocity": steps / runtime,
        "date": _naive_utc_from_timestamp(timestamp),
    }
    return timestamp, data_point


def get_serialized_data_points():
    """Fetch all runs of ``WANDB_REPO`` and group their summaries by run name.

    Every run whose summary contains ``_timestamp``, ``_step``, ``_runtime``
    and ``train/loss`` contributes one entry to the ``"Runs"`` list stored
    under its run name. Runs with missing or unusable summary fields (for
    example a zero runtime) are skipped.

    Returns:
        A ``(serialized_data_points, latest_timestamp)`` tuple.
        ``serialized_data_points`` maps run name to
        ``{"profileId": run_name, "Runs": [...]}``. ``latest_timestamp`` is
        the naive UTC datetime of the most recent ``_timestamp`` among the
        accepted runs, or ``None`` when no run was accepted (in which case
        the mapping is empty).
    """
    api = wandb.Api()
    runs = api.runs(WANDB_REPO)

    serialized_data_points = {}
    latest_timestamp = None
    print("**start api call")
    for run in runs:
        run_summary = run.summary._json_dict
        run_name = run.name
        try:
            timestamp, data_point = _summary_to_data_point(run_summary)
        except (KeyError, TypeError, ZeroDivisionError, ValueError, OverflowError, OSError):
            # Best-effort: a run without a complete, valid summary is ignored.
            continue
        profile_entry = serialized_data_points.setdefault(
            run_name, {"profileId": run_name, "Runs": []}
        )
        profile_entry["Runs"].append(data_point)
        # `is None` rather than truthiness so a timestamp of 0 is not
        # mistaken for "not set yet".
        if latest_timestamp is None or timestamp > latest_timestamp:
            latest_timestamp = timestamp
    if latest_timestamp is not None:
        latest_timestamp = _naive_utc_from_timestamp(latest_timestamp)
    print("**finish api call")
    return serialized_data_points, latest_timestamp
def get_serialized_data(serialized_data_points, latest_timestamp):
    """Aggregate each profile's runs into a single chart point.

    A run is considered active when its ``"date"`` equals
    ``latest_timestamp`` exactly. For every profile:

    * ``loss`` is the mean loss over the active runs (0 if there are none),
    * ``velocity`` is the summed velocity of the active runs,
    * ``runtime`` and ``batches`` are summed over all of the profile's runs,
    * ``activeRuns`` lists the active runs with ``"date"`` as an ISO string.

    The input mapping is left untouched: active runs are emitted as copies,
    so calling this function again with the same arguments gives the same
    result.

    Args:
        serialized_data_points: Mapping of profile name to a dict holding a
            ``"Runs"`` list; each run has ``batches``, ``runtime``, ``loss``,
            ``velocity`` and ``date`` (a datetime).
        latest_timestamp: Datetime identifying the active runs. Only read
            when the mapping is non-empty.

    Returns:
        ``{"points": [list_of_points], "maxVelocity": 1}`` where each point
        has the keys ``date``, ``profileId``, ``batches``, ``runtime``,
        ``loss``, ``velocity`` and ``activeRuns``.
    """
    # Emitted as-is under "maxVelocity"; this function never recomputes it.
    max_velocity = 1
    points = []
    for run_name, serialized_data_point in serialized_data_points.items():
        runs = serialized_data_point["Runs"]
        # Copy each active run so the caller's data keeps its datetime objects.
        active_runs = [
            {**run, "date": run["date"].isoformat()}
            for run in runs
            if run["date"] == latest_timestamp
        ]
        # Divide once, after all active losses are summed, to get a true mean.
        if active_runs:
            loss = sum(run["loss"] for run in active_runs) / len(active_runs)
        else:
            loss = 0
        points.append(
            {
                "date": latest_timestamp.isoformat(),
                "profileId": run_name,
                "batches": sum(run["batches"] for run in runs),
                "runtime": sum(run["runtime"] for run in runs),
                "loss": loss,
                "velocity": sum(run["velocity"] for run in active_runs),
                "activeRuns": active_runs,
            }
        )
    return {"points": [points], "maxVelocity": max_velocity}
|