Create utils/HFManager.py
utils/HFManager.py (ADDED, +91 -0)
import json
import logging
import os
from typing import Dict, List, Optional

from huggingface_hub import HfApi, hf_hub_download

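# Expected shape of each metrics JSON file, inferred from the parsing logic
# below; this is an assumption, not a spec -- producers may add extra fields,
# and the inner "loss" value here is purely illustrative:
#
# {
#     "model_repo": "org/model",
#     "miner_uid": "123",
#     "timestamp": "2024-01-01T00:00:00Z",
#     "metrics": {"job_id": "abc123", "loss": 0.42}
# }
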
def fetch_training_metrics_commits(repo_id: str, token: Optional[str] = None) -> List[Dict]:
    """
    Fetch training metrics from the commit history of a Hugging Face repository.

    Walks every commit, downloads each JSON file at that revision, and collects
    entries that contain both a miner UID and a job ID.

    Args:
        repo_id (str): The repository ID (e.g. "org/repo")
        token (Optional[str]): Hugging Face API token; needed for private repos

    Returns:
        List[Dict]: One entry per valid metrics file, with "model_repo",
        "metrics", "miner_uid", "job_id", and "timestamp" keys.
    """
    try:
        api = HfApi(token=token)
        commits = api.list_repo_commits(repo_id=repo_id)

        training_metrics = []
        valid_entries = 0
        failed_items = 0

        logging.info(f"Found {len(commits)} total commits in repository")

        # Create a clean cache directory
        cache_dir = os.path.expanduser("~/.cache/dashboard_metrics")
        os.makedirs(cache_dir, exist_ok=True)

        for commit in commits:
            try:
                files = api.list_repo_tree(
                    repo_id=repo_id,
                    revision=commit.commit_id
                )
                json_files = [f for f in files if f.path.endswith('.json')]

                for json_file in json_files:
                    try:
                        # Use the custom cache directory
                        local_path = hf_hub_download(
                            repo_id=repo_id,
                            filename=json_file.path,
                            revision=commit.commit_id,
                            cache_dir=cache_dir,
                            force_download=True  # Always get a fresh copy
                        )

                        with open(local_path, 'r') as f:
                            metrics_data = json.load(f)

                        if isinstance(metrics_data, dict) and "metrics" in metrics_data:
                            miner_uid = metrics_data.get("miner_uid")
                            job_id = metrics_data["metrics"].get("job_id")

                            if miner_uid and job_id:
                                metrics_entry = {
                                    "model_repo": metrics_data.get("model_repo", "unknown"),
                                    "metrics": metrics_data["metrics"],
                                    "miner_uid": miner_uid,
                                    "job_id": job_id,
                                    "timestamp": metrics_data.get("timestamp", "unknown")
                                }
                                training_metrics.append(metrics_entry)
                                valid_entries += 1

                    except Exception as e:
                        failed_items += 1
                        logging.warning(f"Error processing file {json_file.path}: {e}")
                        continue

            except Exception as e:
                failed_items += 1
                logging.warning(f"Error processing commit {commit.commit_id}: {e}")
                continue

        # Defensive re-check; entries appended above already satisfy this filter.
        filtered_metrics = [
            entry for entry in training_metrics
            if entry.get('miner_uid') and entry['metrics'].get('job_id')
        ]

        logging.info(f"Collected {valid_entries} valid metrics entries")
        if failed_items > 0:
            logging.warning(f"Failed to process {failed_items} files or commits")

        return filtered_metrics

    except Exception as e:
        logging.error(f"Error fetching commits: {e}")
        return []
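A minimal usage sketch, assuming the module is importable as utils.HFManager and that you can read the target repository (the repo ID below is a placeholder, not part of this commit):

    import os

    from utils.HFManager import fetch_training_metrics_commits

    metrics = fetch_training_metrics_commits(
        "my-org/training-metrics",         # placeholder repo ID
        token=os.environ.get("HF_TOKEN"),  # or None for public repositories
    )
    for entry in metrics:
        print(entry["miner_uid"], entry["job_id"], entry["timestamp"])

Note that every JSON file is re-downloaded with force_download=True at every revision, so runtime grows with commits x files; that is fine for a small dashboard repository, but worth revisiting if the history gets long.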