pingnie committed on
Commit
84f0fa3
·
1 Parent(s): 659bce1

add gpu info

Browse files
backend-cli.py CHANGED
@@ -166,8 +166,13 @@ def process_evaluation(task: Task, eval_request: EvalRequest, limit: Optional[in
166
  gpu_info = analyze_gpu_stats(gpu_stats_list)
167
  for task_name in results['results'].keys():
168
  for key, value in gpu_info.items():
169
- results['results'][task_name][f"{key},none"] = int(value)
 
 
 
170
 
 
 
171
  print("GPU Usage:", gpu_info)
172
 
173
  dumped = json.dumps(results, indent=2, default=lambda o: "<not serializable>")
@@ -430,25 +435,31 @@ if __name__ == "__main__":
430
  if local_debug:
431
  # debug_model_names = [args.model] # Use model from arguments
432
  # debug_task_name = [args.task] # Use task from arguments
433
- debug_model_names = ["mistralai/Mixtral-8x7B-Instruct-v0.1", "mistralai/Mixtral-8x7B-v0.1"] # Use model from arguments
 
 
 
434
  debug_task_name = ['mmlu', 'selfcheckgpt'] # Use task from arguments
435
- precisions = ['float16', 'float16', '8bit']
436
  task_lst = TASKS_HARNESS.copy()
437
  for precision in precisions:
438
- for task in task_lst:
439
- for debug_model_name in debug_model_names:
440
  task_name = task.benchmark
441
  if task_name not in debug_task_name:
442
  continue
443
- eval_request = EvalRequest(
444
- model=debug_model_name,
445
- private=False,
446
- status="",
447
- json_filepath="",
448
- precision=args.precision, # Use precision from arguments
449
- inference_framework=args.inference_framework # Use inference framework from arguments
450
- )
451
- results = process_evaluation(task, eval_request, limit=args.limit)
 
 
 
452
  else:
453
  while True:
454
  res = False
 
166
  gpu_info = analyze_gpu_stats(gpu_stats_list)
167
  for task_name in results['results'].keys():
168
  for key, value in gpu_info.items():
169
+ if "GPU" not in key:
170
+ results['results'][task_name][f"{key},none"] = int(value)
171
+ else:
172
+ results['results'][task_name][f"{key},none"] = value
173
 
174
+ results['results'][task_name]['batch_size,none'] = batch_size
175
+ print(f"gpu_stats_list: {gpu_stats_list}")
176
  print("GPU Usage:", gpu_info)
177
 
178
  dumped = json.dumps(results, indent=2, default=lambda o: "<not serializable>")
 
435
  if local_debug:
436
  # debug_model_names = [args.model] # Use model from arguments
437
  # debug_task_name = [args.task] # Use task from arguments
438
+ debug_model_names = ["microsoft/phi-2", "mistralai/Mixtral-8x7B-Instruct-v0.1", "mistralai/Mixtral-8x7B-v0.1",
439
+ "databricks/dbrx-instruct", "databricks/dbrx-base",
440
+ "mistralai/Mixtral-8x22B-v0.1", "mistralai/Mixtral-8x22B-Instruct-v0.1", "alpindale/WizardLM-2-8x22B",
441
+ "CohereForAI/c4ai-command-r-plus"] # Use model from arguments
442
  debug_task_name = ['mmlu', 'selfcheckgpt'] # Use task from arguments
443
+ precisions = ['4bit', 'float16', 'float32', '8bit']
444
  task_lst = TASKS_HARNESS.copy()
445
  for precision in precisions:
446
+ for debug_model_name in debug_model_names:
447
+ for task in task_lst:
448
  task_name = task.benchmark
449
  if task_name not in debug_task_name:
450
  continue
451
+ try:
452
+ eval_request = EvalRequest(
453
+ model=debug_model_name,
454
+ private=False,
455
+ status="",
456
+ json_filepath="",
457
+ precision=precision, # Use precision from arguments
458
+ inference_framework=args.inference_framework # Use inference framework from arguments
459
+ )
460
+ results = process_evaluation(task, eval_request, limit=args.limit)
461
+ except Exception as e:
462
+ print(f"debug running error: {e}")
463
  else:
464
  while True:
465
  res = False
src/backend/manage_requests.py CHANGED
@@ -37,12 +37,11 @@ class EvalRequest:
37
  # Quantized models need some added config, the install of bits and bytes, etc
38
  # elif self.precision == "8bit":
39
  # model_args += ",load_in_8bit=True"
40
- # elif self.precision == "4bit":
41
- # model_args += ",load_in_4bit=True"
42
  # elif self.precision == "GPTQ":
43
  # A GPTQ model does not need dtype to be specified,
44
  # it will be inferred from the config
45
- pass
46
  elif self.precision == "8bit":
47
  model_args += ",load_in_8bit=True"
48
  else:
 
37
  # Quantized models need some added config, the install of bits and bytes, etc
38
  # elif self.precision == "8bit":
39
  # model_args += ",load_in_8bit=True"
40
+ elif self.precision == "4bit":
41
+ model_args += ",load_in_4bit=True"
42
  # elif self.precision == "GPTQ":
43
  # A GPTQ model does not need dtype to be specified,
44
  # it will be inferred from the config
 
45
  elif self.precision == "8bit":
46
  model_args += ",load_in_8bit=True"
47
  else:
src/display/utils.py CHANGED
@@ -16,7 +16,24 @@ MULTIPLE_CHOICEs = ["mmlu"]
16
  GPU_TEMP = 'Temp(C)'
17
  GPU_Power = 'Power(W)'
18
  GPU_Mem = 'Mem(M)'
 
19
  GPU_Util = 'Util(%)'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
  @dataclass
22
  class Task:
@@ -87,14 +104,16 @@ for task in Tasks:
87
  auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])
88
  # System performance metrics
89
  auto_eval_column_dict.append([f"{task.name}_end_to_end_time", ColumnContent, ColumnContent(f"{task.value.col_name} {E2Es}", "number", True)])
 
 
90
  if task.value.benchmark in MULTIPLE_CHOICEs:
91
  continue
92
- auto_eval_column_dict.append([f"{task.name}_prefilling_time", ColumnContent, ColumnContent(f"{task.value.col_name} {PREs}", "number", True)])
93
  auto_eval_column_dict.append([f"{task.name}_decoding_throughput", ColumnContent, ColumnContent(f"{task.value.col_name} {TS}", "number", True)])
94
 
95
  auto_eval_column_dict.append([f"{task.name}_gpu_mem", ColumnContent, ColumnContent(f"{task.value.col_name} {GPU_Mem}", "number", True)])
96
- auto_eval_column_dict.append([f"{task.name}_gpu_power", ColumnContent, ColumnContent(f"{task.value.col_name} {GPU_Power}", "number", True)])
97
- auto_eval_column_dict.append([f"{task.name}_gpu_temp", ColumnContent, ColumnContent(f"{task.value.col_name} {GPU_TEMP}", "number", True)])
98
  auto_eval_column_dict.append([f"{task.name}_gpu_util", ColumnContent, ColumnContent(f"{task.value.col_name} {GPU_Util}", "number", True)])
99
 
100
  # Model information
 
16
  GPU_TEMP = 'Temp(C)'
17
  GPU_Power = 'Power(W)'
18
  GPU_Mem = 'Mem(M)'
19
+ GPU_Name = "GPU"
20
  GPU_Util = 'Util(%)'
21
+ BATCH_SIZE = 'bs'
22
+
23
+ system_metrics_to_name_map = {
24
+ "end_to_end_time": f"{E2Es}",
25
+ "prefilling_time": f"{PREs}",
26
+ "decoding_throughput": f"{TS}",
27
+ }
28
+
29
+ gpu_metrics_to_name_map = {
30
+ GPU_Util: GPU_Util,
31
+ GPU_TEMP: GPU_TEMP,
32
+ GPU_Power: GPU_Power,
33
+ GPU_Mem: GPU_Mem,
34
+ "batch_size": BATCH_SIZE,
35
+ GPU_Name: GPU_Name,
36
+ }
37
 
38
  @dataclass
39
  class Task:
 
104
  auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])
105
  # System performance metrics
106
  auto_eval_column_dict.append([f"{task.name}_end_to_end_time", ColumnContent, ColumnContent(f"{task.value.col_name} {E2Es}", "number", True)])
107
+ auto_eval_column_dict.append([f"{task.name}_batch_size", ColumnContent, ColumnContent(f"{task.value.col_name} {BATCH_SIZE}", "number", True)])
108
+ auto_eval_column_dict.append([f"{task.name}_gpu", ColumnContent, ColumnContent(f"{task.value.col_name} {GPU_Name}", "str", True)])
109
  if task.value.benchmark in MULTIPLE_CHOICEs:
110
  continue
111
+ auto_eval_column_dict.append([f"{task.name}_prefilling_time", ColumnContent, ColumnContent(f"{task.value.col_name} {PREs}", "number", False)])
112
  auto_eval_column_dict.append([f"{task.name}_decoding_throughput", ColumnContent, ColumnContent(f"{task.value.col_name} {TS}", "number", True)])
113
 
114
  auto_eval_column_dict.append([f"{task.name}_gpu_mem", ColumnContent, ColumnContent(f"{task.value.col_name} {GPU_Mem}", "number", True)])
115
+ auto_eval_column_dict.append([f"{task.name}_gpu_power", ColumnContent, ColumnContent(f"{task.value.col_name} {GPU_Power}", "number", False)])
116
+ auto_eval_column_dict.append([f"{task.name}_gpu_temp", ColumnContent, ColumnContent(f"{task.value.col_name} {GPU_TEMP}", "number", False)])
117
  auto_eval_column_dict.append([f"{task.name}_gpu_util", ColumnContent, ColumnContent(f"{task.value.col_name} {GPU_Util}", "number", True)])
118
 
119
  # Model information
src/leaderboard/read_evals.py CHANGED
@@ -103,6 +103,10 @@ class EvalResult:
103
 
104
  if to_add is True:
105
  multiplier = 100.0
 
 
 
 
106
  if "rouge" in metric and "truthful" not in benchmark:
107
  multiplier = 1.0
108
  if "squad" in benchmark:
@@ -111,6 +115,10 @@ class EvalResult:
111
  multiplier = 1.0
112
  if "throughput" in metric:
113
  multiplier = 1.0
 
 
 
 
114
  # print('RESULTS', data['results'])
115
  # print('XXX', benchmark, metric, value, multiplier)
116
  results[benchmark][metric] = value * multiplier
 
103
 
104
  if to_add is True:
105
  multiplier = 100.0
106
+ if "GPU" in metric:
107
+ results[benchmark][metric] = value
108
+ continue
109
+
110
  if "rouge" in metric and "truthful" not in benchmark:
111
  multiplier = 1.0
112
  if "squad" in benchmark:
 
115
  multiplier = 1.0
116
  if "throughput" in metric:
117
  multiplier = 1.0
118
+ if "batch_" in metric or "Mem" in metric or "Util" in metric:
119
+ multiplier = 1
120
+
121
+
122
  # print('RESULTS', data['results'])
123
  # print('XXX', benchmark, metric, value, multiplier)
124
  results[benchmark][metric] = value * multiplier
src/populate.py CHANGED
@@ -12,7 +12,7 @@ from src.leaderboard.read_evals import get_raw_eval_results, EvalResult, update_
12
 
13
  from src.backend.envs import Tasks as BackendTasks
14
  from src.display.utils import Tasks
15
- from src.display.utils import E2Es, PREs, TS, GPU_Mem, GPU_Power, GPU_TEMP, GPU_Util
16
 
17
  def get_leaderboard_df(
18
  results_path: str,
@@ -45,19 +45,7 @@ def get_leaderboard_df(
45
  bm = (task.benchmark, task.metric)
46
  name_to_bm_map[name] = bm
47
 
48
- # bm_to_name_map = {bm: name for name, bm in name_to_bm_map.items()}
49
- system_metrics_to_name_map = {
50
- "end_to_end_time": f"{E2Es}",
51
- "prefilling_time": f"{PREs}",
52
- "decoding_throughput": f"{TS}",
53
- }
54
 
55
- gpu_metrics_to_name_map = {
56
- GPU_Util: GPU_Util,
57
- GPU_TEMP: GPU_TEMP,
58
- GPU_Power: GPU_Power,
59
- GPU_Mem: GPU_Mem
60
- }
61
 
62
  all_data_json = []
63
  for entry in all_data_json_:
@@ -73,7 +61,6 @@ def get_leaderboard_df(
73
  for gpu_metric, metric_namne in gpu_metrics_to_name_map.items():
74
  if gpu_metric in entry[k]:
75
  new_entry[f"{k} {metric_namne}"] = entry[k][gpu_metric]
76
-
77
  all_data_json += [new_entry]
78
 
79
  # all_data_json.append(baseline_row)
 
12
 
13
  from src.backend.envs import Tasks as BackendTasks
14
  from src.display.utils import Tasks
15
+ from src.display.utils import system_metrics_to_name_map, gpu_metrics_to_name_map
16
 
17
  def get_leaderboard_df(
18
  results_path: str,
 
45
  bm = (task.benchmark, task.metric)
46
  name_to_bm_map[name] = bm
47
 
 
 
 
 
 
 
48
 
 
 
 
 
 
 
49
 
50
  all_data_json = []
51
  for entry in all_data_json_:
 
61
  for gpu_metric, metric_namne in gpu_metrics_to_name_map.items():
62
  if gpu_metric in entry[k]:
63
  new_entry[f"{k} {metric_namne}"] = entry[k][gpu_metric]
 
64
  all_data_json += [new_entry]
65
 
66
  # all_data_json.append(baseline_row)
src/utils.py CHANGED
@@ -3,10 +3,10 @@ from huggingface_hub import snapshot_download
3
  import subprocess
4
  import re
5
  try:
6
- from src.display.utils import GPU_TEMP, GPU_Mem, GPU_Power, GPU_Util
7
  except:
8
  print("local debug: from display.utils")
9
- from display.utils import GPU_TEMP, GPU_Mem, GPU_Power, GPU_Util
10
 
11
  def my_snapshot_download(repo_id, revision, local_dir, repo_type, max_workers):
12
  for i in range(10):
@@ -49,24 +49,36 @@ def parse_nvidia_smi():
49
 
50
  # Regex to extract the relevant data for each GPU
51
  gpu_info_pattern = re.compile(r'(\d+)C\s+P\d+\s+(\d+)W / \d+W\s+\|\s+(\d+)MiB / \d+MiB\s+\|\s+(\d+)%')
 
52
  lines = output.split('\n')
53
-
54
  for line in lines:
55
  match = gpu_info_pattern.search(line)
 
 
 
 
 
 
 
56
  if match:
57
  temp, power_usage, mem_usage, gpu_util = map(int, match.groups())
58
- gpu_stats.append({
59
  GPU_TEMP: temp,
60
  GPU_Power: power_usage,
61
  GPU_Mem: mem_usage,
62
  GPU_Util: gpu_util
63
  })
64
-
 
 
 
65
  gpu_stats_total = {
66
  GPU_TEMP: 0,
67
  GPU_Power: 0,
68
  GPU_Mem: 0,
69
- GPU_Util: 0
 
70
  }
71
  for gpu_stat in gpu_stats:
72
  gpu_stats_total[GPU_TEMP] += gpu_stat[GPU_TEMP]
@@ -77,7 +89,6 @@ def parse_nvidia_smi():
77
  gpu_stats_total[GPU_TEMP] /= len(gpu_stats)
78
  gpu_stats_total[GPU_Power] /= len(gpu_stats)
79
  gpu_stats_total[GPU_Util] /= len(gpu_stats)
80
-
81
  return [gpu_stats_total]
82
 
83
  def monitor_gpus(stop_event, interval, stats_list):
@@ -88,11 +99,28 @@ def monitor_gpus(stop_event, interval, stats_list):
88
  stop_event.wait(interval)
89
 
90
  def analyze_gpu_stats(stats_list):
 
91
  if not stats_list:
92
  return None
93
- avg_stats = {key: sum(d[key] for d in stats_list) / len(stats_list) for key in stats_list[0]}
94
- return avg_stats
95
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96
 
97
  if __name__ == "__main__":
98
  print(analyze_gpu_stats(parse_nvidia_smi()))
 
3
  import subprocess
4
  import re
5
  try:
6
+ from src.display.utils import GPU_TEMP, GPU_Mem, GPU_Power, GPU_Util, GPU_Name
7
  except:
8
  print("local debug: from display.utils")
9
+ from display.utils import GPU_TEMP, GPU_Mem, GPU_Power, GPU_Util, GPU_Name
10
 
11
  def my_snapshot_download(repo_id, revision, local_dir, repo_type, max_workers):
12
  for i in range(10):
 
49
 
50
  # Regex to extract the relevant data for each GPU
51
  gpu_info_pattern = re.compile(r'(\d+)C\s+P\d+\s+(\d+)W / \d+W\s+\|\s+(\d+)MiB / \d+MiB\s+\|\s+(\d+)%')
52
+ gpu_name_pattern = re.compile(r'NVIDIA\s+([\w\s]+?\d+GB)')
53
  lines = output.split('\n')
54
+ gpu_name = ""
55
  for line in lines:
56
  match = gpu_info_pattern.search(line)
57
+ name_match = gpu_name_pattern.search(line)
58
+
59
+ gpu_info = {}
60
+
61
+ if name_match:
62
+ # print(name_match)
63
+ gpu_name = name_match.group(1).strip()
64
  if match:
65
  temp, power_usage, mem_usage, gpu_util = map(int, match.groups())
66
+ gpu_info.update({
67
  GPU_TEMP: temp,
68
  GPU_Power: power_usage,
69
  GPU_Mem: mem_usage,
70
  GPU_Util: gpu_util
71
  })
72
+ # print(f"gpu_info: {gpu_info}")
73
+ if len(gpu_info) >= 4:
74
+ gpu_stats.append(gpu_info)
75
+ gpu_name = f"{len(gpu_stats)}x{gpu_name}"
76
  gpu_stats_total = {
77
  GPU_TEMP: 0,
78
  GPU_Power: 0,
79
  GPU_Mem: 0,
80
+ GPU_Util: 0,
81
+ GPU_Name: gpu_name
82
  }
83
  for gpu_stat in gpu_stats:
84
  gpu_stats_total[GPU_TEMP] += gpu_stat[GPU_TEMP]
 
89
  gpu_stats_total[GPU_TEMP] /= len(gpu_stats)
90
  gpu_stats_total[GPU_Power] /= len(gpu_stats)
91
  gpu_stats_total[GPU_Util] /= len(gpu_stats)
 
92
  return [gpu_stats_total]
93
 
94
  def monitor_gpus(stop_event, interval, stats_list):
 
99
  stop_event.wait(interval)
100
 
101
  def analyze_gpu_stats(stats_list):
102
+ # Check if the stats_list is empty, and return None if it is
103
  if not stats_list:
104
  return None
 
 
105
 
106
+ # Initialize dictionaries to store the stats
107
+ avg_stats = {}
108
+ max_stats = {}
109
+
110
+ # Calculate average stats, excluding 'GPU_Mem'
111
+ for key in stats_list[0].keys():
112
+ if key != GPU_Mem and key != GPU_Name:
113
+ total = sum(d[key] for d in stats_list)
114
+ avg_stats[key] = total / len(stats_list)
115
+
116
+ # Calculate max stats for 'GPU_Mem'
117
+ max_stats[GPU_Mem] = max(d[GPU_Mem] for d in stats_list)
118
+ if GPU_Name in stats_list[0]:
119
+ avg_stats[GPU_Name] = stats_list[0][GPU_Name]
120
+ # Update average stats with max GPU memory usage
121
+ avg_stats.update(max_stats)
122
+
123
+ return avg_stats
124
 
125
  if __name__ == "__main__":
126
  print(analyze_gpu_stats(parse_nvidia_smi()))