|
import gradio as gr |
|
import matplotlib.pyplot as plt |
|
import matplotlib.patches as patches |
|
import math |
|
|
|
|
|
|
|
def _positive_int(value, label):
    """Return *value* as an ``int``; raise ``ValueError`` unless it is a whole number >= 1."""
    try:
        number = int(value)
    except (TypeError, ValueError, OverflowError) as err:
        raise ValueError(f"{label} must be a positive integer, got {value!r}") from err
    # ``number != value`` rejects fractional inputs such as 2.5 instead of truncating them.
    if number != value or number < 1:
        raise ValueError(f"{label} must be a positive integer, got {value!r}")
    return number


def visualize_cluster(tp, pp, nodes, nodes_per_row, node_spacing=0.5, gpu_spacing=0.1,
                      gpus_per_row=2, gpus_per_column=4):
    """Draw how a TP x PP x DP configuration maps onto the GPUs of a cluster.

    Every GPU is drawn as a unit square. GPUs are numbered node by node and,
    inside a node, row by row. Consecutive groups of ``tp * pp`` GPUs form one
    model instance (one colour per instance); within an instance, consecutive
    groups of ``tp`` GPUs form one pipeline stage (one shade and number per stage).

    Args:
        tp: Tensor-parallel size (whole number >= 1; integral floats are accepted).
        pp: Pipeline-parallel size (whole number >= 1).
        nodes: Number of compute nodes (whole number >= 1).
        nodes_per_row: How many nodes are drawn next to each other before the
            layout wraps to the next row (whole number >= 1).
        node_spacing: Gap between neighbouring nodes, in GPU-square units.
        gpu_spacing: Gap between neighbouring GPUs of one node, in GPU-square units.
        gpus_per_row: GPUs drawn side by side inside a node.
        gpus_per_column: GPUs stacked vertically inside a node.

    Returns:
        A ``(markdown, figure)`` tuple: a Markdown summary of the resulting
        configuration and the matplotlib figure showing the layout.

    Raises:
        ValueError: If any of the count arguments is not a whole number >= 1.
    """
    # Validate first: UI number fields may deliver floats or None, and a zero
    # here would otherwise surface later as an opaque ZeroDivisionError/TypeError.
    tp = _positive_int(tp, "Tensor Parallel")
    pp = _positive_int(pp, "Pipeline Parallel")
    nodes = _positive_int(nodes, "Number of Compute Nodes")
    nodes_per_row = _positive_int(nodes_per_row, "Number Nodes per Row")
    gpus_per_row = _positive_int(gpus_per_row, "GPUs per row")
    gpus_per_column = _positive_int(gpus_per_column, "GPUs per column")

    total_gpus = nodes * gpus_per_row * gpus_per_column

    # Size of the node grid.
    nnodes_x = min(nodes, nodes_per_row)
    nnodes_y = math.ceil(nodes / nodes_per_row)

    # Extent of a single node: its GPU squares plus the gaps between them.
    node_width = gpus_per_row + (gpus_per_row - 1) * gpu_spacing
    node_height = gpus_per_column + (gpus_per_column - 1) * gpu_spacing

    fig, ax = plt.subplots(figsize=(2 * nnodes_x, 2 * nnodes_y), dpi=200)

    # Leave a margin of one node_spacing on every side of the node grid.
    ax.set_xlim(-node_spacing,
                nnodes_x * node_width + (nnodes_x - 1) * node_spacing + node_spacing)
    ax.set_ylim(-node_spacing,
                nnodes_y * node_height + (nnodes_y - 1) * node_spacing + node_spacing)
    ax.set_xticks([])
    ax.set_yticks([])
    ax.grid(False)
    ax.set_aspect('equal', 'box')
    # Flip the y axis so the first node row is drawn at the top.
    ax.invert_yaxis()

    model_instance = tp * pp
    dp = total_gpus // model_instance
    # GPUs beyond the last complete model instance cannot be used.
    max_gpu_usage = dp * model_instance

    gpu_i = 0
    for node in range(nodes):
        node_row, node_col = divmod(node, nodes_per_row)
        node_x = node_col * (node_width + node_spacing)
        node_y = node_row * (node_height + node_spacing)

        for j in range(gpus_per_column):
            for i in range(gpus_per_row):
                x = node_x + i * (1 + gpu_spacing)
                y = node_y + j * (1 + gpu_spacing)

                used = gpu_i < max_gpu_usage
                if used:
                    model_instance_group = gpu_i // model_instance
                    tp_instance_group = (gpu_i % model_instance) // tp
                    color = f'C{model_instance_group}'
                    # Later pipeline stages get a more opaque shade; the last is solid.
                    alpha = 1 if pp == 1 else (1 + tp_instance_group) / pp
                else:
                    color = "black"
                    alpha = 1

                rect = patches.Rectangle((x, y), 1, 1, linewidth=1, edgecolor='black',
                                         facecolor=color, alpha=alpha)
                ax.add_patch(rect)

                # A black label on a black (unused) GPU would be invisible, so
                # only used GPUs are annotated with their pipeline stage.
                if used and pp > 1:
                    ax.annotate(f"{tp_instance_group+1}", (x + 0.5, y + 0.5), color='black',
                                weight='bold', fontsize=9, ha='center', va='center')
                gpu_i += 1

    # Lay out this figure explicitly instead of pyplot's implicit "current" figure.
    fig.tight_layout()
    # Drop the figure from pyplot's global registry so repeated calls do not
    # accumulate open figures; the returned Figure object can still be rendered.
    plt.close(fig)

    md = "\n".join([
        "## Resulting configuration",
        "",
        f"- **3D config: TP={tp}, PP={pp}, DP={dp}**",
        f"- **one model instance requires {model_instance} GPUs**",
        f"- **{total_gpus - max_gpu_usage} GPUs (in black) cannot be utilized**",
        "- **numbers and color shades indicate pipeline stage if PP>1**",
    ])

    return md, fig
|
|
|
# Gradio UI: four numeric inputs describing the parallelism configuration and
# the cluster, a button to recompute, and a Markdown summary plus plot as outputs.
with gr.Blocks() as demo:
    gr.Markdown("# 3D Parallelism")

    gr.Markdown("## 3D Configuration")
    with gr.Row():
        # precision=0 makes Gradio round each input and pass it to the callback
        # as an int; the callback uses these values in range() and // arithmetic.
        tp = gr.Number(value=4, precision=0, label="Tensor Parallel")
        pp = gr.Number(value=4, precision=0, label="Pipeline Parallel")
        nodes = gr.Number(value=8, precision=0, label="Number of Compute Nodes")
        nodes_per_row = gr.Number(value=8, precision=0, label="Number Nodes per Row")

    button = gr.Button("Compute!")

    with gr.Row():
        with gr.Column():
            md = gr.Markdown("## Resulting configuration:")
            # Starts empty; demo.load below fills it as soon as the page opens,
            # so no implicit pyplot "current figure" is needed as a placeholder.
            plot = gr.Plot()

    # Recompute on demand and once on page load with the default values.
    button.click(fn=visualize_cluster, inputs=[tp, pp, nodes, nodes_per_row], outputs=[md, plot])
    demo.load(fn=visualize_cluster, inputs=[tp, pp, nodes, nodes_per_row], outputs=[md, plot])

# Only start the server when executed as a script, not when imported.
if __name__ == "__main__":
    demo.launch()