# %%
# %cd ~/docs/0425-ml_summit/scripts/
import plotly.express as px
from plotly.graph_objs import Figure, FigureWidget
import datasets
import pandas as pd
import huggingface_hub
import plotly.graph_objs as go
import numpy as np
from PIL import Image

# Registry of the figures built by this script, keyed by a short name.
FIGURES: dict[str, Figure] = {}


def _parse_sequence_length(label: str) -> float:
    """Convert a sequence-length label such as ``"4k"`` into a number.

    A trailing ``k``/``K`` multiplies the value by 1024; a label without a
    suffix is parsed as a plain number.  Integer labels stay integers.

    Parsed explicitly rather than with ``eval`` so that a malformed CSV cell
    raises ``ValueError`` instead of being executed as Python code.
    """
    text = str(label).strip()
    factor = 1
    if text[-1:] in ("k", "K"):
        text, factor = text[:-1], 1024
    try:
        value = int(text)
    except ValueError:
        value = float(text)
    return value * factor


# %%
# Treemap of NLP datasets, nested as root -> task -> dataset, sized by "size".
df = pd.read_csv("nlp_datas.csv")
fig = px.treemap(
    df,
    path=[px.Constant("nlp-datasets"), "task", "dataset"],
    values="size",
    # color="dataset",
    # hover_data=["iso_alpha"],
    # color_continuous_scale="RdBu",
)
FIGURES["nlp"] = fig
fig.update_layout(
    paper_bgcolor="rgba(0,0,0,0)",  # transparent page background
    # autosize=True,
    margin=dict(t=0, l=0, r=0, b=0),
    # plot_bgcolor='rgba(0,0,0,0)',
)
# fig.update_traces(marker=dict(pattern=dict(shape=["|"], solidity=0.80)))
# fig.update_layout(margin = dict(t=50, l=25, r=25, b=25))
# figs.append(fig)
fig

# %%
# Treemap of LLM datasets, nested as root -> dataset, sized by "size".
df = pd.read_csv("llm.csv")
fig = px.treemap(
    df,
    path=[px.Constant("LLM"), "dataset"],
    values="size",
    # color="dataset",
    # hover_data=["iso_alpha"],
    # color_continuous_scale="RdBu",
)
FIGURES["gpt"] = fig
fig.update_layout(
    paper_bgcolor="rgba(0,0,0,0)",
    # autosize=True,
    margin=dict(t=0, l=0, r=0, b=0),
    # plot_bgcolor='rgba(0,0,0,0)',
)
# fig.update_layout(margin = dict(t=50, l=25, r=25, b=25))
fig

# %%
# Line chart of time against sequence length, one line per model column.
df = pd.read_csv("./seq-time.csv", index_col=0)
# The index holds labels such as "4k"; turn them into numeric lengths.
df.index = df.index.map(_parse_sequence_length)
# NOTE(review): the divisor 7 is not explained anywhere in this file -- confirm.
df["platformers"] = df["platformers"] / 7
# The last column of the CSV is not plotted.
df = df.drop([df.columns[-1]], axis=1)
# Wide -> long: one row per (sequence length, model) pair.
df = df.reset_index(names="sequence length").melt(
    id_vars="sequence length", var_name="model", value_name="time"
)
fig = px.line(df, x="sequence length", y="time", color="model")
FIGURES["seq-time"] = fig
fig.update_layout(
    paper_bgcolor="rgba(0,0,0,0)",
    # autosize=True,
    margin=dict(t=0, l=0, r=0, b=0),
    plot_bgcolor="rgba(0,0,0,0)",
    legend_font=dict(color="white"),
)
fig.update_xaxes(
    color="white",
)
fig.update_yaxes(
    # showticklabels=False,
    # zeroline=False,
    # showline=False,
    # griddash="4px",
    # gridcolor="rgba(255,255,255,0.3)",
    # title="Loss",
    color="white",
)
fig

# %%
# Grouped bar chart of TFLOPS against sequence length, one bar per model.
df = pd.read_csv("seq-tflops.csv", index_col=0)
# df['sequence length']
# df.index = df.index.map(lambda x: eval(x.replace("K", "*1024")))
df = df.reset_index(names="sequence length").melt(
    id_vars="sequence length", var_name="model", value_name="tflops"
)
fig = px.bar(df, x="sequence length", y="tflops", color="model", barmode="group")
FIGURES["seq-tflops"] = fig
fig.update_layout(
    paper_bgcolor="rgba(0,0,0,0)",
    # autosize=True,
    margin=dict(t=0, l=0, r=0, b=0),
    plot_bgcolor="rgba(0,0,0,0)",
    legend_font=dict(color="white"),
)
fig.update_xaxes(
    color="white",
)
fig.update_yaxes(
    # showticklabels=False,
    # zeroline=False,
    # showline=False,
    # griddash="4px",
    # gridcolor="rgba(255,255,255,0.3)",
    # title="Loss",
    color="white",
)
fig

# %%
# Table preview of the first 100 training rows of the webvid dataset.
df = datasets.load_dataset("SUSTech/webvid", split="train[:100]").to_pandas()
df = df.drop(["duration"], axis=1)
fig = go.Figure(
    data=[
        go.Table(
            header=dict(
                values=list(df.columns), fill_color="paleturquoise", align="left"
            ),
            cells=dict(
                values=[df[col] for col in df.columns],
                fill_color="lavender",
                align="left",
                # alignsrc="center",
            ),
        )
    ]
)
fig.update_layout(
    paper_bgcolor="rgba(0,0,0,0)",
    # autosize=True,
    margin=dict(t=0, l=0, r=0, b=0),
    # plot_bgcolor='rgba(0,0,0,0)',
)
# fig.show()
FIGURES["webvid"] = fig

# %%
# Bar chart of token counts for a handful of example tasks.
data = {
    "402-page transcripts from Apollo 11’s mission to the moon": 326914,
    "44-minute silent Buster Keaton movie": 696417,
    "more than 100,000 lines of code": 816767,
    "Generate 1min video": 1000000,
}
df = pd.Series(data, name="token").to_frame().reset_index(names="task")
# df = px.data.gapminder().query("continent == 'Europe' and year == 2007 and pop > 2.e6")
fig = px.bar(
    df,
    y="token",
    x="task",
    text_auto=".2s",
    # template="ggplot2",
    # color="white",
    # orientation="h",
)
FIGURES["token-bar"] = fig
fig.update_traces(
    textfont_size=12,
    textangle=0,
    textposition="outside",
    cliponaxis=False,
    textfont_color="white",
)
fig.update_layout(
    paper_bgcolor="rgba(0,0,0,0)",
    # autosize=True,
    margin=dict(t=0, l=0, r=0, b=0),
    plot_bgcolor="rgba(0,0,0,0)",
    legend_font=dict(color="white"),
)
fig.update_xaxes(
    color="white",
    # showticklabels=False,
    zeroline=False,
    showline=False,
    showgrid=False,
    title="",
)
fig.update_yaxes(
    # showticklabels=False,
    showline=False,
    showgrid=False,
    zeroline=False,
    # griddash="4px",
    # gridcolor="rgba(255,255,255,0.3)",
    # title="Loss",
    color="white",
)
fig


# %%
def generate_loss(steps, initial_loss, decay_rate, noise_factor):
    """Return a synthetic loss curve evaluated at ``steps``.

    The curve is ``initial_loss * exp(-decay_rate * steps)`` plus Gaussian
    noise whose scale is ``noise_factor`` times the noiseless value.  The
    noise is drawn from NumPy's global RNG, so results differ between runs.
    """
    loss = initial_loss * np.exp(-decay_rate * steps)
    noise = noise_factor * loss * np.random.randn(*steps.shape)
    return loss + noise


def splitpoints(total, split):
    """Yield ``split`` consecutive slices that together cover ``total`` items.

    Every slice except the last spans ``total // split`` items; the last one
    is open-ended, so it also takes whatever remainder is left.

    Raises:
        ValueError: if ``split`` is smaller than 1.
    """
    if split < 1:
        raise ValueError(f"split must be at least 1, got {split}")
    step = total // split
    for i in range(split - 1):
        yield slice(i * step, (i + 1) * step)
    # Derived from ``split`` instead of the loop variable: with ``split == 1``
    # the loop body never runs and ``i`` would be unbound here.
    yield slice((split - 1) * step, None)


# One entry per training phase: trace name and colour, plus the provider logo
# and where to place it (paper coordinates, ``size`` used for width and height).
meta = [
    {
        "name": "2xDGX on aws",
        "color": "red",
        "icon": "../figures/aws-white.png",
        "icon_box": dict(x=0.17, y=0.7, size=0.15),
    },
    {
        "name": "16xDGX on aliyun",
        "color": "orange",
        "icon": "../figures/aliyun.png",
        "icon_box": dict(x=0.43, y=0.3, size=0.15),
    },
    {
        "name": "128xDGX on ucloud",
        "color": "blue",
        "icon": "../figures/logo/ucloud.png",
        "icon_box": dict(x=0.8, y=0.2, size=0.2),
    },
]

num_steps = 1000
steps = np.linspace(0, 1, num_steps)
loss = generate_loss(steps, initial_loss=1, decay_rate=5, noise_factor=0.1)
fig = go.Figure()
# fig.update_layout(
#     title="Training Loss by Steps", xaxis_title="Steps", yaxis_title="Loss"
# )
FIGURES["cloud-switch"] = fig
# Draw the single loss curve as consecutive segments, one per phase, each in
# its own colour and accompanied by its provider logo.
for entry, idx in zip(meta, splitpoints(num_steps, len(meta))):
    fig.add_trace(
        go.Scatter(
            x=steps[idx],
            y=loss[idx],
            mode="lines",
            name=entry["name"],
            line=dict(color=entry["color"]),
        )
    )
    box = entry["icon_box"]
    fig.add_layout_image(
        x=box["x"],
        sizex=box["size"],
        y=box["y"],
        sizey=box["size"],
        xref="paper",
        yref="paper",
        opacity=1.0,
        layer="above",
        source=Image.open(entry["icon"]),
    )
fig.update_layout(
    showlegend=False,
    paper_bgcolor="rgba(0,0,0,0)",
    plot_bgcolor="rgba(255,255,255,0)",
    # plot_bgcolor="rgba(255,255,0)",
    # width=1120,
)
fig.update_xaxes(
    showticklabels=False,
    # ticklabelposition="inside left",
    showline=False,
    zeroline=False,
    showgrid=False,
    # title=dict(text="Steps", standoff=250),
    automargin=True,
)
fig.update_yaxes(
    showticklabels=False,
    zeroline=False,
    showline=False,
    griddash="4px",
    gridcolor="rgba(255,255,255,0.3)",
    title="Loss",
    color="white",
)
fig


# %%
def plot_gantt(df):
    """Build a Gantt-style timeline figure from ``df``.

    ``df`` must provide ``Start``, ``End`` and ``Task`` columns; every row
    becomes one bar on the lane of its task, coloured by task.
    """
    fig = px.timeline(df, x_start="Start", x_end="End", y="Task", color="Task")
    fig.update_layout(
        xaxis_tickformat="%H:%M",
        showlegend=False,
        paper_bgcolor="rgba(0,0,0,0)",
        # plot_bgcolor="rgba(255,255,255,0.3)",
        plot_bgcolor="rgba(255,255,255,0)",
        # plot_bgcolor="rgba(255,255,0)",
        # width=1120,
    )
    fig.update_xaxes(
        showticklabels=False,
        # ticklabelposition="inside left",
        showline=False,
        zeroline=False,
        showgrid=False,
        # title=dict(text="Steps", standoff=250),
        automargin=True,
    )
    fig.update_yaxes(
        # showticklabels=False,
        zeroline=False,
        showline=False,
        griddash="4px",
        gridcolor="rgba(0,0,0,0.3)",
        title="",
        color="white",
        tickfont=dict(size=20),
    )
    return fig


# for hour slots randomly assign a task
num_rows = 1000
download_prop = 0.65
df = pd.DataFrame(
    {"Start": pd.date_range("1-jan-2021", periods=num_rows, freq="4h")}
).assign(
    End=lambda d: d.Start + pd.Timedelta(hours=1),
    Task=np.random.choice(
        ["Read", "Transform"], num_rows, p=(download_prop, 1 - download_prop)
    ),
)
# Pin the first and last slot so both task lanes are always present.
df.loc[0, "Task"] = "Read"
df.loc[len(df) - 1, "Task"] = "Transform"
# Merge each run of consecutive identical tasks into a single bar.  The group
# key is a running count of task changes, so the result is indexed 1..len(df).
df = df.groupby(df.Task.ne(df.Task.shift()).cumsum()).agg(
    {"Start": "min", "End": "max", "Task": "first"}
)
timeline = df.copy()

# %%
# "naive" profile: three training windows taken from a 10-point time grid.
df = timeline.copy()
ddi = pd.date_range(df.iloc[0].Start, end=df.iloc[-1].End, periods=10)
for start, end in zip(ddi[2:-1:3], ddi[3::3]):
    # Relabel the bars that start inside the window ...
    df.loc[df["Start"].between(start, end), "Task"] = "Train"
    # ... and append one bar covering the whole window; len(df) + 1 is the
    # next unused label because the index runs 1..len(df).
    df.loc[len(df) + 1] = pd.Series({"Start": start, "End": end, "Task": "Train"})
FIGURES["profile-naive"] = plot_gantt(df)
FIGURES["profile-naive"]

# %%
# "old" profile: on a finer grid, each training window spans ``prop - 1`` of
# every ``prop + 1`` grid intervals.
df = timeline.copy()
prop = 10
ddi = pd.date_range(df.iloc[0].Start, end=df.iloc[-1].End, periods=(prop + 1) * 10)
for start, end in zip(ddi[1 : -1 : prop + 1], ddi[prop :: prop + 1]):
    df.loc[df["Start"].between(start, end), "Task"] = "Train"
    df.loc[len(df) + 1] = pd.Series({"Start": start, "End": end, "Task": "Train"})
FIGURES["profile-old"] = plot_gantt(df)
FIGURES["profile-old"]

# %%
# "stream" profile: a single training bar from the first bar's start to the
# last bar's start, with the read/transform bars left untouched.
df = timeline.copy()
df.loc[len(df) + 1] = pd.Series(
    {"Start": df.iloc[0].Start, "End": df.iloc[-1].Start, "Task": "Train"}
)
FIGURES["profile-stream"] = plot_gantt(df)
FIGURES["profile-stream"]

# %%
# Write every registered figure as an HTML fragment (no <html> wrapper,
# plotly.js loaded from the CDN) into a .qmd file named after its key.
for k, v in FIGURES.items():
    print(k)
    v.write_html(
        f"../components/{k}.qmd",
        full_html=False,
        include_plotlyjs="cdn",
    )
# for i in range(100):
#     print(i)

# %%
import qrcode
from qrcode.image.styledpil import StyledPilImage
from qrcode.image.styles.moduledrawers.pil import RoundedModuleDrawer
from qrcode.image.styles.colormasks import RadialGradiantColorMask


def _save_qr(data, path):
    """Encode ``data`` as a white-on-transparent QR code and save it to ``path``.

    Uses the lowest error-correction level.  Returns the generated image.
    """
    qr = qrcode.QRCode(error_correction=qrcode.constants.ERROR_CORRECT_L)
    qr.add_data(data)
    # img_1 = qr.make_image(image_factory=StyledPilImage, module_drawer=RoundedModuleDrawer())
    image = qr.make_image(
        # image_factory=StyledPilImage,
        # color_mask=RadialGradiantColorMask(),
        fill_color="white",
        back_color="transparent",
    )
    # img_3 = qr.make_image(
    #     image_factory=StyledPilImage, embeded_image_path="../figures/qr/code.png"
    # )
    image.save(path)
    return image


img_2 = _save_qr(
    "https://u.wechat.com/MAmdMGMYjGFC4-2ESxZ1oyw", "../figures/qr/jing.png"
)

# %%
img_2 = _save_qr(
    "mailto:data@sustech.edu.cn?subject=Hello&body=", "../figures/qr/mail-data.png"
)