"""
This specific file was bodged together by ham-handed hedgehogs. If something looks wrong, it's because it is.
If you're not a hedgehog, you shouldn't reuse this code. Use this instead: https://docs.streamlit.io/library/get-started
"""


import streamlit as st

import mem_calc
from models import models
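# mem_calc and models are local modules in this repo: mem_calc provides parse_args()
# and calculate_memory(), and models is a dict keyed by architecture name
# (all inferred from how they are used below).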
st.set_page_config(page_title="Memory calculator", layout="wide")
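# Cosmetic hack: shift the main container up to reclaim Streamlit's default top whitespace.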
st.markdown("""<style>
.reportview-container {
    top: -80px;
}
</style>""", unsafe_allow_html=True)

models = list(models.keys())  # dicts keep insertion order since Python 3.7, so this preserves the order defined in models.py
model = st.selectbox('Model architecture', models, index=models.index("gpt2-l"))

col1, col2 = st.columns(2)
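# Human-readable optimizer labels, paired by index with the flag values mem_calc expects.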
optimizers_names = ('32-bit', '16-bit', '8-bit', 'factorized')
optimizers_values = ['adam', '16-bit-adam', '8-bit-adam', 'adafactor']
optimizer = col1.radio('Adam / LAMB states', optimizers_names)
checkpoint = col2.checkbox("Gradient checkpointing", value=True)
offload = col2.checkbox("Offload optimizer", value=False)
share_params = col2.checkbox("Share parameters", value=False)
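# Each toggle above corresponds to one flag in the command built below:
# --checkpoint, --offload, and --shared_groups respectively.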

with st.expander("More options"):
    batch_size = int(st.number_input('Microbatch size (sequences)', min_value=1, step=1, value=1, format="%i"))
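    # The opt-level names below presumably follow NVIDIA Apex AMP conventions:
    # O0 is full fp32, O1 is mixed precision, O3 is pure fp16.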
    precisions_names = ('Full', 'Mixed ("O1")', 'Pure 16-bit')
    precisions_values = ('O0', 'O1', 'O3')
    precision = st.selectbox('Precision', precisions_names, index=1)
    sharing_groups = int(st.number_input('Shared parameter groups (used if Share parameters is checked)',
                                         min_value=1, step=1, value=1, format="%i"))

# Build a CLI-style argument string and feed it through mem_calc's own argument parser.
args = mem_calc.parse_args(f"""
    --model {model} --optimizer {optimizers_values[optimizers_names.index(optimizer)]}
    {'--checkpoint' if checkpoint else ''} {'--offload' if offload else ''}
    --fp16-level {precisions_values[precisions_names.index(precision)]} --bsz {batch_size}
    {f'--shared_groups {sharing_groups}' if share_params else ''}
""".split())


memory = mem_calc.calculate_memory(args)
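# `memory` maps component names to sizes: total_mem, cpu_mem, model, grad and optim
# are read below in GB; overhead looks like seconds and is rendered as ms at the end.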

cols = st.columns(2)
cols[0].metric("GPU total", f"{memory['total_mem']:.2f} GB")
cols[1].metric("Offloaded to RAM", f"{memory['cpu_mem']:.2f} GB")

cols = st.columns(2)
# The commented-out metric arguments would also show each component as a percentage of the total.
cols[0].metric("Parameters", f"{memory['model']:.2f} GB")  # , f"{memory['model'] / memory['total_mem'] * 100:.2f} %", delta_color="off"
cols[1].metric("Activations", f"{memory['grad']:.2f} GB")  # , f"{memory['grad'] / memory['total_mem'] * 100:.2f} %", delta_color="off"

cols = st.columns(2)
optimizer_mem = memory['cpu_mem'] if offload else memory['optim']  # optimizer states sit in RAM when offloaded
cols[0].metric(f"Optimizer ({'CPU' if offload else 'GPU'})", f"{optimizer_mem:.2f} GB")  # , f"{memory['optim'] / memory['total_mem'] * 100:.2f} %", delta_color="off"
cols[1].metric("CPU-GPU Transfer", f"{memory['overhead'] * 1000:.2f} ms")