muellerzr HF staff committed on
Commit
0ecaccb
·
1 Parent(s): 40e2c53

Working version

Browse files
Files changed (2) hide show
  1. app.py +84 -61
  2. requirements.txt +1 -1
app.py CHANGED
@@ -1,44 +1,45 @@
 
1
  import re
2
  import webbrowser
3
  import pandas as pd
4
  import gradio as gr
5
  from huggingface_hub import HfApi
6
- from accelerate.commands.estimate import create_empty_model
 
7
  from accelerate.utils import convert_bytes, calculate_maximum_sizes
8
 
9
  # We need to store them as globals because gradio doesn't have a way for us to pass them in to the button
10
  HAS_DISCUSSION = True
11
  MODEL_NAME = None
12
  LIBRARY = None
13
- TRUST_REMOTE_CODE = False
14
-
15
- # We use this class to check if a discussion has been opened on the model by `huggingface_model_memory_bot`
16
- hf_api = HfApi()
17
 
18
  def check_for_discussion(model_name:str):
19
- "Checks if a discussion has been opened on the model"
20
- global hf_api
21
- discussions = list(hf_api.get_repo_discussions(model_name))
22
- return any(discussion.title == "[AUTOMATED] Model Memory Requirements" for discussion in discussions)
23
 
24
  def report_results():
25
- "Reports the results of a memory calculation to the model's discussion"
26
- global MODEL_NAME, LIBRARY, TRUST_REMOTE_CODE
27
- _, results = calculate_memory(MODEL_NAME, LIBRARY, ["float32", "float16", "int8", "int4"], TRUST_REMOTE_CODE, raw=True)
 
28
  post = f"""# Model Memory Requirements\n
29
 
30
- These calculations were measured from the [Model Memory Utility Space](https://hf.co/spaces/muellerzr/model-memory-utility) on the Hub.
31
 
32
- The minimum recommended vRAM needed for this model to perform inference via [Accelerate or `device_map="auto"`](https://huggingface.co/docs/accelerate/usage_guides/big_modeling) is denoted by the size of the "largest layer" and training of the model is roughly 4x its total size (for Adam).
 
 
 
33
 
34
- ## Results
35
 
 
36
  """
37
- global hf_api
38
- post += results.to_markdown(index=False)
39
- # Uncomment when ready to go live
40
- # discussion = hf_api.create_discussion(MODEL_NAME, "[AUTOMATED] Model Memory Requirements", description=post)
41
- # webbrowser.open_new_tab(discussion.url)
42
 
43
  def convert_url_to_name(url:str):
44
  "Converts a model URL to its name on the Hub"
@@ -47,18 +48,33 @@ def convert_url_to_name(url:str):
47
  raise ValueError(f"URL {url} is not a valid model URL to the Hugging Face Hub")
48
  return results[0]
49
 
50
- def calculate_memory(model_name:str, library:str, options:list, trust_remote_code:bool, raw=False):
51
  "Calculates the memory usage for a model"
52
  if library == "auto":
53
  library = None
54
- if "huggingface.co" in model_name:
55
- model_name = convert_url_to_name(model_name)
56
- model = create_empty_model(model_name, library_name=library, trust_remote_code=trust_remote_code)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
  total_size, largest_layer = calculate_maximum_sizes(model)
58
 
59
  data = []
60
 
61
- title = f"Memory Usage for `{model_name}`"
62
  for dtype in options:
63
  dtype_total_size = total_size
64
  dtype_largest_layer = largest_layer[0]
@@ -76,57 +92,64 @@ def calculate_memory(model_name:str, library:str, options:list, trust_remote_cod
76
  dtype_largest_layer = convert_bytes(dtype_largest_layer)
77
  data.append({
78
  "dtype": dtype,
79
- "Largest Layer": dtype_largest_layer,
80
  "Total Size": dtype_total_size,
81
  "Training using Adam": dtype_training_size
82
  })
83
- global HAS_DISCUSSION, MODEL_NAME, LIBRARY, TRUST_REMOTE_CODE
84
  HAS_DISCUSSION = check_for_discussion(model_name)
85
  MODEL_NAME = model_name
86
  LIBRARY = library
87
- TRUST_REMOTE_CODE = trust_remote_code
88
- results = [f'## {title}', pd.DataFrame(data)]
89
- if not raw:
90
- results += [gr.update(visible=not HAS_DISCUSSION)]
 
 
 
 
 
91
  return results
92
 
93
  with gr.Blocks() as demo:
94
- gr.Markdown(
95
- """# Model Memory Calculator
96
-
97
- This tool will help you calculate how much vRAM is needed to train and perform big model inference
98
- on a model hosted on the 🤗 Hugging Face Hub. The minimum recommended vRAM needed for a model
99
- is denoted as the size of the "largest layer", and training of a model is roughly 4x its size (for Adam).
100
-
101
- Currently this tool supports all models hosted that use `transformers` and `timm`.
102
-
103
- To use this tool pass in the URL or model name of the model you want to calculate the memory usage for,
104
- select which framework it originates from ("auto" will try and detect it from the model metadata), and
105
- what precisions you want to use.
106
- """
107
- )
108
- out_text = gr.Markdown()
109
- out = gr.DataFrame(
110
- headers=["dtype", "Largest Layer", "Total Size", "Training using Adam"],
111
- interactive=False,
112
- )
113
 
114
- inp = gr.Textbox(label="Model Name or URL")
115
- with gr.Row():
116
- library = gr.Radio(["auto", "transformers", "timm"], label="Library", value="auto")
117
- options = gr.CheckboxGroup(
118
- ["float32", "float16", "int8", "int4"],
119
- value="float32"
 
 
 
 
 
120
  )
121
- trust_remote_code = gr.Checkbox(label="Trust Remote Code", value=False)
122
- btn = gr.Button("Calculate Memory Usage")
123
- post_to_hub = gr.Button(value = "Report results in this model repo's discussions!", visible=False)
 
 
 
 
 
 
 
 
 
124
 
125
  btn.click(
126
- calculate_memory, inputs=[inp, library, options, trust_remote_code], outputs=[out_text, out, post_to_hub],
127
  )
128
 
129
- post_to_hub.click(report_results)
130
 
131
 
132
  demo.launch()
 
1
+ import os
2
  import re
3
  import webbrowser
4
  import pandas as pd
5
  import gradio as gr
6
  from huggingface_hub import HfApi
7
+ from huggingface_hub.utils import RepositoryNotFoundError, GatedRepoError
8
+ from accelerate.commands.estimate import create_empty_model, check_has_model
9
  from accelerate.utils import convert_bytes, calculate_maximum_sizes
10
 
11
  # We need to store them as globals because gradio doesn't have a way for us to pass them in to the button
12
  HAS_DISCUSSION = True
13
  MODEL_NAME = None
14
  LIBRARY = None
15
+ TOKEN = os.environ.get("HUGGINGFACE_API_LOGIN", None)
 
 
 
16
 
17
  def check_for_discussion(model_name:str):
18
+ "Checks if an automated discussion has been opened on the model by `model-sizer-bot`"
19
+ api = HfApi(token=TOKEN)
20
+ discussions = list(api.get_repo_discussions(model_name))
21
+ return any(discussion.title == "[AUTOMATED] Model Memory Requirements" and discussion.author == "model-sizer-bot" for discussion in discussions)
22
 
23
  def report_results():
24
+ "Reports the results of a memory calculation to the model's discussion page, and opens a new tab to it afterwards"
25
+ global MODEL_NAME, LIBRARY
26
+ api = HfApi(token=TOKEN)
27
+ results = calculate_memory(MODEL_NAME, LIBRARY, ["fp32", "fp16", "int8", "int4"], raw=True)
28
  post = f"""# Model Memory Requirements\n
29
 
30
+ These calculations were measured from the [Model Memory Utility Space](https://hf.co/spaces/hf-accelerate/model-memory-utility) on the Hub.
31
 
32
+ The minimum recommended vRAM needed for this model to be loaded into memory via [Accelerate or `device_map="auto"`](https://huggingface.co/docs/accelerate/usage_guides/big_modeling) is denoted by the size of the "largest layer".
33
+ When performing inference, expect to add up to an additional 20% to this, as found by [EleutherAI](https://blog.eleuther.ai/transformer-math/). More tests will be performed in the future to get a more accurate benchmark for each model.
34
+
35
+ When training with `Adam`, you can expect roughly 4x the reported results to be used. (1x for the model, 1x for the gradients, and 2x for the optimizer).
36
 
37
+ ## Results:
38
 
39
+ {results}
40
  """
41
+ discussion = api.create_discussion(MODEL_NAME, "[AUTOMATED] Model Memory Requirements", description=post)
42
+ webbrowser.open_new_tab(discussion.url)
 
 
 
43
 
44
  def convert_url_to_name(url:str):
45
  "Converts a model URL to its name on the Hub"
 
48
  raise ValueError(f"URL {url} is not a valid model URL to the Hugging Face Hub")
49
  return results[0]
50
 
51
+ def calculate_memory(model_name:str, library:str, options:list, access_token:str, raw=False):
52
  "Calculates the memory usage for a model"
53
  if library == "auto":
54
  library = None
55
+ if "http" in model_name and "//" in model_name:
56
+ try:
57
+ model_name = convert_url_to_name(model_name)
58
+ except ValueError:
59
+ raise gr.Error(f"URL `{model_name}` is not a valid model URL to the Hugging Face Hub")
60
+ try:
61
+ model = create_empty_model(model_name, library_name=library, trust_remote_code=True, access_token=access_token)
62
+ except GatedRepoError:
63
+ raise gr.Error(f"Model `{model_name}` is a gated model, please ensure to pass in your access token and try again if you have access.")
64
+ except RepositoryNotFoundError:
65
+ raise gr.Error(f"Model `{model_name}` was not found on the Hub, please try another model name.")
66
+ except ValueError as e:
67
+ raise gr.Error(f"Model `{model_name}` does not have any library metadata on the Hub, please manually select a library_name to use (such as `transformers`)")
68
+ except (RuntimeError, OSError) as e:
69
+ library = check_has_model(e)
70
+ if library != "unknown":
71
+ raise gr.Error(f"Tried to load `{model_name}` with `{library}` but a possible model to load was not found inside the repo.")
72
+
73
  total_size, largest_layer = calculate_maximum_sizes(model)
74
 
75
  data = []
76
 
77
+ title = f"Memory Usage for '{model_name}'"
78
  for dtype in options:
79
  dtype_total_size = total_size
80
  dtype_largest_layer = largest_layer[0]
 
92
  dtype_largest_layer = convert_bytes(dtype_largest_layer)
93
  data.append({
94
  "dtype": dtype,
95
+ "Largest Layer or Residual Group": dtype_largest_layer,
96
  "Total Size": dtype_total_size,
97
  "Training using Adam": dtype_training_size
98
  })
99
+ global HAS_DISCUSSION, MODEL_NAME, LIBRARY
100
  HAS_DISCUSSION = check_for_discussion(model_name)
101
  MODEL_NAME = model_name
102
  LIBRARY = library
103
+
104
+ if raw:
105
+ return pd.DataFrame(data).to_markdown(index=False)
106
+
107
+ results = [
108
+ f'## {title}',
109
+ gr.update(visible=True, value=pd.DataFrame(data)),
110
+ gr.update(visible=not HAS_DISCUSSION)
111
+ ]
112
  return results
113
 
114
  with gr.Blocks() as demo:
115
+ with gr.Column():
116
+ gr.Markdown(
117
+ """# Model Memory Calculator
118
+
119
+ This tool will help you calculate how much vRAM is needed to train and perform big model inference
120
+ on a model hosted on the 🤗 Hugging Face Hub. The minimum recommended vRAM needed for a model
121
+ is denoted as the size of the "largest layer", and training of a model is roughly 4x its size (for Adam).
 
 
 
 
 
 
 
 
 
 
 
 
122
 
123
+ Currently this tool supports all models hosted that use `transformers` and `timm`.
124
+
125
+ To use this tool pass in the URL or model name of the model you want to calculate the memory usage for,
126
+ select which framework it originates from ("auto" will try and detect it from the model metadata), and
127
+ what precisions you want to use."""
128
+ )
129
+ out_text = gr.Markdown()
130
+ out = gr.DataFrame(
131
+ headers=["dtype", "Largest Layer", "Total Size", "Training using Adam"],
132
+ interactive=False,
133
+ visible=False,
134
  )
135
+ with gr.Row():
136
+ inp = gr.Textbox(label="Model Name or URL")
137
+ with gr.Row():
138
+ library = gr.Radio(["auto", "transformers", "timm"], label="Library", value="auto")
139
+ options = gr.CheckboxGroup(
140
+ ["float32", "float16", "int8", "int4"],
141
+ value="float32"
142
+ )
143
+ access_token = gr.Textbox(label="API Token", placeholder="Optional (for gated models)")
144
+ with gr.Row():
145
+ btn = gr.Button("Calculate Memory Usage")
146
+ post_to_hub = gr.Button(value = "Report results in this model repo's discussions!\n(Will open in a new tab)", visible=False)
147
 
148
  btn.click(
149
+ calculate_memory, inputs=[inp, library, options, access_token], outputs=[out_text, out, post_to_hub],
150
  )
151
 
152
+ post_to_hub.click(report_results).then(lambda: gr.Button.update(visible=False), outputs=post_to_hub)
153
 
154
 
155
  demo.launch()
requirements.txt CHANGED
@@ -1,4 +1,4 @@
1
- accelerate @ git+https://github.com/huggingface/accelerate@model-size-estimator
2
  transformers
3
  timm
4
  huggingface_hub
 
1
+ accelerate @ git+https://github.com/huggingface/accelerate
2
  transformers
3
  timm
4
  huggingface_hub