muellerzr HF staff committed on
Commit
0dea527
·
1 Parent(s): be6343c
Files changed (9) hide show
  1. .gitignore +144 -0
  2. Makefile +11 -0
  3. README.md +1 -1
  4. app.py +0 -187
  5. pyproject.toml +16 -0
  6. src/__init__.py +0 -0
  7. src/app.py +73 -0
  8. src/hub_utils.py +44 -0
  9. src/model_utils.py +101 -0
.gitignore ADDED
@@ -0,0 +1,144 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ pip-wheel-metadata/
24
+ share/python-wheels/
25
+ *.egg-info/
26
+ .installed.cfg
27
+ *.egg
28
+ MANIFEST
29
+
30
+ # PyInstaller
31
+ # Usually these files are written by a python script from a template
32
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
33
+ *.manifest
34
+ *.spec
35
+
36
+ # Installer logs
37
+ pip-log.txt
38
+ pip-delete-this-directory.txt
39
+
40
+ # Unit test / coverage reports
41
+ htmlcov/
42
+ .tox/
43
+ .nox/
44
+ .coverage
45
+ .coverage.*
46
+ .cache
47
+ nosetests.xml
48
+ coverage.xml
49
+ *.cover
50
+ *.py,cover
51
+ .hypothesis/
52
+ .pytest_cache/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ target/
76
+
77
+ # Jupyter Notebook
78
+ .ipynb_checkpoints
79
+
80
+ # IPython
81
+ profile_default/
82
+ ipython_config.py
83
+
84
+ # pyenv
85
+ .python-version
86
+
87
+ # pipenv
88
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
90
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
91
+ # install all needed dependencies.
92
+ #Pipfile.lock
93
+
94
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow
95
+ __pypackages__/
96
+
97
+ # Celery stuff
98
+ celerybeat-schedule
99
+ celerybeat.pid
100
+
101
+ # SageMath parsed files
102
+ *.sage.py
103
+
104
+ # Environments
105
+ .env
106
+ .venv
107
+ env/
108
+ venv/
109
+ ENV/
110
+ env.bak/
111
+ venv.bak/
112
+
113
+ # Spyder project settings
114
+ .spyderproject
115
+ .spyproject
116
+
117
+ # Rope project settings
118
+ .ropeproject
119
+
120
+ # mkdocs documentation
121
+ /site
122
+
123
+ # mypy
124
+ .mypy_cache/
125
+ .dmypy.json
126
+ dmypy.json
127
+
128
+ # Pyre type checker
129
+ .pyre/
130
+
131
+ # VSCode
132
+ .vscode
133
+
134
+ # IntelliJ
135
+ .idea
136
+
137
+ # Mac .DS_Store
138
+ .DS_Store
139
+
140
+ # More test things
141
+ wandb
142
+
143
+ # ruff
144
+ .ruff_cache
Makefile ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ check_dirs := src
2
+
3
+ # this target runs checks on all files
4
+ quality:
5
+ black --required-version 23 --check $(check_dirs)
6
+ ruff $(check_dirs)
7
+
8
+ # Format source code automatically and check if there are any problems left that need manual fixing
9
+ style:
10
+ black --required-version 23 $(check_dirs)
11
+ ruff $(check_dirs) --fix
README.md CHANGED
@@ -5,7 +5,7 @@ colorFrom: pink
5
  colorTo: blue
6
  sdk: gradio
7
  sdk_version: 3.40.1
8
- app_file: app.py
9
  pinned: false
10
  license: apache-2.0
11
  ---
 
5
  colorTo: blue
6
  sdk: gradio
7
  sdk_version: 3.40.1
8
+ app_file: src/app.py
9
  pinned: false
10
  license: apache-2.0
11
  ---
app.py DELETED
@@ -1,187 +0,0 @@
1
- import os
2
- import re
3
- import webbrowser
4
- import pandas as pd
5
- import gradio as gr
6
- from huggingface_hub import HfApi
7
- from huggingface_hub.utils import RepositoryNotFoundError, GatedRepoError
8
- from accelerate.commands.estimate import create_empty_model, check_has_model
9
- from accelerate.utils import convert_bytes, calculate_maximum_sizes
10
- from urllib.parse import urlparse
11
-
12
- # We need to store them as globals because gradio doesn't have a way for us to pass them in to the button
13
- HAS_DISCUSSION = True
14
- MODEL_NAME = None
15
- LIBRARY = None
16
- USER_TOKEN = None
17
- TOKEN = os.environ.get("HUGGINGFACE_API_LOGIN", None)
18
-
19
- def translate_llama2(text):
20
- "Translates llama-2 to its hf counterpart"
21
- if not text.endswith("-hf"):
22
- return text + "-hf"
23
- return text
24
-
25
- def check_for_discussion(model_name:str):
26
- "Checks if an automated discussion has been opened on the model by `model-sizer-bot`"
27
- global TOKEN
28
- api = HfApi(token=TOKEN)
29
- discussions = list(api.get_repo_discussions(model_name))
30
- return any(discussion.title == "[AUTOMATED] Model Memory Requirements" and discussion.author == "model-sizer-bot" for discussion in discussions)
31
-
32
- def report_results():
33
- "Reports the results of a memory calculation to the model's discussion page, and opens a new tab to it afterwards"
34
- global MODEL_NAME, LIBRARY, TOKEN, USER_TOKEN
35
- api = HfApi(token=TOKEN)
36
- results, data = calculate_memory(MODEL_NAME, LIBRARY, ["fp32", "fp16", "int8", "int4"], access_token=USER_TOKEN, raw=True)
37
- minimum = data[0]
38
-
39
- USER_TOKEN = None
40
- post = f"""# Model Memory Requirements\n
41
-
42
- You will need about {minimum[1]} VRAM to load this model for inference, and {minimum[3]} VRAM to train it using Adam.
43
-
44
- These calculations were measured from the [Model Memory Utility Space](https://hf.co/spaces/hf-accelerate/model-memory-utility) on the Hub.
45
-
46
- The minimum recommended vRAM needed for this model assumes using [Accelerate or `device_map="auto"`](https://huggingface.co/docs/accelerate/usage_guides/big_modeling) and is denoted by the size of the "largest layer".
47
- When performing inference, expect to add up to an additional 20% to this, as found by [EleutherAI](https://blog.eleuther.ai/transformer-math/). More tests will be performed in the future to get a more accurate benchmark for each model.
48
-
49
- When training with `Adam`, you can expect roughly 4x the reported results to be used. (1x for the model, 1x for the gradients, and 2x for the optimizer).
50
-
51
- ## Results:
52
-
53
- {results}
54
- """
55
- discussion = api.create_discussion(MODEL_NAME, "[AUTOMATED] Model Memory Requirements", description=post)
56
- webbrowser.open_new_tab(discussion.url)
57
-
58
- def extract_from_url(name:str):
59
- "Checks if `name` is a URL, and if so converts it to a model name"
60
- is_url = False
61
- try:
62
- result = urlparse(name)
63
- is_url = all([result.scheme, result.netloc])
64
- except:
65
- is_url = False
66
- # Pass through if not a URL
67
- if not is_url:
68
- return name
69
- else:
70
- path = result.path
71
- return path[1:]
72
-
73
- def calculate_memory(model_name:str, library:str, options:list, access_token:str, raw=False):
74
- "Calculates the memory usage for a model"
75
- if "meta-llama" in model_name:
76
- model_name = translate_llama2(model_name)
77
- if library == "auto":
78
- library = None
79
- model_name = extract_from_url(model_name)
80
- try:
81
- model = create_empty_model(model_name, library_name=library, trust_remote_code=True, access_token=access_token)
82
- except GatedRepoError:
83
- raise gr.Error(f"Model `{model_name}` is a gated model, please ensure to pass in your access token and try again if you have access. You can find your access token here : https://huggingface.co/settings/tokens. ")
84
- except RepositoryNotFoundError:
85
- raise gr.Error(f"Model `{model_name}` was not found on the Hub, please try another model name.")
86
- except ValueError as e:
87
- raise gr.Error(f"Model `{model_name}` does not have any library metadata on the Hub, please manually select a library_name to use (such as `transformers`)")
88
- except (RuntimeError, OSError) as e:
89
- library = check_has_model(e)
90
- if library != "unknown":
91
- raise gr.Error(f"Tried to load `{model_name}` with `{library}` but a possible model to load was not found inside the repo.")
92
- raise gr.Error(f"Model `{model_name}` had an error, please open a discussion on the model's page with the error message and name: `{e}`")
93
- except ImportError:
94
- # hacky way to check if it works with `trust_remote_code=False`
95
- model = create_empty_model(model_name, library_name=library, trust_remote_code=False, access_token=access_token)
96
- except Exception as e:
97
- raise gr.Error(f"Model `{model_name}` had an error, please open a discussion on the model's page with the error message and name: `{e}`")
98
- total_size, largest_layer = calculate_maximum_sizes(model)
99
-
100
- data = []
101
-
102
- title = f"Memory Usage for '{model_name}'"
103
- for dtype in options:
104
- dtype_total_size = total_size
105
- dtype_largest_layer = largest_layer[0]
106
- if dtype in ("fp16", "bf16", "float16/bfloat16"):
107
- dtype_total_size /= 2
108
- dtype_largest_layer /= 2
109
- elif dtype == "int8":
110
- dtype_total_size /= 4
111
- dtype_largest_layer /= 4
112
- elif dtype == "int4":
113
- dtype_total_size /= 8
114
- dtype_largest_layer /= 8
115
- dtype_training_size = convert_bytes(dtype_total_size * 4)
116
- dtype_total_size = convert_bytes(dtype_total_size)
117
- dtype_largest_layer = convert_bytes(dtype_largest_layer)
118
- data.append({
119
- "dtype": dtype,
120
- "Largest Layer or Residual Group": dtype_largest_layer,
121
- "Total Size": dtype_total_size,
122
- "Training using Adam": dtype_training_size
123
- })
124
- global HAS_DISCUSSION, MODEL_NAME, LIBRARY
125
- HAS_DISCUSSION = check_for_discussion(model_name)
126
- MODEL_NAME = model_name
127
- LIBRARY = library
128
-
129
- if raw:
130
- return pd.DataFrame(data).to_markdown(index=False), data
131
-
132
- results = [
133
- f'## {title}',
134
- gr.update(visible=True, value=pd.DataFrame(data)),
135
- gr.update(visible=not HAS_DISCUSSION)
136
- ]
137
- return results
138
-
139
- with gr.Blocks() as demo:
140
- with gr.Column():
141
- gr.Markdown(
142
- """<img src="https://huggingface.co/spaces/hf-accelerate/model-memory-usage/resolve/main/measure_model_size.png" style="float: left;" width="250" height="250"><h1>🤗 Model Memory Calculator</h1>
143
-
144
- This tool will help you calculate how much vRAM is needed to train and perform big model inference
145
- on a model hosted on the 🤗 Hugging Face Hub. The minimum recommended vRAM needed for a model
146
- is denoted as the size of the "largest layer", and training of a model is roughly 4x its size (for Adam).
147
-
148
- These calculations are accurate within a few percent at most, such as `bert-base-cased` being 413.68 MB and the calculator estimating 413.18 MB.
149
-
150
- When performing inference, expect to add up to an additional 20% to this as found by [EleutherAI](https://blog.eleuther.ai/transformer-math/).
151
- More tests will be performed in the future to get a more accurate benchmark for each model.
152
-
153
- Currently this tool supports all models hosted that use `transformers` and `timm`.
154
-
155
- To use this tool pass in the URL or model name of the model you want to calculate the memory usage for,
156
- select which framework it originates from ("auto" will try and detect it from the model metadata), and
157
- what precisions you want to use."""
158
- )
159
- out_text = gr.Markdown()
160
- out = gr.DataFrame(
161
- headers=["dtype", "Largest Layer", "Total Size", "Training using Adam"],
162
- interactive=False,
163
- visible=False,
164
- )
165
- with gr.Row():
166
- inp = gr.Textbox(label="Model Name or URL", value="bert-base-cased")
167
- with gr.Row():
168
- library = gr.Radio(["auto", "transformers", "timm"], label="Library", value="auto")
169
- options = gr.CheckboxGroup(
170
- ["float32", "float16/bfloat16", "int8", "int4"],
171
- value="float32",
172
- label="Model Precision",
173
- )
174
- access_token = gr.Textbox(label="API Token", placeholder="Optional (for gated models)")
175
- with gr.Row():
176
- btn = gr.Button("Calculate Memory Usage")
177
- post_to_hub = gr.Button(value = "Report results in this model repo's discussions!\n(Will open in a new tab)", visible=False)
178
- USER_TOKEN = access_token
179
-
180
- btn.click(
181
- calculate_memory, inputs=[inp, library, options, access_token], outputs=[out_text, out, post_to_hub],
182
- )
183
-
184
- post_to_hub.click(report_results).then(lambda: gr.Button.update(visible=False), outputs=post_to_hub)
185
-
186
-
187
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
pyproject.toml ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [tool.black]
2
+ line-length = 119
3
+ target-version = ['py37']
4
+
5
+ [tool.ruff]
6
+ # Never enforce `E501` (line length violations).
7
+ ignore = ["E501", "E741", "W605"]
8
+ select = ["E", "F", "I", "W"]
9
+ line-length = 119
10
+
11
+ # Ignore import violations in all `__init__.py` files.
12
+ [tool.ruff.per-file-ignores]
13
+ "__init__.py" = ["E402", "F401", "F403", "F811"]
14
+
15
+ [tool.ruff.isort]
16
+ lines-after-imports = 2
src/__init__.py ADDED
File without changes
src/app.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+ from hub_utils import check_for_discussion, report_results
4
+ from model_utils import calculate_memory, get_model
5
+
6
+
7
+ # We need to store them as globals because gradio doesn't have a way for us to pass them in to the button
8
+ MODEL = None
9
+
10
+
11
def get_results(model_name: str, library: str, options: list, access_token: str):
    "Loads the model, computes its memory table and returns the updates for the title, table and report button"
    global MODEL
    MODEL = get_model(model_name, library, access_token)
    already_reported = check_for_discussion(model_name)
    heading = f"## Memory usage for '{model_name}'"
    rows = calculate_memory(MODEL, options)
    # The table becomes visible with the new rows; the report button only shows when no report exists yet
    table_update = gr.update(visible=True, value=pd.DataFrame(rows))
    button_update = gr.update(visible=not already_reported)
    return [heading, table_update, button_update]
18
+
19
+
20
+ with gr.Blocks() as demo:
21
+ with gr.Column():
22
+ gr.Markdown(
23
+ """<img src="https://huggingface.co/spaces/hf-accelerate/model-memory-usage/resolve/main/measure_model_size.png" style="float: left;" width="250" height="250"><h1>🤗 Model Memory Calculator</h1>
24
+
25
+ This tool will help you calculate how much vRAM is needed to train and perform big model inference
26
+ on a model hosted on the 🤗 Hugging Face Hub. The minimum recommended vRAM needed for a model
27
+ is denoted as the size of the "largest layer", and training of a model is roughly 4x its size (for Adam).
28
+
29
+ These calculations are accurate within a few percent at most, such as `bert-base-cased` being 413.68 MB and the calculator estimating 413.18 MB.
30
+
31
+ When performing inference, expect to add up to an additional 20% to this as found by [EleutherAI](https://blog.eleuther.ai/transformer-math/).
32
+ More tests will be performed in the future to get a more accurate benchmark for each model.
33
+
34
+ Currently this tool supports all models hosted that use `transformers` and `timm`.
35
+
36
+ To use this tool pass in the URL or model name of the model you want to calculate the memory usage for,
37
+ select which framework it originates from ("auto" will try and detect it from the model metadata), and
38
+ what precisions you want to use."""
39
+ )
40
+ out_text = gr.Markdown()
41
+ out = gr.DataFrame(
42
+ headers=["dtype", "Largest Layer", "Total Size", "Training using Adam"],
43
+ interactive=False,
44
+ visible=False,
45
+ )
46
+ with gr.Row():
47
+ inp = gr.Textbox(label="Model Name or URL", value="bert-base-cased")
48
+ with gr.Row():
49
+ library = gr.Radio(["auto", "transformers", "timm"], label="Library", value="auto")
50
+ options = gr.CheckboxGroup(
51
+ ["float32", "float16/bfloat16", "int8", "int4"],
52
+ value="float32",
53
+ label="Model Precision",
54
+ )
55
+ access_token = gr.Textbox(label="API Token", placeholder="Optional (for gated models)")
56
+ with gr.Row():
57
+ btn = gr.Button("Calculate Memory Usage")
58
+ post_to_hub = gr.Button(
59
+ value="Report results in this model repo's discussions!\n(Will open in a new tab)", visible=False
60
+ )
61
+
62
+ btn.click(
63
+ get_results,
64
+ inputs=[inp, library, options, access_token],
65
+ outputs=[out_text, out, post_to_hub],
66
+ )
67
+
68
+ post_to_hub.click(report_results, inputs=[inp, library, access_token]).then(
69
+ lambda: gr.Button.update(visible=False), outputs=post_to_hub
70
+ )
71
+
72
+
73
+ demo.launch()
src/hub_utils.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Utilities related to searching and posting on the Hub
2
+ import os
3
+ import webbrowser
4
+
5
+ import pandas as pd
6
+ from huggingface_hub import HfApi
7
+ from model_utils import calculate_memory, extract_from_url, get_model
8
+
9
+
10
def check_for_discussion(model_name: str):
    """Checks if an automated discussion has been opened on the model by `model-sizer-bot`.

    Args:
        model_name: Repo id or URL of the model; a URL is reduced to its path via `extract_from_url`.

    Returns:
        `True` if any discussion on the repo is titled "[AUTOMATED] Model Memory Requirements"
        and authored by `model-sizer-bot`, otherwise `False`.
    """
    api = HfApi(token=os.environ.get("HUGGINGFACE_API_LOGIN", None))
    model_name = extract_from_url(model_name)
    # Feed the discussions straight into `any` so the scan stops at the first match
    # instead of collecting every discussion into a list beforehand
    return any(
        discussion.title == "[AUTOMATED] Model Memory Requirements" and discussion.author == "model-sizer-bot"
        for discussion in api.get_repo_discussions(model_name)
    )
19
+
20
+
21
def report_results(model_name, library, access_token):
    """Reports the results of a memory calculation to the model's discussion page, and opens a new tab to it afterwards.

    Args:
        model_name: Repo id or URL of the model to report on.
        library: Library selection forwarded to `get_model`.
        access_token: User token forwarded to `get_model`.

    The discussion itself is created with the token read from the `HUGGINGFACE_API_LOGIN`
    environment variable, not with `access_token`.
    """
    model = get_model(model_name, library, access_token)
    data = calculate_memory(model, ["float32", "float16/bfloat16", "int8", "int4"])
    df = pd.DataFrame(data).to_markdown(index=False)

    # `data` is a list of row dicts, one per dtype in the order requested above.
    # The headline numbers are taken from the first (float32) row, by column name.
    baseline = data[0]
    inference_size = baseline["Largest Layer or Residual Group"]
    training_size = baseline["Training using Adam"]

    post = f"""# Model Memory Requirements\n

You will need about {inference_size} VRAM to load this model for inference, and {training_size} VRAM to train it using Adam.

These calculations were measured from the [Model Memory Utility Space](https://hf.co/spaces/hf-accelerate/model-memory-utility) on the Hub.

The minimum recommended vRAM needed for this model assumes using [Accelerate or `device_map="auto"`](https://huggingface.co/docs/accelerate/usage_guides/big_modeling) and is denoted by the size of the "largest layer".
When performing inference, expect to add up to an additional 20% to this, as found by [EleutherAI](https://blog.eleuther.ai/transformer-math/). More tests will be performed in the future to get a more accurate benchmark for each model.

When training with `Adam`, you can expect roughly 4x the reported results to be used. (1x for the model, 1x for the gradients, and 2x for the optimizer).

## Results:

{df}
"""
    api = HfApi(token=os.environ.get("HUGGINGFACE_API_LOGIN", None))
    # The input may be a URL; the discussion must be opened on the repo id it points to
    repo_id = extract_from_url(model_name)
    discussion = api.create_discussion(repo_id, "[AUTOMATED] Model Memory Requirements", description=post)
    # NOTE(review): `webbrowser` opens the tab on the machine running this process - confirm that is intended
    webbrowser.open_new_tab(discussion.url)
src/model_utils.py ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Utilities related to loading in and working with models/specific models
2
+ from urllib.parse import urlparse
3
+
4
+ import gradio as gr
5
+ import torch
6
+ from accelerate.commands.estimate import check_has_model, create_empty_model
7
+ from accelerate.utils import calculate_maximum_sizes, convert_bytes
8
+ from huggingface_hub.utils import GatedRepoError, RepositoryNotFoundError
9
+
10
+
11
+ DTYPE_MODIFIER = {"float32": 1, "float16/bfloat16": 2, "int8": 4, "int4": 8}
12
+
13
+
14
def extract_from_url(name: str):
    """Checks if `name` is a URL, and if so converts it to a model name.

    A value is treated as a URL only when `urlparse` finds both a scheme and a network
    location. For a URL the path component is returned with the slashes on both ends
    removed (`https://huggingface.co/org/model/` -> `org/model`); anything else is
    returned unchanged.
    """
    try:
        result = urlparse(name)
    except (ValueError, TypeError, AttributeError):
        # Input that cannot be parsed at all is treated as a plain model name
        return name
    # Pass through if not a URL
    if not (result.scheme and result.netloc):
        return name
    # Strip both ends: a trailing "/" copied from the address bar would otherwise
    # end up inside the repo id
    return result.path.strip("/")
28
+
29
+
30
def translate_llama2(text):
    "Maps a llama-2 name onto its `-hf` counterpart; names already ending in `-hf` come back untouched"
    suffix = "-hf"
    return text if text.endswith(suffix) else text + suffix
35
+
36
+
37
def get_model(model_name: str, library: str, access_token: str):
    """Finds and grabs model from the Hub, and initializes on `meta`.

    Args:
        model_name: Repo id or URL of the model. Names containing `meta-llama` are
            mapped to their `-hf` counterpart.
        library: Library to load the model with; `"auto"` is passed on as `None`.
        access_token: Token forwarded to `create_empty_model`.

    Returns:
        The model object produced by `create_empty_model`.

    Raises:
        gr.Error: If the repo is gated, missing, lacks library metadata, or fails to load.
    """
    # Reduce a URL to its repo id first: appending `-hf` to the raw URL would put the
    # suffix after any query string or fragment instead of on the model name
    model_name = extract_from_url(model_name)
    if "meta-llama" in model_name:
        model_name = translate_llama2(model_name)
    if library == "auto":
        library = None
    try:
        # NOTE(review): `trust_remote_code=True` is used for a user-supplied model name -
        # confirm this is acceptable where the app is deployed
        model = create_empty_model(model_name, library_name=library, trust_remote_code=True, access_token=access_token)
    except GatedRepoError as e:
        raise gr.Error(
            f"Model `{model_name}` is a gated model, please ensure to pass in your access token and try again if you have access. You can find your access token here : https://huggingface.co/settings/tokens. "
        ) from e
    except RepositoryNotFoundError as e:
        raise gr.Error(f"Model `{model_name}` was not found on the Hub, please try another model name.") from e
    except ValueError as e:
        raise gr.Error(
            f"Model `{model_name}` does not have any library metadata on the Hub, please manually select a library_name to use (such as `transformers`)"
        ) from e
    except (RuntimeError, OSError) as e:
        library = check_has_model(e)
        if library != "unknown":
            raise gr.Error(
                f"Tried to load `{model_name}` with `{library}` but a possible model to load was not found inside the repo."
            ) from e
        raise gr.Error(
            f"Model `{model_name}` had an error, please open a discussion on the model's page with the error message and name: `{e}`"
        ) from e
    except ImportError:
        # hacky way to check if it works with `trust_remote_code=False`
        model = create_empty_model(
            model_name, library_name=library, trust_remote_code=False, access_token=access_token
        )
    except Exception as e:
        raise gr.Error(
            f"Model `{model_name}` had an error, please open a discussion on the model's page with the error message and name: `{e}`"
        ) from e
    return model
75
+
76
+
77
def calculate_memory(model: torch.nn.Module, options: list):
    "Builds one row of formatted size estimates for `model` per dtype listed in `options`"
    total_size, largest_layer = calculate_maximum_sizes(model)

    def _row_for(dtype):
        # Each dtype divides the sizes reported by `calculate_maximum_sizes` by its modifier
        layer_size = largest_layer[0]
        divisor = DTYPE_MODIFIER[dtype]
        scaled_total = total_size / divisor
        scaled_layer = layer_size / divisor
        return {
            "dtype": dtype,
            "Largest Layer or Residual Group": convert_bytes(scaled_layer),
            "Total Size": convert_bytes(scaled_total),
            # Training with Adam is estimated at 4x the total size
            "Training using Adam": convert_bytes(scaled_total * 4),
        }

    return [_row_for(dtype) for dtype in options]