File size: 5,221 Bytes
3698d0a
dd4f101
 
d1c8a18
 
 
 
dd4f101
d1c8a18
3698d0a
dd4f101
3698d0a
 
 
 
 
 
 
 
 
 
 
 
 
 
d1c8a18
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dd4f101
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d1c8a18
 
 
 
dd4f101
 
d1c8a18
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
import requests
import re
from collections import defaultdict
# Utilities related to loading in and working with models/specific models
from urllib.parse import urlparse
import torch
from accelerate.commands.estimate import check_has_model, create_empty_model
from accelerate.utils import compute_module_sizes, named_module_tensors
from huggingface_hub.utils import GatedRepoError, RepositoryNotFoundError


def fetch_dictionary_content(model_id):
    MODEL_URL = "https://huggingface.co/{model_id}/raw/main/config.json"
    response = requests.get(MODEL_URL.format(model_id=model_id))
    
    # Check if the request was successful
    if response.status_code == 200:
        return response.json()  # Parse the JSON content into a Python dictionary
    else:
        return None
    
def load_parameter(model_dict, cand_keys):
    for k in cand_keys:
        if k in model_dict:
            return model_dict[k]
    return 0

# Reference: https://huggingface.co/spaces/hf-accelerate/model-memory-usage
def extract_from_url(name: str):
    "Checks if `name` is a URL, and if so converts it to a model name"
    is_url = False
    try:
        result = urlparse(name)
        is_url = all([result.scheme, result.netloc])
    except Exception:
        is_url = False
    # Pass through if not a URL
    if not is_url:
        return name
    else:
        path = result.path
        return path[1:]


def translate_llama2(text):
    "Translates llama-2 to its hf counterpart"
    if not text.endswith("-hf"):
        return text + "-hf"
    return text


def get_model(model_name: str, library: str, access_token: str):
    "Finds and grabs model from the Hub, and initializes on `meta`"
    if "meta-llama" in model_name:
        model_name = translate_llama2(model_name)
    if library == "auto":
        library = None
    model_name = extract_from_url(model_name)
    try:
        model = create_empty_model(model_name, library_name=library, trust_remote_code=True, access_token=access_token)
    except GatedRepoError:
        raise RuntimeError(
            f"Model `{model_name}` is a gated model, please ensure to pass in your access token and try again if you have access. You can find your access token here : https://huggingface.co/settings/tokens. "
        )
    except RepositoryNotFoundError:
        raise RuntimeError(f"Model `{model_name}` was not found on the Hub, please try another model name.")
    except ValueError:
        raise RuntimeError(
            f"Model `{model_name}` does not have any library metadata on the Hub, please manually select a library_name to use (such as `transformers`)"
        )
    except (RuntimeError, OSError) as e:
        library = check_has_model(e)
        if library != "unknown":
            raise RuntimeError(
                f"Tried to load `{model_name}` with `{library}` but a possible model to load was not found inside the repo."
            )
        raise RuntimeError(
            f"Model `{model_name}` had an error, please open a discussion on the model's page with the error message and name: `{e}`"
        )
    except ImportError:
        # hacky way to check if it works with `trust_remote_code=False`
        model = create_empty_model(
            model_name, library_name=library, trust_remote_code=False, access_token=access_token
        )
    except Exception as e:
        raise RuntimeError(
            f"Model `{model_name}` had an error, please open a discussion on the model's page with the error message and name: `{e}`"
        )
    return model

def get_module_tensors(model):
    module_tensors = {}
    for name, tensor in named_module_tensors(model, recurse=True):
        module_tensors[name] = tensor.shape

    return module_tensors


def classify_module(module_tensors):
    # A dictionary to store counts for each generic layer type
    module_classes = defaultdict(list)

    # This function removes all numbers from a given string
    def remove_numbers(s):
        return re.sub(r'\d+', '', s)
    
    # Loop through all named parameters of the model
    for name in module_tensors:
        # Remove numbers from the name
        generic_name = remove_numbers(name)
        generic_name = generic_name.replace('..', '.')
        
        # If the name already exists in the dictionary, increase the count, else set it to 1
        module_classes[generic_name].append({name: module_tensors[name]})

    return module_classes

def get_module_tensors_matched(filter_fn, module_classes_dict):
    matched = []
    for generic, module_list in module_classes_dict.items():
        if filter_fn(generic.lower()):
            matched.extend([v for module in module_list for v in module.values()])
       
    return matched


if __name__ == '__main__':
    model = get_model('NousResearch/Nous-Hermes-Llama2-13b', None, None)
    module_tensors = get_module_tensors(model)
    module_classes = classify_module(module_tensors)
    sizes = compute_module_sizes(model, dtype=torch.int8)
    size_dict = {
        'attn':0,
        'mlp':0,
        'embed':0,
    }
    for k, v in sizes.items():
        for kk in size_dict:
            if kk in k and 'weight' in k:
                size_dict[kk] += v/1024**3
    print(sizes)