TransformerAnalyzer

Sleeping

Alan Liu

Use real numbers from the model to calculate ops and parameters

dd4f101 over 1 year ago

5.22 kB

	import requests
	import re
	from collections import defaultdict
	# Utilities related to loading in and working with models/specific models
	from urllib.parse import urlparse
	import torch
	from accelerate.commands.estimate import check_has_model, create_empty_model
	from accelerate.utils import compute_module_sizes, named_module_tensors
	from huggingface_hub.utils import GatedRepoError, RepositoryNotFoundError


	def fetch_dictionary_content(model_id, timeout=10):
	    """Download and parse a model's ``config.json`` from the Hugging Face Hub.

	    Args:
	        model_id: Repository id substituted into the raw-file URL.
	        timeout: Seconds to wait for the HTTP request before giving up.

	    Returns:
	        The parsed JSON content, or ``None`` when the request fails, the
	        response status is not 200, or the body is not valid JSON.
	    """
	    MODEL_URL = "https://huggingface.co/{model_id}/raw/main/config.json"
	    try:
	        # Without a timeout a stalled connection would block the caller forever.
	        response = requests.get(MODEL_URL.format(model_id=model_id), timeout=timeout)
	    except requests.RequestException:
	        # Network-level failure: report it the same way as a non-200 status.
	        return None

	    if response.status_code != 200:
	        return None

	    try:
	        return response.json()
	    except ValueError:
	        # A 200 response whose body is not JSON (e.g. an HTML page).
	        return None

	def load_parameter(model_dict, cand_keys):
	    """Return the value of the first candidate key present in ``model_dict``.

	    Args:
	        model_dict: Mapping to look the keys up in. ``None`` is accepted so
	            that a failed config fetch (which yields ``None``) does not crash.
	        cand_keys: Candidate keys, tried in the given order.

	    Returns:
	        ``model_dict[key]`` for the first ``key`` of ``cand_keys`` found in
	        ``model_dict``; ``0`` when no candidate matches or ``model_dict`` is
	        ``None``.
	    """
	    if model_dict is None:
	        # `key in None` would raise TypeError; treat "no dict" as "not found".
	        return 0
	    for key in cand_keys:
	        if key in model_dict:
	            return model_dict[key]
	    return 0

	# Reference: https://huggingface.co/spaces/hf-accelerate/model-memory-usage
	def extract_from_url(name: str):
	    """Return the URL path (minus its leading character) if `name` is a URL.

	    A value counts as a URL when parsing it yields both a scheme and a
	    network location. Anything else, including input that fails to parse,
	    is returned unchanged.
	    """
	    try:
	        parsed = urlparse(name)
	    except Exception:
	        # Unparseable input is treated as a plain model name.
	        return name

	    if parsed.scheme and parsed.netloc:
	        # Drop the first character of the path (the leading "/").
	        return parsed.path[1:]
	    return name


	def translate_llama2(text):
	    """Return `text` with a "-hf" suffix, appending it only when missing."""
	    return text if text.endswith("-hf") else text + "-hf"


	def get_model(model_name: str, library: str, access_token: str):
	    """Find a model on the Hub and build it via `create_empty_model`.

	    Args:
	        model_name: Model name or URL. Names containing "meta-llama" get a
	            "-hf" suffix if missing, and URLs are reduced to their path.
	        library: Library name forwarded as `library_name`; the value
	            "auto" is replaced by `None`.
	        access_token: Token forwarded to `create_empty_model`.

	    Returns:
	        The object returned by `create_empty_model`.

	    Raises:
	        RuntimeError: For gated or missing repositories, missing library
	            metadata, and other load failures. The original exception is
	            attached as `__cause__`.
	        Exception: Whatever the `trust_remote_code=False` retry raises
	            after an `ImportError`; that retry is not wrapped.
	    """
	    if "meta-llama" in model_name:
	        model_name = translate_llama2(model_name)
	    if library == "auto":
	        library = None
	    model_name = extract_from_url(model_name)
	    # NOTE(review): handler order matters if the Hub error classes subclass
	    # OSError -- keep the specific handlers ahead of the generic ones.
	    try:
	        model = create_empty_model(model_name, library_name=library, trust_remote_code=True, access_token=access_token)
	    except GatedRepoError as err:
	        raise RuntimeError(
	            f"Model `{model_name}` is a gated model, please ensure to pass in your access token and try again if you have access. You can find your access token here : https://huggingface.co/settings/tokens. "
	        ) from err
	    except RepositoryNotFoundError as err:
	        raise RuntimeError(
	            f"Model `{model_name}` was not found on the Hub, please try another model name."
	        ) from err
	    except ValueError as err:
	        raise RuntimeError(
	            f"Model `{model_name}` does not have any library metadata on the Hub, please manually select a library_name to use (such as `transformers`)"
	        ) from err
	    except (RuntimeError, OSError) as err:
	        library = check_has_model(err)
	        if library != "unknown":
	            raise RuntimeError(
	                f"Tried to load `{model_name}` with `{library}` but a possible model to load was not found inside the repo."
	            ) from err
	        raise RuntimeError(
	            f"Model `{model_name}` had an error, please open a discussion on the model's page with the error message and name: `{err}`"
	        ) from err
	    except ImportError:
	        # hacky way to check if it works with `trust_remote_code=False`
	        model = create_empty_model(
	            model_name, library_name=library, trust_remote_code=False, access_token=access_token
	        )
	    except Exception as err:
	        raise RuntimeError(
	            f"Model `{model_name}` had an error, please open a discussion on the model's page with the error message and name: `{err}`"
	        ) from err
	    return model

	def get_module_tensors(model):
	    """Map each tensor name yielded by `named_module_tensors` to its shape.

	    The model is walked recursively (`recurse=True`).
	    """
	    return {
	        tensor_name: tensor.shape
	        for tensor_name, tensor in named_module_tensors(model, recurse=True)
	    }


	def classify_module(module_tensors):
	    """Group tensor entries under their digit-free ("generic") name.

	    Every run of digits is removed from each name and the resulting ".."
	    is collapsed to ".", so names that differ only by numbers share one key.

	    Args:
	        module_tensors: Mapping of tensor name to its value (e.g. a shape).

	    Returns:
	        A `defaultdict(list)` mapping each generic name to a list of
	        single-item ``{original_name: value}`` dicts, in input order.
	    """
	    digit_runs = re.compile(r'\d+')
	    grouped = defaultdict(list)

	    for full_name, value in module_tensors.items():
	        # Strip the numbers, then close the gap they leave between dots.
	        generic_name = digit_runs.sub('', full_name).replace('..', '.')
	        # Collect the original entry under its generic name.
	        grouped[generic_name].append({full_name: value})

	    return grouped

	def get_module_tensors_matched(filter_fn, module_classes_dict):
	    """Collect the values of all entries whose generic name passes `filter_fn`.

	    Args:
	        filter_fn: Predicate called with the lower-cased generic name.
	        module_classes_dict: Mapping of generic name to a list of dicts.

	    Returns:
	        A flat list of every value from the dicts under the matching names,
	        in iteration order.
	    """
	    collected = []
	    for generic_name, entries in module_classes_dict.items():
	        if not filter_fn(generic_name.lower()):
	            continue
	        for entry in entries:
	            collected.extend(entry.values())
	    return collected


	if __name__ == '__main__':
	    # Ad-hoc run: build a model, classify its tensors and compute module sizes.
	    model = get_model('NousResearch/Nous-Hermes-Llama2-13b', None, None)
	    module_tensors = get_module_tensors(model)
	    module_classes = classify_module(module_tensors)
	    sizes = compute_module_sizes(model, dtype=torch.int8)

	    # Running totals per category. The division below presumably converts
	    # byte counts to GiB -- TODO confirm against compute_module_sizes.
	    size_dict = dict.fromkeys(('attn', 'mlp', 'embed'), 0)
	    for entry_name, entry_size in sizes.items():
	        if 'weight' not in entry_name:
	            continue
	        for category in size_dict:
	            if category in entry_name:
	                size_dict[category] += entry_size / 1024**3

	    # NOTE(review): only the raw `sizes` mapping is printed; `size_dict` and
	    # `module_classes` are computed but never displayed.
	    print(sizes)