dummy_m4 / m4 /models /vllama /make_tiny_llama.py
ysharma's picture
ysharma HF staff
Duplicate from HuggingFaceM4/m4-dialogue
e7d3e35
#!/usr/bin/env python
# This script creates a super tiny model that is useful inside tests, when we just want to test that
# the machinery works, without needing to check the quality of the outcomes.
#
# usage: adjust the configs if wanted, but otherwise just run the script
from pathlib import Path
from transformers import LlamaConfig, LlamaForCausalLM, LlamaTokenizer
# Output directory for the generated artifacts; the final message asks for this folder
# to be uploaded to the hub.
mname_tiny = "tiny-random-LlamaForCausalLM"
path = Path(mname_tiny)
path.mkdir(parents=True, exist_ok=True)

# Pass the overrides to the constructor instead of calling `config.update()` on a
# default config: values derived at construction time (e.g. `num_key_value_heads`,
# which falls back to `num_attention_heads`) would otherwise keep the full-size
# defaults and no longer match the tiny dimensions chosen here.
config = LlamaConfig(
    vocab_size=32000,
    hidden_size=16,
    intermediate_size=16 * 4,
    num_hidden_layers=2,
    num_attention_heads=4,
)
model = LlamaForCausalLM(config)

# NOTE(review): "path_to_llama_7b" looks like a placeholder - point it at a real Llama
# tokenizer (local directory or hub id) before running.
tokenizer = LlamaTokenizer.from_pretrained("path_to_llama_7b")

# Smoke test w/ one text: only checks that tokenization, generation and decoding run
# end to end; the randomly initialised weights make the generated text meaningless.
query = "This is a test"
query_tokens = tokenizer(query, return_tensors="pt")
inputs = {
    "input_ids": query_tokens["input_ids"],
    "attention_mask": query_tokens["attention_mask"],
}
out_gen = model.generate(**inputs)
tokenizer.batch_decode(out_gen)  # result intentionally discarded

# Save model + config + tokenizer
model.half()  # makes it smaller
model.save_pretrained(path)
tokenizer.save_pretrained(path)

# test we can load it back
model = LlamaForCausalLM.from_pretrained(path)

print(f"Generated {mname_tiny} - Upload the generated folder to the hub")