from mistral_common.tokens.tokenizers.mistral import MistralTokenizer
from mistral_common.protocol.instruct.messages import UserMessage
from mistral_common.protocol.instruct.request import ChatCompletionRequest
from transformers import AutoModelForCausalLM
import torch
# Load the Mistral tokenizer
model_name = "nemostral"
tokenizer = MistralTokenizer.from_model(model_name)
# Tokenize a list of messages
tokenized = tokenizer.encode_chat_completion(
    ChatCompletionRequest(
        messages=[
            UserMessage(content="How many people live in France and all its neighbours? List all of them!")
        ],
        model=model_name,
    )
)
tokens, text = tokenized.tokens, tokenized.text
input_ids = torch.tensor([tokens]).to("cuda")
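# `tokens` holds the prompt token ids; `text` is the rendered prompt string
# (roughly "<s>[INST] ... [/INST]" for instruct tokenizers), handy for debugging.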
# Load the model weights from the current directory in bfloat16
model = AutoModelForCausalLM.from_pretrained("./", torch_dtype=torch.bfloat16).to("cuda")
out = model.generate(input_ids, max_new_tokens=1024)

# Keep only the newly generated tokens, dropping the prompt and the final token (EOS)
generated = out[0, input_ids.shape[-1]:-1].tolist()
print(tokenizer.decode(generated))
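# Note: `generate` uses greedy decoding here unless the model's generation config
# says otherwise. A hypothetical variant with sampling (the parameter values are
# an assumption, not part of this snippet):
# out = model.generate(input_ids, max_new_tokens=1024, do_sample=True, temperature=0.3)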