QuantFactory/llama_3.1_8B_Thai_instruct-GGUF
This is a quantized version of Suraponn/llama_3.1_8B_Thai_instruct, created using llama.cpp.
Original Model Card
# Example: generate a Thai response with the original (unquantized) model
# through the Hugging Face transformers API.
import json

import torch
from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer

model_id = "Suraponn/llama_3.1_8B_Thai_instruct"

tokenizer = AutoTokenizer.from_pretrained(
    model_id,
)

# Load the weights in half precision onto the first CUDA device.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="cuda:0",
    torch_dtype=torch.float16,
)

# Loaded as in the original example; not consumed by the generation code below.
config_setting = AutoConfig.from_pretrained(
    model_id,
    add_special_tokens=True,
)

# Fall back to the tokenizer's built-in template when the checkpoint ships
# none, or when the shipped one has no "system" role but the built-in one does.
# Newer transformers releases no longer expose `default_chat_template`, so it
# is looked up defensively (and only when needed) instead of assuming it exists.
if tokenizer.chat_template is None or "system" not in tokenizer.chat_template:
    fallback_template = getattr(tokenizer, "default_chat_template", None)
    if fallback_template is not None and (
        tokenizer.chat_template is None or "system" in fallback_template
    ):
        tokenizer.chat_template = fallback_template

# User prompt (Thai); sent to the model verbatim.
s_split = "เขียนบทความเกี่ยวกับการออกกำลังกายให้ถูกต้อง"

chat = [
    {
        "role": "system",
        "content": "You are a helpfull assistant. Please respond in Thai.",
    },
    {
        "role": "user",
        "content": s_split,
    },
]

tokenizer.use_default_system_prompt = False

# Render the conversation to a plain string and append the assistant header
# so that generation starts at the assistant's turn.
extract_input = tokenizer.apply_chat_template(
    chat,
    tokenize=False,
    add_generation_prompt=True,
)
print("------------\n" + extract_input + "\n------------")

# The rendered template already carries its special tokens, so the tokenizer
# is told not to add them a second time.
inputs = tokenizer(
    extract_input,
    return_tensors="pt",
    add_special_tokens=False,
)

# Stop on either the tokenizer's EOS token or the "<|eot_id|>" token.
terminators = [
    tokenizer.eos_token_id,
    tokenizer.convert_tokens_to_ids("<|eot_id|>"),
]

inputs = inputs.to(model.device)

with torch.no_grad():
    tokens = model.generate(
        **inputs,
        max_new_tokens=2048,
        do_sample=True,
        eos_token_id=terminators,
        temperature=0.7,
    )

# tokens[0] is decoded in full, so the printed text includes the prompt.
output = tokenizer.decode(tokens[0])
print(output)
- Downloads last month
- 117
Model tree for QuantFactory/llama_3.1_8B_Thai_instruct-GGUF
Base model
meta-llama/Llama-3.1-8B