---
license: mit
language:
- ko
pipeline_tag: text-generation
base_model: EleutherAI/polyglot-ko-3.8b
---

- Train data: 12 instruction fine-tuning datasets
- Train method: SFT (supervised fine-tuning)
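
The model can be used for chat-style generation with the `transformers` API, as in the example below (the roles assume the tokenizer ships a chat template):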
```python
from transformers import AutoModelForCausalLM, AutoTokenizer

device = "cuda"  # the device to load the model onto

model = AutoModelForCausalLM.from_pretrained("hyunjae/polyglot-ko-3.8b-total")
tokenizer = AutoTokenizer.from_pretrained("hyunjae/polyglot-ko-3.8b-total")

messages = [
    # "You are an AI assistant that helps people find information."
    {"role": "system", "content": "당신은 사람들이 정보를 찾을 수 있도록 도와주는 인공지능 비서입니다."},
    # "Where is the capital of South Korea?"
    {"role": "user", "content": "대한민국의 수도는 어디야?"},
    # "The capital of South Korea is Seoul."
    {"role": "assistant", "content": "대한민국의 수도는 서울입니다."},
    # "What is the total population of Seoul?"
    {"role": "user", "content": "서울 인구는 총 몇 명이야?"}
]

# Render the chat history with the tokenizer's chat template and tokenize it.
encodeds = tokenizer.apply_chat_template(messages, return_tensors="pt")

model_inputs = encodeds.to(device)
model.to(device)

generated_ids = model.generate(model_inputs, max_new_tokens=1000, do_sample=True)
decoded = tokenizer.batch_decode(generated_ids)
print(decoded[0])
```
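
Since `generate` returns the prompt tokens followed by the new tokens, the decoded output above includes the full chat history. A minimal sketch of a common refinement, reusing `model_inputs` and `generated_ids` from the example, that decodes only the model's reply:

```python
# Slice off the prompt so only the newly generated tokens are decoded.
prompt_len = model_inputs.shape[1]
reply = tokenizer.batch_decode(
    generated_ids[:, prompt_len:], skip_special_tokens=True
)
print(reply[0])
```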