hyunjae's picture
Update README.md
658a043 verified
|
raw
history blame
1.11 kB
metadata
license: mit
language:
  - ko
pipeline_tag: text-generation
  • base_model: polyglot-ko-3.8b
  • train_data: 12 instruction fine-tuning datasets
  • train_method: SFT (supervised fine-tuning)
from transformers import AutoModelForCausalLM, AutoTokenizer

# Minimal chat-style inference example for the fine-tuned checkpoint.

# Device that both the model weights and the input tensor are moved to.
device = "cuda"  # the device to load the model onto

# The model and its tokenizer are loaded from the same Hub repository.
model = AutoModelForCausalLM.from_pretrained("hyunjae/polyglot-ko-3.8b-total")
tokenizer = AutoTokenizer.from_pretrained("hyunjae/polyglot-ko-3.8b-total")

# Multi-turn conversation. It ends on a user turn, which is the question the
# model is expected to answer. The Korean text must be stored as real UTF-8
# Hangul; mis-decoded (mojibake) text would feed the model garbage tokens.
messages = [
    # "You are an AI assistant that helps people find information."
    {"role": "system", "content": "당신은 사람들이 정보를 찾을 수 있도록 도와주는 인공지능 비서입니다."},
    # "What is the capital of South Korea?"
    {"role": "user", "content": "대한민국의 수도는 어디야?"},
    # "The capital of South Korea is Seoul."
    {"role": "assistant", "content": "대한민국의 수도는 서울입니다."},
    # "What is the total population of Seoul?"
    {"role": "user", "content": "서울 인구는 총 몇 명이야?"},
]

# Render the conversation through the tokenizer's chat template and tokenize
# it into a PyTorch tensor. add_generation_prompt=True asks the template to
# append the assistant-turn prefix so that generation starts a fresh reply
# instead of continuing the last user message; templates that define no such
# prefix ignore the flag.
# NOTE(review): assumes this repo's tokenizer ships a chat template - confirm.
encodeds = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    return_tensors="pt",
)

# Inputs and weights must live on the same device before calling generate().
model_inputs = encodeds.to(device)
model.to(device)

# do_sample=True enables sampling, so the output differs from run to run;
# max_new_tokens caps the number of tokens generated after the prompt.
generated_ids = model.generate(model_inputs, max_new_tokens=1000, do_sample=True)

# Turn the generated token ids back into text and print the first sequence.
decoded = tokenizer.batch_decode(generated_ids)
print(decoded[0])