Update README.md
Browse files
README.md
CHANGED
@@ -6,32 +6,6 @@ pipeline_tag: text-generation
|
|
6 |
---
|
7 |
|
8 |
|
9 |
-
- base_model: polyglot-ko-3.
|
10 |
- train_data: 12 instruction fine-tuned dataset
|
11 |
-
- train method: SFT
|
12 |
-
|
13 |
-
|
14 |
-
```python
|
15 |
-
from transformers import AutoModelForCausalLM, AutoTokenizer
|
16 |
-
|
17 |
-
device = "cuda" # the device to load the model onto
|
18 |
-
|
19 |
-
model = AutoModelForCausalLM.from_pretrained("hyunjae/polyglot-ko-3.8b-total")
|
20 |
-
tokenizer = AutoTokenizer.from_pretrained("hyunjae/polyglot-ko-3.8b-total")
|
21 |
-
|
22 |
-
messages = [
|
23 |
-
{"role": "system", "content": "당신은 사람들이 정보를 찾을 수 있도록 도와주는 인공지능 비서입니다."},
|
24 |
-
{"role": "user", "content": "대한민국의 수도는 어디야?"},
|
25 |
-
{"role": "assistant", "content": "대한민국의 수도는 서울입니다."},
|
26 |
-
{"role": "user", "content": "서울 인구는 총 몇 명이야?"}
|
27 |
-
]
|
28 |
-
|
29 |
-
encodeds = tokenizer.apply_chat_template(messages, return_tensors="pt")
|
30 |
-
|
31 |
-
model_inputs = encodeds.to(device)
|
32 |
-
model.to(device)
|
33 |
-
|
34 |
-
generated_ids = model.generate(model_inputs, max_new_tokens=1000, do_sample=True)
|
35 |
-
decoded = tokenizer.batch_decode(generated_ids)
|
36 |
-
print(decoded[0])
|
37 |
-
```
|
|
|
6 |
---
|
7 |
|
8 |
|
9 |
+
- base_model: polyglot-ko-3.8b
|
10 |
- train_data: 12 instruction fine-tuned dataset
|
11 |
+
- train method: SFT
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|