Update README.md
README.md CHANGED

@@ -59,7 +59,7 @@ model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16,
         attn_implementation="flash_attention_2") # if flash enabled
 sys_prompt = '한국어로 대답해'
 texts = ['안녕', '서울은 오늘 어때']
-chats = list(map(lambda t: [{'role': 'user', 'content': f'{sys_prompt}\n{t}'}], texts))
+chats = list(map(lambda t: [{'role': 'user', 'content': f'{sys_prompt}\n{t}'}], texts)) # ChatML format
 prompts = list(map(lambda p: tokenizer.apply_chat_template(p, tokenize=False, add_generation_prompt=True), chats))
 input_ids = tokenizer(prompts, return_tensors="pt", padding=True).to("cuda" if device is None else device)
 outputs = model.generate(**input_ids, max_new_tokens=100, repetition_penalty=1.05)
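For readers who only see this hunk, below is a minimal, self-contained sketch of how the surrounding README snippet can be run end to end. The `model_id` value, the explicit `device` handling, and the final `batch_decode` call are illustrative assumptions, not part of the diff; the Korean strings mean "Answer in Korean", "Hi", and "How is Seoul today?".

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "your-org/your-model"  # placeholder; use the model id from the README
device = "cuda" if torch.cuda.is_available() else "cpu"

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    attn_implementation="flash_attention_2",  # only if flash-attn is installed
).to(device)

sys_prompt = '한국어로 대답해'        # "Answer in Korean"
texts = ['안녕', '서울은 오늘 어때']   # "Hi", "How is Seoul today?"

# Wrap each input as a single-turn chat (ChatML-style role/content messages).
chats = list(map(lambda t: [{'role': 'user', 'content': f'{sys_prompt}\n{t}'}], texts))
# Render each chat with the tokenizer's chat template and append the generation prompt.
prompts = list(map(lambda p: tokenizer.apply_chat_template(p, tokenize=False, add_generation_prompt=True), chats))

# Batch-tokenize with padding and move tensors to the target device.
# Depending on the tokenizer, you may need to set a pad token or left padding first.
input_ids = tokenizer(prompts, return_tensors="pt", padding=True).to(device)
outputs = model.generate(**input_ids, max_new_tokens=100, repetition_penalty=1.05)
print(tokenizer.batch_decode(outputs, skip_special_tokens=True))
```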