Commit 310ffaf by seonglae (verified) · Parent: 5daa092

Update README.md

Files changed (1): README.md (+1 -1)
README.md CHANGED
@@ -59,7 +59,7 @@ model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16,
     attn_implementation="flash_attention_2") # if flash enabled
 sys_prompt = '한국어로 대답해'
 texts = ['안녕', '서울은 오늘 어때']
-chats = list(map(lambda t: [{'role': 'user', 'content': f'{sys_prompt}\n{t}'}], texts))
+chats = list(map(lambda t: [{'role': 'user', 'content': f'{sys_prompt}\n{t}'}], texts)) # ChatML format
 prompts = list(map(lambda p: tokenizer.apply_chat_template(p, tokenize=False, add_generation_prompt=True), chats))
 input_ids = tokenizer(prompts, return_tensors="pt", padding=True).to("cuda" if device is None else device)
 outputs = model.generate(**input_ids, max_new_tokens=100, repetition_penalty=1.05)
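
For context: the hunk above relies on `model`, `tokenizer`, `device`, and `model_id` being defined earlier in the README (before line 59), which this diff does not show. Below is a minimal self-contained sketch of the same inference flow, assuming a hypothetical placeholder checkpoint id ("your-org/your-model" is not the actual repo); the Korean strings from the diff mean "Answer in Korean", "Hi", and "How is Seoul today".

# Minimal sketch, not the repo's exact setup code.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "your-org/your-model"  # hypothetical placeholder; substitute the real checkpoint
device = "cuda" if torch.cuda.is_available() else "cpu"

# Left padding so generation starts right after each prompt in a batch
tokenizer = AutoTokenizer.from_pretrained(model_id, padding_side="left")
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token  # many causal LMs ship without a pad token

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    attn_implementation="flash_attention_2",  # only if flash-attn is installed
).to(device)

sys_prompt = '한국어로 대답해'              # "Answer in Korean"
texts = ['안녕', '서울은 오늘 어때']          # "Hi", "How is Seoul today"

# Wrap each text as a single-turn chat; apply_chat_template renders it in the
# model's chat format (ChatML-style role/content messages)
chats = [[{'role': 'user', 'content': f'{sys_prompt}\n{t}'}] for t in texts]
prompts = [tokenizer.apply_chat_template(c, tokenize=False, add_generation_prompt=True) for c in chats]

input_ids = tokenizer(prompts, return_tensors="pt", padding=True).to(device)
outputs = model.generate(**input_ids, max_new_tokens=100, repetition_penalty=1.05)
print(tokenizer.batch_decode(outputs, skip_special_tokens=True))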