Arturo Jiménez de los Galanes Reguillos committed on
Commit
fde3482
·
1 Parent(s): 87c9e01

Manually build chat template

Browse files
Files changed (1) hide show
  1. app.py +17 -1
app.py CHANGED
@@ -5,6 +5,9 @@ from threading import Thread
5
  import torch
6
 
7
  MODEL = "m-a-p/OpenCodeInterpreter-DS-33B"
 
 
 
8
 
9
  system_message = "You are a computer programmer that can translate python code to C++ in order to improve performance"
10
 
@@ -20,6 +23,19 @@ def messages_for(python):
20
  {"role": "user", "content": user_prompt_for(python)}
21
  ]
22
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  tokenizer = AutoTokenizer.from_pretrained(MODEL)
24
  model = AutoModelForCausalLM.from_pretrained(MODEL, torch_dtype=torch.bfloat16, device_map="auto")
25
  model.eval()
@@ -27,7 +43,7 @@ streamer = TextIteratorStreamer(tokenizer)
27
 
28
  cplusplus = None
29
  def translate(python):
30
- inputs = tokenizer(messages_for(python), return_tensors="pt").to(model.device)
31
  generation_kwargs = dict(
32
  inputs,
33
  streamer=streamer,
 
5
  import torch
6
 
7
  MODEL = "m-a-p/OpenCodeInterpreter-DS-33B"
8
+ "bos_token": "<|begin_of_text|>",
9
+ CHAT_TEMPLATE = "{{ bos_token }}{% for message in messages %}{%
10
+ if message['role'] == 'user' %}{{ '<|start_header_id|>user<|end_header_id|>\n\n' + message['content'] | trim + '<|eot_id|>' }}{% elif message['role'] == 'assistant' %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' + message['content'] | trim + '<|eot_id|>' }}{% else %}{{ '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n' + message['content'] | trim + '<|eot_id|>' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}"
11
 
12
  system_message = "You are a computer programmer that can translate python code to C++ in order to improve performance"
13
 
 
23
  {"role": "user", "content": user_prompt_for(python)}
24
  ]
25
 
26
+ def apply_chat_template(messages):
27
+ bos_token = "<|begin▁of▁sentence|>"
28
+ result = bos_token
29
+ for message in messages:
30
+ if message['role'] == 'user':
31
+ result += f"<|start_header_id|>user<|end_header_id|>\n\n{message['content']}<|eot_id|>"
32
+ elif message['role'] == 'assistant':
33
+ result += f"<|start_header_id|>assistant<|end_header_id|>\n\n{message['content']}<|eot_id|>"
34
+ else:
35
+ result += f"<|start_header_id|>{message['role']}<|end_header_id|>\n\n{message['content']}<|eot_id|>"
36
+ return result
37
+
38
+
39
  tokenizer = AutoTokenizer.from_pretrained(MODEL)
40
  model = AutoModelForCausalLM.from_pretrained(MODEL, torch_dtype=torch.bfloat16, device_map="auto")
41
  model.eval()
 
43
 
44
  cplusplus = None
45
  def translate(python):
46
+ inputs = tokenizer(apply_chat_template(messages_for(python)), return_tensors="pt").to(model.device)
47
  generation_kwargs = dict(
48
  inputs,
49
  streamer=streamer,