IlyaGusev committed on
Commit
caca471
1 Parent(s): 4f2d5d4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -18
app.py CHANGED
@@ -12,25 +12,12 @@ from llama_cpp import Llama
12
 
13
 
14
  SYSTEM_PROMPT = "Ты — Сайга, русскоязычный автоматический ассистент. Ты разговариваешь с людьми и помогаешь им."
15
- SYSTEM_TOKEN = 1587
16
- USER_TOKEN = 2188
17
- BOT_TOKEN = 12435
18
- LINEBREAK_TOKEN = 13
19
-
20
-
21
- ROLE_TOKENS = {
22
- "user": USER_TOKEN,
23
- "bot": BOT_TOKEN,
24
- "system": SYSTEM_TOKEN
25
- }
26
 
27
 
28
  def get_message_tokens(model, role, content):
29
- message_tokens = model.tokenize(content.encode("utf-8"))
30
- message_tokens.insert(1, ROLE_TOKENS[role])
31
- message_tokens.insert(2, LINEBREAK_TOKEN)
32
- message_tokens.append(model.token_eos())
33
- return message_tokens
34
 
35
 
36
  def get_system_tokens(model):
@@ -51,6 +38,7 @@ model = Llama(
51
 
52
  max_new_tokens = 1500
53
 
 
54
  def user(message, history):
55
  new_history = history + [[message, None]]
56
  return "", new_history
@@ -64,7 +52,6 @@ def bot(
64
  temp
65
  ):
66
  tokens = get_system_tokens(model)[:]
67
- tokens.append(LINEBREAK_TOKEN)
68
 
69
  for user_message, bot_message in history[:-1]:
70
  message_tokens = get_message_tokens(model=model, role="user", content=user_message)
@@ -77,7 +64,7 @@ def bot(
77
  message_tokens = get_message_tokens(model=model, role="user", content=last_user_message)
78
  tokens.extend(message_tokens)
79
 
80
- role_tokens = [model.token_bos(), BOT_TOKEN, LINEBREAK_TOKEN]
81
  tokens.extend(role_tokens)
82
  generator = model.generate(
83
  tokens,
 
12
 
13
 
14
  SYSTEM_PROMPT = "Ты — Сайга, русскоязычный автоматический ассистент. Ты разговариваешь с людьми и помогаешь им."
 
 
 
 
 
 
 
 
 
 
 
15
 
16
 
17
  def get_message_tokens(model, role, content):
18
+ content = f"{role}\n{content}\n</s>"
19
+ content = content.encode("utf-8")
20
+ return model.tokenize(content, special=True)
 
 
21
 
22
 
23
  def get_system_tokens(model):
 
38
 
39
  max_new_tokens = 1500
40
 
41
+
42
  def user(message, history):
43
  new_history = history + [[message, None]]
44
  return "", new_history
 
52
  temp
53
  ):
54
  tokens = get_system_tokens(model)[:]
 
55
 
56
  for user_message, bot_message in history[:-1]:
57
  message_tokens = get_message_tokens(model=model, role="user", content=user_message)
 
64
  message_tokens = get_message_tokens(model=model, role="user", content=last_user_message)
65
  tokens.extend(message_tokens)
66
 
67
+ role_tokens = model.tokenize("bot\n".encode("utf-8"), special=True)
68
  tokens.extend(role_tokens)
69
  generator = model.generate(
70
  tokens,