muryshev committed on
Commit
53500bc
1 Parent(s): ad92aae

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -12
app.py CHANGED
@@ -6,8 +6,8 @@ from huggingface_hub import snapshot_download, Repository
6
  import huggingface_hub
7
  import gc
8
  import os.path
9
- import csv
10
  from datetime import datetime
 
11
 
12
  SYSTEM_PROMPT = "Ты — русскоязычный автоматический ассистент. Ты максимально точно и отвечаешь на запросы пользователя, используя русский язык."
13
  SYSTEM_TOKEN = 1788
@@ -21,7 +21,7 @@ ROLE_TOKENS = {
21
  "system": SYSTEM_TOKEN
22
  }
23
 
24
- CONTEXT_SIZE = 4000
25
  ENABLE_GPU = True
26
  GPU_LAYERS = 70
27
 
@@ -55,7 +55,7 @@ model_path = snapshot_download(repo_id=repo_name, allow_patterns=model_name) + '
55
  app.logger.info('Model path: ' + model_path)
56
 
57
  DATASET_REPO_URL = "https://huggingface.co/datasets/muryshev/saiga-chat"
58
- DATA_FILENAME = "data.csv"
59
  DATA_FILE = os.path.join("dataset", DATA_FILENAME)
60
 
61
  HF_TOKEN = os.environ.get("HF_TOKEN")
@@ -69,11 +69,15 @@ repo = Repository(
69
 
70
  def log(req: str = '', resp: str = ''):
71
  if req or resp:
72
- with open(DATA_FILE, "a") as csvfile:
73
- writer = csv.DictWriter(csvfile, fieldnames=["request", "response", "time"])
74
- writer.writerow(
75
- {"request": req, "response": resp, "time": str(datetime.now())}
76
- )
 
 
 
 
77
  commit_url = repo.push_to_hub()
78
  app.logger.info(commit_url)
79
 
@@ -257,16 +261,15 @@ def generate_response():
257
  top_p = parameters.get("top_p", 0.85)
258
  repetition_penalty = parameters.get("repetition_penalty", 1.2)
259
  top_k = parameters.get("top_k", 30)
260
- return_full_text = parameters.get("return_full_text", False)
261
-
262
- tokens = get_system_tokens(model)
263
- tokens.append(LINEBREAK_TOKEN)
264
 
265
  tokens = []
266
 
267
  for message in messages:
268
  if message.get("from") == "assistant":
269
  message_tokens = get_message_tokens(model=model, role="bot", content=message.get("content", ""))
 
 
270
  else:
271
  message_tokens = get_message_tokens(model=model, role="user", content=message.get("content", ""))
272
 
 
6
  import huggingface_hub
7
  import gc
8
  import os.path
 
9
  from datetime import datetime
10
+ import xml.etree.ElementTree as ET
11
 
12
  SYSTEM_PROMPT = "Ты — русскоязычный автоматический ассистент. Ты максимально точно и отвечаешь на запросы пользователя, используя русский язык."
13
  SYSTEM_TOKEN = 1788
 
21
  "system": SYSTEM_TOKEN
22
  }
23
 
24
+ CONTEXT_SIZE = 2000
25
  ENABLE_GPU = True
26
  GPU_LAYERS = 70
27
 
 
55
  app.logger.info('Model path: ' + model_path)
56
 
57
  DATASET_REPO_URL = "https://huggingface.co/datasets/muryshev/saiga-chat"
58
+ DATA_FILENAME = "data-saiga-cuda.xml"
59
  DATA_FILE = os.path.join("dataset", DATA_FILENAME)
60
 
61
  HF_TOKEN = os.environ.get("HF_TOKEN")
 
69
 
70
  def log(req: str = '', resp: str = ''):
71
  if req or resp:
72
+ element = ET.Element("row", {"time": str(datetime.now()) })
73
+ req_element = ET.SubElement(element, "request")
74
+ req_element.text = req
75
+ resp_element = ET.SubElement(element, "response")
76
+ resp_element.text = resp
77
+
78
+ with open(DATA_FILE, "ab+") as xml_file:
79
+ xml_file.write(ET.tostring(element, encoding="utf-8"))
80
+
81
  commit_url = repo.push_to_hub()
82
  app.logger.info(commit_url)
83
 
 
261
  top_p = parameters.get("top_p", 0.85)
262
  repetition_penalty = parameters.get("repetition_penalty", 1.2)
263
  top_k = parameters.get("top_k", 30)
264
+ return_full_text = parameters.get("return_full_text", False)
 
 
 
265
 
266
  tokens = []
267
 
268
  for message in messages:
269
  if message.get("from") == "assistant":
270
  message_tokens = get_message_tokens(model=model, role="bot", content=message.get("content", ""))
271
+ elif message.get("from") == "system":
272
+ message_tokens = get_message_tokens(model=model, role="system", content=message.get("content", ""))
273
  else:
274
  message_tokens = get_message_tokens(model=model, role="user", content=message.get("content", ""))
275