Update app.py
app.py
CHANGED
@@ -22,7 +22,7 @@ ROLE_TOKENS = {
 }
 
 CONTEXT_SIZE = 2000
-ENABLE_GPU =
+ENABLE_GPU = False
 GPU_LAYERS = 70
 
 # Create a lock object
@@ -39,8 +39,8 @@ app.logger.setLevel(logging.DEBUG)  # Set the desired logging level
 #repo_name = "IlyaGusev/saiga2_13b_gguf"
 #model_name = "model-q4_K.gguf"
 
-repo_name = "IlyaGusev/
-model_name = "
+repo_name = "IlyaGusev/saiga2_13b_gguf"
+model_name = "model-q8_0.gguf"
 
 #repo_name = "IlyaGusev/saiga2_7b_gguf"
 #model_name = "model-q4_K.gguf"
@@ -98,8 +98,8 @@ def init_model(context_size, enable_gpu=False, gpu_layer_number=35):
             logits_all=True,
             #n_threads=12,
             verbose=True,
-            n_gpu_layers=gpu_layer_number
-            n_gqa=8 #must be set for 70b models
+            n_gpu_layers=gpu_layer_number#,
+            #n_gqa=8 #must be set for 70b models
         )
         return model
     else:
@@ -110,8 +110,8 @@ def init_model(context_size, enable_gpu=False, gpu_layer_number=35):
             #n_batch=100,
             logits_all=True,
             #n_threads=12,
-            verbose=True
-            n_gqa=8 #must be set for 70b models
+            verbose=True#,
+            #n_gqa=8 #must be set for 70b models
         )
         return model
 
@@ -236,7 +236,7 @@ def generate_and_log_tokens(user_request, model, generator):
     global response_tokens
     for token in generate_tokens(model, generator):
         if token == b'':  # or (max_new_tokens is not None and i >= max_new_tokens):
-            log(user_request, response_tokens.decode("utf-8", errors="ignore"))
+            #log(user_request, response_tokens.decode("utf-8", errors="ignore"))
             response_tokens = bytearray()
             break
         response_tokens.extend(token)
@@ -271,6 +271,8 @@ def generate_response():
     for message in messages:
         if message.get("from") == "assistant":
             message_tokens = get_message_tokens(model=model, role="bot", content=message.get("content", ""))
+        elif message.get("from") == "system":
+            message_tokens = get_message_tokens(model=model, role="system", content=message.get("content", ""))
         else:
             message_tokens = get_message_tokens(model=model, role="user", content=message.get("content", ""))
 
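Taken together, the config and init_model hunks switch the Space to CPU-only loading of the 13B q8_0 GGUF file and drop the 70B-specific n_gqa setting. The sketch below shows one way the visible pieces could fit together; it assumes the app resolves repo_name/model_name with huggingface_hub's hf_hub_download and builds the model with llama-cpp-python's Llama, since the diff only exposes fragments of the real init_model.

# Minimal sketch, assuming huggingface_hub + llama-cpp-python; not the exact app.py code.
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

CONTEXT_SIZE = 2000
ENABLE_GPU = False
GPU_LAYERS = 70

repo_name = "IlyaGusev/saiga2_13b_gguf"
model_name = "model-q8_0.gguf"


def init_model(context_size, enable_gpu=False, gpu_layer_number=35):
    # Download (or reuse the cached copy of) the quantized model file.
    model_path = hf_hub_download(repo_id=repo_name, filename=model_name)
    if enable_gpu:
        # GPU path: offload gpu_layer_number layers; n_gqa is no longer passed
        # because the 13B model does not need the 70B-only setting.
        return Llama(
            model_path=model_path,
            n_ctx=context_size,
            logits_all=True,
            verbose=True,
            n_gpu_layers=gpu_layer_number,
        )
    # CPU path: same settings without layer offloading.
    return Llama(
        model_path=model_path,
        n_ctx=context_size,
        logits_all=True,
        verbose=True,
    )


# With this commit's constants, the CPU branch is taken.
model = init_model(CONTEXT_SIZE, enable_gpu=ENABLE_GPU, gpu_layer_number=GPU_LAYERS)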
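The generate_and_log_tokens hunk only comments out the log() call, but the surrounding loop shows the pattern worth noting: response bytes accumulate in a bytearray and an empty token marks the end of the stream. Here is a toy, self-contained version of that pattern with a simulated token stream (generate_tokens and log are not real calls here); decoding with errors="ignore" is what lets a reply be flushed even if a multi-byte UTF-8 character was split across tokens.

# Toy illustration only; the real app streams tokens from the llama.cpp generator.
def fake_token_stream():
    # "Привет" split across a multi-byte boundary, then the end-of-stream sentinel.
    text = "Привет".encode("utf-8")
    yield text[:5]
    yield text[5:]
    yield b""

response_tokens = bytearray()
for token in fake_token_stream():
    if token == b"":
        # The commit comments out the real log() call; here we just print the result.
        print("final text:", response_tokens.decode("utf-8", errors="ignore"))
        response_tokens = bytearray()
        break
    response_tokens.extend(token)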
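The final hunk adds a branch so that messages marked "from": "system" are tokenized with the "system" role instead of falling through to "user". The snippet below isolates that dispatch so it can be run on its own; get_message_tokens is stubbed and build_prompt_tokens is a hypothetical wrapper name, since the diff only shows the loop body inside generate_response.

def get_message_tokens(model, role, content):
    # Stand-in for the real tokenizer helper; it only tags the role here.
    return [f"<{role}>", content]

def build_prompt_tokens(model, messages):
    tokens = []
    for message in messages:
        if message.get("from") == "assistant":
            message_tokens = get_message_tokens(model=model, role="bot", content=message.get("content", ""))
        elif message.get("from") == "system":
            message_tokens = get_message_tokens(model=model, role="system", content=message.get("content", ""))
        else:
            message_tokens = get_message_tokens(model=model, role="user", content=message.get("content", ""))
        tokens.extend(message_tokens)
    return tokens

# A system instruction is now kept as a system-role message rather than a user one.
print(build_prompt_tokens(None, [
    {"from": "system", "content": "You are a helpful assistant."},
    {"from": "user", "content": "Hello!"},
]))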