Spaces:
Running
on
Zero
Running
on
Zero
tori29umai
committed on
Commit
•
d013ae4
1
Parent(s):
d6f703a
Update app.py
Browse files
app.py
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
import os
|
|
|
2 |
import spaces
|
3 |
import sys
|
4 |
import time
|
@@ -115,7 +116,7 @@ class Settings:
|
|
115 |
settings['chat_rep_pen'] = float(config['ChatParameters'].get('repetition_penalty', '1.2'))
|
116 |
settings['chat_n_ctx'] = int(config['ChatParameters'].get('n_ctx', '10000'))
|
117 |
if 'GenerateParameters' in config:
|
118 |
-
settings['gen_n_gpu_layers'] = int(config['GenerateParameters'].get('n_gpu_layers', '
|
119 |
settings['gen_temperature'] = float(config['GenerateParameters'].get('temperature', '0.35'))
|
120 |
settings['gen_top_p'] = float(config['GenerateParameters'].get('top_p', '0.9'))
|
121 |
settings['gen_top_k'] = int(config['GenerateParameters'].get('top_k', '40'))
|
@@ -137,7 +138,7 @@ class Settings:
|
|
137 |
'DEFAULT_GEN_MODEL': settings.get('DEFAULT_GEN_MODEL', '')
|
138 |
}
|
139 |
config['ChatParameters'] = {
|
140 |
-
'n_gpu_layers': str(settings.get('chat_n_gpu_layers',
|
141 |
'temperature': str(settings.get('chat_temperature', 0.5)),
|
142 |
'top_p': str(settings.get('chat_top_p', 0.7)),
|
143 |
'top_k': str(settings.get('chat_top_k', 80)),
|
@@ -174,17 +175,17 @@ class Settings:
|
|
174 |
'gen_author_description': 'あなたは新進気鋭の和風伝奇ミステリー小説家で、細やかな筆致と巧みな構成で若い世代にとても人気があります。',
|
175 |
'DEFAULT_CHAT_MODEL': 'EZO-Common-9B-gemma-2-it.f16.gguf',
|
176 |
'DEFAULT_GEN_MODEL': 'EZO-Common-9B-gemma-2-it.f16.gguf',
|
177 |
-
'chat_n_gpu_layers':
|
178 |
'chat_temperature': 0.5,
|
179 |
-
'chat_top_p': 0
|
180 |
-
'chat_top_k':
|
181 |
-
'chat_rep_pen': 1.
|
182 |
'chat_n_ctx': 10000,
|
183 |
-
'gen_n_gpu_layers':
|
184 |
-
'gen_temperature': 0.
|
185 |
-
'gen_top_p': 0
|
186 |
'gen_top_k': 40,
|
187 |
-
'gen_rep_pen': 1.
|
188 |
'gen_n_ctx': 10000
|
189 |
}
|
190 |
Settings.save_to_ini(default_settings, filename)
|
@@ -196,17 +197,17 @@ class Settings:
|
|
196 |
|
197 |
class GenTextParams:
|
198 |
def __init__(self):
|
199 |
-
self.gen_n_gpu_layers =
|
200 |
-
self.gen_temperature = 0.
|
201 |
self.gen_top_p = 1.0
|
202 |
self.gen_top_k = 40
|
203 |
self.gen_rep_pen = 1.0
|
204 |
self.gen_n_ctx = 10000
|
205 |
-
self.chat_n_gpu_layers =
|
206 |
self.chat_temperature = 0.5
|
207 |
-
self.chat_top_p = 0
|
208 |
-
self.chat_top_k =
|
209 |
-
self.chat_rep_pen = 1.
|
210 |
self.chat_n_ctx = 10000
|
211 |
|
212 |
def update_generate_parameters(self, n_gpu_layers, temperature, top_p, top_k, rep_pen, n_ctx):
|
@@ -312,23 +313,25 @@ class CharacterMaker:
|
|
312 |
model_path = os.path.join(MODEL_DIR, self.settings[f'DEFAULT_{model_type.upper()}_MODEL'])
|
313 |
n_gpu_layers = self.settings[f'{model_type.lower()}_n_gpu_layers']
|
314 |
|
315 |
-
#
|
316 |
if self.llama and self.current_model == model_type:
|
317 |
-
if (self.llama.model_path == model_path and
|
318 |
self.llama.n_gpu_layers == n_gpu_layers):
|
319 |
print(f"{model_type} モデルは既にロードされています。再ロードをスキップします。")
|
320 |
-
self.model_loaded.set()
|
321 |
return
|
322 |
|
323 |
-
|
324 |
if self.llama:
|
325 |
del self.llama
|
326 |
self.llama = None
|
327 |
|
|
|
|
|
328 |
try:
|
329 |
-
self.llama =
|
330 |
self.current_model = model_type
|
331 |
self.model_loaded.set()
|
|
|
332 |
except Exception as e:
|
333 |
print(f"{model_type} モデルのロード中にエラーが発生しました: {str(e)}")
|
334 |
self.model_loaded.set()
|
@@ -588,23 +591,23 @@ def apply_settings():
|
|
588 |
# パラメータを更新
|
589 |
if 'ChatParameters' in new_config:
|
590 |
params.update_chat_parameters(
|
591 |
-
int(new_config['ChatParameters'].get('n_gpu_layers', '
|
592 |
float(new_config['ChatParameters'].get('temperature', '0.5')),
|
593 |
-
float(new_config['ChatParameters'].get('top_p', '0
|
594 |
-
int(new_config['ChatParameters'].get('top_k', '
|
595 |
-
float(new_config['ChatParameters'].get('repetition_penalty', '1.
|
596 |
int(new_config['ChatParameters'].get('n_ctx', '10000'))
|
597 |
)
|
598 |
if 'GenerateParameters' in new_config:
|
599 |
params.update_generate_parameters(
|
600 |
-
int(new_config['GenerateParameters'].get('n_gpu_layers', '
|
601 |
-
float(new_config['GenerateParameters'].get('temperature', '0.
|
602 |
-
float(new_config['GenerateParameters'].get('top_p', '0
|
603 |
int(new_config['GenerateParameters'].get('top_k', '40')),
|
604 |
-
float(new_config['GenerateParameters'].get('repetition_penalty', '1.
|
605 |
int(new_config['GenerateParameters'].get('n_ctx', '10000'))
|
606 |
)
|
607 |
-
|
608 |
# モデルを再ロード
|
609 |
character_maker.current_model = None
|
610 |
|
|
|
1 |
import os
|
2 |
+
os.environ['CUDA_VISIBLE_DEVICES'] = ''
|
3 |
import spaces
|
4 |
import sys
|
5 |
import time
|
|
|
116 |
settings['chat_rep_pen'] = float(config['ChatParameters'].get('repetition_penalty', '1.2'))
|
117 |
settings['chat_n_ctx'] = int(config['ChatParameters'].get('n_ctx', '10000'))
|
118 |
if 'GenerateParameters' in config:
|
119 |
+
settings['gen_n_gpu_layers'] = int(config['GenerateParameters'].get('n_gpu_layers', '-1'))
|
120 |
settings['gen_temperature'] = float(config['GenerateParameters'].get('temperature', '0.35'))
|
121 |
settings['gen_top_p'] = float(config['GenerateParameters'].get('top_p', '0.9'))
|
122 |
settings['gen_top_k'] = int(config['GenerateParameters'].get('top_k', '40'))
|
|
|
138 |
'DEFAULT_GEN_MODEL': settings.get('DEFAULT_GEN_MODEL', '')
|
139 |
}
|
140 |
config['ChatParameters'] = {
|
141 |
+
'n_gpu_layers': str(settings.get('chat_n_gpu_layers', -1)),
|
142 |
'temperature': str(settings.get('chat_temperature', 0.5)),
|
143 |
'top_p': str(settings.get('chat_top_p', 0.7)),
|
144 |
'top_k': str(settings.get('chat_top_k', 80)),
|
|
|
175 |
'gen_author_description': 'あなたは新進気鋭の和風伝奇ミステリー小説家で、細やかな筆致と巧みな構成で若い世代にとても人気があります。',
|
176 |
'DEFAULT_CHAT_MODEL': 'EZO-Common-9B-gemma-2-it.f16.gguf',
|
177 |
'DEFAULT_GEN_MODEL': 'EZO-Common-9B-gemma-2-it.f16.gguf',
|
178 |
+
'chat_n_gpu_layers': -1,
|
179 |
'chat_temperature': 0.5,
|
180 |
+
'chat_top_p': 1.0,
|
181 |
+
'chat_top_k': 40,
|
182 |
+
'chat_rep_pen': 1.0,
|
183 |
'chat_n_ctx': 10000,
|
184 |
+
'gen_n_gpu_layers': -1,
|
185 |
+
'gen_temperature': 0.5,
|
186 |
+
'gen_top_p': 1.0,
|
187 |
'gen_top_k': 40,
|
188 |
+
'gen_rep_pen': 1.0,
|
189 |
'gen_n_ctx': 10000
|
190 |
}
|
191 |
Settings.save_to_ini(default_settings, filename)
|
|
|
197 |
|
198 |
class GenTextParams:
    """Mutable holder for llama.cpp runtime/sampling parameters.

    Keeps two parallel parameter sets: ``gen_*`` for free-form text
    generation and ``chat_*`` for chat. Values here are the in-process
    defaults; other parts of the app overwrite them from the INI config.
    """

    def __init__(self):
        # Generation-model defaults.
        # n_gpu_layers = -1 means "offload all layers" in llama.cpp.
        self.gen_n_gpu_layers = -1
        self.gen_temperature = 0.5
        self.gen_top_p = 1.0
        self.gen_top_k = 40
        self.gen_rep_pen = 1.0  # repetition penalty; 1.0 disables it
        self.gen_n_ctx = 10000  # context window size in tokens

        # Chat-model defaults (same meanings as the gen_* fields above).
        self.chat_n_gpu_layers = -1
        self.chat_temperature = 0.5
        self.chat_top_p = 1.0
        self.chat_top_k = 40
        self.chat_rep_pen = 1.0
        self.chat_n_ctx = 10000
|
212 |
|
213 |
def update_generate_parameters(self, n_gpu_layers, temperature, top_p, top_k, rep_pen, n_ctx):
|
|
|
313 |
model_path = os.path.join(MODEL_DIR, self.settings[f'DEFAULT_{model_type.upper()}_MODEL'])
|
314 |
n_gpu_layers = self.settings[f'{model_type.lower()}_n_gpu_layers']
|
315 |
|
316 |
+
# モデルが既にロードされていて設定も同じなら再ロードしない
|
317 |
if self.llama and self.current_model == model_type:
|
318 |
+
if (self.llama.model_path == model_path and
|
319 |
self.llama.n_gpu_layers == n_gpu_layers):
|
320 |
print(f"{model_type} モデルは既にロードされています。再ロードをスキップします。")
|
|
|
321 |
return
|
322 |
|
323 |
+
# ここで前のモデルをクリア
|
324 |
if self.llama:
|
325 |
del self.llama
|
326 |
self.llama = None
|
327 |
|
328 |
+
self.model_loaded.clear()
|
329 |
+
|
330 |
try:
|
331 |
+
self.llama = LlamaAdapter(model_path, params, n_gpu_layers)
|
332 |
self.current_model = model_type
|
333 |
self.model_loaded.set()
|
334 |
+
print(f"{model_type} モデル {model_path} のロードが完了しました。(n_gpu_layers: {n_gpu_layers})")
|
335 |
except Exception as e:
|
336 |
print(f"{model_type} モデルのロード中にエラーが発生しました: {str(e)}")
|
337 |
self.model_loaded.set()
|
|
|
591 |
# パラメータを更新
|
592 |
if 'ChatParameters' in new_config:
|
593 |
params.update_chat_parameters(
|
594 |
+
int(new_config['ChatParameters'].get('n_gpu_layers', '-1')),
|
595 |
float(new_config['ChatParameters'].get('temperature', '0.5')),
|
596 |
+
float(new_config['ChatParameters'].get('top_p', '1.0')),
|
597 |
+
int(new_config['ChatParameters'].get('top_k', '40')),
|
598 |
+
float(new_config['ChatParameters'].get('repetition_penalty', '1.0')),
|
599 |
int(new_config['ChatParameters'].get('n_ctx', '10000'))
|
600 |
)
|
601 |
if 'GenerateParameters' in new_config:
|
602 |
params.update_generate_parameters(
|
603 |
+
int(new_config['GenerateParameters'].get('n_gpu_layers', '-1')),
|
604 |
+
float(new_config['GenerateParameters'].get('temperature', '0.5')),
|
605 |
+
float(new_config['GenerateParameters'].get('top_p', '1.0')),
|
606 |
int(new_config['GenerateParameters'].get('top_k', '40')),
|
607 |
+
float(new_config['GenerateParameters'].get('repetition_penalty', '1.0')),
|
608 |
int(new_config['GenerateParameters'].get('n_ctx', '10000'))
|
609 |
)
|
610 |
+
|
611 |
# モデルを再ロード
|
612 |
character_maker.current_model = None
|
613 |
|