Spaces: Running on Zero
tori29umai committed · Commit 2bdd8a2 · Parent(s): 4246ad1
Update app.py
app.py CHANGED
@@ -6,9 +6,7 @@ from llama_cpp import Llama
 import configparser
 from functools import partial
 from utils.dl_utils import dl_guff_model
-import
-import tempfile
-import csv
+import threading

 # Constants
 DEFAULT_INI_FILE = 'settings.ini'
@@ -34,7 +32,6 @@ model_path = os.path.join("models", model_filename)
 if not os.path.exists(model_path):
     dl_guff_model("models", f"https://huggingface.co/MCZK/EZO-Common-9B-gemma-2-it-GGUF/resolve/main/{model_filename}")

-
 class ConfigManager:
     @staticmethod
     def load_settings(filename):
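dl_guff_model is imported from utils.dl_utils and is only ever called here as dl_guff_model(out_dir, url); its implementation is not part of this diff. A minimal sketch of what such a download-if-missing helper typically looks like (an assumption for illustration, not the repo's actual code):

import os
import requests  # assumption: a plain streaming HTTP download

def dl_guff_model(out_dir: str, url: str) -> str:
    """Hypothetical sketch: fetch a GGUF file into out_dir if it is absent."""
    os.makedirs(out_dir, exist_ok=True)
    dest = os.path.join(out_dir, url.rsplit("/", 1)[-1])
    if not os.path.exists(dest):
        with requests.get(url, stream=True, timeout=60) as r:
            r.raise_for_status()
            with open(dest, "wb") as f:
                for chunk in r.iter_content(chunk_size=1 << 20):  # 1 MiB chunks
                    f.write(chunk)
    return dest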
@@ -106,6 +103,9 @@ class TextGenerator:
         self.llm = None
         self.settings = None
         self.current_model = None
+        self.history = []
+        self.use_chat_format = False
+        self.model_loaded = threading.Event()

     @spaces.GPU(duration=120)
     def load_model(self):
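Review note: the three new fields set up a readiness flag for model loading. A minimal, self-contained sketch of the threading.Event clear()/set()/wait() protocol the rest of the diff relies on (the class and names here are illustrative, not from app.py):

import threading

class LoaderSketch:
    """Illustrative only: mirrors the readiness-flag protocol used above."""
    def __init__(self):
        self.resource = None
        self.ready = threading.Event()   # starts unset

    def load(self):
        self.ready.clear()               # mark "loading in progress"
        try:
            self.resource = object()     # stand-in for the expensive load
        finally:
            self.ready.set()             # wake waiters whether or not load worked

    def use(self):
        # wait() returns False if the flag was not set within the timeout
        if not self.ready.wait(timeout=30) or self.resource is None:
            return "not ready"
        return "ok"

Setting the event in the except path as well, as the diff does, keeps callers from blocking for the full timeout after a failed load; the follow-up "or not self.llm" check then distinguishes failure from success.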
@@ -113,50 +113,79 @@ class TextGenerator:
             del self.llm
             self.llm = None

+        self.model_loaded.clear()
         try:
             model_path = os.path.join(MODEL_DIR, self.settings['DEFAULT_GEN_MODEL'])
             n_gpu_layers = self.settings['gen_n_gpu_layers']
             self.llm = Llama(model_path=model_path, n_ctx=self.settings['gen_n_ctx'], n_gpu_layers=n_gpu_layers)
             self.current_model = 'GEN'
             print(f"GEN model {model_path} loaded successfully. (n_gpu_layers: {n_gpu_layers})")
+            self.model_loaded.set()
         except Exception as e:
             print(f"An error occurred while loading the GEN model: {str(e)}")
+            self.model_loaded.set()

     def generate_text(self, text, gen_characters, gen_token_multiplier, instruction):
         if not self.llm:
             self.load_model()

-        if not self.llm:
+        if not self.model_loaded.wait(timeout=30) or not self.llm:
+            return "Failed to load the model. Please check your settings."
+
+        input_str = f"{instruction}\n\nText to generate (around {gen_characters} characters):\n\n{text}"
+        return self.generate_response(input_str, gen_characters, gen_token_multiplier)
+
+    def generate_response(self, input_str, gen_characters, gen_token_multiplier):
+        if not self.model_loaded.wait(timeout=30) or not self.llm:
             return "Failed to load the model. Please check your settings."

-        author_description = self.settings.get('gen_author_description', '')
         max_tokens = int(gen_characters * gen_token_multiplier)

-
-
-
-
-
+        if not self.use_chat_format:
+            try:
+                prompt = self._generate_prompt(input_str)
+                res = self.llm.create_completion(prompt=prompt, max_tokens=max_tokens)
+                res_text = res["choices"][0]["text"]
+                self.history.append({"user": input_str, "assistant": res_text})
+                return res_text
+            except Exception as e:
+                print(f"Response generation with the existing format failed: {str(e)}")
+                print("Switching to chat format.")
+                self.use_chat_format = True

-
-
-
-
-
-            top_p=self.settings['gen_top_p'],
-            top_k=self.settings['gen_top_k'],
-            repeat_penalty=self.settings['gen_rep_pen'],
-        )
+        if self.use_chat_format:
+            chat_messages = [
+                {"role": "system", "content": self.settings.get('gen_author_description', '')},
+                {"role": "user", "content": input_str}
+            ]

-
-
-
-
-
+            try:
+                response = self.llm.create_chat_completion(
+                    messages=chat_messages,
+                    max_tokens=max_tokens,
+                    temperature=self.settings['gen_temperature'],
+                    top_p=self.settings['gen_top_p'],
+                    top_k=self.settings['gen_top_k'],
+                    repeat_penalty=self.settings['gen_rep_pen'],
+                )
+
+                res_text = response["choices"][0]["message"]["content"].strip()
+                self.history.append({"user": input_str, "assistant": res_text})
+                return res_text
+            except Exception as e:
+                print(f"Response generation in chat format failed: {str(e)}")
+                return "An error occurred while generating the response. Please check your settings."
+
+    def _generate_prompt(self, input_str):
+        return f"{self.settings.get('gen_author_description', '')}\n\nUser: {input_str}\nAssistant:"

     def load_settings(self, filename):
         self.settings = Settings.load_from_ini(filename)

+    def reset(self):
+        self.history = []
+        self.use_chat_format = False
+
 # Global variables
 text_generator = TextGenerator()
 model_files = ModelManager.get_model_files()
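The new generate_response first tries a hand-built prompt via create_completion and, on any exception, flips use_chat_format permanently so later calls go through create_chat_completion, which applies the model's own chat template. A stripped-down sketch of that fallback using the same llama_cpp calls (model path and prompt string are placeholders):

from llama_cpp import Llama

llm = Llama(model_path="models/example.gguf", n_ctx=2048)  # placeholder path
use_chat_format = False

def respond(user_text: str, max_tokens: int = 256) -> str:
    global use_chat_format
    if not use_chat_format:
        try:
            # Plain completion: the caller formats the prompt string itself
            res = llm.create_completion(
                prompt=f"User: {user_text}\nAssistant:", max_tokens=max_tokens)
            return res["choices"][0]["text"]
        except Exception:
            use_chat_format = True  # sticky switch, as in the diff
    # Chat completion: llama_cpp renders the model's chat template
    res = llm.create_chat_completion(
        messages=[{"role": "user", "content": user_text}],
        max_tokens=max_tokens)
    return res["choices"][0]["message"]["content"].strip()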
@@ -273,6 +302,8 @@ def build_gradio_interface():
                 input_component = gr.Textbox(label=key, value=value)

                 input_component.change(
+                    partial(ConfigManager.update_setting, 'GenerateParameters', key),
+                    input_component.change(
                     partial(ConfigManager.update_setting, 'GenerateParameters', key),
                     inputs=[input_component],
                     outputs=[output]
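Review note: the two added lines place a second partial(...) argument and a nested input_component.change( in front of the original call's arguments, which appears to leave the call unbalanced. A sketch of the presumably intended call shape, reusing the names from the hunk above (assuming the duplication is an accidental paste):

# Presumed intent (hypothetical): one change() handler per settings field
input_component.change(
    partial(ConfigManager.update_setting, 'GenerateParameters', key),
    inputs=[input_component],
    outputs=[output],
)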
@@ -294,4 +325,4 @@ if __name__ == "__main__":
     text_generator.load_settings(DEFAULT_INI_FILE)

     demo = build_gradio_interface()
-    demo.launch(share=True)
+    demo.launch(share=True)
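For reference, launch(share=True) asks Gradio to expose the locally running app through a temporary public *.gradio.live URL in addition to the local server. A minimal standalone example (the echo function is illustrative):

import gradio as gr

# Echo demo: gr.Interface wires a function to simple input/output widgets
demo = gr.Interface(fn=lambda s: s, inputs="text", outputs="text")
demo.launch(share=True)  # also prints a temporary public share link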