Update app.py
app.py CHANGED
@@ -24,6 +24,30 @@ from peft import (
 )
 
 
+####################################################
+# Constants
+####################################################
+#Constants specific to the Baize model:
+OUTPUT_DIR = "alexkueck/li-tis-tuned-2"
+MICRO_BATCH_SIZE = int(arg2)
+BATCH_SIZE = 64
+size = arg1
+GRADIENT_ACCUMULATION_STEPS = BATCH_SIZE // MICRO_BATCH_SIZE
+EPOCHS = 1
+LEARNING_RATE = float(arg3)
+CUTOFF_LEN = 512
+LORA_R = 8
+LORA_ALPHA = 16
+LORA_DROPOUT = 0.05
+VAL_SET_SIZE = 2000
+TARGET_MODULES = [
+    "q_proj",
+    "k_proj",
+    "v_proj",
+    "down_proj",
+    "gate_proj",
+    "up_proj",
+]
 
 #####################################################
 #Helper functions for training
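Note: the new constants follow the usual LoRA fine-tuning arithmetic: one optimizer step consumes MICRO_BATCH_SIZE * GRADIENT_ACCUMULATION_STEPS = BATCH_SIZE samples. A minimal sketch with an assumed value for arg2 (arg1/arg2/arg3 are supplied elsewhere in app.py):

arg2 = "4"                                                     # hypothetical value, for illustration only
MICRO_BATCH_SIZE = int(arg2)                                   # samples per forward/backward pass
BATCH_SIZE = 64                                                # target effective batch size
GRADIENT_ACCUMULATION_STEPS = BATCH_SIZE // MICRO_BATCH_SIZE   # 64 // 4 = 16
assert MICRO_BATCH_SIZE * GRADIENT_ACCUMULATION_STEPS == BATCH_SIZE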
@@ -83,7 +107,8 @@ def compute_metrics_alle(eval_pred):
 
 #import from the Secrets (see the settings of this Space)
 login(token=os.environ["HF_ACCESS_TOKEN"]) #for read access!!!!
-
+
+####################################################################################
 #Models and tokenizers
 
 #Alternatively with arbitrary models:
@@ -100,20 +125,9 @@ base_model = "project-baize/baize-v2-7b" #load_8bit = True (in load_tokenizer_a
 #Load tokenizer and model
 tokenizer,model,device = load_tokenizer_and_model(base_model, True)
 #tokenizer.add_special_tokens({'pad_token': '[PAD]'}) #not necessary with fast tokenizers like GPT2
-#for Baize....
-tokenizer,model,device = load_tokenizer_and_model_Baize(base_model, True)
-tokenizer.pad_token_id = 0
-
-#specific to the Baize model:
-TARGET_MODULES = [
-    "q_proj",
-    "k_proj",
-    "v_proj",
-    "down_proj",
-    "gate_proj",
-    "up_proj",
-]
 
+#for Baize.... since a special tokenizer has to be loaded...
+tokenizer,model,device = load_tokenizer_and_model_Baize(base_model, True)
 config = LoraConfig(
     r=8,
     lora_alpha=16,
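Note: load_tokenizer_and_model_Baize is defined elsewhere in app.py, so its body is not part of this diff. A minimal sketch of what such a helper typically does for the LLaMA-based Baize checkpoint (all of it an assumption, not the Space's actual code):

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

def load_tokenizer_and_model_Baize(base_model, load_8bit):
    # Baize v2 is LLaMA-based, so the slow SentencePiece tokenizer is loaded
    tokenizer = AutoTokenizer.from_pretrained(base_model, use_fast=False)
    model = AutoModelForCausalLM.from_pretrained(
        base_model,
        load_in_8bit=load_8bit,   # 8-bit weights so the 7B model fits in limited VRAM
        device_map="auto",
    )
    device = "cuda" if torch.cuda.is_available() else "cpu"
    return tokenizer, model, device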
@@ -124,6 +138,7 @@ config = LoraConfig(
 )
 #config.save_pretrained(OUTPUT_DIR)
 model = get_peft_model(model, config)
+#fast tokenizer -> set the attention mask
 tokenizer.pad_token_id = 0
 
 
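Note: LLaMA-derived tokenizers like Baize's ship without a pad token, so batched tokenization with padding would otherwise fail; pinning pad_token_id to 0 is the common workaround. Illustrative usage (not from the commit):

batch = tokenizer(["short prompt", "a somewhat longer prompt"], padding=True, return_tensors="pt")
print(batch["input_ids"].shape)    # both rows padded to the same length
print(batch["attention_mask"])     # 0 marks the padded positions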
@@ -183,7 +198,8 @@ print ("training args")
 batch_size = 2
 
 '''
-#
+# ########################################
+#Set training arguments (smaller LLMs)
 training_args = TrainingArguments(
     output_dir="alexkueck/li-tis-tuned-2",
     overwrite_output_dir = True,
@@ -211,14 +227,11 @@ training_args = TrainingArguments(
     #load_best_model_at_end=True
     #push_to_hub=True,
 )
-'''
 
-############################################
-#def trainieren_neu(name):
 #Assemble the trainer
 print ("################################")
 print ("trainer")
-
+
 trainer = Trainer(
     model=model,
     args=training_args,
@@ -230,7 +243,7 @@ trainer = Trainer(
 '''
 
 ###############################################
-#Special QA Trainer...#
+# Special QA Trainer...#
 '''
 trainer = QuestionAnsweringTrainer(
     model=model,
@@ -262,27 +275,6 @@ if ddp:
     device_map = {"": int(os.environ.get("LOCAL_RANK") or 0)}
     GRADIENT_ACCUMULATION_STEPS = GRADIENT_ACCUMULATION_STEPS // world_size
 
-MICRO_BATCH_SIZE = int(arg2)
-BATCH_SIZE = 64
-size = arg1
-GRADIENT_ACCUMULATION_STEPS = BATCH_SIZE // MICRO_BATCH_SIZE
-EPOCHS = 1
-LEARNING_RATE = float(arg3)
-CUTOFF_LEN = 512
-LORA_R = 8
-LORA_ALPHA = 16
-LORA_DROPOUT = 0.05
-VAL_SET_SIZE = 2000
-TARGET_MODULES = [
-    "q_proj",
-    "k_proj",
-    "v_proj",
-    "down_proj",
-    "gate_proj",
-    "up_proj",
-]
-#DATA_PATH = "data/data_tmp.json"
-OUTPUT_DIR = "alexkueck/li-tis-tuned-2"
 trainer = transformers.Trainer(
     model=model,
     train_dataset=lm_datasets["train"],
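Note: dividing GRADIENT_ACCUMULATION_STEPS by world_size keeps the effective global batch constant under data-parallel training, because each of the world_size processes already contributes MICRO_BATCH_SIZE samples per step. A sketch of the setup this hunk assumes (names follow the diff context; the environment variables are the ones torchrun sets):

import os

world_size = int(os.environ.get("WORLD_SIZE", 1))   # number of DDP processes
ddp = world_size != 1
if ddp:
    device_map = {"": int(os.environ.get("LOCAL_RANK") or 0)}
    GRADIENT_ACCUMULATION_STEPS = GRADIENT_ACCUMULATION_STEPS // world_size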
@@ -345,66 +337,3 @@ print("Fertig mit Push to Hub")
 
 
 
-
-
-
-
-
-
-
-##############################################
-#Test the fine-tuned model
-
-#######################################
-# Load model
-#print("load model_neu")
-#login(token=os.environ["HF_ACCESS_TOKEN"])
-#model_name_neu = "alexkueck/test-tis-1"
-#model_neu = trainer.load("test-tis-1")
-
-#or ...
-#model_neu, tokenizer_neu, device_neu = load_tokenizer_and_model(model_name_neu, False)
-#print("done load")
-
-
-############################
-#print("Test")
-#prompt = "Was ist ein TIS?"
-
-#####################################
-#with generate_response - not possible with all tokenizers
-#response = generate_response(prompt, model_neu, tokenizer_neu)
-#print(response)
-#print("response done")
-
-
-
-#######################################
-#Encoding tokenizer..
-#encoding = tokenizer(text, return_tensors="pt")
-#encoding = {k: v.to(trainer.model.device) for k,v in encoding.items()}
-
-#outputs = trainer.model(**encoding)
-#logits = outputs.logits
-#print(logits.shape)
-
-#greedy_output = model.generate(input_ids, max_length=50)
-
-#print("Output:\n" )
-#print(tokenizer.decode(outputs[0], skip_special_tokens=True))
-
-
-
-'''
-#######################################################################
-#Display with Gradio
-
-with gr.Blocks() as demo:
-    name = gr.Textbox(label="Model")
-    output = gr.Textbox(label="Output Box")
-    start_btn = gr.Button("Start")
-    start_btn.click(fn=trainieren_neu, inputs=name, outputs=output, api_name="trainieren_neu")
-
-
-    demo.queue(default_enabled=True).launch(debug=True)
-'''
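Note: the last hunk's header shows the script ends with print("Fertig mit Push to Hub") ("done with push to Hub"). The upload call itself is outside this diff; a minimal sketch, assuming the standard Trainer API is used:

trainer.save_model(OUTPUT_DIR)   # write the tuned weights locally
trainer.push_to_hub()            # upload to the repo configured as output_dir
print("Fertig mit Push to Hub")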