alexkueck committed
Commit 3be2136 · 1 Parent(s): 489736a

Update app.py

Files changed (1)
  1. app.py +33 -104
app.py CHANGED
@@ -24,6 +24,30 @@ from peft import (
 )
 
 
+####################################################
+# Constants
+####################################################
+#Constants specific to the Baize model:
+OUTPUT_DIR = "alexkueck/li-tis-tuned-2"
+MICRO_BATCH_SIZE = int(arg2)
+BATCH_SIZE = 64
+size = arg1
+GRADIENT_ACCUMULATION_STEPS = BATCH_SIZE // MICRO_BATCH_SIZE
+EPOCHS = 1
+LEARNING_RATE = float(arg3)
+CUTOFF_LEN = 512
+LORA_R = 8
+LORA_ALPHA = 16
+LORA_DROPOUT = 0.05
+VAL_SET_SIZE = 2000
+TARGET_MODULES = [
+    "q_proj",
+    "k_proj",
+    "v_proj",
+    "down_proj",
+    "gate_proj",
+    "up_proj",
+]
 
 #####################################################
 #Helper functions for training
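The constants added above bundle the LoRA hyperparameters and the projection modules to adapt in the Baize (LLaMA-style) model. As a rough sketch of how they would typically be wired into PEFT — the LoraConfig actually used further down in this diff hard-codes r=8 and lora_alpha=16, and the bias/task_type values here are assumptions not taken from the diff:

from peft import LoraConfig

LORA_R = 8
LORA_ALPHA = 16
LORA_DROPOUT = 0.05
TARGET_MODULES = ["q_proj", "k_proj", "v_proj", "down_proj", "gate_proj", "up_proj"]

# Illustrative only: wiring the constants into a LoRA adapter configuration.
lora_config = LoraConfig(
    r=LORA_R,                       # rank of the low-rank update matrices
    lora_alpha=LORA_ALPHA,          # scaling factor applied to the update
    lora_dropout=LORA_DROPOUT,      # dropout inside the LoRA layers during training
    target_modules=TARGET_MODULES,  # attention and MLP projections to adapt
    bias="none",                    # assumption; not specified in the diff
    task_type="CAUSAL_LM",          # assumption; Baize is a causal LM
)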
@@ -83,7 +107,8 @@ def compute_metrics_alle(eval_pred):
 
 #import from the Secrets (see the Settings of this Space)
 login(token=os.environ["HF_ACCESS_TOKEN"]) #for read access!!!!
-
+
+####################################################################################
 #Models and tokenizer
 
 #Alternatively with arbitrary models:
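A minimal sketch of what the login line in this hunk relies on: HF_ACCESS_TOKEN is assumed to be configured as a secret in the Space settings, and huggingface_hub.login authenticates the process with it (read scope is enough for pulling the base model; pushing the tuned model to the Hub later needs write scope).

import os
from huggingface_hub import login

token = os.environ["HF_ACCESS_TOKEN"]  # injected by the Space from its configured secret
login(token=token)                     # authenticates the following Hub calls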
@@ -100,20 +125,9 @@ base_model = "project-baize/baize-v2-7b" #load_8bit = True (in load_tokenizer_a
 #Load tokenizer and model
 tokenizer,model,device = load_tokenizer_and_model(base_model, True)
 #tokenizer.add_special_tokens({'pad_token': '[PAD]'}) #not necessary with fast tokenizers like GPT2
-#for Baize....
-tokenizer,model,device = load_tokenizer_and_model_Baize(base_model, True)
-tokenizer.pad_token_id = 0
-
-#specifically for the Baize model:
-TARGET_MODULES = [
-    "q_proj",
-    "k_proj",
-    "v_proj",
-    "down_proj",
-    "gate_proj",
-    "up_proj",
-]
 
+#for Baize.... since a special tokenizer has to be loaded...
+tokenizer,model,device = load_tokenizer_and_model_Baize(base_model, True)
 config = LoraConfig(
     r=8,
     lora_alpha=16,
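load_tokenizer_and_model_Baize is called in this hunk, but its body is not part of the diff. A plausible sketch, assuming a LLaMA-style checkpoint loaded in 8-bit as the #load_8bit = True comment on base_model suggests; the real helper in app.py may differ.

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

def load_tokenizer_and_model_Baize(base_model, load_8bit=True):
    # Hypothetical implementation, for illustration only.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    tokenizer = AutoTokenizer.from_pretrained(base_model)
    model = AutoModelForCausalLM.from_pretrained(
        base_model,
        load_in_8bit=load_8bit,     # requires bitsandbytes
        torch_dtype=torch.float16,
        device_map="auto",
    )
    tokenizer.pad_token_id = 0      # LLaMA-style tokenizers ship without a pad token
    return tokenizer, model, device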
@@ -124,6 +138,7 @@ config = LoraConfig(
 )
 #config.save_pretrained(OUTPUT_DIR)
 model = get_peft_model(model, config)
+#since a fast tokenizer is used -> set the pad token for the attention mask
 tokenizer.pad_token_id = 0
 
 
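For context, a short sketch of what the get_peft_model call in this hunk does: only the injected LoRA adapter weights remain trainable, which print_trainable_parameters (a standard PeftModel helper, not used in app.py as far as this diff shows) makes visible.

from peft import get_peft_model

model = get_peft_model(model, config)  # inject LoRA adapters into the target modules
model.print_trainable_parameters()     # prints trainable vs. total parameter counts
tokenizer.pad_token_id = 0             # pad with id 0 so padded positions can be masked out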
@@ -183,7 +198,8 @@ print ("training args")
 batch_size = 2
 
 '''
-# Set training arguments (smaller LLMs)
+# ########################################
+#Set training arguments (smaller LLMs)
 training_args = TrainingArguments(
     output_dir="alexkueck/li-tis-tuned-2",
     overwrite_output_dir = 'True',
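The TrainingArguments block in this hunk sits inside a commented-out ''' section. A minimal runnable sketch that reuses the constants from the top of the file; the concrete values in app.py may differ, and the logging/saving settings below are assumptions.

from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    overwrite_output_dir=True,
    per_device_train_batch_size=MICRO_BATCH_SIZE,
    gradient_accumulation_steps=GRADIENT_ACCUMULATION_STEPS,
    num_train_epochs=EPOCHS,
    learning_rate=LEARNING_RATE,
    fp16=True,                 # assumes a CUDA GPU is available
    logging_steps=20,          # assumption; not specified in the diff
    save_strategy="epoch",     # assumption; not specified in the diff
)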
@@ -211,14 +227,11 @@ training_args = TrainingArguments(
     #load_best_model_at_end=True
     #push_to_hub=True,
 )
-'''
 
-############################################
-#def trainieren_neu(name):
 #Assemble the trainer
 print ("################################")
 print ("trainer")
-'''
+
 trainer = Trainer(
     model=model,
     args=training_args,
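A sketch of the Trainer wiring this hunk sets up. lm_datasets["train"] is taken from the transformers.Trainer call later in the diff; the data collator is an assumption for causal-LM fine-tuning and is not shown in this diff.

from transformers import Trainer, DataCollatorForLanguageModeling

data_collator = DataCollatorForLanguageModeling(tokenizer, mlm=False)  # causal LM, no masking objective

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=lm_datasets["train"],
    data_collator=data_collator,
)
trainer.train()  # illustrative; the actual training call is outside this hunk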
@@ -230,7 +243,7 @@ trainer = Trainer(
 '''
 
 ###############################################
-#Special QA Trainer...#
+# Special QA Trainer...#
 '''
 trainer = QuestionAnsweringTrainer(
     model=model,
@@ -262,27 +275,6 @@ if ddp:
     device_map = {"": int(os.environ.get("LOCAL_RANK") or 0)}
     GRADIENT_ACCUMULATION_STEPS = GRADIENT_ACCUMULATION_STEPS // world_size
 
-MICRO_BATCH_SIZE = int(arg2)
-BATCH_SIZE = 64
-size = arg1
-GRADIENT_ACCUMULATION_STEPS = BATCH_SIZE // MICRO_BATCH_SIZE
-EPOCHS = 1
-LEARNING_RATE = float(arg3)
-CUTOFF_LEN = 512
-LORA_R = 8
-LORA_ALPHA = 16
-LORA_DROPOUT = 0.05
-VAL_SET_SIZE = 2000
-TARGET_MODULES = [
-    "q_proj",
-    "k_proj",
-    "v_proj",
-    "down_proj",
-    "gate_proj",
-    "up_proj",
-]
-#DATA_PATH = "data/data_tmp.json"
-OUTPUT_DIR = "alexkueck/li-tis-tuned-2"
 trainer = transformers.Trainer(
     model=model,
     train_dataset=lm_datasets["train"],
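The DDP branch in this hunk divides the gradient accumulation steps by the number of processes so that the effective batch size stays constant. A sketch with a worked example; WORLD_SIZE and LOCAL_RANK are the environment variables set by torchrun/torch.distributed, and the micro-batch value is only an example (app.py reads it from arg2).

import os

MICRO_BATCH_SIZE = 4                                           # example value
BATCH_SIZE = 64
GRADIENT_ACCUMULATION_STEPS = BATCH_SIZE // MICRO_BATCH_SIZE   # 16 on a single process

world_size = int(os.environ.get("WORLD_SIZE", 1))
ddp = world_size != 1
if ddp:
    device_map = {"": int(os.environ.get("LOCAL_RANK") or 0)}
    GRADIENT_ACCUMULATION_STEPS = GRADIENT_ACCUMULATION_STEPS // world_size
    # e.g. with 2 GPUs: 16 // 2 = 8 accumulation steps per process,
    # and 2 processes * 4 micro-batch * 8 steps = 64 samples per optimizer step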
@@ -345,66 +337,3 @@ print("Fertig mit Push to Hub")
 
 
 
-
-
-
-
-
-
-
-##############################################
-#Testing the fine-tuned model
-
-#######################################
-# Load model
-#print("load model_neu")
-#login(token=os.environ["HF_ACCESS_TOKEN"])
-#model_name_neu = "alexkueck/test-tis-1"
-#model_neu = trainer.load("test-tis-1")
-
-#or ...
-#model_neu, tokenizer_neu, device_neu = load_tokenizer_and_model(model_name_neu, False)
-#print("done load")
-
-
-############################
-#print("Test")
-#prompt = "Was ist ein TIS?"
-
-#####################################
-#with generate_response - not possible with every tokenizer
-#response = generate_response(prompt, model_neu, tokenizer_neu)
-#print(response)
-#print("response done")
-
-
-
-#######################################
-#Encoding tokenizer..
-#encoding = tokenizer(text, return_tensors="pt")
-#encoding = {k: v.to(trainer.model.device) for k,v in encoding.items()}
-
-#outputs = trainer.model(**encoding)
-#logits = outputs.logits
-#print(logits.shape)
-
-#greedy_output = model.generate(input_ids, max_length=50)
-
-#print("Output:\n" )
-#print(tokenizer.decode(outputs[0], skip_special_tokens=True))
-
-
-
-'''
-#######################################################################
-#Presentation with Gradio
-
-with gr.Blocks() as demo:
-    name = gr.Textbox(label="Model")
-    output = gr.Textbox(label="Output Box")
-    start_btn = gr.Button("Start")
-    start_btn.click(fn=trainieren_neu, inputs=name, outputs=output, api_name="trainieren_neu")
-
-
-demo.queue(default_enabled=True).launch(debug=True)
-'''
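The hunk above removes the commented-out smoke test of the fine-tuned model. For reference, a compact sketch of the same idea: the repo id alexkueck/test-tis-1, the prompt, and the greedy max_length=50 generation come from the removed comments, while loading via AutoTokenizer/AutoModelForCausalLM instead of the file's own load_tokenizer_and_model helper is an assumption.

from transformers import AutoTokenizer, AutoModelForCausalLM

model_name_neu = "alexkueck/test-tis-1"
tokenizer_neu = AutoTokenizer.from_pretrained(model_name_neu)
model_neu = AutoModelForCausalLM.from_pretrained(model_name_neu)

prompt = "Was ist ein TIS?"
inputs = tokenizer_neu(prompt, return_tensors="pt")
outputs = model_neu.generate(**inputs, max_length=50)  # greedy decoding, as in the removed code
print(tokenizer_neu.decode(outputs[0], skip_special_tokens=True))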
 
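The removed Gradio block referenced a trainieren_neu function that this version of app.py no longer defines. A self-contained sketch of the same UI with a hypothetical stub in its place:

import gradio as gr

def trainieren_neu(name):
    # Hypothetical stub; the original function was meant to start a training run.
    return f"Training requested for: {name}"

with gr.Blocks() as demo:
    name = gr.Textbox(label="Model")
    output = gr.Textbox(label="Output Box")
    start_btn = gr.Button("Start")
    start_btn.click(fn=trainieren_neu, inputs=name, outputs=output, api_name="trainieren_neu")

demo.queue().launch(debug=True)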