alexkueck committed
Commit 8e9395a · 1 Parent(s): 52a97be

Update utils.py

Files changed (1):
  utils.py (+28 -17)
utils.py CHANGED
@@ -22,22 +22,6 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
  import datasets
  from datasets import load_dataset
 
- def reset_state():
-     return [], [], "Reset Done"
-
- def reset_textbox():
-     return gr.update(value=""),""
-
- def cancel_outputing():
-     return "Stop Done"
-
- def transfer_input(inputs):
-     textbox = reset_textbox()
-     return (
-         inputs,
-         gr.update(value=""),
-         gr.Button.update(visible=True),
-     )
 
  def is_stop_word_or_prefix(s: str, stop_words: list) -> bool:
      for stop_word in stop_words:
@@ -206,7 +190,34 @@ def predict(text,
              yield a,b,"Generate: Success"
      except:
          pass
-
+
+
+ # Function that groups the given text from the dataset into blocks
+ def group_texts(examples):
+     # Concatenate all texts.
+     concatenated_examples = {k: sum(examples[k], []) for k in examples.keys()}
+     total_length = len(concatenated_examples[list(examples.keys())[0]])
+     # We drop the small remainder; we could add padding instead of dropping
+     # if the model supported it. You can customize this part to your needs.
+     total_length = (total_length // block_size) * block_size
+     # Split by chunks of block_size.
+     result = {
+         k: [t[i : i + block_size] for i in range(0, total_length, block_size)]
+         for k, t in concatenated_examples.items()
+     }
+     result["labels"] = result["input_ids"].copy()
+     return result
+
+ # Function the Trainer needs to evaluate the training - with a metric
+ def compute_metrics(eval_pred):
+     # Compute a metric to measure the training - is it improving?
+     metric = evaluate.load("accuracy")  # 3 metric choices here: f1, roc_auc, or accuracy
+     logits, labels = eval_pred
+     predictions = np.argmax(logits, axis=-1)
+     # Call compute on the metric to calculate the accuracy of your predictions.
+     # Before passing your predictions to compute, convert the logits to class
+     # predictions (remember: all Transformers models return logits).
+     return metric.compute(predictions=predictions, references=labels)
+
 
  def convert_to_markdown(text):
      text = text.replace("$","$")
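
The new group_texts helper reads a module-level block_size that this diff does not show being defined. Below is a rough sketch of how such a helper is typically wired up with datasets.map, assuming utils.py is importable in the current environment; the tokenizer checkpoint, dataset, and block size of 128 are illustrative assumptions, not part of this commit.

import utils  # the module changed in this commit
from datasets import load_dataset
from transformers import AutoTokenizer

# group_texts reads a module-level block_size; set it on the utils
# module for this sketch (assumed value, not shown in the diff).
utils.block_size = 128

# Illustrative tokenizer/dataset choices, not taken from the commit.
tokenizer = AutoTokenizer.from_pretrained("gpt2")
raw = load_dataset("wikitext", "wikitext-2-raw-v1", split="train")

def tokenize(examples):
    # No truncation or padding here: group_texts re-chunks the token stream.
    return tokenizer(examples["text"])

tokenized = raw.map(tokenize, batched=True, remove_columns=["text"])

# Concatenate all token lists and slice them into block_size-long examples,
# with labels copied from input_ids for causal language modeling.
lm_dataset = tokenized.map(utils.group_texts, batched=True)
print(len(lm_dataset[0]["input_ids"]))  # 128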
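
The added compute_metrics can be sanity-checked on its own; note that it relies on evaluate and np (numpy), whose imports this diff does not show. The toy logits and labels below are made up for illustration.

import numpy as np
import evaluate

# Same logic as the compute_metrics added in this commit, shown standalone.
def compute_metrics(eval_pred):
    metric = evaluate.load("accuracy")
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return metric.compute(predictions=predictions, references=labels)

# Toy batch: 3 examples, 2 classes; argmax over the logits gives [1, 0, 1].
logits = np.array([[0.1, 0.9], [0.8, 0.2], [0.3, 0.7]])
labels = np.array([1, 0, 0])
print(compute_metrics((logits, labels)))  # {'accuracy': 0.666...}

Passed to a transformers.Trainer via its compute_metrics argument, the function runs once per evaluation pass over the collected eval logits and labels.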