Spaces:
Paused
Paused
Update utils.py
Browse files
utils.py
CHANGED
@@ -22,22 +22,6 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
|
|
22 |
import datasets
|
23 |
from datasets import load_dataset
|
24 |
|
25 |
-
def reset_state():
    """Return the values used to reset state.

    Returns:
        A 3-tuple: two newly created empty lists followed by the status
        string ``"Reset Done"``. New list objects are created on each call.
    """
    return [], [], "Reset Done"
|
27 |
-
|
28 |
-
def reset_textbox():
    """Return the values used to clear a text input.

    Returns:
        A 2-tuple: ``gr.update(value="")`` followed by an empty string.
    """
    return gr.update(value=""), ""
|
30 |
-
|
31 |
-
def cancel_outputing():
    """Return the status string ``"Stop Done"``."""
    return "Stop Done"
|
33 |
-
|
34 |
-
def transfer_input(inputs):
    """Pass the submitted input through and build two component updates.

    Args:
        inputs: The submitted value; returned unchanged as the first element.

    Returns:
        A 3-tuple ``(inputs, gr.update(value=""),
        gr.Button.update(visible=True))``.
    """
    # The previous version also called reset_textbox() and discarded the
    # result; that call had no effect on the returned values and was removed.
    return (
        inputs,
        gr.update(value=""),
        gr.Button.update(visible=True),
    )
|
41 |
|
42 |
def is_stop_word_or_prefix(s: str, stop_words: list) -> bool:
|
43 |
for stop_word in stop_words:
|
@@ -206,7 +190,34 @@ def predict(text,
|
|
206 |
yield a,b,"Generate: Success"
|
207 |
except:
|
208 |
pass
|
209 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
210 |
|
211 |
def convert_to_markdown(text):
|
212 |
text = text.replace("$","$")
|
|
|
22 |
import datasets
|
23 |
from datasets import load_dataset
|
24 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
25 |
|
26 |
def is_stop_word_or_prefix(s: str, stop_words: list) -> bool:
|
27 |
for stop_word in stop_words:
|
|
|
190 |
yield a,b,"Generate: Success"
|
191 |
except:
|
192 |
pass
|
193 |
+
|
194 |
+
|
195 |
+
# Groups the given text from the dataset into blocks of equal length.
def group_texts(examples, chunk_size=None):
    """Concatenate every column of a batch and re-split it into equal blocks.

    Args:
        examples: Mapping from column name to a list of sequences. It must
            contain an ``"input_ids"`` column, which is copied into
            ``"labels"``.
        chunk_size: Length of each output block. When ``None`` (the default,
            which matches the previous one-argument behaviour) the
            module-level ``block_size`` is used.

    Returns:
        A dict with the same keys as ``examples`` plus ``"labels"``. Each
        value is a list of slices of length ``chunk_size`` taken from the
        concatenated column. ``"labels"`` is a shallow copy of the
        ``"input_ids"`` list of blocks.

    Raises:
        ValueError: If the effective block size is not positive.
        KeyError: If ``examples`` has no ``"input_ids"`` column.
    """
    from itertools import chain

    size = block_size if chunk_size is None else chunk_size
    if size <= 0:
        raise ValueError(f"block size must be positive, got {size!r}")

    # Concatenate all texts. chain.from_iterable is linear in the total
    # length, whereas sum(list_of_lists, []) re-copies the accumulator for
    # every element.
    concatenated_examples = {
        k: list(chain.from_iterable(examples[k])) for k in examples.keys()
    }
    total_length = len(concatenated_examples[list(examples.keys())[0]])

    # We drop the small remainder; we could add padding instead if the model
    # supported it. Customize this part to your needs.
    total_length = (total_length // size) * size

    # Split into chunks of `size`.
    result = {
        k: [t[i : i + size] for i in range(0, total_length, size)]
        for k, t in concatenated_examples.items()
    }
    result["labels"] = result["input_ids"].copy()
    return result
|
210 |
+
|
211 |
+
# Function the trainer needs in order to evaluate the training with a metric.
def compute_metrics(eval_pred):
    """Score one evaluation pass with the ``"accuracy"`` metric.

    Args:
        eval_pred: A pair that unpacks into ``(logits, labels)``.

    Returns:
        The result of ``metric.compute(predictions=..., references=labels)``
        for the accuracy metric.
    """
    # Compute a metric so training progress can be measured. The original
    # author listed three candidate metrics: f1, roc_auc or accuracy.
    # The metric is loaded once and kept on the function object, so repeated
    # evaluation calls do not run evaluate.load() again.
    metric = getattr(compute_metrics, "_metric", None)
    if metric is None:
        metric = evaluate.load("accuracy")
        compute_metrics._metric = metric

    logits, labels = eval_pred
    # The incoming values are logits, so convert them to predictions by
    # taking the argmax over the last axis before calling compute.
    predictions = np.argmax(logits, axis=-1)
    # NOTE(review): predictions and labels are passed through without any
    # reshaping; if labels are per-token (as group_texts in this file appears
    # to produce), confirm the metric accepts that shape and alignment.
    return metric.compute(predictions=predictions, references=labels)
|
220 |
+
|
221 |
|
222 |
def convert_to_markdown(text):
|
223 |
text = text.replace("$","$")
|