Spaces:
Paused
Paused
Update utils.py
Browse files
utils.py
CHANGED
@@ -193,21 +193,6 @@ def predict(text,
|
|
193 |
pass
|
194 |
|
195 |
|
196 |
-
# Function that groups the given text from the dataset
def group_texts(examples, block_size):
    """Concatenate each column of a batch and re-split it into fixed-size blocks.

    Args:
        examples: Mapping from column name to a list of sequences. It must
            contain an "input_ids" column, because the labels are derived
            from it.
        block_size: Length of every chunk in the output.

    Returns:
        A dict with the same keys as ``examples``, each mapped to a list of
        chunks of length ``block_size``, plus a "labels" entry that is a
        shallow copy of the "input_ids" chunk list.
    """
    # Local import keeps the function self-contained; chain.from_iterable
    # flattens in linear time, whereas sum(list_of_lists, []) is quadratic.
    from itertools import chain

    # Concatenate all texts.
    concatenated_examples = {
        k: list(chain.from_iterable(examples[k])) for k in examples.keys()
    }
    # The length of the first column decides how much of every column is kept.
    total_length = len(concatenated_examples[list(examples.keys())[0]])
    # We drop the small remainder; we could add padding instead if the model
    # supported it. Customize this part to your needs.
    total_length = (total_length // block_size) * block_size
    # Split by chunks of block_size.
    result = {
        k: [t[i : i + block_size] for i in range(0, total_length, block_size)]
        for k, t in concatenated_examples.items()
    }
    # Labels mirror the inputs: a new outer list that shares the chunk lists.
    result["labels"] = result["input_ids"].copy()
    return result
|
211 |
|
212 |
# Function the Trainer needs in order to evaluate the training, using a metric
|
213 |
def compute_metrics(eval_pred):
|
|
|
193 |
pass
|
194 |
|
195 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
196 |
|
197 |
# Function the Trainer needs in order to evaluate the training, using a metric
|
198 |
def compute_metrics(eval_pred):
|