Kevin Fink
committed on
Commit
·
dcbf263
1
Parent(s):
4dafb88
dev
Browse files
app.py
CHANGED
@@ -2,7 +2,7 @@ import spaces
|
|
2 |
import gradio as gr
|
3 |
from transformers import Trainer, TrainingArguments, AutoTokenizer, AutoModelForSeq2SeqLM
|
4 |
from transformers import DataCollatorForSeq2Seq, AutoConfig
|
5 |
-
from datasets import load_dataset, concatenate_datasets, load_from_disk
|
6 |
import traceback
|
7 |
from sklearn.metrics import accuracy_score
|
8 |
import numpy as np
|
@@ -119,7 +119,8 @@ def fine_tune_model(model, dataset_name, hub_id, api_key, num_epochs, batch_size
|
|
119 |
second_half = dataset['train'].select(range(half_size, train_size))
|
120 |
dataset['train'] = second_half
|
121 |
tokenized_second_half = dataset.map(tokenize_function, batched=True)
|
122 |
-
|
|
|
123 |
tokenized_test_dataset = tokenize_function(dataset['test'])
|
124 |
|
125 |
# Create Trainer
|
|
|
2 |
import gradio as gr
|
3 |
from transformers import Trainer, TrainingArguments, AutoTokenizer, AutoModelForSeq2SeqLM
|
4 |
from transformers import DataCollatorForSeq2Seq, AutoConfig
|
5 |
+
from datasets import load_dataset, concatenate_datasets, load_from_disk, DatasetDict
|
6 |
import traceback
|
7 |
from sklearn.metrics import accuracy_score
|
8 |
import numpy as np
|
|
|
119 |
second_half = dataset['train'].select(range(half_size, train_size))
|
120 |
dataset['train'] = second_half
|
121 |
tokenized_second_half = dataset.map(tokenize_function, batched=True)
|
122 |
+
dataset['train'] = concatenate_datasets([tokenized_first_half['train'], tokenized_second_half['train']])
|
123 |
+
tokenized_train_dataset = dataset['train']
|
124 |
tokenized_test_dataset = tokenize_function(dataset['test'])
|
125 |
|
126 |
# Create Trainer
|