Kevin Fink committed on
Commit
dcbf263
·
1 Parent(s): 4dafb88
Files changed (1) hide show
  1. app.py +3 -2
app.py CHANGED
@@ -2,7 +2,7 @@ import spaces
2
  import gradio as gr
3
  from transformers import Trainer, TrainingArguments, AutoTokenizer, AutoModelForSeq2SeqLM
4
  from transformers import DataCollatorForSeq2Seq, AutoConfig
5
- from datasets import load_dataset, concatenate_datasets, load_from_disk
6
  import traceback
7
  from sklearn.metrics import accuracy_score
8
  import numpy as np
@@ -119,7 +119,8 @@ def fine_tune_model(model, dataset_name, hub_id, api_key, num_epochs, batch_size
119
  second_half = dataset['train'].select(range(half_size, train_size))
120
  dataset['train'] = second_half
121
  tokenized_second_half = dataset.map(tokenize_function, batched=True)
122
- tokenized_train_dataset = concatenate_datasets([tokenized_first_half, tokenized_second_half])
 
123
  tokenized_test_dataset = tokenize_function(dataset['test'])
124
 
125
  # Create Trainer
 
2
  import gradio as gr
3
  from transformers import Trainer, TrainingArguments, AutoTokenizer, AutoModelForSeq2SeqLM
4
  from transformers import DataCollatorForSeq2Seq, AutoConfig
5
+ from datasets import load_dataset, concatenate_datasets, load_from_disk, DatasetDict
6
  import traceback
7
  from sklearn.metrics import accuracy_score
8
  import numpy as np
 
119
  second_half = dataset['train'].select(range(half_size, train_size))
120
  dataset['train'] = second_half
121
  tokenized_second_half = dataset.map(tokenize_function, batched=True)
122
+ dataset['train'] = concatenate_datasets([tokenized_first_half['train'], tokenized_second_half['train']])
123
+ tokenized_train_dataset = dataset['train']
124
  tokenized_test_dataset = tokenize_function(dataset['test'])
125
 
126
  # Create Trainer