Kevin Fink
committed on
Commit
·
0043072
1
Parent(s):
7797264
dev
Browse files
app.py
CHANGED
@@ -82,8 +82,8 @@ def fine_tune_model(model, dataset_name, hub_id, api_key, num_epochs, batch_size
|
|
82 |
print("Loading model from checkpoint...")
|
83 |
model = AutoModelForSeq2SeqLM.from_pretrained(training_args.output_dir)
|
84 |
|
85 |
-
|
86 |
-
max_length = model.get_input_embeddings().weight.shape[0]
|
87 |
try:
|
88 |
tokenized_train_dataset = load_from_disk(f'/data/{hub_id.strip()}_train_dataset')
|
89 |
tokenized_test_dataset = load_from_disk(f'/data/{hub_id.strip()}_test_dataset')
|
@@ -129,6 +129,10 @@ def fine_tune_model(model, dataset_name, hub_id, api_key, num_epochs, batch_size
|
|
129 |
tokenized_datasets['train'].save_to_disk(f'/data/{hub_id.strip()}_train_dataset')
|
130 |
tokenized_datasets['test'].save_to_disk(f'/data/{hub_id.strip()}_test_dataset')
|
131 |
|
|
|
|
|
|
|
|
|
132 |
# Create Trainer
|
133 |
trainer = Trainer(
|
134 |
model=model,
|
|
|
82 |
print("Loading model from checkpoint...")
|
83 |
model = AutoModelForSeq2SeqLM.from_pretrained(training_args.output_dir)
|
84 |
|
85 |
+
max_length = 128
|
86 |
+
#max_length = model.get_input_embeddings().weight.shape[0]
|
87 |
try:
|
88 |
tokenized_train_dataset = load_from_disk(f'/data/{hub_id.strip()}_train_dataset')
|
89 |
tokenized_test_dataset = load_from_disk(f'/data/{hub_id.strip()}_test_dataset')
|
|
|
129 |
tokenized_datasets['train'].save_to_disk(f'/data/{hub_id.strip()}_train_dataset')
|
130 |
tokenized_datasets['test'].save_to_disk(f'/data/{hub_id.strip()}_test_dataset')
|
131 |
|
132 |
+
embedding_size = model.get_input_embeddings().weight.shape[0]
|
133 |
+
if len(tokenizer) > embedding_size:
|
134 |
+
model.resize_token_embeddings(len(tokenizer))
|
135 |
+
|
136 |
# Create Trainer
|
137 |
trainer = Trainer(
|
138 |
model=model,
|