Kevin Fink committed
Commit 0043072 · 1 Parent(s): 7797264
Files changed (1):
  app.py +6 -2
app.py CHANGED
@@ -82,8 +82,8 @@ def fine_tune_model(model, dataset_name, hub_id, api_key, num_epochs, batch_size
     print("Loading model from checkpoint...")
     model = AutoModelForSeq2SeqLM.from_pretrained(training_args.output_dir)
 
-    #max_length = 128
-    max_length = model.get_input_embeddings().weight.shape[0]
+    max_length = 128
+    #max_length = model.get_input_embeddings().weight.shape[0]
     try:
         tokenized_train_dataset = load_from_disk(f'/data/{hub_id.strip()}_train_dataset')
         tokenized_test_dataset = load_from_disk(f'/data/{hub_id.strip()}_test_dataset')
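
The two swapped lines are the bug fix here: get_input_embeddings().weight.shape[0] is the number of rows in the embedding matrix, i.e. the vocabulary size, not a sequence length, so the old assignment would have asked the tokenizer to pad and truncate every example to a "length" in the tens of thousands. A minimal sketch of the distinction, assuming a T5-style checkpoint (the model name and the tokenize_fn helper are illustrative, not taken from app.py):

from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-small")  # illustrative checkpoint
tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-small")

# Rows of the input embedding matrix = vocabulary size, not a sequence length.
print(model.get_input_embeddings().weight.shape[0])  # ~32128 for T5-family models

# What the commit switches to: a fixed token budget for padding/truncation.
max_length = 128

def tokenize_fn(batch):
    # Hypothetical preprocessing step; app.py maps something similar over the
    # dataset before saving it under /data.
    return tokenizer(batch["text"], max_length=max_length,
                     truncation=True, padding="max_length")
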
@@ -129,6 +129,10 @@ def fine_tune_model(model, dataset_name, hub_id, api_key, num_epochs, batch_size
         tokenized_datasets['train'].save_to_disk(f'/data/{hub_id.strip()}_train_dataset')
         tokenized_datasets['test'].save_to_disk(f'/data/{hub_id.strip()}_test_dataset')
 
+    embedding_size = model.get_input_embeddings().weight.shape[0]
+    if len(tokenizer) > embedding_size:
+        model.resize_token_embeddings(len(tokenizer))
+
     # Create Trainer
     trainer = Trainer(
         model=model,
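
The added guard covers the opposite mismatch: if tokens were added to the tokenizer during preprocessing, len(tokenizer) can exceed the checkpoint's embedding table, and those ids would index out of range during training. A short sketch of the pattern under the same illustrative checkpoint (the special tokens are invented for the example):

from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-small")  # illustrative checkpoint
tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-small")

# Suppose preprocessing added markers the checkpoint has never seen.
tokenizer.add_special_tokens({"additional_special_tokens": ["<obs>", "<act>"]})

embedding_size = model.get_input_embeddings().weight.shape[0]
if len(tokenizer) > embedding_size:
    # Grow the embedding matrix so every tokenizer id has a row; the new
    # rows start randomly initialized and are learned during fine-tuning.
    model.resize_token_embeddings(len(tokenizer))

Guarding on len(tokenizer) > embedding_size also keeps the call a no-op when nothing was added, which avoids accidentally shrinking models like T5 whose embedding table is padded slightly larger than the tokenizer's vocabulary.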
 