Kevin Fink committed
Commit 1888d7d · 1 Parent(s): 6662b37
Files changed (1): app.py +3 -3
app.py CHANGED
@@ -26,7 +26,7 @@ def fine_tune_model(model_name, dataset_name, hub_id, api_key, num_epochs, batch
     model = get_peft_model(model, lora_config)
     tokenizer = AutoTokenizer.from_pretrained(model_name)
 
-    max_length = 91
+    max_length = 128
     try:
         tokenized_train_dataset = load_from_disk(f'{hub_id.strip()}_train_dataset')
         tokenized_test_dataset = load_from_disk(f'{hub_id.strip()}_test_dataset')
@@ -39,7 +39,7 @@ def fine_tune_model(model_name, dataset_name, hub_id, api_key, num_epochs, batch
         model_inputs = tokenizer(
             examples['text'],
             max_length=max_length, # Set to None for dynamic padding
-            padding='longest', # Disable padding here, we will handle it later
+            padding='max_length', # Pad every example to the fixed max_length
             truncation=True,
         )
 
@@ -47,7 +47,7 @@ def fine_tune_model(model_name, dataset_name, hub_id, api_key, num_epochs, batch
         labels = tokenizer(
             examples['target'],
             max_length=max_length, # Set to None for dynamic padding
-            padding='longest', # Disable padding here, we will handle it later
+            padding='max_length', # Pad every example to the fixed max_length
             truncation=True,
             text_target=examples['target'] # Use text_target for target text
         )
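
For context on what this commit changes: with padding='longest', the tokenizer pads each batch passed through dataset.map only to that batch's longest sequence, so the cached datasets end up with ragged lengths from batch to batch; with padding='max_length' and the new max_length = 128, every example is padded to the same fixed length. A minimal sketch of the difference, assuming a generic seq2seq checkpoint (t5-small and the sample strings below are stand-ins, not taken from this repo):

from transformers import AutoTokenizer

# Stand-in checkpoint and texts, for illustration only.
tokenizer = AutoTokenizer.from_pretrained('t5-small')
texts = ['a short example',
         'a noticeably longer example sentence used to show how batch padding behaves']

# padding='longest' (old behavior): pad only to the longest sequence in this batch.
longest = tokenizer(texts, padding='longest', truncation=True, max_length=128)

# padding='max_length' (new behavior): pad every sequence to exactly max_length tokens.
fixed = tokenizer(texts, padding='max_length', truncation=True, max_length=128)

print([len(ids) for ids in longest['input_ids']])  # both lengths equal the batch's longest
print([len(ids) for ids in fixed['input_ids']])    # [128, 128]

If dynamic padding is the goal instead (as the old "we will handle it later" comments suggested), the usual pattern is to tokenize with no padding and let a collator such as transformers.DataCollatorForSeq2Seq pad each training batch on the fly.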