dsmueller committed on
Commit
9b37590
1 Parent(s): 72f4295

Update dataset loading and subprocess command in train_llm.py

Browse files
Files changed (1) hide show
  1. train_llm.py +4 -3
train_llm.py CHANGED
@@ -12,7 +12,7 @@ from tqdm.notebook import tqdm
12
 
13
  # Load dataset
14
  dataset_name = 'ai-aerospace/ams_data_train_generic_v0.1_100'
15
- dataset=load_dataset(dataset_name)
16
 
17
  # Write dataset files into data directory
18
  data_directory = '../fine_tune_data/'
@@ -46,10 +46,11 @@ os.environ["train_data"] = train_data
46
  os.environ["validation_data"] = validation_data
47
 
48
  # Set .venv and execute the autotrain script
 
49
  # !autotrain llm --train --project_name my-llm --model TinyLlama/TinyLlama-1.1B-Chat-v0.1 --data_path . --use-peft --use_int4 --learning_rate 2e-4 --train_batch_size 6 --num_train_epochs 3 --trainer sft
50
  # The training dataset to be used must be called training.csv and be located in the data_path folder.
51
  command="""
52
- source ../.venv/bin/activate && autotrain llm --train \
53
  --project_name ${project_name} \
54
  --model ${model_name} \
55
  --data_path ../fine_tune_data \
@@ -66,4 +67,4 @@ source ../.venv/bin/activate && autotrain llm --train \
66
  """
67
 
68
  # Use subprocess.run() to execute the command
69
- subprocess.run(command, shell=True, check=True, env=os.environ)
 
12
 
13
  # Load dataset
14
  dataset_name = 'ai-aerospace/ams_data_train_generic_v0.1_100'
15
+ dataset=load_dataset(dataset_name,cache_dir='.')
16
 
17
  # Write dataset files into data directory
18
  data_directory = '../fine_tune_data/'
 
46
  os.environ["validation_data"] = validation_data
47
 
48
  # Set .venv and execute the autotrain script
49
+ # To see all parameters: autotrain llm --help
50
  # !autotrain llm --train --project_name my-llm --model TinyLlama/TinyLlama-1.1B-Chat-v0.1 --data_path . --use-peft --use_int4 --learning_rate 2e-4 --train_batch_size 6 --num_train_epochs 3 --trainer sft
51
  # The training dataset to be used must be called training.csv and be located in the data_path folder.
52
  command="""
53
+ autotrain llm --train \
54
  --project_name ${project_name} \
55
  --model ${model_name} \
56
  --data_path ../fine_tune_data \
 
67
  """
68
 
69
  # Use subprocess.run() to execute the command
70
+ subprocess.run(command, shell=True, check=True)