pizb committed on
Commit
01247ee
1 Parent(s): cf19d04

feat: check train executable

Browse files
article_base_train_no_qlora_test.py ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os, time, math
2
+ import pandas as pd
3
+ from datasets import Dataset
4
+ from transformers import PaliGemmaProcessor, PaliGemmaForConditionalGeneration, BitsAndBytesConfig, TrainingArguments, Trainer
5
+ import torch
6
+ from PIL import Image
7
+ from peft import get_peft_model, LoraConfig
8
+
9
+ # Function to load custom dataset from CSV
10
def load_custom_dataset_from_csv(csv_file, image_folder):
    """Build a Hugging Face ``Dataset`` from a CSV file of image Q/A rows.

    The CSV is read with pandas and must provide ``question``, ``image`` and
    ``answer`` columns. Every value in the ``image`` column is joined onto
    ``image_folder`` with ``os.path.join``; the images themselves are not
    opened here.

    Args:
        csv_file: Path of the CSV file to read.
        image_folder: Directory prefix prepended to each ``image`` entry.

    Returns:
        A ``Dataset`` with the columns ``question``, ``image`` (joined paths)
        and ``answer``.
    """
    frame = pd.read_csv(csv_file)

    # Column order mirrors the CSV schema consumed by the collate function.
    columns = {
        'question': frame['question'].tolist(),
        'image': [
            os.path.join(image_folder, file_name)
            for file_name in frame['image'].tolist()
        ],
        'answer': frame['answer'].tolist(),
    }
    return Dataset.from_dict(columns)
25
+
26
+ # Main training function
27
def main():
    """Fine-tune PaliGemma on the CSV question/answer dataset without QLoRA.

    Steps, in order:
      1. Load ``dataset/train_samples.csv`` and split it 90/10 into train and
         validation subsets.
      2. Load the ``google/paligemma-3b-pt-224`` processor and the model in
         bfloat16, then move the model to the CUDA device.
      3. Disable gradients for the vision tower and enable them for the
         multi-modal projector; all other parameters keep whatever
         ``requires_grad`` value ``from_pretrained`` gave them.
      4. Train with the Hugging Face ``Trainer`` using a collator that turns
         raw rows into processor outputs.

    No 4-bit quantization config and no LoRA adapter are applied in this
    variant of the script.
    """
    # --- data -------------------------------------------------------------
    full_ds = load_custom_dataset_from_csv('dataset/train_samples.csv', 'dataset/images/train')
    splits = full_ds.train_test_split(test_size=0.1)
    train_ds, val_ds = splits['train'], splits['test']

    # --- model ------------------------------------------------------------
    model_id = "google/paligemma-3b-pt-224"
    processor = PaliGemmaProcessor.from_pretrained(model_id)
    device = "cuda"

    model = PaliGemmaForConditionalGeneration.from_pretrained(model_id, torch_dtype=torch.bfloat16)
    model = model.to(device)

    # (sub-module, trainable?) pairs: vision tower frozen, projector trainable.
    trainable_flags = (
        (model.vision_tower, False),
        (model.multi_modal_projector, True),
    )
    for module, trainable in trainable_flags:
        for weight in module.parameters():
            weight.requires_grad = trainable

    # --- training configuration --------------------------------------------
    # Each run writes to a fresh directory named after the current Unix time.
    run_dir = f"./output/{math.floor(time.time())}"
    training_kwargs = {
        "output_dir": run_dir,
        "num_train_epochs": 2,
        "remove_unused_columns": False,  # collator needs the raw columns
        "per_device_train_batch_size": 1,
        "gradient_accumulation_steps": 4,
        "warmup_steps": 2,
        "learning_rate": 2e-5,
        "weight_decay": 1e-6,
        "logging_steps": 100,
        "optim": "adamw_hf",
        "save_strategy": "steps",
        "save_steps": 1000,
        "save_total_limit": 1,
        "bf16": True,
        "report_to": ["tensorboard"],
        "dataloader_pin_memory": False,
    }
    args = TrainingArguments(**training_kwargs)

    def collate_fn(examples):
        """Convert a list of dataset rows into one processor batch.

        The question is the prompt text, the answer is passed as ``suffix``,
        and each image path is opened and converted to RGB. The resulting
        batch is cast to bfloat16 and moved to ``device``.
        """
        prompts = [row["question"] for row in examples]
        targets = [row['answer'] for row in examples]
        pictures = [Image.open(row['image']).convert("RGB") for row in examples]
        batch = processor(
            text=prompts,
            images=pictures,
            suffix=targets,
            return_tensors="pt",
            padding="longest",
        )
        batch = batch.to(torch.bfloat16)
        return batch.to(device)

    trainer = Trainer(
        model=model,
        args=args,
        train_dataset=train_ds,
        eval_dataset=val_ds,
        data_collator=collate_fn,
    )
    trainer.train()


if __name__ == "__main__":
    main()
article_base_train_test.py CHANGED
@@ -1,50 +1,50 @@
1
- import os
2
- import json
3
- from datasets import load_dataset, Dataset
4
  from transformers import PaliGemmaProcessor, PaliGemmaForConditionalGeneration, BitsAndBytesConfig, TrainingArguments, Trainer
5
  import torch
6
  from PIL import Image
7
  from peft import get_peft_model, LoraConfig
8
 
9
- # Function to load custom dataset
10
- def load_custom_dataset(json_file, image_folder):
11
- with open(json_file, 'r') as f:
12
- data = json.load(f)
13
 
14
  # Prepare dataset format for Hugging Face
15
- questions = []
16
- images = []
17
- answers = []
18
- multiple_choice_answers = []
19
-
20
- for item in data:
21
- questions.append(item['question'])
22
- images.append(os.path.join(image_folder, item['image_id']))
23
- answers.append(item['answer'])
24
- multiple_choice_answers.append(item['multiple_choice_answer'])
25
 
 
26
  return Dataset.from_dict({
27
  'question': questions,
28
  'image': images,
29
- 'answer': answers,
30
- 'multiple_choice_answer': multiple_choice_answers
31
  })
32
 
33
  # Main training function
34
  def main():
35
- # Load custom dataset
36
- train_ds = load_custom_dataset('dataset/train.json', 'dataset/images/train')
37
- val_ds = load_custom_dataset('dataset/val.json', 'dataset/images/val')
38
 
 
 
 
 
 
39
  model_id = "google/paligemma-3b-pt-224"
40
  processor = PaliGemmaProcessor.from_pretrained(model_id)
41
- image_token = processor.tokenizer.convert_tokens_to_ids("<image>")
42
  device = "cuda"
43
 
44
  bnb_config = BitsAndBytesConfig(
45
  load_in_4bit=True,
46
  bnb_4bit_quant_type="nf4",
47
- bnb_4bit_compute_type=torch.bfloat16
 
 
 
48
  )
49
  lora_config = LoraConfig(
50
  r=8,
@@ -53,13 +53,16 @@ def main():
53
  )
54
 
55
  model = PaliGemmaForConditionalGeneration.from_pretrained(model_id, quantization_config=bnb_config, device_map={"": 0})
 
56
  model = get_peft_model(model, lora_config)
57
  model.print_trainable_parameters()
58
 
59
  args = TrainingArguments(
 
60
  num_train_epochs=2,
61
  remove_unused_columns=False,
62
- per_device_train_batch_size=16,
 
63
  gradient_accumulation_steps=4,
64
  warmup_steps=2,
65
  learning_rate=2e-5,
@@ -76,9 +79,11 @@ def main():
76
 
77
  # Custom collate function
78
  def collate_fn(examples):
79
- texts = ["answer " + example["question"] for example in examples]
80
- labels = [example['multiple_choice_answer'] for example in examples]
81
- images = [Image.open(image_path).convert("RGB") for image_path in examples['image']]
 
 
82
  tokens = processor(text=texts, images=images, suffix=labels, return_tensors="pt", padding="longest")
83
  tokens = tokens.to(torch.bfloat16).to(device)
84
  return tokens
 
1
+ import os, time, math
2
+ import pandas as pd
3
+ from datasets import Dataset
4
  from transformers import PaliGemmaProcessor, PaliGemmaForConditionalGeneration, BitsAndBytesConfig, TrainingArguments, Trainer
5
  import torch
6
  from PIL import Image
7
  from peft import get_peft_model, LoraConfig
8
 
9
+ # Function to load custom dataset from CSV
10
+ def load_custom_dataset_from_csv(csv_file, image_folder):
11
+ # Load CSV data using pandas
12
+ data = pd.read_csv(csv_file)
13
 
14
  # Prepare dataset format for Hugging Face
15
+ questions = data['question'].tolist()
16
+ images = [os.path.join(image_folder, img) for img in data['image'].tolist()]
17
+ answers = data['answer'].tolist()
 
 
 
 
 
 
 
18
 
19
+ # Create a Hugging Face dataset from the loaded CSV
20
  return Dataset.from_dict({
21
  'question': questions,
22
  'image': images,
23
+ 'answer': answers
 
24
  })
25
 
26
  # Main training function
27
  def main():
28
+ # Load custom datasets
29
+ dataset = load_custom_dataset_from_csv('dataset/train_samples.csv', 'dataset/images/train')
30
+ train_val_split = dataset.train_test_split(test_size=0.1)
31
 
32
+ train_ds = train_val_split['train']
33
+ val_ds = train_val_split['test']
34
+ # train_ds = load_custom_dataset_from_csv('dataset/train_samples.csv', 'dataset/images')
35
+ # val_ds = load_custom_dataset_from_csv('dataset/val.csv', 'dataset/images')
36
+
37
  model_id = "google/paligemma-3b-pt-224"
38
  processor = PaliGemmaProcessor.from_pretrained(model_id)
 
39
  device = "cuda"
40
 
41
  bnb_config = BitsAndBytesConfig(
42
  load_in_4bit=True,
43
  bnb_4bit_quant_type="nf4",
44
+ # bnb_4bit_compute_type=torch.bfloat16,
45
+ # bnb_4bit_compute_type=torch.float16
46
+ bnb_4bit_compute_dtype=torch.bfloat16
47
+ # bnb_4bit_use_double_quant=True,
48
  )
49
  lora_config = LoraConfig(
50
  r=8,
 
53
  )
54
 
55
  model = PaliGemmaForConditionalGeneration.from_pretrained(model_id, quantization_config=bnb_config, device_map={"": 0})
56
+ # model.gradient_checkpointing_enable()
57
  model = get_peft_model(model, lora_config)
58
  model.print_trainable_parameters()
59
 
60
  args = TrainingArguments(
61
+ output_dir=f"./output/{math.floor(time.time())}",
62
  num_train_epochs=2,
63
  remove_unused_columns=False,
64
+ # per_device_train_batch_size=16,
65
+ per_device_train_batch_size=4,
66
  gradient_accumulation_steps=4,
67
  warmup_steps=2,
68
  learning_rate=2e-5,
 
79
 
80
  # Custom collate function
81
  def collate_fn(examples):
82
+ # texts = ["answer " + example["question"] for example in examples]
83
+ texts = [example["question"] for example in examples]
84
+ labels = [example['answer'] for example in examples]
85
+ # images = [Image.open(image_path).convert("RGB") for image_path in examples['image']]
86
+ images = [Image.open(example['image']).convert("RGB") for example in examples]
87
  tokens = processor(text=texts, images=images, suffix=labels, return_tensors="pt", padding="longest")
88
  tokens = tokens.to(torch.bfloat16).to(device)
89
  return tokens
test_inference.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import AutoProcessor, PaliGemmaForConditionalGeneration
2
+ from PIL import Image
3
+
4
+
5
def main():
    """Run one PaliGemma generation on a sample image and print the reply.

    Model weights come from a local fine-tuning checkpoint directory, while
    the processor is loaded from the base model id. The generation is capped
    at 20 new tokens.

    Changes relative to the first version of this script:
      * The processor is called with ``text=``/``images=`` keywords instead of
        positional arguments, so the call does not depend on the positional
        parameter order of the installed transformers version.
      * The image is converted to RGB, matching what the training collator
        feeds the processor.
      * The reply is obtained by dropping the input tokens from the generated
        sequence before decoding, instead of cutting ``len(prompt)`` characters
        off the decoded string. The decoded text is not guaranteed to
        reproduce the prompt character-for-character, so character slicing
        can clip or leak text.
    """
    model_id = "google/paligemma-3b-pt-224"
    model_path = "output/1727490265/checkpoint-450"
    model = PaliGemmaForConditionalGeneration.from_pretrained(model_path)
    processor = AutoProcessor.from_pretrained(model_id)

    prompt = "Please construct a formal analysis paragraph that is coherent and focuses solely on visual characteristic."
    image_file_path = "dataset/images/manual_test/starry_night.jpg"

    # convert() returns a new, fully loaded image, so the file handle can be
    # released as soon as the with-block exits.
    with Image.open(image_file_path) as image_file:
        raw_image = image_file.convert("RGB")

    inputs = processor(text=prompt, images=raw_image, return_tensors="pt")
    # Number of input tokens (image placeholders + prompt); generate() returns
    # these followed by the newly generated tokens.
    input_len = inputs["input_ids"].shape[-1]
    output = model.generate(**inputs, max_new_tokens=20)

    response = processor.decode(output[0][input_len:], skip_special_tokens=True)
    print("Response: ", response)


if __name__ == "__main__":
    main()