John6666 committed
Commit 4a940d2 • 1 Parent(s): e122ddb

Upload 9 files

9em124t2-499968/text_model/README.md CHANGED
@@ -1,5 +1,5 @@
 ---
-base_model: meta-llama/Meta-Llama-3.1-8B
+base_model: unsloth/Meta-Llama-3.1-8B-Instruct
 library_name: peft
 ---
 
9em124t2-499968/text_model/adapter_config.json CHANGED
@@ -1,7 +1,7 @@
 {
   "alpha_pattern": {},
   "auto_mapping": null,
-  "base_model_name_or_path": "meta-llama/Meta-Llama-3.1-8B",
+  "base_model_name_or_path": "unsloth/Meta-Llama-3.1-8B-Instruct",
   "bias": "none",
   "fan_in_fan_out": false,
   "inference_mode": true,
README.md CHANGED
@@ -5,7 +5,7 @@ language:
 ---
 # Image Captioning App
 
-This is a mod of [Wi-zz/joy-caption-pre-alpha](https://huggingface.co/Wi-zz/joy-caption-pre-alpha) and [fancyfeast/joy-caption-alpha-one](https://huggingface.co/spaces/fancyfeast/joy-caption-alpha-one). Thanks to [dominic1021](https://huggingface.co/dominic1021).
+This is a mod of [Wi-zz/joy-caption-pre-alpha](https://huggingface.co/Wi-zz/joy-caption-pre-alpha) and [fancyfeast/joy-caption-alpha-one](https://huggingface.co/spaces/fancyfeast/joy-caption-alpha-one). Thanks to [dominic1021](https://huggingface.co/dominic1021), [IceHibiki](https://huggingface.co/IceHibiki).
 
 # Notice: I will contribute to Wi-zz after shaping the code.
 
app.py CHANGED
@@ -12,10 +12,11 @@ from torch import nn
 from transformers import AutoModel, AutoProcessor, AutoTokenizer, PreTrainedTokenizer, PreTrainedTokenizerFast, AutoModelForCausalLM
 from typing import List, Union
 import torchvision.transforms.functional as TVF
-from peft import PeftConfig
+from peft import PeftModel
 import gc
 
 # Constants
+HF_TOKEN = os.environ.get("HF_TOKEN", None)
 BASE_DIR = Path(__file__).resolve().parent # Define the base directory
 CLIP_PATH = "google/siglip-so400m-patch14-384"
 DEFAULT_MODEL_PATH = "unsloth/Meta-Llama-3.1-8B-bnb-4bit"
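The new `HF_TOKEN` constant picks up an access token from the environment, useful for gated repos; where app.py actually passes it is outside the hunks shown here. A sketch of the usual pattern, using the `token=` parameter that transformers accepts on `from_pretrained`:

```python
# Sketch: thread an environment-provided Hub token into a download.
import os
from transformers import AutoTokenizer

HF_TOKEN = os.environ.get("HF_TOKEN", None)  # None falls back to anonymous access
tokenizer = AutoTokenizer.from_pretrained(
    "unsloth/Meta-Llama-3.1-8B-bnb-4bit", token=HF_TOKEN
)
```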
@@ -42,6 +43,7 @@ IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.bmp', '.webp')
 
 # Global Variables
 IS_NF4 = True
+IS_LORA = True
 MODEL_PATH = DEFAULT_MODEL_PATH
 device = "cuda" if torch.cuda.is_available() else "cpu"
 print(f"Running on {device}")
@@ -111,7 +113,7 @@ class ImageAdapter(nn.Module):
         return self.other_tokens(torch.tensor([2], device=self.other_tokens.weight.device)).squeeze(0)
 
 def load_models():
-    global MODEL_PATH, IS_NF4
+    global MODEL_PATH, IS_NF4, IS_LORA
     try:
         if IS_NF4:
             from transformers import BitsAndBytesConfig
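The `nf4_config` referenced in the next hunk comes from this `BitsAndBytesConfig` import; its exact arguments are defined outside the lines shown. A typical NF4 setup, as a sketch:

```python
# Sketch of a typical NF4 quantization config; app.py's exact arguments
# are outside the hunks in this diff.
import torch
from transformers import BitsAndBytesConfig

nf4_config = BitsAndBytesConfig(
    load_in_4bit=True,                      # store weights in 4 bits
    bnb_4bit_quant_type="nf4",              # NormalFloat4 quantization
    bnb_4bit_use_double_quant=True,         # also quantize the quant constants
    bnb_4bit_compute_dtype=torch.bfloat16,  # compute dtype for matmuls
)
```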
@@ -136,11 +138,11 @@ def load_models():
             print(f"Loading LLM: {MODEL_PATH} 🤖")
             text_model = AutoModelForCausalLM.from_pretrained(MODEL_PATH, quantization_config=nf4_config, device_map=device, torch_dtype=torch.bfloat16).eval()
 
-            if LORA_PATH.exists():
+            if False and IS_LORA and LORA_PATH.exists(): # omitted
                 print("Loading VLM's custom text model 🤖")
-                peft_config = PeftConfig.from_pretrained(LORA_PATH, device_map=device, quantization_config=nf4_config)
-                text_model.add_adapter(peft_config)
-                text_model.enable_adapters()
+                text_model = PeftModel.from_pretrained(model=text_model, model_id=LORA_PATH, device_map=device, quantization_config=nf4_config)
+                text_model = text_model.merge_and_unload(safe_merge=True) # to avoid PEFT bug https://github.com/huggingface/transformers/issues/28515
+            else: print("VLM's custom text model isn't loaded 🤖")
 
             print("Loading image adapter 🖼️")
             image_adapter = ImageAdapter(clip_model.config.hidden_size, text_model.config.hidden_size, False, False, 38, False).eval().to("cpu")
@@ -164,13 +166,13 @@ def load_models():
             assert isinstance(tokenizer, (PreTrainedTokenizer, PreTrainedTokenizerFast)), f"Tokenizer is of type {type(tokenizer)}"
 
             print(f"Loading LLM: {MODEL_PATH} 🤖")
-            text_model = AutoModelForCausalLM.from_pretrained(MODEL_PATH, device_map="auto", torch_dtype=torch.bfloat16).eval() # device_map=auto may cause LoRA error
+            text_model = AutoModelForCausalLM.from_pretrained(MODEL_PATH, device_map="auto", torch_dtype=torch.bfloat16).eval() # device_map="auto" may cause LoRA issue
 
-            if LORA_PATH.exists():
+            if IS_LORA and LORA_PATH.exists():
                 print("Loading VLM's custom text model 🤖")
-                peft_config = PeftConfig.from_pretrained(LORA_PATH, device_map=device)
-                text_model.add_adapter(peft_config)
-                text_model.enable_adapters()
+                text_model = PeftModel.from_pretrained(model=text_model, model_id=LORA_PATH, device_map=device)
+                text_model = text_model.merge_and_unload(safe_merge=True) # to avoid PEFT bug https://github.com/huggingface/transformers/issues/28515
+            else: print("VLM's custom text model isn't loaded 🤖")
 
             print("Loading image adapter 🖼️")
             image_adapter = ImageAdapter(clip_model.config.hidden_size, text_model.config.hidden_size, False, False, 38, False).eval().to("cpu")
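Both branches replace `add_adapter`/`enable_adapters`, which attach a freshly initialized adapter from a config rather than loading trained weights, with `PeftModel.from_pretrained` plus `merge_and_unload`, which loads the trained LoRA and folds it into the base. In the NF4 branch the block is additionally guarded with `if False` ("omitted"), presumably because merging into a 4-bit quantized base is not cleanly supported. A standalone sketch of the pattern under the bf16 branch's assumptions:

```python
# Standalone sketch of the load-then-merge pattern (bf16 path); the adapter
# directory below stands in for LORA_PATH defined elsewhere in app.py.
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM

base = AutoModelForCausalLM.from_pretrained(
    "unsloth/Meta-Llama-3.1-8B-Instruct", torch_dtype=torch.bfloat16
).eval()
lora = PeftModel.from_pretrained(base, "9em124t2-499968/text_model")
# merge_and_unload folds the LoRA deltas into the base weights and returns a
# plain transformers model; safe_merge=True validates the merged weights.
text_model = lora.merge_and_unload(safe_merge=True)
```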
@@ -312,7 +314,8 @@ def parse_arguments():
                         help='Caption Length (default: "any")')
     parser.add_argument("--model", type=str, default=DEFAULT_MODEL_PATH,
                         help='Huggingface LLM repo (default: "unsloth/Meta-Llama-3.1-8B-bnb-4bit")')
-    parser.add_argument("--bf16", action="store_true", help="Use bfloat16 (default: NF4)")
+    parser.add_argument("--bf16", action="store_true", default=False, help="Use bfloat16 (default: NF4)")
+    parser.add_argument("--nolora", action="store_true", default=False, help="Disable VLM's custom text model (default: Enable)")
     parser.add_argument("--tokens", type=int, default=300, help="Max tokens (default: 300)")
     parser.add_argument("--topp", type=float, default=0.9, help="Top-P (default: 0.9)")
     parser.add_argument("--temp", type=float, default=0.6, help="Temperature (default: 0.6)")
@@ -331,7 +334,7 @@ def is_valid_repo(repo_id):
         return False
 
 def main():
-    global MODEL_PATH, IS_NF4
+    global MODEL_PATH, IS_NF4, IS_LORA
     args = parse_arguments()
     input_paths = [Path(input_path) for input_path in args.input]
     batch_size = args.bs
@@ -341,8 +344,8 @@
     max_new_tokens = args.tokens
     top_p = args.topp
     temperature = args.temp
-    if args.bf16: IS_NF4 = False
-    else: IS_NF4 = True
+    IS_NF4 = False if args.bf16 else True
+    IS_LORA = False if args.nolora else True
     if is_valid_repo(args.model): MODEL_PATH = args.model
     else: sys.exit(1)
     models = load_models()
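For reference, the two new assignments are simple negations of the new flags; this self-contained sketch shows the mapping (`X = False if flag else True` is just `X = not flag`):

```python
# Sketch: the new CLI flags and how they map onto the globals.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--bf16", action="store_true", help="Use bfloat16 (default: NF4)")
parser.add_argument("--nolora", action="store_true", help="Disable VLM's custom text model")
args = parser.parse_args(["--nolora"])  # e.g. python app.py --nolora

IS_NF4 = not args.bf16     # True -> NF4 quantization (the default)
IS_LORA = not args.nolora  # False -> skip loading the bundled LoRA
print(IS_NF4, IS_LORA)     # True False
```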
 