nielsr (HF staff) committed
Commit
d522bbe
1 Parent(s): 5dd3d52

Update app.py

Files changed (1)
  1. app.py +5 -7
app.py CHANGED
@@ -17,8 +17,8 @@ git_model_large_coco = AutoModelForCausalLM.from_pretrained("microsoft/git-large
 blip_processor_large = AutoProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
 blip_model_large = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large").to(device)
 
-blip2_processor = AutoProcessor.from_pretrained("Salesforce/blip2-opt-6.7b")
-blip2_model = Blip2ForConditionalGeneration.from_pretrained("Salesforce/blip2-opt-6.7b", device_map="auto", load_in_4bit=True, torch_dtype=torch.float16)
+blip2_processor = AutoProcessor.from_pretrained("Salesforce/blip2-opt-6.7b-coco")
+blip2_model = Blip2ForConditionalGeneration.from_pretrained("Salesforce/blip2-opt-6.7b-coco", device_map="auto", load_in_4bit=True, torch_dtype=torch.float16)
 
 instructblip_processor = AutoProcessor.from_pretrained("Salesforce/instructblip-vicuna-7b")
 instructblip_model = InstructBlipForConditionalGeneration.from_pretrained("Salesforce/instructblip-vicuna-7b", device_map="auto", load_in_4bit=True, torch_dtype=torch.float16)
@@ -39,9 +39,7 @@ def generate_caption(processor, model, image, tokenizer=None, use_float_16=False
     return generated_caption
 
 
-def generate_caption_blip2(processor, model, image, replace_token=False):
-    prompt = "Generate a caption for the image:"
-
+def generate_caption_blip2(processor, model, image, prompt, replace_token=False):
     inputs = processor(images=image, text=prompt, return_tensors="pt").to(device=model.device, dtype=torch.float16)
 
     generated_ids = model.generate(**inputs,
@@ -59,9 +57,9 @@ def generate_captions(image):
 
     caption_blip_large = generate_caption(blip_processor_large, blip_model_large, image)
 
-    caption_blip2 = generate_caption_blip2(blip2_processor, blip2_model, image).strip()
+    caption_blip2 = generate_caption_blip2(blip2_processor, blip2_model, image, prompt="A photo of").strip()
 
-    caption_instructblip = generate_caption_blip2(instructblip_processor, instructblip_model, image, replace_token=True)
+    caption_instructblip = generate_caption_blip2(instructblip_processor, instructblip_model, image, prompt="Generate a caption for the image:", replace_token=True)
 
     return caption_git_large_coco, caption_blip_large, caption_blip2, caption_instructblip
 
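
The second hunk cuts off inside the generate_caption_blip2 body at the model.generate(**inputs, call, so the generation arguments and the replace_token handling are not visible. A minimal sketch of how the full function might look after this commit, with the unseen parts marked as assumptions:

import torch

def generate_caption_blip2(processor, model, image, prompt, replace_token=False):
    # The caller now supplies the prompt, so BLIP-2 ("A photo of") and
    # InstructBLIP ("Generate a caption for the image:") can share this helper.
    inputs = processor(images=image, text=prompt, return_tensors="pt").to(device=model.device, dtype=torch.float16)

    # Generation arguments are assumed; the diff truncates the generate(...) call.
    generated_ids = model.generate(**inputs, max_length=50)

    generated_caption = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]

    # replace_token is assumed to strip the instruction prompt that InstructBLIP
    # echoes back in its output; the actual handling is not shown in the diff.
    if replace_token:
        generated_caption = generated_caption.replace(prompt, "").strip()

    return generated_caption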
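
For context, the changed call sites live in generate_captions(image). A usage sketch of the updated calls outside the Gradio app, assuming the processors and 4-bit models loaded at the top of app.py; the image URL is illustrative only:

import requests
from PIL import Image

# Any RGB image works; this COCO validation image is just an example.
url = "http://images.cocodataset.org/val2017/000000039769.jpg"
image = Image.open(requests.get(url, stream=True).raw).convert("RGB")

caption_blip2 = generate_caption_blip2(blip2_processor, blip2_model, image, prompt="A photo of").strip()
caption_instructblip = generate_caption_blip2(instructblip_processor, instructblip_model, image,
                                              prompt="Generate a caption for the image:", replace_token=True)
print(caption_blip2)
print(caption_instructblip)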