Update app.py
app.py (changed)

--- a/app.py
+++ b/app.py
@@ -17,8 +17,8 @@ git_model_large_coco = AutoModelForCausalLM.from_pretrained("microsoft/git-large
 blip_processor_large = AutoProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
 blip_model_large = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large").to(device)
 
-blip2_processor = AutoProcessor.from_pretrained("Salesforce/blip2-opt-6.7b")
-blip2_model = Blip2ForConditionalGeneration.from_pretrained("Salesforce/blip2-opt-6.7b", device_map="auto", load_in_4bit=True, torch_dtype=torch.float16)
+blip2_processor = AutoProcessor.from_pretrained("Salesforce/blip2-opt-6.7b-coco")
+blip2_model = Blip2ForConditionalGeneration.from_pretrained("Salesforce/blip2-opt-6.7b-coco", device_map="auto", load_in_4bit=True, torch_dtype=torch.float16)
 
 instructblip_processor = AutoProcessor.from_pretrained("Salesforce/instructblip-vicuna-7b")
 instructblip_model = InstructBlipForConditionalGeneration.from_pretrained("Salesforce/instructblip-vicuna-7b", device_map="auto", load_in_4bit=True, torch_dtype=torch.float16)
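This hunk swaps the base blip2-opt-6.7b checkpoint for blip2-opt-6.7b-coco, the COCO-captioning fine-tune of the same model. For reference, the loads rely on an import block that sits above the hunk and is not shown in this diff; a hedged reconstruction (the class names are real transformers APIs, but the exact block in app.py may differ):

# Hedged reconstruction of the imports these loads depend on; not part of
# the diff itself. 4-bit loading (load_in_4bit=True) additionally needs
# bitsandbytes and accelerate installed.
import torch
from transformers import (
    AutoModelForCausalLM,                  # GIT
    AutoProcessor,
    BlipForConditionalGeneration,          # BLIP-large
    Blip2ForConditionalGeneration,         # BLIP-2
    InstructBlipForConditionalGeneration,  # InstructBLIP
)

device = "cuda" if torch.cuda.is_available() else "cpu"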
@@ -39,9 +39,7 @@ def generate_caption(processor, model, image, tokenizer=None, use_float_16=False
     return generated_caption
 
 
-def generate_caption_blip2(processor, model, image, replace_token=False):
-    prompt = "Generate a caption for the image:"
-
+def generate_caption_blip2(processor, model, image, prompt, replace_token=False):
     inputs = processor(images=image, text=prompt, return_tensors="pt").to(device=model.device, dtype=torch.float16)
 
     generated_ids = model.generate(**inputs,
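The hunk lifts the hard-coded prompt into a parameter and cuts off inside the model.generate call, whose remaining arguments are outside this diff. For illustration only, a prompted BLIP-2/InstructBLIP generation typically completes along these lines (max_new_tokens and the decode step are assumptions, not the file's actual code):

# Illustrative sketch, not app.py's actual body: run a prompted generation
# and decode the new tokens back to text.
inputs = processor(images=image, text=prompt, return_tensors="pt").to(
    device=model.device, dtype=torch.float16
)
generated_ids = model.generate(**inputs, max_new_tokens=50)  # placeholder args
caption = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]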
@@ -59,9 +57,9 @@ def generate_captions(image):
 
     caption_blip_large = generate_caption(blip_processor_large, blip_model_large, image)
 
-    caption_blip2 = generate_caption_blip2(blip2_processor, blip2_model, image).strip()
+    caption_blip2 = generate_caption_blip2(blip2_processor, blip2_model, image, prompt="A photo of").strip()
 
-    caption_instructblip = generate_caption_blip2(instructblip_processor, instructblip_model, image, replace_token=True)
+    caption_instructblip = generate_caption_blip2(instructblip_processor, instructblip_model, image, prompt="Generate a caption for the image:", replace_token=True)
 
     return caption_git_large_coco, caption_blip_large, caption_blip2, caption_instructblip
 
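With the new prompt parameter, one helper now serves both models: BLIP-2 gets the completion-style prefix "A photo of", while InstructBLIP keeps the instruction-style prompt. A hypothetical driver (not part of app.py) to exercise the updated pipeline:

# Hypothetical usage sketch, assuming app.py's functions are importable
# and a local test image exists at the path below.
from PIL import Image

image = Image.open("example.jpg").convert("RGB")
git_cap, blip_cap, blip2_cap, instructblip_cap = generate_captions(image)
print("BLIP-2:", blip2_cap)              # prompted with "A photo of"
print("InstructBLIP:", instructblip_cap)  # prompted with the caption instruction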