weizhiwang
/

LLaVA-Llama-3-8B

Text Generation

Inference Endpoints

Model card | Files and versions | Community

weizhiwang committed on Apr 21

Commit

3269bdc

•

1 Parent(s): a930042

Update README.md

Files changed (1) hide show

README.md +9 -3

README.md CHANGED Viewed

@@ -31,18 +31,24 @@ from llava.model.builder import load_pretrained_model
 from llava.mm_utils import tokenizer_image_token, process_images, get_model_name_from_path
 from PIL import Image
 import requests
 from io import BytesIO
 # load model and processor
 device = "cuda" if torch.cuda.is_available() else "cpu"
-model_name = get_model_name_from_path(weizhiwang/LLaVA-Llama-3-8B)
-tokenizer, model, image_processor, context_len = load_pretrained_model(weizhiwang/LLaVA-Llama-3-8B, None, model_name, False, False, device=device)
 # prepare inputs for the model
 text = '<image>' + '\n' + "Describe the image."
 conv.append_message(conv.roles[0], text)
 conv.append_message(conv.roles[1], None)
-url = https://upload.wikimedia.org/wikipedia/en/thumb/7/7d/Lenna_%28test_image%29.png/330px-Lenna_%28test_image%29.png" # Lena
 response = requests.get(url)
 image = Image.open(BytesIO(response.content)).convert('RGB')
 image_tensor = image_processor.preprocess(image, return_tensors='pt')['pixel_values'].half().cuda()

 from llava.mm_utils import tokenizer_image_token, process_images, get_model_name_from_path
 from PIL import Image
 import requests
+import torch
 from io import BytesIO
 # load model and processor
 device = "cuda" if torch.cuda.is_available() else "cpu"
+model_name = get_model_name_from_path("weizhiwang/LLaVA-Llama-3-8B")
+tokenizer, model, image_processor, context_len = load_pretrained_model("weizhiwang/LLaVA-Llama-3-8B", None, model_name, False, False, device=device)
 # prepare inputs for the model
 text = '<image>' + '\n' + "Describe the image."
+conv = conv_templates["llama_3"].copy()
 conv.append_message(conv.roles[0], text)
 conv.append_message(conv.roles[1], None)
+prompt = conv.get_prompt()
+input_ids = tokenizer_image_token(prompt, tokenizer, -200, return_tensors='pt').unsqueeze(0).cuda()
+# prepare image input
+url = "https://upload.wikimedia.org/wikipedia/en/thumb/7/7d/Lenna_%28test_image%29.png/330px-Lenna_%28test_image%29.png"
 response = requests.get(url)
 image = Image.open(BytesIO(response.content)).convert('RGB')
 image_tensor = image_processor.preprocess(image, return_tensors='pt')['pixel_values'].half().cuda()