weizhiwang committed
Commit 3269bdc
1 Parent(s): a930042

Update README.md

Files changed (1)
  1. README.md +9 -3
README.md CHANGED
@@ -31,18 +31,24 @@ from llava.model.builder import load_pretrained_model
  from llava.mm_utils import tokenizer_image_token, process_images, get_model_name_from_path
  from PIL import Image
  import requests
+ import torch
  from io import BytesIO

  # load model and processor
  device = "cuda" if torch.cuda.is_available() else "cpu"
- model_name = get_model_name_from_path(weizhiwang/LLaVA-Llama-3-8B)
- tokenizer, model, image_processor, context_len = load_pretrained_model(weizhiwang/LLaVA-Llama-3-8B, None, model_name, False, False, device=device)
+ model_name = get_model_name_from_path("weizhiwang/LLaVA-Llama-3-8B")
+ tokenizer, model, image_processor, context_len = load_pretrained_model("weizhiwang/LLaVA-Llama-3-8B", None, model_name, False, False, device=device)

  # prepare inputs for the model
  text = '<image>' + '\n' + "Describe the image."
+ conv = conv_templates["llama_3"].copy()
  conv.append_message(conv.roles[0], text)
  conv.append_message(conv.roles[1], None)
- url = https://upload.wikimedia.org/wikipedia/en/thumb/7/7d/Lenna_%28test_image%29.png/330px-Lenna_%28test_image%29.png" # Lena
+ prompt = conv.get_prompt()
+ input_ids = tokenizer_image_token(prompt, tokenizer, -200, return_tensors='pt').unsqueeze(0).cuda()
+
+ # prepare image input
+ url = "https://upload.wikimedia.org/wikipedia/en/thumb/7/7d/Lenna_%28test_image%29.png/330px-Lenna_%28test_image%29.png"
  response = requests.get(url)
  image = Image.open(BytesIO(response.content)).convert('RGB')
  image_tensor = image_processor.preprocess(image, return_tensors='pt')['pixel_values'].half().cuda()
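For reference, the sketch below assembles the updated README snippet into one self-contained script. The `conv_templates` import and the final generate/decode step do not appear in the committed hunk; they are assumptions based on the upstream LLaVA API (this fork is assumed to keep the same module layout), and the generation parameters are illustrative only.

```python
# Minimal end-to-end sketch assembled from the updated README snippet above.
# Assumed, not in the committed hunk: the conv_templates import and the final
# generate/decode step; a CUDA GPU is assumed, as in the README snippet.
from io import BytesIO

import requests
import torch
from PIL import Image

from llava.conversation import conv_templates  # assumed import (upstream LLaVA layout)
from llava.mm_utils import tokenizer_image_token, get_model_name_from_path
from llava.model.builder import load_pretrained_model

# load model and processor
device = "cuda" if torch.cuda.is_available() else "cpu"
model_path = "weizhiwang/LLaVA-Llama-3-8B"
model_name = get_model_name_from_path(model_path)
tokenizer, model, image_processor, context_len = load_pretrained_model(
    model_path, None, model_name, False, False, device=device
)

# build the llama_3 conversation prompt around the <image> placeholder
conv = conv_templates["llama_3"].copy()
conv.append_message(conv.roles[0], "<image>\nDescribe the image.")
conv.append_message(conv.roles[1], None)
prompt = conv.get_prompt()
# -200 is LLaVA's image-token index, used literally in the diff
input_ids = tokenizer_image_token(prompt, tokenizer, -200, return_tensors='pt').unsqueeze(0).cuda()

# fetch and preprocess the test image
url = "https://upload.wikimedia.org/wikipedia/en/thumb/7/7d/Lenna_%28test_image%29.png/330px-Lenna_%28test_image%29.png"
image = Image.open(BytesIO(requests.get(url).content)).convert('RGB')
image_tensor = image_processor.preprocess(image, return_tensors='pt')['pixel_values'].half().cuda()

# generate a description (assumed step, not part of the committed hunk)
with torch.inference_mode():
    output_ids = model.generate(
        input_ids,
        images=image_tensor,
        do_sample=False,
        max_new_tokens=512,
        use_cache=True,
    )

# Recent LLaVA builds return only the newly generated tokens; older ones echo
# the prompt, in which case slice with output_ids[:, input_ids.shape[1]:].
print(tokenizer.batch_decode(output_ids, skip_special_tokens=True)[0].strip())
```

The three removed lines were the substantive bugs: the model path and image URL were bare, unquoted tokens, so the old snippet was not valid Python. The added lines quote them and build the `llama_3` conversation prompt before tokenizing it with the image token.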