weizhiwang
committed on
Commit
•
3269bdc
1
Parent(s):
a930042
Update README.md
Browse files
README.md
CHANGED
@@ -31,18 +31,24 @@ from llava.model.builder import load_pretrained_model
|
|
31 |
from llava.mm_utils import tokenizer_image_token, process_images, get_model_name_from_path
|
32 |
from PIL import Image
|
33 |
import requests
|
|
|
34 |
from io import BytesIO
|
35 |
|
36 |
# load model and processor
|
37 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
38 |
-
model_name = get_model_name_from_path(weizhiwang/LLaVA-Llama-3-8B)
|
39 |
-
tokenizer, model, image_processor, context_len = load_pretrained_model(weizhiwang/LLaVA-Llama-3-8B, None, model_name, False, False, device=device)
|
40 |
|
41 |
# prepare inputs for the model
|
42 |
text = '<image>' + '\n' + "Describe the image."
|
|
|
43 |
conv.append_message(conv.roles[0], text)
|
44 |
conv.append_message(conv.roles[1], None)
|
45 |
-
|
|
|
|
|
|
|
|
|
46 |
response = requests.get(url)
|
47 |
image = Image.open(BytesIO(response.content)).convert('RGB')
|
48 |
image_tensor = image_processor.preprocess(image, return_tensors='pt')['pixel_values'].half().cuda()
|
|
|
31 |
from llava.mm_utils import tokenizer_image_token, process_images, get_model_name_from_path
|
32 |
from PIL import Image
|
33 |
import requests
|
34 |
+
import torch
|
35 |
from io import BytesIO
|
36 |
|
37 |
# load model and processor
|
38 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
39 |
+
model_name = get_model_name_from_path("weizhiwang/LLaVA-Llama-3-8B")
|
40 |
+
tokenizer, model, image_processor, context_len = load_pretrained_model("weizhiwang/LLaVA-Llama-3-8B", None, model_name, False, False, device=device)
|
41 |
|
42 |
# prepare inputs for the model
|
43 |
text = '<image>' + '\n' + "Describe the image."
|
44 |
+
conv = conv_templates["llama_3"].copy()
|
45 |
conv.append_message(conv.roles[0], text)
|
46 |
conv.append_message(conv.roles[1], None)
|
47 |
+
prompt = conv.get_prompt()
|
48 |
+
input_ids = tokenizer_image_token(prompt, tokenizer, -200, return_tensors='pt').unsqueeze(0).cuda()
|
49 |
+
|
50 |
+
# prepare image input
|
51 |
+
url = "https://upload.wikimedia.org/wikipedia/en/thumb/7/7d/Lenna_%28test_image%29.png/330px-Lenna_%28test_image%29.png"
|
52 |
response = requests.get(url)
|
53 |
image = Image.open(BytesIO(response.content)).convert('RGB')
|
54 |
image_tensor = image_processor.preprocess(image, return_tensors='pt')['pixel_values'].half().cuda()
|