import torch
from transformers import MllamaForConditionalGeneration, AutoProcessor
from PIL import Image
import base64
import io

# Load model and processor globally (once per worker, not per request).
# Llama 3.2 Vision checkpoints load via MllamaForConditionalGeneration;
# LlamaForCausalLM is text-only and cannot consume image inputs.
model_id = "kiddobellamy/Llama_Vision"
model = MllamaForConditionalGeneration.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
processor = AutoProcessor.from_pretrained(model_id)
def handler(event, context):
    try:
        # Parse the request payload
        payload = event.get('inputs', {})
        image_base64 = payload.get('image')
        prompt = payload.get('prompt', '')

        if not image_base64 or not prompt:
            return {'error': 'Both "image" and "prompt" are required in inputs.'}

        # Decode the base64 image
        image_bytes = base64.b64decode(image_base64)
        image = Image.open(io.BytesIO(image_bytes)).convert('RGB')

        # Prepare the chat message; the image placeholder is filled in by the processor
        messages = [
            {"role": "user", "content": [
                {"type": "image"},
                {"type": "text", "text": prompt}
            ]}
        ]
        input_text = processor.apply_chat_template(messages, add_generation_prompt=True)

        # Process inputs (named model_inputs to avoid shadowing the request payload)
        model_inputs = processor(images=image, text=input_text, return_tensors="pt").to(model.device)

        # Generate output
        output_ids = model.generate(**model_inputs, max_new_tokens=50)
        generated_text = processor.decode(output_ids[0], skip_special_tokens=True)

        # Return the result
        return {'generated_text': generated_text}
    except Exception as e:
        return {'error': str(e)}
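

# Minimal local smoke test: a sketch, not part of the deployed handler.
# Assumes a local file named "test.jpg" exists; the file name and the event
# shape are illustrative, mirroring the {'inputs': {...}} contract above.
if __name__ == "__main__":
    with open("test.jpg", "rb") as f:
        encoded = base64.b64encode(f.read()).decode("utf-8")
    event = {"inputs": {"image": encoded, "prompt": "Describe this image."}}
    print(handler(event, context=None))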