Spaces:

greencatted
/

ZoomLocation

Running

ZoomLocation / app.py

Use Llama Vision Instruct

0bef8ce 2 months ago

1.04 kB

	import streamlit as st
	from PIL import Image

	import torch
	from transformers import MllamaForConditionalGeneration, AutoProcessor

	# Hugging Face Hub checkpoint used for both the model weights and the processor.
	model_id = "meta-llama/Llama-3.2-11B-Vision-Instruct"


	@st.cache_resource
	def _load_model_and_processor(checkpoint):
	    """Load the vision-language model and its processor once per server process.

	    Streamlit re-executes this script from top to bottom on every widget
	    interaction. Without caching, the checkpoint would be re-instantiated on
	    each rerun (ticking the checkbox, taking a picture, ...).
	    ``st.cache_resource`` keeps the returned objects alive and hands back the
	    same instances on subsequent reruns.

	    Args:
	        checkpoint: Model identifier passed to ``from_pretrained``.

	    Returns:
	        A ``(model, processor)`` tuple.
	    """
	    loaded_model = MllamaForConditionalGeneration.from_pretrained(
	        checkpoint,
	        torch_dtype=torch.bfloat16,
	        device_map="auto",
	    )
	    loaded_processor = AutoProcessor.from_pretrained(checkpoint)
	    return loaded_model, loaded_processor


	# Module-level names kept so the rest of the script can keep using them.
	model, processor = _load_model_and_processor(model_id)

	# The camera widget stays disabled until the user explicitly opts in.
	enable = st.checkbox("Enable camera")
	picture = st.camera_input("Take a picture", disabled=not enable)

	# Nothing to analyse until a picture has actually been captured.
	if picture:
	    image = Image.open(picture)

	    # Single-turn chat: one image placeholder followed by the instruction text.
	    messages = [
	        {"role": "user", "content": [
	            {"type": "image"},
	            {"type": "text", "text": "Provide your best guess as to where this person is holding his online meeting. Just state your guess of location in your response."}
	        ]}
	    ]
	    input_text = processor.apply_chat_template(messages, add_generation_prompt=True)
	    # add_special_tokens=False: the templated prompt is tokenized as-is.
	    inputs = processor(
	        image,
	        input_text,
	        add_special_tokens=False,
	        return_tensors="pt"
	    ).to(model.device)

	    output = model.generate(**inputs, max_new_tokens=30)
	    # Full decoded sequence (prompt + answer) is still written to the server log.
	    print(processor.decode(output[0]))

	    # Show the answer in the app itself: stdout is not visible to the user.
	    # Drop the prompt tokens so only the newly generated text is displayed.
	    prompt_length = inputs["input_ids"].shape[-1]
	    guess = processor.decode(
	        output[0][prompt_length:], skip_special_tokens=True
	    ).strip()
	    st.write(guess)