import streamlit as st
import requests
import torch
from PIL import Image
from transformers import MllamaForConditionalGeneration, AutoProcessor

# Cache the model so Streamlit reruns (triggered by every widget interaction)
# don't reload the 11B weights from scratch
@st.cache_resource
def load_model_and_processor(model_id):
    """Load the model and processor."""
    model = MllamaForConditionalGeneration.from_pretrained(
        model_id,
        torch_dtype=torch.bfloat16,  # half-precision weights to reduce GPU memory
        device_map="auto",  # place layers on available devices automatically
    )
    processor = AutoProcessor.from_pretrained(model_id)
    return model, processor

def generate_text(model, processor, image_url, prompt):
    """Generate text for an image URL and prompt using the model and processor."""
    try:
        # Fetch and decode the image; the timeout avoids hanging on unreachable URLs
        image = Image.open(requests.get(image_url, stream=True, timeout=10).raw)
        inputs = processor(image, prompt, return_tensors="pt").to(model.device)
        output = model.generate(**inputs, max_new_tokens=30)
        return processor.decode(output[0])
    except Exception as e:
        return f"Error: {e}"

# Streamlit App
st.title("LLaMA 3 Vision Haiku Generator")

# Model ID and loading
MODEL_ID = "meta-llama/Llama-3.2-11B-Vision"
model, processor = load_model_and_processor(MODEL_ID)

# User input for image URL and prompt
image_url = st.text_input("Enter the Image URL:", "https://huggingface.co/datasets/huggingface/documentation-images/resolve/0052a70beed5bf71b92610a43a52df6d286cd5f3/diffusers/rabbit.jpg")
prompt = st.text_area("Enter your prompt:", "<|image|><|begin_of_text|>If I had to write a haiku for this one")
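# Note: the default prompt follows the Llama 3.2 Vision base-model format from
# its model card: <|image|> marks where the image embeddings are inserted,
# followed by <|begin_of_text|> and the text the model should continue.
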
if st.button("Generate Haiku"):
    with st.spinner("Generating haiku..."):
        result = generate_text(model, processor, image_url, prompt)
    st.subheader("Generated Text")
    st.write(result)

# Preview the input image alongside the generated text
try:
    st.image(image_url, caption="Input Image")
except Exception:
    st.error("Failed to load image. Please check the URL.")