Spaces:
Sleeping
Sleeping
File size: 2,370 Bytes
fa07468 2a17089 d17f3c6 2a17089 ba9839b 05e38b4 a8abd47 c12c5de 22a5d55 c12c5de 2a17089 ba9839b 07391f2 2a17089 51c6e45 c12c5de 51c6e45 a8abd47 8a67b9c 3b88e0c a8abd47 51c6e45 a8abd47 51c6e45 2a17089 8a67b9c 2a17089 999f0ff 2a17089 c12c5de 999f0ff 51c6e45 2a17089 51c6e45 2a17089 51c6e45 2a17089 051047a c12c5de a8abd47 49786bf 07391f2 038d603 722c6e5 fa07468 cb4d59e fa07468 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 |
import streamlit as st
import requests
import torch
from PIL import Image
from transformers import MllamaForConditionalGeneration, AutoProcessor
from huggingface_hub import login
import io
# Authenticate with Hugging Face
# NOTE(review): the token is read from Streamlit secrets under the key
# "newfinegrained"; st.secrets raises if the entry is missing, which will
# abort the app at startup. login() registers the token for all later
# from_pretrained calls (needed for the gated Llama weights).
HF_TOKEN = st.secrets["newfinegrained"]
login(HF_TOKEN)
@st.cache_resource(show_spinner="Loading model...")
def load_model_and_processor(model_id):
    """Load and cache the vision-language model and its processor.

    Streamlit re-executes the entire script on every widget interaction.
    Without caching, the multi-billion-parameter checkpoint would be
    reloaded from disk on each rerun; ``st.cache_resource`` keeps one
    shared instance alive for the lifetime of the server process and
    returns it on subsequent calls with the same ``model_id``.

    Args:
        model_id: Hugging Face repository id of the model to load
            (e.g. "meta-llama/Llama-3.2-11B-Vision").

    Returns:
        tuple: ``(model, processor)`` — the loaded
        ``MllamaForConditionalGeneration`` and its ``AutoProcessor``.
    """
    model = MllamaForConditionalGeneration.from_pretrained(
        model_id,
        torch_dtype=torch.bfloat16,  # half-precision: fits large weights in memory
        device_map="auto",           # let accelerate place layers on available devices
    )
    processor = AutoProcessor.from_pretrained(model_id)
    return model, processor
def generate_text(model, processor, image_url, prompt):
    """Fetch an image from a URL and generate text conditioned on it.

    Args:
        model: a loaded ``MllamaForConditionalGeneration`` instance.
        processor: the matching ``AutoProcessor``.
        image_url: HTTP(S) URL expected to point at an image.
        prompt: text prompt; should contain the model's image marker tokens.

    Returns:
        str: the decoded generation, or an ``"Error: ..."`` message on any
        failure (network error, non-image URL, model error).
    """
    try:
        # Fetch the image; without a timeout a dead host would hang the
        # Streamlit worker indefinitely.
        response = requests.get(image_url, timeout=20)
        response.raise_for_status()  # surface 4xx/5xx as an exception
        # Validate content type defensively: .get() avoids a KeyError
        # (and its confusing "Error: 'Content-Type'" message) when the
        # server omits the header entirely.
        if "image" not in response.headers.get("Content-Type", ""):
            return "Error: The provided URL does not point to a valid image."
        # Open the image and normalize to RGB so palette/alpha images do
        # not reach the processor in an unexpected mode.
        image = Image.open(io.BytesIO(response.content)).convert("RGB")
        # Process the image and prompt into model-ready tensors on the
        # model's device.
        inputs = processor(image, prompt, return_tensors="pt").to(model.device)
        output = model.generate(**inputs, max_new_tokens=30)
        # Decode the generated token ids back to text.
        return processor.decode(output[0])
    except Exception as e:
        # Broad catch is deliberate: this is the UI boundary, and any
        # failure should surface as a readable message, not a traceback.
        return f"Error: {e}"
# Streamlit App
# Top-level script body: Streamlit re-executes everything below on every
# widget interaction.
st.title("LLaMA 3.2 Vision")
# Model ID and loading — runs on each rerun of the script.
MODEL_ID = "meta-llama/Llama-3.2-11B-Vision"
model, processor = load_model_and_processor(MODEL_ID)
# User input for image URL and prompt.  The defaults give a working demo
# image and a prompt carrying the model's image/begin-of-text marker tokens.
image_url = st.text_input(
    "Enter the Image URL:",
    "https://huggingface.co/datasets/huggingface/documentation-images/resolve/0052a70beed5bf71b92610a43a52df6d286cd5f3/diffusers/rabbit.jpg"
)
prompt = st.text_area(
    "Enter your prompt:",
    "<|image|><|begin_of_text|>If I had to write a haiku for this one"
)
# Button to generate haiku — generation only runs when clicked.
if st.button("Generate Text"):
    with st.spinner("Generating Text..."):
        result = generate_text(model, processor, image_url, prompt)
        st.subheader("Generated Text")
        st.write(result)
        # Echo the input image below the result; a bad or unreachable URL
        # is reported as an error message rather than crashing the app.
        try:
            st.image(image_url, caption="Input Image")
        except Exception:
            st.error("Failed to load image. Please check the URL.")
|