Spaces:
Sleeping
Sleeping
import streamlit as st | |
import requests | |
import torch | |
from PIL import Image | |
from transformers import MllamaForConditionalGeneration, AutoProcessor | |
from huggingface_hub import login | |
# Authenticate with the Hugging Face Hub non-interactively.
# The access token is read from Streamlit's secrets store first and then
# handed to login(); calling login() without a token falls back to an
# interactive prompt, which cannot be answered when the app runs headless.
HF_TOKEN = st.secrets["newfinegrained"]
login(token=HF_TOKEN)
@st.cache_resource
def load_model_and_processor(model_id):
    """Load the vision-language model and its processor.

    Streamlit re-executes the whole script on every user interaction, so the
    result is cached with ``st.cache_resource``: the weights are loaded once
    per ``model_id`` and the same objects are reused on later reruns.

    Args:
        model_id: Identifier passed to ``from_pretrained`` for both the model
            and the processor.

    Returns:
        A ``(model, processor)`` tuple.
    """
    model = MllamaForConditionalGeneration.from_pretrained(
        model_id,
        torch_dtype=torch.bfloat16,  # load weights in bfloat16
        device_map="auto",  # let the library decide device placement
    )
    processor = AutoProcessor.from_pretrained(model_id)
    return model, processor
# def generate_text(model, processor, image_url, prompt):
#     """Generate text using the model and processor."""
#     try:
#         image = Image.open(requests.get(image_url, stream=True).raw)
#         inputs = processor(image, prompt, return_tensors="pt").to(model.device)
#         output = model.generate(**inputs, max_new_tokens=30)
#         return processor.decode(output[0])
#     except Exception as e:
#         return f"Error: {e}"
# --- Streamlit page ---------------------------------------------------------
st.title("LLaMA 3 Vision Haiku Generator")

# Checkpoint to serve; the model and processor are loaded at script start.
MODEL_ID = "meta-llama/Llama-3.2-11B-Vision"
model, processor = load_model_and_processor(model_id=MODEL_ID)

# Dump the module structure to stdout (shows up in the app logs).
print(model)
# User input for image URL and prompt
# image_url = st.text_input("Enter the Image URL:", "https://huggingface.co/datasets/huggingface/documentation-images/resolve/0052a70beed5bf71b92610a43a52df6d286cd5f3/diffusers/rabbit.jpg")
# prompt = st.text_area("Enter your prompt:", "<|image|><|begin_of_text|>If I had to write a haiku for this one")
# if st.button("Generate Haiku"):
#     with st.spinner("Generating haiku..."):
#         result = generate_text(model, processor, image_url, prompt)
#     st.subheader("Generated Text")
#     st.write(result)
#     try:
#         st.image(image_url, caption="Input Image")
#     except Exception:
#         st.error("Failed to load image. Please check the URL.")