# Streamlit app: image + text -> text generation with Llama-3.2-11B-Vision.
# (Removed Hugging Face Spaces page-scrape residue that preceded the code.)
import streamlit as st
from transformers import pipeline, AutoFeatureExtractor
from PIL import Image
import torch

# Hugging Face access token, read from Streamlit secrets.
# BUG FIX: the token was previously fetched twice under two different keys
# ("Hf_token" first, then "hf_token"). The first lookup raised KeyError
# whenever only the lowercase key existed, and the second silently
# overwrote the first. Read it exactly once, under the key that the
# original code ultimately used.
HF_TOKEN = st.secrets["hf_token"]

# Gated vision-language model driven by the pipeline below.
# (The unused duplicate `model_id` variable was removed.)
MODEL_ID = "meta-llama/Llama-3.2-11B-Vision"
# Initialize the pipeline
@st.cache_resource
def initialize_pipeline():
    """Build (and cache) the image-text-to-text pipeline for MODEL_ID.

    Returns:
        A transformers pipeline ready for inference.

    `st.cache_resource` keeps the 11B-parameter model in memory across
    Streamlit reruns instead of reloading it on every button click.
    """
    return pipeline(
        "image-text-to-text",
        model=MODEL_ID,
        # BUG FIX: `use_auth_token` inside model_kwargs is deprecated and is
        # not how pipeline auth is meant to be passed; supply the access
        # token directly via the pipeline's `token` argument instead.
        token=HF_TOKEN,
        model_kwargs={"torch_dtype": torch.bfloat16},
    )
# Preprocess image function
def preprocess_image(image, size=(224, 224)):
    """Normalize *image* for the model: force RGB mode, then resize to *size*."""
    rgb_image = image.convert("RGB")
    return rgb_image.resize(size)
# --- Streamlit UI -----------------------------------------------------------
st.title("Image and Text to Text Generation")
st.write(f"**Using model:** {MODEL_ID}")

uploaded_file = st.file_uploader("Upload an image", type=["jpg", "png", "jpeg"])
input_text = st.text_input("Enter your text input (optional):")

if st.button("Generate"):
    if uploaded_file:
        try:
            # Normalize the upload to an RGB image at the model's input size.
            image = Image.open(uploaded_file)
            preprocessed_image = preprocess_image(image)

            # Build (or fetch the cached) pipeline, then run inference.
            model_pipeline = initialize_pipeline()
            inputs = {"images": [preprocessed_image], "text": input_text}
            response = model_pipeline(**inputs)

            st.write("Generated Response:")
            st.write(response)
        except ValueError as ve:
            # BUG FIX: the original compared str(ve) against one exact
            # message ("The number of image token (0) ... (1)"), which
            # silently stops matching whenever the library rewords it or
            # the image count differs. Match on the stable substring.
            if "image token" in str(ve):
                st.error(
                    "Make sure your image is correctly preprocessed and "
                    "passed to the model."
                )
            else:
                st.error(f"Error: {ve}")
        except Exception as e:
            st.error(f"Error: {e}")
    else:
        st.error("Please upload an image to proceed.")