# s2/app.py — Streamlit Space by Madhuri123 (commit 07391f2, verified)
# NOTE(review): the lines above this file's imports were HF file-viewer page
# residue ("raw / history blame / 2.12 kB"), repaired into this comment so the
# module is valid Python.
import streamlit as st
from transformers import pipeline,AutoFeatureExtractor
from PIL import Image
import torch
# Hugging Face access token for the gated Llama vision model.
# Read from Streamlit secrets (.streamlit/secrets.toml or the Space's secrets UI).
# The original loaded it twice under two differently-cased keys ("Hf_token",
# then "hf_token"); the second assignment always won, so only the final,
# lowercase key is kept — the final value is unchanged.
HF_TOKEN = st.secrets["hf_token"]

# Model and task configuration.
MODEL_ID = "meta-llama/Llama-3.2-11B-Vision"
model_id = MODEL_ID  # backward-compatible alias (older code referenced lowercase name)
# Initialize the pipeline
@st.cache_resource
def initialize_pipeline():
    """Build and cache the image-text-to-text pipeline for this session.

    ``st.cache_resource`` ensures the multi-GB model is loaded only once
    per server process rather than on every Streamlit rerun.

    Returns:
        A transformers ``pipeline`` for the "image-text-to-text" task,
        loaded in bfloat16 to reduce memory use.
    """
    return pipeline(
        "image-text-to-text",
        model=MODEL_ID,
        # Pass the auth token via the supported `token` argument;
        # `use_auth_token` inside model_kwargs is deprecated in transformers.
        token=HF_TOKEN,
        model_kwargs={"torch_dtype": torch.bfloat16},
    )
# Preprocess image function
def preprocess_image(image, size=(224, 224)):
    """Normalize an uploaded image for the vision model.

    Forces the image into RGB mode (dropping alpha / palette modes),
    then scales it to ``size`` (default 224x224). Returns the new image.
    """
    rgb_image = image.convert("RGB")
    return rgb_image.resize(size)
# Streamlit UI
# NOTE: statement order defines the rendered page layout; the widget result
# names (`uploaded_file`, `input_text`) are read by the Generate handler below.
st.title("Image and Text to Text Generation")
st.write(f"**Using model:** {MODEL_ID}")
# Returns an UploadedFile (or None until the user uploads something).
uploaded_file = st.file_uploader("Upload an image", type=["jpg", "png", "jpeg"])
# Optional text prompt; empty string when left blank.
input_text = st.text_input("Enter your text input (optional):")
# Generate button handler: preprocess the upload, run the cached pipeline,
# and render the result or a targeted error message.
if st.button("Generate"):
    if uploaded_file:
        try:
            # Preprocess image: force RGB and resize to the model's input size.
            image = Image.open(uploaded_file)
            preprocessed_image = preprocess_image(image)

            # Initialize (or fetch the cached) pipeline only when needed.
            model_pipeline = initialize_pipeline()

            # Run the model on the image plus the optional text prompt.
            response = model_pipeline(images=[preprocessed_image], text=input_text)

            st.write("Generated Response:")
            st.write(response)
        except ValueError as ve:
            # transformers rewords this message between versions, so match a
            # stable substring; the original exact-string `==` comparison
            # silently fell through to the generic branch on any variant.
            if "number of image token" in str(ve):
                st.error("Make sure your image is correctly preprocessed and passed to the model.")
            else:
                st.error(f"Error: {ve}")
        except Exception as e:
            # Top-level UI boundary: surface any other failure to the user.
            st.error(f"Error: {e}")
    else:
        st.error("Please upload an image to proceed.")