Madhuri123 committed
Commit 2a17089 · verified · 1 Parent(s): 051047a

Update app.py

Files changed (1):
  1. app.py +48 -49
app.py CHANGED
@@ -1,55 +1,54 @@
 import streamlit as st
-from transformers import pipeline, AutoFeatureExtractor
-from PIL import Image
+import requests
 import torch
-
-# Load Hugging Face token
-HF_TOKEN = st.secrets["Hf_token"]
-
-# Model ID
-model_id = "meta-llama/Llama-3.2-11B-Vision"
-
-# Initialize feature extractor and pipeline
-@st.experimental_singleton
-def initialize_pipeline():
-    feature_extractor = AutoFeatureExtractor.from_pretrained(model_id, use_auth_token=HF_TOKEN)
-    model_pipeline = pipeline(
-        "image-to-text",
-        model=model_id,
-        feature_extractor=feature_extractor,
-        model_kwargs={"torch_dtype": torch.float32, "use_auth_token": HF_TOKEN},
+from PIL import Image
+from transformers import MllamaForConditionalGeneration, AutoProcessor
+
+def load_model_and_processor(model_id):
+    """Load the model and processor."""
+    model = MllamaForConditionalGeneration.from_pretrained(
+        model_id,
+        torch_dtype=torch.bfloat16,
+        device_map="auto",
     )
-    return feature_extractor, model_pipeline
-
-# Preprocess function using feature extractor
-def preprocess_image(image, feature_extractor):
-    """Preprocess the image for the model."""
-    inputs = feature_extractor(images=image, return_tensors="pt")
-    return inputs.pixel_values
-
-# Streamlit UI
-st.title("Image and Text to Text Generation")
-st.write(f"**Using model:** {model_id}")
-
-uploaded_file = st.file_uploader("Upload an image", type=["jpg", "png", "jpeg"])
-input_text = st.text_input("Enter your text input (optional):")
-
-if st.button("Generate"):
-    if uploaded_file:
-        try:
-            # Load and preprocess image
-            image = Image.open(uploaded_file).convert("RGB")
-            feature_extractor, model_pipeline = initialize_pipeline()
-            preprocessed_image = preprocess_image(image, feature_extractor)
-
-            # Run the pipeline
-            response = model_pipeline(images=preprocessed_image, text=input_text)
-            st.write("Generated Response:")
-            st.write(response)
-        except Exception as e:
-            st.error(f"Error: {e}")
-    else:
-        st.error("Please upload an image to proceed.")
+    processor = AutoProcessor.from_pretrained(model_id)
+    return model, processor
+
+def generate_text(model, processor, image_url, prompt):
+    """Generate text using the model and processor."""
+    try:
+        image = Image.open(requests.get(image_url, stream=True).raw)
+        inputs = processor(image, prompt, return_tensors="pt").to(model.device)
+        output = model.generate(**inputs, max_new_tokens=30)
+        return processor.decode(output[0])
+    except Exception as e:
+        return f"Error: {e}"
+
+# Streamlit App
+st.title("LLaMA 3 Vision Haiku Generator")
+
+# Model ID and loading
+MODEL_ID = "meta-llama/Llama-3.2-11B-Vision"
+model, processor = load_model_and_processor(MODEL_ID)
+
+# User input for image URL and prompt
+image_url = st.text_input("Enter the Image URL:", "https://huggingface.co/datasets/huggingface/documentation-images/resolve/0052a70beed5bf71b92610a43a52df6d286cd5f3/diffusers/rabbit.jpg")
+
+prompt = st.text_area("Enter your prompt:", "<|image|><|begin_of_text|>If I had to write a haiku for this one")
+
+if st.button("Generate Haiku"):
+    with st.spinner("Generating haiku..."):
+        result = generate_text(model, processor, image_url, prompt)
+
+    st.subheader("Generated Text")
+    st.write(result)
+
+    try:
+        st.image(image_url, caption="Input Image")
+    except Exception:
+        st.error("Failed to load image. Please check the URL.")
+
+
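Note on the new version: the removed code memoized its pipeline with @st.experimental_singleton (since deprecated), while the new load_model_and_processor is called at module level, so every Streamlit rerun triggered by a widget interaction can reload the 11B checkpoint. A minimal sketch of a cached variant, assuming Streamlit >= 1.18, where st.cache_resource is the replacement for st.experimental_singleton:

import streamlit as st
import torch
from transformers import MllamaForConditionalGeneration, AutoProcessor

@st.cache_resource  # keep one copy of the weights per process, across reruns
def load_model_and_processor(model_id):
    """Load the model and processor once and reuse them across Streamlit reruns."""
    model = MllamaForConditionalGeneration.from_pretrained(
        model_id,
        torch_dtype=torch.bfloat16,
        device_map="auto",
    )
    processor = AutoProcessor.from_pretrained(model_id)
    return model, processor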
 
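The removed code also read HF_TOKEN = st.secrets["Hf_token"] and passed it to from_pretrained; the new version drops authentication entirely, but meta-llama/Llama-3.2-11B-Vision is a gated repository, so loading will fail unless credentials are available some other way (for example, an already logged-in environment). A sketch of restoring the secret with token=, the current replacement for the deprecated use_auth_token=:

import streamlit as st
import torch
from transformers import MllamaForConditionalGeneration, AutoProcessor

MODEL_ID = "meta-llama/Llama-3.2-11B-Vision"
HF_TOKEN = st.secrets["Hf_token"]  # same secret name the removed code read

model = MllamaForConditionalGeneration.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    token=HF_TOKEN,  # `token=` supersedes the deprecated `use_auth_token=`
)
processor = AutoProcessor.from_pretrained(MODEL_ID, token=HF_TOKEN)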
 
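As written, processor.decode(output[0]) returns the prompt and special tokens along with the continuation, because generate prepends the input ids to its output for decoder models. If only the generated haiku should be displayed, a sketch that slices off the prompt tokens, reusing the inputs/output names from generate_text above:

# inside generate_text, after output = model.generate(**inputs, max_new_tokens=30):
prompt_len = inputs["input_ids"].shape[-1]   # number of tokens in the prompt
new_tokens = output[0][prompt_len:]          # keep only the newly generated ids
return processor.decode(new_tokens, skip_special_tokens=True)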